source/rotate.cc - Issue 2617703002: Add MSA optimized rotate functions (used 16x16 transpose)

Side by Side Diff: source/rotate.cc

Issue 2617703002: Add MSA optimized rotate functions (used 16x16 transpose) (Closed)

Patch Set: correct file mode Created 3 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 /*	1 /*

2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.	2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.

3 *	3 *

4 * Use of this source code is governed by a BSD-style license	4 * Use of this source code is governed by a BSD-style license

5 * that can be found in the LICENSE file in the root of the source	5 * that can be found in the LICENSE file in the root of the source

6 * tree. An additional intellectual property rights grant can be found	6 * tree. An additional intellectual property rights grant can be found

7 * in the file PATENTS. All contributing project authors may	7 * in the file PATENTS. All contributing project authors may

8 * be found in the AUTHORS file in the root of the source tree.	8 * be found in the AUTHORS file in the root of the source tree.

9 */	9 */

10	10

(...skipping 11 matching lines...) Expand all Loading...
22 #endif	22 #endif

23	23

24 LIBYUV_API	24 LIBYUV_API

25 void TransposePlane(const uint8* src,	25 void TransposePlane(const uint8* src,

26 int src_stride,	26 int src_stride,

27 uint8* dst,	27 uint8* dst,

28 int dst_stride,	28 int dst_stride,

29 int width,	29 int width,

30 int height) {	30 int height) {

31 int i = height;	31 int i = height;

	32 #if defined(HAS_TRANSPOSEWX16_MSA)

	33 void (*TransposeWx16)(const uint8* src, int src_stride, uint8* dst,

	34 int dst_stride, int width) = TransposeWx16_C;

	35 #else

32 void (*TransposeWx8)(const uint8* src, int src_stride, uint8* dst,	36 void (*TransposeWx8)(const uint8* src, int src_stride, uint8* dst,

33 int dst_stride, int width) = TransposeWx8_C;	37 int dst_stride, int width) = TransposeWx8_C;

	38 #endif

34 #if defined(HAS_TRANSPOSEWX8_NEON)	39 #if defined(HAS_TRANSPOSEWX8_NEON)

35 if (TestCpuFlag(kCpuHasNEON)) {	40 if (TestCpuFlag(kCpuHasNEON)) {

36 TransposeWx8 = TransposeWx8_NEON;	41 TransposeWx8 = TransposeWx8_NEON;

37 }	42 }

38 #endif	43 #endif

39 #if defined(HAS_TRANSPOSEWX8_SSSE3)	44 #if defined(HAS_TRANSPOSEWX8_SSSE3)

40 if (TestCpuFlag(kCpuHasSSSE3)) {	45 if (TestCpuFlag(kCpuHasSSSE3)) {

41 TransposeWx8 = TransposeWx8_Any_SSSE3;	46 TransposeWx8 = TransposeWx8_Any_SSSE3;

42 if (IS_ALIGNED(width, 8)) {	47 if (IS_ALIGNED(width, 8)) {

43 TransposeWx8 = TransposeWx8_SSSE3;	48 TransposeWx8 = TransposeWx8_SSSE3;

(...skipping 11 matching lines...) Expand all Loading...
55 #if defined(HAS_TRANSPOSEWX8_DSPR2)	60 #if defined(HAS_TRANSPOSEWX8_DSPR2)

56 if (TestCpuFlag(kCpuHasDSPR2)) {	61 if (TestCpuFlag(kCpuHasDSPR2)) {

57 if (IS_ALIGNED(width, 4) && IS_ALIGNED(src, 4) &&	62 if (IS_ALIGNED(width, 4) && IS_ALIGNED(src, 4) &&

58 IS_ALIGNED(src_stride, 4)) {	63 IS_ALIGNED(src_stride, 4)) {

59 TransposeWx8 = TransposeWx8_Fast_DSPR2;	64 TransposeWx8 = TransposeWx8_Fast_DSPR2;

60 } else {	65 } else {

61 TransposeWx8 = TransposeWx8_DSPR2;	66 TransposeWx8 = TransposeWx8_DSPR2;

62 }	67 }

63 }	68 }

64 #endif	69 #endif

65 #if defined(HAS_TRANSPOSEWX8_MSA)	70 #if defined(HAS_TRANSPOSEWX16_MSA)

66 if (TestCpuFlag(kCpuHasMSA)) {	71 if (TestCpuFlag(kCpuHasMSA)) {

67 TransposeWx8 = TransposeWx8_Any_MSA;	72 TransposeWx16 = TransposeWx16_Any_MSA;

68 if (IS_ALIGNED(width, 16)) {	73 if (IS_ALIGNED(width, 16)) {

69 TransposeWx8 = TransposeWx8_MSA;	74 TransposeWx16 = TransposeWx16_MSA;

70 }	75 }

71 }	76 }

72 #endif	77 #endif

73	78

	79 #if defined(HAS_TRANSPOSEWX16_MSA)

	80 // Work across the source in 16x16 tiles

	81 while (i >= 16) {

	82 TransposeWx16(src, src_stride, dst, dst_stride, width);

	83 src += 16 * src_stride; // Go down 16 rows.

	84 dst += 16; // Move over 16 columns.

	85 i -= 16;

	86 }

	87 #else

74 // Work across the source in 8x8 tiles	88 // Work across the source in 8x8 tiles

75 while (i >= 8) {	89 while (i >= 8) {

76 TransposeWx8(src, src_stride, dst, dst_stride, width);	90 TransposeWx8(src, src_stride, dst, dst_stride, width);

77 src += 8 * src_stride; // Go down 8 rows.	91 src += 8 * src_stride; // Go down 8 rows.

78 dst += 8; // Move over 8 columns.	92 dst += 8; // Move over 8 columns.

79 i -= 8;	93 i -= 8;

80 }	94 }

	95 #endif

81	96

82 if (i > 0) {	97 if (i > 0) {

83 TransposeWxH_C(src, src_stride, dst, dst_stride, width, i);	98 TransposeWxH_C(src, src_stride, dst, dst_stride, width, i);

84 }	99 }

85 }	100 }

86	101

87 LIBYUV_API	102 LIBYUV_API

88 void RotatePlane90(const uint8* src,	103 void RotatePlane90(const uint8* src,

89 int src_stride,	104 int src_stride,

90 uint8* dst,	105 uint8* dst,

(...skipping 120 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
211 LIBYUV_API	226 LIBYUV_API

212 void TransposeUV(const uint8* src,	227 void TransposeUV(const uint8* src,

213 int src_stride,	228 int src_stride,

214 uint8* dst_a,	229 uint8* dst_a,

215 int dst_stride_a,	230 int dst_stride_a,

216 uint8* dst_b,	231 uint8* dst_b,

217 int dst_stride_b,	232 int dst_stride_b,

218 int width,	233 int width,

219 int height) {	234 int height) {

220 int i = height;	235 int i = height;

	236 #if defined(HAS_TRANSPOSEUVWX16_MSA)

	237 void (*TransposeUVWx16)(const uint8* src, int src_stride, uint8* dst_a,

	238 int dst_stride_a, uint8* dst_b, int dst_stride_b,

	239 int width) = TransposeUVWx16_C;

	240 #else

221 void (*TransposeUVWx8)(const uint8* src, int src_stride, uint8* dst_a,	241 void (*TransposeUVWx8)(const uint8* src, int src_stride, uint8* dst_a,

222 int dst_stride_a, uint8* dst_b, int dst_stride_b,	242 int dst_stride_a, uint8* dst_b, int dst_stride_b,

223 int width) = TransposeUVWx8_C;	243 int width) = TransposeUVWx8_C;

	244 #endif

224 #if defined(HAS_TRANSPOSEUVWX8_NEON)	245 #if defined(HAS_TRANSPOSEUVWX8_NEON)

225 if (TestCpuFlag(kCpuHasNEON)) {	246 if (TestCpuFlag(kCpuHasNEON)) {

226 TransposeUVWx8 = TransposeUVWx8_NEON;	247 TransposeUVWx8 = TransposeUVWx8_NEON;

227 }	248 }

228 #endif	249 #endif

229 #if defined(HAS_TRANSPOSEUVWX8_SSE2)	250 #if defined(HAS_TRANSPOSEUVWX8_SSE2)

230 if (TestCpuFlag(kCpuHasSSE2)) {	251 if (TestCpuFlag(kCpuHasSSE2)) {

231 TransposeUVWx8 = TransposeUVWx8_Any_SSE2;	252 TransposeUVWx8 = TransposeUVWx8_Any_SSE2;

232 if (IS_ALIGNED(width, 8)) {	253 if (IS_ALIGNED(width, 8)) {

233 TransposeUVWx8 = TransposeUVWx8_SSE2;	254 TransposeUVWx8 = TransposeUVWx8_SSE2;

234 }	255 }

235 }	256 }

236 #endif	257 #endif

237 #if defined(HAS_TRANSPOSEUVWX8_DSPR2)	258 #if defined(HAS_TRANSPOSEUVWX8_DSPR2)

238 if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 2) && IS_ALIGNED(src, 4) &&	259 if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 2) && IS_ALIGNED(src, 4) &&

239 IS_ALIGNED(src_stride, 4)) {	260 IS_ALIGNED(src_stride, 4)) {

240 TransposeUVWx8 = TransposeUVWx8_DSPR2;	261 TransposeUVWx8 = TransposeUVWx8_DSPR2;

241 }	262 }

242 #endif	263 #endif

243 #if defined(HAS_TRANSPOSEUVWX8_MSA)	264 #if defined(HAS_TRANSPOSEUVWX16_MSA)

244 if (TestCpuFlag(kCpuHasMSA)) {	265 if (TestCpuFlag(kCpuHasMSA)) {

245 TransposeUVWx8 = TransposeUVWx8_Any_MSA;	266 TransposeUVWx16 = TransposeUVWx16_Any_MSA;

246 if (IS_ALIGNED(width, 8)) {	267 if (IS_ALIGNED(width, 8)) {

247 TransposeUVWx8 = TransposeUVWx8_MSA;	268 TransposeUVWx16 = TransposeUVWx16_MSA;

248 }	269 }

249 }	270 }

250 #endif	271 #endif

251	272

	273 #if defined(HAS_TRANSPOSEUVWX16_MSA)

	274 // Work through the source in 16x16 tiles.

	275 while (i >= 16) {

	276 TransposeUVWx16(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b,

	277 width);

	278 src += 16 * src_stride; // Go down 16 rows.

	279 dst_a += 16; // Move over 16 columns.

	280 dst_b += 16; // Move over 16 columns.

	281 i -= 16;

	282 }

	283 #else

252 // Work through the source in 8x8 tiles.	284 // Work through the source in 8x8 tiles.

253 while (i >= 8) {	285 while (i >= 8) {

254 TransposeUVWx8(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b,	286 TransposeUVWx8(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b,

255 width);	287 width);

256 src += 8 * src_stride; // Go down 8 rows.	288 src += 8 * src_stride; // Go down 8 rows.

257 dst_a += 8; // Move over 8 columns.	289 dst_a += 8; // Move over 8 columns.

258 dst_b += 8; // Move over 8 columns.	290 dst_b += 8; // Move over 8 columns.

259 i -= 8;	291 i -= 8;

260 }	292 }

	293 #endif

261	294

262 if (i > 0) {	295 if (i > 0) {

263 TransposeUVWxH_C(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b,	296 TransposeUVWxH_C(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b,

264 width, i);	297 width, i);

265 }	298 }

266 }	299 }

267	300

268 LIBYUV_API	301 LIBYUV_API

269 void RotateUV90(const uint8* src,	302 void RotateUV90(const uint8* src,

270 int src_stride,	303 int src_stride,

(...skipping 231 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
502 default:	535 default:

503 break;	536 break;

504 }	537 }

505 return -1;	538 return -1;

506 }	539 }

507	540

508 #ifdef __cplusplus	541 #ifdef __cplusplus

509 } // extern "C"	542 } // extern "C"

510 } // namespace libyuv	543 } // namespace libyuv

511 #endif	544 #endif

OLD	NEW

« no previous file with comments | « source/convert_argb.cc ('k') | source/rotate_any.cc » ('j') | no next file with comments »