Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(364)

Side by Side Diff: source/rotate.cc

Issue 2617703002: Add MSA optimized rotate functions (used 16x16 transpose) (Closed)
Patch Set: correct file mode Created 3 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/convert_argb.cc ('k') | source/rotate_any.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 11 matching lines...) Expand all
22 #endif 22 #endif
23 23
24 LIBYUV_API 24 LIBYUV_API
25 void TransposePlane(const uint8* src, 25 void TransposePlane(const uint8* src,
26 int src_stride, 26 int src_stride,
27 uint8* dst, 27 uint8* dst,
28 int dst_stride, 28 int dst_stride,
29 int width, 29 int width,
30 int height) { 30 int height) {
31 int i = height; 31 int i = height;
32 #if defined(HAS_TRANSPOSEWX16_MSA)
33 void (*TransposeWx16)(const uint8* src, int src_stride, uint8* dst,
34 int dst_stride, int width) = TransposeWx16_C;
35 #else
32 void (*TransposeWx8)(const uint8* src, int src_stride, uint8* dst, 36 void (*TransposeWx8)(const uint8* src, int src_stride, uint8* dst,
33 int dst_stride, int width) = TransposeWx8_C; 37 int dst_stride, int width) = TransposeWx8_C;
38 #endif
34 #if defined(HAS_TRANSPOSEWX8_NEON) 39 #if defined(HAS_TRANSPOSEWX8_NEON)
35 if (TestCpuFlag(kCpuHasNEON)) { 40 if (TestCpuFlag(kCpuHasNEON)) {
36 TransposeWx8 = TransposeWx8_NEON; 41 TransposeWx8 = TransposeWx8_NEON;
37 } 42 }
38 #endif 43 #endif
39 #if defined(HAS_TRANSPOSEWX8_SSSE3) 44 #if defined(HAS_TRANSPOSEWX8_SSSE3)
40 if (TestCpuFlag(kCpuHasSSSE3)) { 45 if (TestCpuFlag(kCpuHasSSSE3)) {
41 TransposeWx8 = TransposeWx8_Any_SSSE3; 46 TransposeWx8 = TransposeWx8_Any_SSSE3;
42 if (IS_ALIGNED(width, 8)) { 47 if (IS_ALIGNED(width, 8)) {
43 TransposeWx8 = TransposeWx8_SSSE3; 48 TransposeWx8 = TransposeWx8_SSSE3;
(...skipping 11 matching lines...) Expand all
55 #if defined(HAS_TRANSPOSEWX8_DSPR2) 60 #if defined(HAS_TRANSPOSEWX8_DSPR2)
56 if (TestCpuFlag(kCpuHasDSPR2)) { 61 if (TestCpuFlag(kCpuHasDSPR2)) {
57 if (IS_ALIGNED(width, 4) && IS_ALIGNED(src, 4) && 62 if (IS_ALIGNED(width, 4) && IS_ALIGNED(src, 4) &&
58 IS_ALIGNED(src_stride, 4)) { 63 IS_ALIGNED(src_stride, 4)) {
59 TransposeWx8 = TransposeWx8_Fast_DSPR2; 64 TransposeWx8 = TransposeWx8_Fast_DSPR2;
60 } else { 65 } else {
61 TransposeWx8 = TransposeWx8_DSPR2; 66 TransposeWx8 = TransposeWx8_DSPR2;
62 } 67 }
63 } 68 }
64 #endif 69 #endif
65 #if defined(HAS_TRANSPOSEWX8_MSA) 70 #if defined(HAS_TRANSPOSEWX16_MSA)
66 if (TestCpuFlag(kCpuHasMSA)) { 71 if (TestCpuFlag(kCpuHasMSA)) {
67 TransposeWx8 = TransposeWx8_Any_MSA; 72 TransposeWx16 = TransposeWx16_Any_MSA;
68 if (IS_ALIGNED(width, 16)) { 73 if (IS_ALIGNED(width, 16)) {
69 TransposeWx8 = TransposeWx8_MSA; 74 TransposeWx16 = TransposeWx16_MSA;
70 } 75 }
71 } 76 }
72 #endif 77 #endif
73 78
79 #if defined(HAS_TRANSPOSEWX16_MSA)
80 // Work across the source in 16x16 tiles
81 while (i >= 16) {
82 TransposeWx16(src, src_stride, dst, dst_stride, width);
83 src += 16 * src_stride; // Go down 16 rows.
84 dst += 16; // Move over 16 columns.
85 i -= 16;
86 }
87 #else
74 // Work across the source in 8x8 tiles 88 // Work across the source in 8x8 tiles
75 while (i >= 8) { 89 while (i >= 8) {
76 TransposeWx8(src, src_stride, dst, dst_stride, width); 90 TransposeWx8(src, src_stride, dst, dst_stride, width);
77 src += 8 * src_stride; // Go down 8 rows. 91 src += 8 * src_stride; // Go down 8 rows.
78 dst += 8; // Move over 8 columns. 92 dst += 8; // Move over 8 columns.
79 i -= 8; 93 i -= 8;
80 } 94 }
95 #endif
81 96
82 if (i > 0) { 97 if (i > 0) {
83 TransposeWxH_C(src, src_stride, dst, dst_stride, width, i); 98 TransposeWxH_C(src, src_stride, dst, dst_stride, width, i);
84 } 99 }
85 } 100 }
86 101
87 LIBYUV_API 102 LIBYUV_API
88 void RotatePlane90(const uint8* src, 103 void RotatePlane90(const uint8* src,
89 int src_stride, 104 int src_stride,
90 uint8* dst, 105 uint8* dst,
(...skipping 120 matching lines...) Expand 10 before | Expand all | Expand 10 after
211 LIBYUV_API 226 LIBYUV_API
212 void TransposeUV(const uint8* src, 227 void TransposeUV(const uint8* src,
213 int src_stride, 228 int src_stride,
214 uint8* dst_a, 229 uint8* dst_a,
215 int dst_stride_a, 230 int dst_stride_a,
216 uint8* dst_b, 231 uint8* dst_b,
217 int dst_stride_b, 232 int dst_stride_b,
218 int width, 233 int width,
219 int height) { 234 int height) {
220 int i = height; 235 int i = height;
236 #if defined(HAS_TRANSPOSEUVWX16_MSA)
237 void (*TransposeUVWx16)(const uint8* src, int src_stride, uint8* dst_a,
238 int dst_stride_a, uint8* dst_b, int dst_stride_b,
239 int width) = TransposeUVWx16_C;
240 #else
221 void (*TransposeUVWx8)(const uint8* src, int src_stride, uint8* dst_a, 241 void (*TransposeUVWx8)(const uint8* src, int src_stride, uint8* dst_a,
222 int dst_stride_a, uint8* dst_b, int dst_stride_b, 242 int dst_stride_a, uint8* dst_b, int dst_stride_b,
223 int width) = TransposeUVWx8_C; 243 int width) = TransposeUVWx8_C;
244 #endif
224 #if defined(HAS_TRANSPOSEUVWX8_NEON) 245 #if defined(HAS_TRANSPOSEUVWX8_NEON)
225 if (TestCpuFlag(kCpuHasNEON)) { 246 if (TestCpuFlag(kCpuHasNEON)) {
226 TransposeUVWx8 = TransposeUVWx8_NEON; 247 TransposeUVWx8 = TransposeUVWx8_NEON;
227 } 248 }
228 #endif 249 #endif
229 #if defined(HAS_TRANSPOSEUVWX8_SSE2) 250 #if defined(HAS_TRANSPOSEUVWX8_SSE2)
230 if (TestCpuFlag(kCpuHasSSE2)) { 251 if (TestCpuFlag(kCpuHasSSE2)) {
231 TransposeUVWx8 = TransposeUVWx8_Any_SSE2; 252 TransposeUVWx8 = TransposeUVWx8_Any_SSE2;
232 if (IS_ALIGNED(width, 8)) { 253 if (IS_ALIGNED(width, 8)) {
233 TransposeUVWx8 = TransposeUVWx8_SSE2; 254 TransposeUVWx8 = TransposeUVWx8_SSE2;
234 } 255 }
235 } 256 }
236 #endif 257 #endif
237 #if defined(HAS_TRANSPOSEUVWX8_DSPR2) 258 #if defined(HAS_TRANSPOSEUVWX8_DSPR2)
238 if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 2) && IS_ALIGNED(src, 4) && 259 if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 2) && IS_ALIGNED(src, 4) &&
239 IS_ALIGNED(src_stride, 4)) { 260 IS_ALIGNED(src_stride, 4)) {
240 TransposeUVWx8 = TransposeUVWx8_DSPR2; 261 TransposeUVWx8 = TransposeUVWx8_DSPR2;
241 } 262 }
242 #endif 263 #endif
243 #if defined(HAS_TRANSPOSEUVWX8_MSA) 264 #if defined(HAS_TRANSPOSEUVWX16_MSA)
244 if (TestCpuFlag(kCpuHasMSA)) { 265 if (TestCpuFlag(kCpuHasMSA)) {
245 TransposeUVWx8 = TransposeUVWx8_Any_MSA; 266 TransposeUVWx16 = TransposeUVWx16_Any_MSA;
246 if (IS_ALIGNED(width, 8)) { 267 if (IS_ALIGNED(width, 8)) {
247 TransposeUVWx8 = TransposeUVWx8_MSA; 268 TransposeUVWx16 = TransposeUVWx16_MSA;
248 } 269 }
249 } 270 }
250 #endif 271 #endif
251 272
273 #if defined(HAS_TRANSPOSEUVWX16_MSA)
274 // Work through the source in 16x16 tiles.
275 while (i >= 16) {
276 TransposeUVWx16(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b,
277 width);
278 src += 16 * src_stride; // Go down 16 rows.
279 dst_a += 16; // Move over 16 columns.
280 dst_b += 16; // Move over 16 columns.
281 i -= 16;
282 }
283 #else
252 // Work through the source in 8x8 tiles. 284 // Work through the source in 8x8 tiles.
253 while (i >= 8) { 285 while (i >= 8) {
254 TransposeUVWx8(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, 286 TransposeUVWx8(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b,
255 width); 287 width);
256 src += 8 * src_stride; // Go down 8 rows. 288 src += 8 * src_stride; // Go down 8 rows.
257 dst_a += 8; // Move over 8 columns. 289 dst_a += 8; // Move over 8 columns.
258 dst_b += 8; // Move over 8 columns. 290 dst_b += 8; // Move over 8 columns.
259 i -= 8; 291 i -= 8;
260 } 292 }
293 #endif
261 294
262 if (i > 0) { 295 if (i > 0) {
263 TransposeUVWxH_C(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, 296 TransposeUVWxH_C(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b,
264 width, i); 297 width, i);
265 } 298 }
266 } 299 }
267 300
268 LIBYUV_API 301 LIBYUV_API
269 void RotateUV90(const uint8* src, 302 void RotateUV90(const uint8* src,
270 int src_stride, 303 int src_stride,
(...skipping 231 matching lines...) Expand 10 before | Expand all | Expand 10 after
502 default: 535 default:
503 break; 536 break;
504 } 537 }
505 return -1; 538 return -1;
506 } 539 }
507 540
508 #ifdef __cplusplus 541 #ifdef __cplusplus
509 } // extern "C" 542 } // extern "C"
510 } // namespace libyuv 543 } // namespace libyuv
511 #endif 544 #endif
OLDNEW
« no previous file with comments | « source/convert_argb.cc ('k') | source/rotate_any.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698