Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(118)

Side by Side Diff: source/rotate_neon.cc

Issue 1895743008: Remove zero-initialization of output variables for inline assembly. (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: Use early-clobber ("=&r") constraints for all output-only operands so they are not assigned the same registers as inputs. Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « include/libyuv/version.h ('k') | source/rotate_neon64.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include "libyuv/row.h" 11 #include "libyuv/row.h"
12 #include "libyuv/rotate_row.h" 12 #include "libyuv/rotate_row.h"
13 13
14 #include "libyuv/basic_types.h" 14 #include "libyuv/basic_types.h"
15 15
16 #ifdef __cplusplus 16 #ifdef __cplusplus
17 namespace libyuv { 17 namespace libyuv {
18 extern "C" { 18 extern "C" {
19 #endif 19 #endif
20 20
21 #if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \ 21 #if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
22 !defined(__aarch64__) 22 !defined(__aarch64__)
23 23
24 static uvec8 kVTbl4x4Transpose = 24 static uvec8 kVTbl4x4Transpose =
25 { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 }; 25 { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
26 26
27 void TransposeWx8_NEON(const uint8* src, int src_stride, 27 void TransposeWx8_NEON(const uint8* src, int src_stride,
28 uint8* dst, int dst_stride, 28 uint8* dst, int dst_stride,
29 int width) { 29 int width) {
30 const uint8* src_temp = NULL; 30 const uint8* src_temp;
31 asm volatile ( 31 asm volatile (
32 // loops are on blocks of 8. loop will stop when 32 // loops are on blocks of 8. loop will stop when
33 // counter gets to or below 0. starting the counter 33 // counter gets to or below 0. starting the counter
34 // at w-8 allow for this 34 // at w-8 allow for this
35 "sub %5, #8 \n" 35 "sub %5, #8 \n"
36 36
37 // handle 8x8 blocks. this should be the majority of the plane 37 // handle 8x8 blocks. this should be the majority of the plane
38 "1: \n" 38 "1: \n"
39 "mov %0, %1 \n" 39 "mov %0, %1 \n"
40 40
(...skipping 181 matching lines...) Expand 10 before | Expand all | Expand 10 after
222 MEMACCESS(1) 222 MEMACCESS(1)
223 "vld1.8 {d0[6]}, [%1], %2 \n" 223 "vld1.8 {d0[6]}, [%1], %2 \n"
224 MEMACCESS(1) 224 MEMACCESS(1)
225 "vld1.8 {d0[7]}, [%1] \n" 225 "vld1.8 {d0[7]}, [%1] \n"
226 226
227 MEMACCESS(3) 227 MEMACCESS(3)
228 "vst1.64 {d0}, [%3] \n" 228 "vst1.64 {d0}, [%3] \n"
229 229
230 "4: \n" 230 "4: \n"
231 231
232 : "+r"(src_temp), // %0 232 : "=&r"(src_temp), // %0
233 "+r"(src), // %1 233 "+r"(src), // %1
234 "+r"(src_stride), // %2 234 "+r"(src_stride), // %2
235 "+r"(dst), // %3 235 "+r"(dst), // %3
236 "+r"(dst_stride), // %4 236 "+r"(dst_stride), // %4
237 "+r"(width) // %5 237 "+r"(width) // %5
238 : "r"(&kVTbl4x4Transpose) // %6 238 : "r"(&kVTbl4x4Transpose) // %6
239 : "memory", "cc", "q0", "q1", "q2", "q3" 239 : "memory", "cc", "q0", "q1", "q2", "q3"
240 ); 240 );
241 } 241 }
242 242
243 static uvec8 kVTbl4x4TransposeDi = 243 static uvec8 kVTbl4x4TransposeDi =
244 { 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15 }; 244 { 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15 };
245 245
246 void TransposeUVWx8_NEON(const uint8* src, int src_stride, 246 void TransposeUVWx8_NEON(const uint8* src, int src_stride,
247 uint8* dst_a, int dst_stride_a, 247 uint8* dst_a, int dst_stride_a,
248 uint8* dst_b, int dst_stride_b, 248 uint8* dst_b, int dst_stride_b,
249 int width) { 249 int width) {
250 const uint8* src_temp = NULL; 250 const uint8* src_temp;
251 asm volatile ( 251 asm volatile (
252 // loops are on blocks of 8. loop will stop when 252 // loops are on blocks of 8. loop will stop when
253 // counter gets to or below 0. starting the counter 253 // counter gets to or below 0. starting the counter
254 // at w-8 allow for this 254 // at w-8 allow for this
255 "sub %7, #8 \n" 255 "sub %7, #8 \n"
256 256
257 // handle 8x8 blocks. this should be the majority of the plane 257 // handle 8x8 blocks. this should be the majority of the plane
258 "1: \n" 258 "1: \n"
259 "mov %0, %1 \n" 259 "mov %0, %1 \n"
260 260
(...skipping 244 matching lines...) Expand 10 before | Expand all | Expand 10 after
505 MEMACCESS(1) 505 MEMACCESS(1)
506 "vld2.8 {d0[7], d1[7]}, [%1] \n" 506 "vld2.8 {d0[7], d1[7]}, [%1] \n"
507 507
508 MEMACCESS(3) 508 MEMACCESS(3)
509 "vst1.64 {d0}, [%3] \n" 509 "vst1.64 {d0}, [%3] \n"
510 MEMACCESS(5) 510 MEMACCESS(5)
511 "vst1.64 {d1}, [%5] \n" 511 "vst1.64 {d1}, [%5] \n"
512 512
513 "4: \n" 513 "4: \n"
514 514
515 : "+r"(src_temp), // %0 515 : "=&r"(src_temp), // %0
516 "+r"(src), // %1 516 "+r"(src), // %1
517 "+r"(src_stride), // %2 517 "+r"(src_stride), // %2
518 "+r"(dst_a), // %3 518 "+r"(dst_a), // %3
519 "+r"(dst_stride_a), // %4 519 "+r"(dst_stride_a), // %4
520 "+r"(dst_b), // %5 520 "+r"(dst_b), // %5
521 "+r"(dst_stride_b), // %6 521 "+r"(dst_stride_b), // %6
522 "+r"(width) // %7 522 "+r"(width) // %7
523 : "r"(&kVTbl4x4TransposeDi) // %8 523 : "r"(&kVTbl4x4TransposeDi) // %8
524 : "memory", "cc", 524 : "memory", "cc",
525 "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11" 525 "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"
526 ); 526 );
527 } 527 }
528 #endif // defined(__ARM_NEON__) && !defined(__aarch64__) 528 #endif // defined(__ARM_NEON__) && !defined(__aarch64__)
529 529
530 #ifdef __cplusplus 530 #ifdef __cplusplus
531 } // extern "C" 531 } // extern "C"
532 } // namespace libyuv 532 } // namespace libyuv
533 #endif 533 #endif
OLD | NEW
« no previous file with comments | « include/libyuv/version.h ('k') | source/rotate_neon64.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698