Chromium Code Reviews

Diff: source/rotate_neon64.cc

Issue 1895743008: Remove initialize to zero on output variables for inline. (Closed)
Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: use early write for all outputs to avoid them being reassigned to input (created 4 years, 8 months ago)
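This patch set replaces the NULL initialization of src_temp with an early write inside the asm statement, switching the corresponding operand constraint from "+r" to "=&r". For background (my reading of GCC extended-asm constraints, not something stated in the review itself): "+r" declares a read-write operand, so the C variable has to be initialized before the asm block or the compiler can warn about a read of an uninitialized value, which is what the old src_temp = NULL was silencing; "=&r" declares a write-only, early-clobbered output, which needs no initializer and is guaranteed not to be allocated to the same register as any input. A minimal aarch64 sketch of the pattern, using a hypothetical helper that is not part of libyuv:

  #include <cstdint>

  // Hypothetical illustration only: a scratch pointer that the asm writes
  // before reading, mirroring how src_temp is used in rotate_neon64.cc.
  static inline uint32_t FirstByte(const uint8_t* src) {
    const uint8_t* tmp;  // no initializer needed with "=&r"
    uint32_t out;
    asm volatile(
        "mov  %0, %2      \n"  // tmp = src (the output is written early)
        "ldrb %w1, [%0]   \n"  // out = *tmp
        : "=&r"(tmp),          // write-only, early-clobber: never shares a
                               // register with the inputs
          "=r"(out)
        : "r"(src)
        : "memory");
    return out;
  }

With "+r"(tmp) the operand would also count as an input, so the uninitialized read would have to be silenced with an explicit tmp = NULL, which is exactly the initialization this change removes.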
/*
 * Copyright 2014 The LibYuv Project Authors. All rights reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/row.h"
#include "libyuv/rotate_row.h"

#include "libyuv/basic_types.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// This module is for GCC Neon armv8 64 bit.
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)

static uvec8 kVTbl4x4Transpose =
  { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };

void TransposeWx8_NEON(const uint8* src, int src_stride,
                       uint8* dst, int dst_stride, int width) {
-  const uint8* src_temp = NULL;
+  const uint8* src_temp;
  int64 width64 = (int64) width;  // Work around clang 3.4 warning.
  asm volatile (
    // loops are on blocks of 8. loop will stop when
    // counter gets to or below 0. starting the counter
    // at w-8 allow for this
    "sub %3, %3, #8                              \n"

    // handle 8x8 blocks. this should be the majority of the plane
    "1:                                          \n"
      "mov %0, %1                                \n"
(...skipping 188 matching lines...)
      MEMACCESS(1)
    "ld1 {v0.b}[6], [%1], %5                    \n"
      MEMACCESS(1)
    "ld1 {v0.b}[7], [%1]                        \n"

      MEMACCESS(2)
    "st1 {v0.8b}, [%2]                          \n"

    "4:                                         \n"

-  : "+r"(src_temp),                            // %0
+  : "=&r"(src_temp),                           // %0
    "+r"(src),                                  // %1
    "+r"(dst),                                  // %2
    "+r"(width64)                               // %3
  : "r"(&kVTbl4x4Transpose),                    // %4
    "r"(static_cast<ptrdiff_t>(src_stride)),    // %5
    "r"(static_cast<ptrdiff_t>(dst_stride))     // %6
  : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
    "v17", "v18", "v19", "v20", "v21", "v22", "v23"
  );
}

static uint8 kVTbl4x4TransposeDi[32] =
  { 0, 16, 32, 48, 2, 18, 34, 50, 4, 20, 36, 52, 6, 22, 38, 54,
    1, 17, 33, 49, 3, 19, 35, 51, 5, 21, 37, 53, 7, 23, 39, 55};

void TransposeUVWx8_NEON(const uint8* src, int src_stride,
                         uint8* dst_a, int dst_stride_a,
                         uint8* dst_b, int dst_stride_b,
                         int width) {
-  const uint8* src_temp = NULL;
+  const uint8* src_temp;
  int64 width64 = (int64) width;  // Work around clang 3.4 warning.
  asm volatile (
    // loops are on blocks of 8. loop will stop when
    // counter gets to or below 0. starting the counter
    // at w-8 allow for this
    "sub %4, %4, #8                              \n"

    // handle 8x8 blocks. this should be the majority of the plane
    "1:                                          \n"
      "mov %0, %1                                \n"
(...skipping 244 matching lines...)
      MEMACCESS(1)
    "ld2 {v0.b, v1.b}[7], [%1]                  \n"

      MEMACCESS(2)
    "st1 {v0.d}[0], [%2]                        \n"
      MEMACCESS(3)
    "st1 {v1.d}[0], [%3]                        \n"

    "4:                                         \n"

-  : "+r"(src_temp),                            // %0
+  : "=&r"(src_temp),                           // %0
    "+r"(src),                                  // %1
    "+r"(dst_a),                                // %2
    "+r"(dst_b),                                // %3
    "+r"(width64)                               // %4
  : "r"(static_cast<ptrdiff_t>(src_stride)),    // %5
    "r"(static_cast<ptrdiff_t>(dst_stride_a)),  // %6
    "r"(static_cast<ptrdiff_t>(dst_stride_b)),  // %7
    "r"(&kVTbl4x4TransposeDi)                   // %8
  : "memory", "cc",
    "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
    "v30", "v31"
  );
}
#endif  // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif
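As a side note on the kVTbl4x4Transpose constant in this file (my own gloss, not part of the change under review): the NEON tbl instruction is a byte gather, dst[i] = data[indices[i]], and with this index vector a 16-byte register holding a row-major 4x4 block of bytes comes out transposed. A plain C sketch of the same permutation, for illustration only:

  #include <stdint.h>

  // Same index values as kVTbl4x4Transpose; this scalar loop is only an
  // illustration, not the NEON implementation in rotate_neon64.cc.
  static const uint8_t kTranspose4x4[16] =
      {0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15};

  static void Transpose4x4Bytes(const uint8_t src[16], uint8_t dst[16]) {
    for (int i = 0; i < 16; ++i) {
      dst[i] = src[kTranspose4x4[i]];  // dst(r, c) = src(c, r) for a 4x4 block
    }
  }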