OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2014 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2014 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
11 #include "libyuv/row.h" | 11 #include "libyuv/row.h" |
12 #include "libyuv/rotate_row.h" | 12 #include "libyuv/rotate_row.h" |
13 | 13 |
14 #include "libyuv/basic_types.h" | 14 #include "libyuv/basic_types.h" |
15 | 15 |
16 #ifdef __cplusplus | 16 #ifdef __cplusplus |
17 namespace libyuv { | 17 namespace libyuv { |
18 extern "C" { | 18 extern "C" { |
19 #endif | 19 #endif |
20 | 20 |
21 // This module is for GCC Neon armv8 64 bit. | 21 // This module is for GCC Neon armv8 64 bit. |
22 #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) | 22 #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) |
23 | 23 |
24 static uvec8 kVTbl4x4Transpose = | 24 static uvec8 kVTbl4x4Transpose = |
25 { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 }; | 25 { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 }; |
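
Reviewer sketch of what this table does (illustration only, not libyuv code; the helper name below is hypothetical): kVTbl4x4Transpose is used as the index operand of a NEON "tbl" byte permutation, where byte i of the result is byte kVTbl4x4Transpose[i] of the 16-byte input. When the input holds four 4-byte rows back to back, that permutation is exactly a 4x4 byte transpose, as the scalar model shows.

#include <stdint.h>

// Scalar model of "tbl v0.16b, {v0.16b}, vIdx.16b" with the table above.
static void Tbl4x4TransposeSketch(const uint8_t src[16], uint8_t dst[16]) {
  static const uint8_t kIdx[16] = {0, 4, 8, 12, 1, 5, 9, 13,
                                   2, 6, 10, 14, 3, 7, 11, 15};
  for (int i = 0; i < 16; ++i) {
    dst[i] = src[kIdx[i]];  // result row r is source column r
  }
}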
26 | 26 |
27 void TransposeWx8_NEON(const uint8* src, int src_stride, | 27 void TransposeWx8_NEON(const uint8* src, int src_stride, |
28 uint8* dst, int dst_stride, int width) { | 28 uint8* dst, int dst_stride, int width) { |
29 const uint8* src_temp = NULL; | 29 const uint8* src_temp; |
30 int64 width64 = (int64) width; // Work around clang 3.4 warning. | 30 int64 width64 = (int64) width; // Work around clang 3.4 warning. |
31 asm volatile ( | 31 asm volatile ( |
32 // loops are on blocks of 8. loop will stop when | 32 // loops are on blocks of 8. loop will stop when |
33 // counter gets to or below 0. starting the counter | 33 // counter gets to or below 0. starting the counter |
34 // at w-8 allows for this | 34 // at w-8 allows for this |
35 "sub %3, %3, #8 \n" | 35 "sub %3, %3, #8 \n" |
36 | 36 |
37 // handle 8x8 blocks. this should be the majority of the plane | 37 // handle 8x8 blocks. this should be the majority of the plane |
38 "1: \n" | 38 "1: \n" |
39 "mov %0, %1 \n" | 39 "mov %0, %1 \n" |
(...skipping 188 matching lines...)
228 MEMACCESS(1) | 228 MEMACCESS(1) |
229 "ld1 {v0.b}[6], [%1], %5 \n" | 229 "ld1 {v0.b}[6], [%1], %5 \n" |
230 MEMACCESS(1) | 230 MEMACCESS(1) |
231 "ld1 {v0.b}[7], [%1] \n" | 231 "ld1 {v0.b}[7], [%1] \n" |
232 | 232 |
233 MEMACCESS(2) | 233 MEMACCESS(2) |
234 "st1 {v0.8b}, [%2] \n" | 234 "st1 {v0.8b}, [%2] \n" |
235 | 235 |
236 "4: \n" | 236 "4: \n" |
237 | 237 |
238 : "+r"(src_temp), // %0 | 238 : "=&r"(src_temp), // %0 |
239 "+r"(src), // %1 | 239 "+r"(src), // %1 |
240 "+r"(dst), // %2 | 240 "+r"(dst), // %2 |
241 "+r"(width64) // %3 | 241 "+r"(width64) // %3 |
242 : "r"(&kVTbl4x4Transpose), // %4 | 242 : "r"(&kVTbl4x4Transpose), // %4 |
243 "r"(static_cast<ptrdiff_t>(src_stride)), // %5 | 243 "r"(static_cast<ptrdiff_t>(src_stride)), // %5 |
244 "r"(static_cast<ptrdiff_t>(dst_stride)) // %6 | 244 "r"(static_cast<ptrdiff_t>(dst_stride)) // %6 |
245 : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", | 245 : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", |
246 "v17", "v18", "v19", "v20", "v21", "v22", "v23" | 246 "v17", "v18", "v19", "v20", "v21", "v22", "v23" |
247 ); | 247 ); |
248 } | 248 } |
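
Reviewer sketch of the operand-constraint change above (illustration only, not libyuv code; the helper name is hypothetical): src_temp is written inside the asm before it is ever read, so it can be a write-only output ("=&r") instead of a read-write operand ("+r") that had to be initialized with NULL. The early-clobber "&" also keeps the compiler from assigning src_temp the same register as a pure input such as the stride, which is still read after %0 has been written.

#include <stddef.h>
#include <stdint.h>

static void GatherColumnSketch(const uint8_t* src, int src_stride,
                               uint8_t* dst) {
  const uint8_t* src_temp;  // scratch pointer; no initializer needed
  asm volatile(
    "mov   %0, %1                    \n"  // src_temp = src (first write to %0)
    "ld1   {v0.b}[0], [%0], %3       \n"  // one byte per source row,
    "ld1   {v0.b}[1], [%0], %3       \n"  // stepping src_temp by src_stride
    "ld1   {v0.b}[2], [%0], %3       \n"
    "ld1   {v0.b}[3], [%0]           \n"
    "st1   {v0.s}[0], [%2]           \n"  // store the gathered 4-byte column
    : "=&r"(src_temp),                          // %0: write-only, early-clobber
      "+r"(src),                                // %1
      "+r"(dst)                                 // %2
    : "r"(static_cast<ptrdiff_t>(src_stride))   // %3
    : "memory", "v0"
  );
}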
249 | 249 |
250 static uint8 kVTbl4x4TransposeDi[32] = | 250 static uint8 kVTbl4x4TransposeDi[32] = |
251 { 0, 16, 32, 48, 2, 18, 34, 50, 4, 20, 36, 52, 6, 22, 38, 54, | 251 { 0, 16, 32, 48, 2, 18, 34, 50, 4, 20, 36, 52, 6, 22, 38, 54, |
252 1, 17, 33, 49, 3, 19, 35, 51, 5, 21, 37, 53, 7, 23, 39, 55}; | 252 1, 17, 33, 49, 3, 19, 35, 51, 5, 21, 37, 53, 7, 23, 39, 55}; |
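
As I read the second table (reviewer note, hedged): kVTbl4x4TransposeDi is laid out for a multi-register "tbl" lookup, where index n selects byte (n % 16) of table register (n / 16) and out-of-range indices produce 0. The indices 0, 16, 32, 48, ... therefore gather the even bytes of four table registers in the first 16 outputs and the odd bytes in the last 16, which matches de-interleaving U and V while transposing. A scalar model, assuming the four table registers sit back to back in a 64-byte array (hypothetical helper, illustration only):

#include <stdint.h>

// Scalar model of two "tbl vD.16b, {v0.16b-v3.16b}, vIdx.16b" lookups.
static void Tbl4RegSketch(const uint8_t table[64], const uint8_t idx[32],
                          uint8_t dst[32]) {
  for (int i = 0; i < 32; ++i) {
    dst[i] = (idx[i] < 64) ? table[idx[i]] : 0;  // tbl zeroes out-of-range lanes
  }
}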
253 | 253 |
254 void TransposeUVWx8_NEON(const uint8* src, int src_stride, | 254 void TransposeUVWx8_NEON(const uint8* src, int src_stride, |
255 uint8* dst_a, int dst_stride_a, | 255 uint8* dst_a, int dst_stride_a, |
256 uint8* dst_b, int dst_stride_b, | 256 uint8* dst_b, int dst_stride_b, |
257 int width) { | 257 int width) { |
258 const uint8* src_temp = NULL; | 258 const uint8* src_temp; |
259 int64 width64 = (int64) width; // Work around clang 3.4 warning. | 259 int64 width64 = (int64) width; // Work around clang 3.4 warning. |
260 asm volatile ( | 260 asm volatile ( |
261 // loops are on blocks of 8. loop will stop when | 261 // loops are on blocks of 8. loop will stop when |
262 // counter gets to or below 0. starting the counter | 262 // counter gets to or below 0. starting the counter |
263 // at w-8 allows for this | 263 // at w-8 allows for this |
264 "sub %4, %4, #8 \n" | 264 "sub %4, %4, #8 \n" |
265 | 265 |
266 // handle 8x8 blocks. this should be the majority of the plane | 266 // handle 8x8 blocks. this should be the majority of the plane |
267 "1: \n" | 267 "1: \n" |
268 "mov %0, %1 \n" | 268 "mov %0, %1 \n" |
(...skipping 244 matching lines...)
513 MEMACCESS(1) | 513 MEMACCESS(1) |
514 "ld2 {v0.b, v1.b}[7], [%1] \n" | 514 "ld2 {v0.b, v1.b}[7], [%1] \n" |
515 | 515 |
516 MEMACCESS(2) | 516 MEMACCESS(2) |
517 "st1 {v0.d}[0], [%2] \n" | 517 "st1 {v0.d}[0], [%2] \n" |
518 MEMACCESS(3) | 518 MEMACCESS(3) |
519 "st1 {v1.d}[0], [%3] \n" | 519 "st1 {v1.d}[0], [%3] \n" |
520 | 520 |
521 "4: \n" | 521 "4: \n" |
522 | 522 |
523 : "+r"(src_temp), // %0 | 523 : "=&r"(src_temp), // %0 |
524 "+r"(src), // %1 | 524 "+r"(src), // %1 |
525 "+r"(dst_a), // %2 | 525 "+r"(dst_a), // %2 |
526 "+r"(dst_b), // %3 | 526 "+r"(dst_b), // %3 |
527 "+r"(width64) // %4 | 527 "+r"(width64) // %4 |
528 : "r"(static_cast<ptrdiff_t>(src_stride)), // %5 | 528 : "r"(static_cast<ptrdiff_t>(src_stride)), // %5 |
529 "r"(static_cast<ptrdiff_t>(dst_stride_a)), // %6 | 529 "r"(static_cast<ptrdiff_t>(dst_stride_a)), // %6 |
530 "r"(static_cast<ptrdiff_t>(dst_stride_b)), // %7 | 530 "r"(static_cast<ptrdiff_t>(dst_stride_b)), // %7 |
531 "r"(&kVTbl4x4TransposeDi) // %8 | 531 "r"(&kVTbl4x4TransposeDi) // %8 |
532 : "memory", "cc", | 532 : "memory", "cc", |
533 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", | 533 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", |
534 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", | 534 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", |
535 "v30", "v31" | 535 "v30", "v31" |
536 ); | 536 ); |
537 } | 537 } |
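
For reference, a rough scalar equivalent of what this kernel computes, as I read the code (illustration only; libyuv's actual C fallback may differ, and the helper name is hypothetical): each source row holds interleaved U/V byte pairs, and an 8-row block is transposed into separate U and V planes.

#include <stdint.h>

static void TransposeUVWx8Sketch(const uint8_t* src, int src_stride,
                                 uint8_t* dst_a, int dst_stride_a,
                                 uint8_t* dst_b, int dst_stride_b,
                                 int width) {
  for (int x = 0; x < width; ++x) {    // x indexes U/V pairs within a row
    for (int y = 0; y < 8; ++y) {      // y indexes the 8 source rows
      dst_a[x * dst_stride_a + y] = src[y * src_stride + 2 * x + 0];  // U
      dst_b[x * dst_stride_b + y] = src[y * src_stride + 2 * x + 1];  // V
    }
  }
}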
538 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) | 538 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) |
539 | 539 |
540 #ifdef __cplusplus | 540 #ifdef __cplusplus |
541 } // extern "C" | 541 } // extern "C" |
542 } // namespace libyuv | 542 } // namespace libyuv |
543 #endif | 543 #endif |