Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(934)

Side by Side Diff: source/row_neon64.cc

Issue 1574253004: refactor ARGBToI422 using ARGBToI420 internally (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_neon.cc ('k') | source/row_win.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2014 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2014 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 1459 matching lines...) Expand 10 before | Expand all | Expand 10 after
1470 #endif // HAS_ARGBTOUV444ROW_NEON 1470 #endif // HAS_ARGBTOUV444ROW_NEON
1471 1471
1472 #define RGBTOUV_SETUP_REG \ 1472 #define RGBTOUV_SETUP_REG \
1473 "movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \ 1473 "movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \
1474 "movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2 */ \ 1474 "movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2 */ \
1475 "movi v22.8h, #19, lsl #0 \n" /* UR coefficient (-0.2969) / 2 */ \ 1475 "movi v22.8h, #19, lsl #0 \n" /* UR coefficient (-0.2969) / 2 */ \
1476 "movi v23.8h, #9, lsl #0 \n" /* VB coefficient (-0.1406) / 2 */ \ 1476 "movi v23.8h, #9, lsl #0 \n" /* VB coefficient (-0.1406) / 2 */ \
1477 "movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \ 1477 "movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \
1478 "movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */ 1478 "movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */
1479 1479
1480 // 16x1 ARGB pixels -> 8x1 U and 8x1 V samples. width is the number of ARGB pixels, e.g. 16.
1481 #ifdef HAS_ARGBTOUV422ROW_NEON
1482 void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
1483 int width) {
1484 asm volatile (
1485 RGBTOUV_SETUP_REG
1486 "1: \n"
1487 MEMACCESS(0)
1488 "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels.
1489
1490 "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts.
1491 "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
1492 "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts.
1493
1494 "subs %w3, %w3, #16 \n" // 16 processed per loop.
1495 "mul v3.8h, v0.8h, v20.8h \n" // B
1496 "mls v3.8h, v1.8h, v21.8h \n" // G
1497 "mls v3.8h, v2.8h, v22.8h \n" // R
1498 "add v3.8h, v3.8h, v25.8h \n" // +128.5 (0x8080) -> unsigned
1499
1500 "mul v4.8h, v2.8h, v20.8h \n" // R
1501 "mls v4.8h, v1.8h, v24.8h \n" // G
1502 "mls v4.8h, v0.8h, v23.8h \n" // B
1503 "add v4.8h, v4.8h, v25.8h \n" // +128.5 (0x8080) -> unsigned
1504
1505 "uqshrn v0.8b, v3.8h, #8 \n" // 16 bit to 8 bit U
1506 "uqshrn v1.8b, v4.8h, #8 \n" // 16 bit to 8 bit V
1507
1508 MEMACCESS(1)
1509 "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels U.
1510 MEMACCESS(2)
1511 "st1 {v1.8b}, [%2], #8 \n" // store 8 pixels V.
1512 "b.gt 1b \n"
1513 : "+r"(src_argb), // %0
1514 "+r"(dst_u), // %1
1515 "+r"(dst_v), // %2
1516 "+r"(width) // %3
1517 :
1518 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
1519 "v20", "v21", "v22", "v23", "v24", "v25"
1520 );
1521 }
1522 #endif // HAS_ARGBTOUV422ROW_NEON
1523
1524 // 32x1 pixels -> 8x1. width is number of argb pixels. e.g. 32. 1480 // 32x1 pixels -> 8x1. width is number of argb pixels. e.g. 32.
1525 #ifdef HAS_ARGBTOUV411ROW_NEON 1481 #ifdef HAS_ARGBTOUV411ROW_NEON
1526 void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, 1482 void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
1527 int width) { 1483 int width) {
1528 asm volatile ( 1484 asm volatile (
1529 RGBTOUV_SETUP_REG 1485 RGBTOUV_SETUP_REG
1530 "1: \n" 1486 "1: \n"
1531 MEMACCESS(0) 1487 MEMACCESS(0)
1532 "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. 1488 "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels.
1533 "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts. 1489 "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts.
(...skipping 1462 matching lines...) Expand 10 before | Expand all | Expand 10 after
2996 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List 2952 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
2997 ); 2953 );
2998 } 2954 }
2999 #endif // HAS_SOBELYROW_NEON 2955 #endif // HAS_SOBELYROW_NEON
3000 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) 2956 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
3001 2957
3002 #ifdef __cplusplus 2958 #ifdef __cplusplus
3003 } // extern "C" 2959 } // extern "C"
3004 } // namespace libyuv 2960 } // namespace libyuv
3005 #endif 2961 #endif
OLDNEW
« no previous file with comments | « source/row_neon.cc ('k') | source/row_win.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698