OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2014 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2014 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 1459 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1470 #endif // HAS_ARGBTOUV444ROW_NEON | 1470 #endif // HAS_ARGBTOUV444ROW_NEON |
1471 | 1471 |
// RGBTOUV_SETUP_REG: assembly-string fragment meant to be pasted at the top
// of an inline-asm block; it loads the constants used for RGB -> U/V
// conversion into fixed vector registers:
//   v20.8h = 56   v21.8h = 37   v22.8h = 19   v23.8h = 9   v24.8h = 47
//   v25.16b = 0x80 in every byte, i.e. 0x8080 when read as 16-bit lanes.
// The per-line comments give the coefficient each constant stands for; the
// "/ 2" suggests the values are halved for inputs that are sums of two
// pixels -- NOTE(review): confirm against every kernel that uses this macro.
// An asm block using this fragment overwrites v20-v25 and should therefore
// name them in its clobber list.
1472 #define RGBTOUV_SETUP_REG \ | 1472 #define RGBTOUV_SETUP_REG \ |
1473 "movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \ | 1473 "movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \ |
1474 "movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2 */ \ | 1474 "movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2 */ \ |
1475 "movi v22.8h, #19, lsl #0 \n" /* UR coefficient (-0.2969) / 2 */ \ | 1475 "movi v22.8h, #19, lsl #0 \n" /* UR coefficient (-0.2969) / 2 */ \ |
1476 "movi v23.8h, #9, lsl #0 \n" /* VB coefficient (-0.1406) / 2 */ \ | 1476 "movi v23.8h, #9, lsl #0 \n" /* VB coefficient (-0.1406) / 2 */ \ |
1477 "movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \ | 1477 "movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \ |
1478 "movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */ | 1478 "movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */ |
1479 | 1479 |
1480 // 16x1 pixels -> 8x1. width is number of argb pixels. e.g. 16. | |
// ARGBToUV422Row_NEON: converts one row of ARGB pixels into a row of U and a
// row of V samples, producing one U and one V byte per pair of horizontally
// adjacent input pixels.
//
//   src_argb  source row; 64 bytes (16 pixels) are read per loop iteration.
//   dst_u     destination for U; 8 bytes are written per loop iteration.
//   dst_v     destination for V; 8 bytes are written per loop iteration.
//   width     number of ARGB pixels; decremented by 16 per iteration.
//
// Per iteration:
//   1. ld4 de-interleaves 16 pixels into v0/v1/v2/v3 (labelled B/G/R below;
//      v3 is loaded but then reused as the U accumulator).
//   2. uaddlp adds neighbouring byte pairs, so every 16-bit lane holds the
//      sum of two adjacent pixels of one channel.
//   3. U = (56*B - 37*G - 19*R + 0x8080) >> 8
//      V = (56*R - 47*G -  9*B + 0x8080) >> 8
//      using the constants that RGBTOUV_SETUP_REG places in v20-v25. The
//      lines commented "+128" add v25, which holds 0x8080 per 16-bit lane.
//      uqshrn does the >> 8 with unsigned saturation while narrowing to bytes.
//
// The loop body runs before the remaining count is tested (subs ... b.gt), so
// at least 16 pixels are always processed and work is done in whole groups of
// 16. NOTE(review): presumably callers pass a positive multiple of 16 for
// width -- confirm against the call sites.
// NOTE(review): the clobber list names v5-v7, which the asm shown here never
// touches; MEMACCESS() is defined elsewhere and not visible in this chunk.
1481 #ifdef HAS_ARGBTOUV422ROW_NEON | |
1482 void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, | |
1483 int width) { | |
1484 asm volatile ( | |
1485 RGBTOUV_SETUP_REG | |
1486 "1: \n" | |
1487 MEMACCESS(0) | |
1488 "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. | |
1489 | |
1490 "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts. | |
1491 "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts. | |
1492 "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts. | |
1493 | |
1494 "subs %w3, %w3, #16 \n" // 16 processed per loop. | |
1495 "mul v3.8h, v0.8h, v20.8h \n" // B | |
1496 "mls v3.8h, v1.8h, v21.8h \n" // G | |
1497 "mls v3.8h, v2.8h, v22.8h \n" // R | |
1498 "add v3.8h, v3.8h, v25.8h \n" // +128 -> unsigned | |
1499 | |
1500 "mul v4.8h, v2.8h, v20.8h \n" // R | |
1501 "mls v4.8h, v1.8h, v24.8h \n" // G | |
1502 "mls v4.8h, v0.8h, v23.8h \n" // B | |
1503 "add v4.8h, v4.8h, v25.8h \n" // +128 -> unsigned | |
1504 | |
1505 "uqshrn v0.8b, v3.8h, #8 \n" // 16 bit to 8 bit U | |
1506 "uqshrn v1.8b, v4.8h, #8 \n" // 16 bit to 8 bit V | |
1507 | |
1508 MEMACCESS(1) | |
1509 "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels U. | |
1510 MEMACCESS(2) | |
1511 "st1 {v1.8b}, [%2], #8 \n" // store 8 pixels V. | |
1512 "b.gt 1b \n" | |
1513 : "+r"(src_argb), // %0 | |
1514 "+r"(dst_u), // %1 | |
1515 "+r"(dst_v), // %2 | |
1516 "+r"(width) // %3 | |
1517 : | |
1518 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", | |
1519 "v20", "v21", "v22", "v23", "v24", "v25" | |
1520 ); | |
1521 } | |
1522 #endif // HAS_ARGBTOUV422ROW_NEON | |
1523 | |
1524 // 32x1 pixels -> 8x1. width is number of argb pixels. e.g. 32. | 1480 // 32x1 pixels -> 8x1. width is number of argb pixels. e.g. 32. |
1525 #ifdef HAS_ARGBTOUV411ROW_NEON | 1481 #ifdef HAS_ARGBTOUV411ROW_NEON |
1526 void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, | 1482 void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, |
1527 int width) { | 1483 int width) { |
1528 asm volatile ( | 1484 asm volatile ( |
1529 RGBTOUV_SETUP_REG | 1485 RGBTOUV_SETUP_REG |
1530 "1: \n" | 1486 "1: \n" |
1531 MEMACCESS(0) | 1487 MEMACCESS(0) |
1532 "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. | 1488 "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. |
1533 "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts. | 1489 "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts. |
(...skipping 1462 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2996 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List | 2952 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List |
2997 ); | 2953 ); |
2998 } | 2954 } |
2999 #endif // HAS_SOBELYROW_NEON | 2955 #endif // HAS_SOBELYROW_NEON |
3000 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) | 2956 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) |
3001 | 2957 |
3002 #ifdef __cplusplus | 2958 #ifdef __cplusplus |
3003 } // extern "C" | 2959 } // extern "C" |
3004 } // namespace libyuv | 2960 } // namespace libyuv |
3005 #endif | 2961 #endif |
OLD | NEW |