| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2014 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2014 The LibYuv Project Authors. All rights reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 1459 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1470 #endif // HAS_ARGBTOUV444ROW_NEON | 1470 #endif // HAS_ARGBTOUV444ROW_NEON |
| 1471 | 1471 |
| 1472 #define RGBTOUV_SETUP_REG \ | 1472 #define RGBTOUV_SETUP_REG \ |
| 1473 "movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2, in every 16-bit lane of v20 */ \ | 1473 "movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2, in every 16-bit lane of v20 */ \ |
| 1474 "movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2, stored as the magnitude 37; the sign is left to the consuming code (e.g. mls) */ \ | 1474 "movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2, stored as the magnitude 37; the sign is left to the consuming code (e.g. mls) */ \ |
| 1475 "movi v22.8h, #19, lsl #0 \n" /* UR coefficient (-0.2969) / 2, stored as the magnitude 19; the sign is left to the consuming code (e.g. mls) */ \ | 1475 "movi v22.8h, #19, lsl #0 \n" /* UR coefficient (-0.2969) / 2, stored as the magnitude 19; the sign is left to the consuming code (e.g. mls) */ \ |
| 1476 "movi v23.8h, #9, lsl #0 \n" /* VB coefficient (-0.1406) / 2, stored as the magnitude 9; the sign is left to the consuming code (e.g. mls) */ \ | 1476 "movi v23.8h, #9, lsl #0 \n" /* VB coefficient (-0.1406) / 2, stored as the magnitude 9; the sign is left to the consuming code (e.g. mls) */ \ |
| 1477 "movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2, stored as the magnitude 47; the sign is left to the consuming code (e.g. mls) */ \ | 1477 "movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2, stored as the magnitude 47; the sign is left to the consuming code (e.g. mls) */ \ |
| 1478 "movi v25.16b, #0x80 \n" /* 128.5: every byte is 0x80, so each 16-bit lane reads 0x8080 (8.8 fixed point) */ | 1478 "movi v25.16b, #0x80 \n" /* 128.5: every byte is 0x80, so each 16-bit lane reads 0x8080 (8.8 fixed point) */ |
| 1479 | 1479 |
| 1480 // 16x1 ARGB pixels -> 8x1 U and 8x1 V; each output sample is computed from the sum of two horizontally adjacent pixels. width is the number of ARGB pixels, consumed 16 per loop iteration, e.g. 16. | |
| 1481 #ifdef HAS_ARGBTOUV422ROW_NEON | |
| 1482 void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, | |
| 1483 int width) { | |
| 1484 asm volatile ( | |
| 1485 RGBTOUV_SETUP_REG /* coefficients into v20-v24, 0x8080 bias into v25 */ | |
| 1486 "1: \n" | |
| 1487 MEMACCESS(0) | |
| 1488 "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels de-interleaved: v0=B, v1=G, v2=R (v3 is loaded, then reused below as the U accumulator). | |
| 1489 | |
| 1490 "uaddlp v0.8h, v0.16b \n" // B: sum adjacent byte pairs, 16 bytes -> 8 shorts. | |
| 1491 "uaddlp v1.8h, v1.16b \n" // G: sum adjacent byte pairs, 16 bytes -> 8 shorts. | |
| 1492 "uaddlp v2.8h, v2.16b \n" // R: sum adjacent byte pairs, 16 bytes -> 8 shorts. | |
| 1493 | |
| 1494 "subs %w3, %w3, #16 \n" // 16 pixels consumed per loop; sets the flags tested by b.gt below. | |
| 1495 "mul v3.8h, v0.8h, v20.8h \n" // U = B * UB (v20) | |
| 1496 "mls v3.8h, v1.8h, v21.8h \n" // U -= G * UG (v21) | |
| 1497 "mls v3.8h, v2.8h, v22.8h \n" // U -= R * UR (v22) | |
| 1498 "add v3.8h, v3.8h, v25.8h \n" // U += 0x8080 (128.5 bias) -> unsigned | |
| 1499 | |
| 1500 "mul v4.8h, v2.8h, v20.8h \n" // V = R * VR (v20) | |
| 1501 "mls v4.8h, v1.8h, v24.8h \n" // V -= G * VG (v24) | |
| 1502 "mls v4.8h, v0.8h, v23.8h \n" // V -= B * VB (v23) | |
| 1503 "add v4.8h, v4.8h, v25.8h \n" // V += 0x8080 (128.5 bias) -> unsigned | |
| 1504 | |
| 1505 "uqshrn v0.8b, v3.8h, #8 \n" // >> 8 with unsigned saturation: 16 bit to 8 bit U | |
| 1506 "uqshrn v1.8b, v4.8h, #8 \n" // >> 8 with unsigned saturation: 16 bit to 8 bit V | |
| 1507 | |
| 1508 MEMACCESS(1) | |
| 1509 "st1 {v0.8b}, [%1], #8 \n" // store 8 U samples, advance dst_u by 8. | |
| 1510 MEMACCESS(2) | |
| 1511 "st1 {v1.8b}, [%2], #8 \n" // store 8 V samples, advance dst_v by 8. | |
| 1512 "b.gt 1b \n" /* loop while the remaining width is > 0 */ | |
| 1513 : "+r"(src_argb), // %0 | |
| 1514 "+r"(dst_u), // %1 | |
| 1515 "+r"(dst_v), // %2 | |
| 1516 "+r"(width) // %3 | |
| 1517 : | |
| 1518 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", | |
| 1519 "v20", "v21", "v22", "v23", "v24", "v25" /* NOTE(review): v5-v7 are declared clobbered but no instruction in this block writes them */ | |
| 1520 ); | |
| 1521 } | |
| 1522 #endif // HAS_ARGBTOUV422ROW_NEON | |
| 1523 | |
| 1524 // 32x1 pixels -> 8x1. width is number of argb pixels. e.g. 32. | 1480 // 32x1 pixels -> 8x1. width is number of argb pixels. e.g. 32. |
| 1525 #ifdef HAS_ARGBTOUV411ROW_NEON | 1481 #ifdef HAS_ARGBTOUV411ROW_NEON |
| 1526 void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, | 1482 void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, |
| 1527 int width) { | 1483 int width) { |
| 1528 asm volatile ( | 1484 asm volatile ( |
| 1529 RGBTOUV_SETUP_REG | 1485 RGBTOUV_SETUP_REG |
| 1530 "1: \n" | 1486 "1: \n" |
| 1531 MEMACCESS(0) | 1487 MEMACCESS(0) |
| 1532 "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. | 1488 "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. |
| 1533 "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts. | 1489 "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts. |
| (...skipping 1462 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2996 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List | 2952 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List |
| 2997 ); | 2953 ); |
| 2998 } | 2954 } |
| 2999 #endif // HAS_SOBELYROW_NEON | 2955 #endif // HAS_SOBELYROW_NEON |
| 3000 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) | 2956 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) |
| 3001 | 2957 |
| 3002 #ifdef __cplusplus | 2958 #ifdef __cplusplus |
| 3003 } // extern "C" | 2959 } // extern "C" |
| 3004 } // namespace libyuv | 2960 } // namespace libyuv |
| 3005 #endif | 2961 #endif |
| OLD | NEW |