| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 1356 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1367 "bgt 1b \n" | 1367 "bgt 1b \n" |
| 1368 : "+r"(src_argb), // %0 | 1368 : "+r"(src_argb), // %0 |
| 1369 "+r"(dst_u), // %1 | 1369 "+r"(dst_u), // %1 |
| 1370 "+r"(dst_v), // %2 | 1370 "+r"(dst_v), // %2 |
| 1371 "+r"(width) // %3 | 1371 "+r"(width) // %3 |
| 1372 : | 1372 : |
| 1373 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q12", "q13", "q14", "q15" | 1373 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q12", "q13", "q14", "q15" |
| 1374 ); | 1374 ); |
| 1375 } | 1375 } |
| 1376 | 1376 |
| 1377 // 16x1 pixels -> 8x1. Sums each horizontal pair of ARGB pixels and writes one U and one V byte per pair (8 of each per loop pass). width is number of argb pixels, e.g. 16; the loop subtracts 16 per pass and repeats while the remainder is positive, so width is presumably a positive multiple of 16 - TODO confirm against callers. | |
| 1378 void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, | |
| 1379 int width) { | |
| 1380 asm volatile ( | |
| 1381 "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient; halved because each lane later holds a 2-pixel sum | |
| 1382 "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient (halved; subtracted via vmls) | |
| 1383 "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient (halved; subtracted via vmls) | |
| 1384 "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient (halved; subtracted via vmls) | |
| 1385 "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient (halved; subtracted via vmls) | |
| 1386 "vmov.u16 q15, #0x8080 \n" // 128.5 in 8.8 fixed point: +128 bias plus 0.5 rounding ahead of the >> 8 | |
| 1387 "1: \n" | |
| 1388 MEMACCESS(0) | |
| 1389 "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels, de-interleaved into the low halves of q0..q3. | |
| 1390 MEMACCESS(0) | |
| 1391 "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels into the high halves, so q0..q3 each hold one channel of 16 pixels. | |
| 1392 | |
| 1393 "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts (adjacent pairs summed). | |
| 1394 "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts (adjacent pairs summed). | |
| 1395 "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts (adjacent pairs summed). | |
| 1396 | |
| 1397 "subs %3, %3, #16 \n" // 16 processed per loop; sets the flags tested by bgt below. | |
| 1398 "vmul.s16 q8, q0, q10 \n" // U = B * UB | |
| 1399 "vmls.s16 q8, q1, q11 \n" // U -= G * UG | |
| 1400 "vmls.s16 q8, q2, q12 \n" // U -= R * UR | |
| 1401 "vadd.u16 q8, q8, q15 \n" // add q15 bias (128.5 after the >> 8) -> unsigned | |
| 1402 | |
| 1403 "vmul.s16 q9, q2, q10 \n" // V = R * VR | |
| 1404 "vmls.s16 q9, q1, q14 \n" // V -= G * VG | |
| 1405 "vmls.s16 q9, q0, q13 \n" // V -= B * VB | |
| 1406 "vadd.u16 q9, q9, q15 \n" // add q15 bias (128.5 after the >> 8) -> unsigned | |
| 1407 | |
| 1408 "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U: saturating >> 8, reuses d0 now that B is consumed | |
| 1409 "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V: saturating >> 8, reuses d1 | |
| 1410 | |
| 1411 MEMACCESS(1) | |
| 1412 "vst1.8 {d0}, [%1]! \n" // store 8 pixels U and advance dst_u. | |
| 1413 MEMACCESS(2) | |
| 1414 "vst1.8 {d1}, [%2]! \n" // store 8 pixels V and advance dst_v. | |
| 1415 "bgt 1b \n" | |
| 1416 : "+r"(src_argb), // %0 | |
| 1417 "+r"(dst_u), // %1 | |
| 1418 "+r"(dst_v), // %2 | |
| 1419 "+r"(width) // %3 | |
| 1420 : | |
| 1421 : "cc", "memory", "q0", "q1", "q2", "q3", | |
| 1422 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" | |
| 1423 ); | |
| 1424 } | |
| 1425 | |
| 1426 // 32x1 pixels -> 8x1. width is number of argb pixels. e.g. 32. | 1377 // 32x1 pixels -> 8x1. width is number of argb pixels. e.g. 32. |
| 1427 void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, | 1378 void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, |
| 1428 int width) { | 1379 int width) { |
| 1429 asm volatile ( | 1380 asm volatile ( |
| 1430 "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient | 1381 "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient |
| 1431 "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient | 1382 "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient |
| 1432 "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient | 1383 "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient |
| 1433 "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient | 1384 "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient |
| 1434 "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient | 1385 "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient |
| 1435 "vmov.u16 q15, #0x8080 \n" // 128.5 | 1386 "vmov.u16 q15, #0x8080 \n" // 128.5 |
| (...skipping 1443 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 2879 "r"(6) // %5 | 2830 "r"(6) // %5 |
| 2880 : "cc", "memory", "q0", "q1" // Clobber List | 2831 : "cc", "memory", "q0", "q1" // Clobber List |
| 2881 ); | 2832 ); |
| 2882 } | 2833 } |
| 2883 #endif // defined(__ARM_NEON__) && !defined(__aarch64__) | 2834 #endif // defined(__ARM_NEON__) && !defined(__aarch64__) |
| 2884 | 2835 |
| 2885 #ifdef __cplusplus | 2836 #ifdef __cplusplus |
| 2886 } // extern "C" | 2837 } // extern "C" |
| 2887 } // namespace libyuv | 2838 } // namespace libyuv |
| 2888 #endif | 2839 #endif |
| OLD | NEW |