OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 1356 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1367 "bgt 1b \n" | 1367 "bgt 1b \n" |
1368 : "+r"(src_argb), // %0 | 1368 : "+r"(src_argb), // %0 |
1369 "+r"(dst_u), // %1 | 1369 "+r"(dst_u), // %1 |
1370 "+r"(dst_v), // %2 | 1370 "+r"(dst_v), // %2 |
1371 "+r"(width) // %3 | 1371 "+r"(width) // %3 |
1372 : | 1372 : |
1373 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q12", "q13", "q14", "q15" | 1373 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q12", "q13", "q14", "q15" |
1374 ); | 1374 ); |
1375 } | 1375 } |
1376 | 1376 |
1377 // ARGB row to U and V, 16x1 pixels -> 8x1: each horizontal pair of pixels is summed into one U byte and one V byte. width is number of argb pixels, e.g. 16; the loop consumes 16 per pass and repeats while the remaining count is > 0. | |
1378 void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, | |
1379 int width) { | |
1380 asm volatile ( | |
1381 "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient (112 / 128); halved, presumably because vpaddl below yields 2-pixel sums rather than averages | |
1382 "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient (74 / 128, applied with vmls) | |
1383 "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient (38 / 128, applied with vmls) | |
1384 "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient (18 / 128, applied with vmls) | |
1385 "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient (94 / 128, applied with vmls) | |
1386 "vmov.u16 q15, #0x8080 \n" // 128.5 in 8.8 fixed point: +128 bias plus 0.5 so the final >> 8 rounds | |
1387 "1: \n" | |
1388 MEMACCESS(0) | |
1389 "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels, de-interleaved by channel: d0 = B, d2 = G, d4 = R, d6 = 4th channel (never read below). | |
1390 MEMACCESS(0) | |
1391 "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels into the high halves of q0..q3. | |
1392 | |
1393 "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts (sum of each adjacent pair). | |
1394 "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts (sum of each adjacent pair). | |
1395 "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts (sum of each adjacent pair). | |
1396 | |
1397 "subs %3, %3, #16 \n" // 16 processed per loop; sets the flags tested by bgt at the end. | |
1398 "vmul.s16 q8, q0, q10 \n" // U = B * UB | |
1399 "vmls.s16 q8, q1, q11 \n" // U -= G * UG | |
1400 "vmls.s16 q8, q2, q12 \n" // U -= R * UR | |
1401 "vadd.u16 q8, q8, q15 \n" // +128.5 (q15) -> unsigned | |
1402 | |
1403 "vmul.s16 q9, q2, q10 \n" // V = R * VR | |
1404 "vmls.s16 q9, q1, q14 \n" // V -= G * VG | |
1405 "vmls.s16 q9, q0, q13 \n" // V -= B * VB | |
1406 "vadd.u16 q9, q9, q15 \n" // +128.5 (q15) -> unsigned | |
1407 | |
1408 "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U (saturating shift right by 8) | |
1409 "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V (saturating shift right by 8) | |
1410 | |
1411 MEMACCESS(1) | |
1412 "vst1.8 {d0}, [%1]! \n" // store 8 U bytes (one per 2 source pixels). | |
1413 MEMACCESS(2) | |
1414 "vst1.8 {d1}, [%2]! \n" // store 8 V bytes (one per 2 source pixels). | |
1415 "bgt 1b \n" | |
1416 : "+r"(src_argb), // %0 source pointer, post-incremented by the loads | |
1417 "+r"(dst_u), // %1 U destination, post-incremented by the store | |
1418 "+r"(dst_v), // %2 V destination, post-incremented by the store | |
1419 "+r"(width) // %3 remaining pixel count, decremented by 16 per loop | |
1420 : | |
1421 : "cc", "memory", "q0", "q1", "q2", "q3", | |
1422 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" | |
1423 ); | |
1424 } | |
1425 | |
1426 // 32x1 pixels -> 8x1. width is number of argb pixels. e.g. 32. | 1377 // 32x1 pixels -> 8x1. width is number of argb pixels. e.g. 32. |
1427 void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, | 1378 void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, |
1428 int width) { | 1379 int width) { |
1429 asm volatile ( | 1380 asm volatile ( |
1430 "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient | 1381 "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient |
1431 "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient | 1382 "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient |
1432 "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient | 1383 "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient |
1433 "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient | 1384 "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient |
1434 "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient | 1385 "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient |
1435 "vmov.u16 q15, #0x8080 \n" // 128.5 | 1386 "vmov.u16 q15, #0x8080 \n" // 128.5 |
(...skipping 1443 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2879 "r"(6) // %5 | 2830 "r"(6) // %5 |
2880 : "cc", "memory", "q0", "q1" // Clobber List | 2831 : "cc", "memory", "q0", "q1" // Clobber List |
2881 ); | 2832 ); |
2882 } | 2833 } |
2883 #endif // defined(__ARM_NEON__) && !defined(__aarch64__) | 2834 #endif // defined(__ARM_NEON__) && !defined(__aarch64__) |
2884 | 2835 |
2885 #ifdef __cplusplus | 2836 #ifdef __cplusplus |
2886 } // extern "C" | 2837 } // extern "C" |
2887 } // namespace libyuv | 2838 } // namespace libyuv |
2888 #endif | 2839 #endif |
OLD | NEW |