Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(244)

Side by Side Diff: source/row_neon.cc

Issue 1574253004: refactor ARGBToI422 using ARGBToI420 internally (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_gcc.cc ('k') | source/row_neon64.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 1356 matching lines...) Expand 10 before | Expand all | Expand 10 after
1367 "bgt 1b \n" 1367 "bgt 1b \n"
1368 : "+r"(src_argb), // %0 1368 : "+r"(src_argb), // %0
1369 "+r"(dst_u), // %1 1369 "+r"(dst_u), // %1
1370 "+r"(dst_v), // %2 1370 "+r"(dst_v), // %2
1371 "+r"(width) // %3 1371 "+r"(width) // %3
1372 : 1372 :
1373 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q12", "q13", "q14", "q15" 1373 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q12", "q13", "q14", "q15"
1374 ); 1374 );
1375 } 1375 }
1376 1376
1377 // 16x1 pixels -> 8x1. width is number of argb pixels. e.g. 16.
1378 void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
1379 int width) {
1380 asm volatile (
1381 "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
1382 "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
1383 "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
1384 "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
1385 "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
1386 "vmov.u16 q15, #0x8080 \n" // 128.5
1387 "1: \n"
1388 MEMACCESS(0)
1389 "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
1390 MEMACCESS(0)
1391 "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
1392
1393 "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
1394 "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
1395 "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
1396
1397 "subs %3, %3, #16 \n" // 16 processed per loop.
1398 "vmul.s16 q8, q0, q10 \n" // B
1399 "vmls.s16 q8, q1, q11 \n" // G
1400 "vmls.s16 q8, q2, q12 \n" // R
1401 "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned
1402
1403 "vmul.s16 q9, q2, q10 \n" // R
1404 "vmls.s16 q9, q1, q14 \n" // G
1405 "vmls.s16 q9, q0, q13 \n" // B
1406 "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned
1407
1408 "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U
1409 "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V
1410
1411 MEMACCESS(1)
1412 "vst1.8 {d0}, [%1]! \n" // store 8 pixels U.
1413 MEMACCESS(2)
1414 "vst1.8 {d1}, [%2]! \n" // store 8 pixels V.
1415 "bgt 1b \n"
1416 : "+r"(src_argb), // %0
1417 "+r"(dst_u), // %1
1418 "+r"(dst_v), // %2
1419 "+r"(width) // %3
1420 :
1421 : "cc", "memory", "q0", "q1", "q2", "q3",
1422 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
1423 );
1424 }
1425
1426 // 32x1 pixels -> 8x1. width is number of argb pixels. e.g. 32. 1377 // 32x1 pixels -> 8x1. width is number of argb pixels. e.g. 32.
1427 void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, 1378 void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
1428 int width) { 1379 int width) {
1429 asm volatile ( 1380 asm volatile (
1430 "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient 1381 "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
1431 "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient 1382 "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
1432 "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient 1383 "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
1433 "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient 1384 "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
1434 "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient 1385 "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
1435 "vmov.u16 q15, #0x8080 \n" // 128.5 1386 "vmov.u16 q15, #0x8080 \n" // 128.5
(...skipping 1443 matching lines...) Expand 10 before | Expand all | Expand 10 after
2879 "r"(6) // %5 2830 "r"(6) // %5
2880 : "cc", "memory", "q0", "q1" // Clobber List 2831 : "cc", "memory", "q0", "q1" // Clobber List
2881 ); 2832 );
2882 } 2833 }
2883 #endif // defined(__ARM_NEON__) && !defined(__aarch64__) 2834 #endif // defined(__ARM_NEON__) && !defined(__aarch64__)
2884 2835
2885 #ifdef __cplusplus 2836 #ifdef __cplusplus
2886 } // extern "C" 2837 } // extern "C"
2887 } // namespace libyuv 2838 } // namespace libyuv
2888 #endif 2839 #endif
OLD | NEW
« no previous file with comments | « source/row_gcc.cc ('k') | source/row_neon64.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698