Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(362)

Side by Side Diff: source/row_neon.cc

Issue 2406123002: Remove I411 support, update doc and switch to side by side test (Closed)
Patch Set: bump version, disable a few lint warnings Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_gcc.cc ('k') | source/row_neon64.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 12 matching lines...) Expand all
23 23
24 // Read 8 Y, 4 U and 4 V from 422 24 // Read 8 Y, 4 U and 4 V from 422
25 #define READYUV422 \ 25 #define READYUV422 \
26 MEMACCESS(0) \ 26 MEMACCESS(0) \
27 "vld1.8 {d0}, [%0]! \n" \ 27 "vld1.8 {d0}, [%0]! \n" \
28 MEMACCESS(1) \ 28 MEMACCESS(1) \
29 "vld1.32 {d2[0]}, [%1]! \n" \ 29 "vld1.32 {d2[0]}, [%1]! \n" \
30 MEMACCESS(2) \ 30 MEMACCESS(2) \
31 "vld1.32 {d2[1]}, [%2]! \n" 31 "vld1.32 {d2[1]}, [%2]! \n"
32 32
33 // Read 8 Y, 2 U and 2 V from 411
34 #define READYUV411 \
35 MEMACCESS(0) \
36 "vld1.8 {d0}, [%0]! \n" \
37 MEMACCESS(1) \
38 "vld1.16 {d2[0]}, [%1]! \n" \
39 MEMACCESS(2) \
40 "vld1.16 {d2[1]}, [%2]! \n" \
41 "vmov.u8 d3, d2 \n" \
42 "vzip.u8 d2, d3 \n"
43
44 // Read 8 Y, 8 U and 8 V from 444 33 // Read 8 Y, 8 U and 8 V from 444
45 #define READYUV444 \ 34 #define READYUV444 \
46 MEMACCESS(0) \ 35 MEMACCESS(0) \
47 "vld1.8 {d0}, [%0]! \n" \ 36 "vld1.8 {d0}, [%0]! \n" \
48 MEMACCESS(1) \ 37 MEMACCESS(1) \
49 "vld1.8 {d2}, [%1]! \n" \ 38 "vld1.8 {d2}, [%1]! \n" \
50 MEMACCESS(2) \ 39 MEMACCESS(2) \
51 "vld1.8 {d3}, [%2]! \n" \ 40 "vld1.8 {d3}, [%2]! \n" \
52 "vpaddl.u8 q1, q1 \n" \ 41 "vpaddl.u8 q1, q1 \n" \
53 "vrshrn.u16 d2, q1, #1 \n" 42 "vrshrn.u16 d2, q1, #1 \n"
(...skipping 168 matching lines...) Expand 10 before | Expand all | Expand 10 after
222 "+r"(width) // %5 211 "+r"(width) // %5
223 : [kUVToRB]"r"(&yuvconstants->kUVToRB), 212 : [kUVToRB]"r"(&yuvconstants->kUVToRB),
224 [kUVToG]"r"(&yuvconstants->kUVToG), 213 [kUVToG]"r"(&yuvconstants->kUVToG),
225 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), 214 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
226 [kYToRgb]"r"(&yuvconstants->kYToRgb) 215 [kYToRgb]"r"(&yuvconstants->kYToRgb)
227 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", 216 : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
228 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 217 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
229 ); 218 );
230 } 219 }
231 220
232 void I411ToARGBRow_NEON(const uint8* src_y,
233 const uint8* src_u,
234 const uint8* src_v,
235 uint8* dst_argb,
236 const struct YuvConstants* yuvconstants,
237 int width) {
238 asm volatile (
239 YUVTORGB_SETUP
240 "vmov.u8 d23, #255 \n"
241 "1: \n"
242 READYUV411
243 YUVTORGB
244 "subs %4, %4, #8 \n"
245 MEMACCESS(3)
246 "vst4.8 {d20, d21, d22, d23}, [%3]! \n"
247 "bgt 1b \n"
248 : "+r"(src_y), // %0
249 "+r"(src_u), // %1
250 "+r"(src_v), // %2
251 "+r"(dst_argb), // %3
252 "+r"(width) // %4
253 : [kUVToRB]"r"(&yuvconstants->kUVToRB),
254 [kUVToG]"r"(&yuvconstants->kUVToG),
255 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
256 [kYToRgb]"r"(&yuvconstants->kYToRgb)
257 : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
258 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
259 );
260 }
261
262 void I422ToRGBARow_NEON(const uint8* src_y, 221 void I422ToRGBARow_NEON(const uint8* src_y,
263 const uint8* src_u, 222 const uint8* src_u,
264 const uint8* src_v, 223 const uint8* src_v,
265 uint8* dst_rgba, 224 uint8* dst_rgba,
266 const struct YuvConstants* yuvconstants, 225 const struct YuvConstants* yuvconstants,
267 int width) { 226 int width) {
268 asm volatile ( 227 asm volatile (
269 YUVTORGB_SETUP 228 YUVTORGB_SETUP
270 "1: \n" 229 "1: \n"
271 READYUV422 230 READYUV422
(...skipping 1104 matching lines...) Expand 10 before | Expand all | Expand 10 after
1376 "bgt 1b \n" 1335 "bgt 1b \n"
1377 : "+r"(src_argb), // %0 1336 : "+r"(src_argb), // %0
1378 "+r"(dst_u), // %1 1337 "+r"(dst_u), // %1
1379 "+r"(dst_v), // %2 1338 "+r"(dst_v), // %2
1380 "+r"(width) // %3 1339 "+r"(width) // %3
1381 : 1340 :
1382 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q12", "q13", "q14", "q15" 1341 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q12", "q13", "q14", "q15"
1383 ); 1342 );
1384 } 1343 }
1385 1344
1386 // 32x1 pixels -> 8x1. width is number of argb pixels. e.g. 32.
1387 void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
1388 int width) {
1389 asm volatile (
1390 "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient
1391 "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient
1392 "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient
1393 "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient
1394 "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient
1395 "vmov.u16 q15, #0x8080 \n" // 128.5
1396 "1: \n"
1397 MEMACCESS(0)
1398 "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels.
1399 MEMACCESS(0)
1400 "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels.
1401 "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts.
1402 "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts.
1403 "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts.
1404 MEMACCESS(0)
1405 "vld4.8 {d8, d10, d12, d14}, [%0]! \n" // load 8 more ARGB pixels.
1406 MEMACCESS(0)
1407 "vld4.8 {d9, d11, d13, d15}, [%0]! \n" // load last 8 ARGB pixels.
1408 "vpaddl.u8 q4, q4 \n" // B 16 bytes -> 8 shorts.
1409 "vpaddl.u8 q5, q5 \n" // G 16 bytes -> 8 shorts.
1410 "vpaddl.u8 q6, q6 \n" // R 16 bytes -> 8 shorts.
1411
1412 "vpadd.u16 d0, d0, d1 \n" // B 16 shorts -> 8 shorts.
1413 "vpadd.u16 d1, d8, d9 \n" // B
1414 "vpadd.u16 d2, d2, d3 \n" // G 16 shorts -> 8 shorts.
1415 "vpadd.u16 d3, d10, d11 \n" // G
1416 "vpadd.u16 d4, d4, d5 \n" // R 16 shorts -> 8 shorts.
1417 "vpadd.u16 d5, d12, d13 \n" // R
1418
1419 "vrshr.u16 q0, q0, #1 \n" // 2x average
1420 "vrshr.u16 q1, q1, #1 \n"
1421 "vrshr.u16 q2, q2, #1 \n"
1422
1423 "subs %3, %3, #32 \n" // 32 processed per loop.
1424 "vmul.s16 q8, q0, q10 \n" // B
1425 "vmls.s16 q8, q1, q11 \n" // G
1426 "vmls.s16 q8, q2, q12 \n" // R
1427 "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned
1428 "vmul.s16 q9, q2, q10 \n" // R
1429 "vmls.s16 q9, q1, q14 \n" // G
1430 "vmls.s16 q9, q0, q13 \n" // B
1431 "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned
1432 "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U
1433 "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V
1434 MEMACCESS(1)
1435 "vst1.8 {d0}, [%1]! \n" // store 8 pixels U.
1436 MEMACCESS(2)
1437 "vst1.8 {d1}, [%2]! \n" // store 8 pixels V.
1438 "bgt 1b \n"
1439 : "+r"(src_argb), // %0
1440 "+r"(dst_u), // %1
1441 "+r"(dst_v), // %2
1442 "+r"(width) // %3
1443 :
1444 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7",
1445 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
1446 );
1447 }
1448
1449 // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. 1345 // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16.
1450 #define RGBTOUV(QB, QG, QR) \ 1346 #define RGBTOUV(QB, QG, QR) \
1451 "vmul.s16 q8, " #QB ", q10 \n" /* B */ \ 1347 "vmul.s16 q8, " #QB ", q10 \n" /* B */ \
1452 "vmls.s16 q8, " #QG ", q11 \n" /* G */ \ 1348 "vmls.s16 q8, " #QG ", q11 \n" /* G */ \
1453 "vmls.s16 q8, " #QR ", q12 \n" /* R */ \ 1349 "vmls.s16 q8, " #QR ", q12 \n" /* R */ \
1454 "vadd.u16 q8, q8, q15 \n" /* +128 -> unsigned */ \ 1350 "vadd.u16 q8, q8, q15 \n" /* +128 -> unsigned */ \
1455 "vmul.s16 q9, " #QR ", q10 \n" /* R */ \ 1351 "vmul.s16 q9, " #QR ", q10 \n" /* R */ \
1456 "vmls.s16 q9, " #QG ", q14 \n" /* G */ \ 1352 "vmls.s16 q9, " #QG ", q14 \n" /* G */ \
1457 "vmls.s16 q9, " #QB ", q13 \n" /* B */ \ 1353 "vmls.s16 q9, " #QB ", q13 \n" /* B */ \
1458 "vadd.u16 q9, q9, q15 \n" /* +128 -> unsigned */ \ 1354 "vadd.u16 q9, q9, q15 \n" /* +128 -> unsigned */ \
(...skipping 1377 matching lines...) Expand 10 before | Expand all | Expand 10 after
2836 "r"(6) // %5 2732 "r"(6) // %5
2837 : "cc", "memory", "q0", "q1" // Clobber List 2733 : "cc", "memory", "q0", "q1" // Clobber List
2838 ); 2734 );
2839 } 2735 }
2840 #endif // defined(__ARM_NEON__) && !defined(__aarch64__) 2736 #endif // defined(__ARM_NEON__) && !defined(__aarch64__)
2841 2737
2842 #ifdef __cplusplus 2738 #ifdef __cplusplus
2843 } // extern "C" 2739 } // extern "C"
2844 } // namespace libyuv 2740 } // namespace libyuv
2845 #endif 2741 #endif
OLDNEW
« no previous file with comments | « source/row_gcc.cc ('k') | source/row_neon64.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698