Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(64)

Side by Side Diff: source/row_neon64.cc

Issue 2406123002: Remove I411 support, update doc and switch to side by side test (Closed)
Patch Set: bump version, disable a few lint warnings Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_neon.cc ('k') | source/row_win.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2014 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2014 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
11 #include "libyuv/row.h" 11 #include "libyuv/row.h"
12 12
13 #ifdef __cplusplus 13 #ifdef __cplusplus
14 namespace libyuv { 14 namespace libyuv {
15 extern "C" { 15 extern "C" {
16 #endif 16 #endif
17 17
18 // This module is for GCC Neon armv8 64 bit. 18 // This module is for GCC Neon armv8 64 bit.
19 #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) 19 #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
20 20
21 // Read 8 Y, 4 U and 4 V from 422 21 // Read 8 Y, 4 U and 4 V from 422
22 #define READYUV422 \ 22 #define READYUV422 \
23 MEMACCESS(0) \ 23 MEMACCESS(0) \
24 "ld1 {v0.8b}, [%0], #8 \n" \ 24 "ld1 {v0.8b}, [%0], #8 \n" \
25 MEMACCESS(1) \ 25 MEMACCESS(1) \
26 "ld1 {v1.s}[0], [%1], #4 \n" \ 26 "ld1 {v1.s}[0], [%1], #4 \n" \
27 MEMACCESS(2) \ 27 MEMACCESS(2) \
28 "ld1 {v1.s}[1], [%2], #4 \n" 28 "ld1 {v1.s}[1], [%2], #4 \n"
29 29
30 // Read 8 Y, 2 U and 2 V from 411
31 #define READYUV411 \
32 MEMACCESS(0) \
33 "ld1 {v0.8b}, [%0], #8 \n" \
34 MEMACCESS(1) \
35 "ld1 {v2.h}[0], [%1], #2 \n" \
36 MEMACCESS(2) \
37 "ld1 {v2.h}[1], [%2], #2 \n" \
38 "zip1 v1.8b, v2.8b, v2.8b \n"
39
40 // Read 8 Y, 8 U and 8 V from 444 30 // Read 8 Y, 8 U and 8 V from 444
41 #define READYUV444 \ 31 #define READYUV444 \
42 MEMACCESS(0) \ 32 MEMACCESS(0) \
43 "ld1 {v0.8b}, [%0], #8 \n" \ 33 "ld1 {v0.8b}, [%0], #8 \n" \
44 MEMACCESS(1) \ 34 MEMACCESS(1) \
45 "ld1 {v1.d}[0], [%1], #8 \n" \ 35 "ld1 {v1.d}[0], [%1], #8 \n" \
46 MEMACCESS(2) \ 36 MEMACCESS(2) \
47 "ld1 {v1.d}[1], [%2], #8 \n" \ 37 "ld1 {v1.d}[1], [%2], #8 \n" \
48 "uaddlp v1.8h, v1.16b \n" \ 38 "uaddlp v1.8h, v1.16b \n" \
49 "rshrn v1.8b, v1.8h, #1 \n" 39 "rshrn v1.8b, v1.8h, #1 \n"
(...skipping 163 matching lines...) Expand 10 before | Expand all | Expand 10 after
213 "+r"(width) // %5 203 "+r"(width) // %5
214 : [kUVToRB]"r"(&yuvconstants->kUVToRB), 204 : [kUVToRB]"r"(&yuvconstants->kUVToRB),
215 [kUVToG]"r"(&yuvconstants->kUVToG), 205 [kUVToG]"r"(&yuvconstants->kUVToG),
216 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), 206 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
217 [kYToRgb]"r"(&yuvconstants->kYToRgb) 207 [kYToRgb]"r"(&yuvconstants->kYToRgb)
218 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 208 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
219 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 209 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
220 ); 210 );
221 } 211 }
222 212
223 void I411ToARGBRow_NEON(const uint8* src_y,
224 const uint8* src_u,
225 const uint8* src_v,
226 uint8* dst_argb,
227 const struct YuvConstants* yuvconstants,
228 int width) {
229 asm volatile (
230 YUVTORGB_SETUP
231 "movi v23.8b, #255 \n" /* A */
232 "1: \n"
233 READYUV411
234 YUVTORGB(v22, v21, v20)
235 "subs %w4, %w4, #8 \n"
236 MEMACCESS(3)
237 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n"
238 "b.gt 1b \n"
239 : "+r"(src_y), // %0
240 "+r"(src_u), // %1
241 "+r"(src_v), // %2
242 "+r"(dst_argb), // %3
243 "+r"(width) // %4
244 : [kUVToRB]"r"(&yuvconstants->kUVToRB),
245 [kUVToG]"r"(&yuvconstants->kUVToG),
246 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
247 [kYToRgb]"r"(&yuvconstants->kYToRgb)
248 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
249 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
250 );
251 }
252
253 void I422ToRGBARow_NEON(const uint8* src_y, 213 void I422ToRGBARow_NEON(const uint8* src_y,
254 const uint8* src_u, 214 const uint8* src_u,
255 const uint8* src_v, 215 const uint8* src_v,
256 uint8* dst_rgba, 216 uint8* dst_rgba,
257 const struct YuvConstants* yuvconstants, 217 const struct YuvConstants* yuvconstants,
258 int width) { 218 int width) {
259 asm volatile ( 219 asm volatile (
260 YUVTORGB_SETUP 220 YUVTORGB_SETUP
261 "movi v20.8b, #255 \n" /* A */ 221 "movi v20.8b, #255 \n" /* A */
262 "1: \n" 222 "1: \n"
(...skipping 1125 matching lines...) Expand 10 before | Expand all | Expand 10 after
1388 } 1348 }
1389 1349
1390 #define RGBTOUV_SETUP_REG \ 1350 #define RGBTOUV_SETUP_REG \
1391 "movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \ 1351 "movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \
1392 "movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2 */ \ 1352 "movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2 */ \
1393 "movi v22.8h, #19, lsl #0 \n" /* UR coefficient (-0.2969) / 2 */ \ 1353 "movi v22.8h, #19, lsl #0 \n" /* UR coefficient (-0.2969) / 2 */ \
1394 "movi v23.8h, #9, lsl #0 \n" /* VB coefficient (-0.1406) / 2 */ \ 1354 "movi v23.8h, #9, lsl #0 \n" /* VB coefficient (-0.1406) / 2 */ \
1395 "movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \ 1355 "movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \
1396 "movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */ 1356 "movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */
1397 1357
1398 // 32x1 pixels -> 8x1. width is number of argb pixels. e.g. 32.
1399 void ARGBToUV411Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
1400 int width) {
1401 asm volatile (
1402 RGBTOUV_SETUP_REG
1403 "1: \n"
1404 MEMACCESS(0)
1405 "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels.
1406 "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts.
1407 "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts.
1408 "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts.
1409 MEMACCESS(0)
1410 "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%0], #64 \n" // load next 16.
1411 "uaddlp v4.8h, v4.16b \n" // B 16 bytes -> 8 shorts.
1412 "uaddlp v5.8h, v5.16b \n" // G 16 bytes -> 8 shorts.
1413 "uaddlp v6.8h, v6.16b \n" // R 16 bytes -> 8 shorts.
1414
1415 "addp v0.8h, v0.8h, v4.8h \n" // B 16 shorts -> 8 shorts.
1416 "addp v1.8h, v1.8h, v5.8h \n" // G 16 shorts -> 8 shorts.
1417 "addp v2.8h, v2.8h, v6.8h \n" // R 16 shorts -> 8 shorts.
1418
1419 "urshr v0.8h, v0.8h, #1 \n" // 2x average
1420 "urshr v1.8h, v1.8h, #1 \n"
1421 "urshr v2.8h, v2.8h, #1 \n"
1422
1423 "subs %w3, %w3, #32 \n" // 32 processed per loop.
1424 "mul v3.8h, v0.8h, v20.8h \n" // B
1425 "mls v3.8h, v1.8h, v21.8h \n" // G
1426 "mls v3.8h, v2.8h, v22.8h \n" // R
1427 "add v3.8h, v3.8h, v25.8h \n" // +128 -> unsigned
1428 "mul v4.8h, v2.8h, v20.8h \n" // R
1429 "mls v4.8h, v1.8h, v24.8h \n" // G
1430 "mls v4.8h, v0.8h, v23.8h \n" // B
1431 "add v4.8h, v4.8h, v25.8h \n" // +128 -> unsigned
1432 "uqshrn v0.8b, v3.8h, #8 \n" // 16 bit to 8 bit U
1433 "uqshrn v1.8b, v4.8h, #8 \n" // 16 bit to 8 bit V
1434 MEMACCESS(1)
1435 "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels U.
1436 MEMACCESS(2)
1437 "st1 {v1.8b}, [%2], #8 \n" // store 8 pixels V.
1438 "b.gt 1b \n"
1439 : "+r"(src_argb), // %0
1440 "+r"(dst_u), // %1
1441 "+r"(dst_v), // %2
1442 "+r"(width) // %3
1443 :
1444 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
1445 "v20", "v21", "v22", "v23", "v24", "v25"
1446 );
1447 }
1448
1449 // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. 1358 // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16.
1450 #define RGBTOUV(QB, QG, QR) \ 1359 #define RGBTOUV(QB, QG, QR) \
1451 "mul v3.8h, " #QB ",v20.8h \n" /* B */ \ 1360 "mul v3.8h, " #QB ",v20.8h \n" /* B */ \
1452 "mul v4.8h, " #QR ",v20.8h \n" /* R */ \ 1361 "mul v4.8h, " #QR ",v20.8h \n" /* R */ \
1453 "mls v3.8h, " #QG ",v21.8h \n" /* G */ \ 1362 "mls v3.8h, " #QG ",v21.8h \n" /* G */ \
1454 "mls v4.8h, " #QG ",v24.8h \n" /* G */ \ 1363 "mls v4.8h, " #QG ",v24.8h \n" /* G */ \
1455 "mls v3.8h, " #QR ",v22.8h \n" /* R */ \ 1364 "mls v3.8h, " #QR ",v22.8h \n" /* R */ \
1456 "mls v4.8h, " #QB ",v23.8h \n" /* B */ \ 1365 "mls v4.8h, " #QB ",v23.8h \n" /* B */ \
1457 "add v3.8h, v3.8h, v25.8h \n" /* +128 -> unsigned */ \ 1366 "add v3.8h, v3.8h, v25.8h \n" /* +128 -> unsigned */ \
1458 "add v4.8h, v4.8h, v25.8h \n" /* +128 -> unsigned */ \ 1367 "add v4.8h, v4.8h, v25.8h \n" /* +128 -> unsigned */ \
(...skipping 1341 matching lines...) Expand 10 before | Expand all | Expand 10 after
2800 "r"(6LL) // %5 2709 "r"(6LL) // %5
2801 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List 2710 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
2802 ); 2711 );
2803 } 2712 }
2804 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) 2713 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
2805 2714
2806 #ifdef __cplusplus 2715 #ifdef __cplusplus
2807 } // extern "C" 2716 } // extern "C"
2808 } // namespace libyuv 2717 } // namespace libyuv
2809 #endif 2718 #endif
OLDNEW
« no previous file with comments | « source/row_neon.cc ('k') | source/row_win.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698