| Index: source/row_gcc.cc
|
| diff --git a/source/row_gcc.cc b/source/row_gcc.cc
|
| index 9940cba125655c238f8bdc4b72e9a2070882b73d..348935405761d16bf12ed18278fe277a1559d3a6 100644
|
| --- a/source/row_gcc.cc
|
| +++ b/source/row_gcc.cc
|
| @@ -1947,6 +1947,19 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
|
|
|
| #endif // HAS_I422TOARGBROW_SSSE3
|
|
|
| +// Read 16 U, 16 V and 16 Y from 444: interleave U/V to 16 UV pairs in ymm0, duplicate each Y byte into a word in ymm4.
|
| +#define READYUV444_AVX2 \
|
| + "vmovdqu " MEMACCESS([u_buf]) ",%%xmm0 \n" /* 16 U bytes */ \
|
| + MEMOPREG(vmovdqu, 0x00, [u_buf], [v_buf], 1, xmm1) /* presumably 16 V bytes at u_buf + v_buf; MEMOPREG is defined elsewhere */ \
|
| + "lea " MEMLEA(0x10, [u_buf]) ",%[u_buf] \n" /* u_buf += 16 */ \
|
| + "vpermq $0xd8,%%ymm0,%%ymm0 \n" /* swap qwords 1 and 2: U8..U15 move to the upper lane */ \
|
| + "vpermq $0xd8,%%ymm1,%%ymm1 \n" /* same qword swap for V */ \
|
| + "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" /* per-lane interleave: U0 V0 U1 V1 ... */ \
|
| + "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" /* 16 Y bytes */ \
|
| + "vpermq $0xd8,%%ymm4,%%ymm4 \n" /* Y8..Y15 move to the upper lane */ \
|
| + "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" /* duplicate each Y byte: Y0 Y0 Y1 Y1 ... */ \
|
| + "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" /* y_buf += 16 */
|
| +
|
| // Read 8 UV from 422, upsample to 16 UV.
|
| #define READYUV422_AVX2 \
|
| "vmovq " MEMACCESS([u_buf]) ",%%xmm0 \n" \
|
| @@ -2079,6 +2092,39 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
|
| "vmovdqu %%ymm0," MEMACCESS2(0x20, [dst_argb]) " \n" \
|
| "lea " MEMLEA(0x40, [dst_argb]) ", %[dst_argb] \n"
|
|
|
| +#ifdef HAS_I444TOARGBROW_AVX2
|
| +// 16 pixels per loop iteration; the loop runs while width > 0, so width is effectively rounded up to a multiple of 16.
|
| +// 16 UV values with 16 Y producing 16 ARGB (64 bytes). y_buf, u_buf and dst_argb are advanced by the asm; v_buf is turned into an offset.
|
| +void OMITFP I444ToARGBRow_AVX2(const uint8* y_buf,
|
| + const uint8* u_buf,
|
| + const uint8* v_buf,
|
| + uint8* dst_argb,
|
| + const struct YuvConstants* yuvconstants,
|
| + int width) {
|
| + asm volatile (
|
| + YUVTORGB_SETUP_AVX2(yuvconstants)
|
| + "sub %[u_buf],%[v_buf] \n"  // v_buf -= u_buf: V is addressed as an offset from u_buf.
|
| + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"  // ymm5 = all ones (presumably the alpha source for STOREARGB_AVX2 - confirm).
|
| + LABELALIGN
|
| + "1: \n"
|
| + READYUV444_AVX2
|
| + YUVTORGB_AVX2(yuvconstants)
|
| + STOREARGB_AVX2
|
| + "sub $0x10,%[width] \n"  // 16 pixels consumed per iteration.
|
| + "jg 1b \n"  // Repeat while width > 0.
|
| + "vzeroupper \n"  // Zero the upper halves of all YMM registers before leaving AVX code.
|
| + : [y_buf]"+r"(y_buf), // %[y_buf]
|
| + [u_buf]"+r"(u_buf), // %[u_buf]
|
| + [v_buf]"+r"(v_buf), // %[v_buf]
|
| + [dst_argb]"+r"(dst_argb), // %[dst_argb]
|
| + [width]"+rm"(width) // %[width]
|
| + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
|
| + : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2
|
| + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
| + );
|
| +}
|
| +#endif // HAS_I444TOARGBROW_AVX2
|
| +
|
| #if defined(HAS_I422TOARGBROW_AVX2)
|
| // 16 pixels
|
| // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
|
| @@ -2091,7 +2137,7 @@ void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf,
|
| asm volatile (
|
| YUVTORGB_SETUP_AVX2(yuvconstants)
|
| "sub %[u_buf],%[v_buf] \n"
|
| - "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
| + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
| LABELALIGN
|
| "1: \n"
|
| READYUV422_AVX2
|
|
|