| Index: source/row_gcc.cc
|
| diff --git a/source/row_gcc.cc b/source/row_gcc.cc
|
| index 1ff1dc2f6e6fac8a4b1a78a508b7d48f4b1a4115..6fab45076245b0dbd5b94d1f9f7e14e478c09611 100644
|
| --- a/source/row_gcc.cc
|
| +++ b/source/row_gcc.cc
|
| @@ -1431,7 +1431,7 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba,
|
|
|
| #if defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2)
|
|
|
| -// Read 8 UV from 411
|
| +// Read 8 UV from 444
|
| #define READYUV444 \
|
| "movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \
|
| MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \
|
| @@ -1952,6 +1952,20 @@ void OMITFP I422ToRGBARow_SSSE3(const uint8* y_buf,
|
| "vpermq $0xd8,%%ymm5,%%ymm5 \n" \
|
| "lea " MEMLEA(0x10, [a_buf]) ",%[a_buf] \n"
|
|
|
| +// Read 4 U and 4 V bytes from 411, upsample to 16 UV pairs in ymm0; also read 16 Y bytes, widened to 16 words in ymm4.
|
| +#define READYUV411_AVX2 \
|
| + "vmovd " MEMACCESS([u_buf]) ",%%xmm0 \n" /* xmm0 = 4 U bytes */ \
|
| + MEMOPREG(vmovd, 0x00, [u_buf], [v_buf], 1, xmm1) /* xmm1 = 4 V bytes; presumably addressed as u_buf + v_buf * 1 - confirm against MEMOPREG */ \
|
| + "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" /* u_buf += 4 */ \
|
| + "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" /* interleave bytes: U0 V0 U1 V1 U2 V2 U3 V3 */ \
|
| + "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" /* duplicate each UV word: every pair appears twice */ \
|
| + "vpermq $0xd8,%%ymm0,%%ymm0 \n" /* swap the two middle qwords so pairs 2-3 land in the high lane */ \
|
| + "vpunpckldq %%ymm0,%%ymm0,%%ymm0 \n" /* duplicate each dword: every pair now appears 4 times */ \
|
| + "vmovdqu " MEMACCESS([y_buf]) ",%%xmm4 \n" /* xmm4 = 16 Y bytes */ \
|
| + "vpermq $0xd8,%%ymm4,%%ymm4 \n" /* swap the two middle qwords: 8 Y bytes in the low half of each lane */ \
|
| + "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" /* duplicate each Y byte into both halves of a 16-bit word */ \
|
| + "lea " MEMLEA(0x10, [y_buf]) ",%[y_buf] \n" /* y_buf += 16 */
|
| +
|
| // Read 8 UV from NV12, upsample to 16 UV.
|
| #define READNV12_AVX2 \
|
| "vmovdqu " MEMACCESS([uv_buf]) ",%%xmm0 \n" \
|
| @@ -2067,7 +2081,7 @@ void OMITFP I444ToARGBRow_AVX2(const uint8* y_buf,
|
| asm volatile (
|
| YUVTORGB_SETUP_AVX2(yuvconstants)
|
| "sub %[u_buf],%[v_buf] \n"
|
| - "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
| + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
| LABELALIGN
|
| "1: \n"
|
| READYUV444_AVX2
|
| @@ -2088,6 +2102,39 @@ void OMITFP I444ToARGBRow_AVX2(const uint8* y_buf,
|
| }
|
| #endif // HAS_I444TOARGBROW_AVX2
|
|
|
| +#ifdef HAS_I411TOARGBROW_AVX2
|
| +// Converts one row of I411 (separate Y, U and V buffers) to ARGB, 16 pixels per loop iteration.
|
| +// 4 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). The loop always runs at least once and repeats while width stays above 0 after subtracting 16, so the final pass handles a full group of 16 pixels.
|
| +void OMITFP I411ToARGBRow_AVX2(const uint8* y_buf,
|
| + const uint8* u_buf,
|
| + const uint8* v_buf,
|
| + uint8* dst_argb,
|
| + const struct YuvConstants* yuvconstants,
|
| + int width) {
|
| + asm volatile (
|
| + YUVTORGB_SETUP_AVX2(yuvconstants)
|
| + "sub %[u_buf],%[v_buf] \n"  // v_buf = v_buf - u_buf, so only u_buf needs to advance in the loop
|
| + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"  // ymm5 = all ones (presumably the alpha bytes for STOREARGB_AVX2 - confirm)
|
| + LABELALIGN
|
| + "1: \n"
|
| + READYUV411_AVX2  // ymm0 = 16 UV pairs, ymm4 = 16 Y words; advances u_buf by 4 and y_buf by 16
|
| + YUVTORGB_AVX2(yuvconstants)  // presumably converts the loaded YUV to RGB using yuvconstants
|
| + STOREARGB_AVX2  // presumably writes 16 ARGB pixels and advances dst_argb
|
| + "sub $0x10,%[width] \n"  // 16 pixels consumed
|
| + "jg 1b \n"  // repeat while width > 0
|
| + "vzeroupper \n"  // zero the upper halves of the ymm registers before leaving AVX code
|
| + : [y_buf]"+r"(y_buf), // %[y_buf]
|
| + [u_buf]"+r"(u_buf), // %[u_buf]
|
| + [v_buf]"+r"(v_buf), // %[v_buf]
|
| + [dst_argb]"+r"(dst_argb), // %[dst_argb]
|
| + [width]"+rm"(width) // %[width]
|
| + : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
|
| + : "memory", "cc", NACL_R14 YUVTORGB_REGS_AVX2
|
| + "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
| + );
|
| +}
|
| +#endif // HAS_I411TOARGBROW_AVX2
|
| +
|
| #if defined(HAS_I422TOARGBROW_AVX2)
|
| // 16 pixels
|
| // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
|
| @@ -2100,7 +2147,7 @@ void OMITFP I422ToARGBRow_AVX2(const uint8* y_buf,
|
| asm volatile (
|
| YUVTORGB_SETUP_AVX2(yuvconstants)
|
| "sub %[u_buf],%[v_buf] \n"
|
| - "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
| + "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n"
|
| LABELALIGN
|
| "1: \n"
|
| READYUV422_AVX2
|
|
|