| Index: source/row_gcc.cc
|
| diff --git a/source/row_gcc.cc b/source/row_gcc.cc
|
| index af5ca2b520725d86749f4e914bcf9b40397b92ea..68846dc8f9f21094e3b9f122834b8250d0d1096c 100644
|
| --- a/source/row_gcc.cc
|
| +++ b/source/row_gcc.cc
|
| @@ -1324,7 +1324,9 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba,
|
| "movq " MEMACCESS([u_buf]) ",%%xmm0 \n" \
|
| MEMOPREG(movq, 0x00, [u_buf], [v_buf], 1, xmm1) \
|
| "lea " MEMLEA(0x8, [u_buf]) ",%[u_buf] \n" \
|
| - "punpcklbw %%xmm1,%%xmm0 \n"
|
| + "punpcklbw %%xmm1,%%xmm0 \n" \
|
| + "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \
|
| + "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n"
|
|
|
| // Read 4 UV from 422, upsample to 8 UV
|
| #define READYUV422 \
|
| @@ -1332,7 +1334,9 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba,
|
| MEMOPREG(movd, 0x00, [u_buf], [v_buf], 1, xmm1) \
|
| "lea " MEMLEA(0x4, [u_buf]) ",%[u_buf] \n" \
|
| "punpcklbw %%xmm1,%%xmm0 \n" \
|
| - "punpcklwd %%xmm0,%%xmm0 \n"
|
| + "punpcklwd %%xmm0,%%xmm0 \n" \
|
| + "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \
|
| + "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n"
|
|
|
| // Read 2 UV from 411, upsample to 8 UV
|
| #define READYUV411 \
|
| @@ -1341,13 +1345,17 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba,
|
| "lea " MEMLEA(0x2, [u_buf]) ",%[u_buf] \n" \
|
| "punpcklbw %%xmm1,%%xmm0 \n" \
|
| "punpcklwd %%xmm0,%%xmm0 \n" \
|
| - "punpckldq %%xmm0,%%xmm0 \n"
|
| + "punpckldq %%xmm0,%%xmm0 \n" \
|
| + "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \
|
| + "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n"
|
|
|
| // Read 4 UV from NV12, upsample to 8 UV
|
| #define READNV12 \
|
| "movq " MEMACCESS([uv_buf]) ",%%xmm0 \n" \
|
| "lea " MEMLEA(0x8, [uv_buf]) ",%[uv_buf] \n" \
|
| - "punpcklwd %%xmm0,%%xmm0 \n"
|
| + "punpcklwd %%xmm0,%%xmm0 \n" \
|
| + "movq " MEMACCESS([y_buf]) ",%%xmm4 \n" \
|
| + "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n"
|
|
|
| // Convert 8 pixels: 8 UV and 8 Y
|
| #define YUVTORGB(yuvconstants) \
|
| @@ -1363,13 +1371,11 @@ void RGBAToUVRow_SSSE3(const uint8* src_rgba0, int src_stride_rgba,
|
| "movdqa " MEMACCESS2(160, [yuvconstants]) ",%%xmm2 \n" \
|
| "pmaddubsw " MEMACCESS2(64, [yuvconstants]) ",%%xmm3 \n" \
|
| "psubw %%xmm3,%%xmm2 \n" \
|
| - "movq " MEMACCESS([y_buf]) ",%%xmm3 \n" \
|
| - "lea " MEMLEA(0x8, [y_buf]) ",%[y_buf] \n" \
|
| - "punpcklbw %%xmm3,%%xmm3 \n" \
|
| - "pmulhuw " MEMACCESS2(192, [yuvconstants]) ",%%xmm3 \n" \
|
| - "paddsw %%xmm3,%%xmm0 \n" \
|
| - "paddsw %%xmm3,%%xmm1 \n" \
|
| - "paddsw %%xmm3,%%xmm2 \n" \
|
| + "punpcklbw %%xmm4,%%xmm4 \n" \
|
| + "pmulhuw " MEMACCESS2(192, [yuvconstants]) ",%%xmm4 \n" \
|
| + "paddsw %%xmm4,%%xmm0 \n" \
|
| + "paddsw %%xmm4,%%xmm1 \n" \
|
| + "paddsw %%xmm4,%%xmm2 \n" \
|
| "psraw $0x6,%%xmm0 \n" \
|
| "psraw $0x6,%%xmm1 \n" \
|
| "psraw $0x6,%%xmm2 \n" \
|
|
|