| Index: source/row_win.cc
|
| diff --git a/source/row_win.cc b/source/row_win.cc
|
| index f09d2a75be433942f5d816f572837cab344eaa75..3bc9f9bf9061f4d7dafb42b1d684b54f9c8592f1 100644
|
| --- a/source/row_win.cc
|
| +++ b/source/row_win.cc
|
| @@ -34,7 +34,10 @@ extern "C" {
|
| xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \
|
| xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \
|
| xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \
|
| - u_buf += 4;
|
| + u_buf += 4; \
|
| + xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \
|
| + xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \
|
| + y_buf += 8; \
|
|
|
| // Convert 8 pixels: 8 UV and 8 Y.
|
| #define YUVTORGB(YuvConstants) \
|
| @@ -46,13 +49,10 @@ extern "C" {
|
| xmm0 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasB, xmm0); \
|
| xmm1 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasG, xmm1); \
|
| xmm2 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasR, xmm2); \
|
| - xmm3 = _mm_loadl_epi64((__m128i*)y_buf); \
|
| - y_buf += 8; \
|
| - xmm3 = _mm_unpacklo_epi8(xmm3, xmm3); \
|
| - xmm3 = _mm_mulhi_epu16(xmm3, *(__m128i*)YuvConstants->kYToRgb); \
|
| - xmm0 = _mm_adds_epi16(xmm0, xmm3); \
|
| - xmm1 = _mm_adds_epi16(xmm1, xmm3); \
|
| - xmm2 = _mm_adds_epi16(xmm2, xmm3); \
|
| + xmm4 = _mm_mulhi_epu16(xmm4, *(__m128i*)YuvConstants->kYToRgb); \
|
| + xmm0 = _mm_adds_epi16(xmm0, xmm4); \
|
| + xmm1 = _mm_adds_epi16(xmm1, xmm4); \
|
| + xmm2 = _mm_adds_epi16(xmm2, xmm4); \
|
| xmm0 = _mm_srai_epi16(xmm0, 6); \
|
| xmm1 = _mm_srai_epi16(xmm1, 6); \
|
| xmm2 = _mm_srai_epi16(xmm2, 6); \
|
| @@ -90,12 +90,12 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf,
|
| uint8* dst_argb,
|
| struct YuvConstants* yuvconstants,
|
| int width) {
|
| - __m128i xmm0, xmm1, xmm2, xmm3;
|
| + __m128i xmm0, xmm1, xmm2, xmm4;
|
| const __m128i xmm5 = _mm_set1_epi8(-1);
|
| const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
|
| while (width > 0) {
|
| READYUV422
|
| - YUVTORGB(YuvConstants)
|
| + YUVTORGB(yuvconstants)
|
| STOREARGB
|
| width -= 8;
|
| }
|
| @@ -109,12 +109,12 @@ void I422ToABGRRow_SSSE3(const uint8* y_buf,
|
| uint8* dst_argb,
|
| struct YuvConstants* yuvconstants,
|
| int width) {
|
| - __m128i xmm0, xmm1, xmm2, xmm3;
|
| + __m128i xmm0, xmm1, xmm2, xmm4;
|
| const __m128i xmm5 = _mm_set1_epi8(-1);
|
| const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
|
| while (width > 0) {
|
| READYUV422
|
| - YUVTORGB(YuvConstants)
|
| + YUVTORGB(yuvconstants)
|
| STOREABGR
|
| width -= 8;
|
| }
|
| @@ -1852,6 +1852,10 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
| __asm vpermq ymm0, ymm0, 0xd8 \
|
| __asm vpermq ymm1, ymm1, 0xd8 \
|
| __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \
|
| + __asm vmovdqu xmm4, [eax] /* Y */ \
|
| + __asm vpermq ymm4, ymm4, 0xd8 \
|
| + __asm vpunpcklbw ymm4, ymm4, ymm4 \
|
| + __asm lea eax, [eax + 16] \
|
| }
|
|
|
| // Read 8 UV from 422, upsample to 16 UV.
|
| @@ -1862,6 +1866,10 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
| __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \
|
| __asm vpermq ymm0, ymm0, 0xd8 \
|
| __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \
|
| + __asm vmovdqu xmm4, [eax] /* Y */ \
|
| + __asm vpermq ymm4, ymm4, 0xd8 \
|
| + __asm vpunpcklbw ymm4, ymm4, ymm4 \
|
| + __asm lea eax, [eax + 16] \
|
| }
|
|
|
| // Read 4 UV from 411, upsample to 16 UV.
|
| @@ -1873,6 +1881,10 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
| __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \
|
| __asm vpermq ymm0, ymm0, 0xd8 \
|
| __asm vpunpckldq ymm0, ymm0, ymm0 /* UVUVUVUV (upsample) */ \
|
| + __asm vmovdqu xmm4, [eax] /* Y */ \
|
| + __asm vpermq ymm4, ymm4, 0xd8 \
|
| + __asm vpunpcklbw ymm4, ymm4, ymm4 \
|
| + __asm lea eax, [eax + 16] \
|
| }
|
|
|
| // Read 8 UV from NV12, upsample to 16 UV.
|
| @@ -1881,6 +1893,10 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
| __asm lea esi, [esi + 16] \
|
| __asm vpermq ymm0, ymm0, 0xd8 \
|
| __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \
|
| + __asm vmovdqu xmm4, [eax] /* Y */ \
|
| + __asm vpermq ymm4, ymm4, 0xd8 \
|
| + __asm vpunpcklbw ymm4, ymm4, ymm4 \
|
| + __asm lea eax, [eax + 16] \
|
| }
|
|
|
| // Convert 16 pixels: 16 UV and 16 Y.
|
| @@ -1895,14 +1911,10 @@ void RGBAToUVRow_SSSE3(const uint8* src_argb0, int src_stride_argb,
|
| __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASB] \
|
| __asm vpsubw ymm0, ymm3, ymm0 \
|
| /* Step 2: Find Y contribution to 16 R,G,B values */ \
|
| - __asm vmovdqu xmm3, [eax] \
|
| - __asm lea eax, [eax + 16] \
|
| - __asm vpermq ymm3, ymm3, 0xd8 \
|
| - __asm vpunpcklbw ymm3, ymm3, ymm3 \
|
| - __asm vpmulhuw ymm3, ymm3, ymmword ptr [YuvConstants + KYTORGB] \
|
| - __asm vpaddsw ymm0, ymm0, ymm3 /* B += Y */ \
|
| - __asm vpaddsw ymm1, ymm1, ymm3 /* G += Y */ \
|
| - __asm vpaddsw ymm2, ymm2, ymm3 /* R += Y */ \
|
| + __asm vpmulhuw ymm4, ymm4, ymmword ptr [YuvConstants + KYTORGB] \
|
| + __asm vpaddsw ymm0, ymm0, ymm4 /* B += Y */ \
|
| + __asm vpaddsw ymm1, ymm1, ymm4 /* G += Y */ \
|
| + __asm vpaddsw ymm2, ymm2, ymm4 /* R += Y */ \
|
| __asm vpsraw ymm0, ymm0, 6 \
|
| __asm vpsraw ymm1, ymm1, 6 \
|
| __asm vpsraw ymm2, ymm2, 6 \
|
| @@ -1981,7 +1993,7 @@ void I422ToARGBRow_AVX2(const uint8* y_buf,
|
| mov esi, [esp + 12 + 8] // U
|
| mov edi, [esp + 12 + 12] // V
|
| mov edx, [esp + 12 + 16] // argb
|
| - mov ebp, [esp + 12 + 20] // YuvConstants
|
| + mov ebp, [esp + 12 + 20] // yuvconstants
|
| mov ecx, [esp + 12 + 24] // width
|
| sub edi, esi
|
| vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
|
| @@ -2021,7 +2033,7 @@ void I444ToARGBRow_AVX2(const uint8* y_buf,
|
| mov esi, [esp + 12 + 8] // U
|
| mov edi, [esp + 12 + 12] // V
|
| mov edx, [esp + 12 + 16] // argb
|
| - mov ebp, [esp + 12 + 20] // YuvConstants
|
| + mov ebp, [esp + 12 + 20] // yuvconstants
|
| mov ecx, [esp + 12 + 24] // width
|
| sub edi, esi
|
| vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
|
| @@ -2060,7 +2072,7 @@ void I444ToABGRRow_AVX2(const uint8* y_buf,
|
| mov esi, [esp + 12 + 8] // U
|
| mov edi, [esp + 12 + 12] // V
|
| mov edx, [esp + 12 + 16] // abgr
|
| - mov ebp, [esp + 12 + 20] // YuvConstants
|
| + mov ebp, [esp + 12 + 20] // yuvconstants
|
| mov ecx, [esp + 12 + 24] // width
|
| sub edi, esi
|
| vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
|
| @@ -2099,7 +2111,7 @@ void I411ToARGBRow_AVX2(const uint8* y_buf,
|
| mov esi, [esp + 12 + 8] // U
|
| mov edi, [esp + 12 + 12] // V
|
| mov edx, [esp + 12 + 16] // abgr
|
| - mov ebp, [esp + 12 + 20] // YuvConstants
|
| + mov ebp, [esp + 12 + 20] // yuvconstants
|
| mov ecx, [esp + 12 + 24] // width
|
| sub edi, esi
|
| vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
|
| @@ -2136,7 +2148,7 @@ void NV12ToARGBRow_AVX2(const uint8* y_buf,
|
| mov eax, [esp + 8 + 4] // Y
|
| mov esi, [esp + 8 + 8] // UV
|
| mov edx, [esp + 8 + 12] // argb
|
| - mov ebp, [esp + 8 + 16] // YuvConstants
|
| + mov ebp, [esp + 8 + 16] // yuvconstants
|
| mov ecx, [esp + 8 + 20] // width
|
| vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
|
|
|
| @@ -2175,7 +2187,7 @@ void I422ToBGRARow_AVX2(const uint8* y_buf,
|
| mov esi, [esp + 12 + 8] // U
|
| mov edi, [esp + 12 + 12] // V
|
| mov edx, [esp + 12 + 16] // abgr
|
| - mov ebp, [esp + 12 + 20] // YuvConstants
|
| + mov ebp, [esp + 12 + 20] // yuvconstants
|
| mov ecx, [esp + 12 + 24] // width
|
| sub edi, esi
|
| vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
|
| @@ -2215,7 +2227,7 @@ void I422ToRGBARow_AVX2(const uint8* y_buf,
|
| mov esi, [esp + 12 + 8] // U
|
| mov edi, [esp + 12 + 12] // V
|
| mov edx, [esp + 12 + 16] // abgr
|
| - mov ebp, [esp + 12 + 20] // YuvConstants
|
| + mov ebp, [esp + 12 + 20] // yuvconstants
|
| mov ecx, [esp + 12 + 24] // width
|
| sub edi, esi
|
| vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
|
| @@ -2255,7 +2267,7 @@ void I422ToABGRRow_AVX2(const uint8* y_buf,
|
| mov esi, [esp + 12 + 8] // U
|
| mov edi, [esp + 12 + 12] // V
|
| mov edx, [esp + 12 + 16] // argb
|
| - mov ebp, [esp + 12 + 20] // YuvConstants
|
| + mov ebp, [esp + 12 + 20] // yuvconstants
|
| mov ecx, [esp + 12 + 24] // width
|
| sub edi, esi
|
| vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
|
| @@ -2286,6 +2298,9 @@ void I422ToABGRRow_AVX2(const uint8* y_buf,
|
| __asm movq xmm1, qword ptr [esi + edi] /* V */ \
|
| __asm lea esi, [esi + 8] \
|
| __asm punpcklbw xmm0, xmm1 /* UV */ \
|
| + __asm movq xmm4, qword ptr [eax] \
|
| + __asm punpcklbw xmm4, xmm4 \
|
| + __asm lea eax, [eax + 8] \
|
| }
|
|
|
| // Read 4 UV from 422, upsample to 8 UV.
|
| @@ -2295,6 +2310,9 @@ void I422ToABGRRow_AVX2(const uint8* y_buf,
|
| __asm lea esi, [esi + 4] \
|
| __asm punpcklbw xmm0, xmm1 /* UV */ \
|
| __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
|
| + __asm movq xmm4, qword ptr [eax] \
|
| + __asm punpcklbw xmm4, xmm4 \
|
| + __asm lea eax, [eax + 8] \
|
| }
|
|
|
| // Read 2 UV from 411, upsample to 8 UV.
|
| @@ -2305,6 +2323,9 @@ void I422ToABGRRow_AVX2(const uint8* y_buf,
|
| __asm punpcklbw xmm0, xmm1 /* UV */ \
|
| __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
|
| __asm punpckldq xmm0, xmm0 /* UVUVUVUV (upsample) */ \
|
| + __asm movq xmm4, qword ptr [eax] \
|
| + __asm punpcklbw xmm4, xmm4 \
|
| + __asm lea eax, [eax + 8] \
|
| }
|
|
|
| // Read 4 UV from NV12, upsample to 8 UV.
|
| @@ -2312,6 +2333,47 @@ void I422ToABGRRow_AVX2(const uint8* y_buf,
|
| __asm movq xmm0, qword ptr [esi] /* UV */ \
|
| __asm lea esi, [esi + 8] \
|
| __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
|
| + __asm movq xmm4, qword ptr [eax] \
|
| + __asm punpcklbw xmm4, xmm4 \
|
| + __asm lea eax, [eax + 8] \
|
| + }
|
| +
|
| +// YUY2 shuffle: duplicate each of the 8 Y bytes to make 16 Y bytes.
|
| +static const vec8 kShuffleYUY2Y = {
|
| + 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14
|
| +};
|
| +
|
| +// YUY2 shuffle: duplicate each of the 4 UV pairs to make 8 UV pairs.
|
| +static const vec8 kShuffleYUY2UV = {
|
| + 1, 3, 1, 3, 5, 7, 5, 7, 9, 11, 9, 11, 13, 15, 13, 15
|
| +};
|
| +
|
| +// Read 4 YUY2 with 8 Y and upsample 4 UV to 8 UV.
|
| +#define READYUY2 __asm { \
|
| + __asm movdqu xmm4, [eax] /* YUY2 */ \
|
| + __asm pshufb xmm4, xmmword ptr kShuffleYUY2Y \
|
| + __asm movdqu xmm0, [eax] /* UV */ \
|
| + __asm pshufb xmm0, xmmword ptr kShuffleYUY2UV \
|
| + __asm lea eax, [eax + 16] \
|
| + }
|
| +
|
| +// UYVY shuffle: duplicate each of the 8 Y bytes to make 16 Y bytes.
|
| +static const vec8 kShuffleUYVYY = {
|
| + 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15
|
| +};
|
| +
|
| +// UYVY shuffle: duplicate each of the 4 UV pairs to make 8 UV pairs.
|
| +static const vec8 kShuffleUYVYUV = {
|
| + 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14
|
| +};
|
| +
|
| +// Read 4 UYVY with 8 Y and upsample 4 UV to 8 UV.
|
| +#define READUYVY __asm { \
|
| + __asm movdqu xmm4, [eax] /* UYVY */ \
|
| + __asm pshufb xmm4, xmmword ptr kShuffleUYVYY \
|
| + __asm movdqu xmm0, [eax] /* UV */ \
|
| + __asm pshufb xmm0, xmmword ptr kShuffleUYVYUV \
|
| + __asm lea eax, [eax + 16] \
|
| }
|
|
|
| // Convert 8 pixels: 8 UV and 8 Y.
|
| @@ -2328,13 +2390,10 @@ void I422ToABGRRow_AVX2(const uint8* y_buf,
|
| __asm movdqa xmm2, xmmword ptr [YuvConstants + KUVBIASR] \
|
| __asm pmaddubsw xmm3, xmmword ptr [YuvConstants + KUVTOR] \
|
| __asm psubw xmm2, xmm3 \
|
| - __asm movq xmm3, qword ptr [eax] \
|
| - __asm lea eax, [eax + 8] \
|
| - __asm punpcklbw xmm3, xmm3 \
|
| - __asm pmulhuw xmm3, xmmword ptr [YuvConstants + KYTORGB] \
|
| - __asm paddsw xmm0, xmm3 /* B += Y */ \
|
| - __asm paddsw xmm1, xmm3 /* G += Y */ \
|
| - __asm paddsw xmm2, xmm3 /* R += Y */ \
|
| + __asm pmulhuw xmm4, xmmword ptr [YuvConstants + KYTORGB] \
|
| + __asm paddsw xmm0, xmm4 /* B += Y */ \
|
| + __asm paddsw xmm1, xmm4 /* G += Y */ \
|
| + __asm paddsw xmm2, xmm4 /* R += Y */ \
|
| __asm psraw xmm0, 6 \
|
| __asm psraw xmm1, 6 \
|
| __asm psraw xmm2, 6 \
|
| @@ -2480,7 +2539,7 @@ void I444ToARGBRow_SSSE3(const uint8* y_buf,
|
| mov esi, [esp + 12 + 8] // U
|
| mov edi, [esp + 12 + 12] // V
|
| mov edx, [esp + 12 + 16] // argb
|
| - mov ebp, [esp + 12 + 20] // YuvConstants
|
| + mov ebp, [esp + 12 + 20] // yuvconstants
|
| mov ecx, [esp + 12 + 24] // width
|
| sub edi, esi
|
| pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
| @@ -2517,7 +2576,7 @@ void I444ToABGRRow_SSSE3(const uint8* y_buf,
|
| mov esi, [esp + 12 + 8] // U
|
| mov edi, [esp + 12 + 12] // V
|
| mov edx, [esp + 12 + 16] // abgr
|
| - mov ebp, [esp + 12 + 20] // YuvConstants
|
| + mov ebp, [esp + 12 + 20] // yuvconstants
|
| mov ecx, [esp + 12 + 24] // width
|
| sub edi, esi
|
| pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
| @@ -2554,7 +2613,7 @@ void I422ToRGB24Row_SSSE3(const uint8* y_buf,
|
| mov esi, [esp + 12 + 8] // U
|
| mov edi, [esp + 12 + 12] // V
|
| mov edx, [esp + 12 + 16] // argb
|
| - mov ebp, [esp + 12 + 20] // YuvConstants
|
| + mov ebp, [esp + 12 + 20] // yuvconstants
|
| mov ecx, [esp + 12 + 24] // width
|
| sub edi, esi
|
| movdqa xmm5, xmmword ptr kShuffleMaskARGBToRGB24_0
|
| @@ -2592,7 +2651,7 @@ void I422ToRAWRow_SSSE3(const uint8* y_buf,
|
| mov esi, [esp + 12 + 8] // U
|
| mov edi, [esp + 12 + 12] // V
|
| mov edx, [esp + 12 + 16] // argb
|
| - mov ebp, [esp + 12 + 20] // YuvConstants
|
| + mov ebp, [esp + 12 + 20] // yuvconstants
|
| mov ecx, [esp + 12 + 24] // width
|
| sub edi, esi
|
| movdqa xmm5, xmmword ptr kShuffleMaskARGBToRAW_0
|
| @@ -2630,7 +2689,7 @@ void I422ToRGB565Row_SSSE3(const uint8* y_buf,
|
| mov esi, [esp + 12 + 8] // U
|
| mov edi, [esp + 12 + 12] // V
|
| mov edx, [esp + 12 + 16] // argb
|
| - mov ebp, [esp + 12 + 20] // YuvConstants
|
| + mov ebp, [esp + 12 + 20] // yuvconstants
|
| mov ecx, [esp + 12 + 24] // width
|
| sub edi, esi
|
| pcmpeqb xmm5, xmm5 // generate mask 0x0000001f
|
| @@ -2673,7 +2732,7 @@ void I422ToARGBRow_SSSE3(const uint8* y_buf,
|
| mov esi, [esp + 12 + 8] // U
|
| mov edi, [esp + 12 + 12] // V
|
| mov edx, [esp + 12 + 16] // argb
|
| - mov ebp, [esp + 12 + 20] // YuvConstants
|
| + mov ebp, [esp + 12 + 20] // yuvconstants
|
| mov ecx, [esp + 12 + 24] // width
|
| sub edi, esi
|
| pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
| @@ -2711,7 +2770,7 @@ void I411ToARGBRow_SSSE3(const uint8* y_buf,
|
| mov esi, [esp + 12 + 8] // U
|
| mov edi, [esp + 12 + 12] // V
|
| mov edx, [esp + 12 + 16] // abgr
|
| - mov ebp, [esp + 12 + 20] // YuvConstants
|
| + mov ebp, [esp + 12 + 20] // yuvconstants
|
| mov ecx, [esp + 12 + 24] // width
|
| sub edi, esi
|
| pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
| @@ -2745,7 +2804,7 @@ void NV12ToARGBRow_SSSE3(const uint8* y_buf,
|
| mov eax, [esp + 8 + 4] // Y
|
| mov esi, [esp + 8 + 8] // UV
|
| mov edx, [esp + 8 + 12] // argb
|
| - mov ebp, [esp + 8 + 16] // YuvConstants
|
| + mov ebp, [esp + 8 + 16] // yuvconstants
|
| mov ecx, [esp + 8 + 20] // width
|
| pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
|
|
| @@ -2763,6 +2822,62 @@ void NV12ToARGBRow_SSSE3(const uint8* y_buf,
|
| }
|
| }
|
|
|
| +// Converts a row of packed YUY2 to ARGB, 8 pixels per loop iteration.
|
| +// 4 YUY2 values with 8 Y and 4 UV producing 8 ARGB (32 bytes).
|
| +__declspec(naked)
|
| +void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
|
| + uint8* dst_argb,
|
| + struct YuvConstants* yuvconstants,
|
| + int width) {
|
| + __asm {
|
| + push ebp // ebp holds yuvconstants below; restored before ret
|
| + mov eax, [esp + 4 + 4] // yuy2 (first 4 = pushed ebp, then the stack args)
|
| + mov edx, [esp + 4 + 8] // argb
|
| + mov ebp, [esp + 4 + 12] // yuvconstants
|
| + mov ecx, [esp + 4 + 16] // width
|
| + pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
| +
|
| + convertloop:
|
| + READYUY2 // loads 16 bytes at [eax]: Y -> xmm4, UV -> xmm0; eax += 16
|
| + YUVTORGB(ebp) // consumes xmm0 (UV) and xmm4 (Y) with constants at [ebp]
|
| + STOREARGB // defined elsewhere; presumably writes 8 ARGB at [edx] - confirm
|
| +
|
| + sub ecx, 8 // 8 pixels handled per iteration
|
| + jg convertloop // NOTE(review): always does a full 8; width % 8 != 0 overruns
|
| +
|
| + pop ebp
|
| + ret
|
| + }
|
| +}
|
| +
|
| +// Converts a row of packed UYVY to ARGB, 8 pixels per loop iteration.
|
| +// 4 UYVY values with 8 Y and 4 UV producing 8 ARGB (32 bytes).
|
| +__declspec(naked)
|
| +void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
|
| + uint8* dst_argb,
|
| + struct YuvConstants* yuvconstants,
|
| + int width) {
|
| + __asm {
|
| + push ebp // ebp holds yuvconstants below; restored before ret
|
| + mov eax, [esp + 4 + 4] // uyvy (first 4 = pushed ebp, then the stack args)
|
| + mov edx, [esp + 4 + 8] // argb
|
| + mov ebp, [esp + 4 + 12] // yuvconstants
|
| + mov ecx, [esp + 4 + 16] // width
|
| + pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
| +
|
| + convertloop:
|
| + READUYVY // loads 16 bytes at [eax]: Y -> xmm4, UV -> xmm0; eax += 16
|
| + YUVTORGB(ebp) // consumes xmm0 (UV) and xmm4 (Y) with constants at [ebp]
|
| + STOREARGB // defined elsewhere; presumably writes 8 ARGB at [edx] - confirm
|
| +
|
| + sub ecx, 8 // 8 pixels handled per iteration
|
| + jg convertloop // NOTE(review): always does a full 8; width % 8 != 0 overruns
|
| +
|
| + pop ebp
|
| + ret
|
| + }
|
| +}
|
| +
|
| __declspec(naked)
|
| void I422ToBGRARow_SSSE3(const uint8* y_buf,
|
| const uint8* u_buf,
|
| @@ -2778,7 +2893,7 @@ void I422ToBGRARow_SSSE3(const uint8* y_buf,
|
| mov esi, [esp + 12 + 8] // U
|
| mov edi, [esp + 12 + 12] // V
|
| mov edx, [esp + 12 + 16] // argb
|
| - mov ebp, [esp + 12 + 20] // YuvConstants
|
| + mov ebp, [esp + 12 + 20] // yuvconstants
|
| mov ecx, [esp + 12 + 24] // width
|
| sub edi, esi
|
|
|
| @@ -2812,7 +2927,7 @@ void I422ToABGRRow_SSSE3(const uint8* y_buf,
|
| mov esi, [esp + 12 + 8] // U
|
| mov edi, [esp + 12 + 12] // V
|
| mov edx, [esp + 12 + 16] // argb
|
| - mov ebp, [esp + 12 + 20] // YuvConstants
|
| + mov ebp, [esp + 12 + 20] // yuvconstants
|
| mov ecx, [esp + 12 + 24] // width
|
| sub edi, esi
|
| pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
| @@ -2847,7 +2962,7 @@ void I422ToRGBARow_SSSE3(const uint8* y_buf,
|
| mov esi, [esp + 12 + 8] // U
|
| mov edi, [esp + 12 + 12] // V
|
| mov edx, [esp + 12 + 16] // argb
|
| - mov ebp, [esp + 12 + 20] // YuvConstants
|
| + mov ebp, [esp + 12 + 20] // yuvconstants
|
| mov ecx, [esp + 12 + 24] // width
|
| sub edi, esi
|
|
|
| @@ -3512,8 +3627,7 @@ void ARGBSetRow_X86(uint8* dst_argb, uint32 v32, int count) {
|
|
|
| #ifdef HAS_YUY2TOYROW_AVX2
|
| __declspec(naked)
|
| -void YUY2ToYRow_AVX2(const uint8* src_yuy2,
|
| - uint8* dst_y, int pix) {
|
| +void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix) {
|
| __asm {
|
| mov eax, [esp + 4] // src_yuy2
|
| mov edx, [esp + 8] // dst_y
|
|
|