| Index: source/row_win.cc
|
| diff --git a/source/row_win.cc b/source/row_win.cc
|
| index 91aea8e9450f5f634ef1d33325dd4659f6754b3f..62beb1c9b0b5b91f8541330651f112ad5da61d7a 100644
|
| --- a/source/row_win.cc
|
| +++ b/source/row_win.cc
|
| @@ -2172,41 +2172,83 @@ void I422ToARGBMatrixRow_AVX2(const uint8* y_buf,
|
| }
|
| #endif // HAS_I422TOARGBMATRIXROW_AVX2
|
|
|
| -#ifdef HAS_I444TOARGBROW_AVX2
|
| +#ifdef HAS_I444TOARGBMATRIXROW_AVX2
|
| // 16 pixels
|
| // 16 UV values with 16 Y producing 16 ARGB (64 bytes).
|
| __declspec(naked)
|
| -void I444ToARGBRow_AVX2(const uint8* y_buf,
|
| - const uint8* u_buf,
|
| - const uint8* v_buf,
|
| - uint8* dst_argb,
|
| - int width) {
|
| +void I444ToARGBMatrixRow_AVX2(const uint8* y_buf,
|
| + const uint8* u_buf,
|
| + const uint8* v_buf,
|
| + uint8* dst_argb,
|
| + struct YuvConstants* YuvConstants,
|
| + int width) {
|
| __asm {
|
| push esi
|
| push edi
|
| - mov eax, [esp + 8 + 4] // Y
|
| - mov esi, [esp + 8 + 8] // U
|
| - mov edi, [esp + 8 + 12] // V
|
| - mov edx, [esp + 8 + 16] // argb
|
| - mov ecx, [esp + 8 + 20] // width
|
| + push ebp // saved here, restored before ret; holds the YuvConstants pointer
|
| + mov eax, [esp + 12 + 4] // y_buf; 12 skips the saved esi/edi/ebp
|
| + mov esi, [esp + 12 + 8] // u_buf
|
| + mov edi, [esp + 12 + 12] // v_buf
|
| + mov edx, [esp + 12 + 16] // dst_argb
|
| + mov ebp, [esp + 12 + 20] // YuvConstants pointer, handed to YUVTORGB_AVX2 below
|
| + mov ecx, [esp + 12 + 24] // width; reduced by 16 on every loop iteration
|
| sub edi, esi
|
| vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
|
| -
|
| convertloop:
|
| READYUV444_AVX2
|
| - YUVTORGB_AVX2(kYuvConstants)
|
| + YUVTORGB_AVX2(ebp) // constants now come from the caller-supplied pointer
|
| STOREARGB_AVX2
|
|
|
| sub ecx, 16
|
| jg convertloop
|
|
|
| + pop ebp // restore in reverse push order
|
| pop edi
|
| pop esi
|
| vzeroupper
|
| ret
|
| }
|
| }
|
| -#endif // HAS_I444TOARGBROW_AVX2
|
| +#endif // HAS_I444TOARGBMATRIXROW_AVX2
|
| +
|
| +#ifdef HAS_I444TOABGRMATRIXROW_AVX2
|
| +// 16 pixels per loop iteration; conversion constants are supplied by the caller.
|
| +// 16 UV values with 16 Y producing 16 ABGR (64 bytes).
|
| +__declspec(naked)
|
| +void I444ToABGRMatrixRow_AVX2(const uint8* y_buf,
|
| + const uint8* u_buf,
|
| + const uint8* v_buf,
|
| + uint8* dst_abgr,
|
| + struct YuvConstants* YuvConstants,
|
| + int width) {
|
| + __asm {
|
| + push esi
|
| + push edi
|
| + push ebp // saved here, restored before ret; holds the YuvConstants pointer
|
| + mov eax, [esp + 12 + 4] // y_buf; 12 skips the saved esi/edi/ebp
|
| + mov esi, [esp + 12 + 8] // u_buf
|
| + mov edi, [esp + 12 + 12] // v_buf
|
| + mov edx, [esp + 12 + 16] // dst_abgr
|
| + mov ebp, [esp + 12 + 20] // YuvConstants pointer, handed to YUVTORGB_AVX2 below
|
| + mov ecx, [esp + 12 + 24] // width; reduced by 16 on every loop iteration
|
| + sub edi, esi // edi = v_buf - u_buf, i.e. V is kept as an offset from U
|
| + vpcmpeqb ymm5, ymm5, ymm5 // ymm5 = all ones (0xff in every byte), for alpha
|
| + convertloop:
|
| + READYUV444_AVX2
|
| + YUVTORGB_AVX2(ebp) // constants come from the caller-supplied pointer
|
| + STOREABGR_AVX2
|
| +
|
| + sub ecx, 16 // 16 pixels consumed per iteration
|
| + jg convertloop // keep looping while the remaining width is > 0
|
| +
|
| + pop ebp // restore in reverse push order
|
| + pop edi
|
| + pop esi
|
| + vzeroupper // zero the upper ymm halves before returning
|
| + ret
|
| + }
|
| +}
|
| +#endif // HAS_I444TOABGRMATRIXROW_AVX2
|
|
|
| #ifdef HAS_I411TOARGBROW_AVX2
|
| // 16 pixels
|
| @@ -2608,30 +2650,71 @@ void I422ToABGRMatrixRow_AVX2(const uint8* y_buf,
|
| // 8 pixels.
|
| // 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes).
|
| __declspec(naked)
|
| -void I444ToARGBRow_SSSE3(const uint8* y_buf,
|
| - const uint8* u_buf,
|
| - const uint8* v_buf,
|
| - uint8* dst_argb,
|
| - int width) {
|
| +void I444ToARGBMatrixRow_SSSE3(const uint8* y_buf,
|
| + const uint8* u_buf,
|
| + const uint8* v_buf,
|
| + uint8* dst_argb,
|
| + struct YuvConstants* YuvConstants,
|
| + int width) {
|
| __asm {
|
| push esi
|
| push edi
|
| - mov eax, [esp + 8 + 4] // Y
|
| - mov esi, [esp + 8 + 8] // U
|
| - mov edi, [esp + 8 + 12] // V
|
| - mov edx, [esp + 8 + 16] // argb
|
| - mov ecx, [esp + 8 + 20] // width
|
| + push ebp // saved here, restored before ret; holds the YuvConstants pointer
|
| + mov eax, [esp + 12 + 4] // y_buf; 12 skips the saved esi/edi/ebp
|
| + mov esi, [esp + 12 + 8] // u_buf
|
| + mov edi, [esp + 12 + 12] // v_buf
|
| + mov edx, [esp + 12 + 16] // dst_argb
|
| + mov ebp, [esp + 12 + 20] // YuvConstants pointer, handed to YUVTORGB below
|
| + mov ecx, [esp + 12 + 24] // width; reduced by 8 on every loop iteration
|
| sub edi, esi
|
| - pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
| + pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
|
|
| convertloop:
|
| READYUV444
|
| - YUVTORGB(kYuvConstants)
|
| + YUVTORGB(ebp) // constants now come from the caller-supplied pointer
|
| STOREARGB
|
|
|
| sub ecx, 8
|
| jg convertloop
|
|
|
| + pop ebp // restore in reverse push order
|
| + pop edi
|
| + pop esi
|
| + ret
|
| + }
|
| +}
|
| +
|
| +// 8 pixels.
|
| +// 8 UV values, mixed with 8 Y producing 8 ABGR (32 bytes).
|
| +__declspec(naked)
|
| +void I444ToABGRMatrixRow_SSSE3(const uint8* y_buf,
|
| + const uint8* u_buf,
|
| + const uint8* v_buf,
|
| + uint8* dst_abgr,
|
| + struct YuvConstants* YuvConstants,
|
| + int width) {
|
| + __asm {
|
| + push esi
|
| + push edi
|
| + push ebp
|
| + mov eax, [esp + 12 + 4] // Y
|
| + mov esi, [esp + 12 + 8] // U
|
| + mov edi, [esp + 12 + 12] // V
|
| + mov edx, [esp + 12 + 16] // abgr
|
| + mov ebp, [esp + 12 + 20] // YuvConstants
|
| + mov ecx, [esp + 12 + 24] // width
|
| + sub edi, esi
|
| + pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
|
| +
|
| + convertloop:
|
| + READYUV444
|
| + YUVTORGB(ebp)
|
| + STOREABGR
|
| +
|
| + sub ecx, 8
|
| + jg convertloop
|
| +
|
| + pop ebp
|
| pop edi
|
| pop esi
|
| ret
|
|
|