Index: source/row_win.cc |
diff --git a/source/row_win.cc b/source/row_win.cc |
index 91aea8e9450f5f634ef1d33325dd4659f6754b3f..62beb1c9b0b5b91f8541330651f112ad5da61d7a 100644 |
--- a/source/row_win.cc |
+++ b/source/row_win.cc |
@@ -2172,41 +2172,83 @@ void I422ToARGBMatrixRow_AVX2(const uint8* y_buf, |
} |
#endif // HAS_I422TOARGBMATRIXROW_AVX2 |
-#ifdef HAS_I444TOARGBROW_AVX2 |
+#ifdef HAS_I444TOARGBMATRIXROW_AVX2 |
// 16 pixels |
// 16 UV values with 16 Y producing 16 ARGB (64 bytes). |
__declspec(naked) |
-void I444ToARGBRow_AVX2(const uint8* y_buf, |
- const uint8* u_buf, |
- const uint8* v_buf, |
- uint8* dst_argb, |
- int width) { |
+void I444ToARGBMatrixRow_AVX2(const uint8* y_buf, |
+ const uint8* u_buf, |
+ const uint8* v_buf, |
+ uint8* dst_argb, |
+ struct YuvConstants* YuvConstants, |
+ int width) { |
__asm { |
push esi |
push edi |
- mov eax, [esp + 8 + 4] // Y |
- mov esi, [esp + 8 + 8] // U |
- mov edi, [esp + 8 + 12] // V |
- mov edx, [esp + 8 + 16] // argb |
- mov ecx, [esp + 8 + 20] // width |
+ push ebp |
+ mov eax, [esp + 12 + 4] // y_buf (12 = esi, edi, ebp saved above) |
+ mov esi, [esp + 12 + 8] // u_buf |
+ mov edi, [esp + 12 + 12] // v_buf |
+ mov edx, [esp + 12 + 16] // dst_argb |
+ mov ebp, [esp + 12 + 20] // YuvConstants pointer, passed to YUVTORGB_AVX2 |
+ mov ecx, [esp + 12 + 24] // width, counted down 16 per loop |
sub edi, esi |
vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha |
- |
convertloop: |
READYUV444_AVX2 |
- YUVTORGB_AVX2(kYuvConstants) |
+ YUVTORGB_AVX2(ebp) |
STOREARGB_AVX2 |
sub ecx, 16 |
jg convertloop |
+ pop ebp |
pop edi |
pop esi |
vzeroupper |
ret |
} |
} |
-#endif // HAS_I444TOARGBROW_AVX2 |
+#endif // HAS_I444TOARGBMATRIXROW_AVX2 |
+ |
+#ifdef HAS_I444TOABGRMATRIXROW_AVX2 |
+// 16 pixels per loop pass (width is decremented by 16 each pass). |
+// 16 UV values with 16 Y producing 16 ABGR (64 bytes), via caller's YuvConstants. |
+__declspec(naked) |
+void I444ToABGRMatrixRow_AVX2(const uint8* y_buf, |
+ const uint8* u_buf, |
+ const uint8* v_buf, |
+ uint8* dst_abgr, |
+ struct YuvConstants* YuvConstants, |
+ int width) { |
+ __asm { |
+ push esi |
+ push edi |
+ push ebp |
+ mov eax, [esp + 12 + 4] // y_buf (12 = esi, edi, ebp saved above) |
+ mov esi, [esp + 12 + 8] // u_buf |
+ mov edi, [esp + 12 + 12] // v_buf |
+ mov edx, [esp + 12 + 16] // dst_abgr |
+ mov ebp, [esp + 12 + 20] // YuvConstants pointer, passed to YUVTORGB_AVX2 |
+ mov ecx, [esp + 12 + 24] // width, counted down 16 per loop |
+ sub edi, esi |
+ vpcmpeqb ymm5, ymm5, ymm5 // ymm5 = all 1 bits (0xff per byte), for alpha |
+ convertloop: |
+ READYUV444_AVX2 |
+ YUVTORGB_AVX2(ebp) |
+ STOREABGR_AVX2 |
+ |
+ sub ecx, 16 |
+ jg convertloop |
+ |
+ pop ebp |
+ pop edi |
+ pop esi |
+ vzeroupper |
+ ret |
+ } |
+} |
+#endif // HAS_I444TOABGRMATRIXROW_AVX2 |
#ifdef HAS_I411TOARGBROW_AVX2 |
// 16 pixels |
@@ -2608,30 +2650,71 @@ void I422ToABGRMatrixRow_AVX2(const uint8* y_buf, |
// 8 pixels. |
// 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes). |
__declspec(naked) |
-void I444ToARGBRow_SSSE3(const uint8* y_buf, |
- const uint8* u_buf, |
- const uint8* v_buf, |
- uint8* dst_argb, |
- int width) { |
+void I444ToARGBMatrixRow_SSSE3(const uint8* y_buf, |
+ const uint8* u_buf, |
+ const uint8* v_buf, |
+ uint8* dst_argb, |
+ struct YuvConstants* YuvConstants, |
+ int width) { |
__asm { |
push esi |
push edi |
- mov eax, [esp + 8 + 4] // Y |
- mov esi, [esp + 8 + 8] // U |
- mov edi, [esp + 8 + 12] // V |
- mov edx, [esp + 8 + 16] // argb |
- mov ecx, [esp + 8 + 20] // width |
+ push ebp |
+ mov eax, [esp + 12 + 4] // y_buf (12 = esi, edi, ebp saved above) |
+ mov esi, [esp + 12 + 8] // u_buf |
+ mov edi, [esp + 12 + 12] // v_buf |
+ mov edx, [esp + 12 + 16] // dst_argb |
+ mov ebp, [esp + 12 + 20] // YuvConstants pointer, passed to YUVTORGB |
+ mov ecx, [esp + 12 + 24] // width, counted down 8 per loop |
sub edi, esi |
- pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha |
+ pcmpeqb xmm5, xmm5 // xmm5 = all 1 bits (0xff per byte), for alpha |
convertloop: |
READYUV444 |
- YUVTORGB(kYuvConstants) |
+ YUVTORGB(ebp) |
STOREARGB |
sub ecx, 8 |
jg convertloop |
+ pop ebp |
+ pop edi |
+ pop esi |
+ ret |
+ } |
+} |
+ |
+// 8 pixels. |
+// 8 UV values, mixed with 8 Y producing 8 ABGR (32 bytes). |
+__declspec(naked) |
+void I444ToABGRMatrixRow_SSSE3(const uint8* y_buf, |
+ const uint8* u_buf, |
+ const uint8* v_buf, |
+ uint8* dst_abgr, |
+ struct YuvConstants* YuvConstants, |
+ int width) { |
+ __asm { |
+ push esi |
+ push edi |
+ push ebp |
+ mov eax, [esp + 12 + 4] // Y |
+ mov esi, [esp + 12 + 8] // U |
+ mov edi, [esp + 12 + 12] // V |
+ mov edx, [esp + 12 + 16] // abgr |
+ mov ebp, [esp + 12 + 20] // YuvConstants |
+ mov ecx, [esp + 12 + 24] // width |
+ sub edi, esi |
+ pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha |
+ |
+ convertloop: |
+ READYUV444 |
+ YUVTORGB(ebp) |
+ STOREABGR |
+ |
+ sub ecx, 8 |
+ jg convertloop |
+ |
+ pop ebp |
pop edi |
pop esi |
ret |