Index: source/row_win.cc |
diff --git a/source/row_win.cc b/source/row_win.cc |
index e5b27b835e57d5b314709a24f2171cb9f9ed3d6d..6e35c70c6c94aa924a5268f6d9958acefde0397c 100644 |
--- a/source/row_win.cc |
+++ b/source/row_win.cc |
@@ -3460,32 +3460,6 @@ void MirrorRow_AVX2(const uint8* src, uint8* dst, int width) { |
} |
#endif // HAS_MIRRORROW_AVX2 |
-#ifdef HAS_MIRRORROW_SSE2 |
-__declspec(naked) |
-void MirrorRow_SSE2(const uint8* src, uint8* dst, int width) { |
- __asm { |
- mov eax, [esp + 4] // src |
- mov edx, [esp + 8] // dst |
- mov ecx, [esp + 12] // width |
- |
- convertloop: |
- movdqu xmm0, [eax - 16 + ecx] |
- movdqa xmm1, xmm0 // swap bytes |
- psllw xmm0, 8 |
- psrlw xmm1, 8 |
- por xmm0, xmm1 |
- pshuflw xmm0, xmm0, 0x1b // swap words |
- pshufhw xmm0, xmm0, 0x1b |
- pshufd xmm0, xmm0, 0x4e // swap qwords |
- movdqu [edx], xmm0 |
- lea edx, [edx + 16] |
- sub ecx, 16 |
- jg convertloop |
- ret |
- } |
-} |
-#endif // HAS_MIRRORROW_SSE2 |
- |
#ifdef HAS_MIRRORROW_UV_SSSE3 |
// Shuffle table for reversing the bytes of UV channels. |
static const uvec8 kShuffleMirrorUV = { |
@@ -4382,107 +4356,14 @@ void UYVYToUV422Row_SSE2(const uint8* src_uyvy, |
} |
#endif // HAS_YUY2TOYROW_SSE2 |
-#ifdef HAS_ARGBBLENDROW_SSE2 |
-// Blend 8 pixels at a time. |
-__declspec(naked) |
-void ARGBBlendRow_SSE2(const uint8* src_argb0, const uint8* src_argb1, |
- uint8* dst_argb, int width) { |
- __asm { |
- push esi |
- mov eax, [esp + 4 + 4] // src_argb0 |
- mov esi, [esp + 4 + 8] // src_argb1 |
- mov edx, [esp + 4 + 12] // dst_argb |
- mov ecx, [esp + 4 + 16] // width |
- pcmpeqb xmm7, xmm7 // generate constant 1 |
- psrlw xmm7, 15 |
- pcmpeqb xmm6, xmm6 // generate mask 0x00ff00ff |
- psrlw xmm6, 8 |
- pcmpeqb xmm5, xmm5 // generate mask 0xff00ff00 |
- psllw xmm5, 8 |
- pcmpeqb xmm4, xmm4 // generate mask 0xff000000 |
- pslld xmm4, 24 |
- sub ecx, 4 |
- jl convertloop4b // less than 4 pixels? |
- |
- // 4 pixel loop. |
- convertloop4: |
- movdqu xmm3, [eax] // src argb |
- lea eax, [eax + 16] |
- movdqa xmm0, xmm3 // src argb |
- pxor xmm3, xmm4 // ~alpha |
- movdqu xmm2, [esi] // _r_b |
- psrlw xmm3, 8 // alpha |
- pshufhw xmm3, xmm3, 0F5h // 8 alpha words |
- pshuflw xmm3, xmm3, 0F5h |
- pand xmm2, xmm6 // _r_b |
- paddw xmm3, xmm7 // 256 - alpha |
- pmullw xmm2, xmm3 // _r_b * alpha |
- movdqu xmm1, [esi] // _a_g |
- lea esi, [esi + 16] |
- psrlw xmm1, 8 // _a_g |
- por xmm0, xmm4 // set alpha to 255 |
- pmullw xmm1, xmm3 // _a_g * alpha |
- psrlw xmm2, 8 // _r_b convert to 8 bits again |
- paddusb xmm0, xmm2 // + src argb |
- pand xmm1, xmm5 // a_g_ convert to 8 bits again |
- paddusb xmm0, xmm1 // + src argb |
- movdqu [edx], xmm0 |
- lea edx, [edx + 16] |
- sub ecx, 4 |
- jge convertloop4 |
- |
- convertloop4b: |
- add ecx, 4 - 1 |
- jl convertloop1b |
- |
- // 1 pixel loop. |
- convertloop1: |
- movd xmm3, [eax] // src argb |
- lea eax, [eax + 4] |
- movdqa xmm0, xmm3 // src argb |
- pxor xmm3, xmm4 // ~alpha |
- movd xmm2, [esi] // _r_b |
- psrlw xmm3, 8 // alpha |
- pshufhw xmm3, xmm3, 0F5h // 8 alpha words |
- pshuflw xmm3, xmm3, 0F5h |
- pand xmm2, xmm6 // _r_b |
- paddw xmm3, xmm7 // 256 - alpha |
- pmullw xmm2, xmm3 // _r_b * alpha |
- movd xmm1, [esi] // _a_g |
- lea esi, [esi + 4] |
- psrlw xmm1, 8 // _a_g |
- por xmm0, xmm4 // set alpha to 255 |
- pmullw xmm1, xmm3 // _a_g * alpha |
- psrlw xmm2, 8 // _r_b convert to 8 bits again |
- paddusb xmm0, xmm2 // + src argb |
- pand xmm1, xmm5 // a_g_ convert to 8 bits again |
- paddusb xmm0, xmm1 // + src argb |
- movd [edx], xmm0 |
- lea edx, [edx + 4] |
- sub ecx, 1 |
- jge convertloop1 |
- |
- convertloop1b: |
- pop esi |
- ret |
- } |
-} |
-#endif // HAS_ARGBBLENDROW_SSE2 |
- |
#ifdef HAS_ARGBBLENDROW_SSSE3 |
// Shuffle table for isolating alpha. |
static const uvec8 kShuffleAlpha = { |
3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80, |
11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80 |
}; |
-// Same as SSE2, but replaces: |
-// psrlw xmm3, 8 // alpha |
-// pshufhw xmm3, xmm3, 0F5h // 8 alpha words |
-// pshuflw xmm3, xmm3, 0F5h |
-// with.. |
-// pshufb xmm3, kShuffleAlpha // alpha |
-// Blend 8 pixels at a time. |
+// Blend 4 pixels at a time, then 1 pixel at a time for any remainder. |
__declspec(naked) |
void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1, |
uint8* dst_argb, int width) { |
@@ -4564,48 +4445,6 @@ void ARGBBlendRow_SSSE3(const uint8* src_argb0, const uint8* src_argb1, |
} |
#endif // HAS_ARGBBLENDROW_SSSE3 |
-#ifdef HAS_ARGBATTENUATEROW_SSE2 |
-// Attenuate 4 pixels at a time. |
-__declspec(naked) |
-void ARGBAttenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, int width) { |
- __asm { |
- mov eax, [esp + 4] // src_argb0 |
- mov edx, [esp + 8] // dst_argb |
- mov ecx, [esp + 12] // width |
- pcmpeqb xmm4, xmm4 // generate mask 0xff000000 |
- pslld xmm4, 24 |
- pcmpeqb xmm5, xmm5 // generate mask 0x00ffffff |
- psrld xmm5, 8 |
- |
- convertloop: |
- movdqu xmm0, [eax] // read 4 pixels |
- punpcklbw xmm0, xmm0 // first 2 |
- pshufhw xmm2, xmm0, 0FFh // 8 alpha words |
- pshuflw xmm2, xmm2, 0FFh |
- pmulhuw xmm0, xmm2 // rgb * a |
- movdqu xmm1, [eax] // read 4 pixels |
- punpckhbw xmm1, xmm1 // next 2 pixels |
- pshufhw xmm2, xmm1, 0FFh // 8 alpha words |
- pshuflw xmm2, xmm2, 0FFh |
- pmulhuw xmm1, xmm2 // rgb * a |
- movdqu xmm2, [eax] // alphas |
- lea eax, [eax + 16] |
- psrlw xmm0, 8 |
- pand xmm2, xmm4 |
- psrlw xmm1, 8 |
- packuswb xmm0, xmm1 |
- pand xmm0, xmm5 // keep original alphas |
- por xmm0, xmm2 |
- movdqu [edx], xmm0 |
- lea edx, [edx + 16] |
- sub ecx, 4 |
- jg convertloop |
- |
- ret |
- } |
-} |
-#endif // HAS_ARGBATTENUATEROW_SSE2 |
- |
#ifdef HAS_ARGBATTENUATEROW_SSSE3 |
// Shuffle table duplicating alpha. |
static const uvec8 kShuffleAlpha0 = { |