Index: source/row_win.cc |
diff --git a/source/row_win.cc b/source/row_win.cc |
index a8c16c3c1ef3a4c0ed99814f9c3898636213e504..4a0a14fe258868c16151fd73ce11f4a85de0cb67 100644 |
--- a/source/row_win.cc |
+++ b/source/row_win.cc |
@@ -3532,6 +3532,33 @@ void ARGBCopyAlphaRow_AVX2(const uint8* src, uint8* dst, int width) { |
} |
#endif // HAS_ARGBCOPYALPHAROW_AVX2 |
+#ifdef HAS_ARGBEXTRACTALPHAROW_SSE2 |
+// Extracts the alpha channel of an ARGB row into a packed row of bytes. |
+// |
+// src_argb: source pixels, 4 bytes per pixel; the byte kept for each pixel is |
+//           bits 24..31 of its 32-bit value. Read with unaligned loads. |
+// dst_a:    destination row; one byte is written per pixel. |
+// width:    width in pixels. Eight pixels are processed per iteration and the |
+//           loop body always runs at least once, so every started group of 8 |
+//           pixels reads 32 source bytes and writes 8 destination bytes. |
+//           NOTE(review): presumably callers pass a positive multiple of 8 - |
+//           confirm against the code that dispatches to this row function. |
+__declspec(naked) |
+void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, uint8* dst_a, int width) { |
+  __asm { |
+    // Naked function: there is no prologue, so arguments come off the stack. |
+    mov        eax, [esp + 4]   // src_argb |
+    mov        edx, [esp + 8]   // dst_a |
+    mov        ecx, [esp + 12]  // width |
+ |
+  extractloop: |
+    movdqu     xmm0, [eax]       // pixels 0..3 |
+    movdqu     xmm1, [eax + 16]  // pixels 4..7 |
+    lea        eax, [eax + 32]   // advance source by 8 pixels |
+    psrld      xmm0, 24          // each dword becomes its top byte (0..255) |
+    psrld      xmm1, 24 |
+    packssdw   xmm0, xmm1        // 8 dwords -> 8 words; 0..255 never saturates |
+    // Only the low 8 bytes are stored below, so pack xmm0 with itself rather |
+    // than with xmm1, which still holds dword-formatted data at this point. |
+    packuswb   xmm0, xmm0        // 8 words -> 8 bytes |
+    // Review note: handling 16 pixels per iteration and storing a full 16 |
+    // bytes could be a little faster than this 8-byte store. |
+    movq       qword ptr [edx], xmm0 |
+    lea        edx, [edx + 8]    // advance destination by 8 pixels |
+    sub        ecx, 8 |
+    jg         extractloop       // loop while pixels remain |
+ |
+    ret |
+  } |
+} |
+#endif // HAS_ARGBEXTRACTALPHAROW_SSE2 |
+ |
#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2 |
// width in pixels |
__declspec(naked) |