Index: source/row_gcc.cc |
diff --git a/source/row_gcc.cc b/source/row_gcc.cc |
index bf9ddde42c3a4c0e759d831f90c06996b49e8419..80fbbb8717890957dab76bbd3f90d5e865111f7a 100644 |
--- a/source/row_gcc.cc |
+++ b/source/row_gcc.cc |
@@ -2860,6 +2860,36 @@ void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, uint8* dst_a, int width) { |
} |
#endif // HAS_ARGBEXTRACTALPHAROW_SSE2 |
+#ifdef HAS_ARGBEXTRACTALPHAROW_AVX2 |
+void ARGBExtractAlphaRow_AVX2(const uint8* src_argb, uint8* dst_a, int width) { |
+ asm volatile ( |
+ "vmovdqa %3,%%ymm6 \n" |
+ LABELALIGN |
+ "1: \n" |
+ "vmovdqu " MEMACCESS(0) ", %%ymm0 \n" |
+ "vmovdqu " MEMACCESS2(0x20, 0) ", %%ymm1 \n" |
+ "lea " MEMLEA(0x40, 0) ", %0 \n" |
+ "vpsrld $0x18, %%ymm0, %%ymm0 \n" |
+ "vpsrld $0x18, %%ymm1, %%ymm1 \n" |
+ "vpackssdw %%ymm1, %%ymm0, %%ymm0 \n" // mutates |
+ "vpackuswb %%ymm0, %%ymm0, %%ymm0 \n" // mutates |
+ "vpermd %%ymm0,%%ymm6,%%ymm0 \n" // unmutate. |
+ "vmovdqu %%xmm0," MEMACCESS(1) " \n" |
+ "lea " MEMLEA(0x10, 1) ", %1 \n" |
+ "sub $0x10, %2 \n" |
+ "jg 1b \n" |
+ "vzeroupper \n" |
+ : "+r"(src_argb), // %0 |
+ "+r"(dst_a), // %1 |
+ "+rm"(width) // %2 |
wangcheng
2016/10/13 19:52:18
Do you need to check that "width" is a multiple of 16?
fbarchard1
2016/10/13 21:46:27
planar_functions.cc does that check:
#if defined(
|
+ : "m"(kPermdARGBToY_AVX) // %3 |
+ : "memory", "cc" |
+ , "xmm0", "xmm1", "xmm6" |
+ ); |
+} |
+#endif // HAS_ARGBEXTRACTALPHAROW_AVX2 |
+ |
+ |
#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2 |
// width in pixels |
void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) { |