Chromium Code Reviews| Index: source/row_gcc.cc |
| diff --git a/source/row_gcc.cc b/source/row_gcc.cc |
| index bf9ddde42c3a4c0e759d831f90c06996b49e8419..80fbbb8717890957dab76bbd3f90d5e865111f7a 100644 |
| --- a/source/row_gcc.cc |
| +++ b/source/row_gcc.cc |
| @@ -2860,6 +2860,36 @@ void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, uint8* dst_a, int width) { |
| } |
| #endif // HAS_ARGBEXTRACTALPHAROW_SSE2 |
| +#ifdef HAS_ARGBEXTRACTALPHAROW_AVX2 |
| +void ARGBExtractAlphaRow_AVX2(const uint8* src_argb, uint8* dst_a, int width) { /* Copies the top byte of every 32-bit source pixel to dst_a. Each loop pass consumes 64 source bytes and emits 16 destination bytes; the body runs before width is tested and width steps by 16, so a width that is not a positive multiple of 16 is effectively rounded up. */ |
| + asm volatile ( |
| + "vmovdqa %3,%%ymm6 \n" /* ymm6 = kPermdARGBToY_AVX (operand %3), used as the vpermd index vector below */ |
| + LABELALIGN |
| + "1: \n" |
| + "vmovdqu " MEMACCESS(0) ", %%ymm0 \n" /* first 32 source bytes (unaligned load) */ |
| + "vmovdqu " MEMACCESS2(0x20, 0) ", %%ymm1 \n" /* next 32 source bytes */ |
| + "lea " MEMLEA(0x40, 0) ", %0 \n" /* src_argb += 64 */ |
| + "vpsrld $0x18, %%ymm0, %%ymm0 \n" /* shift each 32-bit lane right by 24 so only its top byte remains */ |
| + "vpsrld $0x18, %%ymm1, %%ymm1 \n" |
| + "vpackssdw %%ymm1, %%ymm0, %%ymm0 \n" // mutates: dwords -> words; the pack works per 128-bit lane, so element order is interleaved |
| + "vpackuswb %%ymm0, %%ymm0, %%ymm0 \n" // mutates: words -> bytes, again per 128-bit lane |
| + "vpermd %%ymm0,%%ymm6,%%ymm0 \n" // unmutate: reorder dwords by the indices in ymm6 (table contents not visible here). |
| + "vmovdqu %%xmm0," MEMACCESS(1) " \n" /* store the low 16 bytes of the result to dst_a */ |
| + "lea " MEMLEA(0x10, 1) ", %1 \n" /* dst_a += 16 */ |
| + "sub $0x10, %2 \n" /* width -= 16 */ |
| + "jg 1b \n" /* loop while the remaining width is > 0 */ |
| + "vzeroupper \n" /* zero the upper halves of the ymm registers before leaving the asm block */ |
| + : "+r"(src_argb), // %0 |
| + "+r"(dst_a), // %1 |
| + "+rm"(width) // %2 |

wangcheng
2016/10/13 19:52:18
Do you need to check "width" is multiply of 16?
fbarchard1
2016/10/13 21:46:27
planar_functions.cc does that check:
#if defined(
|
| + : "m"(kPermdARGBToY_AVX) // %3 |
| + : "memory", "cc" |
| + , "xmm0", "xmm1", "xmm6" |
| + ); |
| +} |
| +#endif // HAS_ARGBEXTRACTALPHAROW_AVX2 |
| + |
| + |
| #ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2 |
| // width in pixels |
| void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) { |