Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(22)

Unified Diff: source/row_gcc.cc

Issue 2420553002: Add ARGBExtractAlpha_AVX2 function (Closed)
Patch Set: vpermd instead of 2 vpermq Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « source/row_any.cc ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: source/row_gcc.cc
diff --git a/source/row_gcc.cc b/source/row_gcc.cc
index bf9ddde42c3a4c0e759d831f90c06996b49e8419..80fbbb8717890957dab76bbd3f90d5e865111f7a 100644
--- a/source/row_gcc.cc
+++ b/source/row_gcc.cc
@@ -2860,6 +2860,36 @@ void ARGBExtractAlphaRow_SSE2(const uint8* src_argb, uint8* dst_a, int width) {
}
#endif // HAS_ARGBEXTRACTALPHAROW_SSE2
+#ifdef HAS_ARGBEXTRACTALPHAROW_AVX2
+void ARGBExtractAlphaRow_AVX2(const uint8* src_argb, uint8* dst_a, int width) {  // Writes the top byte of each 32-bit source pixel (the alpha, per the function name) to dst_a; 16 pixels per loop iteration.
+ asm volatile (
+ "vmovdqa %3,%%ymm6 \n"  // ymm6 = dword permute indices from operand %3 (kPermdARGBToY_AVX). NOTE(review): vmovdqa is an aligned load; presumably the table is 32-byte aligned - confirm at its definition.
+ LABELALIGN
+ "1: \n"
+ "vmovdqu " MEMACCESS(0) ", %%ymm0 \n"  // load 32 source bytes (8 pixels) from src_argb
+ "vmovdqu " MEMACCESS2(0x20, 0) ", %%ymm1 \n"  // load the next 32 source bytes (8 more pixels)
+ "lea " MEMLEA(0x40, 0) ", %0 \n"  // src_argb += 64 bytes
+ "vpsrld $0x18, %%ymm0, %%ymm0 \n"  // shift each dword right by 24 bits, leaving only its top byte
+ "vpsrld $0x18, %%ymm1, %%ymm1 \n"  // same for the second group of 8 pixels
+ "vpackssdw %%ymm1, %%ymm0, %%ymm0 \n" // dwords -> words; mutates: the 256-bit pack works per 128-bit lane, so element order is interleaved
+ "vpackuswb %%ymm0, %%ymm0, %%ymm0 \n" // words -> bytes; mutates again (still per 128-bit lane)
+ "vpermd %%ymm0,%%ymm6,%%ymm0 \n" // unmutate: reorder dwords by the indices in ymm6 so the low 128 bits hold the 16 result bytes in source order
+ "vmovdqu %%xmm0," MEMACCESS(1) " \n"  // store the low 16 bytes to dst_a
+ "lea " MEMLEA(0x10, 1) ", %1 \n"  // dst_a += 16 bytes
+ "sub $0x10, %2 \n"  // width -= 16; this loop has no handling for a remainder smaller than 16
+ "jg 1b \n"  // repeat while width > 0
+ "vzeroupper \n"  // zero the upper halves of the ymm registers before leaving the asm block
+ : "+r"(src_argb), // %0
+ "+r"(dst_a), // %1
+ "+rm"(width) // %2
wangcheng 2016/10/13 19:52:18 Do you need to check "width" is multiply of 16?
fbarchard1 2016/10/13 21:46:27 planar_functions.cc does that check: #if defined(
+ : "m"(kPermdARGBToY_AVX) // %3
+ : "memory", "cc"
+ , "xmm0", "xmm1", "xmm6"  // the ymm0/ymm1/ymm6 registers used above, listed by their xmm names
+ );
+}
+#endif // HAS_ARGBEXTRACTALPHAROW_AVX2
+
+
#ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2
// width in pixels
void ARGBCopyYToAlphaRow_SSE2(const uint8* src, uint8* dst, int width) {
« no previous file with comments | « source/row_any.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698