Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(15)

Unified Diff: source/row_gcc.cc

Issue 1535833003: avx2 interpolate use 8 bit (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: gcc version of interpolate Created 5 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « source/row_common.cc ('k') | source/row_neon.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: source/row_gcc.cc
diff --git a/source/row_gcc.cc b/source/row_gcc.cc
index d1b25140f94d513054cdb4349bda879eadf869d0..7dc3fd3e27c8ddad6e1267b68311a4f63ae43b46 100644
--- a/source/row_gcc.cc
+++ b/source/row_gcc.cc
@@ -4791,20 +4791,19 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
int source_y_fraction) {
asm volatile (
"sub %1,%0 \n"
- "shr %3 \n"
"cmp $0x0,%3 \n"
"je 100f \n"
- "cmp $0x40,%3 \n"
+ "cmp $0x80,%3 \n"
"je 50f \n"
"movd %3,%%xmm0 \n"
"neg %3 \n"
- "add $0x80,%3 \n"
+ "add $0x100,%3 \n"
"movd %3,%%xmm5 \n"
"punpcklbw %%xmm0,%%xmm5 \n"
"punpcklwd %%xmm5,%%xmm5 \n"
"pshufd $0x0,%%xmm5,%%xmm5 \n"
- "mov $0x400040,%%eax \n"
+ "mov $0x80808080,%%eax \n"
"movd %%eax,%%xmm4 \n"
"pshufd $0x0,%%xmm4,%%xmm4 \n"
@@ -4813,17 +4812,21 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
"1: \n"
"movdqu " MEMACCESS(1) ",%%xmm0 \n"
MEMOPREG(movdqu,0x00,1,4,1,xmm2)
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklbw %%xmm2,%%xmm0 \n"
- "punpckhbw %%xmm2,%%xmm1 \n"
- "pmaddubsw %%xmm5,%%xmm0 \n"
- "pmaddubsw %%xmm5,%%xmm1 \n"
- "paddw %%xmm4,%%xmm0 \n"
- "paddw %%xmm4,%%xmm1 \n"
- "psrlw $0x7,%%xmm0 \n"
- "psrlw $0x7,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
+ "movdqa %%xmm0,%%xmm1 \n"
+ "punpcklbw %%xmm2,%%xmm0 \n"
+ "punpckhbw %%xmm2,%%xmm1 \n"
+ "psubb %%xmm4,%%xmm0 \n"
+ "psubb %%xmm4,%%xmm1 \n"
+ "movdqa %%xmm5,%%xmm2 \n"
+ "movdqa %%xmm5,%%xmm3 \n"
+ "pmaddubsw %%xmm0,%%xmm2 \n"
+ "pmaddubsw %%xmm1,%%xmm3 \n"
+ "paddw %%xmm4,%%xmm2 \n"
+ "paddw %%xmm4,%%xmm3 \n"
+ "psrlw $0x8,%%xmm2 \n"
+ "psrlw $0x8,%%xmm3 \n"
+ "packuswb %%xmm3,%%xmm2 \n"
+ MEMOPMEM(movdqu,xmm2,0x00,1,0,1)
"lea " MEMLEA(0x10,1) ",%1 \n"
"sub $0x10,%2 \n"
"jg 1b \n"
@@ -4857,7 +4860,7 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
"+r"(source_y_fraction) // %3
: "r"((intptr_t)(src_stride)) // %4
: "memory", "cc", "eax", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm4", "xmm5"
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
#endif // HAS_INTERPOLATEROW_SSSE3
@@ -4881,9 +4884,8 @@ void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
"vmovd %3,%%xmm5 \n"
"vpunpcklbw %%xmm0,%%xmm5,%%xmm5 \n"
"vpunpcklwd %%xmm5,%%xmm5,%%xmm5 \n"
- "vpxor %%ymm0,%%ymm0,%%ymm0 \n"
- "vpermd %%ymm5,%%ymm0,%%ymm5 \n"
- "mov $0x400040,%%eax \n"
+ "vbroadcastss %%xmm5,%%ymm5 \n"
+ "mov $0x80808080,%%eax \n"
"vmovd %%eax,%%xmm4 \n"
"vbroadcastss %%xmm4,%%ymm4 \n"
@@ -4894,12 +4896,14 @@ void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
MEMOPREG(vmovdqu,0x00,1,4,1,ymm2)
"vpunpckhbw %%ymm2,%%ymm0,%%ymm1 \n"
"vpunpcklbw %%ymm2,%%ymm0,%%ymm0 \n"
- "vpmaddubsw %%ymm5,%%ymm0,%%ymm0 \n"
- "vpmaddubsw %%ymm5,%%ymm1,%%ymm1 \n"
- "vpaddw %%ymm4,%%ymm0,%%ymm0 \n"
+ "vpsubb %%ymm4,%%ymm1,%%ymm1 \n"
+ "vpsubb %%ymm4,%%ymm0,%%ymm0 \n"
+ "vpmaddubsw %%ymm1,%%ymm5,%%ymm1 \n"
+ "vpmaddubsw %%ymm0,%%ymm5,%%ymm0 \n"
"vpaddw %%ymm4,%%ymm1,%%ymm1 \n"
- "vpsrlw $0x7,%%ymm0,%%ymm0 \n"
- "vpsrlw $0x7,%%ymm1,%%ymm1 \n"
+ "vpaddw %%ymm4,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
"vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1)
"lea " MEMLEA(0x20,1) ",%1 \n"
« no previous file with comments | « source/row_common.cc ('k') | source/row_neon.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698