Index: source/row_gcc.cc
diff --git a/source/row_gcc.cc b/source/row_gcc.cc
index d1b25140f94d513054cdb4349bda879eadf869d0..7dc3fd3e27c8ddad6e1267b68311a4f63ae43b46 100644
--- a/source/row_gcc.cc
+++ b/source/row_gcc.cc
@@ -4791,20 +4791,19 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
int source_y_fraction) {
asm volatile (
"sub %1,%0 \n"
- "shr %3 \n"
"cmp $0x0,%3 \n"
"je 100f \n"
- "cmp $0x40,%3 \n"
+ "cmp $0x80,%3 \n"
"je 50f \n"
"movd %3,%%xmm0 \n"
"neg %3 \n"
- "add $0x80,%3 \n"
+ "add $0x100,%3 \n"
"movd %3,%%xmm5 \n"
"punpcklbw %%xmm0,%%xmm5 \n"
"punpcklwd %%xmm5,%%xmm5 \n"
"pshufd $0x0,%%xmm5,%%xmm5 \n"
- "mov $0x400040,%%eax \n"
+ "mov $0x80808080,%%eax \n"
"movd %%eax,%%xmm4 \n"
"pshufd $0x0,%%xmm4,%%xmm4 \n"
@@ -4813,17 +4812,21 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
"1: \n"
"movdqu " MEMACCESS(1) ",%%xmm0 \n"
MEMOPREG(movdqu,0x00,1,4,1,xmm2)
- "movdqa %%xmm0,%%xmm1 \n"
- "punpcklbw %%xmm2,%%xmm0 \n"
- "punpckhbw %%xmm2,%%xmm1 \n"
- "pmaddubsw %%xmm5,%%xmm0 \n"
- "pmaddubsw %%xmm5,%%xmm1 \n"
- "paddw %%xmm4,%%xmm0 \n"
- "paddw %%xmm4,%%xmm1 \n"
- "psrlw $0x7,%%xmm0 \n"
- "psrlw $0x7,%%xmm1 \n"
- "packuswb %%xmm1,%%xmm0 \n"
- MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
+ "movdqa %%xmm0,%%xmm1 \n"
+ "punpcklbw %%xmm2,%%xmm0 \n"
+ "punpckhbw %%xmm2,%%xmm1 \n"
+ "psubb %%xmm4,%%xmm0 \n"
+ "psubb %%xmm4,%%xmm1 \n"
+ "movdqa %%xmm5,%%xmm2 \n"
+ "movdqa %%xmm5,%%xmm3 \n"
+ "pmaddubsw %%xmm0,%%xmm2 \n"
+ "pmaddubsw %%xmm1,%%xmm3 \n"
+ "paddw %%xmm4,%%xmm2 \n"
+ "paddw %%xmm4,%%xmm3 \n"
+ "psrlw $0x8,%%xmm2 \n"
+ "psrlw $0x8,%%xmm3 \n"
+ "packuswb %%xmm3,%%xmm2 \n"
+ MEMOPMEM(movdqu,xmm2,0x00,1,0,1)
"lea " MEMLEA(0x10,1) ",%1 \n"
"sub $0x10,%2 \n"
"jg 1b \n"
@@ -4857,7 +4860,7 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
"+r"(source_y_fraction) // %3
: "r"((intptr_t)(src_stride)) // %4
: "memory", "cc", "eax", NACL_R14
- "xmm0", "xmm1", "xmm2", "xmm4", "xmm5"
+ "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
);
}
#endif // HAS_INTERPOLATEROW_SSSE3
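
For reference, a scalar sketch of the arithmetic the rewritten loops compute (my reading of the patch; the function and variable names below are illustrative, not libyuv identifiers). pmaddubsw multiplies unsigned bytes by signed bytes, so the full 8-bit fraction can only be used if the pixels are first biased into signed range (the psubb of 0x80...); the paddw of 0x8080 per 16-bit lane afterwards both undoes that bias and supplies the rounding term consumed by the shift right by 8. The asm also branches early for f == 0 and f == 128 (the je 100f / je 50f paths); the formula below covers those cases as well.

#include <stddef.h>
#include <stdint.h>

// Sketch: dst = (src0 * (256 - f) + src1 * f + 128) >> 8, with f in [0, 256).
// Mirrors the SIMD sequence: bias each pixel by -128 (psubb 0x80), multiply
// by the unsigned weight pair (256 - f, f) as pmaddubsw does, then add 0x8080
// per 16-bit lane to undo the bias and round before shifting right by 8.
static void InterpolateRowSketch_C(uint8_t* dst, const uint8_t* src0,
                                   const uint8_t* src1, size_t width, int f) {
  for (size_t x = 0; x < width; ++x) {
    int a = src0[x] - 128;                    // signed pixel, as after psubb
    int b = src1[x] - 128;
    int sum = (256 - f) * a + f * b;          // pmaddubsw: unsigned weights * signed pixels
    dst[x] = (uint8_t)((sum + 0x8080) >> 8);  // paddw 0x8080 ; psrlw $8
  }
}
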
@@ -4881,9 +4884,8 @@ void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
"vmovd %3,%%xmm5 \n"
"vpunpcklbw %%xmm0,%%xmm5,%%xmm5 \n"
"vpunpcklwd %%xmm5,%%xmm5,%%xmm5 \n"
- "vpxor %%ymm0,%%ymm0,%%ymm0 \n"
- "vpermd %%ymm5,%%ymm0,%%ymm5 \n"
- "mov $0x400040,%%eax \n"
+ "vbroadcastss %%xmm5,%%ymm5 \n"
+ "mov $0x80808080,%%eax \n"
"vmovd %%eax,%%xmm4 \n"
"vbroadcastss %%xmm4,%%ymm4 \n"
@@ -4894,12 +4896,14 @@ void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
MEMOPREG(vmovdqu,0x00,1,4,1,ymm2)
"vpunpckhbw %%ymm2,%%ymm0,%%ymm1 \n"
"vpunpcklbw %%ymm2,%%ymm0,%%ymm0 \n"
- "vpmaddubsw %%ymm5,%%ymm0,%%ymm0 \n"
- "vpmaddubsw %%ymm5,%%ymm1,%%ymm1 \n"
- "vpaddw %%ymm4,%%ymm0,%%ymm0 \n"
+ "vpsubb %%ymm4,%%ymm1,%%ymm1 \n"
+ "vpsubb %%ymm4,%%ymm0,%%ymm0 \n"
+ "vpmaddubsw %%ymm1,%%ymm5,%%ymm1 \n"
+ "vpmaddubsw %%ymm0,%%ymm5,%%ymm0 \n"
"vpaddw %%ymm4,%%ymm1,%%ymm1 \n"
- "vpsrlw $0x7,%%ymm0,%%ymm0 \n"
- "vpsrlw $0x7,%%ymm1,%%ymm1 \n"
+ "vpaddw %%ymm4,%%ymm0,%%ymm0 \n"
+ "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
+ "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
"vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1)
"lea " MEMLEA(0x20,1) ",%1 \n"