| Index: source/row_gcc.cc
|
| diff --git a/source/row_gcc.cc b/source/row_gcc.cc
|
| index 0ccf76b302c4a470279f0c0348e6dfd9bdc79fed..d1b25140f94d513054cdb4349bda879eadf869d0 100644
|
| --- a/source/row_gcc.cc
|
| +++ b/source/row_gcc.cc
|
| @@ -4794,12 +4794,8 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
| "shr %3 \n"
|
| "cmp $0x0,%3 \n"
|
| "je 100f \n"
|
| - "cmp $0x20,%3 \n"
|
| - "je 75f \n"
|
| "cmp $0x40,%3 \n"
|
| "je 50f \n"
|
| - "cmp $0x60,%3 \n"
|
| - "je 25f \n"
|
|
|
| "movd %3,%%xmm0 \n"
|
| "neg %3 \n"
|
| @@ -4808,6 +4804,9 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
| "punpcklbw %%xmm0,%%xmm5 \n"
|
| "punpcklwd %%xmm5,%%xmm5 \n"
|
| "pshufd $0x0,%%xmm5,%%xmm5 \n"
|
| + "mov $0x400040,%%eax \n"
|
| + "movd %%eax,%%xmm4 \n"
|
| + "pshufd $0x0,%%xmm4,%%xmm4 \n"
|
|
|
| // General purpose row blend.
|
| LABELALIGN
|
| @@ -4819,6 +4818,8 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
| "punpckhbw %%xmm2,%%xmm1 \n"
|
| "pmaddubsw %%xmm5,%%xmm0 \n"
|
| "pmaddubsw %%xmm5,%%xmm1 \n"
|
| + "paddw %%xmm4,%%xmm0 \n"
|
| + "paddw %%xmm4,%%xmm1 \n"
|
| "psrlw $0x7,%%xmm0 \n"
|
| "psrlw $0x7,%%xmm1 \n"
|
| "packuswb %%xmm1,%%xmm0 \n"
|
| @@ -4828,19 +4829,6 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
| "jg 1b \n"
|
| "jmp 99f \n"
|
|
|
| - // Blend 25 / 75.
|
| - LABELALIGN
|
| - "25: \n"
|
| - "movdqu " MEMACCESS(1) ",%%xmm0 \n"
|
| - MEMOPREG(movdqu,0x00,1,4,1,xmm1)
|
| - "pavgb %%xmm1,%%xmm0 \n"
|
| - "pavgb %%xmm1,%%xmm0 \n"
|
| - MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
|
| - "lea " MEMLEA(0x10,1) ",%1 \n"
|
| - "sub $0x10,%2 \n"
|
| - "jg 25b \n"
|
| - "jmp 99f \n"
|
| -
|
| // Blend 50 / 50.
|
| LABELALIGN
|
| "50: \n"
|
| @@ -4853,19 +4841,6 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
| "jg 50b \n"
|
| "jmp 99f \n"
|
|
|
| - // Blend 75 / 25.
|
| - LABELALIGN
|
| - "75: \n"
|
| - "movdqu " MEMACCESS(1) ",%%xmm1 \n"
|
| - MEMOPREG(movdqu,0x00,1,4,1,xmm0)
|
| - "pavgb %%xmm1,%%xmm0 \n"
|
| - "pavgb %%xmm1,%%xmm0 \n"
|
| - MEMOPMEM(movdqu,xmm0,0x00,1,0,1)
|
| - "lea " MEMLEA(0x10,1) ",%1 \n"
|
| - "sub $0x10,%2 \n"
|
| - "jg 75b \n"
|
| - "jmp 99f \n"
|
| -
|
| // Blend 100 / 0 - Copy row unchanged.
|
| LABELALIGN
|
| "100: \n"
|
| @@ -4881,8 +4856,8 @@ void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
|
| "+r"(dst_width), // %2
|
| "+r"(source_y_fraction) // %3
|
| : "r"((intptr_t)(src_stride)) // %4
|
| - : "memory", "cc", NACL_R14
|
| - "xmm0", "xmm1", "xmm2", "xmm5"
|
| + : "memory", "cc", "eax", NACL_R14
|
| + "xmm0", "xmm1", "xmm2", "xmm4", "xmm5"
|
| );
|
| }
|
| #endif // HAS_INTERPOLATEROW_SSSE3
|
| @@ -4897,12 +4872,8 @@ void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
|
| "cmp $0x0,%3 \n"
|
| "je 100f \n"
|
| "sub %1,%0 \n"
|
| - "cmp $0x20,%3 \n"
|
| - "je 75f \n"
|
| "cmp $0x40,%3 \n"
|
| "je 50f \n"
|
| - "cmp $0x60,%3 \n"
|
| - "je 25f \n"
|
|
|
| "vmovd %3,%%xmm0 \n"
|
| "neg %3 \n"
|
| @@ -4912,6 +4883,9 @@ void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
|
| "vpunpcklwd %%xmm5,%%xmm5,%%xmm5 \n"
|
| "vpxor %%ymm0,%%ymm0,%%ymm0 \n"
|
| "vpermd %%ymm5,%%ymm0,%%ymm5 \n"
|
| + "mov $0x400040,%%eax \n"
|
| + "vmovd %%eax,%%xmm4 \n"
|
| + "vbroadcastss %%xmm4,%%ymm4 \n"
|
|
|
| // General purpose row blend.
|
| LABELALIGN
|
| @@ -4922,6 +4896,8 @@ void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
|
| "vpunpcklbw %%ymm2,%%ymm0,%%ymm0 \n"
|
| "vpmaddubsw %%ymm5,%%ymm0,%%ymm0 \n"
|
| "vpmaddubsw %%ymm5,%%ymm1,%%ymm1 \n"
|
| + "vpaddw %%ymm4,%%ymm0,%%ymm0 \n"
|
| + "vpaddw %%ymm4,%%ymm1,%%ymm1 \n"
|
| "vpsrlw $0x7,%%ymm0,%%ymm0 \n"
|
| "vpsrlw $0x7,%%ymm1,%%ymm1 \n"
|
| "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
|
| @@ -4931,19 +4907,6 @@ void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
|
| "jg 1b \n"
|
| "jmp 99f \n"
|
|
|
| - // Blend 25 / 75.
|
| - LABELALIGN
|
| - "25: \n"
|
| - "vmovdqu " MEMACCESS(1) ",%%ymm0 \n"
|
| - MEMOPREG(vmovdqu,0x00,1,4,1,ymm1)
|
| - "vpavgb %%ymm1,%%ymm0,%%ymm0 \n"
|
| - "vpavgb %%ymm1,%%ymm0,%%ymm0 \n"
|
| - MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1)
|
| - "lea " MEMLEA(0x20,1) ",%1 \n"
|
| - "sub $0x20,%2 \n"
|
| - "jg 25b \n"
|
| - "jmp 99f \n"
|
| -
|
| // Blend 50 / 50.
|
| LABELALIGN
|
| "50: \n"
|
| @@ -4955,19 +4918,6 @@ void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
|
| "jg 50b \n"
|
| "jmp 99f \n"
|
|
|
| - // Blend 75 / 25.
|
| - LABELALIGN
|
| - "75: \n"
|
| - "vmovdqu " MEMACCESS(1) ",%%ymm1 \n"
|
| - MEMOPREG(vmovdqu,0x00,1,4,1,ymm0)
|
| - "vpavgb %%ymm1,%%ymm0,%%ymm0 \n"
|
| - "vpavgb %%ymm1,%%ymm0,%%ymm0 \n"
|
| - MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1)
|
| - "lea " MEMLEA(0x20,1) ",%1 \n"
|
| - "sub $0x20,%2 \n"
|
| - "jg 75b \n"
|
| - "jmp 99f \n"
|
| -
|
| // Blend 100 / 0 - Copy row unchanged.
|
| LABELALIGN
|
| "100: \n"
|
| @@ -4982,123 +4932,12 @@ void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
|
| "+c"(dst_width), // %2
|
| "+r"(source_y_fraction) // %3
|
| : "r"((intptr_t)(src_stride)) // %4
|
| - : "memory", "cc", NACL_R14
|
| - "xmm0", "xmm1", "xmm2", "xmm5"
|
| + : "memory", "cc", "eax", NACL_R14
|
| + "xmm0", "xmm1", "xmm2", "xmm4", "xmm5"
|
| );
|
| }
|
| #endif // HAS_INTERPOLATEROW_AVX2
|
|
|
| -#ifdef HAS_INTERPOLATEROW_SSE2
|
| -// Bilinear filter 16x2 -> 16x1
|
| -void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr,
|
| - ptrdiff_t src_stride, int dst_width,
|
| - int source_y_fraction) {
|
| - asm volatile (
|
| - "sub %1,%0 \n"
|
| - "shr %3 \n"
|
| - "cmp $0x0,%3 \n"
|
| - "je 100f \n"
|
| - "cmp $0x20,%3 \n"
|
| - "je 75f \n"
|
| - "cmp $0x40,%3 \n"
|
| - "je 50f \n"
|
| - "cmp $0x60,%3 \n"
|
| - "je 25f \n"
|
| -
|
| - "movd %3,%%xmm0 \n"
|
| - "neg %3 \n"
|
| - "add $0x80,%3 \n"
|
| - "movd %3,%%xmm5 \n"
|
| - "punpcklbw %%xmm0,%%xmm5 \n"
|
| - "punpcklwd %%xmm5,%%xmm5 \n"
|
| - "pshufd $0x0,%%xmm5,%%xmm5 \n"
|
| - "pxor %%xmm4,%%xmm4 \n"
|
| -
|
| - // General purpose row blend.
|
| - LABELALIGN
|
| - "1: \n"
|
| - "movdqu " MEMACCESS(1) ",%%xmm0 \n"
|
| - MEMOPREG(movdqu,0x00,1,4,1,xmm2) // movdqu (%1,%4,1),%%xmm2
|
| - "movdqa %%xmm0,%%xmm1 \n"
|
| - "movdqa %%xmm2,%%xmm3 \n"
|
| - "punpcklbw %%xmm4,%%xmm2 \n"
|
| - "punpckhbw %%xmm4,%%xmm3 \n"
|
| - "punpcklbw %%xmm4,%%xmm0 \n"
|
| - "punpckhbw %%xmm4,%%xmm1 \n"
|
| - "psubw %%xmm0,%%xmm2 \n"
|
| - "psubw %%xmm1,%%xmm3 \n"
|
| - "paddw %%xmm2,%%xmm2 \n"
|
| - "paddw %%xmm3,%%xmm3 \n"
|
| - "pmulhw %%xmm5,%%xmm2 \n"
|
| - "pmulhw %%xmm5,%%xmm3 \n"
|
| - "paddw %%xmm2,%%xmm0 \n"
|
| - "paddw %%xmm3,%%xmm1 \n"
|
| - "packuswb %%xmm1,%%xmm0 \n"
|
| - MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1)
|
| - "lea " MEMLEA(0x10,1) ",%1 \n"
|
| - "sub $0x10,%2 \n"
|
| - "jg 1b \n"
|
| - "jmp 99f \n"
|
| -
|
| - // Blend 25 / 75.
|
| - LABELALIGN
|
| - "25: \n"
|
| - "movdqu " MEMACCESS(1) ",%%xmm0 \n"
|
| - MEMOPREG(movdqu,0x00,1,4,1,xmm1) // movdqu (%1,%4,1),%%xmm1
|
| - "pavgb %%xmm1,%%xmm0 \n"
|
| - "pavgb %%xmm1,%%xmm0 \n"
|
| - MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1)
|
| - "lea " MEMLEA(0x10,1) ",%1 \n"
|
| - "sub $0x10,%2 \n"
|
| - "jg 25b \n"
|
| - "jmp 99f \n"
|
| -
|
| - // Blend 50 / 50.
|
| - LABELALIGN
|
| - "50: \n"
|
| - "movdqu " MEMACCESS(1) ",%%xmm0 \n"
|
| - MEMOPREG(movdqu,0x00,1,4,1,xmm1) // movdqu (%1,%4,1),%%xmm1
|
| - "pavgb %%xmm1,%%xmm0 \n"
|
| - MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1)
|
| - "lea " MEMLEA(0x10,1) ",%1 \n"
|
| - "sub $0x10,%2 \n"
|
| - "jg 50b \n"
|
| - "jmp 99f \n"
|
| -
|
| - // Blend 75 / 25.
|
| - LABELALIGN
|
| - "75: \n"
|
| - "movdqu " MEMACCESS(1) ",%%xmm1 \n"
|
| - MEMOPREG(movdqu,0x00,1,4,1,xmm0) // movdqu (%1,%4,1),%%xmm0
|
| - "pavgb %%xmm1,%%xmm0 \n"
|
| - "pavgb %%xmm1,%%xmm0 \n"
|
| - MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1)
|
| - "lea " MEMLEA(0x10,1) ",%1 \n"
|
| - "sub $0x10,%2 \n"
|
| - "jg 75b \n"
|
| - "jmp 99f \n"
|
| -
|
| - // Blend 100 / 0 - Copy row unchanged.
|
| - LABELALIGN
|
| - "100: \n"
|
| - "movdqu " MEMACCESS(1) ",%%xmm0 \n"
|
| - MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1)
|
| - "lea " MEMLEA(0x10,1) ",%1 \n"
|
| - "sub $0x10,%2 \n"
|
| - "jg 100b \n"
|
| -
|
| - "99: \n"
|
| - : "+r"(dst_ptr), // %0
|
| - "+r"(src_ptr), // %1
|
| - "+r"(dst_width), // %2
|
| - "+r"(source_y_fraction) // %3
|
| - : "r"((intptr_t)(src_stride)) // %4
|
| - : "memory", "cc", NACL_R14
|
| - "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
|
| - );
|
| -}
|
| -#endif // HAS_INTERPOLATEROW_SSE2
|
| -
|
| #ifdef HAS_ARGBSHUFFLEROW_SSSE3
|
| // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
|
| void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
|
|
|