Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(117)

Side by Side Diff: source/row_gcc.cc

Issue 1535833003: avx2 interpolate use 8 bit (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: "gcc version of interpolate" (created 5 years ago)
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_common.cc ('k') | source/row_neon.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // VERSION 2 1 // VERSION 2
2 /* 2 /*
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
4 * 4 *
5 * Use of this source code is governed by a BSD-style license 5 * Use of this source code is governed by a BSD-style license
6 * that can be found in the LICENSE file in the root of the source 6 * that can be found in the LICENSE file in the root of the source
7 * tree. An additional intellectual property rights grant can be found 7 * tree. An additional intellectual property rights grant can be found
8 * in the file PATENTS. All contributing project authors may 8 * in the file PATENTS. All contributing project authors may
9 * be found in the AUTHORS file in the root of the source tree. 9 * be found in the AUTHORS file in the root of the source tree.
10 */ 10 */
(...skipping 4773 matching lines...) Expand 10 before | Expand all | Expand 10 after
4784 } 4784 }
4785 #endif // HAS_ARGBAFFINEROW_SSE2 4785 #endif // HAS_ARGBAFFINEROW_SSE2
4786 4786
4787 #ifdef HAS_INTERPOLATEROW_SSSE3 4787 #ifdef HAS_INTERPOLATEROW_SSSE3
4788 // Bilinear filter 16x2 -> 16x1 4788 // Bilinear filter 16x2 -> 16x1
4789 void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, 4789 void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
4790 ptrdiff_t src_stride, int dst_width, 4790 ptrdiff_t src_stride, int dst_width,
4791 int source_y_fraction) { 4791 int source_y_fraction) {
4792 asm volatile ( 4792 asm volatile (
4793 "sub %1,%0 \n" 4793 "sub %1,%0 \n"
4794 "shr %3 \n"
4795 "cmp $0x0,%3 \n" 4794 "cmp $0x0,%3 \n"
4796 "je 100f \n" 4795 "je 100f \n"
4797 "cmp $0x40,%3 \n" 4796 "cmp $0x80,%3 \n"
4798 "je 50f \n" 4797 "je 50f \n"
4799 4798
4800 "movd %3,%%xmm0 \n" 4799 "movd %3,%%xmm0 \n"
4801 "neg %3 \n" 4800 "neg %3 \n"
4802 "add $0x80,%3 \n" 4801 "add $0x100,%3 \n"
4803 "movd %3,%%xmm5 \n" 4802 "movd %3,%%xmm5 \n"
4804 "punpcklbw %%xmm0,%%xmm5 \n" 4803 "punpcklbw %%xmm0,%%xmm5 \n"
4805 "punpcklwd %%xmm5,%%xmm5 \n" 4804 "punpcklwd %%xmm5,%%xmm5 \n"
4806 "pshufd $0x0,%%xmm5,%%xmm5 \n" 4805 "pshufd $0x0,%%xmm5,%%xmm5 \n"
4807 "mov $0x400040,%%eax \n" 4806 "mov $0x80808080,%%eax \n"
4808 "movd %%eax,%%xmm4 \n" 4807 "movd %%eax,%%xmm4 \n"
4809 "pshufd $0x0,%%xmm4,%%xmm4 \n" 4808 "pshufd $0x0,%%xmm4,%%xmm4 \n"
4810 4809
4811 // General purpose row blend. 4810 // General purpose row blend.
4812 LABELALIGN 4811 LABELALIGN
4813 "1: \n" 4812 "1: \n"
4814 "movdqu " MEMACCESS(1) ",%%xmm0 \n" 4813 "movdqu " MEMACCESS(1) ",%%xmm0 \n"
4815 MEMOPREG(movdqu,0x00,1,4,1,xmm2) 4814 MEMOPREG(movdqu,0x00,1,4,1,xmm2)
4816 "movdqa %%xmm0,%%xmm1 \n" 4815 "movdqa %%xmm0,%%xmm1 \n"
4817 "punpcklbw %%xmm2,%%xmm0 \n" 4816 "punpcklbw %%xmm2,%%xmm0 \n"
4818 "punpckhbw %%xmm2,%%xmm1 \n" 4817 "punpckhbw %%xmm2,%%xmm1 \n"
4819 "pmaddubsw %%xmm5,%%xmm0 \n" 4818 "psubb %%xmm4,%%xmm0 \n"
4820 "pmaddubsw %%xmm5,%%xmm1 \n" 4819 "psubb %%xmm4,%%xmm1 \n"
4821 "paddw %%xmm4,%%xmm0 \n" 4820 "movdqa %%xmm5,%%xmm2 \n"
4822 "paddw %%xmm4,%%xmm1 \n" 4821 "movdqa %%xmm5,%%xmm3 \n"
4823 "psrlw $0x7,%%xmm0 \n" 4822 "pmaddubsw %%xmm0,%%xmm2 \n"
4824 "psrlw $0x7,%%xmm1 \n" 4823 "pmaddubsw %%xmm1,%%xmm3 \n"
4825 "packuswb %%xmm1,%%xmm0 \n" 4824 "paddw %%xmm4,%%xmm2 \n"
4826 MEMOPMEM(movdqu,xmm0,0x00,1,0,1) 4825 "paddw %%xmm4,%%xmm3 \n"
4826 "psrlw $0x8,%%xmm2 \n"
4827 "psrlw $0x8,%%xmm3 \n"
4828 "packuswb %%xmm3,%%xmm2 \n"
4829 MEMOPMEM(movdqu,xmm2,0x00,1,0,1)
4827 "lea " MEMLEA(0x10,1) ",%1 \n" 4830 "lea " MEMLEA(0x10,1) ",%1 \n"
4828 "sub $0x10,%2 \n" 4831 "sub $0x10,%2 \n"
4829 "jg 1b \n" 4832 "jg 1b \n"
4830 "jmp 99f \n" 4833 "jmp 99f \n"
4831 4834
4832 // Blend 50 / 50. 4835 // Blend 50 / 50.
4833 LABELALIGN 4836 LABELALIGN
4834 "50: \n" 4837 "50: \n"
4835 "movdqu " MEMACCESS(1) ",%%xmm0 \n" 4838 "movdqu " MEMACCESS(1) ",%%xmm0 \n"
4836 MEMOPREG(movdqu,0x00,1,4,1,xmm1) 4839 MEMOPREG(movdqu,0x00,1,4,1,xmm1)
(...skipping 13 matching lines...) Expand all
4850 "sub $0x10,%2 \n" 4853 "sub $0x10,%2 \n"
4851 "jg 100b \n" 4854 "jg 100b \n"
4852 4855
4853 "99: \n" 4856 "99: \n"
4854 : "+r"(dst_ptr), // %0 4857 : "+r"(dst_ptr), // %0
4855 "+r"(src_ptr), // %1 4858 "+r"(src_ptr), // %1
4856 "+r"(dst_width), // %2 4859 "+r"(dst_width), // %2
4857 "+r"(source_y_fraction) // %3 4860 "+r"(source_y_fraction) // %3
4858 : "r"((intptr_t)(src_stride)) // %4 4861 : "r"((intptr_t)(src_stride)) // %4
4859 : "memory", "cc", "eax", NACL_R14 4862 : "memory", "cc", "eax", NACL_R14
4860 "xmm0", "xmm1", "xmm2", "xmm4", "xmm5" 4863 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
4861 ); 4864 );
4862 } 4865 }
4863 #endif // HAS_INTERPOLATEROW_SSSE3 4866 #endif // HAS_INTERPOLATEROW_SSSE3
4864 4867
4865 #ifdef HAS_INTERPOLATEROW_AVX2 4868 #ifdef HAS_INTERPOLATEROW_AVX2
4866 // Bilinear filter 32x2 -> 32x1 4869 // Bilinear filter 32x2 -> 32x1
4867 void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr, 4870 void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr,
4868 ptrdiff_t src_stride, int dst_width, 4871 ptrdiff_t src_stride, int dst_width,
4869 int source_y_fraction) { 4872 int source_y_fraction) {
4870 asm volatile ( 4873 asm volatile (
4871 "shr %3 \n" 4874 "shr %3 \n"
4872 "cmp $0x0,%3 \n" 4875 "cmp $0x0,%3 \n"
4873 "je 100f \n" 4876 "je 100f \n"
4874 "sub %1,%0 \n" 4877 "sub %1,%0 \n"
4875 "cmp $0x40,%3 \n" 4878 "cmp $0x40,%3 \n"
4876 "je 50f \n" 4879 "je 50f \n"
4877 4880
4878 "vmovd %3,%%xmm0 \n" 4881 "vmovd %3,%%xmm0 \n"
4879 "neg %3 \n" 4882 "neg %3 \n"
4880 "add $0x80,%3 \n" 4883 "add $0x80,%3 \n"
4881 "vmovd %3,%%xmm5 \n" 4884 "vmovd %3,%%xmm5 \n"
4882 "vpunpcklbw %%xmm0,%%xmm5,%%xmm5 \n" 4885 "vpunpcklbw %%xmm0,%%xmm5,%%xmm5 \n"
4883 "vpunpcklwd %%xmm5,%%xmm5,%%xmm5 \n" 4886 "vpunpcklwd %%xmm5,%%xmm5,%%xmm5 \n"
4884 "vpxor %%ymm0,%%ymm0,%%ymm0 \n" 4887 "vbroadcastss %%xmm5,%%ymm5 \n"
4885 "vpermd %%ymm5,%%ymm0,%%ymm5 \n" 4888 "mov $0x80808080,%%eax \n"
4886 "mov $0x400040,%%eax \n"
4887 "vmovd %%eax,%%xmm4 \n" 4889 "vmovd %%eax,%%xmm4 \n"
4888 "vbroadcastss %%xmm4,%%ymm4 \n" 4890 "vbroadcastss %%xmm4,%%ymm4 \n"
4889 4891
4890 // General purpose row blend. 4892 // General purpose row blend.
4891 LABELALIGN 4893 LABELALIGN
4892 "1: \n" 4894 "1: \n"
4893 "vmovdqu " MEMACCESS(1) ",%%ymm0 \n" 4895 "vmovdqu " MEMACCESS(1) ",%%ymm0 \n"
4894 MEMOPREG(vmovdqu,0x00,1,4,1,ymm2) 4896 MEMOPREG(vmovdqu,0x00,1,4,1,ymm2)
4895 "vpunpckhbw %%ymm2,%%ymm0,%%ymm1 \n" 4897 "vpunpckhbw %%ymm2,%%ymm0,%%ymm1 \n"
4896 "vpunpcklbw %%ymm2,%%ymm0,%%ymm0 \n" 4898 "vpunpcklbw %%ymm2,%%ymm0,%%ymm0 \n"
4897 "vpmaddubsw %%ymm5,%%ymm0,%%ymm0 \n" 4899 "vpsubb %%ymm4,%%ymm1,%%ymm1 \n"
4898 "vpmaddubsw %%ymm5,%%ymm1,%%ymm1 \n" 4900 "vpsubb %%ymm4,%%ymm0,%%ymm0 \n"
4901 "vpmaddubsw %%ymm1,%%ymm5,%%ymm1 \n"
4902 "vpmaddubsw %%ymm0,%%ymm5,%%ymm0 \n"
4903 "vpaddw %%ymm4,%%ymm1,%%ymm1 \n"
4899 "vpaddw %%ymm4,%%ymm0,%%ymm0 \n" 4904 "vpaddw %%ymm4,%%ymm0,%%ymm0 \n"
4900 "vpaddw %%ymm4,%%ymm1,%%ymm1 \n" 4905 "vpsrlw $0x8,%%ymm1,%%ymm1 \n"
4901 "vpsrlw $0x7,%%ymm0,%%ymm0 \n" 4906 "vpsrlw $0x8,%%ymm0,%%ymm0 \n"
4902 "vpsrlw $0x7,%%ymm1,%%ymm1 \n"
4903 "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" 4907 "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
4904 MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1) 4908 MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1)
4905 "lea " MEMLEA(0x20,1) ",%1 \n" 4909 "lea " MEMLEA(0x20,1) ",%1 \n"
4906 "sub $0x20,%2 \n" 4910 "sub $0x20,%2 \n"
4907 "jg 1b \n" 4911 "jg 1b \n"
4908 "jmp 99f \n" 4912 "jmp 99f \n"
4909 4913
4910 // Blend 50 / 50. 4914 // Blend 50 / 50.
4911 LABELALIGN 4915 LABELALIGN
4912 "50: \n" 4916 "50: \n"
(...skipping 527 matching lines...) Expand 10 before | Expand all | Expand 10 after
5440 ); 5444 );
5441 } 5445 }
5442 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 5446 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
5443 5447
5444 #endif // defined(__x86_64__) || defined(__i386__) 5448 #endif // defined(__x86_64__) || defined(__i386__)
5445 5449
5446 #ifdef __cplusplus 5450 #ifdef __cplusplus
5447 } // extern "C" 5451 } // extern "C"
5448 } // namespace libyuv 5452 } // namespace libyuv
5449 #endif 5453 #endif
OLD | NEW
« no previous file with comments | « source/row_common.cc ('k') | source/row_neon.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698