OLD | NEW |
1 // VERSION 2 | 1 // VERSION 2 |
2 /* | 2 /* |
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
4 * | 4 * |
5 * Use of this source code is governed by a BSD-style license | 5 * Use of this source code is governed by a BSD-style license |
6 * that can be found in the LICENSE file in the root of the source | 6 * that can be found in the LICENSE file in the root of the source |
7 * tree. An additional intellectual property rights grant can be found | 7 * tree. An additional intellectual property rights grant can be found |
8 * in the file PATENTS. All contributing project authors may | 8 * in the file PATENTS. All contributing project authors may |
9 * be found in the AUTHORS file in the root of the source tree. | 9 * be found in the AUTHORS file in the root of the source tree. |
10 */ | 10 */ |
(...skipping 4773 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4784 } | 4784 } |
4785 #endif // HAS_ARGBAFFINEROW_SSE2 | 4785 #endif // HAS_ARGBAFFINEROW_SSE2 |
4786 | 4786 |
4787 #ifdef HAS_INTERPOLATEROW_SSSE3 | 4787 #ifdef HAS_INTERPOLATEROW_SSSE3 |
4788 // Bilinear filter 16x2 -> 16x1 | 4788 // Bilinear filter 16x2 -> 16x1 |
4789 void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, | 4789 void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, |
4790 ptrdiff_t src_stride, int dst_width, | 4790 ptrdiff_t src_stride, int dst_width, |
4791 int source_y_fraction) { | 4791 int source_y_fraction) { |
4792 asm volatile ( | 4792 asm volatile ( |
4793 "sub %1,%0 \n" | 4793 "sub %1,%0 \n" |
4794 "shr %3 \n" | |
4795 "cmp $0x0,%3 \n" | 4794 "cmp $0x0,%3 \n" |
4796 "je 100f \n" | 4795 "je 100f \n" |
4797 "cmp $0x40,%3 \n" | 4796 "cmp $0x80,%3 \n" |
4798 "je 50f \n" | 4797 "je 50f \n" |
4799 | 4798 |
4800 "movd %3,%%xmm0 \n" | 4799 "movd %3,%%xmm0 \n" |
4801 "neg %3 \n" | 4800 "neg %3 \n" |
4802 "add $0x80,%3 \n" | 4801 "add $0x100,%3 \n" |
4803 "movd %3,%%xmm5 \n" | 4802 "movd %3,%%xmm5 \n" |
4804 "punpcklbw %%xmm0,%%xmm5 \n" | 4803 "punpcklbw %%xmm0,%%xmm5 \n" |
4805 "punpcklwd %%xmm5,%%xmm5 \n" | 4804 "punpcklwd %%xmm5,%%xmm5 \n" |
4806 "pshufd $0x0,%%xmm5,%%xmm5 \n" | 4805 "pshufd $0x0,%%xmm5,%%xmm5 \n" |
4807 "mov $0x400040,%%eax \n" | 4806 "mov $0x80808080,%%eax \n" |
4808 "movd %%eax,%%xmm4 \n" | 4807 "movd %%eax,%%xmm4 \n" |
4809 "pshufd $0x0,%%xmm4,%%xmm4 \n" | 4808 "pshufd $0x0,%%xmm4,%%xmm4 \n" |
4810 | 4809 |
4811 // General purpose row blend. | 4810 // General purpose row blend. |
4812 LABELALIGN | 4811 LABELALIGN |
4813 "1: \n" | 4812 "1: \n" |
4814 "movdqu " MEMACCESS(1) ",%%xmm0 \n" | 4813 "movdqu " MEMACCESS(1) ",%%xmm0 \n" |
4815 MEMOPREG(movdqu,0x00,1,4,1,xmm2) | 4814 MEMOPREG(movdqu,0x00,1,4,1,xmm2) |
4816 "movdqa %%xmm0,%%xmm1 \n" | 4815 "movdqa %%xmm0,%%xmm1 \n" |
4817 "punpcklbw %%xmm2,%%xmm0 \n" | 4816 "punpcklbw %%xmm2,%%xmm0 \n" |
4818 "punpckhbw %%xmm2,%%xmm1 \n" | 4817 "punpckhbw %%xmm2,%%xmm1 \n" |
4819 "pmaddubsw %%xmm5,%%xmm0 \n" | 4818 "psubb %%xmm4,%%xmm0 \n" |
4820 "pmaddubsw %%xmm5,%%xmm1 \n" | 4819 "psubb %%xmm4,%%xmm1 \n" |
4821 "paddw %%xmm4,%%xmm0 \n" | 4820 "movdqa %%xmm5,%%xmm2 \n" |
4822 "paddw %%xmm4,%%xmm1 \n" | 4821 "movdqa %%xmm5,%%xmm3 \n" |
4823 "psrlw $0x7,%%xmm0 \n" | 4822 "pmaddubsw %%xmm0,%%xmm2 \n" |
4824 "psrlw $0x7,%%xmm1 \n" | 4823 "pmaddubsw %%xmm1,%%xmm3 \n" |
4825 "packuswb %%xmm1,%%xmm0 \n" | 4824 "paddw %%xmm4,%%xmm2 \n" |
4826 MEMOPMEM(movdqu,xmm0,0x00,1,0,1) | 4825 "paddw %%xmm4,%%xmm3 \n" |
| 4826 "psrlw $0x8,%%xmm2 \n" |
| 4827 "psrlw $0x8,%%xmm3 \n" |
| 4828 "packuswb %%xmm3,%%xmm2 \n" |
| 4829 MEMOPMEM(movdqu,xmm2,0x00,1,0,1) |
4827 "lea " MEMLEA(0x10,1) ",%1 \n" | 4830 "lea " MEMLEA(0x10,1) ",%1 \n" |
4828 "sub $0x10,%2 \n" | 4831 "sub $0x10,%2 \n" |
4829 "jg 1b \n" | 4832 "jg 1b \n" |
4830 "jmp 99f \n" | 4833 "jmp 99f \n" |
4831 | 4834 |
4832 // Blend 50 / 50. | 4835 // Blend 50 / 50. |
4833 LABELALIGN | 4836 LABELALIGN |
4834 "50: \n" | 4837 "50: \n" |
4835 "movdqu " MEMACCESS(1) ",%%xmm0 \n" | 4838 "movdqu " MEMACCESS(1) ",%%xmm0 \n" |
4836 MEMOPREG(movdqu,0x00,1,4,1,xmm1) | 4839 MEMOPREG(movdqu,0x00,1,4,1,xmm1) |
(...skipping 13 matching lines...) Expand all Loading... |
4850 "sub $0x10,%2 \n" | 4853 "sub $0x10,%2 \n" |
4851 "jg 100b \n" | 4854 "jg 100b \n" |
4852 | 4855 |
4853 "99: \n" | 4856 "99: \n" |
4854 : "+r"(dst_ptr), // %0 | 4857 : "+r"(dst_ptr), // %0 |
4855 "+r"(src_ptr), // %1 | 4858 "+r"(src_ptr), // %1 |
4856 "+r"(dst_width), // %2 | 4859 "+r"(dst_width), // %2 |
4857 "+r"(source_y_fraction) // %3 | 4860 "+r"(source_y_fraction) // %3 |
4858 : "r"((intptr_t)(src_stride)) // %4 | 4861 : "r"((intptr_t)(src_stride)) // %4 |
4859 : "memory", "cc", "eax", NACL_R14 | 4862 : "memory", "cc", "eax", NACL_R14 |
4860 "xmm0", "xmm1", "xmm2", "xmm4", "xmm5" | 4863 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
4861 ); | 4864 ); |
4862 } | 4865 } |
4863 #endif // HAS_INTERPOLATEROW_SSSE3 | 4866 #endif // HAS_INTERPOLATEROW_SSSE3 |
4864 | 4867 |
4865 #ifdef HAS_INTERPOLATEROW_AVX2 | 4868 #ifdef HAS_INTERPOLATEROW_AVX2 |
4866 // Bilinear filter 32x2 -> 32x1 | 4869 // Bilinear filter 32x2 -> 32x1 |
4867 void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr, | 4870 void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr, |
4868 ptrdiff_t src_stride, int dst_width, | 4871 ptrdiff_t src_stride, int dst_width, |
4869 int source_y_fraction) { | 4872 int source_y_fraction) { |
4870 asm volatile ( | 4873 asm volatile ( |
4871 "shr %3 \n" | 4874 "shr %3 \n" |
4872 "cmp $0x0,%3 \n" | 4875 "cmp $0x0,%3 \n" |
4873 "je 100f \n" | 4876 "je 100f \n" |
4874 "sub %1,%0 \n" | 4877 "sub %1,%0 \n" |
4875 "cmp $0x40,%3 \n" | 4878 "cmp $0x40,%3 \n" |
4876 "je 50f \n" | 4879 "je 50f \n" |
4877 | 4880 |
4878 "vmovd %3,%%xmm0 \n" | 4881 "vmovd %3,%%xmm0 \n" |
4879 "neg %3 \n" | 4882 "neg %3 \n" |
4880 "add $0x80,%3 \n" | 4883 "add $0x80,%3 \n" |
4881 "vmovd %3,%%xmm5 \n" | 4884 "vmovd %3,%%xmm5 \n" |
4882 "vpunpcklbw %%xmm0,%%xmm5,%%xmm5 \n" | 4885 "vpunpcklbw %%xmm0,%%xmm5,%%xmm5 \n" |
4883 "vpunpcklwd %%xmm5,%%xmm5,%%xmm5 \n" | 4886 "vpunpcklwd %%xmm5,%%xmm5,%%xmm5 \n" |
4884 "vpxor %%ymm0,%%ymm0,%%ymm0 \n" | 4887 "vbroadcastss %%xmm5,%%ymm5 \n" |
4885 "vpermd %%ymm5,%%ymm0,%%ymm5 \n" | 4888 "mov $0x80808080,%%eax \n" |
4886 "mov $0x400040,%%eax \n" | |
4887 "vmovd %%eax,%%xmm4 \n" | 4889 "vmovd %%eax,%%xmm4 \n" |
4888 "vbroadcastss %%xmm4,%%ymm4 \n" | 4890 "vbroadcastss %%xmm4,%%ymm4 \n" |
4889 | 4891 |
4890 // General purpose row blend. | 4892 // General purpose row blend. |
4891 LABELALIGN | 4893 LABELALIGN |
4892 "1: \n" | 4894 "1: \n" |
4893 "vmovdqu " MEMACCESS(1) ",%%ymm0 \n" | 4895 "vmovdqu " MEMACCESS(1) ",%%ymm0 \n" |
4894 MEMOPREG(vmovdqu,0x00,1,4,1,ymm2) | 4896 MEMOPREG(vmovdqu,0x00,1,4,1,ymm2) |
4895 "vpunpckhbw %%ymm2,%%ymm0,%%ymm1 \n" | 4897 "vpunpckhbw %%ymm2,%%ymm0,%%ymm1 \n" |
4896 "vpunpcklbw %%ymm2,%%ymm0,%%ymm0 \n" | 4898 "vpunpcklbw %%ymm2,%%ymm0,%%ymm0 \n" |
4897 "vpmaddubsw %%ymm5,%%ymm0,%%ymm0 \n" | 4899 "vpsubb %%ymm4,%%ymm1,%%ymm1 \n" |
4898 "vpmaddubsw %%ymm5,%%ymm1,%%ymm1 \n" | 4900 "vpsubb %%ymm4,%%ymm0,%%ymm0 \n" |
| 4901 "vpmaddubsw %%ymm1,%%ymm5,%%ymm1 \n" |
| 4902 "vpmaddubsw %%ymm0,%%ymm5,%%ymm0 \n" |
| 4903 "vpaddw %%ymm4,%%ymm1,%%ymm1 \n" |
4899 "vpaddw %%ymm4,%%ymm0,%%ymm0 \n" | 4904 "vpaddw %%ymm4,%%ymm0,%%ymm0 \n" |
4900 "vpaddw %%ymm4,%%ymm1,%%ymm1 \n" | 4905 "vpsrlw $0x8,%%ymm1,%%ymm1 \n" |
4901 "vpsrlw $0x7,%%ymm0,%%ymm0 \n" | 4906 "vpsrlw $0x8,%%ymm0,%%ymm0 \n" |
4902 "vpsrlw $0x7,%%ymm1,%%ymm1 \n" | |
4903 "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" | 4907 "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" |
4904 MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1) | 4908 MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1) |
4905 "lea " MEMLEA(0x20,1) ",%1 \n" | 4909 "lea " MEMLEA(0x20,1) ",%1 \n" |
4906 "sub $0x20,%2 \n" | 4910 "sub $0x20,%2 \n" |
4907 "jg 1b \n" | 4911 "jg 1b \n" |
4908 "jmp 99f \n" | 4912 "jmp 99f \n" |
4909 | 4913 |
4910 // Blend 50 / 50. | 4914 // Blend 50 / 50. |
4911 LABELALIGN | 4915 LABELALIGN |
4912 "50: \n" | 4916 "50: \n" |
(...skipping 527 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5440 ); | 5444 ); |
5441 } | 5445 } |
5442 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 5446 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
5443 | 5447 |
5444 #endif // defined(__x86_64__) || defined(__i386__) | 5448 #endif // defined(__x86_64__) || defined(__i386__) |
5445 | 5449 |
5446 #ifdef __cplusplus | 5450 #ifdef __cplusplus |
5447 } // extern "C" | 5451 } // extern "C" |
5448 } // namespace libyuv | 5452 } // namespace libyuv |
5449 #endif | 5453 #endif |
OLD | NEW |