| OLD | NEW |
| 1 // VERSION 2 | 1 // VERSION 2 |
| 2 /* | 2 /* |
| 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
| 4 * | 4 * |
| 5 * Use of this source code is governed by a BSD-style license | 5 * Use of this source code is governed by a BSD-style license |
| 6 * that can be found in the LICENSE file in the root of the source | 6 * that can be found in the LICENSE file in the root of the source |
| 7 * tree. An additional intellectual property rights grant can be found | 7 * tree. An additional intellectual property rights grant can be found |
| 8 * in the file PATENTS. All contributing project authors may | 8 * in the file PATENTS. All contributing project authors may |
| 9 * be found in the AUTHORS file in the root of the source tree. | 9 * be found in the AUTHORS file in the root of the source tree. |
| 10 */ | 10 */ |
| (...skipping 4773 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 4784 } | 4784 } |
| 4785 #endif // HAS_ARGBAFFINEROW_SSE2 | 4785 #endif // HAS_ARGBAFFINEROW_SSE2 |
| 4786 | 4786 |
| 4787 #ifdef HAS_INTERPOLATEROW_SSSE3 | 4787 #ifdef HAS_INTERPOLATEROW_SSSE3 |
| 4788 // Bilinear filter 16x2 -> 16x1 | 4788 // Bilinear filter 16x2 -> 16x1 |
| 4789 void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, | 4789 void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, |
| 4790 ptrdiff_t src_stride, int dst_width, | 4790 ptrdiff_t src_stride, int dst_width, |
| 4791 int source_y_fraction) { | 4791 int source_y_fraction) { |
| 4792 asm volatile ( | 4792 asm volatile ( |
| 4793 "sub %1,%0 \n" | 4793 "sub %1,%0 \n" |
| 4794 "shr %3 \n" | |
| 4795 "cmp $0x0,%3 \n" | 4794 "cmp $0x0,%3 \n" |
| 4796 "je 100f \n" | 4795 "je 100f \n" |
| 4797 "cmp $0x40,%3 \n" | 4796 "cmp $0x80,%3 \n" |
| 4798 "je 50f \n" | 4797 "je 50f \n" |
| 4799 | 4798 |
| 4800 "movd %3,%%xmm0 \n" | 4799 "movd %3,%%xmm0 \n" |
| 4801 "neg %3 \n" | 4800 "neg %3 \n" |
| 4802 "add $0x80,%3 \n" | 4801 "add $0x100,%3 \n" |
| 4803 "movd %3,%%xmm5 \n" | 4802 "movd %3,%%xmm5 \n" |
| 4804 "punpcklbw %%xmm0,%%xmm5 \n" | 4803 "punpcklbw %%xmm0,%%xmm5 \n" |
| 4805 "punpcklwd %%xmm5,%%xmm5 \n" | 4804 "punpcklwd %%xmm5,%%xmm5 \n" |
| 4806 "pshufd $0x0,%%xmm5,%%xmm5 \n" | 4805 "pshufd $0x0,%%xmm5,%%xmm5 \n" |
| 4807 "mov $0x400040,%%eax \n" | 4806 "mov $0x80808080,%%eax \n" |
| 4808 "movd %%eax,%%xmm4 \n" | 4807 "movd %%eax,%%xmm4 \n" |
| 4809 "pshufd $0x0,%%xmm4,%%xmm4 \n" | 4808 "pshufd $0x0,%%xmm4,%%xmm4 \n" |
| 4810 | 4809 |
| 4811 // General purpose row blend. | 4810 // General purpose row blend. |
| 4812 LABELALIGN | 4811 LABELALIGN |
| 4813 "1: \n" | 4812 "1: \n" |
| 4814 "movdqu " MEMACCESS(1) ",%%xmm0 \n" | 4813 "movdqu " MEMACCESS(1) ",%%xmm0 \n" |
| 4815 MEMOPREG(movdqu,0x00,1,4,1,xmm2) | 4814 MEMOPREG(movdqu,0x00,1,4,1,xmm2) |
| 4816 "movdqa %%xmm0,%%xmm1 \n" | 4815 "movdqa %%xmm0,%%xmm1 \n" |
| 4817 "punpcklbw %%xmm2,%%xmm0 \n" | 4816 "punpcklbw %%xmm2,%%xmm0 \n" |
| 4818 "punpckhbw %%xmm2,%%xmm1 \n" | 4817 "punpckhbw %%xmm2,%%xmm1 \n" |
| 4819 "pmaddubsw %%xmm5,%%xmm0 \n" | 4818 "psubb %%xmm4,%%xmm0 \n" |
| 4820 "pmaddubsw %%xmm5,%%xmm1 \n" | 4819 "psubb %%xmm4,%%xmm1 \n" |
| 4821 "paddw %%xmm4,%%xmm0 \n" | 4820 "movdqa %%xmm5,%%xmm2 \n" |
| 4822 "paddw %%xmm4,%%xmm1 \n" | 4821 "movdqa %%xmm5,%%xmm3 \n" |
| 4823 "psrlw $0x7,%%xmm0 \n" | 4822 "pmaddubsw %%xmm0,%%xmm2 \n" |
| 4824 "psrlw $0x7,%%xmm1 \n" | 4823 "pmaddubsw %%xmm1,%%xmm3 \n" |
| 4825 "packuswb %%xmm1,%%xmm0 \n" | 4824 "paddw %%xmm4,%%xmm2 \n" |
| 4826 MEMOPMEM(movdqu,xmm0,0x00,1,0,1) | 4825 "paddw %%xmm4,%%xmm3 \n" |
| 4826 "psrlw $0x8,%%xmm2 \n" |
| 4827 "psrlw $0x8,%%xmm3 \n" |
| 4828 "packuswb %%xmm3,%%xmm2 \n" |
| 4829 MEMOPMEM(movdqu,xmm2,0x00,1,0,1) |
| 4827 "lea " MEMLEA(0x10,1) ",%1 \n" | 4830 "lea " MEMLEA(0x10,1) ",%1 \n" |
| 4828 "sub $0x10,%2 \n" | 4831 "sub $0x10,%2 \n" |
| 4829 "jg 1b \n" | 4832 "jg 1b \n" |
| 4830 "jmp 99f \n" | 4833 "jmp 99f \n" |
| 4831 | 4834 |
| 4832 // Blend 50 / 50. | 4835 // Blend 50 / 50. |
| 4833 LABELALIGN | 4836 LABELALIGN |
| 4834 "50: \n" | 4837 "50: \n" |
| 4835 "movdqu " MEMACCESS(1) ",%%xmm0 \n" | 4838 "movdqu " MEMACCESS(1) ",%%xmm0 \n" |
| 4836 MEMOPREG(movdqu,0x00,1,4,1,xmm1) | 4839 MEMOPREG(movdqu,0x00,1,4,1,xmm1) |
| (...skipping 13 matching lines...) Expand all Loading... |
| 4850 "sub $0x10,%2 \n" | 4853 "sub $0x10,%2 \n" |
| 4851 "jg 100b \n" | 4854 "jg 100b \n" |
| 4852 | 4855 |
| 4853 "99: \n" | 4856 "99: \n" |
| 4854 : "+r"(dst_ptr), // %0 | 4857 : "+r"(dst_ptr), // %0 |
| 4855 "+r"(src_ptr), // %1 | 4858 "+r"(src_ptr), // %1 |
| 4856 "+r"(dst_width), // %2 | 4859 "+r"(dst_width), // %2 |
| 4857 "+r"(source_y_fraction) // %3 | 4860 "+r"(source_y_fraction) // %3 |
| 4858 : "r"((intptr_t)(src_stride)) // %4 | 4861 : "r"((intptr_t)(src_stride)) // %4 |
| 4859 : "memory", "cc", "eax", NACL_R14 | 4862 : "memory", "cc", "eax", NACL_R14 |
| 4860 "xmm0", "xmm1", "xmm2", "xmm4", "xmm5" | 4863 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" |
| 4861 ); | 4864 ); |
| 4862 } | 4865 } |
| 4863 #endif // HAS_INTERPOLATEROW_SSSE3 | 4866 #endif // HAS_INTERPOLATEROW_SSSE3 |
| 4864 | 4867 |
| 4865 #ifdef HAS_INTERPOLATEROW_AVX2 | 4868 #ifdef HAS_INTERPOLATEROW_AVX2 |
| 4866 // Bilinear filter 32x2 -> 32x1 | 4869 // Bilinear filter 32x2 -> 32x1 |
| 4867 void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr, | 4870 void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr, |
| 4868 ptrdiff_t src_stride, int dst_width, | 4871 ptrdiff_t src_stride, int dst_width, |
| 4869 int source_y_fraction) { | 4872 int source_y_fraction) { |
| 4870 asm volatile ( | 4873 asm volatile ( |
| 4871 "shr %3 \n" | 4874 "shr %3 \n" |
| 4872 "cmp $0x0,%3 \n" | 4875 "cmp $0x0,%3 \n" |
| 4873 "je 100f \n" | 4876 "je 100f \n" |
| 4874 "sub %1,%0 \n" | 4877 "sub %1,%0 \n" |
| 4875 "cmp $0x40,%3 \n" | 4878 "cmp $0x40,%3 \n" |
| 4876 "je 50f \n" | 4879 "je 50f \n" |
| 4877 | 4880 |
| 4878 "vmovd %3,%%xmm0 \n" | 4881 "vmovd %3,%%xmm0 \n" |
| 4879 "neg %3 \n" | 4882 "neg %3 \n" |
| 4880 "add $0x80,%3 \n" | 4883 "add $0x80,%3 \n" |
| 4881 "vmovd %3,%%xmm5 \n" | 4884 "vmovd %3,%%xmm5 \n" |
| 4882 "vpunpcklbw %%xmm0,%%xmm5,%%xmm5 \n" | 4885 "vpunpcklbw %%xmm0,%%xmm5,%%xmm5 \n" |
| 4883 "vpunpcklwd %%xmm5,%%xmm5,%%xmm5 \n" | 4886 "vpunpcklwd %%xmm5,%%xmm5,%%xmm5 \n" |
| 4884 "vpxor %%ymm0,%%ymm0,%%ymm0 \n" | 4887 "vbroadcastss %%xmm5,%%ymm5 \n" |
| 4885 "vpermd %%ymm5,%%ymm0,%%ymm5 \n" | 4888 "mov $0x80808080,%%eax \n" |
| 4886 "mov $0x400040,%%eax \n" | |
| 4887 "vmovd %%eax,%%xmm4 \n" | 4889 "vmovd %%eax,%%xmm4 \n" |
| 4888 "vbroadcastss %%xmm4,%%ymm4 \n" | 4890 "vbroadcastss %%xmm4,%%ymm4 \n" |
| 4889 | 4891 |
| 4890 // General purpose row blend. | 4892 // General purpose row blend. |
| 4891 LABELALIGN | 4893 LABELALIGN |
| 4892 "1: \n" | 4894 "1: \n" |
| 4893 "vmovdqu " MEMACCESS(1) ",%%ymm0 \n" | 4895 "vmovdqu " MEMACCESS(1) ",%%ymm0 \n" |
| 4894 MEMOPREG(vmovdqu,0x00,1,4,1,ymm2) | 4896 MEMOPREG(vmovdqu,0x00,1,4,1,ymm2) |
| 4895 "vpunpckhbw %%ymm2,%%ymm0,%%ymm1 \n" | 4897 "vpunpckhbw %%ymm2,%%ymm0,%%ymm1 \n" |
| 4896 "vpunpcklbw %%ymm2,%%ymm0,%%ymm0 \n" | 4898 "vpunpcklbw %%ymm2,%%ymm0,%%ymm0 \n" |
| 4897 "vpmaddubsw %%ymm5,%%ymm0,%%ymm0 \n" | 4899 "vpsubb %%ymm4,%%ymm1,%%ymm1 \n" |
| 4898 "vpmaddubsw %%ymm5,%%ymm1,%%ymm1 \n" | 4900 "vpsubb %%ymm4,%%ymm0,%%ymm0 \n" |
| 4901 "vpmaddubsw %%ymm1,%%ymm5,%%ymm1 \n" |
| 4902 "vpmaddubsw %%ymm0,%%ymm5,%%ymm0 \n" |
| 4903 "vpaddw %%ymm4,%%ymm1,%%ymm1 \n" |
| 4899 "vpaddw %%ymm4,%%ymm0,%%ymm0 \n" | 4904 "vpaddw %%ymm4,%%ymm0,%%ymm0 \n" |
| 4900 "vpaddw %%ymm4,%%ymm1,%%ymm1 \n" | 4905 "vpsrlw $0x8,%%ymm1,%%ymm1 \n" |
| 4901 "vpsrlw $0x7,%%ymm0,%%ymm0 \n" | 4906 "vpsrlw $0x8,%%ymm0,%%ymm0 \n" |
| 4902 "vpsrlw $0x7,%%ymm1,%%ymm1 \n" | |
| 4903 "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" | 4907 "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" |
| 4904 MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1) | 4908 MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1) |
| 4905 "lea " MEMLEA(0x20,1) ",%1 \n" | 4909 "lea " MEMLEA(0x20,1) ",%1 \n" |
| 4906 "sub $0x20,%2 \n" | 4910 "sub $0x20,%2 \n" |
| 4907 "jg 1b \n" | 4911 "jg 1b \n" |
| 4908 "jmp 99f \n" | 4912 "jmp 99f \n" |
| 4909 | 4913 |
| 4910 // Blend 50 / 50. | 4914 // Blend 50 / 50. |
| 4911 LABELALIGN | 4915 LABELALIGN |
| 4912 "50: \n" | 4916 "50: \n" |
| (...skipping 527 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 5440 ); | 5444 ); |
| 5441 } | 5445 } |
| 5442 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 5446 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
| 5443 | 5447 |
| 5444 #endif // defined(__x86_64__) || defined(__i386__) | 5448 #endif // defined(__x86_64__) || defined(__i386__) |
| 5445 | 5449 |
| 5446 #ifdef __cplusplus | 5450 #ifdef __cplusplus |
| 5447 } // extern "C" | 5451 } // extern "C" |
| 5448 } // namespace libyuv | 5452 } // namespace libyuv |
| 5449 #endif | 5453 #endif |
| OLD | NEW |