OLD | NEW |
1 // VERSION 2 | 1 // VERSION 2 |
2 /* | 2 /* |
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
4 * | 4 * |
5 * Use of this source code is governed by a BSD-style license | 5 * Use of this source code is governed by a BSD-style license |
6 * that can be found in the LICENSE file in the root of the source | 6 * that can be found in the LICENSE file in the root of the source |
7 * tree. An additional intellectual property rights grant can be found | 7 * tree. An additional intellectual property rights grant can be found |
8 * in the file PATENTS. All contributing project authors may | 8 * in the file PATENTS. All contributing project authors may |
9 * be found in the AUTHORS file in the root of the source tree. | 9 * be found in the AUTHORS file in the root of the source tree. |
10 */ | 10 */ |
(...skipping 4776 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
4787 #ifdef HAS_INTERPOLATEROW_SSSE3 | 4787 #ifdef HAS_INTERPOLATEROW_SSSE3 |
4788 // Bilinear filter 16x2 -> 16x1 | 4788 // Bilinear filter 16x2 -> 16x1 |
// Side-by-side diff of InterpolateRow_SSSE3: the left column is the OLD
// revision and the right column is the NEW revision; the leading numbers are
// the line numbers within each revision.
//
// The routine blends the 16 bytes at src_ptr with the 16 bytes at
// src_ptr + src_stride and stores 16 result bytes per loop iteration,
// subtracting 16 from dst_width each time and looping while the remainder is
// greater than zero (so a final partial chunk is still handled as a full
// 16 bytes).
//
// source_y_fraction is shifted right by one bit and then dispatched:
//   0     -> label 100: copy the first row unchanged
//   0x40  -> label 50: single pavgb of the two rows
//   other -> label 1: weighted blend using pmaddubsw
// The OLD revision also had dedicated double-pavgb loops for 0x20 (label 75)
// and 0x60 (label 25); the NEW revision removes them so those fractions take
// the general path, and adds a rounding term (0x40 per 16-bit lane) before
// the 7-bit right shift in that general path.
//
// NOTE(review): NEW adds eax to the clobber list while five operands still
// use plain register constraints; on 32-bit builds this may leave the
// compiler short of registers -- TODO confirm against the i386 build.
4789 void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, | 4789 void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, |
4790 ptrdiff_t src_stride, int dst_width, | 4790 ptrdiff_t src_stride, int dst_width, |
4791 int source_y_fraction) { | 4791 int source_y_fraction) { |
4792 asm volatile ( | 4792 asm volatile ( |
// %0 becomes dst_ptr - src_ptr, so the single advancing pointer %1 can
// address the source rows and (as base plus index) the destination.
4793 "sub %1,%0 \n" | 4793 "sub %1,%0 \n" |
// Halve the fraction, then branch to the special-case loops.
4794 "shr %3 \n" | 4794 "shr %3 \n" |
4795 "cmp $0x0,%3 \n" | 4795 "cmp $0x0,%3 \n" |
4796 "je 100f \n" | 4796 "je 100f \n" |
4797 "cmp $0x20,%3 \n" | |
4798 "je 75f \n" | |
4799 "cmp $0x40,%3 \n" | 4797 "cmp $0x40,%3 \n" |
4800 "je 50f \n" | 4798 "je 50f \n" |
4801 "cmp $0x60,%3 \n" | |
4802 "je 25f \n" | |
4803 | 4799 |
// Build the weights: with f the halved fraction, xmm5 ends up holding the
// byte pair (0x80 - f, f) repeated across the whole register.
4804 "movd %3,%%xmm0 \n" | 4800 "movd %3,%%xmm0 \n" |
4805 "neg %3 \n" | 4801 "neg %3 \n" |
4806 "add $0x80,%3 \n" | 4802 "add $0x80,%3 \n" |
4807 "movd %3,%%xmm5 \n" | 4803 "movd %3,%%xmm5 \n" |
4808 "punpcklbw %%xmm0,%%xmm5 \n" | 4804 "punpcklbw %%xmm0,%%xmm5 \n" |
4809 "punpcklwd %%xmm5,%%xmm5 \n" | 4805 "punpcklwd %%xmm5,%%xmm5 \n" |
4810 "pshufd $0x0,%%xmm5,%%xmm5 \n" | 4806 "pshufd $0x0,%%xmm5,%%xmm5 \n" |
// NEW only: 0x400040 puts 0x40 in both 16-bit halves of the dword; the
// pshufd broadcast gives 0x40 in every word lane of xmm4. This uses eax as
// scratch, hence the added clobber below.
| 4807 "mov $0x400040,%%eax \n" |
| 4808 "movd %%eax,%%xmm4 \n" |
| 4809 "pshufd $0x0,%%xmm4,%%xmm4 \n" |
4811 | 4810 |
4812 // General purpose row blend. | 4811 // General purpose row blend. |
4813 LABELALIGN | 4812 LABELALIGN |
4814 "1: \n" | 4813 "1: \n" |
// xmm0 = first row, xmm2 = row one stride further on. The unpacks interleave
// the two rows byte by byte so that pmaddubsw multiplies each pair by the
// (0x80 - f, f) weights and sums it into one 16-bit lane.
4815 "movdqu " MEMACCESS(1) ",%%xmm0 \n" | 4814 "movdqu " MEMACCESS(1) ",%%xmm0 \n" |
4816 MEMOPREG(movdqu,0x00,1,4,1,xmm2) | 4815 MEMOPREG(movdqu,0x00,1,4,1,xmm2) |
4817 "movdqa %%xmm0,%%xmm1 \n" | 4816 "movdqa %%xmm0,%%xmm1 \n" |
4818 "punpcklbw %%xmm2,%%xmm0 \n" | 4817 "punpcklbw %%xmm2,%%xmm0 \n" |
4819 "punpckhbw %%xmm2,%%xmm1 \n" | 4818 "punpckhbw %%xmm2,%%xmm1 \n" |
4820 "pmaddubsw %%xmm5,%%xmm0 \n" | 4819 "pmaddubsw %%xmm5,%%xmm0 \n" |
4821 "pmaddubsw %%xmm5,%%xmm1 \n" | 4820 "pmaddubsw %%xmm5,%%xmm1 \n" |
// NEW only: add 0x40 (half of 0x80) so the following shift by 7 rounds to
// nearest instead of truncating.
| 4821 "paddw %%xmm4,%%xmm0 \n" |
| 4822 "paddw %%xmm4,%%xmm1 \n" |
4822 "psrlw $0x7,%%xmm0 \n" | 4823 "psrlw $0x7,%%xmm0 \n" |
4823 "psrlw $0x7,%%xmm1 \n" | 4824 "psrlw $0x7,%%xmm1 \n" |
4824 "packuswb %%xmm1,%%xmm0 \n" | 4825 "packuswb %%xmm1,%%xmm0 \n" |
4825 MEMOPMEM(movdqu,xmm0,0x00,1,0,1) | 4826 MEMOPMEM(movdqu,xmm0,0x00,1,0,1) |
4826 "lea " MEMLEA(0x10,1) ",%1 \n" | 4827 "lea " MEMLEA(0x10,1) ",%1 \n" |
4827 "sub $0x10,%2 \n" | 4828 "sub $0x10,%2 \n" |
4828 "jg 1b \n" | 4829 "jg 1b \n" |
4829 "jmp 99f \n" | 4830 "jmp 99f \n" |
4830 | 4831 |
// OLD only: averaging twice against xmm1 (the second row) weights the result
// towards the second row. Removed in NEW.
4831 // Blend 25 / 75. | |
4832 LABELALIGN | |
4833 "25: \n" | |
4834 "movdqu " MEMACCESS(1) ",%%xmm0 \n" | |
4835 MEMOPREG(movdqu,0x00,1,4,1,xmm1) | |
4836 "pavgb %%xmm1,%%xmm0 \n" | |
4837 "pavgb %%xmm1,%%xmm0 \n" | |
4838 MEMOPMEM(movdqu,xmm0,0x00,1,0,1) | |
4839 "lea " MEMLEA(0x10,1) ",%1 \n" | |
4840 "sub $0x10,%2 \n" | |
4841 "jg 25b \n" | |
4842 "jmp 99f \n" | |
4843 | |
4844 // Blend 50 / 50. | 4832 // Blend 50 / 50. |
4845 LABELALIGN | 4833 LABELALIGN |
4846 "50: \n" | 4834 "50: \n" |
4847 "movdqu " MEMACCESS(1) ",%%xmm0 \n" | 4835 "movdqu " MEMACCESS(1) ",%%xmm0 \n" |
4848 MEMOPREG(movdqu,0x00,1,4,1,xmm1) | 4836 MEMOPREG(movdqu,0x00,1,4,1,xmm1) |
4849 "pavgb %%xmm1,%%xmm0 \n" | 4837 "pavgb %%xmm1,%%xmm0 \n" |
4850 MEMOPMEM(movdqu,xmm0,0x00,1,0,1) | 4838 MEMOPMEM(movdqu,xmm0,0x00,1,0,1) |
4851 "lea " MEMLEA(0x10,1) ",%1 \n" | 4839 "lea " MEMLEA(0x10,1) ",%1 \n" |
4852 "sub $0x10,%2 \n" | 4840 "sub $0x10,%2 \n" |
4853 "jg 50b \n" | 4841 "jg 50b \n" |
4854 "jmp 99f \n" | 4842 "jmp 99f \n" |
4855 | 4843 |
// OLD only: same double pavgb as label 25 but with the two loads swapped, so
// the result is weighted towards the first row. Removed in NEW.
4856 // Blend 75 / 25. | |
4857 LABELALIGN | |
4858 "75: \n" | |
4859 "movdqu " MEMACCESS(1) ",%%xmm1 \n" | |
4860 MEMOPREG(movdqu,0x00,1,4,1,xmm0) | |
4861 "pavgb %%xmm1,%%xmm0 \n" | |
4862 "pavgb %%xmm1,%%xmm0 \n" | |
4863 MEMOPMEM(movdqu,xmm0,0x00,1,0,1) | |
4864 "lea " MEMLEA(0x10,1) ",%1 \n" | |
4865 "sub $0x10,%2 \n" | |
4866 "jg 75b \n" | |
4867 "jmp 99f \n" | |
4868 | |
4869 // Blend 100 / 0 - Copy row unchanged. | 4844 // Blend 100 / 0 - Copy row unchanged. |
4870 LABELALIGN | 4845 LABELALIGN |
4871 "100: \n" | 4846 "100: \n" |
4872 "movdqu " MEMACCESS(1) ",%%xmm0 \n" | 4847 "movdqu " MEMACCESS(1) ",%%xmm0 \n" |
4873 MEMOPMEM(movdqu,xmm0,0x00,1,0,1) | 4848 MEMOPMEM(movdqu,xmm0,0x00,1,0,1) |
4874 "lea " MEMLEA(0x10,1) ",%1 \n" | 4849 "lea " MEMLEA(0x10,1) ",%1 \n" |
4875 "sub $0x10,%2 \n" | 4850 "sub $0x10,%2 \n" |
4876 "jg 100b \n" | 4851 "jg 100b \n" |
4877 | 4852 |
// Common exit; the copy loop above falls through to here.
4878 "99: \n" | 4853 "99: \n" |
4879 : "+r"(dst_ptr), // %0 | 4854 : "+r"(dst_ptr), // %0 |
4880 "+r"(src_ptr), // %1 | 4855 "+r"(src_ptr), // %1 |
4881 "+r"(dst_width), // %2 | 4856 "+r"(dst_width), // %2 |
4882 "+r"(source_y_fraction) // %3 | 4857 "+r"(source_y_fraction) // %3 |
4883 : "r"((intptr_t)(src_stride)) // %4 | 4858 : "r"((intptr_t)(src_stride)) // %4 |
// NEW adds eax and xmm4 to the clobbers to match the rounding-constant setup.
4884 : "memory", "cc", NACL_R14 | 4859 : "memory", "cc", "eax", NACL_R14 |
4885 "xmm0", "xmm1", "xmm2", "xmm5" | 4860 "xmm0", "xmm1", "xmm2", "xmm4", "xmm5" |
4886 ); | 4861 ); |
4887 } | 4862 } |
4888 #endif // HAS_INTERPOLATEROW_SSSE3 | 4863 #endif // HAS_INTERPOLATEROW_SSSE3 |
4889 | 4864 |
4890 #ifdef HAS_INTERPOLATEROW_AVX2 | 4865 #ifdef HAS_INTERPOLATEROW_AVX2 |
4891 // Bilinear filter 32x2 -> 32x1 | 4866 // Bilinear filter 32x2 -> 32x1 |
// Side-by-side diff of InterpolateRow_AVX2: the left column is the OLD
// revision and the right column is the NEW revision; the leading numbers are
// the line numbers within each revision.
//
// The routine blends the 32 bytes at src_ptr with the 32 bytes at
// src_ptr + src_stride and stores 32 result bytes per loop iteration,
// subtracting 32 from dst_width each time and looping while the remainder is
// greater than zero.
//
// source_y_fraction is shifted right by one bit and then dispatched:
//   0     -> label 100: copy the row with rep movsb
//   0x40  -> label 50: single vpavgb of the two rows
//   other -> label 1: weighted blend using vpmaddubsw
// The OLD revision also had dedicated double-vpavgb loops for 0x20 (label 75)
// and 0x60 (label 25); the NEW revision removes them so those fractions take
// the general path, and adds a rounding term (0x40 per 16-bit lane) before
// the 7-bit right shift in that general path.
//
// dst_ptr, src_ptr and dst_width are pinned with the D, S and c constraints,
// which are the registers rep movsb uses for destination, source and count.
4892 void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr, | 4867 void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr, |
4893 ptrdiff_t src_stride, int dst_width, | 4868 ptrdiff_t src_stride, int dst_width, |
4894 int source_y_fraction) { | 4869 int source_y_fraction) { |
4895 asm volatile ( | 4870 asm volatile ( |
// Halve the fraction and take the copy path first: the branch to label 100
// happens before dst_ptr is rebased, so rep movsb sees the real destination.
4896 "shr %3 \n" | 4871 "shr %3 \n" |
4897 "cmp $0x0,%3 \n" | 4872 "cmp $0x0,%3 \n" |
4898 "je 100f \n" | 4873 "je 100f \n" |
// %0 becomes dst_ptr - src_ptr, so the single advancing pointer %1 can
// address the source rows and (as base plus index) the destination.
4899 "sub %1,%0 \n" | 4874 "sub %1,%0 \n" |
4900 "cmp $0x20,%3 \n" | |
4901 "je 75f \n" | |
4902 "cmp $0x40,%3 \n" | 4875 "cmp $0x40,%3 \n" |
4903 "je 50f \n" | 4876 "je 50f \n" |
4904 "cmp $0x60,%3 \n" | |
4905 "je 25f \n" | |
4906 | 4877 |
// Build the weights: with f the halved fraction, the low dword of xmm5 holds
// the byte pair (0x80 - f, f) twice; vpermd with an all-zero index register
// then copies that dword into every dword of ymm5.
4907 "vmovd %3,%%xmm0 \n" | 4878 "vmovd %3,%%xmm0 \n" |
4908 "neg %3 \n" | 4879 "neg %3 \n" |
4909 "add $0x80,%3 \n" | 4880 "add $0x80,%3 \n" |
4910 "vmovd %3,%%xmm5 \n" | 4881 "vmovd %3,%%xmm5 \n" |
4911 "vpunpcklbw %%xmm0,%%xmm5,%%xmm5 \n" | 4882 "vpunpcklbw %%xmm0,%%xmm5,%%xmm5 \n" |
4912 "vpunpcklwd %%xmm5,%%xmm5,%%xmm5 \n" | 4883 "vpunpcklwd %%xmm5,%%xmm5,%%xmm5 \n" |
4913 "vpxor %%ymm0,%%ymm0,%%ymm0 \n" | 4884 "vpxor %%ymm0,%%ymm0,%%ymm0 \n" |
4914 "vpermd %%ymm5,%%ymm0,%%ymm5 \n" | 4885 "vpermd %%ymm5,%%ymm0,%%ymm5 \n" |
// NEW only: 0x400040 puts 0x40 in both 16-bit halves of the dword; the
// broadcast gives 0x40 in every word lane of ymm4. This uses eax as scratch,
// hence the added clobber below.
| 4886 "mov $0x400040,%%eax \n" |
| 4887 "vmovd %%eax,%%xmm4 \n" |
| 4888 "vbroadcastss %%xmm4,%%ymm4 \n" |
4915 | 4889 |
4916 // General purpose row blend. | 4890 // General purpose row blend. |
4917 LABELALIGN | 4891 LABELALIGN |
4918 "1: \n" | 4892 "1: \n" |
// ymm0 = first row, ymm2 = row one stride further on. The unpacks interleave
// the two rows byte by byte so that vpmaddubsw multiplies each pair by the
// (0x80 - f, f) weights and sums it into one 16-bit lane.
4919 "vmovdqu " MEMACCESS(1) ",%%ymm0 \n" | 4893 "vmovdqu " MEMACCESS(1) ",%%ymm0 \n" |
4920 MEMOPREG(vmovdqu,0x00,1,4,1,ymm2) | 4894 MEMOPREG(vmovdqu,0x00,1,4,1,ymm2) |
4921 "vpunpckhbw %%ymm2,%%ymm0,%%ymm1 \n" | 4895 "vpunpckhbw %%ymm2,%%ymm0,%%ymm1 \n" |
4922 "vpunpcklbw %%ymm2,%%ymm0,%%ymm0 \n" | 4896 "vpunpcklbw %%ymm2,%%ymm0,%%ymm0 \n" |
4923 "vpmaddubsw %%ymm5,%%ymm0,%%ymm0 \n" | 4897 "vpmaddubsw %%ymm5,%%ymm0,%%ymm0 \n" |
4924 "vpmaddubsw %%ymm5,%%ymm1,%%ymm1 \n" | 4898 "vpmaddubsw %%ymm5,%%ymm1,%%ymm1 \n" |
// NEW only: add 0x40 (half of 0x80) so the following shift by 7 rounds to
// nearest instead of truncating.
| 4899 "vpaddw %%ymm4,%%ymm0,%%ymm0 \n" |
| 4900 "vpaddw %%ymm4,%%ymm1,%%ymm1 \n" |
4925 "vpsrlw $0x7,%%ymm0,%%ymm0 \n" | 4901 "vpsrlw $0x7,%%ymm0,%%ymm0 \n" |
4926 "vpsrlw $0x7,%%ymm1,%%ymm1 \n" | 4902 "vpsrlw $0x7,%%ymm1,%%ymm1 \n" |
4927 "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" | 4903 "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" |
4928 MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1) | 4904 MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1) |
4929 "lea " MEMLEA(0x20,1) ",%1 \n" | 4905 "lea " MEMLEA(0x20,1) ",%1 \n" |
4930 "sub $0x20,%2 \n" | 4906 "sub $0x20,%2 \n" |
4931 "jg 1b \n" | 4907 "jg 1b \n" |
4932 "jmp 99f \n" | 4908 "jmp 99f \n" |
4933 | 4909 |
// OLD only: averaging twice against ymm1 (the second row) weights the result
// towards the second row. Removed in NEW.
4934 // Blend 25 / 75. | |
4935 LABELALIGN | |
4936 "25: \n" | |
4937 "vmovdqu " MEMACCESS(1) ",%%ymm0 \n" | |
4938 MEMOPREG(vmovdqu,0x00,1,4,1,ymm1) | |
4939 "vpavgb %%ymm1,%%ymm0,%%ymm0 \n" | |
4940 "vpavgb %%ymm1,%%ymm0,%%ymm0 \n" | |
4941 MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1) | |
4942 "lea " MEMLEA(0x20,1) ",%1 \n" | |
4943 "sub $0x20,%2 \n" | |
4944 "jg 25b \n" | |
4945 "jmp 99f \n" | |
4946 | |
4947 // Blend 50 / 50. | 4910 // Blend 50 / 50. |
4948 LABELALIGN | 4911 LABELALIGN |
4949 "50: \n" | 4912 "50: \n" |
4950 "vmovdqu " MEMACCESS(1) ",%%ymm0 \n" | 4913 "vmovdqu " MEMACCESS(1) ",%%ymm0 \n" |
4951 VMEMOPREG(vpavgb,0x00,1,4,1,ymm0,ymm0) // vpavgb (%1,%4,1),%%ymm0,%%ymm0 | 4914 VMEMOPREG(vpavgb,0x00,1,4,1,ymm0,ymm0) // vpavgb (%1,%4,1),%%ymm0,%%ymm0 |
4952 MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1) | 4915 MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1) |
4953 "lea " MEMLEA(0x20,1) ",%1 \n" | 4916 "lea " MEMLEA(0x20,1) ",%1 \n" |
4954 "sub $0x20,%2 \n" | 4917 "sub $0x20,%2 \n" |
4955 "jg 50b \n" | 4918 "jg 50b \n" |
4956 "jmp 99f \n" | 4919 "jmp 99f \n" |
4957 | 4920 |
// OLD only: same double vpavgb as label 25 but with the two loads swapped, so
// the result is weighted towards the first row. Removed in NEW.
4958 // Blend 75 / 25. | |
4959 LABELALIGN | |
4960 "75: \n" | |
4961 "vmovdqu " MEMACCESS(1) ",%%ymm1 \n" | |
4962 MEMOPREG(vmovdqu,0x00,1,4,1,ymm0) | |
4963 "vpavgb %%ymm1,%%ymm0,%%ymm0 \n" | |
4964 "vpavgb %%ymm1,%%ymm0,%%ymm0 \n" | |
4965 MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1) | |
4966 "lea " MEMLEA(0x20,1) ",%1 \n" | |
4967 "sub $0x20,%2 \n" | |
4968 "jg 75b \n" | |
4969 "jmp 99f \n" | |
4970 | |
4971 // Blend 100 / 0 - Copy row unchanged. | 4921 // Blend 100 / 0 - Copy row unchanged. |
4972 LABELALIGN | 4922 LABELALIGN |
4973 "100: \n" | 4923 "100: \n" |
// String copy driven by the pinned D, S and c operands. This path executes no
// ymm instruction, so it jumps past the vzeroupper at label 99.
4974 "rep movsb " MEMMOVESTRING(1,0) " \n" | 4924 "rep movsb " MEMMOVESTRING(1,0) " \n" |
4975 "jmp 999f \n" | 4925 "jmp 999f \n" |
4976 | 4926 |
// Exit for the blend loops: clear the upper ymm halves before returning.
4977 "99: \n" | 4927 "99: \n" |
4978 "vzeroupper \n" | 4928 "vzeroupper \n" |
4979 "999: \n" | 4929 "999: \n" |
4980 : "+D"(dst_ptr), // %0 | 4930 : "+D"(dst_ptr), // %0 |
4981 "+S"(src_ptr), // %1 | 4931 "+S"(src_ptr), // %1 |
4982 "+c"(dst_width), // %2 | 4932 "+c"(dst_width), // %2 |
4983 "+r"(source_y_fraction) // %3 | 4933 "+r"(source_y_fraction) // %3 |
4984 : "r"((intptr_t)(src_stride)) // %4 | 4934 : "r"((intptr_t)(src_stride)) // %4 |
// NEW adds eax and xmm4 to the clobbers to match the rounding-constant setup.
4985 : "memory", "cc", NACL_R14 | 4935 : "memory", "cc", "eax", NACL_R14 |
4986 "xmm0", "xmm1", "xmm2", "xmm5" | 4936 "xmm0", "xmm1", "xmm2", "xmm4", "xmm5" |
4987 ); | 4937 ); |
4988 } | 4938 } |
4989 #endif // HAS_INTERPOLATEROW_AVX2 | 4939 #endif // HAS_INTERPOLATEROW_AVX2 |
4990 | 4940 |
4991 #ifdef HAS_INTERPOLATEROW_SSE2 | |
4992 // Bilinear filter 16x2 -> 16x1 | |
// Side-by-side diff of InterpolateRow_SSE2. Every row has an empty right
// column: the function exists only in the OLD revision and is deleted by
// this change.
//
// The routine blends the 16 bytes at src_ptr with the 16 bytes at
// src_ptr + src_stride and stores 16 result bytes per loop iteration,
// subtracting 16 from dst_width each time and looping while the remainder is
// greater than zero.
//
// source_y_fraction is shifted right by one bit and then dispatched:
//   0     -> label 100: copy the first row unchanged
//   0x20  -> label 75: double pavgb weighted towards the first row
//   0x40  -> label 50: single pavgb of the two rows
//   0x60  -> label 25: double pavgb weighted towards the second row
//   other -> label 1: 16-bit blend using psubw, pmulhw and paddw
4993 void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr, | |
4994 ptrdiff_t src_stride, int dst_width, | |
4995 int source_y_fraction) { | |
4996 asm volatile ( | |
// %0 becomes dst_ptr - src_ptr, so the single advancing pointer %1 can
// address the source rows and (as base plus index) the destination.
4997 "sub %1,%0 \n" | |
4998 "shr %3 \n" | |
4999 "cmp $0x0,%3 \n" | |
5000 "je 100f \n" | |
5001 "cmp $0x20,%3 \n" | |
5002 "je 75f \n" | |
5003 "cmp $0x40,%3 \n" | |
5004 "je 50f \n" | |
5005 "cmp $0x60,%3 \n" | |
5006 "je 25f \n" | |
5007 | |
// Build the multiplier: with f the halved fraction, every 16-bit lane of xmm5
// holds 0x80 - f in its low byte and f in its high byte. xmm4 is zeroed for
// use as the high half when widening bytes to words.
5008 "movd %3,%%xmm0 \n" | |
5009 "neg %3 \n" | |
5010 "add $0x80,%3 \n" | |
5011 "movd %3,%%xmm5 \n" | |
5012 "punpcklbw %%xmm0,%%xmm5 \n" | |
5013 "punpcklwd %%xmm5,%%xmm5 \n" | |
5014 "pshufd $0x0,%%xmm5,%%xmm5 \n" | |
5015 "pxor %%xmm4,%%xmm4 \n" | |
5016 | |
5017 // General purpose row blend. | |
5018 LABELALIGN | |
5019 "1: \n" | |
// Widen both rows to 16-bit lanes, form (second row - first row), double it,
// keep the high 16 bits of its signed product with xmm5, and add that to the
// first row; packuswb then narrows back to bytes with unsigned saturation.
5020 "movdqu " MEMACCESS(1) ",%%xmm0 \n" | |
5021 MEMOPREG(movdqu,0x00,1,4,1,xmm2) // movdqu (%1,%4,1),%%xmm2 | |
5022 "movdqa %%xmm0,%%xmm1 \n" | |
5023 "movdqa %%xmm2,%%xmm3 \n" | |
5024 "punpcklbw %%xmm4,%%xmm2 \n" | |
5025 "punpckhbw %%xmm4,%%xmm3 \n" | |
5026 "punpcklbw %%xmm4,%%xmm0 \n" | |
5027 "punpckhbw %%xmm4,%%xmm1 \n" | |
5028 "psubw %%xmm0,%%xmm2 \n" | |
5029 "psubw %%xmm1,%%xmm3 \n" | |
5030 "paddw %%xmm2,%%xmm2 \n" | |
5031 "paddw %%xmm3,%%xmm3 \n" | |
5032 "pmulhw %%xmm5,%%xmm2 \n" | |
5033 "pmulhw %%xmm5,%%xmm3 \n" | |
5034 "paddw %%xmm2,%%xmm0 \n" | |
5035 "paddw %%xmm3,%%xmm1 \n" | |
5036 "packuswb %%xmm1,%%xmm0 \n" | |
5037 MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1) | |
5038 "lea " MEMLEA(0x10,1) ",%1 \n" | |
5039 "sub $0x10,%2 \n" | |
5040 "jg 1b \n" | |
5041 "jmp 99f \n" | |
5042 | |
// Averaging twice against xmm1 (the second row) weights the result towards
// the second row.
5043 // Blend 25 / 75. | |
5044 LABELALIGN | |
5045 "25: \n" | |
5046 "movdqu " MEMACCESS(1) ",%%xmm0 \n" | |
5047 MEMOPREG(movdqu,0x00,1,4,1,xmm1) // movdqu (%1,%4,1),%%xmm1 | |
5048 "pavgb %%xmm1,%%xmm0 \n" | |
5049 "pavgb %%xmm1,%%xmm0 \n" | |
5050 MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1) | |
5051 "lea " MEMLEA(0x10,1) ",%1 \n" | |
5052 "sub $0x10,%2 \n" | |
5053 "jg 25b \n" | |
5054 "jmp 99f \n" | |
5055 | |
5056 // Blend 50 / 50. | |
5057 LABELALIGN | |
5058 "50: \n" | |
5059 "movdqu " MEMACCESS(1) ",%%xmm0 \n" | |
5060 MEMOPREG(movdqu,0x00,1,4,1,xmm1) // movdqu (%1,%4,1),%%xmm1 | |
5061 "pavgb %%xmm1,%%xmm0 \n" | |
5062 MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1) | |
5063 "lea " MEMLEA(0x10,1) ",%1 \n" | |
5064 "sub $0x10,%2 \n" | |
5065 "jg 50b \n" | |
5066 "jmp 99f \n" | |
5067 | |
// Same double pavgb as label 25 but with the two loads swapped, so the result
// is weighted towards the first row.
5068 // Blend 75 / 25. | |
5069 LABELALIGN | |
5070 "75: \n" | |
5071 "movdqu " MEMACCESS(1) ",%%xmm1 \n" | |
5072 MEMOPREG(movdqu,0x00,1,4,1,xmm0) // movdqu (%1,%4,1),%%xmm0 | |
5073 "pavgb %%xmm1,%%xmm0 \n" | |
5074 "pavgb %%xmm1,%%xmm0 \n" | |
5075 MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1) | |
5076 "lea " MEMLEA(0x10,1) ",%1 \n" | |
5077 "sub $0x10,%2 \n" | |
5078 "jg 75b \n" | |
5079 "jmp 99f \n" | |
5080 | |
5081 // Blend 100 / 0 - Copy row unchanged. | |
5082 LABELALIGN | |
5083 "100: \n" | |
5084 "movdqu " MEMACCESS(1) ",%%xmm0 \n" | |
5085 MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1) | |
5086 "lea " MEMLEA(0x10,1) ",%1 \n" | |
5087 "sub $0x10,%2 \n" | |
5088 "jg 100b \n" | |
5089 | |
// Common exit; the copy loop above falls through to here.
5090 "99: \n" | |
5091 : "+r"(dst_ptr), // %0 | |
5092 "+r"(src_ptr), // %1 | |
5093 "+r"(dst_width), // %2 | |
5094 "+r"(source_y_fraction) // %3 | |
5095 : "r"((intptr_t)(src_stride)) // %4 | |
5096 : "memory", "cc", NACL_R14 | |
5097 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | |
5098 ); | |
5099 } | |
5100 #endif // HAS_INTERPOLATEROW_SSE2 | |
5101 | |
5102 #ifdef HAS_ARGBSHUFFLEROW_SSSE3 | 4941 #ifdef HAS_ARGBSHUFFLEROW_SSSE3 |
5103 // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. | 4942 // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. |
5104 void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb, | 4943 void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb, |
5105 const uint8* shuffler, int width) { | 4944 const uint8* shuffler, int width) { |
5106 asm volatile ( | 4945 asm volatile ( |
5107 "movdqu " MEMACCESS(3) ",%%xmm5 \n" | 4946 "movdqu " MEMACCESS(3) ",%%xmm5 \n" |
5108 LABELALIGN | 4947 LABELALIGN |
5109 "1: \n" | 4948 "1: \n" |
5110 "movdqu " MEMACCESS(0) ",%%xmm0 \n" | 4949 "movdqu " MEMACCESS(0) ",%%xmm0 \n" |
5111 "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" | 4950 "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" |
(...skipping 489 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
5601 ); | 5440 ); |
5602 } | 5441 } |
5603 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 5442 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
5604 | 5443 |
5605 #endif // defined(__x86_64__) || defined(__i386__) | 5444 #endif // defined(__x86_64__) || defined(__i386__) |
5606 | 5445 |
5607 #ifdef __cplusplus | 5446 #ifdef __cplusplus |
5608 } // extern "C" | 5447 } // extern "C" |
5609 } // namespace libyuv | 5448 } // namespace libyuv |
5610 #endif | 5449 #endif |
OLD | NEW |