| OLD | NEW |
| 1 // VERSION 2 | 1 // VERSION 2 |
| 2 /* | 2 /* |
| 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
| 4 * | 4 * |
| 5 * Use of this source code is governed by a BSD-style license | 5 * Use of this source code is governed by a BSD-style license |
| 6 * that can be found in the LICENSE file in the root of the source | 6 * that can be found in the LICENSE file in the root of the source |
| 7 * tree. An additional intellectual property rights grant can be found | 7 * tree. An additional intellectual property rights grant can be found |
| 8 * in the file PATENTS. All contributing project authors may | 8 * in the file PATENTS. All contributing project authors may |
| 9 * be found in the AUTHORS file in the root of the source tree. | 9 * be found in the AUTHORS file in the root of the source tree. |
| 10 */ | 10 */ |
| (...skipping 4776 matching lines...) | (...skipping 4776 matching lines...) |
| 4787 #ifdef HAS_INTERPOLATEROW_SSSE3 | 4787 #ifdef HAS_INTERPOLATEROW_SSSE3 |
| 4788 // Bilinear filter 16x2 -> 16x1 | 4788 // Bilinear filter 16x2 -> 16x1 |
| 4789 void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, | 4789 void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, |
| 4790 ptrdiff_t src_stride, int dst_width, | 4790 ptrdiff_t src_stride, int dst_width, |
| 4791 int source_y_fraction) { | 4791 int source_y_fraction) { |
| 4792 asm volatile ( | 4792 asm volatile ( |
| 4793 "sub %1,%0 \n" | 4793 "sub %1,%0 \n" |
| 4794 "shr %3 \n" | 4794 "shr %3 \n" |
| 4795 "cmp $0x0,%3 \n" | 4795 "cmp $0x0,%3 \n" |
| 4796 "je 100f \n" | 4796 "je 100f \n" |
| 4797 "cmp $0x20,%3 \n" | |
| 4798 "je 75f \n" | |
| 4799 "cmp $0x40,%3 \n" | 4797 "cmp $0x40,%3 \n" |
| 4800 "je 50f \n" | 4798 "je 50f \n" |
| 4801 "cmp $0x60,%3 \n" | |
| 4802 "je 25f \n" | |
| 4803 | 4799 |
| 4804 "movd %3,%%xmm0 \n" | 4800 "movd %3,%%xmm0 \n" |
| 4805 "neg %3 \n" | 4801 "neg %3 \n" |
| 4806 "add $0x80,%3 \n" | 4802 "add $0x80,%3 \n" |
| 4807 "movd %3,%%xmm5 \n" | 4803 "movd %3,%%xmm5 \n" |
| 4808 "punpcklbw %%xmm0,%%xmm5 \n" | 4804 "punpcklbw %%xmm0,%%xmm5 \n" |
| 4809 "punpcklwd %%xmm5,%%xmm5 \n" | 4805 "punpcklwd %%xmm5,%%xmm5 \n" |
| 4810 "pshufd $0x0,%%xmm5,%%xmm5 \n" | 4806 "pshufd $0x0,%%xmm5,%%xmm5 \n" |
| 4807 "mov $0x400040,%%eax \n" |
| 4808 "movd %%eax,%%xmm4 \n" |
| 4809 "pshufd $0x0,%%xmm4,%%xmm4 \n" |
| 4811 | 4810 |
| 4812 // General purpose row blend. | 4811 // General purpose row blend. |
| 4813 LABELALIGN | 4812 LABELALIGN |
| 4814 "1: \n" | 4813 "1: \n" |
| 4815 "movdqu " MEMACCESS(1) ",%%xmm0 \n" | 4814 "movdqu " MEMACCESS(1) ",%%xmm0 \n" |
| 4816 MEMOPREG(movdqu,0x00,1,4,1,xmm2) | 4815 MEMOPREG(movdqu,0x00,1,4,1,xmm2) |
| 4817 "movdqa %%xmm0,%%xmm1 \n" | 4816 "movdqa %%xmm0,%%xmm1 \n" |
| 4818 "punpcklbw %%xmm2,%%xmm0 \n" | 4817 "punpcklbw %%xmm2,%%xmm0 \n" |
| 4819 "punpckhbw %%xmm2,%%xmm1 \n" | 4818 "punpckhbw %%xmm2,%%xmm1 \n" |
| 4820 "pmaddubsw %%xmm5,%%xmm0 \n" | 4819 "pmaddubsw %%xmm5,%%xmm0 \n" |
| 4821 "pmaddubsw %%xmm5,%%xmm1 \n" | 4820 "pmaddubsw %%xmm5,%%xmm1 \n" |
| 4821 "paddw %%xmm4,%%xmm0 \n" |
| 4822 "paddw %%xmm4,%%xmm1 \n" |
| 4822 "psrlw $0x7,%%xmm0 \n" | 4823 "psrlw $0x7,%%xmm0 \n" |
| 4823 "psrlw $0x7,%%xmm1 \n" | 4824 "psrlw $0x7,%%xmm1 \n" |
| 4824 "packuswb %%xmm1,%%xmm0 \n" | 4825 "packuswb %%xmm1,%%xmm0 \n" |
| 4825 MEMOPMEM(movdqu,xmm0,0x00,1,0,1) | 4826 MEMOPMEM(movdqu,xmm0,0x00,1,0,1) |
| 4826 "lea " MEMLEA(0x10,1) ",%1 \n" | 4827 "lea " MEMLEA(0x10,1) ",%1 \n" |
| 4827 "sub $0x10,%2 \n" | 4828 "sub $0x10,%2 \n" |
| 4828 "jg 1b \n" | 4829 "jg 1b \n" |
| 4829 "jmp 99f \n" | 4830 "jmp 99f \n" |
| 4830 | 4831 |
| 4831 // Blend 25 / 75. | |
| 4832 LABELALIGN | |
| 4833 "25: \n" | |
| 4834 "movdqu " MEMACCESS(1) ",%%xmm0 \n" | |
| 4835 MEMOPREG(movdqu,0x00,1,4,1,xmm1) | |
| 4836 "pavgb %%xmm1,%%xmm0 \n" | |
| 4837 "pavgb %%xmm1,%%xmm0 \n" | |
| 4838 MEMOPMEM(movdqu,xmm0,0x00,1,0,1) | |
| 4839 "lea " MEMLEA(0x10,1) ",%1 \n" | |
| 4840 "sub $0x10,%2 \n" | |
| 4841 "jg 25b \n" | |
| 4842 "jmp 99f \n" | |
| 4843 | |
| 4844 // Blend 50 / 50. | 4832 // Blend 50 / 50. |
| 4845 LABELALIGN | 4833 LABELALIGN |
| 4846 "50: \n" | 4834 "50: \n" |
| 4847 "movdqu " MEMACCESS(1) ",%%xmm0 \n" | 4835 "movdqu " MEMACCESS(1) ",%%xmm0 \n" |
| 4848 MEMOPREG(movdqu,0x00,1,4,1,xmm1) | 4836 MEMOPREG(movdqu,0x00,1,4,1,xmm1) |
| 4849 "pavgb %%xmm1,%%xmm0 \n" | 4837 "pavgb %%xmm1,%%xmm0 \n" |
| 4850 MEMOPMEM(movdqu,xmm0,0x00,1,0,1) | 4838 MEMOPMEM(movdqu,xmm0,0x00,1,0,1) |
| 4851 "lea " MEMLEA(0x10,1) ",%1 \n" | 4839 "lea " MEMLEA(0x10,1) ",%1 \n" |
| 4852 "sub $0x10,%2 \n" | 4840 "sub $0x10,%2 \n" |
| 4853 "jg 50b \n" | 4841 "jg 50b \n" |
| 4854 "jmp 99f \n" | 4842 "jmp 99f \n" |
| 4855 | 4843 |
| 4856 // Blend 75 / 25. | |
| 4857 LABELALIGN | |
| 4858 "75: \n" | |
| 4859 "movdqu " MEMACCESS(1) ",%%xmm1 \n" | |
| 4860 MEMOPREG(movdqu,0x00,1,4,1,xmm0) | |
| 4861 "pavgb %%xmm1,%%xmm0 \n" | |
| 4862 "pavgb %%xmm1,%%xmm0 \n" | |
| 4863 MEMOPMEM(movdqu,xmm0,0x00,1,0,1) | |
| 4864 "lea " MEMLEA(0x10,1) ",%1 \n" | |
| 4865 "sub $0x10,%2 \n" | |
| 4866 "jg 75b \n" | |
| 4867 "jmp 99f \n" | |
| 4868 | |
| 4869 // Blend 100 / 0 - Copy row unchanged. | 4844 // Blend 100 / 0 - Copy row unchanged. |
| 4870 LABELALIGN | 4845 LABELALIGN |
| 4871 "100: \n" | 4846 "100: \n" |
| 4872 "movdqu " MEMACCESS(1) ",%%xmm0 \n" | 4847 "movdqu " MEMACCESS(1) ",%%xmm0 \n" |
| 4873 MEMOPMEM(movdqu,xmm0,0x00,1,0,1) | 4848 MEMOPMEM(movdqu,xmm0,0x00,1,0,1) |
| 4874 "lea " MEMLEA(0x10,1) ",%1 \n" | 4849 "lea " MEMLEA(0x10,1) ",%1 \n" |
| 4875 "sub $0x10,%2 \n" | 4850 "sub $0x10,%2 \n" |
| 4876 "jg 100b \n" | 4851 "jg 100b \n" |
| 4877 | 4852 |
| 4878 "99: \n" | 4853 "99: \n" |
| 4879 : "+r"(dst_ptr), // %0 | 4854 : "+r"(dst_ptr), // %0 |
| 4880 "+r"(src_ptr), // %1 | 4855 "+r"(src_ptr), // %1 |
| 4881 "+r"(dst_width), // %2 | 4856 "+r"(dst_width), // %2 |
| 4882 "+r"(source_y_fraction) // %3 | 4857 "+r"(source_y_fraction) // %3 |
| 4883 : "r"((intptr_t)(src_stride)) // %4 | 4858 : "r"((intptr_t)(src_stride)) // %4 |
| 4884 : "memory", "cc", NACL_R14 | 4859 : "memory", "cc", "eax", NACL_R14 |
| 4885 "xmm0", "xmm1", "xmm2", "xmm5" | 4860 "xmm0", "xmm1", "xmm2", "xmm4", "xmm5" |
| 4886 ); | 4861 ); |
| 4887 } | 4862 } |
| 4888 #endif // HAS_INTERPOLATEROW_SSSE3 | 4863 #endif // HAS_INTERPOLATEROW_SSSE3 |
| 4889 | 4864 |
| 4890 #ifdef HAS_INTERPOLATEROW_AVX2 | 4865 #ifdef HAS_INTERPOLATEROW_AVX2 |
| 4891 // Bilinear filter 32x2 -> 32x1 | 4866 // Bilinear filter 32x2 -> 32x1 |
| 4892 void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr, | 4867 void InterpolateRow_AVX2(uint8* dst_ptr, const uint8* src_ptr, |
| 4893 ptrdiff_t src_stride, int dst_width, | 4868 ptrdiff_t src_stride, int dst_width, |
| 4894 int source_y_fraction) { | 4869 int source_y_fraction) { |
| 4895 asm volatile ( | 4870 asm volatile ( |
| 4896 "shr %3 \n" | 4871 "shr %3 \n" |
| 4897 "cmp $0x0,%3 \n" | 4872 "cmp $0x0,%3 \n" |
| 4898 "je 100f \n" | 4873 "je 100f \n" |
| 4899 "sub %1,%0 \n" | 4874 "sub %1,%0 \n" |
| 4900 "cmp $0x20,%3 \n" | |
| 4901 "je 75f \n" | |
| 4902 "cmp $0x40,%3 \n" | 4875 "cmp $0x40,%3 \n" |
| 4903 "je 50f \n" | 4876 "je 50f \n" |
| 4904 "cmp $0x60,%3 \n" | |
| 4905 "je 25f \n" | |
| 4906 | 4877 |
| 4907 "vmovd %3,%%xmm0 \n" | 4878 "vmovd %3,%%xmm0 \n" |
| 4908 "neg %3 \n" | 4879 "neg %3 \n" |
| 4909 "add $0x80,%3 \n" | 4880 "add $0x80,%3 \n" |
| 4910 "vmovd %3,%%xmm5 \n" | 4881 "vmovd %3,%%xmm5 \n" |
| 4911 "vpunpcklbw %%xmm0,%%xmm5,%%xmm5 \n" | 4882 "vpunpcklbw %%xmm0,%%xmm5,%%xmm5 \n" |
| 4912 "vpunpcklwd %%xmm5,%%xmm5,%%xmm5 \n" | 4883 "vpunpcklwd %%xmm5,%%xmm5,%%xmm5 \n" |
| 4913 "vpxor %%ymm0,%%ymm0,%%ymm0 \n" | 4884 "vpxor %%ymm0,%%ymm0,%%ymm0 \n" |
| 4914 "vpermd %%ymm5,%%ymm0,%%ymm5 \n" | 4885 "vpermd %%ymm5,%%ymm0,%%ymm5 \n" |
| 4886 "mov $0x400040,%%eax \n" |
| 4887 "vmovd %%eax,%%xmm4 \n" |
| 4888 "vbroadcastss %%xmm4,%%ymm4 \n" |
| 4915 | 4889 |
| 4916 // General purpose row blend. | 4890 // General purpose row blend. |
| 4917 LABELALIGN | 4891 LABELALIGN |
| 4918 "1: \n" | 4892 "1: \n" |
| 4919 "vmovdqu " MEMACCESS(1) ",%%ymm0 \n" | 4893 "vmovdqu " MEMACCESS(1) ",%%ymm0 \n" |
| 4920 MEMOPREG(vmovdqu,0x00,1,4,1,ymm2) | 4894 MEMOPREG(vmovdqu,0x00,1,4,1,ymm2) |
| 4921 "vpunpckhbw %%ymm2,%%ymm0,%%ymm1 \n" | 4895 "vpunpckhbw %%ymm2,%%ymm0,%%ymm1 \n" |
| 4922 "vpunpcklbw %%ymm2,%%ymm0,%%ymm0 \n" | 4896 "vpunpcklbw %%ymm2,%%ymm0,%%ymm0 \n" |
| 4923 "vpmaddubsw %%ymm5,%%ymm0,%%ymm0 \n" | 4897 "vpmaddubsw %%ymm5,%%ymm0,%%ymm0 \n" |
| 4924 "vpmaddubsw %%ymm5,%%ymm1,%%ymm1 \n" | 4898 "vpmaddubsw %%ymm5,%%ymm1,%%ymm1 \n" |
| 4899 "vpaddw %%ymm4,%%ymm0,%%ymm0 \n" |
| 4900 "vpaddw %%ymm4,%%ymm1,%%ymm1 \n" |
| 4925 "vpsrlw $0x7,%%ymm0,%%ymm0 \n" | 4901 "vpsrlw $0x7,%%ymm0,%%ymm0 \n" |
| 4926 "vpsrlw $0x7,%%ymm1,%%ymm1 \n" | 4902 "vpsrlw $0x7,%%ymm1,%%ymm1 \n" |
| 4927 "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" | 4903 "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" |
| 4928 MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1) | 4904 MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1) |
| 4929 "lea " MEMLEA(0x20,1) ",%1 \n" | 4905 "lea " MEMLEA(0x20,1) ",%1 \n" |
| 4930 "sub $0x20,%2 \n" | 4906 "sub $0x20,%2 \n" |
| 4931 "jg 1b \n" | 4907 "jg 1b \n" |
| 4932 "jmp 99f \n" | 4908 "jmp 99f \n" |
| 4933 | 4909 |
| 4934 // Blend 25 / 75. | |
| 4935 LABELALIGN | |
| 4936 "25: \n" | |
| 4937 "vmovdqu " MEMACCESS(1) ",%%ymm0 \n" | |
| 4938 MEMOPREG(vmovdqu,0x00,1,4,1,ymm1) | |
| 4939 "vpavgb %%ymm1,%%ymm0,%%ymm0 \n" | |
| 4940 "vpavgb %%ymm1,%%ymm0,%%ymm0 \n" | |
| 4941 MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1) | |
| 4942 "lea " MEMLEA(0x20,1) ",%1 \n" | |
| 4943 "sub $0x20,%2 \n" | |
| 4944 "jg 25b \n" | |
| 4945 "jmp 99f \n" | |
| 4946 | |
| 4947 // Blend 50 / 50. | 4910 // Blend 50 / 50. |
| 4948 LABELALIGN | 4911 LABELALIGN |
| 4949 "50: \n" | 4912 "50: \n" |
| 4950 "vmovdqu " MEMACCESS(1) ",%%ymm0 \n" | 4913 "vmovdqu " MEMACCESS(1) ",%%ymm0 \n" |
| 4951 VMEMOPREG(vpavgb,0x00,1,4,1,ymm0,ymm0) // vpavgb (%1,%4,1),%%ymm0,%%ymm0 | 4914 VMEMOPREG(vpavgb,0x00,1,4,1,ymm0,ymm0) // vpavgb (%1,%4,1),%%ymm0,%%ymm0 |
| 4952 MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1) | 4915 MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1) |
| 4953 "lea " MEMLEA(0x20,1) ",%1 \n" | 4916 "lea " MEMLEA(0x20,1) ",%1 \n" |
| 4954 "sub $0x20,%2 \n" | 4917 "sub $0x20,%2 \n" |
| 4955 "jg 50b \n" | 4918 "jg 50b \n" |
| 4956 "jmp 99f \n" | 4919 "jmp 99f \n" |
| 4957 | 4920 |
| 4958 // Blend 75 / 25. | |
| 4959 LABELALIGN | |
| 4960 "75: \n" | |
| 4961 "vmovdqu " MEMACCESS(1) ",%%ymm1 \n" | |
| 4962 MEMOPREG(vmovdqu,0x00,1,4,1,ymm0) | |
| 4963 "vpavgb %%ymm1,%%ymm0,%%ymm0 \n" | |
| 4964 "vpavgb %%ymm1,%%ymm0,%%ymm0 \n" | |
| 4965 MEMOPMEM(vmovdqu,ymm0,0x00,1,0,1) | |
| 4966 "lea " MEMLEA(0x20,1) ",%1 \n" | |
| 4967 "sub $0x20,%2 \n" | |
| 4968 "jg 75b \n" | |
| 4969 "jmp 99f \n" | |
| 4970 | |
| 4971 // Blend 100 / 0 - Copy row unchanged. | 4921 // Blend 100 / 0 - Copy row unchanged. |
| 4972 LABELALIGN | 4922 LABELALIGN |
| 4973 "100: \n" | 4923 "100: \n" |
| 4974 "rep movsb " MEMMOVESTRING(1,0) " \n" | 4924 "rep movsb " MEMMOVESTRING(1,0) " \n" |
| 4975 "jmp 999f \n" | 4925 "jmp 999f \n" |
| 4976 | 4926 |
| 4977 "99: \n" | 4927 "99: \n" |
| 4978 "vzeroupper \n" | 4928 "vzeroupper \n" |
| 4979 "999: \n" | 4929 "999: \n" |
| 4980 : "+D"(dst_ptr), // %0 | 4930 : "+D"(dst_ptr), // %0 |
| 4981 "+S"(src_ptr), // %1 | 4931 "+S"(src_ptr), // %1 |
| 4982 "+c"(dst_width), // %2 | 4932 "+c"(dst_width), // %2 |
| 4983 "+r"(source_y_fraction) // %3 | 4933 "+r"(source_y_fraction) // %3 |
| 4984 : "r"((intptr_t)(src_stride)) // %4 | 4934 : "r"((intptr_t)(src_stride)) // %4 |
| 4985 : "memory", "cc", NACL_R14 | 4935 : "memory", "cc", "eax", NACL_R14 |
| 4986 "xmm0", "xmm1", "xmm2", "xmm5" | 4936 "xmm0", "xmm1", "xmm2", "xmm4", "xmm5" |
| 4987 ); | 4937 ); |
| 4988 } | 4938 } |
| 4989 #endif // HAS_INTERPOLATEROW_AVX2 | 4939 #endif // HAS_INTERPOLATEROW_AVX2 |
| 4990 | 4940 |
| 4991 #ifdef HAS_INTERPOLATEROW_SSE2 | |
| 4992 // Bilinear filter 16x2 -> 16x1 | |
| 4993 void InterpolateRow_SSE2(uint8* dst_ptr, const uint8* src_ptr, | |
| 4994 ptrdiff_t src_stride, int dst_width, | |
| 4995 int source_y_fraction) { | |
| 4996 asm volatile ( | |
| 4997 "sub %1,%0 \n" | |
| 4998 "shr %3 \n" | |
| 4999 "cmp $0x0,%3 \n" | |
| 5000 "je 100f \n" | |
| 5001 "cmp $0x20,%3 \n" | |
| 5002 "je 75f \n" | |
| 5003 "cmp $0x40,%3 \n" | |
| 5004 "je 50f \n" | |
| 5005 "cmp $0x60,%3 \n" | |
| 5006 "je 25f \n" | |
| 5007 | |
| 5008 "movd %3,%%xmm0 \n" | |
| 5009 "neg %3 \n" | |
| 5010 "add $0x80,%3 \n" | |
| 5011 "movd %3,%%xmm5 \n" | |
| 5012 "punpcklbw %%xmm0,%%xmm5 \n" | |
| 5013 "punpcklwd %%xmm5,%%xmm5 \n" | |
| 5014 "pshufd $0x0,%%xmm5,%%xmm5 \n" | |
| 5015 "pxor %%xmm4,%%xmm4 \n" | |
| 5016 | |
| 5017 // General purpose row blend. | |
| 5018 LABELALIGN | |
| 5019 "1: \n" | |
| 5020 "movdqu " MEMACCESS(1) ",%%xmm0 \n" | |
| 5021 MEMOPREG(movdqu,0x00,1,4,1,xmm2) // movdqu (%1,%4,1),%%xmm2 | |
| 5022 "movdqa %%xmm0,%%xmm1 \n" | |
| 5023 "movdqa %%xmm2,%%xmm3 \n" | |
| 5024 "punpcklbw %%xmm4,%%xmm2 \n" | |
| 5025 "punpckhbw %%xmm4,%%xmm3 \n" | |
| 5026 "punpcklbw %%xmm4,%%xmm0 \n" | |
| 5027 "punpckhbw %%xmm4,%%xmm1 \n" | |
| 5028 "psubw %%xmm0,%%xmm2 \n" | |
| 5029 "psubw %%xmm1,%%xmm3 \n" | |
| 5030 "paddw %%xmm2,%%xmm2 \n" | |
| 5031 "paddw %%xmm3,%%xmm3 \n" | |
| 5032 "pmulhw %%xmm5,%%xmm2 \n" | |
| 5033 "pmulhw %%xmm5,%%xmm3 \n" | |
| 5034 "paddw %%xmm2,%%xmm0 \n" | |
| 5035 "paddw %%xmm3,%%xmm1 \n" | |
| 5036 "packuswb %%xmm1,%%xmm0 \n" | |
| 5037 MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1) | |
| 5038 "lea " MEMLEA(0x10,1) ",%1 \n" | |
| 5039 "sub $0x10,%2 \n" | |
| 5040 "jg 1b \n" | |
| 5041 "jmp 99f \n" | |
| 5042 | |
| 5043 // Blend 25 / 75. | |
| 5044 LABELALIGN | |
| 5045 "25: \n" | |
| 5046 "movdqu " MEMACCESS(1) ",%%xmm0 \n" | |
| 5047 MEMOPREG(movdqu,0x00,1,4,1,xmm1) // movdqu (%1,%4,1),%%xmm1 | |
| 5048 "pavgb %%xmm1,%%xmm0 \n" | |
| 5049 "pavgb %%xmm1,%%xmm0 \n" | |
| 5050 MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1) | |
| 5051 "lea " MEMLEA(0x10,1) ",%1 \n" | |
| 5052 "sub $0x10,%2 \n" | |
| 5053 "jg 25b \n" | |
| 5054 "jmp 99f \n" | |
| 5055 | |
| 5056 // Blend 50 / 50. | |
| 5057 LABELALIGN | |
| 5058 "50: \n" | |
| 5059 "movdqu " MEMACCESS(1) ",%%xmm0 \n" | |
| 5060 MEMOPREG(movdqu,0x00,1,4,1,xmm1) // movdqu (%1,%4,1),%%xmm1 | |
| 5061 "pavgb %%xmm1,%%xmm0 \n" | |
| 5062 MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1) | |
| 5063 "lea " MEMLEA(0x10,1) ",%1 \n" | |
| 5064 "sub $0x10,%2 \n" | |
| 5065 "jg 50b \n" | |
| 5066 "jmp 99f \n" | |
| 5067 | |
| 5068 // Blend 75 / 25. | |
| 5069 LABELALIGN | |
| 5070 "75: \n" | |
| 5071 "movdqu " MEMACCESS(1) ",%%xmm1 \n" | |
| 5072 MEMOPREG(movdqu,0x00,1,4,1,xmm0) // movdqu (%1,%4,1),%%xmm0 | |
| 5073 "pavgb %%xmm1,%%xmm0 \n" | |
| 5074 "pavgb %%xmm1,%%xmm0 \n" | |
| 5075 MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1) | |
| 5076 "lea " MEMLEA(0x10,1) ",%1 \n" | |
| 5077 "sub $0x10,%2 \n" | |
| 5078 "jg 75b \n" | |
| 5079 "jmp 99f \n" | |
| 5080 | |
| 5081 // Blend 100 / 0 - Copy row unchanged. | |
| 5082 LABELALIGN | |
| 5083 "100: \n" | |
| 5084 "movdqu " MEMACCESS(1) ",%%xmm0 \n" | |
| 5085 MEMOPMEM(movdqu,xmm0,0x00,1,0,1) // movdqu %%xmm0,(%1,%0,1) | |
| 5086 "lea " MEMLEA(0x10,1) ",%1 \n" | |
| 5087 "sub $0x10,%2 \n" | |
| 5088 "jg 100b \n" | |
| 5089 | |
| 5090 "99: \n" | |
| 5091 : "+r"(dst_ptr), // %0 | |
| 5092 "+r"(src_ptr), // %1 | |
| 5093 "+r"(dst_width), // %2 | |
| 5094 "+r"(source_y_fraction) // %3 | |
| 5095 : "r"((intptr_t)(src_stride)) // %4 | |
| 5096 : "memory", "cc", NACL_R14 | |
| 5097 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" | |
| 5098 ); | |
| 5099 } | |
| 5100 #endif // HAS_INTERPOLATEROW_SSE2 | |
| 5101 | |
| 5102 #ifdef HAS_ARGBSHUFFLEROW_SSSE3 | 4941 #ifdef HAS_ARGBSHUFFLEROW_SSSE3 |
| 5103 // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. | 4942 // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. |
| 5104 void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb, | 4943 void ARGBShuffleRow_SSSE3(const uint8* src_argb, uint8* dst_argb, |
| 5105 const uint8* shuffler, int width) { | 4944 const uint8* shuffler, int width) { |
| 5106 asm volatile ( | 4945 asm volatile ( |
| 5107 "movdqu " MEMACCESS(3) ",%%xmm5 \n" | 4946 "movdqu " MEMACCESS(3) ",%%xmm5 \n" |
| 5108 LABELALIGN | 4947 LABELALIGN |
| 5109 "1: \n" | 4948 "1: \n" |
| 5110 "movdqu " MEMACCESS(0) ",%%xmm0 \n" | 4949 "movdqu " MEMACCESS(0) ",%%xmm0 \n" |
| 5111 "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" | 4950 "movdqu " MEMACCESS2(0x10,0) ",%%xmm1 \n" |
| (...skipping 489 matching lines...) | (...skipping 489 matching lines...) |
| 5601 ); | 5440 ); |
| 5602 } | 5441 } |
| 5603 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 5442 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
| 5604 | 5443 |
| 5605 #endif // defined(__x86_64__) || defined(__i386__) | 5444 #endif // defined(__x86_64__) || defined(__i386__) |
| 5606 | 5445 |
| 5607 #ifdef __cplusplus | 5446 #ifdef __cplusplus |
| 5608 } // extern "C" | 5447 } // extern "C" |
| 5609 } // namespace libyuv | 5448 } // namespace libyuv |
| 5610 #endif | 5449 #endif |
| OLD | NEW |