Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(12)

Side by Side Diff: source/row_gcc.cc

Issue 1895743008: Remove initialize to zero on output variables for inline. (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: use early write for all outputs to avoid them being reassigned to input Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/rotate_neon64.cc ('k') | source/row_mips.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // VERSION 2 1 // VERSION 2
2 /* 2 /*
3 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 3 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
4 * 4 *
5 * Use of this source code is governed by a BSD-style license 5 * Use of this source code is governed by a BSD-style license
6 * that can be found in the LICENSE file in the root of the source 6 * that can be found in the LICENSE file in the root of the source
7 * tree. An additional intellectual property rights grant can be found 7 * tree. An additional intellectual property rights grant can be found
8 * in the file PATENTS. All contributing project authors may 8 * in the file PATENTS. All contributing project authors may
9 * be found in the AUTHORS file in the root of the source tree. 9 * be found in the AUTHORS file in the root of the source tree.
10 */ 10 */
(...skipping 1793 matching lines...) Expand 10 before | Expand all | Expand 10 after
1804 } 1804 }
1805 #endif // HAS_I422ALPHATOARGBROW_SSSE3 1805 #endif // HAS_I422ALPHATOARGBROW_SSSE3
1806 1806
1807 #ifdef HAS_I411TOARGBROW_SSSE3 1807 #ifdef HAS_I411TOARGBROW_SSSE3
1808 void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf, 1808 void OMITFP I411ToARGBRow_SSSE3(const uint8* y_buf,
1809 const uint8* u_buf, 1809 const uint8* u_buf,
1810 const uint8* v_buf, 1810 const uint8* v_buf,
1811 uint8* dst_argb, 1811 uint8* dst_argb,
1812 const struct YuvConstants* yuvconstants, 1812 const struct YuvConstants* yuvconstants,
1813 int width) { 1813 int width) {
1814 int temp = 0; 1814 int temp;
1815 asm volatile ( 1815 asm volatile (
1816 YUVTORGB_SETUP(yuvconstants) 1816 YUVTORGB_SETUP(yuvconstants)
1817 "sub %[u_buf],%[v_buf] \n" 1817 "sub %[u_buf],%[v_buf] \n"
1818 "pcmpeqb %%xmm5,%%xmm5 \n" 1818 "pcmpeqb %%xmm5,%%xmm5 \n"
1819 LABELALIGN 1819 LABELALIGN
1820 "1: \n" 1820 "1: \n"
1821 READYUV411_TEMP 1821 READYUV411_TEMP
1822 YUVTORGB(yuvconstants) 1822 YUVTORGB(yuvconstants)
1823 STOREARGB 1823 STOREARGB
1824 "subl $0x8,%[width] \n" 1824 "subl $0x8,%[width] \n"
1825 "jg 1b \n" 1825 "jg 1b \n"
1826 : [y_buf]"+r"(y_buf), // %[y_buf] 1826 : [y_buf]"+r"(y_buf), // %[y_buf]
1827 [u_buf]"+r"(u_buf), // %[u_buf] 1827 [u_buf]"+r"(u_buf), // %[u_buf]
1828 [v_buf]"+r"(v_buf), // %[v_buf] 1828 [v_buf]"+r"(v_buf), // %[v_buf]
1829 [dst_argb]"+r"(dst_argb), // %[dst_argb] 1829 [dst_argb]"+r"(dst_argb), // %[dst_argb]
1830 [temp]"+r"(temp), // %[temp] 1830 [temp]"=&r"(temp), // %[temp]
1831 #if defined(__i386__) && defined(__pic__) 1831 #if defined(__i386__) && defined(__pic__)
1832 [width]"+m"(width) // %[width] 1832 [width]"+m"(width) // %[width]
1833 #else 1833 #else
1834 [width]"+rm"(width) // %[width] 1834 [width]"+rm"(width) // %[width]
1835 #endif 1835 #endif
1836 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] 1836 : [yuvconstants]"r"(yuvconstants) // %[yuvconstants]
1837 : "memory", "cc", NACL_R14 YUVTORGB_REGS 1837 : "memory", "cc", NACL_R14 YUVTORGB_REGS
1838 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 1838 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
1839 ); 1839 );
1840 } 1840 }
1841 #endif 1841 #endif
1842 1842
1843 void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf, 1843 void OMITFP NV12ToARGBRow_SSSE3(const uint8* y_buf,
1844 const uint8* uv_buf, 1844 const uint8* uv_buf,
(...skipping 1880 matching lines...) Expand 10 before | Expand all | Expand 10 after
3725 : "memory", "cc" 3725 : "memory", "cc"
3726 , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" 3726 , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"
3727 ); 3727 );
3728 } 3728 }
3729 #endif // HAS_ARGBATTENUATEROW_AVX2 3729 #endif // HAS_ARGBATTENUATEROW_AVX2
3730 3730
3731 #ifdef HAS_ARGBUNATTENUATEROW_SSE2 3731 #ifdef HAS_ARGBUNATTENUATEROW_SSE2
3732 // Unattenuate 4 pixels at a time. 3732 // Unattenuate 4 pixels at a time.
3733 void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb, 3733 void ARGBUnattenuateRow_SSE2(const uint8* src_argb, uint8* dst_argb,
3734 int width) { 3734 int width) {
3735 uintptr_t alpha = 0; 3735 uintptr_t alpha;
3736 asm volatile ( 3736 asm volatile (
3737 // 4 pixel loop. 3737 // 4 pixel loop.
3738 LABELALIGN 3738 LABELALIGN
3739 "1: \n" 3739 "1: \n"
3740 "movdqu " MEMACCESS(0) ",%%xmm0 \n" 3740 "movdqu " MEMACCESS(0) ",%%xmm0 \n"
3741 "movzb " MEMACCESS2(0x03,0) ",%3 \n" 3741 "movzb " MEMACCESS2(0x03,0) ",%3 \n"
3742 "punpcklbw %%xmm0,%%xmm0 \n" 3742 "punpcklbw %%xmm0,%%xmm0 \n"
3743 MEMOPREG(movd,0x00,4,3,4,xmm2) // movd 0x0(%4,%3,4),%%xmm2 3743 MEMOPREG(movd,0x00,4,3,4,xmm2) // movd 0x0(%4,%3,4),%%xmm2
3744 "movzb " MEMACCESS2(0x07,0) ",%3 \n" 3744 "movzb " MEMACCESS2(0x07,0) ",%3 \n"
3745 MEMOPREG(movd,0x00,4,3,4,xmm3) // movd 0x0(%4,%3,4),%%xmm3 3745 MEMOPREG(movd,0x00,4,3,4,xmm3) // movd 0x0(%4,%3,4),%%xmm3
(...skipping 10 matching lines...) Expand all
3756 "pshuflw $0x40,%%xmm2,%%xmm2 \n" 3756 "pshuflw $0x40,%%xmm2,%%xmm2 \n"
3757 "pshuflw $0x40,%%xmm3,%%xmm3 \n" 3757 "pshuflw $0x40,%%xmm3,%%xmm3 \n"
3758 "movlhps %%xmm3,%%xmm2 \n" 3758 "movlhps %%xmm3,%%xmm2 \n"
3759 "pmulhuw %%xmm2,%%xmm1 \n" 3759 "pmulhuw %%xmm2,%%xmm1 \n"
3760 "lea " MEMLEA(0x10,0) ",%0 \n" 3760 "lea " MEMLEA(0x10,0) ",%0 \n"
3761 "packuswb %%xmm1,%%xmm0 \n" 3761 "packuswb %%xmm1,%%xmm0 \n"
3762 "movdqu %%xmm0," MEMACCESS(1) " \n" 3762 "movdqu %%xmm0," MEMACCESS(1) " \n"
3763 "lea " MEMLEA(0x10,1) ",%1 \n" 3763 "lea " MEMLEA(0x10,1) ",%1 \n"
3764 "sub $0x4,%2 \n" 3764 "sub $0x4,%2 \n"
3765 "jg 1b \n" 3765 "jg 1b \n"
3766 : "+r"(src_argb), // %0 3766 : "+r"(src_argb), // %0
3767 "+r"(dst_argb), // %1 3767 "+r"(dst_argb), // %1
3768 "+r"(width), // %2 3768 "+r"(width), // %2
3769 "+r"(alpha) // %3 3769 "=&r"(alpha) // %3
3770 : "r"(fixed_invtbl8) // %4 3770 : "r"(fixed_invtbl8) // %4
3771 : "memory", "cc", NACL_R14 3771 : "memory", "cc", NACL_R14
3772 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" 3772 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
3773 ); 3773 );
3774 } 3774 }
3775 #endif // HAS_ARGBUNATTENUATEROW_SSE2 3775 #endif // HAS_ARGBUNATTENUATEROW_SSE2
3776 3776
3777 #ifdef HAS_ARGBUNATTENUATEROW_AVX2 3777 #ifdef HAS_ARGBUNATTENUATEROW_AVX2
3778 // Shuffle table duplicating alpha. 3778 // Shuffle table duplicating alpha.
3779 static const uvec8 kUnattenShuffleAlpha_AVX2 = { 3779 static const uvec8 kUnattenShuffleAlpha_AVX2 = {
3780 0u, 1u, 0u, 1u, 0u, 1u, 6u, 7u, 8u, 9u, 8u, 9u, 8u, 9u, 14u, 15u 3780 0u, 1u, 0u, 1u, 0u, 1u, 6u, 7u, 8u, 9u, 8u, 9u, 8u, 9u, 14u, 15u
3781 }; 3781 };
3782 // Unattenuate 8 pixels at a time. 3782 // Unattenuate 8 pixels at a time.
3783 void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb, 3783 void ARGBUnattenuateRow_AVX2(const uint8* src_argb, uint8* dst_argb,
3784 int width) { 3784 int width) {
3785 uintptr_t alpha = 0; 3785 uintptr_t alpha;
3786 asm volatile ( 3786 asm volatile (
3787 "sub %0,%1 \n" 3787 "sub %0,%1 \n"
3788 "vbroadcastf128 %5,%%ymm5 \n" 3788 "vbroadcastf128 %5,%%ymm5 \n"
3789 3789
3790 // 8 pixel loop. 3790 // 8 pixel loop.
3791 LABELALIGN 3791 LABELALIGN
3792 "1: \n" 3792 "1: \n"
3793 // replace VPGATHER 3793 // replace VPGATHER
3794 "movzb " MEMACCESS2(0x03,0) ",%3 \n" 3794 "movzb " MEMACCESS2(0x03,0) ",%3 \n"
3795 MEMOPREG(vmovd,0x00,4,3,4,xmm0) // vmovd 0x0(%4,%3,4),%%xmm0 3795 MEMOPREG(vmovd,0x00,4,3,4,xmm0) // vmovd 0x0(%4,%3,4),%%xmm0
(...skipping 28 matching lines...) Expand all
3824 "vpshufb %%ymm5,%%ymm2,%%ymm2 \n" 3824 "vpshufb %%ymm5,%%ymm2,%%ymm2 \n"
3825 "vpshufb %%ymm5,%%ymm3,%%ymm3 \n" 3825 "vpshufb %%ymm5,%%ymm3,%%ymm3 \n"
3826 "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n" 3826 "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n"
3827 "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n" 3827 "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n"
3828 "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" 3828 "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n"
3829 MEMOPMEM(vmovdqu,ymm0,0x00,0,1,1) // vmovdqu %%ymm0,(%0,%1) 3829 MEMOPMEM(vmovdqu,ymm0,0x00,0,1,1) // vmovdqu %%ymm0,(%0,%1)
3830 "lea " MEMLEA(0x20,0) ",%0 \n" 3830 "lea " MEMLEA(0x20,0) ",%0 \n"
3831 "sub $0x8,%2 \n" 3831 "sub $0x8,%2 \n"
3832 "jg 1b \n" 3832 "jg 1b \n"
3833 "vzeroupper \n" 3833 "vzeroupper \n"
3834 : "+r"(src_argb), // %0 3834 : "+r"(src_argb), // %0
3835 "+r"(dst_argb), // %1 3835 "+r"(dst_argb), // %1
3836 "+r"(width), // %2 3836 "+r"(width), // %2
3837 "+r"(alpha) // %3 3837 "=&r"(alpha) // %3
3838 : "r"(fixed_invtbl8), // %4 3838 : "r"(fixed_invtbl8), // %4
3839 "m"(kUnattenShuffleAlpha_AVX2) // %5 3839 "m"(kUnattenShuffleAlpha_AVX2) // %5
3840 : "memory", "cc", NACL_R14 3840 : "memory", "cc", NACL_R14
3841 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" 3841 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
3842 ); 3842 );
3843 } 3843 }
3844 #endif // HAS_ARGBUNATTENUATEROW_AVX2 3844 #endif // HAS_ARGBUNATTENUATEROW_AVX2
3845 3845
3846 #ifdef HAS_ARGBGRAYROW_SSSE3 3846 #ifdef HAS_ARGBGRAYROW_SSSE3
3847 // Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels 3847 // Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels
(...skipping 904 matching lines...) Expand 10 before | Expand all | Expand 10 after
4752 ); 4752 );
4753 } 4753 }
4754 #endif // HAS_CUMULATIVESUMTOAVERAGEROW_SSE2 4754 #endif // HAS_CUMULATIVESUMTOAVERAGEROW_SSE2
4755 4755
4756 #ifdef HAS_ARGBAFFINEROW_SSE2 4756 #ifdef HAS_ARGBAFFINEROW_SSE2
4757 // Copy ARGB pixels from source image with slope to a row of destination. 4757 // Copy ARGB pixels from source image with slope to a row of destination.
4758 LIBYUV_API 4758 LIBYUV_API
4759 void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride, 4759 void ARGBAffineRow_SSE2(const uint8* src_argb, int src_argb_stride,
4760 uint8* dst_argb, const float* src_dudv, int width) { 4760 uint8* dst_argb, const float* src_dudv, int width) {
4761 intptr_t src_argb_stride_temp = src_argb_stride; 4761 intptr_t src_argb_stride_temp = src_argb_stride;
4762 intptr_t temp = 0; 4762 intptr_t temp;
4763 asm volatile ( 4763 asm volatile (
4764 "movq " MEMACCESS(3) ",%%xmm2 \n" 4764 "movq " MEMACCESS(3) ",%%xmm2 \n"
4765 "movq " MEMACCESS2(0x08,3) ",%%xmm7 \n" 4765 "movq " MEMACCESS2(0x08,3) ",%%xmm7 \n"
4766 "shl $0x10,%1 \n" 4766 "shl $0x10,%1 \n"
4767 "add $0x4,%1 \n" 4767 "add $0x4,%1 \n"
4768 "movd %1,%%xmm5 \n" 4768 "movd %1,%%xmm5 \n"
4769 "sub $0x4,%4 \n" 4769 "sub $0x4,%4 \n"
4770 "jl 49f \n" 4770 "jl 49f \n"
4771 4771
4772 "pshufd $0x44,%%xmm7,%%xmm7 \n" 4772 "pshufd $0x44,%%xmm7,%%xmm7 \n"
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
4824 "movd %%xmm0," MEMACCESS(2) " \n" 4824 "movd %%xmm0," MEMACCESS(2) " \n"
4825 "lea " MEMLEA(0x04,2) ",%2 \n" 4825 "lea " MEMLEA(0x04,2) ",%2 \n"
4826 "sub $0x1,%4 \n" 4826 "sub $0x1,%4 \n"
4827 "jge 10b \n" 4827 "jge 10b \n"
4828 "19: \n" 4828 "19: \n"
4829 : "+r"(src_argb), // %0 4829 : "+r"(src_argb), // %0
4830 "+r"(src_argb_stride_temp), // %1 4830 "+r"(src_argb_stride_temp), // %1
4831 "+r"(dst_argb), // %2 4831 "+r"(dst_argb), // %2
4832 "+r"(src_dudv), // %3 4832 "+r"(src_dudv), // %3
4833 "+rm"(width), // %4 4833 "+rm"(width), // %4
4834 "+r"(temp) // %5 4834 "=&r"(temp) // %5
4835 : 4835 :
4836 : "memory", "cc", NACL_R14 4836 : "memory", "cc", NACL_R14
4837 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" 4837 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
4838 ); 4838 );
4839 } 4839 }
4840 #endif // HAS_ARGBAFFINEROW_SSE2 4840 #endif // HAS_ARGBAFFINEROW_SSE2
4841 4841
4842 #ifdef HAS_INTERPOLATEROW_SSSE3 4842 #ifdef HAS_INTERPOLATEROW_SSSE3
4843 // Bilinear filter 16x2 -> 16x1 4843 // Bilinear filter 16x2 -> 16x1
4844 void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr, 4844 void InterpolateRow_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
(...skipping 205 matching lines...) Expand 10 before | Expand all | Expand 10 after
5050 : "memory", "cc" 5050 : "memory", "cc"
5051 , "xmm0", "xmm1", "xmm5" 5051 , "xmm0", "xmm1", "xmm5"
5052 ); 5052 );
5053 } 5053 }
5054 #endif // HAS_ARGBSHUFFLEROW_AVX2 5054 #endif // HAS_ARGBSHUFFLEROW_AVX2
5055 5055
5056 #ifdef HAS_ARGBSHUFFLEROW_SSE2 5056 #ifdef HAS_ARGBSHUFFLEROW_SSE2
5057 // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. 5057 // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA.
5058 void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb, 5058 void ARGBShuffleRow_SSE2(const uint8* src_argb, uint8* dst_argb,
5059 const uint8* shuffler, int width) { 5059 const uint8* shuffler, int width) {
5060 uintptr_t pixel_temp = 0u; 5060 uintptr_t pixel_temp;
5061 asm volatile ( 5061 asm volatile (
5062 "pxor %%xmm5,%%xmm5 \n" 5062 "pxor %%xmm5,%%xmm5 \n"
5063 "mov " MEMACCESS(4) ",%k2 \n" 5063 "mov " MEMACCESS(4) ",%k2 \n"
5064 "cmp $0x3000102,%k2 \n" 5064 "cmp $0x3000102,%k2 \n"
5065 "je 3012f \n" 5065 "je 3012f \n"
5066 "cmp $0x10203,%k2 \n" 5066 "cmp $0x10203,%k2 \n"
5067 "je 123f \n" 5067 "je 123f \n"
5068 "cmp $0x30201,%k2 \n" 5068 "cmp $0x30201,%k2 \n"
5069 "je 321f \n" 5069 "je 321f \n"
5070 "cmp $0x2010003,%k2 \n" 5070 "cmp $0x2010003,%k2 \n"
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after
5155 "pshuflw $0xc6,%%xmm0,%%xmm0 \n" 5155 "pshuflw $0xc6,%%xmm0,%%xmm0 \n"
5156 "pshufhw $0xc6,%%xmm1,%%xmm1 \n" 5156 "pshufhw $0xc6,%%xmm1,%%xmm1 \n"
5157 "pshuflw $0xc6,%%xmm1,%%xmm1 \n" 5157 "pshuflw $0xc6,%%xmm1,%%xmm1 \n"
5158 "packuswb %%xmm1,%%xmm0 \n" 5158 "packuswb %%xmm1,%%xmm0 \n"
5159 "movdqu %%xmm0," MEMACCESS(1) " \n" 5159 "movdqu %%xmm0," MEMACCESS(1) " \n"
5160 "lea " MEMLEA(0x10,1) ",%1 \n" 5160 "lea " MEMLEA(0x10,1) ",%1 \n"
5161 "sub $0x4,%3 \n" 5161 "sub $0x4,%3 \n"
5162 "jg 3012b \n" 5162 "jg 3012b \n"
5163 5163
5164 "99: \n" 5164 "99: \n"
5165 : "+r"(src_argb), // %0 5165 : "+r"(src_argb), // %0
5166 "+r"(dst_argb), // %1 5166 "+r"(dst_argb), // %1
5167 "+d"(pixel_temp), // %2 5167 "=&d"(pixel_temp), // %2
5168 "+r"(width) // %3 5168 "+r"(width) // %3
5169 : "r"(shuffler) // %4 5169 : "r"(shuffler) // %4
5170 : "memory", "cc", NACL_R14 5170 : "memory", "cc", NACL_R14
5171 "xmm0", "xmm1", "xmm5" 5171 "xmm0", "xmm1", "xmm5"
5172 ); 5172 );
5173 } 5173 }
5174 #endif // HAS_ARGBSHUFFLEROW_SSE2 5174 #endif // HAS_ARGBSHUFFLEROW_SSE2
5175 5175
5176 #ifdef HAS_I422TOYUY2ROW_SSE2 5176 #ifdef HAS_I422TOYUY2ROW_SSE2
5177 void I422ToYUY2Row_SSE2(const uint8* src_y, 5177 void I422ToYUY2Row_SSE2(const uint8* src_y,
5178 const uint8* src_u, 5178 const uint8* src_u,
5179 const uint8* src_v, 5179 const uint8* src_v,
(...skipping 156 matching lines...) Expand 10 before | Expand all | Expand 10 after
5336 : "memory", "cc", 5336 : "memory", "cc",
5337 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" 5337 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
5338 ); 5338 );
5339 } 5339 }
5340 #endif // HAS_ARGBPOLYNOMIALROW_AVX2 5340 #endif // HAS_ARGBPOLYNOMIALROW_AVX2
5341 5341
5342 #ifdef HAS_ARGBCOLORTABLEROW_X86 5342 #ifdef HAS_ARGBCOLORTABLEROW_X86
5343 // Transform ARGB pixels with color table. 5343 // Transform ARGB pixels with color table.
5344 void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, 5344 void ARGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb,
5345 int width) { 5345 int width) {
5346 uintptr_t pixel_temp = 0u; 5346 uintptr_t pixel_temp;
5347 asm volatile ( 5347 asm volatile (
5348 // 1 pixel loop. 5348 // 1 pixel loop.
5349 LABELALIGN 5349 LABELALIGN
5350 "1: \n" 5350 "1: \n"
5351 "movzb " MEMACCESS(0) ",%1 \n" 5351 "movzb " MEMACCESS(0) ",%1 \n"
5352 "lea " MEMLEA(0x4,0) ",%0 \n" 5352 "lea " MEMLEA(0x4,0) ",%0 \n"
5353 MEMOPARG(movzb,0x00,3,1,4,1) " \n" // movzb (%3,%1,4),%1 5353 MEMOPARG(movzb,0x00,3,1,4,1) " \n" // movzb (%3,%1,4),%1
5354 "mov %b1," MEMACCESS2(-0x4,0) " \n" 5354 "mov %b1," MEMACCESS2(-0x4,0) " \n"
5355 "movzb " MEMACCESS2(-0x3,0) ",%1 \n" 5355 "movzb " MEMACCESS2(-0x3,0) ",%1 \n"
5356 MEMOPARG(movzb,0x01,3,1,4,1) " \n" // movzb 0x1(%3,%1,4),%1 5356 MEMOPARG(movzb,0x01,3,1,4,1) " \n" // movzb 0x1(%3,%1,4),%1
5357 "mov %b1," MEMACCESS2(-0x3,0) " \n" 5357 "mov %b1," MEMACCESS2(-0x3,0) " \n"
5358 "movzb " MEMACCESS2(-0x2,0) ",%1 \n" 5358 "movzb " MEMACCESS2(-0x2,0) ",%1 \n"
5359 MEMOPARG(movzb,0x02,3,1,4,1) " \n" // movzb 0x2(%3,%1,4),%1 5359 MEMOPARG(movzb,0x02,3,1,4,1) " \n" // movzb 0x2(%3,%1,4),%1
5360 "mov %b1," MEMACCESS2(-0x2,0) " \n" 5360 "mov %b1," MEMACCESS2(-0x2,0) " \n"
5361 "movzb " MEMACCESS2(-0x1,0) ",%1 \n" 5361 "movzb " MEMACCESS2(-0x1,0) ",%1 \n"
5362 MEMOPARG(movzb,0x03,3,1,4,1) " \n" // movzb 0x3(%3,%1,4),%1 5362 MEMOPARG(movzb,0x03,3,1,4,1) " \n" // movzb 0x3(%3,%1,4),%1
5363 "mov %b1," MEMACCESS2(-0x1,0) " \n" 5363 "mov %b1," MEMACCESS2(-0x1,0) " \n"
5364 "dec %2 \n" 5364 "dec %2 \n"
5365 "jg 1b \n" 5365 "jg 1b \n"
5366 : "+r"(dst_argb), // %0 5366 : "+r"(dst_argb), // %0
5367 "+d"(pixel_temp), // %1 5367 "=&d"(pixel_temp), // %1
5368 "+r"(width) // %2 5368 "+r"(width) // %2
5369 : "r"(table_argb) // %3 5369 : "r"(table_argb) // %3
5370 : "memory", "cc"); 5370 : "memory", "cc");
5371 } 5371 }
5372 #endif // HAS_ARGBCOLORTABLEROW_X86 5372 #endif // HAS_ARGBCOLORTABLEROW_X86
5373 5373
5374 #ifdef HAS_RGBCOLORTABLEROW_X86 5374 #ifdef HAS_RGBCOLORTABLEROW_X86
5375 // Transform RGB pixels with color table. 5375 // Transform RGB pixels with color table.
5376 void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) { 5376 void RGBColorTableRow_X86(uint8* dst_argb, const uint8* table_argb, int width) {
5377 uintptr_t pixel_temp = 0u; 5377 uintptr_t pixel_temp;
5378 asm volatile ( 5378 asm volatile (
5379 // 1 pixel loop. 5379 // 1 pixel loop.
5380 LABELALIGN 5380 LABELALIGN
5381 "1: \n" 5381 "1: \n"
5382 "movzb " MEMACCESS(0) ",%1 \n" 5382 "movzb " MEMACCESS(0) ",%1 \n"
5383 "lea " MEMLEA(0x4,0) ",%0 \n" 5383 "lea " MEMLEA(0x4,0) ",%0 \n"
5384 MEMOPARG(movzb,0x00,3,1,4,1) " \n" // movzb (%3,%1,4),%1 5384 MEMOPARG(movzb,0x00,3,1,4,1) " \n" // movzb (%3,%1,4),%1
5385 "mov %b1," MEMACCESS2(-0x4,0) " \n" 5385 "mov %b1," MEMACCESS2(-0x4,0) " \n"
5386 "movzb " MEMACCESS2(-0x3,0) ",%1 \n" 5386 "movzb " MEMACCESS2(-0x3,0) ",%1 \n"
5387 MEMOPARG(movzb,0x01,3,1,4,1) " \n" // movzb 0x1(%3,%1,4),%1 5387 MEMOPARG(movzb,0x01,3,1,4,1) " \n" // movzb 0x1(%3,%1,4),%1
5388 "mov %b1," MEMACCESS2(-0x3,0) " \n" 5388 "mov %b1," MEMACCESS2(-0x3,0) " \n"
5389 "movzb " MEMACCESS2(-0x2,0) ",%1 \n" 5389 "movzb " MEMACCESS2(-0x2,0) ",%1 \n"
5390 MEMOPARG(movzb,0x02,3,1,4,1) " \n" // movzb 0x2(%3,%1,4),%1 5390 MEMOPARG(movzb,0x02,3,1,4,1) " \n" // movzb 0x2(%3,%1,4),%1
5391 "mov %b1," MEMACCESS2(-0x2,0) " \n" 5391 "mov %b1," MEMACCESS2(-0x2,0) " \n"
5392 "dec %2 \n" 5392 "dec %2 \n"
5393 "jg 1b \n" 5393 "jg 1b \n"
5394 : "+r"(dst_argb), // %0 5394 : "+r"(dst_argb), // %0
5395 "+d"(pixel_temp), // %1 5395 "=&d"(pixel_temp), // %1
5396 "+r"(width) // %2 5396 "+r"(width) // %2
5397 : "r"(table_argb) // %3 5397 : "r"(table_argb) // %3
5398 : "memory", "cc"); 5398 : "memory", "cc");
5399 } 5399 }
5400 #endif // HAS_RGBCOLORTABLEROW_X86 5400 #endif // HAS_RGBCOLORTABLEROW_X86
5401 5401
5402 #ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3 5402 #ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3
5403 // Transform RGB pixels with luma table. 5403 // Transform RGB pixels with luma table.
5404 void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb, 5404 void ARGBLumaColorTableRow_SSSE3(const uint8* src_argb, uint8* dst_argb,
5405 int width, 5405 int width,
5406 const uint8* luma, uint32 lumacoeff) { 5406 const uint8* luma, uint32 lumacoeff) {
5407 uintptr_t pixel_temp = 0u; 5407 uintptr_t pixel_temp;
5408 uintptr_t table_temp = 0u; 5408 uintptr_t table_temp;
5409 asm volatile ( 5409 asm volatile (
5410 "movd %6,%%xmm3 \n" 5410 "movd %6,%%xmm3 \n"
5411 "pshufd $0x0,%%xmm3,%%xmm3 \n" 5411 "pshufd $0x0,%%xmm3,%%xmm3 \n"
5412 "pcmpeqb %%xmm4,%%xmm4 \n" 5412 "pcmpeqb %%xmm4,%%xmm4 \n"
5413 "psllw $0x8,%%xmm4 \n" 5413 "psllw $0x8,%%xmm4 \n"
5414 "pxor %%xmm5,%%xmm5 \n" 5414 "pxor %%xmm5,%%xmm5 \n"
5415 5415
5416 // 4 pixel loop. 5416 // 4 pixel loop.
5417 LABELALIGN 5417 LABELALIGN
5418 "1: \n" 5418 "1: \n"
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after
5480 "mov %b0," MEMACCESS2(0xd,3) " \n" 5480 "mov %b0," MEMACCESS2(0xd,3) " \n"
5481 "movzb " MEMACCESS2(0xe,2) ",%0 \n" 5481 "movzb " MEMACCESS2(0xe,2) ",%0 \n"
5482 MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0 5482 MEMOPARG(movzb,0x00,1,0,1,0) " \n" // movzb (%1,%0,1),%0
5483 "mov %b0," MEMACCESS2(0xe,3) " \n" 5483 "mov %b0," MEMACCESS2(0xe,3) " \n"
5484 "movzb " MEMACCESS2(0xf,2) ",%0 \n" 5484 "movzb " MEMACCESS2(0xf,2) ",%0 \n"
5485 "mov %b0," MEMACCESS2(0xf,3) " \n" 5485 "mov %b0," MEMACCESS2(0xf,3) " \n"
5486 "lea " MEMLEA(0x10,2) ",%2 \n" 5486 "lea " MEMLEA(0x10,2) ",%2 \n"
5487 "lea " MEMLEA(0x10,3) ",%3 \n" 5487 "lea " MEMLEA(0x10,3) ",%3 \n"
5488 "sub $0x4,%4 \n" 5488 "sub $0x4,%4 \n"
5489 "jg 1b \n" 5489 "jg 1b \n"
5490 : "+d"(pixel_temp), // %0 5490 : "=&d"(pixel_temp), // %0
5491 "+a"(table_temp), // %1 5491 "=&a"(table_temp), // %1
5492 "+r"(src_argb), // %2 5492 "+r"(src_argb), // %2
5493 "+r"(dst_argb), // %3 5493 "+r"(dst_argb), // %3
5494 "+rm"(width) // %4 5494 "+rm"(width) // %4
5495 : "r"(luma), // %5 5495 : "r"(luma), // %5
5496 "rm"(lumacoeff) // %6 5496 "rm"(lumacoeff) // %6
5497 : "memory", "cc", "xmm0", "xmm3", "xmm4", "xmm5" 5497 : "memory", "cc", "xmm0", "xmm3", "xmm4", "xmm5"
5498 ); 5498 );
5499 } 5499 }
5500 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 5500 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
5501 5501
5502 #endif // defined(__x86_64__) || defined(__i386__) 5502 #endif // defined(__x86_64__) || defined(__i386__)
5503 5503
5504 #ifdef __cplusplus 5504 #ifdef __cplusplus
5505 } // extern "C" 5505 } // extern "C"
5506 } // namespace libyuv 5506 } // namespace libyuv
5507 #endif 5507 #endif
OLD | NEW
« no previous file with comments | « source/rotate_neon64.cc ('k') | source/row_mips.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698