Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(661)

Side by Side Diff: source/row_win.cc

Issue 1363503002: yuvconstants for all YUV to RGB conversion functions. (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: mips dspr2 add constants parameter Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_neon64.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
76 xmm2 = _mm_unpacklo_epi8(xmm2, xmm1); \ 76 xmm2 = _mm_unpacklo_epi8(xmm2, xmm1); \
77 xmm0 = _mm_unpacklo_epi8(xmm0, xmm5); \ 77 xmm0 = _mm_unpacklo_epi8(xmm0, xmm5); \
78 xmm1 = _mm_loadu_si128(&xmm2); \ 78 xmm1 = _mm_loadu_si128(&xmm2); \
79 xmm2 = _mm_unpacklo_epi16(xmm2, xmm0); \ 79 xmm2 = _mm_unpacklo_epi16(xmm2, xmm0); \
80 xmm1 = _mm_unpackhi_epi16(xmm1, xmm0); \ 80 xmm1 = _mm_unpackhi_epi16(xmm1, xmm0); \
81 _mm_storeu_si128((__m128i *)dst_argb, xmm2); \ 81 _mm_storeu_si128((__m128i *)dst_argb, xmm2); \
82 _mm_storeu_si128((__m128i *)(dst_argb + 16), xmm1); \ 82 _mm_storeu_si128((__m128i *)(dst_argb + 16), xmm1); \
83 dst_argb += 32; 83 dst_argb += 32;
84 84
85 85
86 #if defined(HAS_I422TOARGBMATRIXROW_SSSE3) 86 #if defined(HAS_I422TOARGBROW_SSSE3)
87 void I422ToARGBMatrixRow_SSSE3(const uint8* y_buf, 87 void I422ToARGBRow_SSSE3(const uint8* y_buf,
88 const uint8* u_buf, 88 const uint8* u_buf,
89 const uint8* v_buf, 89 const uint8* v_buf,
90 uint8* dst_argb, 90 uint8* dst_argb,
91 struct YuvConstants* YuvConstants, 91 struct YuvConstants* yuvconstants,
92 int width) { 92 int width) {
93 __m128i xmm0, xmm1, xmm2, xmm3; 93 __m128i xmm0, xmm1, xmm2, xmm3;
94 const __m128i xmm5 = _mm_set1_epi8(-1); 94 const __m128i xmm5 = _mm_set1_epi8(-1);
95 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; 95 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
96 while (width > 0) { 96 while (width > 0) {
97 READYUV422 97 READYUV422
98 YUVTORGB(YuvConstants) 98 YUVTORGB(YuvConstants)
99 STOREARGB 99 STOREARGB
100 width -= 8; 100 width -= 8;
101 } 101 }
102 } 102 }
103 #endif 103 #endif
104 104
105 #if defined(HAS_I422TOABGRMATRIXROW_SSSE3) 105 #if defined(HAS_I422TOABGRROW_SSSE3)
106 void I422ToABGRMatrixRow_SSSE3(const uint8* y_buf, 106 void I422ToABGRRow_SSSE3(const uint8* y_buf,
107 const uint8* u_buf, 107 const uint8* u_buf,
108 const uint8* v_buf, 108 const uint8* v_buf,
109 uint8* dst_argb, 109 uint8* dst_argb,
110 struct YuvConstants* YuvConstants, 110 struct YuvConstants* yuvconstants,
111 int width) { 111 int width) {
112 __m128i xmm0, xmm1, xmm2, xmm3; 112 __m128i xmm0, xmm1, xmm2, xmm3;
113 const __m128i xmm5 = _mm_set1_epi8(-1); 113 const __m128i xmm5 = _mm_set1_epi8(-1);
114 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; 114 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
115 while (width > 0) { 115 while (width > 0) {
116 READYUV422 116 READYUV422
117 YUVTORGB(YuvConstants) 117 YUVTORGB(YuvConstants)
118 STOREABGR 118 STOREABGR
119 width -= 8; 119 width -= 8;
120 } 120 }
121 } 121 }
(...skipping 1834 matching lines...) Expand 10 before | Expand all | Expand 10 after
1956 __asm vpermq ymm1, ymm1, 0xd8 \ 1956 __asm vpermq ymm1, ymm1, 0xd8 \
1957 __asm vpunpcklbw ymm2, ymm0, ymm5 /* BA */ \ 1957 __asm vpunpcklbw ymm2, ymm0, ymm5 /* BA */ \
1958 __asm vpermq ymm2, ymm2, 0xd8 \ 1958 __asm vpermq ymm2, ymm2, 0xd8 \
1959 __asm vpunpcklwd ymm0, ymm1, ymm2 /* RGBA first 8 pixels */ \ 1959 __asm vpunpcklwd ymm0, ymm1, ymm2 /* RGBA first 8 pixels */ \
1960 __asm vpunpckhwd ymm1, ymm1, ymm2 /* RGBA next 8 pixels */ \ 1960 __asm vpunpckhwd ymm1, ymm1, ymm2 /* RGBA next 8 pixels */ \
1961 __asm vmovdqu [edx], ymm0 \ 1961 __asm vmovdqu [edx], ymm0 \
1962 __asm vmovdqu [edx + 32], ymm1 \ 1962 __asm vmovdqu [edx + 32], ymm1 \
1963 __asm lea edx, [edx + 64] \ 1963 __asm lea edx, [edx + 64] \
1964 } 1964 }
1965 1965
1966 #ifdef HAS_I422TOARGBMATRIXROW_AVX2 1966 #ifdef HAS_I422TOARGBROW_AVX2
1967 // 16 pixels 1967 // 16 pixels
1968 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). 1968 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
1969 __declspec(naked) 1969 __declspec(naked)
1970 void I422ToARGBMatrixRow_AVX2(const uint8* y_buf, 1970 void I422ToARGBRow_AVX2(const uint8* y_buf,
1971 const uint8* u_buf, 1971 const uint8* u_buf,
1972 const uint8* v_buf, 1972 const uint8* v_buf,
1973 uint8* dst_argb, 1973 uint8* dst_argb,
1974 struct YuvConstants* YuvConstants, 1974 struct YuvConstants* yuvconstants,
1975 int width) { 1975 int width) {
1976 __asm { 1976 __asm {
1977 push esi 1977 push esi
1978 push edi 1978 push edi
1979 push ebp 1979 push ebp
1980 mov eax, [esp + 12 + 4] // Y 1980 mov eax, [esp + 12 + 4] // Y
1981 mov esi, [esp + 12 + 8] // U 1981 mov esi, [esp + 12 + 8] // U
1982 mov edi, [esp + 12 + 12] // V 1982 mov edi, [esp + 12 + 12] // V
1983 mov edx, [esp + 12 + 16] // argb 1983 mov edx, [esp + 12 + 16] // argb
1984 mov ebp, [esp + 12 + 20] // YuvConstants 1984 mov ebp, [esp + 12 + 20] // YuvConstants
1985 mov ecx, [esp + 12 + 24] // width 1985 mov ecx, [esp + 12 + 24] // width
1986 sub edi, esi 1986 sub edi, esi
1987 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha 1987 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
1988 1988
1989 convertloop: 1989 convertloop:
1990 READYUV422_AVX2 1990 READYUV422_AVX2
1991 YUVTORGB_AVX2(ebp) 1991 YUVTORGB_AVX2(ebp)
1992 STOREARGB_AVX2 1992 STOREARGB_AVX2
1993 1993
1994 sub ecx, 16 1994 sub ecx, 16
1995 jg convertloop 1995 jg convertloop
1996 1996
1997 pop ebp 1997 pop ebp
1998 pop edi 1998 pop edi
1999 pop esi 1999 pop esi
2000 vzeroupper 2000 vzeroupper
2001 ret 2001 ret
2002 } 2002 }
2003 } 2003 }
2004 #endif // HAS_I422TOARGBMATRIXROW_AVX2 2004 #endif // HAS_I422TOARGBROW_AVX2
2005 2005
2006 #ifdef HAS_I444TOARGBMATRIXROW_AVX2 2006 #ifdef HAS_I444TOARGBROW_AVX2
2007 // 16 pixels 2007 // 16 pixels
2008 // 16 UV values with 16 Y producing 16 ARGB (64 bytes). 2008 // 16 UV values with 16 Y producing 16 ARGB (64 bytes).
2009 __declspec(naked) 2009 __declspec(naked)
2010 void I444ToARGBMatrixRow_AVX2(const uint8* y_buf, 2010 void I444ToARGBRow_AVX2(const uint8* y_buf,
2011 const uint8* u_buf, 2011 const uint8* u_buf,
2012 const uint8* v_buf, 2012 const uint8* v_buf,
2013 uint8* dst_argb, 2013 uint8* dst_argb,
2014 struct YuvConstants* YuvConstants, 2014 struct YuvConstants* yuvconstants,
2015 int width) { 2015 int width) {
2016 __asm { 2016 __asm {
2017 push esi 2017 push esi
2018 push edi 2018 push edi
2019 push ebp 2019 push ebp
2020 mov eax, [esp + 12 + 4] // Y 2020 mov eax, [esp + 12 + 4] // Y
2021 mov esi, [esp + 12 + 8] // U 2021 mov esi, [esp + 12 + 8] // U
2022 mov edi, [esp + 12 + 12] // V 2022 mov edi, [esp + 12 + 12] // V
2023 mov edx, [esp + 12 + 16] // argb 2023 mov edx, [esp + 12 + 16] // argb
2024 mov ebp, [esp + 12 + 20] // YuvConstants 2024 mov ebp, [esp + 12 + 20] // YuvConstants
2025 mov ecx, [esp + 12 + 24] // width 2025 mov ecx, [esp + 12 + 24] // width
2026 sub edi, esi 2026 sub edi, esi
2027 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha 2027 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
2028 convertloop: 2028 convertloop:
2029 READYUV444_AVX2 2029 READYUV444_AVX2
2030 YUVTORGB_AVX2(ebp) 2030 YUVTORGB_AVX2(ebp)
2031 STOREARGB_AVX2 2031 STOREARGB_AVX2
2032 2032
2033 sub ecx, 16 2033 sub ecx, 16
2034 jg convertloop 2034 jg convertloop
2035 2035
2036 pop ebp 2036 pop ebp
2037 pop edi 2037 pop edi
2038 pop esi 2038 pop esi
2039 vzeroupper 2039 vzeroupper
2040 ret 2040 ret
2041 } 2041 }
2042 } 2042 }
2043 #endif // HAS_I444TOARGBMATRIXROW_AVX2 2043 #endif // HAS_I444TOARGBROW_AVX2
2044 2044
2045 #ifdef HAS_I444TOABGRMATRIXROW_AVX2 2045 #ifdef HAS_I444TOABGRROW_AVX2
2046 // 16 pixels 2046 // 16 pixels
2047 // 16 UV values with 16 Y producing 16 ABGR (64 bytes). 2047 // 16 UV values with 16 Y producing 16 ABGR (64 bytes).
2048 __declspec(naked) 2048 __declspec(naked)
2049 void I444ToABGRMatrixRow_AVX2(const uint8* y_buf, 2049 void I444ToABGRRow_AVX2(const uint8* y_buf,
2050 const uint8* u_buf, 2050 const uint8* u_buf,
2051 const uint8* v_buf, 2051 const uint8* v_buf,
2052 uint8* dst_abgr, 2052 uint8* dst_abgr,
2053 struct YuvConstants* YuvConstants, 2053 struct YuvConstants* yuvconstants,
2054 int width) { 2054 int width) {
2055 __asm { 2055 __asm {
2056 push esi 2056 push esi
2057 push edi 2057 push edi
2058 push ebp 2058 push ebp
2059 mov eax, [esp + 12 + 4] // Y 2059 mov eax, [esp + 12 + 4] // Y
2060 mov esi, [esp + 12 + 8] // U 2060 mov esi, [esp + 12 + 8] // U
2061 mov edi, [esp + 12 + 12] // V 2061 mov edi, [esp + 12 + 12] // V
2062 mov edx, [esp + 12 + 16] // abgr 2062 mov edx, [esp + 12 + 16] // abgr
2063 mov ebp, [esp + 12 + 20] // YuvConstants 2063 mov ebp, [esp + 12 + 20] // YuvConstants
2064 mov ecx, [esp + 12 + 24] // width 2064 mov ecx, [esp + 12 + 24] // width
2065 sub edi, esi 2065 sub edi, esi
2066 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha 2066 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
2067 convertloop: 2067 convertloop:
2068 READYUV444_AVX2 2068 READYUV444_AVX2
2069 YUVTORGB_AVX2(ebp) 2069 YUVTORGB_AVX2(ebp)
2070 STOREABGR_AVX2 2070 STOREABGR_AVX2
2071 2071
2072 sub ecx, 16 2072 sub ecx, 16
2073 jg convertloop 2073 jg convertloop
2074 2074
2075 pop ebp 2075 pop ebp
2076 pop edi 2076 pop edi
2077 pop esi 2077 pop esi
2078 vzeroupper 2078 vzeroupper
2079 ret 2079 ret
2080 } 2080 }
2081 } 2081 }
2082 #endif // HAS_I444TOABGRMATRIXROW_AVX2 2082 #endif // HAS_I444TOABGRROW_AVX2
2083 2083
2084 #ifdef HAS_I411TOARGBROW_AVX2 2084 #ifdef HAS_I411TOARGBROW_AVX2
2085 // 16 pixels 2085 // 16 pixels
2086 // 4 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). 2086 // 4 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
2087 __declspec(naked) 2087 __declspec(naked)
2088 void I411ToARGBRow_AVX2(const uint8* y_buf, 2088 void I411ToARGBRow_AVX2(const uint8* y_buf,
2089 const uint8* u_buf, 2089 const uint8* u_buf,
2090 const uint8* v_buf, 2090 const uint8* v_buf,
2091 uint8* dst_argb, 2091 uint8* dst_argb,
2092 struct YuvConstants* yuvconstants,
2092 int width) { 2093 int width) {
2093 __asm { 2094 __asm {
2094 push esi 2095 push esi
2095 push edi 2096 push edi
2096 mov eax, [esp + 8 + 4] // Y 2097 push ebp
2097 mov esi, [esp + 8 + 8] // U 2098 mov eax, [esp + 12 + 4] // Y
2098 mov edi, [esp + 8 + 12] // V 2099 mov esi, [esp + 12 + 8] // U
2099 mov edx, [esp + 8 + 16] // argb 2100 mov edi, [esp + 12 + 12] // V
2100 mov ecx, [esp + 8 + 20] // width 2101 mov edx, [esp + 12 + 16] // argb
2102 mov ebp, [esp + 12 + 20] // YuvConstants
2103 mov ecx, [esp + 12 + 24] // width
2101 sub edi, esi 2104 sub edi, esi
2102 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha 2105 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
2103 2106
2104 convertloop: 2107 convertloop:
2105 READYUV411_AVX2 2108 READYUV411_AVX2
2106 YUVTORGB_AVX2(kYuvConstants) 2109 YUVTORGB_AVX2(ebp)
2107 STOREARGB_AVX2 2110 STOREARGB_AVX2
2108 2111
2109 sub ecx, 16 2112 sub ecx, 16
2110 jg convertloop 2113 jg convertloop
2111 2114
2115 pop ebp
2112 pop edi 2116 pop edi
2113 pop esi 2117 pop esi
2114 vzeroupper 2118 vzeroupper
2115 ret 2119 ret
2116 } 2120 }
2117 } 2121 }
2118 #endif // HAS_I411TOARGBROW_AVX2 2122 #endif // HAS_I411TOARGBROW_AVX2
2119 2123
2120 #ifdef HAS_NV12TOARGBROW_AVX2 2124 #ifdef HAS_NV12TOARGBROW_AVX2
2121 // 16 pixels. 2125 // 16 pixels.
2122 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). 2126 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
2123 __declspec(naked) 2127 __declspec(naked)
2124 void NV12ToARGBRow_AVX2(const uint8* y_buf, 2128 void NV12ToARGBRow_AVX2(const uint8* y_buf,
2125 const uint8* uv_buf, 2129 const uint8* uv_buf,
2126 uint8* dst_argb, 2130 uint8* dst_argb,
2131 struct YuvConstants* yuvconstants,
2127 int width) { 2132 int width) {
2128 __asm { 2133 __asm {
2129 push esi 2134 push esi
2130 mov eax, [esp + 4 + 4] // Y 2135 push ebp
2131 mov esi, [esp + 4 + 8] // UV 2136 mov eax, [esp + 8 + 4] // Y
2132 mov edx, [esp + 4 + 12] // argb 2137 mov esi, [esp + 8 + 8] // UV
2133 mov ecx, [esp + 4 + 16] // width 2138 mov edx, [esp + 8 + 12] // argb
2139 mov ebp, [esp + 8 + 16] // YuvConstants
2140 mov ecx, [esp + 8 + 20] // width
2134 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha 2141 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
2135 2142
2136 convertloop: 2143 convertloop:
2137 READNV12_AVX2 2144 READNV12_AVX2
2138 YUVTORGB_AVX2(kYuvConstants) 2145 YUVTORGB_AVX2(ebp)
2139 STOREARGB_AVX2 2146 STOREARGB_AVX2
2140 2147
2141 sub ecx, 16 2148 sub ecx, 16
2142 jg convertloop 2149 jg convertloop
2143 2150
2151 pop ebp
2144 pop esi 2152 pop esi
2145 vzeroupper 2153 vzeroupper
2146 ret 2154 ret
2147 } 2155 }
2148 } 2156 }
2149 #endif // HAS_NV12TOARGBROW_AVX2 2157 #endif // HAS_NV12TOARGBROW_AVX2
2150 2158
2151 #ifdef HAS_NV21TOARGBROW_AVX2
2152 // 16 pixels.
2153 // 8 VU values upsampled to 16 VU, mixed with 16 Y producing 16 ARGB (64 bytes).
2154 __declspec(naked)
2155 void NV21ToARGBRow_AVX2(const uint8* y_buf,
2156 const uint8* uv_buf,
2157 uint8* dst_argb,
2158 int width) {
2159 __asm {
2160 push esi
2161 mov eax, [esp + 4 + 4] // Y
2162 mov esi, [esp + 4 + 8] // UV
2163 mov edx, [esp + 4 + 12] // argb
2164 mov ecx, [esp + 4 + 16] // width
2165 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
2166
2167 convertloop:
2168 READNV12_AVX2
2169 YUVTORGB_AVX2(kYvuConstants)
2170 STOREARGB_AVX2
2171
2172 sub ecx, 16
2173 jg convertloop
2174
2175 pop esi
2176 vzeroupper
2177 ret
2178 }
2179 }
2180 #endif // HAS_NV21TOARGBROW_AVX2
2181
2182 #ifdef HAS_I422TOBGRAROW_AVX2 2159 #ifdef HAS_I422TOBGRAROW_AVX2
2183 // 16 pixels 2160 // 16 pixels
2184 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes). 2161 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes).
2185 // TODO(fbarchard): Use macros to reduce duplicate code. See SSSE3. 2162 // TODO(fbarchard): Use macros to reduce duplicate code. See SSSE3.
2186 __declspec(naked) 2163 __declspec(naked)
2187 void I422ToBGRARow_AVX2(const uint8* y_buf, 2164 void I422ToBGRARow_AVX2(const uint8* y_buf,
2188 const uint8* u_buf, 2165 const uint8* u_buf,
2189 const uint8* v_buf, 2166 const uint8* v_buf,
2190 uint8* dst_argb, 2167 uint8* dst_argb,
2168 struct YuvConstants* yuvconstants,
2191 int width) { 2169 int width) {
2192 __asm { 2170 __asm {
2193 push esi 2171 push esi
2194 push edi 2172 push edi
2195 mov eax, [esp + 8 + 4] // Y 2173 push ebp
2196 mov esi, [esp + 8 + 8] // U 2174 mov eax, [esp + 12 + 4] // Y
2197 mov edi, [esp + 8 + 12] // V 2175 mov esi, [esp + 12 + 8] // U
2198 mov edx, [esp + 8 + 16] // argb 2176 mov edi, [esp + 12 + 12] // V
2199 mov ecx, [esp + 8 + 20] // width 2177 mov edx, [esp + 12 + 16] // argb
2178 mov ebp, [esp + 12 + 20] // YuvConstants
2179 mov ecx, [esp + 12 + 24] // width
2200 sub edi, esi 2180 sub edi, esi
2201 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha 2181 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
2202 2182
2203 convertloop: 2183 convertloop:
2204 READYUV422_AVX2 2184 READYUV422_AVX2
2205 YUVTORGB_AVX2(kYuvConstants) 2185 YUVTORGB_AVX2(ebp)
2206 STOREBGRA_AVX2 2186 STOREBGRA_AVX2
2207 2187
2208 sub ecx, 16 2188 sub ecx, 16
2209 jg convertloop 2189 jg convertloop
2210 2190
2191 pop ebp
2211 pop edi 2192 pop edi
2212 pop esi 2193 pop esi
2213 vzeroupper 2194 vzeroupper
2214 ret 2195 ret
2215 } 2196 }
2216 } 2197 }
2217 #endif // HAS_I422TOBGRAROW_AVX2 2198 #endif // HAS_I422TOBGRAROW_AVX2
2218 2199
2219 #ifdef HAS_I422TORGBAROW_AVX2 2200 #ifdef HAS_I422TORGBAROW_AVX2
2220 // 16 pixels 2201 // 16 pixels
2221 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes). 2202 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes).
2222 __declspec(naked) 2203 __declspec(naked)
2223 void I422ToRGBARow_AVX2(const uint8* y_buf, 2204 void I422ToRGBARow_AVX2(const uint8* y_buf,
2224 const uint8* u_buf, 2205 const uint8* u_buf,
2225 const uint8* v_buf, 2206 const uint8* v_buf,
2226 uint8* dst_argb, 2207 uint8* dst_argb,
2208 struct YuvConstants* yuvconstants,
2227 int width) { 2209 int width) {
2228 __asm { 2210 __asm {
2229 push esi 2211 push esi
2230 push edi 2212 push edi
2231 mov eax, [esp + 8 + 4] // Y 2213 push ebp
2232 mov esi, [esp + 8 + 8] // U 2214 mov eax, [esp + 12 + 4] // Y
2233 mov edi, [esp + 8 + 12] // V 2215 mov esi, [esp + 12 + 8] // U
2234 mov edx, [esp + 8 + 16] // argb 2216 mov edi, [esp + 12 + 12] // V
2235 mov ecx, [esp + 8 + 20] // width 2217 mov edx, [esp + 12 + 16] // argb
2218 mov ebp, [esp + 12 + 20] // YuvConstants
2219 mov ecx, [esp + 12 + 24] // width
2236 sub edi, esi 2220 sub edi, esi
2237 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha 2221 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
2238 2222
2239 convertloop: 2223 convertloop:
2240 READYUV422_AVX2 2224 READYUV422_AVX2
2241 YUVTORGB_AVX2(kYuvConstants) 2225 YUVTORGB_AVX2(ebp)
2242 STORERGBA_AVX2 2226 STORERGBA_AVX2
2243 2227
2244 sub ecx, 16 2228 sub ecx, 16
2245 jg convertloop 2229 jg convertloop
2246 2230
2231 pop ebp
2247 pop edi 2232 pop edi
2248 pop esi 2233 pop esi
2249 vzeroupper 2234 vzeroupper
2250 ret 2235 ret
2251 } 2236 }
2252 } 2237 }
2253 #endif // HAS_I422TORGBAROW_AVX2 2238 #endif // HAS_I422TORGBAROW_AVX2
2254 2239
2255 #ifdef HAS_I422TOABGRROW_AVX2 2240 #ifdef HAS_I422TOABGRROW_AVX2
2256 // 16 pixels 2241 // 16 pixels
2257 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes). 2242 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes).
2258 __declspec(naked) 2243 __declspec(naked)
2259 void I422ToABGRMatrixRow_AVX2(const uint8* y_buf, 2244 void I422ToABGRRow_AVX2(const uint8* y_buf,
2260 const uint8* u_buf, 2245 const uint8* u_buf,
2261 const uint8* v_buf, 2246 const uint8* v_buf,
2262 uint8* dst_argb, 2247 uint8* dst_argb,
2263 struct YuvConstants* YuvConstants, 2248 struct YuvConstants* yuvconstants,
2264 int width) { 2249 int width) {
2265 __asm { 2250 __asm {
2266 push esi 2251 push esi
2267 push edi 2252 push edi
2268 push ebp 2253 push ebp
2269 mov eax, [esp + 12 + 4] // Y 2254 mov eax, [esp + 12 + 4] // Y
2270 mov esi, [esp + 12 + 8] // U 2255 mov esi, [esp + 12 + 8] // U
2271 mov edi, [esp + 12 + 12] // V 2256 mov edi, [esp + 12 + 12] // V
2272 mov edx, [esp + 12 + 16] // argb 2257 mov edx, [esp + 12 + 16] // argb
2273 mov ebp, [esp + 12 + 20] // YuvConstants 2258 mov ebp, [esp + 12 + 20] // YuvConstants
2274 mov ecx, [esp + 12 + 24] // width 2259 mov ecx, [esp + 12 + 24] // width
(...skipping 199 matching lines...) Expand 10 before | Expand all | Expand 10 after
2474 __asm por xmm3, xmm2 /* BG */ \ 2459 __asm por xmm3, xmm2 /* BG */ \
2475 __asm por xmm1, xmm3 /* BGR */ \ 2460 __asm por xmm1, xmm3 /* BGR */ \
2476 __asm packssdw xmm0, xmm1 \ 2461 __asm packssdw xmm0, xmm1 \
2477 __asm movdqu 0[edx], xmm0 /* store 8 pixels of RGB565 */ \ 2462 __asm movdqu 0[edx], xmm0 /* store 8 pixels of RGB565 */ \
2478 __asm lea edx, [edx + 16] \ 2463 __asm lea edx, [edx + 16] \
2479 } 2464 }
2480 2465
2481 // 8 pixels. 2466 // 8 pixels.
2482 // 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes). 2467 // 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes).
2483 __declspec(naked) 2468 __declspec(naked)
2484 void I444ToARGBMatrixRow_SSSE3(const uint8* y_buf, 2469 void I444ToARGBRow_SSSE3(const uint8* y_buf,
2485 const uint8* u_buf, 2470 const uint8* u_buf,
2486 const uint8* v_buf, 2471 const uint8* v_buf,
2487 uint8* dst_argb, 2472 uint8* dst_argb,
2488 struct YuvConstants* YuvConstants, 2473 struct YuvConstants* yuvconstants,
2489 int width) { 2474 int width) {
2490 __asm { 2475 __asm {
2491 push esi 2476 push esi
2492 push edi 2477 push edi
2493 push ebp 2478 push ebp
2494 mov eax, [esp + 12 + 4] // Y 2479 mov eax, [esp + 12 + 4] // Y
2495 mov esi, [esp + 12 + 8] // U 2480 mov esi, [esp + 12 + 8] // U
2496 mov edi, [esp + 12 + 12] // V 2481 mov edi, [esp + 12 + 12] // V
2497 mov edx, [esp + 12 + 16] // argb 2482 mov edx, [esp + 12 + 16] // argb
2498 mov ebp, [esp + 12 + 20] // YuvConstants 2483 mov ebp, [esp + 12 + 20] // YuvConstants
2499 mov ecx, [esp + 12 + 24] // width 2484 mov ecx, [esp + 12 + 24] // width
(...skipping 11 matching lines...) Expand all
2511 pop ebp 2496 pop ebp
2512 pop edi 2497 pop edi
2513 pop esi 2498 pop esi
2514 ret 2499 ret
2515 } 2500 }
2516 } 2501 }
2517 2502
2518 // 8 pixels. 2503 // 8 pixels.
2519 // 8 UV values, mixed with 8 Y producing 8 ABGR (32 bytes). 2504 // 8 UV values, mixed with 8 Y producing 8 ABGR (32 bytes).
2520 __declspec(naked) 2505 __declspec(naked)
2521 void I444ToABGRMatrixRow_SSSE3(const uint8* y_buf, 2506 void I444ToABGRRow_SSSE3(const uint8* y_buf,
2522 const uint8* u_buf, 2507 const uint8* u_buf,
2523 const uint8* v_buf, 2508 const uint8* v_buf,
2524 uint8* dst_abgr, 2509 uint8* dst_abgr,
2525 struct YuvConstants* YuvConstants, 2510 struct YuvConstants* yuvconstants,
2526 int width) { 2511 int width) {
2527 __asm { 2512 __asm {
2528 push esi 2513 push esi
2529 push edi 2514 push edi
2530 push ebp 2515 push ebp
2531 mov eax, [esp + 12 + 4] // Y 2516 mov eax, [esp + 12 + 4] // Y
2532 mov esi, [esp + 12 + 8] // U 2517 mov esi, [esp + 12 + 8] // U
2533 mov edi, [esp + 12 + 12] // V 2518 mov edi, [esp + 12 + 12] // V
2534 mov edx, [esp + 12 + 16] // abgr 2519 mov edx, [esp + 12 + 16] // abgr
2535 mov ebp, [esp + 12 + 20] // YuvConstants 2520 mov ebp, [esp + 12 + 20] // YuvConstants
2536 mov ecx, [esp + 12 + 24] // width 2521 mov ecx, [esp + 12 + 24] // width
(...skipping 15 matching lines...) Expand all
2552 } 2537 }
2553 } 2538 }
2554 2539
2555 // 8 pixels. 2540 // 8 pixels.
2556 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB24 (24 bytes). 2541 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB24 (24 bytes).
2557 __declspec(naked) 2542 __declspec(naked)
2558 void I422ToRGB24Row_SSSE3(const uint8* y_buf, 2543 void I422ToRGB24Row_SSSE3(const uint8* y_buf,
2559 const uint8* u_buf, 2544 const uint8* u_buf,
2560 const uint8* v_buf, 2545 const uint8* v_buf,
2561 uint8* dst_rgb24, 2546 uint8* dst_rgb24,
2547 struct YuvConstants* yuvconstants,
2562 int width) { 2548 int width) {
2563 __asm { 2549 __asm {
2564 push esi 2550 push esi
2565 push edi 2551 push edi
2566 mov eax, [esp + 8 + 4] // Y 2552 push ebp
2567 mov esi, [esp + 8 + 8] // U 2553 mov eax, [esp + 12 + 4] // Y
2568 mov edi, [esp + 8 + 12] // V 2554 mov esi, [esp + 12 + 8] // U
2569 mov edx, [esp + 8 + 16] // rgb24 2555 mov edi, [esp + 12 + 12] // V
2570 mov ecx, [esp + 8 + 20] // width 2556 mov edx, [esp + 12 + 16] // rgb24
2557 mov ebp, [esp + 12 + 20] // YuvConstants
2558 mov ecx, [esp + 12 + 24] // width
2571 sub edi, esi 2559 sub edi, esi
2572 movdqa xmm5, xmmword ptr kShuffleMaskARGBToRGB24_0 2560 movdqa xmm5, xmmword ptr kShuffleMaskARGBToRGB24_0
2573 movdqa xmm6, xmmword ptr kShuffleMaskARGBToRGB24 2561 movdqa xmm6, xmmword ptr kShuffleMaskARGBToRGB24
2574 2562
2575 convertloop: 2563 convertloop:
2576 READYUV422 2564 READYUV422
2577 YUVTORGB(kYuvConstants) 2565 YUVTORGB(ebp)
2578 STORERGB24 2566 STORERGB24
2579 2567
2580 sub ecx, 8 2568 sub ecx, 8
2581 jg convertloop 2569 jg convertloop
2582 2570
2571 pop ebp
2583 pop edi 2572 pop edi
2584 pop esi 2573 pop esi
2585 ret 2574 ret
2586 } 2575 }
2587 } 2576 }
2588 2577
2589 // 8 pixels. 2578 // 8 pixels.
2590 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RAW (24 bytes). 2579 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RAW (24 bytes).
2591 __declspec(naked) 2580 __declspec(naked)
2592 void I422ToRAWRow_SSSE3(const uint8* y_buf, 2581 void I422ToRAWRow_SSSE3(const uint8* y_buf,
2593 const uint8* u_buf, 2582 const uint8* u_buf,
2594 const uint8* v_buf, 2583 const uint8* v_buf,
2595 uint8* dst_raw, 2584 uint8* dst_raw,
2585 struct YuvConstants* yuvconstants,
2596 int width) { 2586 int width) {
2597 __asm { 2587 __asm {
2598 push esi 2588 push esi
2599 push edi 2589 push edi
2600 mov eax, [esp + 8 + 4] // Y 2590 push ebp
2601 mov esi, [esp + 8 + 8] // U 2591 mov eax, [esp + 12 + 4] // Y
2602 mov edi, [esp + 8 + 12] // V 2592 mov esi, [esp + 12 + 8] // U
2603 mov edx, [esp + 8 + 16] // raw 2593 mov edi, [esp + 12 + 12] // V
2604 mov ecx, [esp + 8 + 20] // width 2594 mov edx, [esp + 12 + 16] // raw
2595 mov ebp, [esp + 12 + 20] // YuvConstants
2596 mov ecx, [esp + 12 + 24] // width
2605 sub edi, esi 2597 sub edi, esi
2606 movdqa xmm5, xmmword ptr kShuffleMaskARGBToRAW_0 2598 movdqa xmm5, xmmword ptr kShuffleMaskARGBToRAW_0
2607 movdqa xmm6, xmmword ptr kShuffleMaskARGBToRAW 2599 movdqa xmm6, xmmword ptr kShuffleMaskARGBToRAW
2608 2600
2609 convertloop: 2601 convertloop:
2610 READYUV422 2602 READYUV422
2611 YUVTORGB(kYuvConstants) 2603 YUVTORGB(ebp)
2612 STORERAW 2604 STORERAW
2613 2605
2614 sub ecx, 8 2606 sub ecx, 8
2615 jg convertloop 2607 jg convertloop
2616 2608
2609 pop ebp
2617 pop edi 2610 pop edi
2618 pop esi 2611 pop esi
2619 ret 2612 ret
2620 } 2613 }
2621 } 2614 }
2622 2615
2623 // 8 pixels 2616 // 8 pixels
2624 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB565 (16 bytes). 2617 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB565 (16 bytes).
2625 __declspec(naked) 2618 __declspec(naked)
2626 void I422ToRGB565Row_SSSE3(const uint8* y_buf, 2619 void I422ToRGB565Row_SSSE3(const uint8* y_buf,
2627 const uint8* u_buf, 2620 const uint8* u_buf,
2628 const uint8* v_buf, 2621 const uint8* v_buf,
2629 uint8* rgb565_buf, 2622 uint8* rgb565_buf,
2623 struct YuvConstants* yuvconstants,
2630 int width) { 2624 int width) {
2631 __asm { 2625 __asm {
2632 push esi 2626 push esi
2633 push edi 2627 push edi
2634 mov eax, [esp + 8 + 4] // Y 2628 push ebp
2635 mov esi, [esp + 8 + 8] // U 2629 mov eax, [esp + 12 + 4] // Y
2636 mov edi, [esp + 8 + 12] // V 2630 mov esi, [esp + 12 + 8] // U
2637 mov edx, [esp + 8 + 16] // rgb565 2631 mov edi, [esp + 12 + 12] // V
2638 mov ecx, [esp + 8 + 20] // width 2632 mov edx, [esp + 12 + 16] // rgb565
2633 mov ebp, [esp + 12 + 20] // YuvConstants
2634 mov ecx, [esp + 12 + 24] // width
2639 sub edi, esi 2635 sub edi, esi
2640 pcmpeqb xmm5, xmm5 // generate mask 0x0000001f 2636 pcmpeqb xmm5, xmm5 // generate mask 0x0000001f
2641 psrld xmm5, 27 2637 psrld xmm5, 27
2642 pcmpeqb xmm6, xmm6 // generate mask 0x000007e0 2638 pcmpeqb xmm6, xmm6 // generate mask 0x000007e0
2643 psrld xmm6, 26 2639 psrld xmm6, 26
2644 pslld xmm6, 5 2640 pslld xmm6, 5
2645 pcmpeqb xmm7, xmm7 // generate mask 0xfffff800 2641 pcmpeqb xmm7, xmm7 // generate mask 0xfffff800
2646 pslld xmm7, 11 2642 pslld xmm7, 11
2647 2643
2648 convertloop: 2644 convertloop:
2649 READYUV422 2645 READYUV422
2650 YUVTORGB(kYuvConstants) 2646 YUVTORGB(ebp)
2651 STORERGB565 2647 STORERGB565
2652 2648
2653 sub ecx, 8 2649 sub ecx, 8
2654 jg convertloop 2650 jg convertloop
2655 2651
2652 pop ebp
2656 pop edi 2653 pop edi
2657 pop esi 2654 pop esi
2658 ret 2655 ret
2659 } 2656 }
2660 } 2657 }
2661 2658
2662 // 8 pixels. 2659 // 8 pixels.
2663 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). 2660 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
2664 __declspec(naked) 2661 __declspec(naked)
2665 void I422ToARGBMatrixRow_SSSE3(const uint8* y_buf, 2662 void I422ToARGBRow_SSSE3(const uint8* y_buf,
2666 const uint8* u_buf, 2663 const uint8* u_buf,
2667 const uint8* v_buf, 2664 const uint8* v_buf,
2668 uint8* dst_argb, 2665 uint8* dst_argb,
2669 struct YuvConstants* YuvConstants, 2666 struct YuvConstants* yuvconstants,
2670 int width) { 2667 int width) {
2671 __asm { 2668 __asm {
2672 push esi 2669 push esi
2673 push edi 2670 push edi
2674 push ebp 2671 push ebp
2675 mov eax, [esp + 12 + 4] // Y 2672 mov eax, [esp + 12 + 4] // Y
2676 mov esi, [esp + 12 + 8] // U 2673 mov esi, [esp + 12 + 8] // U
2677 mov edi, [esp + 12 + 12] // V 2674 mov edi, [esp + 12 + 12] // V
2678 mov edx, [esp + 12 + 16] // argb 2675 mov edx, [esp + 12 + 16] // argb
2679 mov ebp, [esp + 12 + 20] // YuvConstants 2676 mov ebp, [esp + 12 + 20] // YuvConstants
2680 mov ecx, [esp + 12 + 24] // width 2677 mov ecx, [esp + 12 + 24] // width
(...skipping 16 matching lines...) Expand all
2697 } 2694 }
2698 2695
2699 // 8 pixels. 2696 // 8 pixels.
2700 // 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). 2697 // 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
2701 // Similar to I420 but duplicate UV once more. 2698 // Similar to I420 but duplicate UV once more.
2702 __declspec(naked) 2699 __declspec(naked)
2703 void I411ToARGBRow_SSSE3(const uint8* y_buf, 2700 void I411ToARGBRow_SSSE3(const uint8* y_buf,
2704 const uint8* u_buf, 2701 const uint8* u_buf,
2705 const uint8* v_buf, 2702 const uint8* v_buf,
2706 uint8* dst_argb, 2703 uint8* dst_argb,
2704 struct YuvConstants* yuvconstants,
2707 int width) { 2705 int width) {
2708 __asm { 2706 __asm {
2709 push ebx
2710 push esi 2707 push esi
2711 push edi 2708 push edi
2709 push ebp
2712 mov eax, [esp + 12 + 4] // Y 2710 mov eax, [esp + 12 + 4] // Y
2713 mov esi, [esp + 12 + 8] // U 2711 mov esi, [esp + 12 + 8] // U
2714 mov edi, [esp + 12 + 12] // V 2712 mov edi, [esp + 12 + 12] // V
2715 mov edx, [esp + 12 + 16] // argb 2713 mov edx, [esp + 12 + 16] // argb
2716 mov ecx, [esp + 12 + 20] // width 2714 mov ebp, [esp + 12 + 20] // YuvConstants
2715 mov ecx, [esp + 12 + 24] // width
2717 sub edi, esi 2716 sub edi, esi
2718 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 2717 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
2719 2718
2720 convertloop: 2719 convertloop:
2721 READYUV411 // modifies EBX 2720 READYUV411
2722 YUVTORGB(kYuvConstants) 2721 YUVTORGB(ebp)
2723 STOREARGB 2722 STOREARGB
2724 2723
2725 sub ecx, 8 2724 sub ecx, 8
2726 jg convertloop 2725 jg convertloop
2727 2726
2727 pop ebp
2728 pop edi 2728 pop edi
2729 pop esi 2729 pop esi
2730 pop ebx
2731 ret 2730 ret
2732 } 2731 }
2733 } 2732 }
2734 2733
2735 // 8 pixels. 2734 // 8 pixels.
2736 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). 2735 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
2737 __declspec(naked) 2736 __declspec(naked)
2738 void NV12ToARGBRow_SSSE3(const uint8* y_buf, 2737 void NV12ToARGBRow_SSSE3(const uint8* y_buf,
2739 const uint8* uv_buf, 2738 const uint8* uv_buf,
2740 uint8* dst_argb, 2739 uint8* dst_argb,
2740 struct YuvConstants* yuvconstants,
2741 int width) { 2741 int width) {
2742 __asm { 2742 __asm {
2743 push esi 2743 push esi
2744 mov eax, [esp + 4 + 4] // Y 2744 push ebp
2745 mov esi, [esp + 4 + 8] // UV 2745 mov eax, [esp + 8 + 4] // Y
2746 mov edx, [esp + 4 + 12] // argb 2746 mov esi, [esp + 8 + 8] // UV
2747 mov ecx, [esp + 4 + 16] // width 2747 mov edx, [esp + 8 + 12] // argb
2748 mov ebp, [esp + 8 + 16] // YuvConstants
2749 mov ecx, [esp + 8 + 20] // width
2748 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 2750 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
2749 2751
2750 convertloop: 2752 convertloop:
2751 READNV12 2753 READNV12
2752 YUVTORGB(kYuvConstants) 2754 YUVTORGB(ebp)
2753 STOREARGB 2755 STOREARGB
2754 2756
2755 sub ecx, 8 2757 sub ecx, 8
2756 jg convertloop 2758 jg convertloop
2757 2759
2760 pop ebp
2758 pop esi 2761 pop esi
2759 ret 2762 ret
2760 } 2763 }
2761 }
2762
2763 // 8 pixels.
2764 // 4 VU values upsampled to 8 VU, mixed with 8 Y producing 8 ARGB (32 bytes).
2765 __declspec(naked)
2766 void NV21ToARGBRow_SSSE3(const uint8* y_buf,
2767 const uint8* uv_buf,
2768 uint8* dst_argb,
2769 int width) {
2770 __asm {
2771 push esi
2772 mov eax, [esp + 4 + 4] // Y
2773 mov esi, [esp + 4 + 8] // UV
2774 mov edx, [esp + 4 + 12] // argb
2775 mov ecx, [esp + 4 + 16] // width
2776 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
2777
2778 convertloop:
2779 READNV12
2780 YUVTORGB(kYvuConstants)
2781 STOREARGB
2782
2783 sub ecx, 8
2784 jg convertloop
2785
2786 pop esi
2787 ret
2788 }
2789 } 2764 }
2790 2765
2791 __declspec(naked) 2766 __declspec(naked)
2792 void I422ToBGRARow_SSSE3(const uint8* y_buf, 2767 void I422ToBGRARow_SSSE3(const uint8* y_buf,
2793 const uint8* u_buf, 2768 const uint8* u_buf,
2794 const uint8* v_buf, 2769 const uint8* v_buf,
2795 uint8* dst_bgra, 2770 uint8* dst_bgra,
2771 struct YuvConstants* yuvconstants,
2796 int width) { 2772 int width) {
2797 __asm { 2773 __asm {
2798 push esi 2774 push esi
2799 push edi 2775 push edi
2800 mov eax, [esp + 8 + 4] // Y 2776 push ebp
2801 mov esi, [esp + 8 + 8] // U 2777 mov eax, [esp + 12 + 4] // Y
2802 mov edi, [esp + 8 + 12] // V 2778 mov esi, [esp + 12 + 8] // U
2803 mov edx, [esp + 8 + 16] // bgra 2779 mov edi, [esp + 12 + 12] // V
2804 mov ecx, [esp + 8 + 20] // width 2780 mov edx, [esp + 12 + 16] // bgra
2781 mov ebp, [esp + 12 + 20] // YuvConstants
2782 mov ecx, [esp + 12 + 24] // width
2805 sub edi, esi 2783 sub edi, esi
2806 2784
2807 convertloop: 2785 convertloop:
2808 READYUV422 2786 READYUV422
2809 YUVTORGB(kYuvConstants) 2787 YUVTORGB(ebp)
2810 STOREBGRA 2788 STOREBGRA
2811 2789
2812 sub ecx, 8 2790 sub ecx, 8
2813 jg convertloop 2791 jg convertloop
2814 2792
2793 pop ebp
2815 pop edi 2794 pop edi
2816 pop esi 2795 pop esi
2817 ret 2796 ret
2818 } 2797 }
2819 } 2798 }
2820 2799
2821 __declspec(naked) 2800 __declspec(naked)
2822 void I422ToABGRMatrixRow_SSSE3(const uint8* y_buf, 2801 void I422ToABGRRow_SSSE3(const uint8* y_buf,
2823 const uint8* u_buf, 2802 const uint8* u_buf,
2824 const uint8* v_buf, 2803 const uint8* v_buf,
2825 uint8* dst_abgr, 2804 uint8* dst_abgr,
2826 struct YuvConstants* YuvConstants, 2805 struct YuvConstants* yuvconstants,
2827 int width) { 2806 int width) {
2828 __asm { 2807 __asm {
2829 push esi 2808 push esi
2830 push edi 2809 push edi
2831 push ebp 2810 push ebp
2832 mov eax, [esp + 12 + 4] // Y 2811 mov eax, [esp + 12 + 4] // Y
2833 mov esi, [esp + 12 + 8] // U 2812 mov esi, [esp + 12 + 8] // U
2834 mov edi, [esp + 12 + 12] // V 2813 mov edi, [esp + 12 + 12] // V
2835 mov edx, [esp + 12 + 16] // argb 2814 mov edx, [esp + 12 + 16] // abgr
2836 mov ebp, [esp + 12 + 20] // YuvConstants 2815 mov ebp, [esp + 12 + 20] // YuvConstants
2837 mov ecx, [esp + 12 + 24] // width 2816 mov ecx, [esp + 12 + 24] // width
(...skipping 13 matching lines...) Expand all
2851 pop esi 2830 pop esi
2852 ret 2831 ret
2853 } 2832 }
2854 } 2833 }
2855 2834
2856 __declspec(naked) 2835 __declspec(naked)
2857 void I422ToRGBARow_SSSE3(const uint8* y_buf, 2836 void I422ToRGBARow_SSSE3(const uint8* y_buf,
2858 const uint8* u_buf, 2837 const uint8* u_buf,
2859 const uint8* v_buf, 2838 const uint8* v_buf,
2860 uint8* dst_rgba, 2839 uint8* dst_rgba,
2840 struct YuvConstants* yuvconstants,
2861 int width) { 2841 int width) {
2862 __asm { 2842 __asm {
2863 push esi 2843 push esi
2864 push edi 2844 push edi
2865 mov eax, [esp + 8 + 4] // Y 2845 push ebp
2866 mov esi, [esp + 8 + 8] // U 2846 mov eax, [esp + 12 + 4] // Y
2867 mov edi, [esp + 8 + 12] // V 2847 mov esi, [esp + 12 + 8] // U
2868 mov edx, [esp + 8 + 16] // rgba 2848 mov edi, [esp + 12 + 12] // V
2869 mov ecx, [esp + 8 + 20] // width 2849 mov edx, [esp + 12 + 16] // rgba
2850 mov ebp, [esp + 12 + 20] // YuvConstants
2851 mov ecx, [esp + 12 + 24] // width
2870 sub edi, esi 2852 sub edi, esi
2871 2853
2872 convertloop: 2854 convertloop:
2873 READYUV422 2855 READYUV422
2874 YUVTORGB(kYuvConstants) 2856 YUVTORGB(ebp)
2875 STORERGBA 2857 STORERGBA
2876 2858
2877 sub ecx, 8 2859 sub ecx, 8
2878 jg convertloop 2860 jg convertloop
2879 2861
2862 pop ebp
2880 pop edi 2863 pop edi
2881 pop esi 2864 pop esi
2882 ret 2865 ret
2883 } 2866 }
2884 } 2867 }
2885
2886 #endif // HAS_I422TOARGBROW_SSSE3 2868 #endif // HAS_I422TOARGBROW_SSSE3
2887 2869
2888 #ifdef HAS_I400TOARGBROW_SSE2 2870 #ifdef HAS_I400TOARGBROW_SSE2
2889 // 8 pixels of Y converted to 8 pixels of ARGB (32 bytes). 2871 // 8 pixels of Y converted to 8 pixels of ARGB (32 bytes).
2890 __declspec(naked) 2872 __declspec(naked)
2891 void I400ToARGBRow_SSE2(const uint8* y_buf, 2873 void I400ToARGBRow_SSE2(const uint8* y_buf,
2892 uint8* rgb_buf, 2874 uint8* rgb_buf,
2893 int width) { 2875 int width) {
2894 __asm { 2876 __asm {
2895 mov eax, 0x4a354a35 // 4a35 = 18997 = round(1.164 * 64 * 256) 2877 mov eax, 0x4a354a35 // 4a35 = 18997 = round(1.164 * 64 * 256)
(...skipping 3382 matching lines...) Expand 10 before | Expand all | Expand 10 after
6278 } 6260 }
6279 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 6261 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
6280 6262
6281 #endif // defined(_M_X64) 6263 #endif // defined(_M_X64)
6282 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) 6264 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64))
6283 6265
6284 #ifdef __cplusplus 6266 #ifdef __cplusplus
6285 } // extern "C" 6267 } // extern "C"
6286 } // namespace libyuv 6268 } // namespace libyuv
6287 #endif 6269 #endif
OLDNEW
« no previous file with comments | « source/row_neon64.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698