Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(814)

Side by Side Diff: source/row_win.cc

Issue 1398623002: fix jpeg and bt.709 yuvconstants for neon64. (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_neon64.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after
94 _mm_storeu_si128((__m128i *)dst_abgr, xmm2); \ 94 _mm_storeu_si128((__m128i *)dst_abgr, xmm2); \
95 _mm_storeu_si128((__m128i *)(dst_abgr + 16), xmm1); \ 95 _mm_storeu_si128((__m128i *)(dst_abgr + 16), xmm1); \
96 dst_abgr += 32; 96 dst_abgr += 32;
97 97
98 98
99 #if defined(HAS_I422TOARGBROW_SSSE3) 99 #if defined(HAS_I422TOARGBROW_SSSE3)
100 void I422ToARGBRow_SSSE3(const uint8* y_buf, 100 void I422ToARGBRow_SSSE3(const uint8* y_buf,
101 const uint8* u_buf, 101 const uint8* u_buf,
102 const uint8* v_buf, 102 const uint8* v_buf,
103 uint8* dst_argb, 103 uint8* dst_argb,
104 struct YuvConstants* yuvconstants, 104 const struct YuvConstants* yuvconstants,
105 int width) { 105 int width) {
106 __m128i xmm0, xmm1, xmm2, xmm4; 106 __m128i xmm0, xmm1, xmm2, xmm4;
107 const __m128i xmm5 = _mm_set1_epi8(-1); 107 const __m128i xmm5 = _mm_set1_epi8(-1);
108 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; 108 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
109 while (width > 0) { 109 while (width > 0) {
110 READYUV422 110 READYUV422
111 YUVTORGB(yuvconstants) 111 YUVTORGB(yuvconstants)
112 STOREARGB 112 STOREARGB
113 width -= 8; 113 width -= 8;
114 } 114 }
115 } 115 }
116 #endif 116 #endif
117 117
118 #if defined(HAS_I422TOABGRROW_SSSE3) 118 #if defined(HAS_I422TOABGRROW_SSSE3)
119 void I422ToABGRRow_SSSE3(const uint8* y_buf, 119 void I422ToABGRRow_SSSE3(const uint8* y_buf,
120 const uint8* u_buf, 120 const uint8* u_buf,
121 const uint8* v_buf, 121 const uint8* v_buf,
122 uint8* dst_abgr, 122 uint8* dst_abgr,
123 struct YuvConstants* yuvconstants, 123 const struct YuvConstants* yuvconstants,
124 int width) { 124 int width) {
125 __m128i xmm0, xmm1, xmm2, xmm4; 125 __m128i xmm0, xmm1, xmm2, xmm4;
126 const __m128i xmm5 = _mm_set1_epi8(-1); 126 const __m128i xmm5 = _mm_set1_epi8(-1);
127 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; 127 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
128 while (width > 0) { 128 while (width > 0) {
129 READYUV422 129 READYUV422
130 YUVTORGB(yuvconstants) 130 YUVTORGB(yuvconstants)
131 STOREABGR 131 STOREABGR
132 width -= 8; 132 width -= 8;
133 } 133 }
134 } 134 }
135 #endif 135 #endif
136 136
137 #if defined(HAS_I422ALPHATOARGBROW_SSSE3) 137 #if defined(HAS_I422ALPHATOARGBROW_SSSE3)
138 void I422AlphaToARGBRow_SSSE3(const uint8* y_buf, 138 void I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
139 const uint8* u_buf, 139 const uint8* u_buf,
140 const uint8* v_buf, 140 const uint8* v_buf,
141 const uint8* a_buf, 141 const uint8* a_buf,
142 uint8* dst_argb, 142 uint8* dst_argb,
143 struct YuvConstants* yuvconstants, 143 const struct YuvConstants* yuvconstants,
144 int width) { 144 int width) {
145 __m128i xmm0, xmm1, xmm2, xmm4, xmm5; 145 __m128i xmm0, xmm1, xmm2, xmm4, xmm5;
146 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; 146 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
147 while (width > 0) { 147 while (width > 0) {
148 READYUVA422 148 READYUVA422
149 YUVTORGB(yuvconstants) 149 YUVTORGB(yuvconstants)
150 STOREARGB 150 STOREARGB
151 width -= 8; 151 width -= 8;
152 } 152 }
153 } 153 }
154 #endif 154 #endif
155 155
156 #if defined(HAS_I422ALPHATOABGRROW_SSSE3) 156 #if defined(HAS_I422ALPHATOABGRROW_SSSE3)
157 void I422AlphaToABGRRow_SSSE3(const uint8* y_buf, 157 void I422AlphaToABGRRow_SSSE3(const uint8* y_buf,
158 const uint8* u_buf, 158 const uint8* u_buf,
159 const uint8* v_buf, 159 const uint8* v_buf,
160 const uint8* a_buf, 160 const uint8* a_buf,
161 uint8* dst_abgr, 161 uint8* dst_abgr,
162 struct YuvConstants* yuvconstants, 162 const struct YuvConstants* yuvconstants,
163 int width) { 163 int width) {
164 __m128i xmm0, xmm1, xmm2, xmm4, xmm5; 164 __m128i xmm0, xmm1, xmm2, xmm4, xmm5;
165 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; 165 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
166 while (width > 0) { 166 while (width > 0) {
167 READYUVA422 167 READYUVA422
168 YUVTORGB(yuvconstants) 168 YUVTORGB(yuvconstants)
169 STOREABGR 169 STOREABGR
170 width -= 8; 170 width -= 8;
171 } 171 }
172 } 172 }
(...skipping 1930 matching lines...) Expand 10 before | Expand all | Expand 10 after
2103 } 2103 }
2104 2104
2105 #ifdef HAS_I422TOARGBROW_AVX2 2105 #ifdef HAS_I422TOARGBROW_AVX2
2106 // 16 pixels 2106 // 16 pixels
2107 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). 2107 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
2108 __declspec(naked) 2108 __declspec(naked)
2109 void I422ToARGBRow_AVX2(const uint8* y_buf, 2109 void I422ToARGBRow_AVX2(const uint8* y_buf,
2110 const uint8* u_buf, 2110 const uint8* u_buf,
2111 const uint8* v_buf, 2111 const uint8* v_buf,
2112 uint8* dst_argb, 2112 uint8* dst_argb,
2113 struct YuvConstants* yuvconstants, 2113 const struct YuvConstants* yuvconstants,
2114 int width) { 2114 int width) {
2115 __asm { 2115 __asm {
2116 push esi 2116 push esi
2117 push edi 2117 push edi
2118 push ebx 2118 push ebx
2119 mov eax, [esp + 12 + 4] // Y 2119 mov eax, [esp + 12 + 4] // Y
2120 mov esi, [esp + 12 + 8] // U 2120 mov esi, [esp + 12 + 8] // U
2121 mov edi, [esp + 12 + 12] // V 2121 mov edi, [esp + 12 + 12] // V
2122 mov edx, [esp + 12 + 16] // argb 2122 mov edx, [esp + 12 + 16] // argb
2123 mov ebx, [esp + 12 + 20] // yuvconstants 2123 mov ebx, [esp + 12 + 20] // yuvconstants
(...skipping 20 matching lines...) Expand all
2144 2144
2145 #ifdef HAS_I422ALPHATOARGBROW_AVX2 2145 #ifdef HAS_I422ALPHATOARGBROW_AVX2
2146 // 16 pixels 2146 // 16 pixels
2147 // 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ARGB. 2147 // 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ARGB.
2148 __declspec(naked) 2148 __declspec(naked)
2149 void I422AlphaToARGBRow_AVX2(const uint8* y_buf, 2149 void I422AlphaToARGBRow_AVX2(const uint8* y_buf,
2150 const uint8* u_buf, 2150 const uint8* u_buf,
2151 const uint8* v_buf, 2151 const uint8* v_buf,
2152 const uint8* a_buf, 2152 const uint8* a_buf,
2153 uint8* dst_argb, 2153 uint8* dst_argb,
2154 struct YuvConstants* yuvconstants, 2154 const struct YuvConstants* yuvconstants,
2155 int width) { 2155 int width) {
2156 __asm { 2156 __asm {
2157 push esi 2157 push esi
2158 push edi 2158 push edi
2159 push ebx 2159 push ebx
2160 push ebp 2160 push ebp
2161 mov eax, [esp + 16 + 4] // Y 2161 mov eax, [esp + 16 + 4] // Y
2162 mov esi, [esp + 16 + 8] // U 2162 mov esi, [esp + 16 + 8] // U
2163 mov edi, [esp + 16 + 12] // V 2163 mov edi, [esp + 16 + 12] // V
2164 mov ebp, [esp + 16 + 16] // A 2164 mov ebp, [esp + 16 + 16] // A
(...skipping 22 matching lines...) Expand all
2187 2187
2188 #ifdef HAS_I422ALPHATOABGRROW_AVX2 2188 #ifdef HAS_I422ALPHATOABGRROW_AVX2
2189 // 16 pixels 2189 // 16 pixels
2190 // 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ABGR. 2190 // 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ABGR.
2191 __declspec(naked) 2191 __declspec(naked)
2192 void I422AlphaToABGRRow_AVX2(const uint8* y_buf, 2192 void I422AlphaToABGRRow_AVX2(const uint8* y_buf,
2193 const uint8* u_buf, 2193 const uint8* u_buf,
2194 const uint8* v_buf, 2194 const uint8* v_buf,
2195 const uint8* a_buf, 2195 const uint8* a_buf,
2196 uint8* dst_abgr, 2196 uint8* dst_abgr,
2197 struct YuvConstants* yuvconstants, 2197 const struct YuvConstants* yuvconstants,
2198 int width) { 2198 int width) {
2199 __asm { 2199 __asm {
2200 push esi 2200 push esi
2201 push edi 2201 push edi
2202 push ebx 2202 push ebx
2203 push ebp 2203 push ebp
2204 mov eax, [esp + 16 + 4] // Y 2204 mov eax, [esp + 16 + 4] // Y
2205 mov esi, [esp + 16 + 8] // U 2205 mov esi, [esp + 16 + 8] // U
2206 mov edi, [esp + 16 + 12] // V 2206 mov edi, [esp + 16 + 12] // V
2207 mov ebp, [esp + 16 + 16] // A 2207 mov ebp, [esp + 16 + 16] // A
(...skipping 21 matching lines...) Expand all
2229 #endif // HAS_I422ALPHATOABGRROW_AVX2 2229 #endif // HAS_I422ALPHATOABGRROW_AVX2
2230 2230
2231 #ifdef HAS_I444TOARGBROW_AVX2 2231 #ifdef HAS_I444TOARGBROW_AVX2
2232 // 16 pixels 2232 // 16 pixels
2233 // 16 UV values with 16 Y producing 16 ARGB (64 bytes). 2233 // 16 UV values with 16 Y producing 16 ARGB (64 bytes).
2234 __declspec(naked) 2234 __declspec(naked)
2235 void I444ToARGBRow_AVX2(const uint8* y_buf, 2235 void I444ToARGBRow_AVX2(const uint8* y_buf,
2236 const uint8* u_buf, 2236 const uint8* u_buf,
2237 const uint8* v_buf, 2237 const uint8* v_buf,
2238 uint8* dst_argb, 2238 uint8* dst_argb,
2239 struct YuvConstants* yuvconstants, 2239 const struct YuvConstants* yuvconstants,
2240 int width) { 2240 int width) {
2241 __asm { 2241 __asm {
2242 push esi 2242 push esi
2243 push edi 2243 push edi
2244 push ebx 2244 push ebx
2245 mov eax, [esp + 12 + 4] // Y 2245 mov eax, [esp + 12 + 4] // Y
2246 mov esi, [esp + 12 + 8] // U 2246 mov esi, [esp + 12 + 8] // U
2247 mov edi, [esp + 12 + 12] // V 2247 mov edi, [esp + 12 + 12] // V
2248 mov edx, [esp + 12 + 16] // argb 2248 mov edx, [esp + 12 + 16] // argb
2249 mov ebx, [esp + 12 + 20] // yuvconstants 2249 mov ebx, [esp + 12 + 20] // yuvconstants
(...skipping 18 matching lines...) Expand all
2268 #endif // HAS_I444TOARGBROW_AVX2 2268 #endif // HAS_I444TOARGBROW_AVX2
2269 2269
2270 #ifdef HAS_I444TOABGRROW_AVX2 2270 #ifdef HAS_I444TOABGRROW_AVX2
2271 // 16 pixels 2271 // 16 pixels
2272 // 16 UV values with 16 Y producing 16 ABGR (64 bytes). 2272 // 16 UV values with 16 Y producing 16 ABGR (64 bytes).
2273 __declspec(naked) 2273 __declspec(naked)
2274 void I444ToABGRRow_AVX2(const uint8* y_buf, 2274 void I444ToABGRRow_AVX2(const uint8* y_buf,
2275 const uint8* u_buf, 2275 const uint8* u_buf,
2276 const uint8* v_buf, 2276 const uint8* v_buf,
2277 uint8* dst_abgr, 2277 uint8* dst_abgr,
2278 struct YuvConstants* yuvconstants, 2278 const struct YuvConstants* yuvconstants,
2279 int width) { 2279 int width) {
2280 __asm { 2280 __asm {
2281 push esi 2281 push esi
2282 push edi 2282 push edi
2283 push ebx 2283 push ebx
2284 mov eax, [esp + 12 + 4] // Y 2284 mov eax, [esp + 12 + 4] // Y
2285 mov esi, [esp + 12 + 8] // U 2285 mov esi, [esp + 12 + 8] // U
2286 mov edi, [esp + 12 + 12] // V 2286 mov edi, [esp + 12 + 12] // V
2287 mov edx, [esp + 12 + 16] // abgr 2287 mov edx, [esp + 12 + 16] // abgr
2288 mov ebx, [esp + 12 + 20] // yuvconstants 2288 mov ebx, [esp + 12 + 20] // yuvconstants
(...skipping 18 matching lines...) Expand all
2307 #endif // HAS_I444TOABGRROW_AVX2 2307 #endif // HAS_I444TOABGRROW_AVX2
2308 2308
2309 #ifdef HAS_I411TOARGBROW_AVX2 2309 #ifdef HAS_I411TOARGBROW_AVX2
2310 // 16 pixels 2310 // 16 pixels
2311 // 4 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). 2311 // 4 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
2312 __declspec(naked) 2312 __declspec(naked)
2313 void I411ToARGBRow_AVX2(const uint8* y_buf, 2313 void I411ToARGBRow_AVX2(const uint8* y_buf,
2314 const uint8* u_buf, 2314 const uint8* u_buf,
2315 const uint8* v_buf, 2315 const uint8* v_buf,
2316 uint8* dst_argb, 2316 uint8* dst_argb,
2317 struct YuvConstants* yuvconstants, 2317 const struct YuvConstants* yuvconstants,
2318 int width) { 2318 int width) {
2319 __asm { 2319 __asm {
2320 push esi 2320 push esi
2321 push edi 2321 push edi
2322 push ebx 2322 push ebx
2323 mov eax, [esp + 12 + 4] // Y 2323 mov eax, [esp + 12 + 4] // Y
2324 mov esi, [esp + 12 + 8] // U 2324 mov esi, [esp + 12 + 8] // U
2325 mov edi, [esp + 12 + 12] // V 2325 mov edi, [esp + 12 + 12] // V
2326 mov edx, [esp + 12 + 16] // argb 2326 mov edx, [esp + 12 + 16] // argb
2327 mov ebx, [esp + 12 + 20] // yuvconstants 2327 mov ebx, [esp + 12 + 20] // yuvconstants
(...skipping 18 matching lines...) Expand all
2346 } 2346 }
2347 #endif // HAS_I411TOARGBROW_AVX2 2347 #endif // HAS_I411TOARGBROW_AVX2
2348 2348
2349 #ifdef HAS_NV12TOARGBROW_AVX2 2349 #ifdef HAS_NV12TOARGBROW_AVX2
2350 // 16 pixels. 2350 // 16 pixels.
2351 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). 2351 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
2352 __declspec(naked) 2352 __declspec(naked)
2353 void NV12ToARGBRow_AVX2(const uint8* y_buf, 2353 void NV12ToARGBRow_AVX2(const uint8* y_buf,
2354 const uint8* uv_buf, 2354 const uint8* uv_buf,
2355 uint8* dst_argb, 2355 uint8* dst_argb,
2356 struct YuvConstants* yuvconstants, 2356 const struct YuvConstants* yuvconstants,
2357 int width) { 2357 int width) {
2358 __asm { 2358 __asm {
2359 push esi 2359 push esi
2360 push ebx 2360 push ebx
2361 mov eax, [esp + 8 + 4] // Y 2361 mov eax, [esp + 8 + 4] // Y
2362 mov esi, [esp + 8 + 8] // UV 2362 mov esi, [esp + 8 + 8] // UV
2363 mov edx, [esp + 8 + 12] // argb 2363 mov edx, [esp + 8 + 12] // argb
2364 mov ebx, [esp + 8 + 16] // yuvconstants 2364 mov ebx, [esp + 8 + 16] // yuvconstants
2365 mov ecx, [esp + 8 + 20] // width 2365 mov ecx, [esp + 8 + 20] // width
2366 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xff in every byte for alpha 2366 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xff in every byte for alpha
(...skipping 14 matching lines...) Expand all
2381 } 2381 }
2382 #endif // HAS_NV12TOARGBROW_AVX2 2382 #endif // HAS_NV12TOARGBROW_AVX2
2383 2383
2384 #ifdef HAS_NV21TOARGBROW_AVX2 2384 #ifdef HAS_NV21TOARGBROW_AVX2
2385 // 16 pixels. 2385 // 16 pixels.
2386 // 8 VU values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). 2386 // 8 VU values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
2387 __declspec(naked) 2387 __declspec(naked)
2388 void NV21ToARGBRow_AVX2(const uint8* y_buf, 2388 void NV21ToARGBRow_AVX2(const uint8* y_buf,
2389 const uint8* vu_buf, 2389 const uint8* vu_buf,
2390 uint8* dst_argb, 2390 uint8* dst_argb,
2391 struct YuvConstants* yuvconstants, 2391 const struct YuvConstants* yuvconstants,
2392 int width) { 2392 int width) {
2393 __asm { 2393 __asm {
2394 push esi 2394 push esi
2395 push ebx 2395 push ebx
2396 mov eax, [esp + 8 + 4] // Y 2396 mov eax, [esp + 8 + 4] // Y
2397 mov esi, [esp + 8 + 8] // VU 2397 mov esi, [esp + 8 + 8] // VU
2398 mov edx, [esp + 8 + 12] // argb 2398 mov edx, [esp + 8 + 12] // argb
2399 mov ebx, [esp + 8 + 16] // yuvconstants 2399 mov ebx, [esp + 8 + 16] // yuvconstants
2400 mov ecx, [esp + 8 + 20] // width 2400 mov ecx, [esp + 8 + 20] // width
2401 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xff in every byte for alpha 2401 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xff in every byte for alpha
(...skipping 12 matching lines...) Expand all
2414 ret 2414 ret
2415 } 2415 }
2416 } 2416 }
2417 #endif // HAS_NV21TOARGBROW_AVX2 2417 #endif // HAS_NV21TOARGBROW_AVX2
2418 2418
2419 // 16 pixels. 2419 // 16 pixels.
2420 // 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes). 2420 // 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes).
2421 __declspec(naked) 2421 __declspec(naked)
2422 void YUY2ToARGBRow_AVX2(const uint8* src_yuy2, 2422 void YUY2ToARGBRow_AVX2(const uint8* src_yuy2,
2423 uint8* dst_argb, 2423 uint8* dst_argb,
2424 struct YuvConstants* yuvconstants, 2424 const struct YuvConstants* yuvconstants,
2425 int width) { 2425 int width) {
2426 __asm { 2426 __asm {
2427 push ebx 2427 push ebx
2428 mov eax, [esp + 4 + 4] // yuy2 2428 mov eax, [esp + 4 + 4] // yuy2
2429 mov edx, [esp + 4 + 8] // argb 2429 mov edx, [esp + 4 + 8] // argb
2430 mov ebx, [esp + 4 + 12] // yuvconstants 2430 mov ebx, [esp + 4 + 12] // yuvconstants
2431 mov ecx, [esp + 4 + 16] // width 2431 mov ecx, [esp + 4 + 16] // width
2432 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xff in every byte for alpha 2432 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xff in every byte for alpha
2433 2433
2434 convertloop: 2434 convertloop:
2435 READYUY2_AVX2 2435 READYUY2_AVX2
2436 YUVTORGB_AVX2(ebx) 2436 YUVTORGB_AVX2(ebx)
2437 STOREARGB_AVX2 2437 STOREARGB_AVX2
2438 2438
2439 sub ecx, 16 2439 sub ecx, 16
2440 jg convertloop 2440 jg convertloop
2441 2441
2442 pop ebx 2442 pop ebx
2443 vzeroupper 2443 vzeroupper
2444 ret 2444 ret
2445 } 2445 }
2446 } 2446 }
2447 2447
2448 // 16 pixels. 2448 // 16 pixels.
2449 // 8 UYVY values with 16 Y and 8 UV producing 16 ARGB (64 bytes). 2449 // 8 UYVY values with 16 Y and 8 UV producing 16 ARGB (64 bytes).
2450 __declspec(naked) 2450 __declspec(naked)
2451 void UYVYToARGBRow_AVX2(const uint8* src_uyvy, 2451 void UYVYToARGBRow_AVX2(const uint8* src_uyvy,
2452 uint8* dst_argb, 2452 uint8* dst_argb,
2453 struct YuvConstants* yuvconstants, 2453 const struct YuvConstants* yuvconstants,
2454 int width) { 2454 int width) {
2455 __asm { 2455 __asm {
2456 push ebx 2456 push ebx
2457 mov eax, [esp + 4 + 4] // uyvy 2457 mov eax, [esp + 4 + 4] // uyvy
2458 mov edx, [esp + 4 + 8] // argb 2458 mov edx, [esp + 4 + 8] // argb
2459 mov ebx, [esp + 4 + 12] // yuvconstants 2459 mov ebx, [esp + 4 + 12] // yuvconstants
2460 mov ecx, [esp + 4 + 16] // width 2460 mov ecx, [esp + 4 + 16] // width
2461 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xff in every byte for alpha 2461 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xff in every byte for alpha
2462 2462
2463 convertloop: 2463 convertloop:
(...skipping 13 matching lines...) Expand all
2477 2477
2478 #ifdef HAS_I422TOBGRAROW_AVX2 2478 #ifdef HAS_I422TOBGRAROW_AVX2
2479 // 16 pixels 2479 // 16 pixels
2480 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes). 2480 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes).
2481 // TODO(fbarchard): Use macros to reduce duplicate code. See SSSE3. 2481 // TODO(fbarchard): Use macros to reduce duplicate code. See SSSE3.
2482 __declspec(naked) 2482 __declspec(naked)
2483 void I422ToBGRARow_AVX2(const uint8* y_buf, 2483 void I422ToBGRARow_AVX2(const uint8* y_buf,
2484 const uint8* u_buf, 2484 const uint8* u_buf,
2485 const uint8* v_buf, 2485 const uint8* v_buf,
2486 uint8* dst_argb, 2486 uint8* dst_argb,
2487 struct YuvConstants* yuvconstants, 2487 const struct YuvConstants* yuvconstants,
2488 int width) { 2488 int width) {
2489 __asm { 2489 __asm {
2490 push esi 2490 push esi
2491 push edi 2491 push edi
2492 push ebx 2492 push ebx
2493 mov eax, [esp + 12 + 4] // Y 2493 mov eax, [esp + 12 + 4] // Y
2494 mov esi, [esp + 12 + 8] // U 2494 mov esi, [esp + 12 + 8] // U
2495 mov edi, [esp + 12 + 12] // V 2495 mov edi, [esp + 12 + 12] // V
2496 mov edx, [esp + 12 + 16] // bgra 2496 mov edx, [esp + 12 + 16] // bgra
2497 mov ebx, [esp + 12 + 20] // yuvconstants 2497 mov ebx, [esp + 12 + 20] // yuvconstants
(...skipping 19 matching lines...) Expand all
2517 #endif // HAS_I422TOBGRAROW_AVX2 2517 #endif // HAS_I422TOBGRAROW_AVX2
2518 2518
2519 #ifdef HAS_I422TORGBAROW_AVX2 2519 #ifdef HAS_I422TORGBAROW_AVX2
2520 // 16 pixels 2520 // 16 pixels
2521 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes). 2521 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes).
2522 __declspec(naked) 2522 __declspec(naked)
2523 void I422ToRGBARow_AVX2(const uint8* y_buf, 2523 void I422ToRGBARow_AVX2(const uint8* y_buf,
2524 const uint8* u_buf, 2524 const uint8* u_buf,
2525 const uint8* v_buf, 2525 const uint8* v_buf,
2526 uint8* dst_argb, 2526 uint8* dst_argb,
2527 struct YuvConstants* yuvconstants, 2527 const struct YuvConstants* yuvconstants,
2528 int width) { 2528 int width) {
2529 __asm { 2529 __asm {
2530 push esi 2530 push esi
2531 push edi 2531 push edi
2532 push ebx 2532 push ebx
2533 mov eax, [esp + 12 + 4] // Y 2533 mov eax, [esp + 12 + 4] // Y
2534 mov esi, [esp + 12 + 8] // U 2534 mov esi, [esp + 12 + 8] // U
2535 mov edi, [esp + 12 + 12] // V 2535 mov edi, [esp + 12 + 12] // V
2536 mov edx, [esp + 12 + 16] // rgba 2536 mov edx, [esp + 12 + 16] // rgba
2537 mov ebx, [esp + 12 + 20] // yuvconstants 2537 mov ebx, [esp + 12 + 20] // yuvconstants
(...skipping 19 matching lines...) Expand all
2557 #endif // HAS_I422TORGBAROW_AVX2 2557 #endif // HAS_I422TORGBAROW_AVX2
2558 2558
2559 #ifdef HAS_I422TOABGRROW_AVX2 2559 #ifdef HAS_I422TOABGRROW_AVX2
2560 // 16 pixels 2560 // 16 pixels
2561 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes). 2561 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes).
2562 __declspec(naked) 2562 __declspec(naked)
2563 void I422ToABGRRow_AVX2(const uint8* y_buf, 2563 void I422ToABGRRow_AVX2(const uint8* y_buf,
2564 const uint8* u_buf, 2564 const uint8* u_buf,
2565 const uint8* v_buf, 2565 const uint8* v_buf,
2566 uint8* dst_argb, 2566 uint8* dst_argb,
2567 struct YuvConstants* yuvconstants, 2567 const struct YuvConstants* yuvconstants,
2568 int width) { 2568 int width) {
2569 __asm { 2569 __asm {
2570 push esi 2570 push esi
2571 push edi 2571 push edi
2572 push ebx 2572 push ebx
2573 mov eax, [esp + 12 + 4] // Y 2573 mov eax, [esp + 12 + 4] // Y
2574 mov esi, [esp + 12 + 8] // U 2574 mov esi, [esp + 12 + 8] // U
2575 mov edi, [esp + 12 + 12] // V 2575 mov edi, [esp + 12 + 12] // V
2576 mov edx, [esp + 12 + 16] // abgr 2576 mov edx, [esp + 12 + 16] // abgr
2577 mov ebx, [esp + 12 + 20] // yuvconstants 2577 mov ebx, [esp + 12 + 20] // yuvconstants
(...skipping 255 matching lines...) Expand 10 before | Expand all | Expand 10 after
2833 __asm lea edx, [edx + 16] \ 2833 __asm lea edx, [edx + 16] \
2834 } 2834 }
2835 2835
2836 // 8 pixels. 2836 // 8 pixels.
2837 // 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes). 2837 // 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes).
2838 __declspec(naked) 2838 __declspec(naked)
2839 void I444ToARGBRow_SSSE3(const uint8* y_buf, 2839 void I444ToARGBRow_SSSE3(const uint8* y_buf,
2840 const uint8* u_buf, 2840 const uint8* u_buf,
2841 const uint8* v_buf, 2841 const uint8* v_buf,
2842 uint8* dst_argb, 2842 uint8* dst_argb,
2843 struct YuvConstants* yuvconstants, 2843 const struct YuvConstants* yuvconstants,
2844 int width) { 2844 int width) {
2845 __asm { 2845 __asm {
2846 push esi 2846 push esi
2847 push edi 2847 push edi
2848 push ebx 2848 push ebx
2849 mov eax, [esp + 12 + 4] // Y 2849 mov eax, [esp + 12 + 4] // Y
2850 mov esi, [esp + 12 + 8] // U 2850 mov esi, [esp + 12 + 8] // U
2851 mov edi, [esp + 12 + 12] // V 2851 mov edi, [esp + 12 + 12] // V
2852 mov edx, [esp + 12 + 16] // argb 2852 mov edx, [esp + 12 + 16] // argb
2853 mov ebx, [esp + 12 + 20] // yuvconstants 2853 mov ebx, [esp + 12 + 20] // yuvconstants
(...skipping 16 matching lines...) Expand all
2870 } 2870 }
2871 } 2871 }
2872 2872
2873 // 8 pixels. 2873 // 8 pixels.
2874 // 8 UV values, mixed with 8 Y producing 8 ABGR (32 bytes). 2874 // 8 UV values, mixed with 8 Y producing 8 ABGR (32 bytes).
2875 __declspec(naked) 2875 __declspec(naked)
2876 void I444ToABGRRow_SSSE3(const uint8* y_buf, 2876 void I444ToABGRRow_SSSE3(const uint8* y_buf,
2877 const uint8* u_buf, 2877 const uint8* u_buf,
2878 const uint8* v_buf, 2878 const uint8* v_buf,
2879 uint8* dst_abgr, 2879 uint8* dst_abgr,
2880 struct YuvConstants* yuvconstants, 2880 const struct YuvConstants* yuvconstants,
2881 int width) { 2881 int width) {
2882 __asm { 2882 __asm {
2883 push esi 2883 push esi
2884 push edi 2884 push edi
2885 push ebx 2885 push ebx
2886 mov eax, [esp + 12 + 4] // Y 2886 mov eax, [esp + 12 + 4] // Y
2887 mov esi, [esp + 12 + 8] // U 2887 mov esi, [esp + 12 + 8] // U
2888 mov edi, [esp + 12 + 12] // V 2888 mov edi, [esp + 12 + 12] // V
2889 mov edx, [esp + 12 + 16] // abgr 2889 mov edx, [esp + 12 + 16] // abgr
2890 mov ebx, [esp + 12 + 20] // yuvconstants 2890 mov ebx, [esp + 12 + 20] // yuvconstants
(...skipping 16 matching lines...) Expand all
2907 } 2907 }
2908 } 2908 }
2909 2909
2910 // 8 pixels. 2910 // 8 pixels.
2911 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB24 (24 bytes). 2911 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB24 (24 bytes).
2912 __declspec(naked) 2912 __declspec(naked)
2913 void I422ToRGB24Row_SSSE3(const uint8* y_buf, 2913 void I422ToRGB24Row_SSSE3(const uint8* y_buf,
2914 const uint8* u_buf, 2914 const uint8* u_buf,
2915 const uint8* v_buf, 2915 const uint8* v_buf,
2916 uint8* dst_rgb24, 2916 uint8* dst_rgb24,
2917 struct YuvConstants* yuvconstants, 2917 const struct YuvConstants* yuvconstants,
2918 int width) { 2918 int width) {
2919 __asm { 2919 __asm {
2920 push esi 2920 push esi
2921 push edi 2921 push edi
2922 push ebx 2922 push ebx
2923 mov eax, [esp + 12 + 4] // Y 2923 mov eax, [esp + 12 + 4] // Y
2924 mov esi, [esp + 12 + 8] // U 2924 mov esi, [esp + 12 + 8] // U
2925 mov edi, [esp + 12 + 12] // V 2925 mov edi, [esp + 12 + 12] // V
2926 mov edx, [esp + 12 + 16] // rgb24 2926 mov edx, [esp + 12 + 16] // rgb24
2927 mov ebx, [esp + 12 + 20] // yuvconstants 2927 mov ebx, [esp + 12 + 20] // yuvconstants
(...skipping 17 matching lines...) Expand all
2945 } 2945 }
2946 } 2946 }
2947 2947
2948 // 8 pixels. 2948 // 8 pixels.
2949 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RAW (24 bytes). 2949 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RAW (24 bytes).
2950 __declspec(naked) 2950 __declspec(naked)
2951 void I422ToRAWRow_SSSE3(const uint8* y_buf, 2951 void I422ToRAWRow_SSSE3(const uint8* y_buf,
2952 const uint8* u_buf, 2952 const uint8* u_buf,
2953 const uint8* v_buf, 2953 const uint8* v_buf,
2954 uint8* dst_raw, 2954 uint8* dst_raw,
2955 struct YuvConstants* yuvconstants, 2955 const struct YuvConstants* yuvconstants,
2956 int width) { 2956 int width) {
2957 __asm { 2957 __asm {
2958 push esi 2958 push esi
2959 push edi 2959 push edi
2960 push ebx 2960 push ebx
2961 mov eax, [esp + 12 + 4] // Y 2961 mov eax, [esp + 12 + 4] // Y
2962 mov esi, [esp + 12 + 8] // U 2962 mov esi, [esp + 12 + 8] // U
2963 mov edi, [esp + 12 + 12] // V 2963 mov edi, [esp + 12 + 12] // V
2964 mov edx, [esp + 12 + 16] // raw 2964 mov edx, [esp + 12 + 16] // raw
2965 mov ebx, [esp + 12 + 20] // yuvconstants 2965 mov ebx, [esp + 12 + 20] // yuvconstants
(...skipping 17 matching lines...) Expand all
2983 } 2983 }
2984 } 2984 }
2985 2985
2986 // 8 pixels 2986 // 8 pixels
2987 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB565 (16 bytes). 2987 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB565 (16 bytes).
2988 __declspec(naked) 2988 __declspec(naked)
2989 void I422ToRGB565Row_SSSE3(const uint8* y_buf, 2989 void I422ToRGB565Row_SSSE3(const uint8* y_buf,
2990 const uint8* u_buf, 2990 const uint8* u_buf,
2991 const uint8* v_buf, 2991 const uint8* v_buf,
2992 uint8* rgb565_buf, 2992 uint8* rgb565_buf,
2993 struct YuvConstants* yuvconstants, 2993 const struct YuvConstants* yuvconstants,
2994 int width) { 2994 int width) {
2995 __asm { 2995 __asm {
2996 push esi 2996 push esi
2997 push edi 2997 push edi
2998 push ebx 2998 push ebx
2999 mov eax, [esp + 12 + 4] // Y 2999 mov eax, [esp + 12 + 4] // Y
3000 mov esi, [esp + 12 + 8] // U 3000 mov esi, [esp + 12 + 8] // U
3001 mov edi, [esp + 12 + 12] // V 3001 mov edi, [esp + 12 + 12] // V
3002 mov edx, [esp + 12 + 16] // rgb565 3002 mov edx, [esp + 12 + 16] // rgb565
3003 mov ebx, [esp + 12 + 20] // yuvconstants 3003 mov ebx, [esp + 12 + 20] // yuvconstants
(...skipping 22 matching lines...) Expand all
3026 } 3026 }
3027 } 3027 }
3028 3028
3029 // 8 pixels. 3029 // 8 pixels.
3030 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). 3030 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
3031 __declspec(naked) 3031 __declspec(naked)
3032 void I422ToARGBRow_SSSE3(const uint8* y_buf, 3032 void I422ToARGBRow_SSSE3(const uint8* y_buf,
3033 const uint8* u_buf, 3033 const uint8* u_buf,
3034 const uint8* v_buf, 3034 const uint8* v_buf,
3035 uint8* dst_argb, 3035 uint8* dst_argb,
3036 struct YuvConstants* yuvconstants, 3036 const struct YuvConstants* yuvconstants,
3037 int width) { 3037 int width) {
3038 __asm { 3038 __asm {
3039 push esi 3039 push esi
3040 push edi 3040 push edi
3041 push ebx 3041 push ebx
3042 mov eax, [esp + 12 + 4] // Y 3042 mov eax, [esp + 12 + 4] // Y
3043 mov esi, [esp + 12 + 8] // U 3043 mov esi, [esp + 12 + 8] // U
3044 mov edi, [esp + 12 + 12] // V 3044 mov edi, [esp + 12 + 12] // V
3045 mov edx, [esp + 12 + 16] // argb 3045 mov edx, [esp + 12 + 16] // argb
3046 mov ebx, [esp + 12 + 20] // yuvconstants 3046 mov ebx, [esp + 12 + 20] // yuvconstants
(...skipping 17 matching lines...) Expand all
3064 } 3064 }
3065 3065
3066 // 8 pixels. 3066 // 8 pixels.
3067 // 4 UV values upsampled to 8 UV, mixed with 8 Y and 8 A producing 8 ARGB. 3067 // 4 UV values upsampled to 8 UV, mixed with 8 Y and 8 A producing 8 ARGB.
3068 __declspec(naked) 3068 __declspec(naked)
3069 void I422AlphaToARGBRow_SSSE3(const uint8* y_buf, 3069 void I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
3070 const uint8* u_buf, 3070 const uint8* u_buf,
3071 const uint8* v_buf, 3071 const uint8* v_buf,
3072 const uint8* a_buf, 3072 const uint8* a_buf,
3073 uint8* dst_argb, 3073 uint8* dst_argb,
3074 struct YuvConstants* yuvconstants, 3074 const struct YuvConstants* yuvconstants,
3075 int width) { 3075 int width) {
3076 __asm { 3076 __asm {
3077 push esi 3077 push esi
3078 push edi 3078 push edi
3079 push ebx 3079 push ebx
3080 push ebp 3080 push ebp
3081 mov eax, [esp + 16 + 4] // Y 3081 mov eax, [esp + 16 + 4] // Y
3082 mov esi, [esp + 16 + 8] // U 3082 mov esi, [esp + 16 + 8] // U
3083 mov edi, [esp + 16 + 12] // V 3083 mov edi, [esp + 16 + 12] // V
3084 mov ebp, [esp + 16 + 16] // A 3084 mov ebp, [esp + 16 + 16] // A
(...skipping 19 matching lines...) Expand all
3104 } 3104 }
3105 3105
3106 // 8 pixels. 3106 // 8 pixels.
3107 // 4 UV values upsampled to 8 UV, mixed with 8 Y and 8 A producing 8 ABGR. 3107 // 4 UV values upsampled to 8 UV, mixed with 8 Y and 8 A producing 8 ABGR.
3108 __declspec(naked) 3108 __declspec(naked)
3109 void I422AlphaToABGRRow_SSSE3(const uint8* y_buf, 3109 void I422AlphaToABGRRow_SSSE3(const uint8* y_buf,
3110 const uint8* u_buf, 3110 const uint8* u_buf,
3111 const uint8* v_buf, 3111 const uint8* v_buf,
3112 const uint8* a_buf, 3112 const uint8* a_buf,
3113 uint8* dst_abgr, 3113 uint8* dst_abgr,
3114 struct YuvConstants* yuvconstants, 3114 const struct YuvConstants* yuvconstants,
3115 int width) { 3115 int width) {
3116 __asm { 3116 __asm {
3117 push esi 3117 push esi
3118 push edi 3118 push edi
3119 push ebx 3119 push ebx
3120 push ebp 3120 push ebp
3121 mov eax, [esp + 16 + 4] // Y 3121 mov eax, [esp + 16 + 4] // Y
3122 mov esi, [esp + 16 + 8] // U 3122 mov esi, [esp + 16 + 8] // U
3123 mov edi, [esp + 16 + 12] // V 3123 mov edi, [esp + 16 + 12] // V
3124 mov ebp, [esp + 16 + 16] // A 3124 mov ebp, [esp + 16 + 16] // A
(...skipping 19 matching lines...) Expand all
3144 } 3144 }
3145 3145
3146 // 8 pixels. 3146 // 8 pixels.
3147 // 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). 3147 // 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
3148 // Similar to I420 but duplicate UV once more. 3148 // Similar to I420 but duplicate UV once more.
3149 __declspec(naked) 3149 __declspec(naked)
3150 void I411ToARGBRow_SSSE3(const uint8* y_buf, 3150 void I411ToARGBRow_SSSE3(const uint8* y_buf,
3151 const uint8* u_buf, 3151 const uint8* u_buf,
3152 const uint8* v_buf, 3152 const uint8* v_buf,
3153 uint8* dst_argb, 3153 uint8* dst_argb,
3154 struct YuvConstants* yuvconstants, 3154 const struct YuvConstants* yuvconstants,
3155 int width) { 3155 int width) {
3156 __asm { 3156 __asm {
3157 push esi 3157 push esi
3158 push edi 3158 push edi
3159 push ebx 3159 push ebx
3160 mov eax, [esp + 12 + 4] // Y 3160 mov eax, [esp + 12 + 4] // Y
3161 mov esi, [esp + 12 + 8] // U 3161 mov esi, [esp + 12 + 8] // U
3162 mov edi, [esp + 12 + 12] // V 3162 mov edi, [esp + 12 + 12] // V
3163 mov edx, [esp + 12 + 16] // argb 3163 mov edx, [esp + 12 + 16] // argb
3164 mov ebx, [esp + 12 + 20] // yuvconstants 3164 mov ebx, [esp + 12 + 20] // yuvconstants
(...skipping 15 matching lines...) Expand all
3180 ret 3180 ret
3181 } 3181 }
3182 } 3182 }
3183 3183
3184 // 8 pixels. 3184 // 8 pixels.
3185 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). 3185 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
3186 __declspec(naked) 3186 __declspec(naked)
3187 void NV12ToARGBRow_SSSE3(const uint8* y_buf, 3187 void NV12ToARGBRow_SSSE3(const uint8* y_buf,
3188 const uint8* uv_buf, 3188 const uint8* uv_buf,
3189 uint8* dst_argb, 3189 uint8* dst_argb,
3190 struct YuvConstants* yuvconstants, 3190 const struct YuvConstants* yuvconstants,
3191 int width) { 3191 int width) {
3192 __asm { 3192 __asm {
3193 push esi 3193 push esi
3194 push ebx 3194 push ebx
3195 mov eax, [esp + 8 + 4] // Y 3195 mov eax, [esp + 8 + 4] // Y
3196 mov esi, [esp + 8 + 8] // UV 3196 mov esi, [esp + 8 + 8] // UV
3197 mov edx, [esp + 8 + 12] // argb 3197 mov edx, [esp + 8 + 12] // argb
3198 mov ebx, [esp + 8 + 16] // yuvconstants 3198 mov ebx, [esp + 8 + 16] // yuvconstants
3199 mov ecx, [esp + 8 + 20] // width 3199 mov ecx, [esp + 8 + 20] // width
3200 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 3200 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
(...skipping 11 matching lines...) Expand all
3212 ret 3212 ret
3213 } 3213 }
3214 } 3214 }
3215 3215
3216 // 8 pixels. 3216 // 8 pixels.
3217 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). 3217 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
3218 __declspec(naked) 3218 __declspec(naked)
3219 void NV21ToARGBRow_SSSE3(const uint8* y_buf, 3219 void NV21ToARGBRow_SSSE3(const uint8* y_buf,
3220 const uint8* vu_buf, 3220 const uint8* vu_buf,
3221 uint8* dst_argb, 3221 uint8* dst_argb,
3222 struct YuvConstants* yuvconstants, 3222 const struct YuvConstants* yuvconstants,
3223 int width) { 3223 int width) {
3224 __asm { 3224 __asm {
3225 push esi 3225 push esi
3226 push ebx 3226 push ebx
3227 mov eax, [esp + 8 + 4] // Y 3227 mov eax, [esp + 8 + 4] // Y
3228 mov esi, [esp + 8 + 8] // VU 3228 mov esi, [esp + 8 + 8] // VU
3229 mov edx, [esp + 8 + 12] // argb 3229 mov edx, [esp + 8 + 12] // argb
3230 mov ebx, [esp + 8 + 16] // yuvconstants 3230 mov ebx, [esp + 8 + 16] // yuvconstants
3231 mov ecx, [esp + 8 + 20] // width 3231 mov ecx, [esp + 8 + 20] // width
3232 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 3232 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
(...skipping 10 matching lines...) Expand all
3243 pop esi 3243 pop esi
3244 ret 3244 ret
3245 } 3245 }
3246 } 3246 }
3247 3247
// YUY2ToARGBRow_SSSE3: converts one row of packed YUY2 pixels (src_yuy2) to
// ARGB (dst_argb), handing the yuvconstants pointer to the YUVTORGB macro.
// The function is __declspec(naked): the arguments are read directly from the
// stack and the routine issues its own ret.
3248 // 8 pixels. 3248 // 8 pixels.
3249 // 4 YUY2 values with 8 Y and 4 UV producing 8 ARGB (32 bytes). 3249 // 4 YUY2 values with 8 Y and 4 UV producing 8 ARGB (32 bytes).
3250 __declspec(naked) 3250 __declspec(naked)
3251 void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, 3251 void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
3252 uint8* dst_argb, 3252 uint8* dst_argb,
3253 struct YuvConstants* yuvconstants, 3253 const struct YuvConstants* yuvconstants,
3254 int width) { 3254 int width) {
3255 __asm { 3255 __asm {
// ebx is saved here and restored just before ret. The leading "4" in each
// stack operand below accounts for that saved value.
3256 push ebx 3256 push ebx
3257 mov eax, [esp + 4 + 4] // yuy2 3257 mov eax, [esp + 4 + 4] // yuy2
3258 mov edx, [esp + 4 + 8] // argb 3258 mov edx, [esp + 4 + 8] // argb
3259 mov ebx, [esp + 4 + 12] // yuvconstants 3259 mov ebx, [esp + 4 + 12] // yuvconstants
3260 mov ecx, [esp + 4 + 16] // width 3260 mov ecx, [esp + 4 + 16] // width
3261 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 3261 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
3262 3262
// READYUY2, YUVTORGB and STOREARGB are macros defined elsewhere in the file.
// NOTE(review): presumably they load 8 pixels through eax, convert them, store
// 32 bytes through edx and advance both pointers - confirm against the macro
// definitions.
3263 convertloop: 3263 convertloop:
3264 READYUY2 3264 READYUY2
3265 YUVTORGB(ebx) 3265 YUVTORGB(ebx)
3266 STOREARGB 3266 STOREARGB
3267 3267
// The counter drops by 8 per pass and the loop repeats while it is still
// positive, so the body always runs at least once and a width that is not a
// multiple of 8 still gets a full final pass.
// NOTE(review): callers presumably guarantee the buffers cover the rounded-up
// width - confirm.
3268 sub ecx, 8 3268 sub ecx, 8
3269 jg convertloop 3269 jg convertloop
3270 3270
3271 pop ebx 3271 pop ebx
3272 ret 3272 ret
3273 } 3273 }
3274 } 3274 }
3275 3275
// UYVYToARGBRow_SSSE3: converts one row of packed UYVY pixels (src_uyvy) to
// ARGB (dst_argb), handing the yuvconstants pointer to the YUVTORGB macro.
// The function is __declspec(naked): the arguments are read directly from the
// stack and the routine issues its own ret.
3276 // 8 pixels. 3276 // 8 pixels.
3277 // 4 UYVY values with 8 Y and 4 UV producing 8 ARGB (32 bytes). 3277 // 4 UYVY values with 8 Y and 4 UV producing 8 ARGB (32 bytes).
3278 __declspec(naked) 3278 __declspec(naked)
3279 void UYVYToARGBRow_SSSE3(const uint8* src_uyvy, 3279 void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
3280 uint8* dst_argb, 3280 uint8* dst_argb,
3281 struct YuvConstants* yuvconstants, 3281 const struct YuvConstants* yuvconstants,
3282 int width) { 3282 int width) {
3283 __asm { 3283 __asm {
// ebx is saved here and restored just before ret. The leading "4" in each
// stack operand below accounts for that saved value.
3284 push ebx 3284 push ebx
3285 mov eax, [esp + 4 + 4] // uyvy 3285 mov eax, [esp + 4 + 4] // uyvy
3286 mov edx, [esp + 4 + 8] // argb 3286 mov edx, [esp + 4 + 8] // argb
3287 mov ebx, [esp + 4 + 12] // yuvconstants 3287 mov ebx, [esp + 4 + 12] // yuvconstants
3288 mov ecx, [esp + 4 + 16] // width 3288 mov ecx, [esp + 4 + 16] // width
3289 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 3289 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
3290 3290
// READUYVY, YUVTORGB and STOREARGB are macros defined elsewhere in the file.
// NOTE(review): presumably they load 8 pixels through eax, convert them, store
// 32 bytes through edx and advance both pointers - confirm against the macro
// definitions.
3291 convertloop: 3291 convertloop:
3292 READUYVY 3292 READUYVY
3293 YUVTORGB(ebx) 3293 YUVTORGB(ebx)
3294 STOREARGB 3294 STOREARGB
3295 3295
// The counter drops by 8 per pass and the loop repeats while it is still
// positive, so the body always runs at least once and a width that is not a
// multiple of 8 still gets a full final pass.
// NOTE(review): callers presumably guarantee the buffers cover the rounded-up
// width - confirm.
3296 sub ecx, 8 3296 sub ecx, 8
3297 jg convertloop 3297 jg convertloop
3298 3298
3299 pop ebx 3299 pop ebx
3300 ret 3300 ret
3301 } 3301 }
3302 } 3302 }
3303 3303
3304 __declspec(naked) 3304 __declspec(naked)
3305 void I422ToBGRARow_SSSE3(const uint8* y_buf, 3305 void I422ToBGRARow_SSSE3(const uint8* y_buf,
3306 const uint8* u_buf, 3306 const uint8* u_buf,
3307 const uint8* v_buf, 3307 const uint8* v_buf,
3308 uint8* dst_bgra, 3308 uint8* dst_bgra,
3309 struct YuvConstants* yuvconstants, 3309 const struct YuvConstants* yuvconstants,
3310 int width) { 3310 int width) {
3311 __asm { 3311 __asm {
3312 push esi 3312 push esi
3313 push edi 3313 push edi
3314 push ebx 3314 push ebx
3315 mov eax, [esp + 12 + 4] // Y 3315 mov eax, [esp + 12 + 4] // Y
3316 mov esi, [esp + 12 + 8] // U 3316 mov esi, [esp + 12 + 8] // U
3317 mov edi, [esp + 12 + 12] // V 3317 mov edi, [esp + 12 + 12] // V
3318 mov edx, [esp + 12 + 16] // bgra 3318 mov edx, [esp + 12 + 16] // bgra
3319 mov ebx, [esp + 12 + 20] // yuvconstants 3319 mov ebx, [esp + 12 + 20] // yuvconstants
(...skipping 13 matching lines...) Expand all
3333 pop esi 3333 pop esi
3334 ret 3334 ret
3335 } 3335 }
3336 } 3336 }
3337 3337
3338 __declspec(naked) 3338 __declspec(naked)
3339 void I422ToABGRRow_SSSE3(const uint8* y_buf, 3339 void I422ToABGRRow_SSSE3(const uint8* y_buf,
3340 const uint8* u_buf, 3340 const uint8* u_buf,
3341 const uint8* v_buf, 3341 const uint8* v_buf,
3342 uint8* dst_abgr, 3342 uint8* dst_abgr,
3343 struct YuvConstants* yuvconstants, 3343 const struct YuvConstants* yuvconstants,
3344 int width) { 3344 int width) {
3345 __asm { 3345 __asm {
3346 push esi 3346 push esi
3347 push edi 3347 push edi
3348 push ebx 3348 push ebx
3349 mov eax, [esp + 12 + 4] // Y 3349 mov eax, [esp + 12 + 4] // Y
3350 mov esi, [esp + 12 + 8] // U 3350 mov esi, [esp + 12 + 8] // U
3351 mov edi, [esp + 12 + 12] // V 3351 mov edi, [esp + 12 + 12] // V
3352 mov edx, [esp + 12 + 16] // abgr 3352 mov edx, [esp + 12 + 16] // abgr
3353 mov ebx, [esp + 12 + 20] // yuvconstants 3353 mov ebx, [esp + 12 + 20] // yuvconstants
(...skipping 14 matching lines...) Expand all
3368 pop esi 3368 pop esi
3369 ret 3369 ret
3370 } 3370 }
3371 } 3371 }
3372 3372
3373 __declspec(naked) 3373 __declspec(naked)
3374 void I422ToRGBARow_SSSE3(const uint8* y_buf, 3374 void I422ToRGBARow_SSSE3(const uint8* y_buf,
3375 const uint8* u_buf, 3375 const uint8* u_buf,
3376 const uint8* v_buf, 3376 const uint8* v_buf,
3377 uint8* dst_rgba, 3377 uint8* dst_rgba,
3378 struct YuvConstants* yuvconstants, 3378 const struct YuvConstants* yuvconstants,
3379 int width) { 3379 int width) {
3380 __asm { 3380 __asm {
3381 push esi 3381 push esi
3382 push edi 3382 push edi
3383 push ebx 3383 push ebx
3384 mov eax, [esp + 12 + 4] // Y 3384 mov eax, [esp + 12 + 4] // Y
3385 mov esi, [esp + 12 + 8] // U 3385 mov esi, [esp + 12 + 8] // U
3386 mov edi, [esp + 12 + 12] // V 3386 mov edi, [esp + 12 + 12] // V
3387 mov edx, [esp + 12 + 16] // rgba 3387 mov edx, [esp + 12 + 16] // rgba
3388 mov ebx, [esp + 12 + 20] // yuvconstants 3388 mov ebx, [esp + 12 + 20] // yuvconstants
(...skipping 3247 matching lines...) Expand 10 before | Expand all | Expand 10 after
6636 } 6636 }
6637 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 6637 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
6638 6638
6639 #endif // defined(_M_X64) 6639 #endif // defined(_M_X64)
6640 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) 6640 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64))
6641 6641
6642 #ifdef __cplusplus 6642 #ifdef __cplusplus
6643 } // extern "C" 6643 } // extern "C"
6644 } // namespace libyuv 6644 } // namespace libyuv
6645 #endif 6645 #endif
OLDNEW
« no previous file with comments | « source/row_neon64.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698