OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
94 _mm_storeu_si128((__m128i *)dst_abgr, xmm2); \ | 94 _mm_storeu_si128((__m128i *)dst_abgr, xmm2); \ |
95 _mm_storeu_si128((__m128i *)(dst_abgr + 16), xmm1); \ | 95 _mm_storeu_si128((__m128i *)(dst_abgr + 16), xmm1); \ |
96 dst_abgr += 32; | 96 dst_abgr += 32; |
97 | 97 |
98 | 98 |
99 #if defined(HAS_I422TOARGBROW_SSSE3) | 99 #if defined(HAS_I422TOARGBROW_SSSE3) |
// I422ToARGBRow_SSSE3, intrinsics variant, shown as an OLD | NEW side-by-side
// diff. The columns differ only on the yuvconstants line, where NEW
// const-qualifies the pointer.
// Each loop pass expands READYUV422, YUVTORGB(yuvconstants) and STOREARGB, then
// lowers width by 8. The macro bodies are outside this excerpt, so what they
// load, compute and store is not described here.
// NOTE(review): the loop runs while width > 0 in steps of 8, so a width that is
// not a multiple of 8 still executes one more full pass -- presumably callers
// round the width up or handle the remainder themselves; confirm at call sites.
100 void I422ToARGBRow_SSSE3(const uint8* y_buf, | 100 void I422ToARGBRow_SSSE3(const uint8* y_buf, |
101 const uint8* u_buf, | 101 const uint8* u_buf, |
102 const uint8* v_buf, | 102 const uint8* v_buf, |
103 uint8* dst_argb, | 103 uint8* dst_argb, |
104 struct YuvConstants* yuvconstants, | 104 const struct YuvConstants* yuvconstants, |
105 int width) { | 105 int width) { |
// Working registers; they are not referenced by name in this body, so they are
// presumably used inside the macros -- confirm in the macro definitions.
106 __m128i xmm0, xmm1, xmm2, xmm4; | 106 __m128i xmm0, xmm1, xmm2, xmm4; |
// Every byte set to -1 (0xff). Presumably the opaque alpha channel consumed by
// STOREARGB -- TODO confirm.
107 const __m128i xmm5 = _mm_set1_epi8(-1); | 107 const __m128i xmm5 = _mm_set1_epi8(-1); |
// Byte distance from u_buf to v_buf. Presumably lets READYUV422 address V
// relative to the U pointer -- TODO confirm.
108 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; | 108 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; |
109 while (width > 0) { | 109 while (width > 0) { |
110 READYUV422 | 110 READYUV422 |
111 YUVTORGB(yuvconstants) | 111 YUVTORGB(yuvconstants) |
112 STOREARGB | 112 STOREARGB |
// One pass accounts for 8 units of width.
113 width -= 8; | 113 width -= 8; |
114 } | 114 } |
115 } | 115 } |
116 #endif | 116 #endif |
117 | 117 |
118 #if defined(HAS_I422TOABGRROW_SSSE3) | 118 #if defined(HAS_I422TOABGRROW_SSSE3) |
// I422ToABGRRow_SSSE3, intrinsics variant, shown as an OLD | NEW side-by-side
// diff. The columns differ only on the yuvconstants line, where NEW
// const-qualifies the pointer.
// Each loop pass expands READYUV422, YUVTORGB(yuvconstants) and STOREABGR, then
// lowers width by 8. Only the tail of the store macro that precedes this
// function is visible in this excerpt: it writes xmm2 and xmm1 to dst_abgr with
// two unaligned 16-byte stores and advances dst_abgr by 32 bytes. That tail is
// presumably the end of STOREABGR -- confirm. The other macro bodies are not
// visible.
// NOTE(review): the loop runs while width > 0 in steps of 8, so a width that is
// not a multiple of 8 still executes one more full pass -- presumably callers
// round the width up or handle the remainder themselves; confirm at call sites.
119 void I422ToABGRRow_SSSE3(const uint8* y_buf, | 119 void I422ToABGRRow_SSSE3(const uint8* y_buf, |
120 const uint8* u_buf, | 120 const uint8* u_buf, |
121 const uint8* v_buf, | 121 const uint8* v_buf, |
122 uint8* dst_abgr, | 122 uint8* dst_abgr, |
123 struct YuvConstants* yuvconstants, | 123 const struct YuvConstants* yuvconstants, |
124 int width) { | 124 int width) { |
// Working registers; they are not referenced by name in this body, so they are
// presumably used inside the macros -- confirm in the macro definitions.
125 __m128i xmm0, xmm1, xmm2, xmm4; | 125 __m128i xmm0, xmm1, xmm2, xmm4; |
// Every byte set to -1 (0xff). Presumably the opaque alpha channel consumed by
// STOREABGR -- TODO confirm.
126 const __m128i xmm5 = _mm_set1_epi8(-1); | 126 const __m128i xmm5 = _mm_set1_epi8(-1); |
// Byte distance from u_buf to v_buf. Presumably lets READYUV422 address V
// relative to the U pointer -- TODO confirm.
127 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; | 127 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; |
128 while (width > 0) { | 128 while (width > 0) { |
129 READYUV422 | 129 READYUV422 |
130 YUVTORGB(yuvconstants) | 130 YUVTORGB(yuvconstants) |
131 STOREABGR | 131 STOREABGR |
// One pass accounts for 8 units of width.
132 width -= 8; | 132 width -= 8; |
133 } | 133 } |
134 } | 134 } |
135 #endif | 135 #endif |
136 | 136 |
137 #if defined(HAS_I422ALPHATOARGBROW_SSSE3) | 137 #if defined(HAS_I422ALPHATOARGBROW_SSSE3) |
// I422AlphaToARGBRow_SSSE3, intrinsics variant, shown as an OLD | NEW
// side-by-side diff. The columns differ only on the yuvconstants line, where
// NEW const-qualifies the pointer.
// Takes an extra a_buf input next to the Y/U/V pointers. Each loop pass expands
// READYUVA422, YUVTORGB(yuvconstants) and STOREARGB, then lowers width by 8.
// The macro bodies are outside this excerpt, so what they load, compute and
// store is not described here.
// NOTE(review): the loop runs while width > 0 in steps of 8, so a width that is
// not a multiple of 8 still executes one more full pass -- presumably callers
// round the width up or handle the remainder themselves; confirm at call sites.
138 void I422AlphaToARGBRow_SSSE3(const uint8* y_buf, | 138 void I422AlphaToARGBRow_SSSE3(const uint8* y_buf, |
139 const uint8* u_buf, | 139 const uint8* u_buf, |
140 const uint8* v_buf, | 140 const uint8* v_buf, |
141 const uint8* a_buf, | 141 const uint8* a_buf, |
142 uint8* dst_argb, | 142 uint8* dst_argb, |
143 struct YuvConstants* yuvconstants, | 143 const struct YuvConstants* yuvconstants, |
144 int width) { | 144 int width) { |
// xmm5 is declared here without an initializer, and a_buf is never named in
// this body. Presumably READYUVA422 loads the alpha bytes from a_buf into xmm5
// on every pass -- TODO confirm.
145 __m128i xmm0, xmm1, xmm2, xmm4, xmm5; | 145 __m128i xmm0, xmm1, xmm2, xmm4, xmm5; |
// Byte distance from u_buf to v_buf. Presumably lets READYUVA422 address V
// relative to the U pointer -- TODO confirm.
146 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; | 146 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; |
147 while (width > 0) { | 147 while (width > 0) { |
148 READYUVA422 | 148 READYUVA422 |
149 YUVTORGB(yuvconstants) | 149 YUVTORGB(yuvconstants) |
150 STOREARGB | 150 STOREARGB |
// One pass accounts for 8 units of width.
151 width -= 8; | 151 width -= 8; |
152 } | 152 } |
153 } | 153 } |
154 #endif | 154 #endif |
155 | 155 |
156 #if defined(HAS_I422ALPHATOABGRROW_SSSE3) | 156 #if defined(HAS_I422ALPHATOABGRROW_SSSE3) |
// I422AlphaToABGRRow_SSSE3, intrinsics variant, shown as an OLD | NEW
// side-by-side diff. The columns differ only on the yuvconstants line, where
// NEW const-qualifies the pointer.
// Takes an extra a_buf input next to the Y/U/V pointers. Each loop pass expands
// READYUVA422, YUVTORGB(yuvconstants) and STOREABGR, then lowers width by 8.
// The macro bodies are outside this excerpt, so what they load, compute and
// store is not described here.
// NOTE(review): the loop runs while width > 0 in steps of 8, so a width that is
// not a multiple of 8 still executes one more full pass -- presumably callers
// round the width up or handle the remainder themselves; confirm at call sites.
157 void I422AlphaToABGRRow_SSSE3(const uint8* y_buf, | 157 void I422AlphaToABGRRow_SSSE3(const uint8* y_buf, |
158 const uint8* u_buf, | 158 const uint8* u_buf, |
159 const uint8* v_buf, | 159 const uint8* v_buf, |
160 const uint8* a_buf, | 160 const uint8* a_buf, |
161 uint8* dst_abgr, | 161 uint8* dst_abgr, |
162 struct YuvConstants* yuvconstants, | 162 const struct YuvConstants* yuvconstants, |
163 int width) { | 163 int width) { |
// xmm5 is declared here without an initializer, and a_buf is never named in
// this body. Presumably READYUVA422 loads the alpha bytes from a_buf into xmm5
// on every pass -- TODO confirm.
164 __m128i xmm0, xmm1, xmm2, xmm4, xmm5; | 164 __m128i xmm0, xmm1, xmm2, xmm4, xmm5; |
// Byte distance from u_buf to v_buf. Presumably lets READYUVA422 address V
// relative to the U pointer -- TODO confirm.
165 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; | 165 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; |
166 while (width > 0) { | 166 while (width > 0) { |
167 READYUVA422 | 167 READYUVA422 |
168 YUVTORGB(yuvconstants) | 168 YUVTORGB(yuvconstants) |
169 STOREABGR | 169 STOREABGR |
// One pass accounts for 8 units of width.
170 width -= 8; | 170 width -= 8; |
171 } | 171 } |
172 } | 172 } |
(...skipping 1930 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2103 } | 2103 } |
2104 | 2104 |
2105 #ifdef HAS_I422TOARGBROW_AVX2 | 2105 #ifdef HAS_I422TOARGBROW_AVX2 |
2106 // 16 pixels | 2106 // 16 pixels |
2107 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). | 2107 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). |
2108 __declspec(naked) | 2108 __declspec(naked) |
2109 void I422ToARGBRow_AVX2(const uint8* y_buf, | 2109 void I422ToARGBRow_AVX2(const uint8* y_buf, |
2110 const uint8* u_buf, | 2110 const uint8* u_buf, |
2111 const uint8* v_buf, | 2111 const uint8* v_buf, |
2112 uint8* dst_argb, | 2112 uint8* dst_argb, |
2113 struct YuvConstants* yuvconstants, | 2113 const struct YuvConstants* yuvconstants, |
2114 int width) { | 2114 int width) { |
2115 __asm { | 2115 __asm { |
2116 push esi | 2116 push esi |
2117 push edi | 2117 push edi |
2118 push ebx | 2118 push ebx |
2119 mov eax, [esp + 12 + 4] // Y | 2119 mov eax, [esp + 12 + 4] // Y |
2120 mov esi, [esp + 12 + 8] // U | 2120 mov esi, [esp + 12 + 8] // U |
2121 mov edi, [esp + 12 + 12] // V | 2121 mov edi, [esp + 12 + 12] // V |
2122 mov edx, [esp + 12 + 16] // argb | 2122 mov edx, [esp + 12 + 16] // argb |
2123 mov ebx, [esp + 12 + 20] // yuvconstants | 2123 mov ebx, [esp + 12 + 20] // yuvconstants |
(...skipping 20 matching lines...) Expand all Loading... |
2144 | 2144 |
2145 #ifdef HAS_I422ALPHATOARGBROW_AVX2 | 2145 #ifdef HAS_I422ALPHATOARGBROW_AVX2 |
2146 // 16 pixels | 2146 // 16 pixels |
2147 // 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ARGB. | 2147 // 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ARGB. |
2148 __declspec(naked) | 2148 __declspec(naked) |
2149 void I422AlphaToARGBRow_AVX2(const uint8* y_buf, | 2149 void I422AlphaToARGBRow_AVX2(const uint8* y_buf, |
2150 const uint8* u_buf, | 2150 const uint8* u_buf, |
2151 const uint8* v_buf, | 2151 const uint8* v_buf, |
2152 const uint8* a_buf, | 2152 const uint8* a_buf, |
2153 uint8* dst_argb, | 2153 uint8* dst_argb, |
2154 struct YuvConstants* yuvconstants, | 2154 const struct YuvConstants* yuvconstants, |
2155 int width) { | 2155 int width) { |
2156 __asm { | 2156 __asm { |
2157 push esi | 2157 push esi |
2158 push edi | 2158 push edi |
2159 push ebx | 2159 push ebx |
2160 push ebp | 2160 push ebp |
2161 mov eax, [esp + 16 + 4] // Y | 2161 mov eax, [esp + 16 + 4] // Y |
2162 mov esi, [esp + 16 + 8] // U | 2162 mov esi, [esp + 16 + 8] // U |
2163 mov edi, [esp + 16 + 12] // V | 2163 mov edi, [esp + 16 + 12] // V |
2164 mov ebp, [esp + 16 + 16] // A | 2164 mov ebp, [esp + 16 + 16] // A |
(...skipping 22 matching lines...) Expand all Loading... |
2187 | 2187 |
2188 #ifdef HAS_I422ALPHATOABGRROW_AVX2 | 2188 #ifdef HAS_I422ALPHATOABGRROW_AVX2 |
2189 // 16 pixels | 2189 // 16 pixels |
2190 // 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ABGR. | 2190 // 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ABGR. |
2191 __declspec(naked) | 2191 __declspec(naked) |
2192 void I422AlphaToABGRRow_AVX2(const uint8* y_buf, | 2192 void I422AlphaToABGRRow_AVX2(const uint8* y_buf, |
2193 const uint8* u_buf, | 2193 const uint8* u_buf, |
2194 const uint8* v_buf, | 2194 const uint8* v_buf, |
2195 const uint8* a_buf, | 2195 const uint8* a_buf, |
2196 uint8* dst_abgr, | 2196 uint8* dst_abgr, |
2197 struct YuvConstants* yuvconstants, | 2197 const struct YuvConstants* yuvconstants, |
2198 int width) { | 2198 int width) { |
2199 __asm { | 2199 __asm { |
2200 push esi | 2200 push esi |
2201 push edi | 2201 push edi |
2202 push ebx | 2202 push ebx |
2203 push ebp | 2203 push ebp |
2204 mov eax, [esp + 16 + 4] // Y | 2204 mov eax, [esp + 16 + 4] // Y |
2205 mov esi, [esp + 16 + 8] // U | 2205 mov esi, [esp + 16 + 8] // U |
2206 mov edi, [esp + 16 + 12] // V | 2206 mov edi, [esp + 16 + 12] // V |
2207 mov ebp, [esp + 16 + 16] // A | 2207 mov ebp, [esp + 16 + 16] // A |
(...skipping 21 matching lines...) Expand all Loading... |
2229 #endif // HAS_I422ALPHATOABGRROW_AVX2 | 2229 #endif // HAS_I422ALPHATOABGRROW_AVX2 |
2230 | 2230 |
2231 #ifdef HAS_I444TOARGBROW_AVX2 | 2231 #ifdef HAS_I444TOARGBROW_AVX2 |
2232 // 16 pixels | 2232 // 16 pixels |
2233 // 16 UV values with 16 Y producing 16 ARGB (64 bytes). | 2233 // 16 UV values with 16 Y producing 16 ARGB (64 bytes). |
2234 __declspec(naked) | 2234 __declspec(naked) |
2235 void I444ToARGBRow_AVX2(const uint8* y_buf, | 2235 void I444ToARGBRow_AVX2(const uint8* y_buf, |
2236 const uint8* u_buf, | 2236 const uint8* u_buf, |
2237 const uint8* v_buf, | 2237 const uint8* v_buf, |
2238 uint8* dst_argb, | 2238 uint8* dst_argb, |
2239 struct YuvConstants* yuvconstants, | 2239 const struct YuvConstants* yuvconstants, |
2240 int width) { | 2240 int width) { |
2241 __asm { | 2241 __asm { |
2242 push esi | 2242 push esi |
2243 push edi | 2243 push edi |
2244 push ebx | 2244 push ebx |
2245 mov eax, [esp + 12 + 4] // Y | 2245 mov eax, [esp + 12 + 4] // Y |
2246 mov esi, [esp + 12 + 8] // U | 2246 mov esi, [esp + 12 + 8] // U |
2247 mov edi, [esp + 12 + 12] // V | 2247 mov edi, [esp + 12 + 12] // V |
2248 mov edx, [esp + 12 + 16] // argb | 2248 mov edx, [esp + 12 + 16] // argb |
2249 mov ebx, [esp + 12 + 20] // yuvconstants | 2249 mov ebx, [esp + 12 + 20] // yuvconstants |
(...skipping 18 matching lines...) Expand all Loading... |
2268 #endif // HAS_I444TOARGBROW_AVX2 | 2268 #endif // HAS_I444TOARGBROW_AVX2 |
2269 | 2269 |
2270 #ifdef HAS_I444TOABGRROW_AVX2 | 2270 #ifdef HAS_I444TOABGRROW_AVX2 |
2271 // 16 pixels | 2271 // 16 pixels |
2272 // 16 UV values with 16 Y producing 16 ABGR (64 bytes). | 2272 // 16 UV values with 16 Y producing 16 ABGR (64 bytes). |
2273 __declspec(naked) | 2273 __declspec(naked) |
2274 void I444ToABGRRow_AVX2(const uint8* y_buf, | 2274 void I444ToABGRRow_AVX2(const uint8* y_buf, |
2275 const uint8* u_buf, | 2275 const uint8* u_buf, |
2276 const uint8* v_buf, | 2276 const uint8* v_buf, |
2277 uint8* dst_abgr, | 2277 uint8* dst_abgr, |
2278 struct YuvConstants* yuvconstants, | 2278 const struct YuvConstants* yuvconstants, |
2279 int width) { | 2279 int width) { |
2280 __asm { | 2280 __asm { |
2281 push esi | 2281 push esi |
2282 push edi | 2282 push edi |
2283 push ebx | 2283 push ebx |
2284 mov eax, [esp + 12 + 4] // Y | 2284 mov eax, [esp + 12 + 4] // Y |
2285 mov esi, [esp + 12 + 8] // U | 2285 mov esi, [esp + 12 + 8] // U |
2286 mov edi, [esp + 12 + 12] // V | 2286 mov edi, [esp + 12 + 12] // V |
2287 mov edx, [esp + 12 + 16] // abgr | 2287 mov edx, [esp + 12 + 16] // abgr |
2288 mov ebx, [esp + 12 + 20] // yuvconstants | 2288 mov ebx, [esp + 12 + 20] // yuvconstants |
(...skipping 18 matching lines...) Expand all Loading... |
2307 #endif // HAS_I444TOABGRROW_AVX2 | 2307 #endif // HAS_I444TOABGRROW_AVX2 |
2308 | 2308 |
2309 #ifdef HAS_I411TOARGBROW_AVX2 | 2309 #ifdef HAS_I411TOARGBROW_AVX2 |
2310 // 16 pixels | 2310 // 16 pixels |
2311 // 4 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). | 2311 // 4 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). |
2312 __declspec(naked) | 2312 __declspec(naked) |
2313 void I411ToARGBRow_AVX2(const uint8* y_buf, | 2313 void I411ToARGBRow_AVX2(const uint8* y_buf, |
2314 const uint8* u_buf, | 2314 const uint8* u_buf, |
2315 const uint8* v_buf, | 2315 const uint8* v_buf, |
2316 uint8* dst_argb, | 2316 uint8* dst_argb, |
2317 struct YuvConstants* yuvconstants, | 2317 const struct YuvConstants* yuvconstants, |
2318 int width) { | 2318 int width) { |
2319 __asm { | 2319 __asm { |
2320 push esi | 2320 push esi |
2321 push edi | 2321 push edi |
2322 push ebx | 2322 push ebx |
2323 mov eax, [esp + 12 + 4] // Y | 2323 mov eax, [esp + 12 + 4] // Y |
2324 mov esi, [esp + 12 + 8] // U | 2324 mov esi, [esp + 12 + 8] // U |
2325 mov edi, [esp + 12 + 12] // V | 2325 mov edi, [esp + 12 + 12] // V |
2326 mov edx, [esp + 12 + 16] // abgr | 2326 mov edx, [esp + 12 + 16] // abgr |
2327 mov ebx, [esp + 12 + 20] // yuvconstants | 2327 mov ebx, [esp + 12 + 20] // yuvconstants |
(...skipping 18 matching lines...) Expand all Loading... |
2346 } | 2346 } |
2347 #endif // HAS_I411TOARGBROW_AVX2 | 2347 #endif // HAS_I411TOARGBROW_AVX2 |
2348 | 2348 |
2349 #ifdef HAS_NV12TOARGBROW_AVX2 | 2349 #ifdef HAS_NV12TOARGBROW_AVX2 |
2350 // 16 pixels. | 2350 // 16 pixels. |
2351 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). | 2351 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). |
2352 __declspec(naked) | 2352 __declspec(naked) |
2353 void NV12ToARGBRow_AVX2(const uint8* y_buf, | 2353 void NV12ToARGBRow_AVX2(const uint8* y_buf, |
2354 const uint8* uv_buf, | 2354 const uint8* uv_buf, |
2355 uint8* dst_argb, | 2355 uint8* dst_argb, |
2356 struct YuvConstants* yuvconstants, | 2356 const struct YuvConstants* yuvconstants, |
2357 int width) { | 2357 int width) { |
2358 __asm { | 2358 __asm { |
2359 push esi | 2359 push esi |
2360 push ebx | 2360 push ebx |
2361 mov eax, [esp + 8 + 4] // Y | 2361 mov eax, [esp + 8 + 4] // Y |
2362 mov esi, [esp + 8 + 8] // UV | 2362 mov esi, [esp + 8 + 8] // UV |
2363 mov edx, [esp + 8 + 12] // argb | 2363 mov edx, [esp + 8 + 12] // argb |
2364 mov ebx, [esp + 8 + 16] // yuvconstants | 2364 mov ebx, [esp + 8 + 16] // yuvconstants |
2365 mov ecx, [esp + 8 + 20] // width | 2365 mov ecx, [esp + 8 + 20] // width |
2366 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha | 2366 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha |
(...skipping 14 matching lines...) Expand all Loading... |
2381 } | 2381 } |
2382 #endif // HAS_NV12TOARGBROW_AVX2 | 2382 #endif // HAS_NV12TOARGBROW_AVX2 |
2383 | 2383 |
2384 #ifdef HAS_NV21TOARGBROW_AVX2 | 2384 #ifdef HAS_NV21TOARGBROW_AVX2 |
2385 // 16 pixels. | 2385 // 16 pixels. |
2386 // 8 VU values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). | 2386 // 8 VU values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). |
2387 __declspec(naked) | 2387 __declspec(naked) |
2388 void NV21ToARGBRow_AVX2(const uint8* y_buf, | 2388 void NV21ToARGBRow_AVX2(const uint8* y_buf, |
2389 const uint8* vu_buf, | 2389 const uint8* vu_buf, |
2390 uint8* dst_argb, | 2390 uint8* dst_argb, |
2391 struct YuvConstants* yuvconstants, | 2391 const struct YuvConstants* yuvconstants, |
2392 int width) { | 2392 int width) { |
2393 __asm { | 2393 __asm { |
2394 push esi | 2394 push esi |
2395 push ebx | 2395 push ebx |
2396 mov eax, [esp + 8 + 4] // Y | 2396 mov eax, [esp + 8 + 4] // Y |
2397 mov esi, [esp + 8 + 8] // VU | 2397 mov esi, [esp + 8 + 8] // VU |
2398 mov edx, [esp + 8 + 12] // argb | 2398 mov edx, [esp + 8 + 12] // argb |
2399 mov ebx, [esp + 8 + 16] // yuvconstants | 2399 mov ebx, [esp + 8 + 16] // yuvconstants |
2400 mov ecx, [esp + 8 + 20] // width | 2400 mov ecx, [esp + 8 + 20] // width |
2401 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha | 2401 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha |
(...skipping 12 matching lines...) Expand all Loading... |
2414 ret | 2414 ret |
2415 } | 2415 } |
2416 } | 2416 } |
2417 #endif // HAS_NV21TOARGBROW_AVX2 | 2417 #endif // HAS_NV21TOARGBROW_AVX2 |
2418 | 2418 |
2419 // 16 pixels. | 2419 // 16 pixels. |
2420 // 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes). | 2420 // 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes). |
// YUY2ToARGBRow_AVX2, 32-bit inline-asm variant, shown as an OLD | NEW
// side-by-side diff. The columns differ only on the yuvconstants line, where
// NEW const-qualifies the pointer.
// Register use, per the loads below: eax = src_yuy2, edx = dst_argb,
// ebx = yuvconstants, ecx = remaining width. The READYUY2_AVX2, YUVTORGB_AVX2
// and STOREARGB_AVX2 macro bodies are outside this excerpt, so pointer
// advancement is presumably done inside them -- confirm.
// NOTE(review): the loop repeats while ecx stays positive after subtracting
// 16, so a width that is not a multiple of 16 still executes one more full
// pass -- presumably callers round up or handle the remainder; confirm.
2421 __declspec(naked) | 2421 __declspec(naked) |
2422 void YUY2ToARGBRow_AVX2(const uint8* src_yuy2, | 2422 void YUY2ToARGBRow_AVX2(const uint8* src_yuy2, |
2423 uint8* dst_argb, | 2423 uint8* dst_argb, |
2424 struct YuvConstants* yuvconstants, | 2424 const struct YuvConstants* yuvconstants, |
2425 int width) { | 2425 int width) { |
2426 __asm { | 2426 __asm { |
// Naked function: there is no compiler-generated prologue, so ebx is saved by
// hand and the arguments are read straight off the stack. The first 4 in each
// [esp + 4 + N] skips the ebx just pushed.
2427 push ebx | 2427 push ebx |
2428 mov eax, [esp + 4 + 4] // yuy2 | 2428 mov eax, [esp + 4 + 4] // yuy2 |
2429 mov edx, [esp + 4 + 8] // argb | 2429 mov edx, [esp + 4 + 8] // argb |
2430 mov ebx, [esp + 4 + 12] // yuvconstants | 2430 mov ebx, [esp + 4 + 12] // yuvconstants |
2431 mov ecx, [esp + 4 + 16] // width | 2431 mov ecx, [esp + 4 + 16] // width |
2432 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha | 2432 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha |
2433 | 2433 |
2434 convertloop: | 2434 convertloop: |
2435 READYUY2_AVX2 | 2435 READYUY2_AVX2 |
2436 YUVTORGB_AVX2(ebx) | 2436 YUVTORGB_AVX2(ebx) |
2437 STOREARGB_AVX2 | 2437 STOREARGB_AVX2 |
2438 | 2438 |
// 16 pixels are accounted for per pass; loop again while the count is still
// positive.
2439 sub ecx, 16 | 2439 sub ecx, 16 |
2440 jg convertloop | 2440 jg convertloop |
2441 | 2441 |
// Restore the saved register, then zero the upper ymm halves before returning
// (presumably to avoid AVX/SSE transition stalls in the caller).
2442 pop ebx | 2442 pop ebx |
2443 vzeroupper | 2443 vzeroupper |
2444 ret | 2444 ret |
2445 } | 2445 } |
2446 } | 2446 } |
2447 | 2447 |
2448 // 16 pixels. | 2448 // 16 pixels. |
2449 // 8 UYVY values with 16 Y and 8 UV producing 16 ARGB (64 bytes). | 2449 // 8 UYVY values with 16 Y and 8 UV producing 16 ARGB (64 bytes). |
2450 __declspec(naked) | 2450 __declspec(naked) |
2451 void UYVYToARGBRow_AVX2(const uint8* src_uyvy, | 2451 void UYVYToARGBRow_AVX2(const uint8* src_uyvy, |
2452 uint8* dst_argb, | 2452 uint8* dst_argb, |
2453 struct YuvConstants* yuvconstants, | 2453 const struct YuvConstants* yuvconstants, |
2454 int width) { | 2454 int width) { |
2455 __asm { | 2455 __asm { |
2456 push ebx | 2456 push ebx |
2457 mov eax, [esp + 4 + 4] // uyvy | 2457 mov eax, [esp + 4 + 4] // uyvy |
2458 mov edx, [esp + 4 + 8] // argb | 2458 mov edx, [esp + 4 + 8] // argb |
2459 mov ebx, [esp + 4 + 12] // yuvconstants | 2459 mov ebx, [esp + 4 + 12] // yuvconstants |
2460 mov ecx, [esp + 4 + 16] // width | 2460 mov ecx, [esp + 4 + 16] // width |
2461 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha | 2461 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha |
2462 | 2462 |
2463 convertloop: | 2463 convertloop: |
(...skipping 13 matching lines...) Expand all Loading... |
2477 | 2477 |
2478 #ifdef HAS_I422TOBGRAROW_AVX2 | 2478 #ifdef HAS_I422TOBGRAROW_AVX2 |
2479 // 16 pixels | 2479 // 16 pixels |
2480 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes). | 2480 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes). |
2481 // TODO(fbarchard): Use macros to reduce duplicate code. See SSSE3. | 2481 // TODO(fbarchard): Use macros to reduce duplicate code. See SSSE3. |
2482 __declspec(naked) | 2482 __declspec(naked) |
2483 void I422ToBGRARow_AVX2(const uint8* y_buf, | 2483 void I422ToBGRARow_AVX2(const uint8* y_buf, |
2484 const uint8* u_buf, | 2484 const uint8* u_buf, |
2485 const uint8* v_buf, | 2485 const uint8* v_buf, |
2486 uint8* dst_argb, | 2486 uint8* dst_argb, |
2487 struct YuvConstants* yuvconstants, | 2487 const struct YuvConstants* yuvconstants, |
2488 int width) { | 2488 int width) { |
2489 __asm { | 2489 __asm { |
2490 push esi | 2490 push esi |
2491 push edi | 2491 push edi |
2492 push ebx | 2492 push ebx |
2493 mov eax, [esp + 12 + 4] // Y | 2493 mov eax, [esp + 12 + 4] // Y |
2494 mov esi, [esp + 12 + 8] // U | 2494 mov esi, [esp + 12 + 8] // U |
2495 mov edi, [esp + 12 + 12] // V | 2495 mov edi, [esp + 12 + 12] // V |
2496 mov edx, [esp + 12 + 16] // abgr | 2496 mov edx, [esp + 12 + 16] // abgr |
2497 mov ebx, [esp + 12 + 20] // yuvconstants | 2497 mov ebx, [esp + 12 + 20] // yuvconstants |
(...skipping 19 matching lines...) Expand all Loading... |
2517 #endif // HAS_I422TOBGRAROW_AVX2 | 2517 #endif // HAS_I422TOBGRAROW_AVX2 |
2518 | 2518 |
2519 #ifdef HAS_I422TORGBAROW_AVX2 | 2519 #ifdef HAS_I422TORGBAROW_AVX2 |
2520 // 16 pixels | 2520 // 16 pixels |
2521 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes). | 2521 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes). |
2522 __declspec(naked) | 2522 __declspec(naked) |
2523 void I422ToRGBARow_AVX2(const uint8* y_buf, | 2523 void I422ToRGBARow_AVX2(const uint8* y_buf, |
2524 const uint8* u_buf, | 2524 const uint8* u_buf, |
2525 const uint8* v_buf, | 2525 const uint8* v_buf, |
2526 uint8* dst_argb, | 2526 uint8* dst_argb, |
2527 struct YuvConstants* yuvconstants, | 2527 const struct YuvConstants* yuvconstants, |
2528 int width) { | 2528 int width) { |
2529 __asm { | 2529 __asm { |
2530 push esi | 2530 push esi |
2531 push edi | 2531 push edi |
2532 push ebx | 2532 push ebx |
2533 mov eax, [esp + 12 + 4] // Y | 2533 mov eax, [esp + 12 + 4] // Y |
2534 mov esi, [esp + 12 + 8] // U | 2534 mov esi, [esp + 12 + 8] // U |
2535 mov edi, [esp + 12 + 12] // V | 2535 mov edi, [esp + 12 + 12] // V |
2536 mov edx, [esp + 12 + 16] // abgr | 2536 mov edx, [esp + 12 + 16] // abgr |
2537 mov ebx, [esp + 12 + 20] // yuvconstants | 2537 mov ebx, [esp + 12 + 20] // yuvconstants |
(...skipping 19 matching lines...) Expand all Loading... |
2557 #endif // HAS_I422TORGBAROW_AVX2 | 2557 #endif // HAS_I422TORGBAROW_AVX2 |
2558 | 2558 |
2559 #ifdef HAS_I422TOABGRROW_AVX2 | 2559 #ifdef HAS_I422TOABGRROW_AVX2 |
2560 // 16 pixels | 2560 // 16 pixels |
2561 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes). | 2561 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes). |
2562 __declspec(naked) | 2562 __declspec(naked) |
2563 void I422ToABGRRow_AVX2(const uint8* y_buf, | 2563 void I422ToABGRRow_AVX2(const uint8* y_buf, |
2564 const uint8* u_buf, | 2564 const uint8* u_buf, |
2565 const uint8* v_buf, | 2565 const uint8* v_buf, |
2566 uint8* dst_argb, | 2566 uint8* dst_argb, |
2567 struct YuvConstants* yuvconstants, | 2567 const struct YuvConstants* yuvconstants, |
2568 int width) { | 2568 int width) { |
2569 __asm { | 2569 __asm { |
2570 push esi | 2570 push esi |
2571 push edi | 2571 push edi |
2572 push ebx | 2572 push ebx |
2573 mov eax, [esp + 12 + 4] // Y | 2573 mov eax, [esp + 12 + 4] // Y |
2574 mov esi, [esp + 12 + 8] // U | 2574 mov esi, [esp + 12 + 8] // U |
2575 mov edi, [esp + 12 + 12] // V | 2575 mov edi, [esp + 12 + 12] // V |
2576 mov edx, [esp + 12 + 16] // argb | 2576 mov edx, [esp + 12 + 16] // argb |
2577 mov ebx, [esp + 12 + 20] // yuvconstants | 2577 mov ebx, [esp + 12 + 20] // yuvconstants |
(...skipping 255 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2833 __asm lea edx, [edx + 16] \ | 2833 __asm lea edx, [edx + 16] \ |
2834 } | 2834 } |
2835 | 2835 |
2836 // 8 pixels. | 2836 // 8 pixels. |
2837 // 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes). | 2837 // 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes). |
2838 __declspec(naked) | 2838 __declspec(naked) |
2839 void I444ToARGBRow_SSSE3(const uint8* y_buf, | 2839 void I444ToARGBRow_SSSE3(const uint8* y_buf, |
2840 const uint8* u_buf, | 2840 const uint8* u_buf, |
2841 const uint8* v_buf, | 2841 const uint8* v_buf, |
2842 uint8* dst_argb, | 2842 uint8* dst_argb, |
2843 struct YuvConstants* yuvconstants, | 2843 const struct YuvConstants* yuvconstants, |
2844 int width) { | 2844 int width) { |
2845 __asm { | 2845 __asm { |
2846 push esi | 2846 push esi |
2847 push edi | 2847 push edi |
2848 push ebx | 2848 push ebx |
2849 mov eax, [esp + 12 + 4] // Y | 2849 mov eax, [esp + 12 + 4] // Y |
2850 mov esi, [esp + 12 + 8] // U | 2850 mov esi, [esp + 12 + 8] // U |
2851 mov edi, [esp + 12 + 12] // V | 2851 mov edi, [esp + 12 + 12] // V |
2852 mov edx, [esp + 12 + 16] // argb | 2852 mov edx, [esp + 12 + 16] // argb |
2853 mov ebx, [esp + 12 + 20] // yuvconstants | 2853 mov ebx, [esp + 12 + 20] // yuvconstants |
(...skipping 16 matching lines...) Expand all Loading... |
2870 } | 2870 } |
2871 } | 2871 } |
2872 | 2872 |
2873 // 8 pixels. | 2873 // 8 pixels. |
2874 // 8 UV values, mixed with 8 Y producing 8 ABGR (32 bytes). | 2874 // 8 UV values, mixed with 8 Y producing 8 ABGR (32 bytes). |
2875 __declspec(naked) | 2875 __declspec(naked) |
2876 void I444ToABGRRow_SSSE3(const uint8* y_buf, | 2876 void I444ToABGRRow_SSSE3(const uint8* y_buf, |
2877 const uint8* u_buf, | 2877 const uint8* u_buf, |
2878 const uint8* v_buf, | 2878 const uint8* v_buf, |
2879 uint8* dst_abgr, | 2879 uint8* dst_abgr, |
2880 struct YuvConstants* yuvconstants, | 2880 const struct YuvConstants* yuvconstants, |
2881 int width) { | 2881 int width) { |
2882 __asm { | 2882 __asm { |
2883 push esi | 2883 push esi |
2884 push edi | 2884 push edi |
2885 push ebx | 2885 push ebx |
2886 mov eax, [esp + 12 + 4] // Y | 2886 mov eax, [esp + 12 + 4] // Y |
2887 mov esi, [esp + 12 + 8] // U | 2887 mov esi, [esp + 12 + 8] // U |
2888 mov edi, [esp + 12 + 12] // V | 2888 mov edi, [esp + 12 + 12] // V |
2889 mov edx, [esp + 12 + 16] // abgr | 2889 mov edx, [esp + 12 + 16] // abgr |
2890 mov ebx, [esp + 12 + 20] // yuvconstants | 2890 mov ebx, [esp + 12 + 20] // yuvconstants |
(...skipping 16 matching lines...) Expand all Loading... |
2907 } | 2907 } |
2908 } | 2908 } |
2909 | 2909 |
2910 // 8 pixels. | 2910 // 8 pixels. |
2911 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB24 (24 bytes). | 2911 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB24 (24 bytes). |
2912 __declspec(naked) | 2912 __declspec(naked) |
2913 void I422ToRGB24Row_SSSE3(const uint8* y_buf, | 2913 void I422ToRGB24Row_SSSE3(const uint8* y_buf, |
2914 const uint8* u_buf, | 2914 const uint8* u_buf, |
2915 const uint8* v_buf, | 2915 const uint8* v_buf, |
2916 uint8* dst_rgb24, | 2916 uint8* dst_rgb24, |
2917 struct YuvConstants* yuvconstants, | 2917 const struct YuvConstants* yuvconstants, |
2918 int width) { | 2918 int width) { |
2919 __asm { | 2919 __asm { |
2920 push esi | 2920 push esi |
2921 push edi | 2921 push edi |
2922 push ebx | 2922 push ebx |
2923 mov eax, [esp + 12 + 4] // Y | 2923 mov eax, [esp + 12 + 4] // Y |
2924 mov esi, [esp + 12 + 8] // U | 2924 mov esi, [esp + 12 + 8] // U |
2925 mov edi, [esp + 12 + 12] // V | 2925 mov edi, [esp + 12 + 12] // V |
2926 mov edx, [esp + 12 + 16] // argb | 2926 mov edx, [esp + 12 + 16] // argb |
2927 mov ebx, [esp + 12 + 20] // yuvconstants | 2927 mov ebx, [esp + 12 + 20] // yuvconstants |
(...skipping 17 matching lines...) Expand all Loading... |
2945 } | 2945 } |
2946 } | 2946 } |
2947 | 2947 |
2948 // 8 pixels. | 2948 // 8 pixels. |
2949 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RAW (24 bytes). | 2949 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RAW (24 bytes). |
2950 __declspec(naked) | 2950 __declspec(naked) |
2951 void I422ToRAWRow_SSSE3(const uint8* y_buf, | 2951 void I422ToRAWRow_SSSE3(const uint8* y_buf, |
2952 const uint8* u_buf, | 2952 const uint8* u_buf, |
2953 const uint8* v_buf, | 2953 const uint8* v_buf, |
2954 uint8* dst_raw, | 2954 uint8* dst_raw, |
2955 struct YuvConstants* yuvconstants, | 2955 const struct YuvConstants* yuvconstants, |
2956 int width) { | 2956 int width) { |
2957 __asm { | 2957 __asm { |
2958 push esi | 2958 push esi |
2959 push edi | 2959 push edi |
2960 push ebx | 2960 push ebx |
2961 mov eax, [esp + 12 + 4] // Y | 2961 mov eax, [esp + 12 + 4] // Y |
2962 mov esi, [esp + 12 + 8] // U | 2962 mov esi, [esp + 12 + 8] // U |
2963 mov edi, [esp + 12 + 12] // V | 2963 mov edi, [esp + 12 + 12] // V |
2964 mov edx, [esp + 12 + 16] // argb | 2964 mov edx, [esp + 12 + 16] // argb |
2965 mov ebx, [esp + 12 + 20] // yuvconstants | 2965 mov ebx, [esp + 12 + 20] // yuvconstants |
(...skipping 17 matching lines...) Expand all Loading... |
2983 } | 2983 } |
2984 } | 2984 } |
2985 | 2985 |
2986 // 8 pixels | 2986 // 8 pixels |
2987 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB565 (16 bytes). | 2987 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB565 (16 bytes). |
2988 __declspec(naked) | 2988 __declspec(naked) |
2989 void I422ToRGB565Row_SSSE3(const uint8* y_buf, | 2989 void I422ToRGB565Row_SSSE3(const uint8* y_buf, |
2990 const uint8* u_buf, | 2990 const uint8* u_buf, |
2991 const uint8* v_buf, | 2991 const uint8* v_buf, |
2992 uint8* rgb565_buf, | 2992 uint8* rgb565_buf, |
2993 struct YuvConstants* yuvconstants, | 2993 const struct YuvConstants* yuvconstants, |
2994 int width) { | 2994 int width) { |
2995 __asm { | 2995 __asm { |
2996 push esi | 2996 push esi |
2997 push edi | 2997 push edi |
2998 push ebx | 2998 push ebx |
2999 mov eax, [esp + 12 + 4] // Y | 2999 mov eax, [esp + 12 + 4] // Y |
3000 mov esi, [esp + 12 + 8] // U | 3000 mov esi, [esp + 12 + 8] // U |
3001 mov edi, [esp + 12 + 12] // V | 3001 mov edi, [esp + 12 + 12] // V |
3002 mov edx, [esp + 12 + 16] // argb | 3002 mov edx, [esp + 12 + 16] // argb |
3003 mov ebx, [esp + 12 + 20] // yuvconstants | 3003 mov ebx, [esp + 12 + 20] // yuvconstants |
(...skipping 22 matching lines...) Expand all Loading... |
3026 } | 3026 } |
3027 } | 3027 } |
3028 | 3028 |
3029 // 8 pixels. | 3029 // 8 pixels. |
3030 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). | 3030 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). |
3031 __declspec(naked) | 3031 __declspec(naked) |
3032 void I422ToARGBRow_SSSE3(const uint8* y_buf, | 3032 void I422ToARGBRow_SSSE3(const uint8* y_buf, |
3033 const uint8* u_buf, | 3033 const uint8* u_buf, |
3034 const uint8* v_buf, | 3034 const uint8* v_buf, |
3035 uint8* dst_argb, | 3035 uint8* dst_argb, |
3036 struct YuvConstants* yuvconstants, | 3036 const struct YuvConstants* yuvconstants, |
3037 int width) { | 3037 int width) { |
3038 __asm { | 3038 __asm { |
3039 push esi | 3039 push esi |
3040 push edi | 3040 push edi |
3041 push ebx | 3041 push ebx |
3042 mov eax, [esp + 12 + 4] // Y | 3042 mov eax, [esp + 12 + 4] // Y |
3043 mov esi, [esp + 12 + 8] // U | 3043 mov esi, [esp + 12 + 8] // U |
3044 mov edi, [esp + 12 + 12] // V | 3044 mov edi, [esp + 12 + 12] // V |
3045 mov edx, [esp + 12 + 16] // argb | 3045 mov edx, [esp + 12 + 16] // argb |
3046 mov ebx, [esp + 12 + 20] // yuvconstants | 3046 mov ebx, [esp + 12 + 20] // yuvconstants |
(...skipping 17 matching lines...) Expand all Loading... |
3064 } | 3064 } |
3065 | 3065 |
3066 // 8 pixels. | 3066 // 8 pixels. |
3067 // 4 UV values upsampled to 8 UV, mixed with 8 Y and 8 A producing 8 ARGB. | 3067 // 4 UV values upsampled to 8 UV, mixed with 8 Y and 8 A producing 8 ARGB. |
3068 __declspec(naked) | 3068 __declspec(naked) |
3069 void I422AlphaToARGBRow_SSSE3(const uint8* y_buf, | 3069 void I422AlphaToARGBRow_SSSE3(const uint8* y_buf, |
3070 const uint8* u_buf, | 3070 const uint8* u_buf, |
3071 const uint8* v_buf, | 3071 const uint8* v_buf, |
3072 const uint8* a_buf, | 3072 const uint8* a_buf, |
3073 uint8* dst_argb, | 3073 uint8* dst_argb, |
3074 struct YuvConstants* yuvconstants, | 3074 const struct YuvConstants* yuvconstants, |
3075 int width) { | 3075 int width) { |
3076 __asm { | 3076 __asm { |
3077 push esi | 3077 push esi |
3078 push edi | 3078 push edi |
3079 push ebx | 3079 push ebx |
3080 push ebp | 3080 push ebp |
3081 mov eax, [esp + 16 + 4] // Y | 3081 mov eax, [esp + 16 + 4] // Y |
3082 mov esi, [esp + 16 + 8] // U | 3082 mov esi, [esp + 16 + 8] // U |
3083 mov edi, [esp + 16 + 12] // V | 3083 mov edi, [esp + 16 + 12] // V |
3084 mov ebp, [esp + 16 + 16] // A | 3084 mov ebp, [esp + 16 + 16] // A |
(...skipping 19 matching lines...) Expand all Loading... |
3104 } | 3104 } |
3105 | 3105 |
3106 // 8 pixels. | 3106 // 8 pixels. |
3107 // 4 UV values upsampled to 8 UV, mixed with 8 Y and 8 A producing 8 ABGR. | 3107 // 4 UV values upsampled to 8 UV, mixed with 8 Y and 8 A producing 8 ABGR. |
3108 __declspec(naked) | 3108 __declspec(naked) |
3109 void I422AlphaToABGRRow_SSSE3(const uint8* y_buf, | 3109 void I422AlphaToABGRRow_SSSE3(const uint8* y_buf, |
3110 const uint8* u_buf, | 3110 const uint8* u_buf, |
3111 const uint8* v_buf, | 3111 const uint8* v_buf, |
3112 const uint8* a_buf, | 3112 const uint8* a_buf, |
3113 uint8* dst_abgr, | 3113 uint8* dst_abgr, |
3114 struct YuvConstants* yuvconstants, | 3114 const struct YuvConstants* yuvconstants, |
3115 int width) { | 3115 int width) { |
3116 __asm { | 3116 __asm { |
3117 push esi | 3117 push esi |
3118 push edi | 3118 push edi |
3119 push ebx | 3119 push ebx |
3120 push ebp | 3120 push ebp |
3121 mov eax, [esp + 16 + 4] // Y | 3121 mov eax, [esp + 16 + 4] // Y |
3122 mov esi, [esp + 16 + 8] // U | 3122 mov esi, [esp + 16 + 8] // U |
3123 mov edi, [esp + 16 + 12] // V | 3123 mov edi, [esp + 16 + 12] // V |
3124 mov ebp, [esp + 16 + 16] // A | 3124 mov ebp, [esp + 16 + 16] // A |
(...skipping 19 matching lines...) Expand all Loading... |
3144 } | 3144 } |
3145 | 3145 |
3146 // 8 pixels. | 3146 // 8 pixels. |
3147 // 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). | 3147 // 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). |
3148 // Similar to I420 but duplicate UV once more. | 3148 // Similar to I420 but duplicate UV once more. |
3149 __declspec(naked) | 3149 __declspec(naked) |
3150 void I411ToARGBRow_SSSE3(const uint8* y_buf, | 3150 void I411ToARGBRow_SSSE3(const uint8* y_buf, |
3151 const uint8* u_buf, | 3151 const uint8* u_buf, |
3152 const uint8* v_buf, | 3152 const uint8* v_buf, |
3153 uint8* dst_argb, | 3153 uint8* dst_argb, |
3154 struct YuvConstants* yuvconstants, | 3154 const struct YuvConstants* yuvconstants, |
3155 int width) { | 3155 int width) { |
3156 __asm { | 3156 __asm { |
3157 push esi | 3157 push esi |
3158 push edi | 3158 push edi |
3159 push ebx | 3159 push ebx |
3160 mov eax, [esp + 12 + 4] // Y | 3160 mov eax, [esp + 12 + 4] // Y |
3161 mov esi, [esp + 12 + 8] // U | 3161 mov esi, [esp + 12 + 8] // U |
3162 mov edi, [esp + 12 + 12] // V | 3162 mov edi, [esp + 12 + 12] // V |
3163 mov edx, [esp + 12 + 16] // argb | 3163 mov edx, [esp + 12 + 16] // argb |
3164 mov ebx, [esp + 12 + 20] // yuvconstants | 3164 mov ebx, [esp + 12 + 20] // yuvconstants |
(...skipping 15 matching lines...) Expand all Loading... |
3180 ret | 3180 ret |
3181 } | 3181 } |
3182 } | 3182 } |
3183 | 3183 |
3184 // 8 pixels. | 3184 // 8 pixels. |
3185 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). | 3185 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). |
3186 __declspec(naked) | 3186 __declspec(naked) |
3187 void NV12ToARGBRow_SSSE3(const uint8* y_buf, | 3187 void NV12ToARGBRow_SSSE3(const uint8* y_buf, |
3188 const uint8* uv_buf, | 3188 const uint8* uv_buf, |
3189 uint8* dst_argb, | 3189 uint8* dst_argb, |
3190 struct YuvConstants* yuvconstants, | 3190 const struct YuvConstants* yuvconstants, |
3191 int width) { | 3191 int width) { |
3192 __asm { | 3192 __asm { |
3193 push esi | 3193 push esi |
3194 push ebx | 3194 push ebx |
3195 mov eax, [esp + 8 + 4] // Y | 3195 mov eax, [esp + 8 + 4] // Y |
3196 mov esi, [esp + 8 + 8] // UV | 3196 mov esi, [esp + 8 + 8] // UV |
3197 mov edx, [esp + 8 + 12] // argb | 3197 mov edx, [esp + 8 + 12] // argb |
3198 mov ebx, [esp + 8 + 16] // yuvconstants | 3198 mov ebx, [esp + 8 + 16] // yuvconstants |
3199 mov ecx, [esp + 8 + 20] // width | 3199 mov ecx, [esp + 8 + 20] // width |
3200 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha | 3200 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha |
(...skipping 11 matching lines...) Expand all Loading... |
3212 ret | 3212 ret |
3213 } | 3213 } |
3214 } | 3214 } |
3215 | 3215 |
3216 // 8 pixels. | 3216 // 8 pixels. |
3217 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). | 3217 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). |
3218 __declspec(naked) | 3218 __declspec(naked) |
3219 void NV21ToARGBRow_SSSE3(const uint8* y_buf, | 3219 void NV21ToARGBRow_SSSE3(const uint8* y_buf, |
3220 const uint8* vu_buf, | 3220 const uint8* vu_buf, |
3221 uint8* dst_argb, | 3221 uint8* dst_argb, |
3222 struct YuvConstants* yuvconstants, | 3222 const struct YuvConstants* yuvconstants, |
3223 int width) { | 3223 int width) { |
3224 __asm { | 3224 __asm { |
3225 push esi | 3225 push esi |
3226 push ebx | 3226 push ebx |
3227 mov eax, [esp + 8 + 4] // Y | 3227 mov eax, [esp + 8 + 4] // Y |
3228 mov esi, [esp + 8 + 8] // VU | 3228 mov esi, [esp + 8 + 8] // VU |
3229 mov edx, [esp + 8 + 12] // argb | 3229 mov edx, [esp + 8 + 12] // argb |
3230 mov ebx, [esp + 8 + 16] // yuvconstants | 3230 mov ebx, [esp + 8 + 16] // yuvconstants |
3231 mov ecx, [esp + 8 + 20] // width | 3231 mov ecx, [esp + 8 + 20] // width |
3232 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha | 3232 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha |
(...skipping 10 matching lines...) Expand all Loading... |
3243 pop esi | 3243 pop esi |
3244 ret | 3244 ret |
3245 } | 3245 } |
3246 } | 3246 } |
3247 | 3247 |
3248 // 8 pixels. | 3248 // 8 pixels. |
3249 // 4 YUY2 values with 8 Y and 4 UV producing 8 ARGB (32 bytes). | 3249 // 4 YUY2 values with 8 Y and 4 UV producing 8 ARGB (32 bytes). |
// Converts one row of packed YUY2 read from src_yuy2 into ARGB written to
// dst_argb, using the conversion constants pointed to by yuvconstants.
// width is the pixel count; the loop consumes it 8 pixels at a time.
// The per-group work is done by the READYUY2, YUVTORGB and STOREARGB macros,
// which are defined elsewhere in this file.
// NOTE(review): those macros presumably advance eax (source) and edx
// (destination) on each pass - confirm against their definitions.
3250 __declspec(naked) | 3250 __declspec(naked) |
3251 void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, | 3251 void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, |
3252 uint8* dst_argb, | 3252 uint8* dst_argb, |
3253 struct YuvConstants* yuvconstants, | 3253 const struct YuvConstants* yuvconstants, |
3254 int width) { | 3254 int width) { |
3255 __asm { | 3255 __asm { |
// Naked function: ebx is saved and restored by hand and the arguments are
// read directly from the stack. In [esp + 4 + N] the first 4 skips the ebx
// pushed below; N is the argument's offset at function entry.
3256 push ebx | 3256 push ebx |
3257 mov eax, [esp + 4 + 4] // yuy2 | 3257 mov eax, [esp + 4 + 4] // yuy2 |
3258 mov edx, [esp + 4 + 8] // argb | 3258 mov edx, [esp + 4 + 8] // argb |
3259 mov ebx, [esp + 4 + 12] // yuvconstants | 3259 mov ebx, [esp + 4 + 12] // yuvconstants |
3260 mov ecx, [esp + 4 + 16] // width | 3260 mov ecx, [esp + 4 + 16] // width |
3261 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha | 3261 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha |
3262 | 3262 |
// One pass per group of 8 pixels. The remaining count in ecx is tested only
// after the body, so the body runs once even when width <= 0, and a width
// that is not a multiple of 8 still gets a full final group.
// NOTE(review): presumably callers pass a positive multiple of 8 or route
// remainders through a wrapper - confirm.
3263 convertloop: | 3263 convertloop: |
3264 READYUY2 | 3264 READYUY2 |
3265 YUVTORGB(ebx) | 3265 YUVTORGB(ebx) |
3266 STOREARGB | 3266 STOREARGB |
3267 | 3267 |
3268 sub ecx, 8 | 3268 sub ecx, 8 |
3269 jg convertloop | 3269 jg convertloop |
3270 | 3270 |
// Restore the saved ebx before the explicit return.
3271 pop ebx | 3271 pop ebx |
3272 ret | 3272 ret |
3273 } | 3273 } |
3274 } | 3274 } |
3275 | 3275 |
3276 // 8 pixels. | 3276 // 8 pixels. |
3277 // 4 UYVY values with 8 Y and 4 UV producing 8 ARGB (32 bytes). | 3277 // 4 UYVY values with 8 Y and 4 UV producing 8 ARGB (32 bytes). |
// Converts one row of packed UYVY read from src_uyvy into ARGB written to
// dst_argb, using the conversion constants pointed to by yuvconstants.
// width is the pixel count; the loop consumes it 8 pixels at a time.
// The per-group work is done by the READUYVY, YUVTORGB and STOREARGB macros,
// which are defined elsewhere in this file.
// NOTE(review): those macros presumably advance eax (source) and edx
// (destination) on each pass - confirm against their definitions.
3278 __declspec(naked) | 3278 __declspec(naked) |
3279 void UYVYToARGBRow_SSSE3(const uint8* src_uyvy, | 3279 void UYVYToARGBRow_SSSE3(const uint8* src_uyvy, |
3280 uint8* dst_argb, | 3280 uint8* dst_argb, |
3281 struct YuvConstants* yuvconstants, | 3281 const struct YuvConstants* yuvconstants, |
3282 int width) { | 3282 int width) { |
3283 __asm { | 3283 __asm { |
// Naked function: ebx is saved and restored by hand and the arguments are
// read directly from the stack. In [esp + 4 + N] the first 4 skips the ebx
// pushed below; N is the argument's offset at function entry.
3284 push ebx | 3284 push ebx |
3285 mov eax, [esp + 4 + 4] // uyvy | 3285 mov eax, [esp + 4 + 4] // uyvy |
3286 mov edx, [esp + 4 + 8] // argb | 3286 mov edx, [esp + 4 + 8] // argb |
3287 mov ebx, [esp + 4 + 12] // yuvconstants | 3287 mov ebx, [esp + 4 + 12] // yuvconstants |
3288 mov ecx, [esp + 4 + 16] // width | 3288 mov ecx, [esp + 4 + 16] // width |
3289 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha | 3289 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha |
3290 | 3290 |
// One pass per group of 8 pixels. The remaining count in ecx is tested only
// after the body, so the body runs once even when width <= 0, and a width
// that is not a multiple of 8 still gets a full final group.
// NOTE(review): presumably callers pass a positive multiple of 8 or route
// remainders through a wrapper - confirm.
3291 convertloop: | 3291 convertloop: |
3292 READUYVY | 3292 READUYVY |
3293 YUVTORGB(ebx) | 3293 YUVTORGB(ebx) |
3294 STOREARGB | 3294 STOREARGB |
3295 | 3295 |
3296 sub ecx, 8 | 3296 sub ecx, 8 |
3297 jg convertloop | 3297 jg convertloop |
3298 | 3298 |
// Restore the saved ebx before the explicit return.
3299 pop ebx | 3299 pop ebx |
3300 ret | 3300 ret |
3301 } | 3301 } |
3302 } | 3302 } |
3303 | 3303 |
3304 __declspec(naked) | 3304 __declspec(naked) |
3305 void I422ToBGRARow_SSSE3(const uint8* y_buf, | 3305 void I422ToBGRARow_SSSE3(const uint8* y_buf, |
3306 const uint8* u_buf, | 3306 const uint8* u_buf, |
3307 const uint8* v_buf, | 3307 const uint8* v_buf, |
3308 uint8* dst_bgra, | 3308 uint8* dst_bgra, |
3309 struct YuvConstants* yuvconstants, | 3309 const struct YuvConstants* yuvconstants, |
3310 int width) { | 3310 int width) { |
3311 __asm { | 3311 __asm { |
3312 push esi | 3312 push esi |
3313 push edi | 3313 push edi |
3314 push ebx | 3314 push ebx |
3315 mov eax, [esp + 12 + 4] // Y | 3315 mov eax, [esp + 12 + 4] // Y |
3316 mov esi, [esp + 12 + 8] // U | 3316 mov esi, [esp + 12 + 8] // U |
3317 mov edi, [esp + 12 + 12] // V | 3317 mov edi, [esp + 12 + 12] // V |
3318 mov edx, [esp + 12 + 16] // bgra | 3318 mov edx, [esp + 12 + 16] // bgra |
3319 mov ebx, [esp + 12 + 20] // yuvconstants | 3319 mov ebx, [esp + 12 + 20] // yuvconstants |
(...skipping 13 matching lines...) Expand all Loading... |
3333 pop esi | 3333 pop esi |
3334 ret | 3334 ret |
3335 } | 3335 } |
3336 } | 3336 } |
3337 | 3337 |
3338 __declspec(naked) | 3338 __declspec(naked) |
3339 void I422ToABGRRow_SSSE3(const uint8* y_buf, | 3339 void I422ToABGRRow_SSSE3(const uint8* y_buf, |
3340 const uint8* u_buf, | 3340 const uint8* u_buf, |
3341 const uint8* v_buf, | 3341 const uint8* v_buf, |
3342 uint8* dst_abgr, | 3342 uint8* dst_abgr, |
3343 struct YuvConstants* yuvconstants, | 3343 const struct YuvConstants* yuvconstants, |
3344 int width) { | 3344 int width) { |
3345 __asm { | 3345 __asm { |
3346 push esi | 3346 push esi |
3347 push edi | 3347 push edi |
3348 push ebx | 3348 push ebx |
3349 mov eax, [esp + 12 + 4] // Y | 3349 mov eax, [esp + 12 + 4] // Y |
3350 mov esi, [esp + 12 + 8] // U | 3350 mov esi, [esp + 12 + 8] // U |
3351 mov edi, [esp + 12 + 12] // V | 3351 mov edi, [esp + 12 + 12] // V |
3352 mov edx, [esp + 12 + 16] // abgr | 3352 mov edx, [esp + 12 + 16] // abgr |
3353 mov ebx, [esp + 12 + 20] // yuvconstants | 3353 mov ebx, [esp + 12 + 20] // yuvconstants |
(...skipping 14 matching lines...) Expand all Loading... |
3368 pop esi | 3368 pop esi |
3369 ret | 3369 ret |
3370 } | 3370 } |
3371 } | 3371 } |
3372 | 3372 |
3373 __declspec(naked) | 3373 __declspec(naked) |
3374 void I422ToRGBARow_SSSE3(const uint8* y_buf, | 3374 void I422ToRGBARow_SSSE3(const uint8* y_buf, |
3375 const uint8* u_buf, | 3375 const uint8* u_buf, |
3376 const uint8* v_buf, | 3376 const uint8* v_buf, |
3377 uint8* dst_rgba, | 3377 uint8* dst_rgba, |
3378 struct YuvConstants* yuvconstants, | 3378 const struct YuvConstants* yuvconstants, |
3379 int width) { | 3379 int width) { |
3380 __asm { | 3380 __asm { |
3381 push esi | 3381 push esi |
3382 push edi | 3382 push edi |
3383 push ebx | 3383 push ebx |
3384 mov eax, [esp + 12 + 4] // Y | 3384 mov eax, [esp + 12 + 4] // Y |
3385 mov esi, [esp + 12 + 8] // U | 3385 mov esi, [esp + 12 + 8] // U |
3386 mov edi, [esp + 12 + 12] // V | 3386 mov edi, [esp + 12 + 12] // V |
3387 mov edx, [esp + 12 + 16] // rgba | 3387 mov edx, [esp + 12 + 16] // rgba |
3388 mov ebx, [esp + 12 + 20] // yuvconstants | 3388 mov ebx, [esp + 12 + 20] // yuvconstants |
(...skipping 3247 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6636 } | 6636 } |
6637 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 6637 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
6638 | 6638 |
6639 #endif // defined(_M_X64) | 6639 #endif // defined(_M_X64) |
6640 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) | 6640 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) |
6641 | 6641 |
6642 #ifdef __cplusplus | 6642 #ifdef __cplusplus |
6643 } // extern "C" | 6643 } // extern "C" |
6644 } // namespace libyuv | 6644 } // namespace libyuv |
6645 #endif | 6645 #endif |
OLD | NEW |