OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 2154 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2165 | 2165 |
2166 pop ebp | 2166 pop ebp |
2167 pop edi | 2167 pop edi |
2168 pop esi | 2168 pop esi |
2169 vzeroupper | 2169 vzeroupper |
2170 ret | 2170 ret |
2171 } | 2171 } |
2172 } | 2172 } |
2173 #endif // HAS_I422TOARGBMATRIXROW_AVX2 | 2173 #endif // HAS_I422TOARGBMATRIXROW_AVX2 |
2174 | 2174 |
2175 #ifdef HAS_I444TOARGBROW_AVX2 | 2175 #ifdef HAS_I444TOARGBMATRIXROW_AVX2 |
2176 // 16 pixels | 2176 // 16 pixels |
2177 // 16 UV values with 16 Y producing 16 ARGB (64 bytes). | 2177 // 16 UV values with 16 Y producing 16 ARGB (64 bytes). |
2178 __declspec(naked) | 2178 __declspec(naked) |
2179 void I444ToARGBRow_AVX2(const uint8* y_buf, | 2179 void I444ToARGBMatrixRow_AVX2(const uint8* y_buf, |
2180 const uint8* u_buf, | 2180 const uint8* u_buf, |
2181 const uint8* v_buf, | 2181 const uint8* v_buf, |
2182 uint8* dst_argb, | 2182 uint8* dst_argb, |
2183 int width) { | 2183 struct YuvConstants* YuvConstants, |
| 2184 int width) { |
2184 __asm { | 2185 __asm { |
2185 push esi | 2186 push esi |
2186 push edi | 2187 push edi |
2187 mov eax, [esp + 8 + 4] // Y | 2188 push ebp |
2188 mov esi, [esp + 8 + 8] // U | 2189 mov eax, [esp + 12 + 4] // Y |
2189 mov edi, [esp + 8 + 12] // V | 2190 mov esi, [esp + 12 + 8] // U |
2190 mov edx, [esp + 8 + 16] // argb | 2191 mov edi, [esp + 12 + 12] // V |
2191 mov ecx, [esp + 8 + 20] // width | 2192 mov edx, [esp + 12 + 16] // argb |
| 2193 mov ebp, [esp + 12 + 20] // YuvConstants |
| 2194 mov ecx, [esp + 12 + 24] // width |
2192 sub edi, esi | 2195 sub edi, esi |
2193 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha | 2196 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha |
2194 | |
2195 convertloop: | 2197 convertloop: |
2196 READYUV444_AVX2 | 2198 READYUV444_AVX2 |
2197 YUVTORGB_AVX2(kYuvConstants) | 2199 YUVTORGB_AVX2(ebp) |
2198 STOREARGB_AVX2 | 2200 STOREARGB_AVX2 |
2199 | 2201 |
2200 sub ecx, 16 | 2202 sub ecx, 16 |
2201 jg convertloop | 2203 jg convertloop |
2202 | 2204 |
| 2205 pop ebp |
2203 pop edi | 2206 pop edi |
2204 pop esi | 2207 pop esi |
2205 vzeroupper | 2208 vzeroupper |
2206 ret | 2209 ret |
2207 } | 2210 } |
2208 } | 2211 } |
2209 #endif // HAS_I444TOARGBROW_AVX2 | 2212 #endif // HAS_I444TOARGBMATRIXROW_AVX2 |
| 2213 |
| 2214 #ifdef HAS_I444TOABGRMATRIXROW_AVX2 |
| 2215 // 16 pixels |
| 2216 // 16 UV values with 16 Y producing 16 ABGR (64 bytes). |
| 2217 __declspec(naked) |
| 2218 void I444ToABGRMatrixRow_AVX2(const uint8* y_buf, |
| 2219 const uint8* u_buf, |
| 2220 const uint8* v_buf, |
| 2221 uint8* dst_abgr, |
| 2222 struct YuvConstants* YuvConstants, |
| 2223 int width) { |
| 2224 __asm { |
| 2225 push esi |
| 2226 push edi |
| 2227 push ebp |
| 2228 mov eax, [esp + 12 + 4] // Y |
| 2229 mov esi, [esp + 12 + 8] // U |
| 2230 mov edi, [esp + 12 + 12] // V |
| 2231 mov edx, [esp + 12 + 16] // abgr |
| 2232 mov ebp, [esp + 12 + 20] // YuvConstants |
| 2233 mov ecx, [esp + 12 + 24] // width |
| 2234 sub edi, esi |
| 2235 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha |
| 2236 convertloop: |
| 2237 READYUV444_AVX2 |
| 2238 YUVTORGB_AVX2(ebp) |
| 2239 STOREABGR_AVX2 |
| 2240 |
| 2241 sub ecx, 16 |
| 2242 jg convertloop |
| 2243 |
| 2244 pop ebp |
| 2245 pop edi |
| 2246 pop esi |
| 2247 vzeroupper |
| 2248 ret |
| 2249 } |
| 2250 } |
| 2251 #endif // HAS_I444TOABGRMATRIXROW_AVX2 |
2210 | 2252 |
2211 #ifdef HAS_I411TOARGBROW_AVX2 | 2253 #ifdef HAS_I411TOARGBROW_AVX2 |
2212 // 16 pixels | 2254 // 16 pixels |
2213 // 4 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). | 2255 // 4 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). |
2214 __declspec(naked) | 2256 __declspec(naked) |
2215 void I411ToARGBRow_AVX2(const uint8* y_buf, | 2257 void I411ToARGBRow_AVX2(const uint8* y_buf, |
2216 const uint8* u_buf, | 2258 const uint8* u_buf, |
2217 const uint8* v_buf, | 2259 const uint8* v_buf, |
2218 uint8* dst_argb, | 2260 uint8* dst_argb, |
2219 int width) { | 2261 int width) { |
(...skipping 381 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2601 __asm por xmm3, xmm2 /* BG */ \ | 2643 __asm por xmm3, xmm2 /* BG */ \ |
2602 __asm por xmm1, xmm3 /* BGR */ \ | 2644 __asm por xmm1, xmm3 /* BGR */ \ |
2603 __asm packssdw xmm0, xmm1 \ | 2645 __asm packssdw xmm0, xmm1 \ |
2604 __asm movdqu 0[edx], xmm0 /* store 8 pixels of RGB565 */ \ | 2646 __asm movdqu 0[edx], xmm0 /* store 8 pixels of RGB565 */ \ |
2605 __asm lea edx, [edx + 16] \ | 2647 __asm lea edx, [edx + 16] \ |
2606 } | 2648 } |
2607 | 2649 |
2608 // 8 pixels. | 2650 // 8 pixels. |
2609 // 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes). | 2651 // 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes). |
2610 __declspec(naked) | 2652 __declspec(naked) |
2611 void I444ToARGBRow_SSSE3(const uint8* y_buf, | 2653 void I444ToARGBMatrixRow_SSSE3(const uint8* y_buf, |
2612 const uint8* u_buf, | 2654 const uint8* u_buf, |
2613 const uint8* v_buf, | 2655 const uint8* v_buf, |
2614 uint8* dst_argb, | 2656 uint8* dst_argb, |
2615 int width) { | 2657 struct YuvConstants* YuvConstants, |
| 2658 int width) { |
2616 __asm { | 2659 __asm { |
2617 push esi | 2660 push esi |
2618 push edi | 2661 push edi |
2619 mov eax, [esp + 8 + 4] // Y | 2662 push ebp |
2620 mov esi, [esp + 8 + 8] // U | 2663 mov eax, [esp + 12 + 4] // Y |
2621 mov edi, [esp + 8 + 12] // V | 2664 mov esi, [esp + 12 + 8] // U |
2622 mov edx, [esp + 8 + 16] // argb | 2665 mov edi, [esp + 12 + 12] // V |
2623 mov ecx, [esp + 8 + 20] // width | 2666 mov edx, [esp + 12 + 16] // argb |
| 2667 mov ebp, [esp + 12 + 20] // YuvConstants |
| 2668 mov ecx, [esp + 12 + 24] // width |
2624 sub edi, esi | 2669 sub edi, esi |
2625 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha | 2670 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha |
2626 | 2671 |
2627 convertloop: | 2672 convertloop: |
2628 READYUV444 | 2673 READYUV444 |
2629 YUVTORGB(kYuvConstants) | 2674 YUVTORGB(ebp) |
2630 STOREARGB | 2675 STOREARGB |
2631 | 2676 |
2632 sub ecx, 8 | 2677 sub ecx, 8 |
2633 jg convertloop | 2678 jg convertloop |
2634 | 2679 |
| 2680 pop ebp |
2635 pop edi | 2681 pop edi |
2636 pop esi | 2682 pop esi |
2637 ret | 2683 ret |
| 2684 } |
| 2685 } |
| 2686 |
| 2687 // 8 pixels. |
| 2688 // 8 UV values, mixed with 8 Y producing 8 ABGR (32 bytes). |
| 2689 __declspec(naked) |
| 2690 void I444ToABGRMatrixRow_SSSE3(const uint8* y_buf, |
| 2691 const uint8* u_buf, |
| 2692 const uint8* v_buf, |
| 2693 uint8* dst_abgr, |
| 2694 struct YuvConstants* YuvConstants, |
| 2695 int width) { |
| 2696 __asm { |
| 2697 push esi |
| 2698 push edi |
| 2699 push ebp |
| 2700 mov eax, [esp + 12 + 4] // Y |
| 2701 mov esi, [esp + 12 + 8] // U |
| 2702 mov edi, [esp + 12 + 12] // V |
| 2703 mov edx, [esp + 12 + 16] // abgr |
| 2704 mov ebp, [esp + 12 + 20] // YuvConstants |
| 2705 mov ecx, [esp + 12 + 24] // width |
| 2706 sub edi, esi |
| 2707 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha |
| 2708 |
| 2709 convertloop: |
| 2710 READYUV444 |
| 2711 YUVTORGB(ebp) |
| 2712 STOREABGR |
| 2713 |
| 2714 sub ecx, 8 |
| 2715 jg convertloop |
| 2716 |
| 2717 pop ebp |
| 2718 pop edi |
| 2719 pop esi |
| 2720 ret |
2638 } | 2721 } |
2639 } | 2722 } |
2640 | 2723 |
2641 // 8 pixels. | 2724 // 8 pixels. |
2642 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB24 (24 bytes). | 2725 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB24 (24 bytes). |
2643 __declspec(naked) | 2726 __declspec(naked) |
2644 void I422ToRGB24Row_SSSE3(const uint8* y_buf, | 2727 void I422ToRGB24Row_SSSE3(const uint8* y_buf, |
2645 const uint8* u_buf, | 2728 const uint8* u_buf, |
2646 const uint8* v_buf, | 2729 const uint8* v_buf, |
2647 uint8* dst_rgb24, | 2730 uint8* dst_rgb24, |
(...skipping 3716 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6364 } | 6447 } |
6365 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 6448 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
6366 | 6449 |
6367 #endif // defined(_M_X64) | 6450 #endif // defined(_M_X64) |
6368 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) | 6451 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) |
6369 | 6452 |
6370 #ifdef __cplusplus | 6453 #ifdef __cplusplus |
6371 } // extern "C" | 6454 } // extern "C" |
6372 } // namespace libyuv | 6455 } // namespace libyuv |
6373 #endif | 6456 #endif |
OLD | NEW |