| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 2154 matching lines...) |
| 2165 | 2165 |
| 2166 pop ebp | 2166 pop ebp |
| 2167 pop edi | 2167 pop edi |
| 2168 pop esi | 2168 pop esi |
| 2169 vzeroupper | 2169 vzeroupper |
| 2170 ret | 2170 ret |
| 2171 } | 2171 } |
| 2172 } | 2172 } |
| 2173 #endif // HAS_I422TOARGBMATRIXROW_AVX2 | 2173 #endif // HAS_I422TOARGBMATRIXROW_AVX2 |
| 2174 | 2174 |
| 2175 #ifdef HAS_I444TOARGBROW_AVX2 | 2175 #ifdef HAS_I444TOARGBMATRIXROW_AVX2 |
| 2176 // 16 pixels | 2176 // 16 pixels |
| 2177 // 16 UV values with 16 Y producing 16 ARGB (64 bytes). | 2177 // 16 UV values with 16 Y producing 16 ARGB (64 bytes). |
| 2178 __declspec(naked) | 2178 __declspec(naked) |
| 2179 void I444ToARGBRow_AVX2(const uint8* y_buf, | 2179 void I444ToARGBMatrixRow_AVX2(const uint8* y_buf, |
| 2180 const uint8* u_buf, | 2180 const uint8* u_buf, |
| 2181 const uint8* v_buf, | 2181 const uint8* v_buf, |
| 2182 uint8* dst_argb, | 2182 uint8* dst_argb, |
| 2183 int width) { | 2183 struct YuvConstants* YuvConstants, |
| 2184 int width) { |
| 2184 __asm { | 2185 __asm { |
| 2185 push esi | 2186 push esi |
| 2186 push edi | 2187 push edi |
| 2187 mov eax, [esp + 8 + 4] // Y | 2188 push ebp |
| 2188 mov esi, [esp + 8 + 8] // U | 2189 mov eax, [esp + 12 + 4] // Y |
| 2189 mov edi, [esp + 8 + 12] // V | 2190 mov esi, [esp + 12 + 8] // U |
| 2190 mov edx, [esp + 8 + 16] // argb | 2191 mov edi, [esp + 12 + 12] // V |
| 2191 mov ecx, [esp + 8 + 20] // width | 2192 mov edx, [esp + 12 + 16] // argb |
| 2193 mov ebp, [esp + 12 + 20] // YuvConstants |
| 2194 mov ecx, [esp + 12 + 24] // width |
| 2192 sub edi, esi | 2195 sub edi, esi |
| 2193 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha | 2196 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha |
| 2194 | |
| 2195 convertloop: | 2197 convertloop: |
| 2196 READYUV444_AVX2 | 2198 READYUV444_AVX2 |
| 2197 YUVTORGB_AVX2(kYuvConstants) | 2199 YUVTORGB_AVX2(ebp) |
| 2198 STOREARGB_AVX2 | 2200 STOREARGB_AVX2 |
| 2199 | 2201 |
| 2200 sub ecx, 16 | 2202 sub ecx, 16 |
| 2201 jg convertloop | 2203 jg convertloop |
| 2202 | 2204 |
| 2205 pop ebp |
| 2203 pop edi | 2206 pop edi |
| 2204 pop esi | 2207 pop esi |
| 2205 vzeroupper | 2208 vzeroupper |
| 2206 ret | 2209 ret |
| 2207 } | 2210 } |
| 2208 } | 2211 } |
| 2209 #endif // HAS_I444TOARGBROW_AVX2 | 2212 #endif // HAS_I444TOARGBMATRIXROW_AVX2 |
| 2213 |
| 2214 #ifdef HAS_I444TOABGRMATRIXROW_AVX2 |
| 2215 // 16 pixels |
| 2216 // 16 UV values with 16 Y producing 16 ABGR (64 bytes). |
| 2217 __declspec(naked) |
| 2218 void I444ToABGRMatrixRow_AVX2(const uint8* y_buf, |
| 2219 const uint8* u_buf, |
| 2220 const uint8* v_buf, |
| 2221 uint8* dst_abgr, |
| 2222 struct YuvConstants* YuvConstants, |
| 2223 int width) { |
| 2224 __asm { |
| 2225 push esi |
| 2226 push edi |
| 2227 push ebp |
| 2228 mov eax, [esp + 12 + 4] // Y |
| 2229 mov esi, [esp + 12 + 8] // U |
| 2230 mov edi, [esp + 12 + 12] // V |
| 2231 mov edx, [esp + 12 + 16] // abgr |
| 2232 mov ebp, [esp + 12 + 20] // YuvConstants |
| 2233 mov ecx, [esp + 12 + 24] // width |
| 2234 sub edi, esi |
| 2235 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha |
| 2236 convertloop: |
| 2237 READYUV444_AVX2 |
| 2238 YUVTORGB_AVX2(ebp) |
| 2239 STOREABGR_AVX2 |
| 2240 |
| 2241 sub ecx, 16 |
| 2242 jg convertloop |
| 2243 |
| 2244 pop ebp |
| 2245 pop edi |
| 2246 pop esi |
| 2247 vzeroupper |
| 2248 ret |
| 2249 } |
| 2250 } |
| 2251 #endif // HAS_I444TOABGRMATRIXROW_AVX2 |
| 2210 | 2252 |
| 2211 #ifdef HAS_I411TOARGBROW_AVX2 | 2253 #ifdef HAS_I411TOARGBROW_AVX2 |
| 2212 // 16 pixels | 2254 // 16 pixels |
| 2213 // 4 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). | 2255 // 4 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). |
| 2214 __declspec(naked) | 2256 __declspec(naked) |
| 2215 void I411ToARGBRow_AVX2(const uint8* y_buf, | 2257 void I411ToARGBRow_AVX2(const uint8* y_buf, |
| 2216 const uint8* u_buf, | 2258 const uint8* u_buf, |
| 2217 const uint8* v_buf, | 2259 const uint8* v_buf, |
| 2218 uint8* dst_argb, | 2260 uint8* dst_argb, |
| 2219 int width) { | 2261 int width) { |
| (...skipping 381 matching lines...) |
| 2601 __asm por xmm3, xmm2 /* BG */ \ | 2643 __asm por xmm3, xmm2 /* BG */ \ |
| 2602 __asm por xmm1, xmm3 /* BGR */ \ | 2644 __asm por xmm1, xmm3 /* BGR */ \ |
| 2603 __asm packssdw xmm0, xmm1 \ | 2645 __asm packssdw xmm0, xmm1 \ |
| 2604 __asm movdqu 0[edx], xmm0 /* store 8 pixels of RGB565 */ \ | 2646 __asm movdqu 0[edx], xmm0 /* store 8 pixels of RGB565 */ \ |
| 2605 __asm lea edx, [edx + 16] \ | 2647 __asm lea edx, [edx + 16] \ |
| 2606 } | 2648 } |
| 2607 | 2649 |
| 2608 // 8 pixels. | 2650 // 8 pixels. |
| 2609 // 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes). | 2651 // 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes). |
| 2610 __declspec(naked) | 2652 __declspec(naked) |
| 2611 void I444ToARGBRow_SSSE3(const uint8* y_buf, | 2653 void I444ToARGBMatrixRow_SSSE3(const uint8* y_buf, |
| 2612 const uint8* u_buf, | 2654 const uint8* u_buf, |
| 2613 const uint8* v_buf, | 2655 const uint8* v_buf, |
| 2614 uint8* dst_argb, | 2656 uint8* dst_argb, |
| 2615 int width) { | 2657 struct YuvConstants* YuvConstants, |
| 2658 int width) { |
| 2616 __asm { | 2659 __asm { |
| 2617 push esi | 2660 push esi |
| 2618 push edi | 2661 push edi |
| 2619 mov eax, [esp + 8 + 4] // Y | 2662 push ebp |
| 2620 mov esi, [esp + 8 + 8] // U | 2663 mov eax, [esp + 12 + 4] // Y |
| 2621 mov edi, [esp + 8 + 12] // V | 2664 mov esi, [esp + 12 + 8] // U |
| 2622 mov edx, [esp + 8 + 16] // argb | 2665 mov edi, [esp + 12 + 12] // V |
| 2623 mov ecx, [esp + 8 + 20] // width | 2666 mov edx, [esp + 12 + 16] // argb |
| 2667 mov ebp, [esp + 12 + 20] // YuvConstants |
| 2668 mov ecx, [esp + 12 + 24] // width |
| 2624 sub edi, esi | 2669 sub edi, esi |
| 2625 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha | 2670 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha |
| 2626 | 2671 |
| 2627 convertloop: | 2672 convertloop: |
| 2628 READYUV444 | 2673 READYUV444 |
| 2629 YUVTORGB(kYuvConstants) | 2674 YUVTORGB(ebp) |
| 2630 STOREARGB | 2675 STOREARGB |
| 2631 | 2676 |
| 2632 sub ecx, 8 | 2677 sub ecx, 8 |
| 2633 jg convertloop | 2678 jg convertloop |
| 2634 | 2679 |
| 2680 pop ebp |
| 2635 pop edi | 2681 pop edi |
| 2636 pop esi | 2682 pop esi |
| 2637 ret | 2683 ret |
| 2684 } |
| 2685 } |
| 2686 |
| 2687 // 8 pixels. |
| 2688 // 8 UV values, mixed with 8 Y producing 8 ABGR (32 bytes). |
| 2689 __declspec(naked) |
| 2690 void I444ToABGRMatrixRow_SSSE3(const uint8* y_buf, |
| 2691 const uint8* u_buf, |
| 2692 const uint8* v_buf, |
| 2693 uint8* dst_abgr, |
| 2694 struct YuvConstants* YuvConstants, |
| 2695 int width) { |
| 2696 __asm { |
| 2697 push esi |
| 2698 push edi |
| 2699 push ebp |
| 2700 mov eax, [esp + 12 + 4] // Y |
| 2701 mov esi, [esp + 12 + 8] // U |
| 2702 mov edi, [esp + 12 + 12] // V |
| 2703 mov edx, [esp + 12 + 16] // abgr |
| 2704 mov ebp, [esp + 12 + 20] // YuvConstants |
| 2705 mov ecx, [esp + 12 + 24] // width |
| 2706 sub edi, esi |
| 2707 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha |
| 2708 |
| 2709 convertloop: |
| 2710 READYUV444 |
| 2711 YUVTORGB(ebp) |
| 2712 STOREABGR |
| 2713 |
| 2714 sub ecx, 8 |
| 2715 jg convertloop |
| 2716 |
| 2717 pop ebp |
| 2718 pop edi |
| 2719 pop esi |
| 2720 ret |
| 2638 } | 2721 } |
| 2639 } | 2722 } |
| 2640 | 2723 |
| 2641 // 8 pixels. | 2724 // 8 pixels. |
| 2642 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB24 (24 bytes). | 2725 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB24 (24 bytes). |
| 2643 __declspec(naked) | 2726 __declspec(naked) |
| 2644 void I422ToRGB24Row_SSSE3(const uint8* y_buf, | 2727 void I422ToRGB24Row_SSSE3(const uint8* y_buf, |
| 2645 const uint8* u_buf, | 2728 const uint8* u_buf, |
| 2646 const uint8* v_buf, | 2729 const uint8* v_buf, |
| 2647 uint8* dst_rgb24, | 2730 uint8* dst_rgb24, |
| (...skipping 3716 matching lines...) |
| 6364 } | 6447 } |
| 6365 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 6448 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
| 6366 | 6449 |
| 6367 #endif // defined(_M_X64) | 6450 #endif // defined(_M_X64) |
| 6368 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) | 6451 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) |
| 6369 | 6452 |
| 6370 #ifdef __cplusplus | 6453 #ifdef __cplusplus |
| 6371 } // extern "C" | 6454 } // extern "C" |
| 6372 } // namespace libyuv | 6455 } // namespace libyuv |
| 6373 #endif | 6456 #endif |
| OLD | NEW |