| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 2336 matching lines...) |
| 2347 | 2347 |
| 2348 sub ecx, 16 | 2348 sub ecx, 16 |
| 2349 jg convertloop | 2349 jg convertloop |
| 2350 | 2350 |
| 2351 pop ebx | 2351 pop ebx |
| 2352 vzeroupper | 2352 vzeroupper |
| 2353 ret | 2353 ret |
| 2354 } | 2354 } |
| 2355 } | 2355 } |
| 2356 | 2356 |
| 2357 | |
| 2358 #ifdef HAS_I422TOBGRAROW_AVX2 | |
| 2359 // 16 pixels | |
| 2360 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes). | |
| 2361 // TODO(fbarchard): Use macros to reduce duplicate code. See SSSE3. | |
| 2362 __declspec(naked) | |
| 2363 void I422ToBGRARow_AVX2(const uint8* y_buf, | |
| 2364 const uint8* u_buf, | |
| 2365 const uint8* v_buf, | |
| 2366 uint8* dst_argb, | |
| 2367 const struct YuvConstants* yuvconstants, | |
| 2368 int width) { | |
| 2369 __asm { | |
| 2370 push esi | |
| 2371 push edi | |
| 2372 push ebx | |
| 2373 mov eax, [esp + 12 + 4] // Y | |
| 2374 mov esi, [esp + 12 + 8] // U | |
| 2375 mov edi, [esp + 12 + 12] // V | |
| 2376 mov edx, [esp + 12 + 16] // bgra | |
| 2377 mov ebx, [esp + 12 + 20] // yuvconstants | |
| 2378 mov ecx, [esp + 12 + 24] // width | |
| 2379 sub edi, esi | |
| 2380 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha | |
| 2381 | |
| 2382 convertloop: | |
| 2383 READYUV422_AVX2 | |
| 2384 YUVTORGB_AVX2(ebx) | |
| 2385 STOREBGRA_AVX2 | |
| 2386 | |
| 2387 sub ecx, 16 | |
| 2388 jg convertloop | |
| 2389 | |
| 2390 pop ebx | |
| 2391 pop edi | |
| 2392 pop esi | |
| 2393 vzeroupper | |
| 2394 ret | |
| 2395 } | |
| 2396 } | |
| 2397 #endif // HAS_I422TOBGRAROW_AVX2 | |
| 2398 | |
| 2399 #ifdef HAS_I422TORGBAROW_AVX2 | 2357 #ifdef HAS_I422TORGBAROW_AVX2 |
| 2400 // 16 pixels | 2358 // 16 pixels |
| 2401 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes). | 2359 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes). |
| 2402 __declspec(naked) | 2360 __declspec(naked) |
| 2403 void I422ToRGBARow_AVX2(const uint8* y_buf, | 2361 void I422ToRGBARow_AVX2(const uint8* y_buf, |
| 2404 const uint8* u_buf, | 2362 const uint8* u_buf, |
| 2405 const uint8* v_buf, | 2363 const uint8* v_buf, |
| 2406 uint8* dst_argb, | 2364 uint8* dst_argb, |
| 2407 const struct YuvConstants* yuvconstants, | 2365 const struct YuvConstants* yuvconstants, |
| 2408 int width) { | 2366 int width) { |
| (...skipping 333 matching lines...) |
| 2742 sub ecx, 8 | 2700 sub ecx, 8 |
| 2743 jg convertloop | 2701 jg convertloop |
| 2744 | 2702 |
| 2745 pop ebx | 2703 pop ebx |
| 2746 pop edi | 2704 pop edi |
| 2747 pop esi | 2705 pop esi |
| 2748 ret | 2706 ret |
| 2749 } | 2707 } |
| 2750 } | 2708 } |
| 2751 | 2709 |
| 2752 // 8 pixels. | |
| 2753 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RAW (24 bytes). | |
| 2754 __declspec(naked) | |
| 2755 void I422ToRAWRow_SSSE3(const uint8* y_buf, | |
| 2756 const uint8* u_buf, | |
| 2757 const uint8* v_buf, | |
| 2758 uint8* dst_raw, | |
| 2759 const struct YuvConstants* yuvconstants, | |
| 2760 int width) { | |
| 2761 __asm { | |
| 2762 push esi | |
| 2763 push edi | |
| 2764 push ebx | |
| 2765 mov eax, [esp + 12 + 4] // Y | |
| 2766 mov esi, [esp + 12 + 8] // U | |
| 2767 mov edi, [esp + 12 + 12] // V | |
| 2768 mov edx, [esp + 12 + 16] // raw | |
| 2769 mov ebx, [esp + 12 + 20] // yuvconstants | |
| 2770 mov ecx, [esp + 12 + 24] // width | |
| 2771 sub edi, esi | |
| 2772 movdqa xmm5, xmmword ptr kShuffleMaskARGBToRAW_0 | |
| 2773 movdqa xmm6, xmmword ptr kShuffleMaskARGBToRAW | |
| 2774 | |
| 2775 convertloop: | |
| 2776 READYUV422 | |
| 2777 YUVTORGB(ebx) | |
| 2778 STORERAW | |
| 2779 | |
| 2780 sub ecx, 8 | |
| 2781 jg convertloop | |
| 2782 | |
| 2783 pop ebx | |
| 2784 pop edi | |
| 2785 pop esi | |
| 2786 ret | |
| 2787 } | |
| 2788 } | |
| 2789 | |
| 2790 // 8 pixels | 2710 // 8 pixels |
| 2791 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB565 (16 bytes). | 2711 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB565 (16 bytes). |
| 2792 __declspec(naked) | 2712 __declspec(naked) |
| 2793 void I422ToRGB565Row_SSSE3(const uint8* y_buf, | 2713 void I422ToRGB565Row_SSSE3(const uint8* y_buf, |
| 2794 const uint8* u_buf, | 2714 const uint8* u_buf, |
| 2795 const uint8* v_buf, | 2715 const uint8* v_buf, |
| 2796 uint8* rgb565_buf, | 2716 uint8* rgb565_buf, |
| 2797 const struct YuvConstants* yuvconstants, | 2717 const struct YuvConstants* yuvconstants, |
| 2798 int width) { | 2718 int width) { |
| 2799 __asm { | 2719 __asm { |
| (...skipping 259 matching lines...) |
| 3059 | 2979 |
| 3060 sub ecx, 8 | 2980 sub ecx, 8 |
| 3061 jg convertloop | 2981 jg convertloop |
| 3062 | 2982 |
| 3063 pop ebx | 2983 pop ebx |
| 3064 ret | 2984 ret |
| 3065 } | 2985 } |
| 3066 } | 2986 } |
| 3067 | 2987 |
| 3068 __declspec(naked) | 2988 __declspec(naked) |
| 3069 void I422ToBGRARow_SSSE3(const uint8* y_buf, | |
| 3070 const uint8* u_buf, | |
| 3071 const uint8* v_buf, | |
| 3072 uint8* dst_bgra, | |
| 3073 const struct YuvConstants* yuvconstants, | |
| 3074 int width) { | |
| 3075 __asm { | |
| 3076 push esi | |
| 3077 push edi | |
| 3078 push ebx | |
| 3079 mov eax, [esp + 12 + 4] // Y | |
| 3080 mov esi, [esp + 12 + 8] // U | |
| 3081 mov edi, [esp + 12 + 12] // V | |
| 3082 mov edx, [esp + 12 + 16] // bgra | |
| 3083 mov ebx, [esp + 12 + 20] // yuvconstants | |
| 3084 mov ecx, [esp + 12 + 24] // width | |
| 3085 sub edi, esi | |
| 3086 | |
| 3087 convertloop: | |
| 3088 READYUV422 | |
| 3089 YUVTORGB(ebx) | |
| 3090 STOREBGRA | |
| 3091 | |
| 3092 sub ecx, 8 | |
| 3093 jg convertloop | |
| 3094 | |
| 3095 pop ebx | |
| 3096 pop edi | |
| 3097 pop esi | |
| 3098 ret | |
| 3099 } | |
| 3100 } | |
| 3101 | |
| 3102 __declspec(naked) | |
| 3103 void I422ToABGRRow_SSSE3(const uint8* y_buf, | |
| 3104 const uint8* u_buf, | |
| 3105 const uint8* v_buf, | |
| 3106 uint8* dst_abgr, | |
| 3107 const struct YuvConstants* yuvconstants, | |
| 3108 int width) { | |
| 3109 __asm { | |
| 3110 push esi | |
| 3111 push edi | |
| 3112 push ebx | |
| 3113 mov eax, [esp + 12 + 4] // Y | |
| 3114 mov esi, [esp + 12 + 8] // U | |
| 3115 mov edi, [esp + 12 + 12] // V | |
| 3116 mov edx, [esp + 12 + 16] // abgr | |
| 3117 mov ebx, [esp + 12 + 20] // yuvconstants | |
| 3118 mov ecx, [esp + 12 + 24] // width | |
| 3119 sub edi, esi | |
| 3120 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha | |
| 3121 | |
| 3122 convertloop: | |
| 3123 READYUV422 | |
| 3124 YUVTORGB(ebx) | |
| 3125 STOREABGR | |
| 3126 | |
| 3127 sub ecx, 8 | |
| 3128 jg convertloop | |
| 3129 | |
| 3130 pop ebx | |
| 3131 pop edi | |
| 3132 pop esi | |
| 3133 ret | |
| 3134 } | |
| 3135 } | |
| 3136 | |
| 3137 __declspec(naked) | |
| 3138 void I422ToRGBARow_SSSE3(const uint8* y_buf, | 2989 void I422ToRGBARow_SSSE3(const uint8* y_buf, |
| 3139 const uint8* u_buf, | 2990 const uint8* u_buf, |
| 3140 const uint8* v_buf, | 2991 const uint8* v_buf, |
| 3141 uint8* dst_rgba, | 2992 uint8* dst_rgba, |
| 3142 const struct YuvConstants* yuvconstants, | 2993 const struct YuvConstants* yuvconstants, |
| 3143 int width) { | 2994 int width) { |
| 3144 __asm { | 2995 __asm { |
| 3145 push esi | 2996 push esi |
| 3146 push edi | 2997 push edi |
| 3147 push ebx | 2998 push ebx |
| (...skipping 3254 matching lines...) |
| 6402 } | 6253 } |
| 6403 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 6254 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
| 6404 | 6255 |
| 6405 #endif // defined(_M_X64) | 6256 #endif // defined(_M_X64) |
| 6406 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) | 6257 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) |
| 6407 | 6258 |
| 6408 #ifdef __cplusplus | 6259 #ifdef __cplusplus |
| 6409 } // extern "C" | 6260 } // extern "C" |
| 6410 } // namespace libyuv | 6261 } // namespace libyuv |
| 6411 #endif | 6262 #endif |
| OLD | NEW |