OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 2336 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2347 | 2347 |
2348 sub ecx, 16 | 2348 sub ecx, 16 |
2349 jg convertloop | 2349 jg convertloop |
2350 | 2350 |
2351 pop ebx | 2351 pop ebx |
2352 vzeroupper | 2352 vzeroupper |
2353 ret | 2353 ret |
2354 } | 2354 } |
2355 } | 2355 } |
2356 | 2356 |
2357 | |
2358 #ifdef HAS_I422TOBGRAROW_AVX2 | |
2359 // 16 pixels per loop iteration (width is decremented by 16 each pass). | |
2360 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes). | |
2361 // TODO(fbarchard): Use macros to reduce duplicate code. See SSSE3. | |
2362 __declspec(naked) | |
2363 void I422ToBGRARow_AVX2(const uint8* y_buf, | |
2364 const uint8* u_buf, | |
2365 const uint8* v_buf, | |
2366 uint8* dst_argb, | |
2367 const struct YuvConstants* yuvconstants, | |
2368 int width) { | |
2369 __asm { | |
2370 push esi | |
2371 push edi | |
2372 push ebx | |
2373 mov eax, [esp + 12 + 4] // y_buf; the 12 skips the three registers pushed above | |
2374 mov esi, [esp + 12 + 8] // u_buf | |
2375 mov edi, [esp + 12 + 12] // v_buf; the sub below turns edi into (v_buf - u_buf) | |
2376 mov edx, [esp + 12 + 16] // dst_argb (BGRA destination, despite the parameter name) | |
2377 mov ebx, [esp + 12 + 20] // yuvconstants, handed to YUVTORGB_AVX2 via ebx | |
2378 mov ecx, [esp + 12 + 24] // width; loop runs while it stays above zero | |
2379 sub edi, esi | |
2380 vpcmpeqb ymm5, ymm5, ymm5 // set every bit of ymm5 (0xff bytes) for alpha | |
2381 | |
2382 convertloop: | |
2383 READYUV422_AVX2 | |
2384 YUVTORGB_AVX2(ebx) | |
2385 STOREBGRA_AVX2 | |
2386 | |
2387 sub ecx, 16 | |
2388 jg convertloop | |
2389 | |
2390 pop ebx | |
2391 pop edi | |
2392 pop esi | |
2393 vzeroupper | |
2394 ret | |
2395 } | |
2396 } | |
2397 #endif // HAS_I422TOBGRAROW_AVX2 | |
2398 | |
2399 #ifdef HAS_I422TORGBAROW_AVX2 | 2357 #ifdef HAS_I422TORGBAROW_AVX2 |
2400 // 16 pixels | 2358 // 16 pixels |
2401 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes). | 2359 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes). |
2402 __declspec(naked) | 2360 __declspec(naked) |
2403 void I422ToRGBARow_AVX2(const uint8* y_buf, | 2361 void I422ToRGBARow_AVX2(const uint8* y_buf, |
2404 const uint8* u_buf, | 2362 const uint8* u_buf, |
2405 const uint8* v_buf, | 2363 const uint8* v_buf, |
2406 uint8* dst_argb, | 2364 uint8* dst_argb, |
2407 const struct YuvConstants* yuvconstants, | 2365 const struct YuvConstants* yuvconstants, |
2408 int width) { | 2366 int width) { |
(...skipping 333 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2742 sub ecx, 8 | 2700 sub ecx, 8 |
2743 jg convertloop | 2701 jg convertloop |
2744 | 2702 |
2745 pop ebx | 2703 pop ebx |
2746 pop edi | 2704 pop edi |
2747 pop esi | 2705 pop esi |
2748 ret | 2706 ret |
2749 } | 2707 } |
2750 } | 2708 } |
2751 | 2709 |
2752 // 8 pixels per loop iteration (width is decremented by 8 each pass). | |
2753 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RAW (24 bytes). | |
2754 __declspec(naked) | |
2755 void I422ToRAWRow_SSSE3(const uint8* y_buf, | |
2756 const uint8* u_buf, | |
2757 const uint8* v_buf, | |
2758 uint8* dst_raw, | |
2759 const struct YuvConstants* yuvconstants, | |
2760 int width) { | |
2761 __asm { | |
2762 push esi | |
2763 push edi | |
2764 push ebx | |
2765 mov eax, [esp + 12 + 4] // y_buf; the 12 skips the three registers pushed above | |
2766 mov esi, [esp + 12 + 8] // u_buf | |
2767 mov edi, [esp + 12 + 12] // v_buf; the sub below turns edi into (v_buf - u_buf) | |
2768 mov edx, [esp + 12 + 16] // dst_raw (RAW destination) | |
2769 mov ebx, [esp + 12 + 20] // yuvconstants, handed to YUVTORGB via ebx | |
2770 mov ecx, [esp + 12 + 24] // width; loop runs while it stays above zero | |
2771 sub edi, esi | |
2772 movdqa xmm5, xmmword ptr kShuffleMaskARGBToRAW_0 | |
2773 movdqa xmm6, xmmword ptr kShuffleMaskARGBToRAW | |
2774 | |
2775 convertloop: | |
2776 READYUV422 | |
2777 YUVTORGB(ebx) | |
2778 STORERAW | |
2779 | |
2780 sub ecx, 8 | |
2781 jg convertloop | |
2782 | |
2783 pop ebx | |
2784 pop edi | |
2785 pop esi | |
2786 ret | |
2787 } | |
2788 } | |
2789 | |
2790 // 8 pixels | 2710 // 8 pixels |
2791 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB565 (16 bytes). | 2711 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB565 (16 bytes). |
2792 __declspec(naked) | 2712 __declspec(naked) |
2793 void I422ToRGB565Row_SSSE3(const uint8* y_buf, | 2713 void I422ToRGB565Row_SSSE3(const uint8* y_buf, |
2794 const uint8* u_buf, | 2714 const uint8* u_buf, |
2795 const uint8* v_buf, | 2715 const uint8* v_buf, |
2796 uint8* rgb565_buf, | 2716 uint8* rgb565_buf, |
2797 const struct YuvConstants* yuvconstants, | 2717 const struct YuvConstants* yuvconstants, |
2798 int width) { | 2718 int width) { |
2799 __asm { | 2719 __asm { |
(...skipping 259 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3059 | 2979 |
3060 sub ecx, 8 | 2980 sub ecx, 8 |
3061 jg convertloop | 2981 jg convertloop |
3062 | 2982 |
3063 pop ebx | 2983 pop ebx |
3064 ret | 2984 ret |
3065 } | 2985 } |
3066 } | 2986 } |
3067 | 2987 |
// Naked row function: each loop pass runs READYUV422, YUVTORGB(ebx) and STOREBGRA | |
// (macros defined outside this view) and consumes 8 from width until it is <= 0. | |
3068 __declspec(naked) | 2988 __declspec(naked) |
3069 void I422ToBGRARow_SSSE3(const uint8* y_buf, | |
3070 const uint8* u_buf, | |
3071 const uint8* v_buf, | |
3072 uint8* dst_bgra, | |
3073 const struct YuvConstants* yuvconstants, | |
3074 int width) { | |
3075 __asm { | |
3076 push esi | |
3077 push edi | |
3078 push ebx | |
3079 mov eax, [esp + 12 + 4] // y_buf; the 12 skips the three registers pushed above | |
3080 mov esi, [esp + 12 + 8] // u_buf | |
3081 mov edi, [esp + 12 + 12] // v_buf; the sub below turns edi into (v_buf - u_buf) | |
3082 mov edx, [esp + 12 + 16] // dst_bgra | |
3083 mov ebx, [esp + 12 + 20] // yuvconstants, handed to YUVTORGB via ebx | |
3084 mov ecx, [esp + 12 + 24] // width; loop runs while it stays above zero | |
3085 sub edi, esi | |
3086 | |
3087 convertloop: | |
3088 READYUV422 | |
3089 YUVTORGB(ebx) | |
3090 STOREBGRA | |
3091 | |
3092 sub ecx, 8 | |
3093 jg convertloop | |
3094 | |
3095 pop ebx | |
3096 pop edi | |
3097 pop esi | |
3098 ret | |
3099 } | |
3100 } | |
3101 | |
// Naked row function: each loop pass runs READYUV422, YUVTORGB(ebx) and STOREABGR | |
// (macros defined outside this view) and consumes 8 from width until it is <= 0. | |
3102 __declspec(naked) | |
3103 void I422ToABGRRow_SSSE3(const uint8* y_buf, | |
3104 const uint8* u_buf, | |
3105 const uint8* v_buf, | |
3106 uint8* dst_abgr, | |
3107 const struct YuvConstants* yuvconstants, | |
3108 int width) { | |
3109 __asm { | |
3110 push esi | |
3111 push edi | |
3112 push ebx | |
3113 mov eax, [esp + 12 + 4] // y_buf; the 12 skips the three registers pushed above | |
3114 mov esi, [esp + 12 + 8] // u_buf | |
3115 mov edi, [esp + 12 + 12] // v_buf; the sub below turns edi into (v_buf - u_buf) | |
3116 mov edx, [esp + 12 + 16] // dst_abgr | |
3117 mov ebx, [esp + 12 + 20] // yuvconstants, handed to YUVTORGB via ebx | |
3118 mov ecx, [esp + 12 + 24] // width; loop runs while it stays above zero | |
3119 sub edi, esi | |
3120 pcmpeqb xmm5, xmm5 // set every bit of xmm5 (0xff bytes) for alpha | |
3121 | |
3122 convertloop: | |
3123 READYUV422 | |
3124 YUVTORGB(ebx) | |
3125 STOREABGR | |
3126 | |
3127 sub ecx, 8 | |
3128 jg convertloop | |
3129 | |
3130 pop ebx | |
3131 pop edi | |
3132 pop esi | |
3133 ret | |
3134 } | |
3135 } | |
3136 | |
3137 __declspec(naked) | |
3138 void I422ToRGBARow_SSSE3(const uint8* y_buf, | 2989 void I422ToRGBARow_SSSE3(const uint8* y_buf, |
3139 const uint8* u_buf, | 2990 const uint8* u_buf, |
3140 const uint8* v_buf, | 2991 const uint8* v_buf, |
3141 uint8* dst_rgba, | 2992 uint8* dst_rgba, |
3142 const struct YuvConstants* yuvconstants, | 2993 const struct YuvConstants* yuvconstants, |
3143 int width) { | 2994 int width) { |
3144 __asm { | 2995 __asm { |
3145 push esi | 2996 push esi |
3146 push edi | 2997 push edi |
3147 push ebx | 2998 push ebx |
(...skipping 3254 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6402 } | 6253 } |
6403 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 6254 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
6404 | 6255 |
6405 #endif // defined(_M_X64) | 6256 #endif // defined(_M_X64) |
6406 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) | 6257 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) |
6407 | 6258 |
6408 #ifdef __cplusplus | 6259 #ifdef __cplusplus |
6409 } // extern "C" | 6260 } // extern "C" |
6410 } // namespace libyuv | 6261 } // namespace libyuv |
6411 #endif | 6262 #endif |
OLD | NEW |