OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 2398 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2409 __asm movd xmm0, [esi] /* U */ \ | 2409 __asm movd xmm0, [esi] /* U */ \ |
2410 __asm movd xmm1, [esi + edi] /* V */ \ | 2410 __asm movd xmm1, [esi + edi] /* V */ \ |
2411 __asm lea esi, [esi + 4] \ | 2411 __asm lea esi, [esi + 4] \ |
2412 __asm punpcklbw xmm0, xmm1 /* UV */ \ | 2412 __asm punpcklbw xmm0, xmm1 /* UV */ \ |
2413 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ | 2413 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ |
2414 __asm movq xmm4, qword ptr [eax] \ | 2414 __asm movq xmm4, qword ptr [eax] \ |
2415 __asm punpcklbw xmm4, xmm4 \ | 2415 __asm punpcklbw xmm4, xmm4 \ |
2416 __asm lea eax, [eax + 8] \ | 2416 __asm lea eax, [eax + 8] \ |
2417 } | 2417 } |
2418 | 2418 |
| 2419 // Read 4 U and 4 V bytes (4:2:2), duplicate each UV pair so it covers 8 pixels, and load 8 Y and 8 alpha bytes. Expects eax = Y, esi = U, esi + edi = V, ebp = A; advances eax, esi and ebp. |
| 2420 #define READYUVA422 __asm { \ |
| 2421 __asm movd xmm0, [esi] /* 4 bytes of U */ \ |
| 2422 __asm movd xmm1, [esi + edi] /* 4 bytes of V; edi is used as an offset from esi */ \ |
| 2423 __asm lea esi, [esi + 4] /* advance the U address (and V through the offset) by 4 bytes */ \ |
| 2424 __asm punpcklbw xmm0, xmm1 /* interleave the U and V bytes: UV */ \ |
| 2425 __asm punpcklwd xmm0, xmm0 /* duplicate each UV word: UVUV (upsample) */ \ |
| 2426 __asm movq xmm4, qword ptr [eax] /* 8 bytes of Y */ \ |
| 2427 __asm punpcklbw xmm4, xmm4 /* duplicate each Y byte into both bytes of a word */ \ |
| 2428 __asm lea eax, [eax + 8] /* advance the Y address by 8 bytes */ \ |
| 2429 __asm movq xmm5, qword ptr [ebp] /* 8 bytes of A; overwrites whatever xmm5 held before */ \ |
| 2430 __asm lea ebp, [ebp + 8] /* advance the A address by 8 bytes */ \ |
| 2431 } |
| 2432 |
2419 // Read 2 UV from 411, upsample to 8 UV. | 2433 // Read 2 UV from 411, upsample to 8 UV. |
2420 #define READYUV411 __asm { \ | 2434 #define READYUV411 __asm { \ |
2421 __asm pinsrw xmm0, [esi], 0 /* U */ \ | 2435 __asm pinsrw xmm0, [esi], 0 /* U */ \ |
2422 __asm pinsrw xmm1, [esi + edi], 0 /* V */ \ | 2436 __asm pinsrw xmm1, [esi + edi], 0 /* V */ \ |
2423 __asm lea esi, [esi + 2] \ | 2437 __asm lea esi, [esi + 2] \ |
2424 __asm punpcklbw xmm0, xmm1 /* UV */ \ | 2438 __asm punpcklbw xmm0, xmm1 /* UV */ \ |
2425 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ | 2439 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ |
2426 __asm punpckldq xmm0, xmm0 /* UVUVUVUV (upsample) */ \ | 2440 __asm punpckldq xmm0, xmm0 /* UVUVUVUV (upsample) */ \ |
2427 __asm movq xmm4, qword ptr [eax] \ | 2441 __asm movq xmm4, qword ptr [eax] \ |
2428 __asm punpcklbw xmm4, xmm4 \ | 2442 __asm punpcklbw xmm4, xmm4 \ |
(...skipping 398 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2827 jg convertloop | 2841 jg convertloop |
2828 | 2842 |
2829 pop ebx | 2843 pop ebx |
2830 pop edi | 2844 pop edi |
2831 pop esi | 2845 pop esi |
2832 ret | 2846 ret |
2833 } | 2847 } |
2834 } | 2848 } |
2835 | 2849 |
2836 // 8 pixels. | 2850 // 8 pixels. |
| 2851 // 4 UV values upsampled to 8 UV, mixed with 8 Y and 8 A producing 8 ARGB (32 bytes). |
| 2852 __declspec(naked) /* the asm body saves/restores registers and issues ret itself */ |
| 2853 void I422AlphaToARGBRow_SSSE3(const uint8* y_buf, /* Y row: 8 bytes read per loop iteration */ |
| 2854 const uint8* u_buf, /* U row: 4 bytes read per loop iteration */ |
| 2855 const uint8* v_buf, /* V row: 4 bytes read per loop iteration */ |
| 2856 const uint8* a_buf, /* alpha row: 8 bytes read per loop iteration */ |
| 2857 uint8* dst_argb, /* destination pointer, held in edx for STOREARGB */ |
| 2858 struct YuvConstants* yuvconstants, /* held in ebx and handed to YUVTORGB */ |
| 2859 int width) { /* pixel count; consumed 8 at a time, loop body runs at least once */ |
| 2860 __asm { |
| 2861 push esi /* save the four registers this routine overwrites */ |
| 2862 push edi |
| 2863 push ebx |
| 2864 push ebp /* ebp is used below as the alpha pointer */ |
| 2865 mov eax, [esp + 16 + 4] // Y (y_buf); the 16 accounts for the four pushes above |
| 2866 mov esi, [esp + 16 + 8] // U (u_buf) |
| 2867 mov edi, [esp + 16 + 12] // V (v_buf) |
| 2868 mov ebp, [esp + 16 + 16] // A (a_buf) |
| 2869 mov edx, [esp + 16 + 20] // argb (dst_argb) |
| 2870 mov ebx, [esp + 16 + 24] // yuvconstants |
| 2871 mov ecx, [esp + 16 + 28] // width |
| 2872 sub edi, esi /* edi = v_buf - u_buf, so [esi + edi] addresses V in READYUVA422 */ |
| 2873 pcmpeqb xmm5, xmm5 // generate all-ones for alpha. NOTE(review): READYUVA422 reloads xmm5 from [ebp] at the top of every iteration, so this looks redundant - confirm |
| 2874 |
| 2875 convertloop: /* one iteration per 8 pixels */ |
| 2876 READYUVA422 /* load 4 U, 4 V, 8 Y and 8 A; advance eax, esi and ebp */ |
| 2877 YUVTORGB(ebx) /* defined outside this view; presumably converts to RGB using the constants at ebx */ |
| 2878 STOREARGB /* defined outside this view; presumably writes 8 ARGB pixels at edx using alpha in xmm5 */ |
| 2879 |
| 2880 sub ecx, 8 /* 8 pixels done */ |
| 2881 jg convertloop /* repeat while the remaining count is still positive */ |
| 2882 |
| 2883 pop ebp /* restore in reverse push order */ |
| 2884 pop ebx |
| 2885 pop edi |
| 2886 pop esi |
| 2887 ret |
| 2888 } |
| 2889 } |
| 2890 |
| 2891 // 8 pixels. |
| 2892 // 4 UV values upsampled to 8 UV, mixed with 8 Y and 8 A producing 8 ABGR (32 bytes). |
| 2893 __declspec(naked) /* the asm body saves/restores registers and issues ret itself */ |
| 2894 void I422AlphaToABGRRow_SSSE3(const uint8* y_buf, /* Y row: 8 bytes read per loop iteration */ |
| 2895 const uint8* u_buf, /* U row: 4 bytes read per loop iteration */ |
| 2896 const uint8* v_buf, /* V row: 4 bytes read per loop iteration */ |
| 2897 const uint8* a_buf, /* alpha row: 8 bytes read per loop iteration */ |
| 2898 uint8* dst_abgr, /* destination pointer, held in edx for STOREABGR */ |
| 2899 struct YuvConstants* yuvconstants, /* held in ebx and handed to YUVTORGB */ |
| 2900 int width) { /* pixel count; consumed 8 at a time, loop body runs at least once */ |
| 2901 __asm { |
| 2902 push esi /* save the four registers this routine overwrites */ |
| 2903 push edi |
| 2904 push ebx |
| 2905 push ebp /* ebp is used below as the alpha pointer */ |
| 2906 mov eax, [esp + 16 + 4] // Y (y_buf); the 16 accounts for the four pushes above |
| 2907 mov esi, [esp + 16 + 8] // U (u_buf) |
| 2908 mov edi, [esp + 16 + 12] // V (v_buf) |
| 2909 mov ebp, [esp + 16 + 16] // A (a_buf) |
| 2910 mov edx, [esp + 16 + 20] // abgr (dst_abgr) |
| 2911 mov ebx, [esp + 16 + 24] // yuvconstants |
| 2912 mov ecx, [esp + 16 + 28] // width |
| 2913 sub edi, esi /* edi = v_buf - u_buf, so [esi + edi] addresses V in READYUVA422 */ |
| 2914 pcmpeqb xmm5, xmm5 // generate all-ones for alpha. NOTE(review): READYUVA422 reloads xmm5 from [ebp] at the top of every iteration, so this looks redundant - confirm |
| 2915 |
| 2916 convertloop: /* one iteration per 8 pixels */ |
| 2917 READYUVA422 /* load 4 U, 4 V, 8 Y and 8 A; advance eax, esi and ebp */ |
| 2918 YUVTORGB(ebx) /* defined outside this view; presumably converts to RGB using the constants at ebx */ |
| 2919 STOREABGR /* defined outside this view; presumably writes 8 ABGR pixels at edx using alpha in xmm5 */ |
| 2920 |
| 2921 sub ecx, 8 /* 8 pixels done */ |
| 2922 jg convertloop /* repeat while the remaining count is still positive */ |
| 2923 |
| 2924 pop ebp /* restore in reverse push order */ |
| 2925 pop ebx |
| 2926 pop edi |
| 2927 pop esi |
| 2928 ret |
| 2929 } |
| 2930 } |
| 2931 |
| 2932 // 8 pixels. |
2837 // 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). | 2933 // 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). |
2838 // Similar to I420 but duplicate UV once more. | 2934 // Similar to I420 but duplicate UV once more. |
2839 __declspec(naked) | 2935 __declspec(naked) |
2840 void I411ToARGBRow_SSSE3(const uint8* y_buf, | 2936 void I411ToARGBRow_SSSE3(const uint8* y_buf, |
2841 const uint8* u_buf, | 2937 const uint8* u_buf, |
2842 const uint8* v_buf, | 2938 const uint8* v_buf, |
2843 uint8* dst_argb, | 2939 uint8* dst_argb, |
2844 struct YuvConstants* yuvconstants, | 2940 struct YuvConstants* yuvconstants, |
2845 int width) { | 2941 int width) { |
2846 __asm { | 2942 __asm { |
(...skipping 3608 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6455 } | 6551 } |
6456 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 6552 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
6457 | 6553 |
6458 #endif // defined(_M_X64) | 6554 #endif // defined(_M_X64) |
6459 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) | 6555 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) |
6460 | 6556 |
6461 #ifdef __cplusplus | 6557 #ifdef __cplusplus |
6462 } // extern "C" | 6558 } // extern "C" |
6463 } // namespace libyuv | 6559 } // namespace libyuv |
6464 #endif | 6560 #endif |
OLD | NEW |