Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: source/row_win.cc

Issue 1367093002: I420Alpha row function in 1 pass (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: bump version Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_common.cc ('k') | unit_test/convert_test.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 2398 matching lines...) Expand 10 before | Expand all | Expand 10 after
2409 __asm movd xmm0, [esi] /* U */ \ 2409 __asm movd xmm0, [esi] /* U */ \
2410 __asm movd xmm1, [esi + edi] /* V */ \ 2410 __asm movd xmm1, [esi + edi] /* V */ \
2411 __asm lea esi, [esi + 4] \ 2411 __asm lea esi, [esi + 4] \
2412 __asm punpcklbw xmm0, xmm1 /* UV */ \ 2412 __asm punpcklbw xmm0, xmm1 /* UV */ \
2413 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ 2413 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
2414 __asm movq xmm4, qword ptr [eax] \ 2414 __asm movq xmm4, qword ptr [eax] \
2415 __asm punpcklbw xmm4, xmm4 \ 2415 __asm punpcklbw xmm4, xmm4 \
2416 __asm lea eax, [eax + 8] \ 2416 __asm lea eax, [eax + 8] \
2417 } 2417 }
2418 2418
2419 // Read 4 UV from 422, upsample to 8 UV. With 8 Alpha.
2420 #define READYUVA422 __asm { \
2421 __asm movd xmm0, [esi] /* U */ \
2422 __asm movd xmm1, [esi + edi] /* V */ \
2423 __asm lea esi, [esi + 4] \
2424 __asm punpcklbw xmm0, xmm1 /* UV */ \
2425 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
2426 __asm movq xmm4, qword ptr [eax] /* Y */ \
2427 __asm punpcklbw xmm4, xmm4 \
2428 __asm lea eax, [eax + 8] \
2429 __asm movq xmm5, qword ptr [ebp] /* A */ \
2430 __asm lea ebp, [ebp + 8] \
2431 }
2432
2419 // Read 2 UV from 411, upsample to 8 UV. 2433 // Read 2 UV from 411, upsample to 8 UV.
2420 #define READYUV411 __asm { \ 2434 #define READYUV411 __asm { \
2421 __asm pinsrw xmm0, [esi], 0 /* U */ \ 2435 __asm pinsrw xmm0, [esi], 0 /* U */ \
2422 __asm pinsrw xmm1, [esi + edi], 0 /* V */ \ 2436 __asm pinsrw xmm1, [esi + edi], 0 /* V */ \
2423 __asm lea esi, [esi + 2] \ 2437 __asm lea esi, [esi + 2] \
2424 __asm punpcklbw xmm0, xmm1 /* UV */ \ 2438 __asm punpcklbw xmm0, xmm1 /* UV */ \
2425 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ 2439 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
2426 __asm punpckldq xmm0, xmm0 /* UVUVUVUV (upsample) */ \ 2440 __asm punpckldq xmm0, xmm0 /* UVUVUVUV (upsample) */ \
2427 __asm movq xmm4, qword ptr [eax] \ 2441 __asm movq xmm4, qword ptr [eax] \
2428 __asm punpcklbw xmm4, xmm4 \ 2442 __asm punpcklbw xmm4, xmm4 \
(...skipping 398 matching lines...) Expand 10 before | Expand all | Expand 10 after
2827 jg convertloop 2841 jg convertloop
2828 2842
2829 pop ebx 2843 pop ebx
2830 pop edi 2844 pop edi
2831 pop esi 2845 pop esi
2832 ret 2846 ret
2833 } 2847 }
2834 } 2848 }
2835 2849
2836 // 8 pixels. 2850 // 8 pixels.
2851 // 4 UV values upsampled to 8 UV, mixed with 8 Y and 8 A producing 8 ARGB (32 bytes).
2852 __declspec(naked)
2853 void I422AlphaToARGBRow_SSSE3(const uint8* y_buf,
2854 const uint8* u_buf,
2855 const uint8* v_buf,
2856 const uint8* a_buf,
2857 uint8* dst_argb,
2858 struct YuvConstants* yuvconstants,
2859 int width) {
2860 __asm {
2861 push esi
2862 push edi
2863 push ebx
2864 push ebp
2865 mov eax, [esp + 16 + 4] // Y
2866 mov esi, [esp + 16 + 8] // U
2867 mov edi, [esp + 16 + 12] // V
2868 mov ebp, [esp + 16 + 16] // A
2869 mov edx, [esp + 16 + 20] // argb
2870 mov ebx, [esp + 16 + 24] // yuvconstants
2871 mov ecx, [esp + 16 + 28] // width
2872 sub edi, esi
2873 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
2874
2875 convertloop:
2876 READYUVA422
2877 YUVTORGB(ebx)
2878 STOREARGB
2879
2880 sub ecx, 8
2881 jg convertloop
2882
2883 pop ebp
2884 pop ebx
2885 pop edi
2886 pop esi
2887 ret
2888 }
2889 }
2890
2891 // 8 pixels.
2892 // 4 UV values upsampled to 8 UV, mixed with 8 Y and 8 A producing 8 ABGR (32 bytes).
2893 __declspec(naked)
2894 void I422AlphaToABGRRow_SSSE3(const uint8* y_buf,
2895 const uint8* u_buf,
2896 const uint8* v_buf,
2897 const uint8* a_buf,
2898 uint8* dst_abgr,
2899 struct YuvConstants* yuvconstants,
2900 int width) {
2901 __asm {
2902 push esi
2903 push edi
2904 push ebx
2905 push ebp
2906 mov eax, [esp + 16 + 4] // Y
2907 mov esi, [esp + 16 + 8] // U
2908 mov edi, [esp + 16 + 12] // V
2909 mov ebp, [esp + 16 + 16] // A
2910 mov edx, [esp + 16 + 20] // abgr
2911 mov ebx, [esp + 16 + 24] // yuvconstants
2912 mov ecx, [esp + 16 + 28] // width
2913 sub edi, esi
2914 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
2915
2916 convertloop:
2917 READYUVA422
2918 YUVTORGB(ebx)
2919 STOREABGR
2920
2921 sub ecx, 8
2922 jg convertloop
2923
2924 pop ebp
2925 pop ebx
2926 pop edi
2927 pop esi
2928 ret
2929 }
2930 }
2931
2932 // 8 pixels.
2837 // 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). 2933 // 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
2838 // Similar to I420 but duplicate UV once more. 2934 // Similar to I420 but duplicate UV once more.
2839 __declspec(naked) 2935 __declspec(naked)
2840 void I411ToARGBRow_SSSE3(const uint8* y_buf, 2936 void I411ToARGBRow_SSSE3(const uint8* y_buf,
2841 const uint8* u_buf, 2937 const uint8* u_buf,
2842 const uint8* v_buf, 2938 const uint8* v_buf,
2843 uint8* dst_argb, 2939 uint8* dst_argb,
2844 struct YuvConstants* yuvconstants, 2940 struct YuvConstants* yuvconstants,
2845 int width) { 2941 int width) {
2846 __asm { 2942 __asm {
(...skipping 3608 matching lines...) Expand 10 before | Expand all | Expand 10 after
6455 } 6551 }
6456 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 6552 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
6457 6553
6458 #endif // defined(_M_X64) 6554 #endif // defined(_M_X64)
6459 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) 6555 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64))
6460 6556
6461 #ifdef __cplusplus 6557 #ifdef __cplusplus
6462 } // extern "C" 6558 } // extern "C"
6463 } // namespace libyuv 6559 } // namespace libyuv
6464 #endif 6560 #endif
OLDNEW
« no previous file with comments | « source/row_common.cc ('k') | unit_test/convert_test.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698