Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1000)

Side by Side Diff: source/row_win.cc

Issue 1388273002: Reimplement NV21ToARGB to allow different color matrix. (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: include scale_row.h for scaling macros Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_neon64.cc ('k') | source/scale_gcc.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 301 matching lines...) Expand 10 before | Expand all | Expand 10 after
312 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15, 312 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15,
313 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15 313 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15
314 }; 314 };
315 315
316 // UYVY shuf 8 UV to 16 UV. 316 // UYVY shuf 8 UV to 16 UV.
317 static const lvec8 kShuffleUYVYUV = { 317 static const lvec8 kShuffleUYVYUV = {
318 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14, 318 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14,
319 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14 319 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14
320 }; 320 };
321 321
322 // NV21 shuf 8 VU to 16 UV.
323 static const lvec8 kShuffleNV21 = {
324 1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6,
325 1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6,
326 };
327
322 // Duplicates gray value 3 times and fills in alpha opaque. 328 // Duplicates gray value 3 times and fills in alpha opaque.
323 __declspec(naked) 329 __declspec(naked)
324 void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) { 330 void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) {
325 __asm { 331 __asm {
326 mov eax, [esp + 4] // src_y 332 mov eax, [esp + 4] // src_y
327 mov edx, [esp + 8] // dst_argb 333 mov edx, [esp + 8] // dst_argb
328 mov ecx, [esp + 12] // pix 334 mov ecx, [esp + 12] // pix
329 pcmpeqb xmm5, xmm5 // generate mask 0xff000000 335 pcmpeqb xmm5, xmm5 // generate mask 0xff000000
330 pslld xmm5, 24 336 pslld xmm5, 24
331 337
(...skipping 1653 matching lines...) Expand 10 before | Expand all | Expand 10 after
1985 __asm vmovdqu xmm0, [esi] /* UV */ \ 1991 __asm vmovdqu xmm0, [esi] /* UV */ \
1986 __asm lea esi, [esi + 16] \ 1992 __asm lea esi, [esi + 16] \
1987 __asm vpermq ymm0, ymm0, 0xd8 \ 1993 __asm vpermq ymm0, ymm0, 0xd8 \
1988 __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \ 1994 __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \
1989 __asm vmovdqu xmm4, [eax] /* Y */ \ 1995 __asm vmovdqu xmm4, [eax] /* Y */ \
1990 __asm vpermq ymm4, ymm4, 0xd8 \ 1996 __asm vpermq ymm4, ymm4, 0xd8 \
1991 __asm vpunpcklbw ymm4, ymm4, ymm4 \ 1997 __asm vpunpcklbw ymm4, ymm4, ymm4 \
1992 __asm lea eax, [eax + 16] \ 1998 __asm lea eax, [eax + 16] \
1993 } 1999 }
1994 2000
2001 // Read 8 VU from NV21, upsample to 16 UV.
2002 #define READNV21_AVX2 __asm { \
2003 __asm vmovdqu xmm0, [esi] /* VU */ \
2004 __asm lea esi, [esi + 16] \
2005 __asm vpermq ymm0, ymm0, 0xd8 \
2006 __asm vpshufb ymm0, ymm0, ymmword ptr kShuffleNV21 \
2007 __asm vmovdqu xmm4, [eax] /* Y */ \
2008 __asm vpermq ymm4, ymm4, 0xd8 \
2009 __asm vpunpcklbw ymm4, ymm4, ymm4 \
2010 __asm lea eax, [eax + 16] \
2011 }
2012
1995 // Read 8 YUY2 with 16 Y and upsample 8 UV to 16 UV. 2013 // Read 8 YUY2 with 16 Y and upsample 8 UV to 16 UV.
1996 #define READYUY2_AVX2 __asm { \ 2014 #define READYUY2_AVX2 __asm { \
1997 __asm vmovdqu ymm4, [eax] /* YUY2 */ \ 2015 __asm vmovdqu ymm4, [eax] /* YUY2 */ \
1998 __asm vpshufb ymm4, ymm4, ymmword ptr kShuffleYUY2Y \ 2016 __asm vpshufb ymm4, ymm4, ymmword ptr kShuffleYUY2Y \
1999 __asm vmovdqu ymm0, [eax] /* UV */ \ 2017 __asm vmovdqu ymm0, [eax] /* UV */ \
2000 __asm vpshufb ymm0, ymm0, ymmword ptr kShuffleYUY2UV \ 2018 __asm vpshufb ymm0, ymm0, ymmword ptr kShuffleYUY2UV \
2001 __asm lea eax, [eax + 32] \ 2019 __asm lea eax, [eax + 32] \
2002 } 2020 }
2003 2021
2004 // Read 8 UYVY with 16 Y and upsample 8 UV to 16 UV. 2022 // Read 8 UYVY with 16 Y and upsample 8 UV to 16 UV.
(...skipping 353 matching lines...) Expand 10 before | Expand all | Expand 10 after
2358 jg convertloop 2376 jg convertloop
2359 2377
2360 pop ebx 2378 pop ebx
2361 pop esi 2379 pop esi
2362 vzeroupper 2380 vzeroupper
2363 ret 2381 ret
2364 } 2382 }
2365 } 2383 }
2366 #endif // HAS_NV12TOARGBROW_AVX2 2384 #endif // HAS_NV12TOARGBROW_AVX2
2367 2385
2386 #ifdef HAS_NV21TOARGBROW_AVX2
2387 // 16 pixels.
2388 // 8 VU values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
2389 __declspec(naked)
2390 void NV21ToARGBRow_AVX2(const uint8* y_buf,
2391 const uint8* vu_buf,
2392 uint8* dst_argb,
2393 struct YuvConstants* yuvconstants,
2394 int width) {
2395 __asm {
2396 push esi
2397 push ebx
2398 mov eax, [esp + 8 + 4] // Y
2399 mov esi, [esp + 8 + 8] // VU
2400 mov edx, [esp + 8 + 12] // argb
2401 mov ebx, [esp + 8 + 16] // yuvconstants
2402 mov ecx, [esp + 8 + 20] // width
2403 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
2404
2405 convertloop:
2406 READNV21_AVX2
2407 YUVTORGB_AVX2(ebx)
2408 STOREARGB_AVX2
2409
2410 sub ecx, 16
2411 jg convertloop
2412
2413 pop ebx
2414 pop esi
2415 vzeroupper
2416 ret
2417 }
2418 }
2419 #endif // HAS_NV21TOARGBROW_AVX2
2420
2368 // 16 pixels. 2421 // 16 pixels.
2369 // 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes). 2422 // 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes).
2370 __declspec(naked) 2423 __declspec(naked)
2371 void YUY2ToARGBRow_AVX2(const uint8* src_yuy2, 2424 void YUY2ToARGBRow_AVX2(const uint8* src_yuy2,
2372 uint8* dst_argb, 2425 uint8* dst_argb,
2373 struct YuvConstants* yuvconstants, 2426 struct YuvConstants* yuvconstants,
2374 int width) { 2427 int width) {
2375 __asm { 2428 __asm {
2376 push ebx 2429 push ebx
2377 mov eax, [esp + 4 + 4] // yuy2 2430 mov eax, [esp + 4 + 4] // yuy2
(...skipping 223 matching lines...) Expand 10 before | Expand all | Expand 10 after
2601 // Read 4 UV from NV12, upsample to 8 UV. 2654 // Read 4 UV from NV12, upsample to 8 UV.
2602 #define READNV12 __asm { \ 2655 #define READNV12 __asm { \
2603 __asm movq xmm0, qword ptr [esi] /* UV */ \ 2656 __asm movq xmm0, qword ptr [esi] /* UV */ \
2604 __asm lea esi, [esi + 8] \ 2657 __asm lea esi, [esi + 8] \
2605 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ 2658 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
2606 __asm movq xmm4, qword ptr [eax] \ 2659 __asm movq xmm4, qword ptr [eax] \
2607 __asm punpcklbw xmm4, xmm4 \ 2660 __asm punpcklbw xmm4, xmm4 \
2608 __asm lea eax, [eax + 8] \ 2661 __asm lea eax, [eax + 8] \
2609 } 2662 }
2610 2663
2664 // Read 4 VU from NV21, upsample to 8 UV.
2665 #define READNV21 __asm { \
2666 __asm movq xmm0, qword ptr [esi] /* VU */ \
2667 __asm lea esi, [esi + 8] \
2668 __asm pshufb xmm0, xmmword ptr kShuffleNV21 \
2669 __asm movq xmm4, qword ptr [eax] \
2670 __asm punpcklbw xmm4, xmm4 \
2671 __asm lea eax, [eax + 8] \
2672 }
2673
2611 // Read 4 YUY2 with 8 Y and upsample 4 UV to 8 UV. 2674 // Read 4 YUY2 with 8 Y and upsample 4 UV to 8 UV.
2612 #define READYUY2 __asm { \ 2675 #define READYUY2 __asm { \
2613 __asm movdqu xmm4, [eax] /* YUY2 */ \ 2676 __asm movdqu xmm4, [eax] /* YUY2 */ \
2614 __asm pshufb xmm4, xmmword ptr kShuffleYUY2Y \ 2677 __asm pshufb xmm4, xmmword ptr kShuffleYUY2Y \
2615 __asm movdqu xmm0, [eax] /* UV */ \ 2678 __asm movdqu xmm0, [eax] /* UV */ \
2616 __asm pshufb xmm0, xmmword ptr kShuffleYUY2UV \ 2679 __asm pshufb xmm0, xmmword ptr kShuffleYUY2UV \
2617 __asm lea eax, [eax + 16] \ 2680 __asm lea eax, [eax + 16] \
2618 } 2681 }
2619 2682
2620 // Read 4 UYVY with 8 Y and upsample 4 UV to 8 UV. 2683 // Read 4 UYVY with 8 Y and upsample 4 UV to 8 UV.
(...skipping 525 matching lines...) Expand 10 before | Expand all | Expand 10 after
3146 sub ecx, 8 3209 sub ecx, 8
3147 jg convertloop 3210 jg convertloop
3148 3211
3149 pop ebx 3212 pop ebx
3150 pop esi 3213 pop esi
3151 ret 3214 ret
3152 } 3215 }
3153 } 3216 }
3154 3217
3155 // 8 pixels. 3218 // 8 pixels.
3219 // 4 VU values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
3220 __declspec(naked)
3221 void NV21ToARGBRow_SSSE3(const uint8* y_buf,
3222 const uint8* vu_buf,
3223 uint8* dst_argb,
3224 struct YuvConstants* yuvconstants,
3225 int width) {
3226 __asm {
3227 push esi
3228 push ebx
3229 mov eax, [esp + 8 + 4] // Y
3230 mov esi, [esp + 8 + 8] // VU
3231 mov edx, [esp + 8 + 12] // argb
3232 mov ebx, [esp + 8 + 16] // yuvconstants
3233 mov ecx, [esp + 8 + 20] // width
3234 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
3235
3236 convertloop:
3237 READNV21
3238 YUVTORGB(ebx)
3239 STOREARGB
3240
3241 sub ecx, 8
3242 jg convertloop
3243
3244 pop ebx
3245 pop esi
3246 ret
3247 }
3248 }
3249
3250 // 8 pixels.
3156 // 4 YUY2 values with 8 Y and 4 UV producing 8 ARGB (32 bytes). 3251 // 4 YUY2 values with 8 Y and 4 UV producing 8 ARGB (32 bytes).
3157 __declspec(naked) 3252 __declspec(naked)
3158 void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, 3253 void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
3159 uint8* dst_argb, 3254 uint8* dst_argb,
3160 struct YuvConstants* yuvconstants, 3255 struct YuvConstants* yuvconstants,
3161 int width) { 3256 int width) {
3162 __asm { 3257 __asm {
3163 push ebx 3258 push ebx
3164 mov eax, [esp + 4 + 4] // yuy2 3259 mov eax, [esp + 4 + 4] // yuy2
3165 mov edx, [esp + 4 + 8] // argb 3260 mov edx, [esp + 4 + 8] // argb
(...skipping 3377 matching lines...) Expand 10 before | Expand all | Expand 10 after
6543 } 6638 }
6544 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 6639 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
6545 6640
6546 #endif // defined(_M_X64) 6641 #endif // defined(_M_X64)
6547 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) 6642 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64))
6548 6643
6549 #ifdef __cplusplus 6644 #ifdef __cplusplus
6550 } // extern "C" 6645 } // extern "C"
6551 } // namespace libyuv 6646 } // namespace libyuv
6552 #endif 6647 #endif
OLDNEW
« no previous file with comments | « source/row_neon64.cc ('k') | source/scale_gcc.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698