OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 301 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
312 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15, | 312 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15, |
313 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15 | 313 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15 |
314 }; | 314 }; |
315 | 315 |
316 // UYVY shuf 8 UV to 16 UV: each 16-entry half takes bytes 0 and 2 of every 4-byte group and emits that pair twice. | 316 // UYVY shuf 8 UV to 16 UV: each 16-entry half takes bytes 0 and 2 of every 4-byte group and emits that pair twice. |
317 static const lvec8 kShuffleUYVYUV = { | 317 static const lvec8 kShuffleUYVYUV = { |
318 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14, | 318 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14, |
319 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14 | 319 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14 |
320 }; | 320 }; |
321 | 321 |
| 322 // NV21 shuf 8 VU to 16 UV: each 16-entry half swaps every byte pair (1,0 / 3,2 / ...) and emits it twice, reading only source bytes 0-7. |
| 323 static const lvec8 kShuffleNV21 = { |
| 324 1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6, |
| 325 1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6, |
| 326 }; |
| 327 |
322 // Duplicates gray value 3 times and fills in alpha opaque. | 328 // Duplicates gray value 3 times and fills in alpha opaque. |
323 __declspec(naked) | 329 __declspec(naked) |
324 void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) { | 330 void J400ToARGBRow_SSE2(const uint8* src_y, uint8* dst_argb, int pix) { |
325 __asm { | 331 __asm { |
326 mov eax, [esp + 4] // src_y | 332 mov eax, [esp + 4] // src_y |
327 mov edx, [esp + 8] // dst_argb | 333 mov edx, [esp + 8] // dst_argb |
328 mov ecx, [esp + 12] // pix | 334 mov ecx, [esp + 12] // pix |
329 pcmpeqb xmm5, xmm5 // generate mask 0xff000000 | 335 pcmpeqb xmm5, xmm5 // generate mask 0xff000000 |
330 pslld xmm5, 24 | 336 pslld xmm5, 24 |
331 | 337 |
(...skipping 1653 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1985 __asm vmovdqu xmm0, [esi] /* UV */ \ | 1991 __asm vmovdqu xmm0, [esi] /* UV */ \ |
1986 __asm lea esi, [esi + 16] \ | 1992 __asm lea esi, [esi + 16] \ |
1987 __asm vpermq ymm0, ymm0, 0xd8 \ | 1993 __asm vpermq ymm0, ymm0, 0xd8 \ |
1988 __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \ | 1994 __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \ |
1989 __asm vmovdqu xmm4, [eax] /* Y */ \ | 1995 __asm vmovdqu xmm4, [eax] /* Y */ \ |
1990 __asm vpermq ymm4, ymm4, 0xd8 \ | 1996 __asm vpermq ymm4, ymm4, 0xd8 \ |
1991 __asm vpunpcklbw ymm4, ymm4, ymm4 \ | 1997 __asm vpunpcklbw ymm4, ymm4, ymm4 \ |
1992 __asm lea eax, [eax + 16] \ | 1998 __asm lea eax, [eax + 16] \ |
1993 } | 1999 } |
1994 | 2000 |
| 2001 // Read 8 VU from NV21 (16 bytes at esi) and 16 Y (16 bytes at eax); upsample to 16 UV in ymm0, each Y byte doubled in ymm4. Advances both pointers by 16. |
| 2002 #define READNV21_AVX2 __asm { \ |
| 2003 __asm vmovdqu xmm0, [esi] /* VU */ \ |
| 2004 __asm lea esi, [esi + 16] \ |
| 2005 __asm vpermq ymm0, ymm0, 0xd8 /* swap qwords 1 and 2: second 8 bytes go to the high lane */ \ |
| 2006 __asm vpshufb ymm0, ymm0, ymmword ptr kShuffleNV21 /* VU -> UVUV (swap + upsample) */ \ |
| 2007 __asm vmovdqu xmm4, [eax] /* Y */ \ |
| 2008 __asm vpermq ymm4, ymm4, 0xd8 \ |
| 2009 __asm vpunpcklbw ymm4, ymm4, ymm4 \ |
| 2010 __asm lea eax, [eax + 16] \ |
| 2011 } |
| 2012 |
1995 // Read 8 YUY2 with 16 Y and upsample 8 UV to 16 UV. Loads the same 32 bytes at eax twice (Y shuffle into ymm4, UV shuffle into ymm0), then advances eax by 32. | 2013 // Read 8 YUY2 with 16 Y and upsample 8 UV to 16 UV. Loads the same 32 bytes at eax twice (Y shuffle into ymm4, UV shuffle into ymm0), then advances eax by 32. |
1996 #define READYUY2_AVX2 __asm { \ | 2014 #define READYUY2_AVX2 __asm { \ |
1997 __asm vmovdqu ymm4, [eax] /* YUY2 */ \ | 2015 __asm vmovdqu ymm4, [eax] /* YUY2 */ \ |
1998 __asm vpshufb ymm4, ymm4, ymmword ptr kShuffleYUY2Y \ | 2016 __asm vpshufb ymm4, ymm4, ymmword ptr kShuffleYUY2Y \ |
1999 __asm vmovdqu ymm0, [eax] /* UV */ \ | 2017 __asm vmovdqu ymm0, [eax] /* UV */ \ |
2000 __asm vpshufb ymm0, ymm0, ymmword ptr kShuffleYUY2UV \ | 2018 __asm vpshufb ymm0, ymm0, ymmword ptr kShuffleYUY2UV \ |
2001 __asm lea eax, [eax + 32] \ | 2019 __asm lea eax, [eax + 32] \ |
2002 } | 2020 } |
2003 | 2021 |
2004 // Read 8 UYVY with 16 Y and upsample 8 UV to 16 UV. | 2022 // Read 8 UYVY with 16 Y and upsample 8 UV to 16 UV. |
(...skipping 353 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2358 jg convertloop | 2376 jg convertloop |
2359 | 2377 |
2360 pop ebx | 2378 pop ebx |
2361 pop esi | 2379 pop esi |
2362 vzeroupper | 2380 vzeroupper |
2363 ret | 2381 ret |
2364 } | 2382 } |
2365 } | 2383 } |
2366 #endif // HAS_NV12TOARGBROW_AVX2 | 2384 #endif // HAS_NV12TOARGBROW_AVX2 |
2367 | 2385 |
| 2386 #ifdef HAS_NV21TOARGBROW_AVX2 |
| 2387 // 16 pixels per loop iteration (sub ecx, 16 / jg); the body always runs at least once, so width is presumably a positive multiple of 16 - confirm with callers. |
| 2388 // 8 VU values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). |
| 2389 __declspec(naked) |
| 2390 void NV21ToARGBRow_AVX2(const uint8* y_buf, |
| 2391 const uint8* vu_buf, |
| 2392 uint8* dst_argb, |
| 2393 struct YuvConstants* yuvconstants, |
| 2394 int width) { |
| 2395 __asm { |
| 2396 push esi |
| 2397 push ebx |
| 2398 mov eax, [esp + 8 + 4] // y_buf (the +8 skips the two registers pushed above) |
| 2399 mov esi, [esp + 8 + 8] // vu_buf |
| 2400 mov edx, [esp + 8 + 12] // dst_argb |
| 2401 mov ebx, [esp + 8 + 16] // yuvconstants |
| 2402 mov ecx, [esp + 8 + 20] // width |
| 2403 vpcmpeqb ymm5, ymm5, ymm5 // set every byte of ymm5 to 0xff for alpha |
| 2404 |
| 2405 convertloop: |
| 2406 READNV21_AVX2 |
| 2407 YUVTORGB_AVX2(ebx) |
| 2408 STOREARGB_AVX2 |
| 2409 |
| 2410 sub ecx, 16 |
| 2411 jg convertloop |
| 2412 |
| 2413 pop ebx |
| 2414 pop esi |
| 2415 vzeroupper |
| 2416 ret |
| 2417 } |
| 2418 } |
| 2419 #endif // HAS_NV21TOARGBROW_AVX2 |
| 2420 |
2368 // 16 pixels. | 2421 // 16 pixels. |
2369 // 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes). | 2422 // 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes). |
2370 __declspec(naked) | 2423 __declspec(naked) |
2371 void YUY2ToARGBRow_AVX2(const uint8* src_yuy2, | 2424 void YUY2ToARGBRow_AVX2(const uint8* src_yuy2, |
2372 uint8* dst_argb, | 2425 uint8* dst_argb, |
2373 struct YuvConstants* yuvconstants, | 2426 struct YuvConstants* yuvconstants, |
2374 int width) { | 2427 int width) { |
2375 __asm { | 2428 __asm { |
2376 push ebx | 2429 push ebx |
2377 mov eax, [esp + 4 + 4] // yuy2 | 2430 mov eax, [esp + 4 + 4] // yuy2 |
(...skipping 223 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2601 // Read 4 UV from NV12 (8 bytes at esi) and 8 Y (8 bytes at eax); upsample to 8 UV in xmm0, each Y byte doubled in xmm4. Advances both pointers by 8. | 2654 // Read 4 UV from NV12 (8 bytes at esi) and 8 Y (8 bytes at eax); upsample to 8 UV in xmm0, each Y byte doubled in xmm4. Advances both pointers by 8. |
2602 #define READNV12 __asm { \ | 2655 #define READNV12 __asm { \ |
2603 __asm movq xmm0, qword ptr [esi] /* UV */ \ | 2656 __asm movq xmm0, qword ptr [esi] /* UV */ \ |
2604 __asm lea esi, [esi + 8] \ | 2657 __asm lea esi, [esi + 8] \ |
2605 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ | 2658 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ |
2606 __asm movq xmm4, qword ptr [eax] \ | 2659 __asm movq xmm4, qword ptr [eax] \ |
2607 __asm punpcklbw xmm4, xmm4 \ | 2660 __asm punpcklbw xmm4, xmm4 \ |
2608 __asm lea eax, [eax + 8] \ | 2661 __asm lea eax, [eax + 8] \ |
2609 } | 2662 } |
2610 | 2663 |
| 2664 // Read 4 VU from NV21 (8 bytes at esi) and 8 Y (8 bytes at eax); swap and upsample to 8 UV in xmm0, each Y byte doubled in xmm4. Advances both pointers by 8. |
| 2665 #define READNV21 __asm { \ |
| 2666 __asm movq xmm0, qword ptr [esi] /* VU */ \ |
| 2667 __asm lea esi, [esi + 8] \ |
| 2668 __asm pshufb xmm0, xmmword ptr kShuffleNV21 /* VU -> UVUV (swap + upsample) */ \ |
| 2669 __asm movq xmm4, qword ptr [eax] \ |
| 2670 __asm punpcklbw xmm4, xmm4 \ |
| 2671 __asm lea eax, [eax + 8] \ |
| 2672 } |
| 2673 |
2611 // Read 4 YUY2 with 8 Y and upsample 4 UV to 8 UV. Loads the same 16 bytes at eax twice (Y shuffle into xmm4, UV shuffle into xmm0), then advances eax by 16. | 2674 // Read 4 YUY2 with 8 Y and upsample 4 UV to 8 UV. Loads the same 16 bytes at eax twice (Y shuffle into xmm4, UV shuffle into xmm0), then advances eax by 16. |
2612 #define READYUY2 __asm { \ | 2675 #define READYUY2 __asm { \ |
2613 __asm movdqu xmm4, [eax] /* YUY2 */ \ | 2676 __asm movdqu xmm4, [eax] /* YUY2 */ \ |
2614 __asm pshufb xmm4, xmmword ptr kShuffleYUY2Y \ | 2677 __asm pshufb xmm4, xmmword ptr kShuffleYUY2Y \ |
2615 __asm movdqu xmm0, [eax] /* UV */ \ | 2678 __asm movdqu xmm0, [eax] /* UV */ \ |
2616 __asm pshufb xmm0, xmmword ptr kShuffleYUY2UV \ | 2679 __asm pshufb xmm0, xmmword ptr kShuffleYUY2UV \ |
2617 __asm lea eax, [eax + 16] \ | 2680 __asm lea eax, [eax + 16] \ |
2618 } | 2681 } |
2619 | 2682 |
2620 // Read 4 UYVY with 8 Y and upsample 4 UV to 8 UV. | 2683 // Read 4 UYVY with 8 Y and upsample 4 UV to 8 UV. |
(...skipping 525 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
3146 sub ecx, 8 | 3209 sub ecx, 8 |
3147 jg convertloop | 3210 jg convertloop |
3148 | 3211 |
3149 pop ebx | 3212 pop ebx |
3150 pop esi | 3213 pop esi |
3151 ret | 3214 ret |
3152 } | 3215 } |
3153 } | 3216 } |
3154 | 3217 |
3155 // 8 pixels. | 3218 // 8 pixels. |
| 3219 // 4 VU values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes) per loop iteration; the body always runs at least once, so width is presumably a positive multiple of 8 - confirm with callers. |
| 3220 __declspec(naked) |
| 3221 void NV21ToARGBRow_SSSE3(const uint8* y_buf, |
| 3222 const uint8* vu_buf, |
| 3223 uint8* dst_argb, |
| 3224 struct YuvConstants* yuvconstants, |
| 3225 int width) { |
| 3226 __asm { |
| 3227 push esi |
| 3228 push ebx |
| 3229 mov eax, [esp + 8 + 4] // y_buf (the +8 skips the two registers pushed above) |
| 3230 mov esi, [esp + 8 + 8] // vu_buf |
| 3231 mov edx, [esp + 8 + 12] // dst_argb |
| 3232 mov ebx, [esp + 8 + 16] // yuvconstants |
| 3233 mov ecx, [esp + 8 + 20] // width |
| 3234 pcmpeqb xmm5, xmm5 // set every byte of xmm5 to 0xff for alpha |
| 3235 |
| 3236 convertloop: |
| 3237 READNV21 |
| 3238 YUVTORGB(ebx) |
| 3239 STOREARGB |
| 3240 |
| 3241 sub ecx, 8 |
| 3242 jg convertloop |
| 3243 |
| 3244 pop ebx |
| 3245 pop esi |
| 3246 ret |
| 3247 } |
| 3248 } |
| 3249 |
| 3250 // 8 pixels. |
3156 // 4 YUY2 values with 8 Y and 4 UV producing 8 ARGB (32 bytes). | 3251 // 4 YUY2 values with 8 Y and 4 UV producing 8 ARGB (32 bytes). |
3157 __declspec(naked) | 3252 __declspec(naked) |
3158 void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, | 3253 void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, |
3159 uint8* dst_argb, | 3254 uint8* dst_argb, |
3160 struct YuvConstants* yuvconstants, | 3255 struct YuvConstants* yuvconstants, |
3161 int width) { | 3256 int width) { |
3162 __asm { | 3257 __asm { |
3163 push ebx | 3258 push ebx |
3164 mov eax, [esp + 4 + 4] // yuy2 | 3259 mov eax, [esp + 4 + 4] // yuy2 |
3165 mov edx, [esp + 4 + 8] // argb | 3260 mov edx, [esp + 4 + 8] // argb |
(...skipping 3377 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6543 } | 6638 } |
6544 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 6639 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
6545 | 6640 |
6546 #endif // defined(_M_X64) | 6641 #endif // defined(_M_X64) |
6547 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) | 6642 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) |
6548 | 6643 |
6549 #ifdef __cplusplus | 6644 #ifdef __cplusplus |
6550 } // extern "C" | 6645 } // extern "C" |
6551 } // namespace libyuv | 6646 } // namespace libyuv |
6552 #endif | 6647 #endif |
OLD | NEW |