OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
108 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; | 108 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; |
109 while (width > 0) { | 109 while (width > 0) { |
110 READYUV422 | 110 READYUV422 |
111 YUVTORGB(yuvconstants) | 111 YUVTORGB(yuvconstants) |
112 STOREARGB | 112 STOREARGB |
113 width -= 8; | 113 width -= 8; |
114 } | 114 } |
115 } | 115 } |
116 #endif | 116 #endif |
117 | 117 |
118 #if defined(HAS_I422TOABGRROW_SSSE3) | |
// Row routine assembled from the READYUV422, YUVTORGB and STOREABGR macros, | |
// which are expanded once per loop iteration. | |
// NOTE(review): going by its name and parameters this converts one row of | |
// I422 (separate Y, U and V planes) into ABGR at dst_abgr; the macro bodies | |
// are defined outside this view, so confirm the details there. | |
// NOTE(review): the macros presumably refer to the locals declared below | |
// (xmm0, xmm1, xmm2, xmm4, xmm5, offset) by name, so these identifiers | |
// should not be renamed without checking the macro definitions. | |
// width is reduced by 8 per iteration and the loop runs while width > 0, so | |
// a width that is not a multiple of 8 still executes one more full iteration. | |
119 void I422ToABGRRow_SSSE3(const uint8* y_buf, | |
120 const uint8* u_buf, | |
121 const uint8* v_buf, | |
122 uint8* dst_abgr, | |
123 const struct YuvConstants* yuvconstants, | |
124 int width) { | |
125 __m128i xmm0, xmm1, xmm2, xmm4; | |
// Every byte of xmm5 is 0xff; presumably the alpha bytes consumed by | |
// STOREABGR - TODO confirm against the macro. | |
126 const __m128i xmm5 = _mm_set1_epi8(-1); | |
// Byte distance from the U plane pointer to the V plane pointer. | |
127 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; | |
128 while (width > 0) { | |
129 READYUV422 | |
130 YUVTORGB(yuvconstants) | |
131 STOREABGR | |
// The loop counter drops by 8 for each expansion of the three macros above. | |
132 width -= 8; | |
133 } | |
134 } | |
135 #endif | |
136 | |
137 #if defined(HAS_I422ALPHATOARGBROW_SSSE3) | 118 #if defined(HAS_I422ALPHATOARGBROW_SSSE3) |
138 void I422AlphaToARGBRow_SSSE3(const uint8* y_buf, | 119 void I422AlphaToARGBRow_SSSE3(const uint8* y_buf, |
139 const uint8* u_buf, | 120 const uint8* u_buf, |
140 const uint8* v_buf, | 121 const uint8* v_buf, |
141 const uint8* a_buf, | 122 const uint8* a_buf, |
142 uint8* dst_argb, | 123 uint8* dst_argb, |
143 const struct YuvConstants* yuvconstants, | 124 const struct YuvConstants* yuvconstants, |
144 int width) { | 125 int width) { |
145 __m128i xmm0, xmm1, xmm2, xmm4, xmm5; | 126 __m128i xmm0, xmm1, xmm2, xmm4, xmm5; |
146 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; | 127 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; |
(...skipping 2301 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2448 | 2429 |
2449 pop ebx | 2430 pop ebx |
2450 pop edi | 2431 pop edi |
2451 pop esi | 2432 pop esi |
2452 vzeroupper | 2433 vzeroupper |
2453 ret | 2434 ret |
2454 } | 2435 } |
2455 } | 2436 } |
2456 #endif // HAS_I422TORGBAROW_AVX2 | 2437 #endif // HAS_I422TORGBAROW_AVX2 |
2457 | 2438 |
2458 #ifdef HAS_I422TOABGRROW_AVX2 | |
2459 // 16 pixels | |
2460 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes). | |
// Naked 32-bit x86 routine: no prologue is generated by the compiler, so the | |
// arguments are loaded straight from the stack. The three pushes below lower | |
// esp by 12 bytes, which is why every argument load is written as | |
// [esp + 12 + N]. | |
// NOTE(review): READYUV422_AVX2, YUVTORGB_AVX2 and STOREABGR_AVX2 are defined | |
// outside this view; they presumably depend on the register assignments made | |
// here (eax, esi, edi, edx, ebx, ymm5) - confirm before changing any of them. | |
2461 __declspec(naked) | |
2462 void I422ToABGRRow_AVX2(const uint8* y_buf, | |
2463 const uint8* u_buf, | |
2464 const uint8* v_buf, | |
2465 uint8* dst_argb, | |
2466 const struct YuvConstants* yuvconstants, | |
2467 int width) { | |
2468 __asm { | |
// Save the registers this routine overwrites; they are popped in reverse | |
// order before ret. | |
2469 push esi | |
2470 push edi | |
2471 push ebx | |
2472 mov eax, [esp + 12 + 4] // Y (y_buf) | |
2473 mov esi, [esp + 12 + 8] // U (u_buf) | |
2474 mov edi, [esp + 12 + 12] // V (v_buf) | |
2475 mov edx, [esp + 12 + 16] // dst_argb (named argb, but this routine is the ABGR variant) | |
2476 mov ebx, [esp + 12 + 20] // yuvconstants | |
2477 mov ecx, [esp + 12 + 24] // width | |
// edi = v_buf - u_buf, i.e. V is kept as a byte offset from the U pointer in esi. | |
2478 sub edi, esi | |
2479 vpcmpeqb ymm5, ymm5, ymm5 // comparing ymm5 with itself sets every byte to 0xff, for alpha | |
2480 | |
2481 convertloop: | |
2482 READYUV422_AVX2 | |
2483 YUVTORGB_AVX2(ebx) | |
2484 STOREABGR_AVX2 | |
2485 | |
// Count down 16 pixels per pass and loop again while the remaining width is > 0. | |
2486 sub ecx, 16 | |
2487 jg convertloop | |
2488 | |
2489 pop ebx | |
2490 pop edi | |
2491 pop esi | |
// Zero the upper halves of the ymm registers before returning; presumably to | |
// avoid AVX-to-SSE transition penalties in the caller - TODO confirm intent. | |
2492 vzeroupper | |
2493 ret | |
2494 } | |
2495 } | |
2496 #endif // HAS_I422TOABGRROW_AVX2 | |
2497 | |
2498 #if defined(HAS_I422TOARGBROW_SSSE3) | 2439 #if defined(HAS_I422TOARGBROW_SSSE3) |
2499 // TODO(fbarchard): Read that does half size on Y and treats 420 as 444. | 2440 // TODO(fbarchard): Read that does half size on Y and treats 420 as 444. |
| 2441 // Allows a conversion with half size scaling. |
2500 | 2442 |
2501 // Read 8 UV from 444. | 2443 // Read 8 UV from 444. |
2502 #define READYUV444 __asm { \ | 2444 #define READYUV444 __asm { \ |
2503 __asm movq xmm0, qword ptr [esi] /* U */ \ | 2445 __asm movq xmm0, qword ptr [esi] /* U */ \ |
2504 __asm movq xmm1, qword ptr [esi + edi] /* V */ \ | 2446 __asm movq xmm1, qword ptr [esi + edi] /* V */ \ |
2505 __asm lea esi, [esi + 8] \ | 2447 __asm lea esi, [esi + 8] \ |
2506 __asm punpcklbw xmm0, xmm1 /* UV */ \ | 2448 __asm punpcklbw xmm0, xmm1 /* UV */ \ |
2507 __asm movq xmm4, qword ptr [eax] \ | 2449 __asm movq xmm4, qword ptr [eax] \ |
2508 __asm punpcklbw xmm4, xmm4 \ | 2450 __asm punpcklbw xmm4, xmm4 \ |
2509 __asm lea eax, [eax + 8] \ | 2451 __asm lea eax, [eax + 8] \ |
(...skipping 3950 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6460 } | 6402 } |
6461 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 6403 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
6462 | 6404 |
6463 #endif // defined(_M_X64) | 6405 #endif // defined(_M_X64) |
6464 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) | 6406 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) |
6465 | 6407 |
6466 #ifdef __cplusplus | 6408 #ifdef __cplusplus |
6467 } // extern "C" | 6409 } // extern "C" |
6468 } // namespace libyuv | 6410 } // namespace libyuv |
6469 #endif | 6411 #endif |
OLD | NEW |