Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(966)

Side by Side Diff: source/row_win.cc

Issue 2406123002: Remove I411 support, update doc and switch to side by side test (Closed)
Patch Set: bump version, disable a few lint warnings Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_neon64.cc ('k') | unit_test/convert_test.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 /* 1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 1951 matching lines...) Expand 10 before | Expand all | Expand 10 after
1962 __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \ 1962 __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \
1963 __asm vmovdqu xmm4, [eax] /* Y */ \ 1963 __asm vmovdqu xmm4, [eax] /* Y */ \
1964 __asm vpermq ymm4, ymm4, 0xd8 \ 1964 __asm vpermq ymm4, ymm4, 0xd8 \
1965 __asm vpunpcklbw ymm4, ymm4, ymm4 \ 1965 __asm vpunpcklbw ymm4, ymm4, ymm4 \
1966 __asm lea eax, [eax + 16] \ 1966 __asm lea eax, [eax + 16] \
1967 __asm vmovdqu xmm5, [ebp] /* A */ \ 1967 __asm vmovdqu xmm5, [ebp] /* A */ \
1968 __asm vpermq ymm5, ymm5, 0xd8 \ 1968 __asm vpermq ymm5, ymm5, 0xd8 \
1969 __asm lea ebp, [ebp + 16] \ 1969 __asm lea ebp, [ebp + 16] \
1970 } 1970 }
1971 1971
1972 // Read 4 UV from 411, upsample to 16 UV.
1973 #define READYUV411_AVX2 __asm { \
1974 __asm vmovd xmm0, dword ptr [esi] /* U */ \
1975 __asm vmovd xmm1, dword ptr [esi + edi] /* V */ \
1976 __asm lea esi, [esi + 4] \
1977 __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \
1978 __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \
1979 __asm vpermq ymm0, ymm0, 0xd8 \
1980 __asm vpunpckldq ymm0, ymm0, ymm0 /* UVUVUVUV (upsample) */ \
1981 __asm vmovdqu xmm4, [eax] /* Y */ \
1982 __asm vpermq ymm4, ymm4, 0xd8 \
1983 __asm vpunpcklbw ymm4, ymm4, ymm4 \
1984 __asm lea eax, [eax + 16] \
1985 }
1986
1987 // Read 8 UV from NV12, upsample to 16 UV. 1972 // Read 8 UV from NV12, upsample to 16 UV.
1988 #define READNV12_AVX2 __asm { \ 1973 #define READNV12_AVX2 __asm { \
1989 __asm vmovdqu xmm0, [esi] /* UV */ \ 1974 __asm vmovdqu xmm0, [esi] /* UV */ \
1990 __asm lea esi, [esi + 16] \ 1975 __asm lea esi, [esi + 16] \
1991 __asm vpermq ymm0, ymm0, 0xd8 \ 1976 __asm vpermq ymm0, ymm0, 0xd8 \
1992 __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \ 1977 __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \
1993 __asm vmovdqu xmm4, [eax] /* Y */ \ 1978 __asm vmovdqu xmm4, [eax] /* Y */ \
1994 __asm vpermq ymm4, ymm4, 0xd8 \ 1979 __asm vpermq ymm4, ymm4, 0xd8 \
1995 __asm vpunpcklbw ymm4, ymm4, ymm4 \ 1980 __asm vpunpcklbw ymm4, ymm4, ymm4 \
1996 __asm lea eax, [eax + 16] \ 1981 __asm lea eax, [eax + 16] \
(...skipping 194 matching lines...) Expand 10 before | Expand all | Expand 10 after
2191 2176
2192 pop ebx 2177 pop ebx
2193 pop edi 2178 pop edi
2194 pop esi 2179 pop esi
2195 vzeroupper 2180 vzeroupper
2196 ret 2181 ret
2197 } 2182 }
2198 } 2183 }
2199 #endif // HAS_I444TOARGBROW_AVX2 2184 #endif // HAS_I444TOARGBROW_AVX2
2200 2185
2201 #ifdef HAS_I411TOARGBROW_AVX2
2202 // 16 pixels
2203 // 4 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
2204 __declspec(naked)
2205 void I411ToARGBRow_AVX2(const uint8* y_buf,
2206 const uint8* u_buf,
2207 const uint8* v_buf,
2208 uint8* dst_argb,
2209 const struct YuvConstants* yuvconstants,
2210 int width) {
2211 __asm {
2212 push esi
2213 push edi
2214 push ebx
2215 mov eax, [esp + 12 + 4] // Y
2216 mov esi, [esp + 12 + 8] // U
2217 mov edi, [esp + 12 + 12] // V
2218 mov edx, [esp + 12 + 16] // argb
2219 mov ebx, [esp + 12 + 20] // yuvconstants
2220 mov ecx, [esp + 12 + 24] // width
2221 sub edi, esi
2222 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
2223
2224 convertloop:
2225 READYUV411_AVX2
2226 YUVTORGB_AVX2(ebx)
2227 STOREARGB_AVX2
2228
2229 sub ecx, 16
2230 jg convertloop
2231
2232 pop ebx
2233 pop edi
2234 pop esi
2235 vzeroupper
2236 ret
2237 }
2238 }
2239 #endif // HAS_I411TOARGBROW_AVX2
2240
2241 #ifdef HAS_NV12TOARGBROW_AVX2 2186 #ifdef HAS_NV12TOARGBROW_AVX2
2242 // 16 pixels. 2187 // 16 pixels.
2243 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). 2188 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
2244 __declspec(naked) 2189 __declspec(naked)
2245 void NV12ToARGBRow_AVX2(const uint8* y_buf, 2190 void NV12ToARGBRow_AVX2(const uint8* y_buf,
2246 const uint8* uv_buf, 2191 const uint8* uv_buf,
2247 uint8* dst_argb, 2192 uint8* dst_argb,
2248 const struct YuvConstants* yuvconstants, 2193 const struct YuvConstants* yuvconstants,
2249 int width) { 2194 int width) {
2250 __asm { 2195 __asm {
(...skipping 193 matching lines...) Expand 10 before | Expand all | Expand 10 after
2444 __asm lea esi, [esi + 4] \ 2389 __asm lea esi, [esi + 4] \
2445 __asm punpcklbw xmm0, xmm1 /* UV */ \ 2390 __asm punpcklbw xmm0, xmm1 /* UV */ \
2446 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ 2391 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
2447 __asm movq xmm4, qword ptr [eax] /* Y */ \ 2392 __asm movq xmm4, qword ptr [eax] /* Y */ \
2448 __asm punpcklbw xmm4, xmm4 \ 2393 __asm punpcklbw xmm4, xmm4 \
2449 __asm lea eax, [eax + 8] \ 2394 __asm lea eax, [eax + 8] \
2450 __asm movq xmm5, qword ptr [ebp] /* A */ \ 2395 __asm movq xmm5, qword ptr [ebp] /* A */ \
2451 __asm lea ebp, [ebp + 8] \ 2396 __asm lea ebp, [ebp + 8] \
2452 } 2397 }
2453 2398
2454 // Read 2 UV from 411, upsample to 8 UV.
2455 // drmemory fails with memory fault if pinsrw used. libyuv bug: 525
2456 // __asm pinsrw xmm0, [esi], 0 /* U */
2457 // __asm pinsrw xmm1, [esi + edi], 0 /* V */
2458 #define READYUV411_EBX __asm { \
2459 __asm movzx ebx, word ptr [esi] /* U */ \
2460 __asm movd xmm0, ebx \
2461 __asm movzx ebx, word ptr [esi + edi] /* V */ \
2462 __asm movd xmm1, ebx \
2463 __asm lea esi, [esi + 2] \
2464 __asm punpcklbw xmm0, xmm1 /* UV */ \
2465 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
2466 __asm punpckldq xmm0, xmm0 /* UVUVUVUV (upsample) */ \
2467 __asm movq xmm4, qword ptr [eax] \
2468 __asm punpcklbw xmm4, xmm4 \
2469 __asm lea eax, [eax + 8] \
2470 }
2471
2472 // Read 4 UV from NV12, upsample to 8 UV. 2399 // Read 4 UV from NV12, upsample to 8 UV.
2473 #define READNV12 __asm { \ 2400 #define READNV12 __asm { \
2474 __asm movq xmm0, qword ptr [esi] /* UV */ \ 2401 __asm movq xmm0, qword ptr [esi] /* UV */ \
2475 __asm lea esi, [esi + 8] \ 2402 __asm lea esi, [esi + 8] \
2476 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ 2403 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
2477 __asm movq xmm4, qword ptr [eax] \ 2404 __asm movq xmm4, qword ptr [eax] \
2478 __asm punpcklbw xmm4, xmm4 \ 2405 __asm punpcklbw xmm4, xmm4 \
2479 __asm lea eax, [eax + 8] \ 2406 __asm lea eax, [eax + 8] \
2480 } 2407 }
2481 2408
(...skipping 331 matching lines...) Expand 10 before | Expand all | Expand 10 after
2813 2740
2814 pop ebp 2741 pop ebp
2815 pop ebx 2742 pop ebx
2816 pop edi 2743 pop edi
2817 pop esi 2744 pop esi
2818 ret 2745 ret
2819 } 2746 }
2820 } 2747 }
2821 2748
2822 // 8 pixels. 2749 // 8 pixels.
2823 // 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
2824 // Similar to I420 but duplicate UV once more.
2825 __declspec(naked)
2826 void I411ToARGBRow_SSSE3(const uint8* y_buf,
2827 const uint8* u_buf,
2828 const uint8* v_buf,
2829 uint8* dst_argb,
2830 const struct YuvConstants* yuvconstants,
2831 int width) {
2832 __asm {
2833 push esi
2834 push edi
2835 push ebx
2836 push ebp
2837 mov eax, [esp + 16 + 4] // Y
2838 mov esi, [esp + 16 + 8] // U
2839 mov edi, [esp + 16 + 12] // V
2840 mov edx, [esp + 16 + 16] // argb
2841 mov ebp, [esp + 16 + 20] // yuvconstants
2842 mov ecx, [esp + 16 + 24] // width
2843 sub edi, esi
2844 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
2845
2846 convertloop:
2847 READYUV411_EBX
2848 YUVTORGB(ebp)
2849 STOREARGB
2850
2851 sub ecx, 8
2852 jg convertloop
2853
2854 pop ebp
2855 pop ebx
2856 pop edi
2857 pop esi
2858 ret
2859 }
2860 }
2861
2862 // 8 pixels.
2863 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). 2750 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
2864 __declspec(naked) 2751 __declspec(naked)
2865 void NV12ToARGBRow_SSSE3(const uint8* y_buf, 2752 void NV12ToARGBRow_SSSE3(const uint8* y_buf,
2866 const uint8* uv_buf, 2753 const uint8* uv_buf,
2867 uint8* dst_argb, 2754 uint8* dst_argb,
2868 const struct YuvConstants* yuvconstants, 2755 const struct YuvConstants* yuvconstants,
2869 int width) { 2756 int width) {
2870 __asm { 2757 __asm {
2871 push esi 2758 push esi
2872 push ebx 2759 push ebx
(...skipping 3454 matching lines...) Expand 10 before | Expand all | Expand 10 after
6327 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 6214 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
6328 6215
6329 #endif // defined(_M_X64) 6216 #endif // defined(_M_X64)
6330 6217
6331 #ifdef __cplusplus 6218 #ifdef __cplusplus
6332 } // extern "C" 6219 } // extern "C"
6333 } // namespace libyuv 6220 } // namespace libyuv
6334 #endif 6221 #endif
6335 6222
6336 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) 6223 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64))
OLD | NEW
« no previous file with comments | « source/row_neon64.cc ('k') | unit_test/convert_test.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698