Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(304)

Side by Side Diff: source/row_win.cc

Issue 1355393002: yuy2 to rgb gcc versions (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_gcc.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 16 matching lines...) Expand all
27 27
28 // 64 bit 28 // 64 bit
29 #if defined(_M_X64) 29 #if defined(_M_X64)
30 30
31 // Read 4 UV from 422, upsample to 8 UV. 31 // Read 4 UV from 422, upsample to 8 UV.
32 #define READYUV422 \ 32 #define READYUV422 \
33 xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); \ 33 xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); \
34 xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \ 34 xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \
35 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ 35 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \
36 xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \ 36 xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \
37 u_buf += 4; 37 u_buf += 4; \
38 xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \
39 xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \
40 y_buf += 8; \
38 41
39 // Convert 8 pixels: 8 UV and 8 Y. 42 // Convert 8 pixels: 8 UV and 8 Y.
40 #define YUVTORGB(YuvConstants) \ 43 #define YUVTORGB(YuvConstants) \
41 xmm1 = _mm_loadu_si128(&xmm0); \ 44 xmm1 = _mm_loadu_si128(&xmm0); \
42 xmm2 = _mm_loadu_si128(&xmm0); \ 45 xmm2 = _mm_loadu_si128(&xmm0); \
43 xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)YuvConstants->kUVToB); \ 46 xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)YuvConstants->kUVToB); \
44 xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)YuvConstants->kUVToG); \ 47 xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)YuvConstants->kUVToG); \
45 xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)YuvConstants->kUVToR); \ 48 xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)YuvConstants->kUVToR); \
46 xmm0 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasB, xmm0); \ 49 xmm0 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasB, xmm0); \
47 xmm1 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasG, xmm1); \ 50 xmm1 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasG, xmm1); \
48 xmm2 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasR, xmm2); \ 51 xmm2 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasR, xmm2); \
49 xmm3 = _mm_loadl_epi64((__m128i*)y_buf); \ 52 xmm4 = _mm_mulhi_epu16(xmm4, *(__m128i*)YuvConstants->kYToRgb); \
50 y_buf += 8; \ 53 xmm0 = _mm_adds_epi16(xmm0, xmm4); \
51 xmm3 = _mm_unpacklo_epi8(xmm3, xmm3); \ 54 xmm1 = _mm_adds_epi16(xmm1, xmm4); \
52 xmm3 = _mm_mulhi_epu16(xmm3, *(__m128i*)YuvConstants->kYToRgb); \ 55 xmm2 = _mm_adds_epi16(xmm2, xmm4); \
53 xmm0 = _mm_adds_epi16(xmm0, xmm3); \
54 xmm1 = _mm_adds_epi16(xmm1, xmm3); \
55 xmm2 = _mm_adds_epi16(xmm2, xmm3); \
56 xmm0 = _mm_srai_epi16(xmm0, 6); \ 56 xmm0 = _mm_srai_epi16(xmm0, 6); \
57 xmm1 = _mm_srai_epi16(xmm1, 6); \ 57 xmm1 = _mm_srai_epi16(xmm1, 6); \
58 xmm2 = _mm_srai_epi16(xmm2, 6); \ 58 xmm2 = _mm_srai_epi16(xmm2, 6); \
59 xmm0 = _mm_packus_epi16(xmm0, xmm0); \ 59 xmm0 = _mm_packus_epi16(xmm0, xmm0); \
60 xmm1 = _mm_packus_epi16(xmm1, xmm1); \ 60 xmm1 = _mm_packus_epi16(xmm1, xmm1); \
61 xmm2 = _mm_packus_epi16(xmm2, xmm2); 61 xmm2 = _mm_packus_epi16(xmm2, xmm2);
62 62
63 // Store 8 ARGB values. 63 // Store 8 ARGB values.
64 #define STOREARGB \ 64 #define STOREARGB \
65 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ 65 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \
(...skipping 17 matching lines...) Expand all
83 dst_argb += 32; 83 dst_argb += 32;
84 84
85 85
86 #if defined(HAS_I422TOARGBROW_SSSE3) 86 #if defined(HAS_I422TOARGBROW_SSSE3)
87 void I422ToARGBRow_SSSE3(const uint8* y_buf, 87 void I422ToARGBRow_SSSE3(const uint8* y_buf,
88 const uint8* u_buf, 88 const uint8* u_buf,
89 const uint8* v_buf, 89 const uint8* v_buf,
90 uint8* dst_argb, 90 uint8* dst_argb,
91 struct YuvConstants* yuvconstants, 91 struct YuvConstants* yuvconstants,
92 int width) { 92 int width) {
93 __m128i xmm0, xmm1, xmm2, xmm3; 93 __m128i xmm0, xmm1, xmm2, xmm4;
94 const __m128i xmm5 = _mm_set1_epi8(-1); 94 const __m128i xmm5 = _mm_set1_epi8(-1);
95 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; 95 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
96 while (width > 0) { 96 while (width > 0) {
97 READYUV422 97 READYUV422
98 YUVTORGB(YuvConstants) 98 YUVTORGB(yuvconstants)
99 STOREARGB 99 STOREARGB
100 width -= 8; 100 width -= 8;
101 } 101 }
102 } 102 }
103 #endif 103 #endif
104 104
105 #if defined(HAS_I422TOABGRROW_SSSE3) 105 #if defined(HAS_I422TOABGRROW_SSSE3)
106 void I422ToABGRRow_SSSE3(const uint8* y_buf, 106 void I422ToABGRRow_SSSE3(const uint8* y_buf,
107 const uint8* u_buf, 107 const uint8* u_buf,
108 const uint8* v_buf, 108 const uint8* v_buf,
109 uint8* dst_argb, 109 uint8* dst_argb,
110 struct YuvConstants* yuvconstants, 110 struct YuvConstants* yuvconstants,
111 int width) { 111 int width) {
112 __m128i xmm0, xmm1, xmm2, xmm3; 112 __m128i xmm0, xmm1, xmm2, xmm4;
113 const __m128i xmm5 = _mm_set1_epi8(-1); 113 const __m128i xmm5 = _mm_set1_epi8(-1);
114 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; 114 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf;
115 while (width > 0) { 115 while (width > 0) {
116 READYUV422 116 READYUV422
117 YUVTORGB(YuvConstants) 117 YUVTORGB(yuvconstants)
118 STOREABGR 118 STOREABGR
119 width -= 8; 119 width -= 8;
120 } 120 }
121 } 121 }
122 #endif 122 #endif
123 // 32 bit 123 // 32 bit
124 #else // defined(_M_X64) 124 #else // defined(_M_X64)
125 #ifdef HAS_ARGBTOYROW_SSSE3 125 #ifdef HAS_ARGBTOYROW_SSSE3
126 126
127 // Constants for ARGB. 127 // Constants for ARGB.
(...skipping 1717 matching lines...) Expand 10 before | Expand all | Expand 10 after
1845 #endif // HAS_ARGBTOYROW_SSSE3 1845 #endif // HAS_ARGBTOYROW_SSSE3
1846 1846
1847 // Read 16 UV from 444 1847 // Read 16 UV from 444
1848 #define READYUV444_AVX2 __asm { \ 1848 #define READYUV444_AVX2 __asm { \
1849 __asm vmovdqu xmm0, [esi] /* U */ \ 1849 __asm vmovdqu xmm0, [esi] /* U */ \
1850 __asm vmovdqu xmm1, [esi + edi] /* V */ \ 1850 __asm vmovdqu xmm1, [esi + edi] /* V */ \
1851 __asm lea esi, [esi + 16] \ 1851 __asm lea esi, [esi + 16] \
1852 __asm vpermq ymm0, ymm0, 0xd8 \ 1852 __asm vpermq ymm0, ymm0, 0xd8 \
1853 __asm vpermq ymm1, ymm1, 0xd8 \ 1853 __asm vpermq ymm1, ymm1, 0xd8 \
1854 __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \ 1854 __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \
1855 __asm vmovdqu xmm4, [eax] /* Y */ \
1856 __asm vpermq ymm4, ymm4, 0xd8 \
1857 __asm vpunpcklbw ymm4, ymm4, ymm4 \
1858 __asm lea eax, [eax + 16] \
1855 } 1859 }
1856 1860
1857 // Read 8 UV from 422, upsample to 16 UV. 1861 // Read 8 UV from 422, upsample to 16 UV.
1858 #define READYUV422_AVX2 __asm { \ 1862 #define READYUV422_AVX2 __asm { \
1859 __asm vmovq xmm0, qword ptr [esi] /* U */ \ 1863 __asm vmovq xmm0, qword ptr [esi] /* U */ \
1860 __asm vmovq xmm1, qword ptr [esi + edi] /* V */ \ 1864 __asm vmovq xmm1, qword ptr [esi + edi] /* V */ \
1861 __asm lea esi, [esi + 8] \ 1865 __asm lea esi, [esi + 8] \
1862 __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \ 1866 __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \
1863 __asm vpermq ymm0, ymm0, 0xd8 \ 1867 __asm vpermq ymm0, ymm0, 0xd8 \
1864 __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \ 1868 __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \
1869 __asm vmovdqu xmm4, [eax] /* Y */ \
1870 __asm vpermq ymm4, ymm4, 0xd8 \
1871 __asm vpunpcklbw ymm4, ymm4, ymm4 \
1872 __asm lea eax, [eax + 16] \
1865 } 1873 }
1866 1874
1867 // Read 4 UV from 411, upsample to 16 UV. 1875 // Read 4 UV from 411, upsample to 16 UV.
1868 #define READYUV411_AVX2 __asm { \ 1876 #define READYUV411_AVX2 __asm { \
1869 __asm vmovd xmm0, dword ptr [esi] /* U */ \ 1877 __asm vmovd xmm0, dword ptr [esi] /* U */ \
1870 __asm vmovd xmm1, dword ptr [esi + edi] /* V */ \ 1878 __asm vmovd xmm1, dword ptr [esi + edi] /* V */ \
1871 __asm lea esi, [esi + 4] \ 1879 __asm lea esi, [esi + 4] \
1872 __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \ 1880 __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \
1873 __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \ 1881 __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \
1874 __asm vpermq ymm0, ymm0, 0xd8 \ 1882 __asm vpermq ymm0, ymm0, 0xd8 \
1875 __asm vpunpckldq ymm0, ymm0, ymm0 /* UVUVUVUV (upsample) */ \ 1883 __asm vpunpckldq ymm0, ymm0, ymm0 /* UVUVUVUV (upsample) */ \
1884 __asm vmovdqu xmm4, [eax] /* Y */ \
1885 __asm vpermq ymm4, ymm4, 0xd8 \
1886 __asm vpunpcklbw ymm4, ymm4, ymm4 \
1887 __asm lea eax, [eax + 16] \
1876 } 1888 }
1877 1889
1878 // Read 8 UV from NV12, upsample to 16 UV. 1890 // Read 8 UV from NV12, upsample to 16 UV.
1879 #define READNV12_AVX2 __asm { \ 1891 #define READNV12_AVX2 __asm { \
1880 __asm vmovdqu xmm0, [esi] /* UV */ \ 1892 __asm vmovdqu xmm0, [esi] /* UV */ \
1881 __asm lea esi, [esi + 16] \ 1893 __asm lea esi, [esi + 16] \
1882 __asm vpermq ymm0, ymm0, 0xd8 \ 1894 __asm vpermq ymm0, ymm0, 0xd8 \
1883 __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \ 1895 __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \
1896 __asm vmovdqu xmm4, [eax] /* Y */ \
1897 __asm vpermq ymm4, ymm4, 0xd8 \
1898 __asm vpunpcklbw ymm4, ymm4, ymm4 \
1899 __asm lea eax, [eax + 16] \
1884 } 1900 }
1885 1901
1886 // Convert 16 pixels: 16 UV and 16 Y. 1902 // Convert 16 pixels: 16 UV and 16 Y.
1887 #define YUVTORGB_AVX2(YuvConstants) __asm { \ 1903 #define YUVTORGB_AVX2(YuvConstants) __asm { \
1888 __asm vpmaddubsw ymm2, ymm0, ymmword ptr [YuvConstants + KUVTOR] /* R UV */\ 1904 __asm vpmaddubsw ymm2, ymm0, ymmword ptr [YuvConstants + KUVTOR] /* R UV */\
1889 __asm vpmaddubsw ymm1, ymm0, ymmword ptr [YuvConstants + KUVTOG] /* G UV */\ 1905 __asm vpmaddubsw ymm1, ymm0, ymmword ptr [YuvConstants + KUVTOG] /* G UV */\
1890 __asm vpmaddubsw ymm0, ymm0, ymmword ptr [YuvConstants + KUVTOB] /* B UV */\ 1906 __asm vpmaddubsw ymm0, ymm0, ymmword ptr [YuvConstants + KUVTOB] /* B UV */\
1891 __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASR] \ 1907 __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASR] \
1892 __asm vpsubw ymm2, ymm3, ymm2 \ 1908 __asm vpsubw ymm2, ymm3, ymm2 \
1893 __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASG] \ 1909 __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASG] \
1894 __asm vpsubw ymm1, ymm3, ymm1 \ 1910 __asm vpsubw ymm1, ymm3, ymm1 \
1895 __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASB] \ 1911 __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASB] \
1896 __asm vpsubw ymm0, ymm3, ymm0 \ 1912 __asm vpsubw ymm0, ymm3, ymm0 \
1897 /* Step 2: Find Y contribution to 16 R,G,B values */ \ 1913 /* Step 2: Find Y contribution to 16 R,G,B values */ \
1898 __asm vmovdqu xmm3, [eax] \ 1914 __asm vpmulhuw ymm4, ymm4, ymmword ptr [YuvConstants + KYTORGB] \
1899 __asm lea eax, [eax + 16] \ 1915 __asm vpaddsw ymm0, ymm0, ymm4 /* B += Y */ \
1900 __asm vpermq ymm3, ymm3, 0xd8 \ 1916 __asm vpaddsw ymm1, ymm1, ymm4 /* G += Y */ \
1901 __asm vpunpcklbw ymm3, ymm3, ymm3 \ 1917 __asm vpaddsw ymm2, ymm2, ymm4 /* R += Y */ \
1902 __asm vpmulhuw ymm3, ymm3, ymmword ptr [YuvConstants + KYTORGB] \
1903 __asm vpaddsw ymm0, ymm0, ymm3 /* B += Y */ \
1904 __asm vpaddsw ymm1, ymm1, ymm3 /* G += Y */ \
1905 __asm vpaddsw ymm2, ymm2, ymm3 /* R += Y */ \
1906 __asm vpsraw ymm0, ymm0, 6 \ 1918 __asm vpsraw ymm0, ymm0, 6 \
1907 __asm vpsraw ymm1, ymm1, 6 \ 1919 __asm vpsraw ymm1, ymm1, 6 \
1908 __asm vpsraw ymm2, ymm2, 6 \ 1920 __asm vpsraw ymm2, ymm2, 6 \
1909 __asm vpackuswb ymm0, ymm0, ymm0 /* B */ \ 1921 __asm vpackuswb ymm0, ymm0, ymm0 /* B */ \
1910 __asm vpackuswb ymm1, ymm1, ymm1 /* G */ \ 1922 __asm vpackuswb ymm1, ymm1, ymm1 /* G */ \
1911 __asm vpackuswb ymm2, ymm2, ymm2 /* R */ \ 1923 __asm vpackuswb ymm2, ymm2, ymm2 /* R */ \
1912 } 1924 }
1913 1925
1914 // Store 16 ARGB values. 1926 // Store 16 ARGB values.
1915 #define STOREARGB_AVX2 __asm { \ 1927 #define STOREARGB_AVX2 __asm { \
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after
1974 struct YuvConstants* yuvconstants, 1986 struct YuvConstants* yuvconstants,
1975 int width) { 1987 int width) {
1976 __asm { 1988 __asm {
1977 push esi 1989 push esi
1978 push edi 1990 push edi
1979 push ebp 1991 push ebp
1980 mov eax, [esp + 12 + 4] // Y 1992 mov eax, [esp + 12 + 4] // Y
1981 mov esi, [esp + 12 + 8] // U 1993 mov esi, [esp + 12 + 8] // U
1982 mov edi, [esp + 12 + 12] // V 1994 mov edi, [esp + 12 + 12] // V
1983 mov edx, [esp + 12 + 16] // argb 1995 mov edx, [esp + 12 + 16] // argb
1984 mov ebp, [esp + 12 + 20] // YuvConstants 1996 mov ebp, [esp + 12 + 20] // yuvconstants
1985 mov ecx, [esp + 12 + 24] // width 1997 mov ecx, [esp + 12 + 24] // width
1986 sub edi, esi 1998 sub edi, esi
1987 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha 1999 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
1988 2000
1989 convertloop: 2001 convertloop:
1990 READYUV422_AVX2 2002 READYUV422_AVX2
1991 YUVTORGB_AVX2(ebp) 2003 YUVTORGB_AVX2(ebp)
1992 STOREARGB_AVX2 2004 STOREARGB_AVX2
1993 2005
1994 sub ecx, 16 2006 sub ecx, 16
(...skipping 19 matching lines...) Expand all
2014 struct YuvConstants* yuvconstants, 2026 struct YuvConstants* yuvconstants,
2015 int width) { 2027 int width) {
2016 __asm { 2028 __asm {
2017 push esi 2029 push esi
2018 push edi 2030 push edi
2019 push ebp 2031 push ebp
2020 mov eax, [esp + 12 + 4] // Y 2032 mov eax, [esp + 12 + 4] // Y
2021 mov esi, [esp + 12 + 8] // U 2033 mov esi, [esp + 12 + 8] // U
2022 mov edi, [esp + 12 + 12] // V 2034 mov edi, [esp + 12 + 12] // V
2023 mov edx, [esp + 12 + 16] // argb 2035 mov edx, [esp + 12 + 16] // argb
2024 mov ebp, [esp + 12 + 20] // YuvConstants 2036 mov ebp, [esp + 12 + 20] // yuvconstants
2025 mov ecx, [esp + 12 + 24] // width 2037 mov ecx, [esp + 12 + 24] // width
2026 sub edi, esi 2038 sub edi, esi
2027 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha 2039 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
2028 convertloop: 2040 convertloop:
2029 READYUV444_AVX2 2041 READYUV444_AVX2
2030 YUVTORGB_AVX2(ebp) 2042 YUVTORGB_AVX2(ebp)
2031 STOREARGB_AVX2 2043 STOREARGB_AVX2
2032 2044
2033 sub ecx, 16 2045 sub ecx, 16
2034 jg convertloop 2046 jg convertloop
(...skipping 18 matching lines...) Expand all
2053 struct YuvConstants* yuvconstants, 2065 struct YuvConstants* yuvconstants,
2054 int width) { 2066 int width) {
2055 __asm { 2067 __asm {
2056 push esi 2068 push esi
2057 push edi 2069 push edi
2058 push ebp 2070 push ebp
2059 mov eax, [esp + 12 + 4] // Y 2071 mov eax, [esp + 12 + 4] // Y
2060 mov esi, [esp + 12 + 8] // U 2072 mov esi, [esp + 12 + 8] // U
2061 mov edi, [esp + 12 + 12] // V 2073 mov edi, [esp + 12 + 12] // V
2062 mov edx, [esp + 12 + 16] // abgr 2074 mov edx, [esp + 12 + 16] // abgr
2063 mov ebp, [esp + 12 + 20] // YuvConstants 2075 mov ebp, [esp + 12 + 20] // yuvconstants
2064 mov ecx, [esp + 12 + 24] // width 2076 mov ecx, [esp + 12 + 24] // width
2065 sub edi, esi 2077 sub edi, esi
2066 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha 2078 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
2067 convertloop: 2079 convertloop:
2068 READYUV444_AVX2 2080 READYUV444_AVX2
2069 YUVTORGB_AVX2(ebp) 2081 YUVTORGB_AVX2(ebp)
2070 STOREABGR_AVX2 2082 STOREABGR_AVX2
2071 2083
2072 sub ecx, 16 2084 sub ecx, 16
2073 jg convertloop 2085 jg convertloop
(...skipping 18 matching lines...) Expand all
2092 struct YuvConstants* yuvconstants, 2104 struct YuvConstants* yuvconstants,
2093 int width) { 2105 int width) {
2094 __asm { 2106 __asm {
2095 push esi 2107 push esi
2096 push edi 2108 push edi
2097 push ebp 2109 push ebp
2098 mov eax, [esp + 12 + 4] // Y 2110 mov eax, [esp + 12 + 4] // Y
2099 mov esi, [esp + 12 + 8] // U 2111 mov esi, [esp + 12 + 8] // U
2100 mov edi, [esp + 12 + 12] // V 2112 mov edi, [esp + 12 + 12] // V
2101 mov edx, [esp + 12 + 16] // abgr 2113 mov edx, [esp + 12 + 16] // abgr
2102 mov ebp, [esp + 12 + 20] // YuvConstants 2114 mov ebp, [esp + 12 + 20] // yuvconstants
2103 mov ecx, [esp + 12 + 24] // width 2115 mov ecx, [esp + 12 + 24] // width
2104 sub edi, esi 2116 sub edi, esi
2105 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha 2117 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
2106 2118
2107 convertloop: 2119 convertloop:
2108 READYUV411_AVX2 2120 READYUV411_AVX2
2109 YUVTORGB_AVX2(ebp) 2121 YUVTORGB_AVX2(ebp)
2110 STOREARGB_AVX2 2122 STOREARGB_AVX2
2111 2123
2112 sub ecx, 16 2124 sub ecx, 16
(...skipping 16 matching lines...) Expand all
2129 const uint8* uv_buf, 2141 const uint8* uv_buf,
2130 uint8* dst_argb, 2142 uint8* dst_argb,
2131 struct YuvConstants* yuvconstants, 2143 struct YuvConstants* yuvconstants,
2132 int width) { 2144 int width) {
2133 __asm { 2145 __asm {
2134 push esi 2146 push esi
2135 push ebp 2147 push ebp
2136 mov eax, [esp + 8 + 4] // Y 2148 mov eax, [esp + 8 + 4] // Y
2137 mov esi, [esp + 8 + 8] // UV 2149 mov esi, [esp + 8 + 8] // UV
2138 mov edx, [esp + 8 + 12] // argb 2150 mov edx, [esp + 8 + 12] // argb
2139 mov ebp, [esp + 8 + 16] // YuvConstants 2151 mov ebp, [esp + 8 + 16] // yuvconstants
2140 mov ecx, [esp + 8 + 20] // width 2152 mov ecx, [esp + 8 + 20] // width
2141 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha 2153 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
2142 2154
2143 convertloop: 2155 convertloop:
2144 READNV12_AVX2 2156 READNV12_AVX2
2145 YUVTORGB_AVX2(ebp) 2157 YUVTORGB_AVX2(ebp)
2146 STOREARGB_AVX2 2158 STOREARGB_AVX2
2147 2159
2148 sub ecx, 16 2160 sub ecx, 16
2149 jg convertloop 2161 jg convertloop
(...skipping 18 matching lines...) Expand all
2168 struct YuvConstants* yuvconstants, 2180 struct YuvConstants* yuvconstants,
2169 int width) { 2181 int width) {
2170 __asm { 2182 __asm {
2171 push esi 2183 push esi
2172 push edi 2184 push edi
2173 push ebp 2185 push ebp
2174 mov eax, [esp + 12 + 4] // Y 2186 mov eax, [esp + 12 + 4] // Y
2175 mov esi, [esp + 12 + 8] // U 2187 mov esi, [esp + 12 + 8] // U
2176 mov edi, [esp + 12 + 12] // V 2188 mov edi, [esp + 12 + 12] // V
2177 mov edx, [esp + 12 + 16] // abgr 2189 mov edx, [esp + 12 + 16] // abgr
2178 mov ebp, [esp + 12 + 20] // YuvConstants 2190 mov ebp, [esp + 12 + 20] // yuvconstants
2179 mov ecx, [esp + 12 + 24] // width 2191 mov ecx, [esp + 12 + 24] // width
2180 sub edi, esi 2192 sub edi, esi
2181 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha 2193 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
2182 2194
2183 convertloop: 2195 convertloop:
2184 READYUV422_AVX2 2196 READYUV422_AVX2
2185 YUVTORGB_AVX2(ebp) 2197 YUVTORGB_AVX2(ebp)
2186 STOREBGRA_AVX2 2198 STOREBGRA_AVX2
2187 2199
2188 sub ecx, 16 2200 sub ecx, 16
(...skipping 19 matching lines...) Expand all
2208 struct YuvConstants* yuvconstants, 2220 struct YuvConstants* yuvconstants,
2209 int width) { 2221 int width) {
2210 __asm { 2222 __asm {
2211 push esi 2223 push esi
2212 push edi 2224 push edi
2213 push ebp 2225 push ebp
2214 mov eax, [esp + 12 + 4] // Y 2226 mov eax, [esp + 12 + 4] // Y
2215 mov esi, [esp + 12 + 8] // U 2227 mov esi, [esp + 12 + 8] // U
2216 mov edi, [esp + 12 + 12] // V 2228 mov edi, [esp + 12 + 12] // V
2217 mov edx, [esp + 12 + 16] // abgr 2229 mov edx, [esp + 12 + 16] // abgr
2218 mov ebp, [esp + 12 + 20] // YuvConstants 2230 mov ebp, [esp + 12 + 20] // yuvconstants
2219 mov ecx, [esp + 12 + 24] // width 2231 mov ecx, [esp + 12 + 24] // width
2220 sub edi, esi 2232 sub edi, esi
2221 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha 2233 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
2222 2234
2223 convertloop: 2235 convertloop:
2224 READYUV422_AVX2 2236 READYUV422_AVX2
2225 YUVTORGB_AVX2(ebp) 2237 YUVTORGB_AVX2(ebp)
2226 STORERGBA_AVX2 2238 STORERGBA_AVX2
2227 2239
2228 sub ecx, 16 2240 sub ecx, 16
(...skipping 19 matching lines...) Expand all
2248 struct YuvConstants* yuvconstants, 2260 struct YuvConstants* yuvconstants,
2249 int width) { 2261 int width) {
2250 __asm { 2262 __asm {
2251 push esi 2263 push esi
2252 push edi 2264 push edi
2253 push ebp 2265 push ebp
2254 mov eax, [esp + 12 + 4] // Y 2266 mov eax, [esp + 12 + 4] // Y
2255 mov esi, [esp + 12 + 8] // U 2267 mov esi, [esp + 12 + 8] // U
2256 mov edi, [esp + 12 + 12] // V 2268 mov edi, [esp + 12 + 12] // V
2257 mov edx, [esp + 12 + 16] // argb 2269 mov edx, [esp + 12 + 16] // argb
2258 mov ebp, [esp + 12 + 20] // YuvConstants 2270 mov ebp, [esp + 12 + 20] // yuvconstants
2259 mov ecx, [esp + 12 + 24] // width 2271 mov ecx, [esp + 12 + 24] // width
2260 sub edi, esi 2272 sub edi, esi
2261 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha 2273 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
2262 2274
2263 convertloop: 2275 convertloop:
2264 READYUV422_AVX2 2276 READYUV422_AVX2
2265 YUVTORGB_AVX2(ebp) 2277 YUVTORGB_AVX2(ebp)
2266 STOREABGR_AVX2 2278 STOREABGR_AVX2
2267 2279
2268 sub ecx, 16 2280 sub ecx, 16
(...skipping 10 matching lines...) Expand all
2279 2291
2280 #if defined(HAS_I422TOARGBROW_SSSE3) 2292 #if defined(HAS_I422TOARGBROW_SSSE3)
2281 // TODO(fbarchard): Read that does half size on Y and treats 420 as 444. 2293 // TODO(fbarchard): Read that does half size on Y and treats 420 as 444.
2282 2294
2283 // Read 8 UV from 444. 2295 // Read 8 UV from 444.
2284 #define READYUV444 __asm { \ 2296 #define READYUV444 __asm { \
2285 __asm movq xmm0, qword ptr [esi] /* U */ \ 2297 __asm movq xmm0, qword ptr [esi] /* U */ \
2286 __asm movq xmm1, qword ptr [esi + edi] /* V */ \ 2298 __asm movq xmm1, qword ptr [esi + edi] /* V */ \
2287 __asm lea esi, [esi + 8] \ 2299 __asm lea esi, [esi + 8] \
2288 __asm punpcklbw xmm0, xmm1 /* UV */ \ 2300 __asm punpcklbw xmm0, xmm1 /* UV */ \
2301 __asm movq xmm4, qword ptr [eax] \
2302 __asm punpcklbw xmm4, xmm4 \
2303 __asm lea eax, [eax + 8] \
2289 } 2304 }
2290 2305
2291 // Read 4 UV from 422, upsample to 8 UV. 2306 // Read 4 UV from 422, upsample to 8 UV.
2292 #define READYUV422 __asm { \ 2307 #define READYUV422 __asm { \
2293 __asm movd xmm0, [esi] /* U */ \ 2308 __asm movd xmm0, [esi] /* U */ \
2294 __asm movd xmm1, [esi + edi] /* V */ \ 2309 __asm movd xmm1, [esi + edi] /* V */ \
2295 __asm lea esi, [esi + 4] \ 2310 __asm lea esi, [esi + 4] \
2296 __asm punpcklbw xmm0, xmm1 /* UV */ \ 2311 __asm punpcklbw xmm0, xmm1 /* UV */ \
2297 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ 2312 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
2313 __asm movq xmm4, qword ptr [eax] \
2314 __asm punpcklbw xmm4, xmm4 \
2315 __asm lea eax, [eax + 8] \
2298 } 2316 }
2299 2317
2300 // Read 2 UV from 411, upsample to 8 UV. 2318 // Read 2 UV from 411, upsample to 8 UV.
2301 #define READYUV411 __asm { \ 2319 #define READYUV411 __asm { \
2302 __asm pinsrw xmm0, [esi], 0 /* U */ \ 2320 __asm pinsrw xmm0, [esi], 0 /* U */ \
2303 __asm pinsrw xmm1, [esi + edi], 0 /* V */ \ 2321 __asm pinsrw xmm1, [esi + edi], 0 /* V */ \
2304 __asm lea esi, [esi + 2] \ 2322 __asm lea esi, [esi + 2] \
2305 __asm punpcklbw xmm0, xmm1 /* UV */ \ 2323 __asm punpcklbw xmm0, xmm1 /* UV */ \
2306 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ 2324 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
2307 __asm punpckldq xmm0, xmm0 /* UVUVUVUV (upsample) */ \ 2325 __asm punpckldq xmm0, xmm0 /* UVUVUVUV (upsample) */ \
2326 __asm movq xmm4, qword ptr [eax] \
2327 __asm punpcklbw xmm4, xmm4 \
2328 __asm lea eax, [eax + 8] \
2308 } 2329 }
2309 2330
2310 // Read 4 UV from NV12, upsample to 8 UV. 2331 // Read 4 UV from NV12, upsample to 8 UV.
2311 #define READNV12 __asm { \ 2332 #define READNV12 __asm { \
2312 __asm movq xmm0, qword ptr [esi] /* UV */ \ 2333 __asm movq xmm0, qword ptr [esi] /* UV */ \
2313 __asm lea esi, [esi + 8] \ 2334 __asm lea esi, [esi + 8] \
2314 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ 2335 __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \
2336 __asm movq xmm4, qword ptr [eax] \
2337 __asm punpcklbw xmm4, xmm4 \
2338 __asm lea eax, [eax + 8] \
2339 }
2340
2341 // YUY2 shuf 8 Y to 16 Y.
2342 static const vec8 kShuffleYUY2Y = {
2343 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14
2344 };
2345
2346 // YUY2 shuf 4 UV to 8 UV.
2347 static const vec8 kShuffleYUY2UV = {
2348 1, 3, 1, 3, 5, 7, 5, 7, 9, 11, 9, 11, 13, 15, 13, 15
2349 };
2350
2351 // Read 4 YUY2 with 8 Y and upsample 4 UV to 8 UV.
2352 #define READYUY2 __asm { \
2353 __asm movdqu xmm4, [eax] /* YUY2 */ \
2354 __asm pshufb xmm4, xmmword ptr kShuffleYUY2Y \
2355 __asm movdqu xmm0, [eax] /* UV */ \
2356 __asm pshufb xmm0, xmmword ptr kShuffleYUY2UV \
2357 __asm lea eax, [eax + 16] \
2358 }
2359
2360 // UYVY shuf 8 Y to 16 Y.
2361 static const vec8 kShuffleUYVYY = {
2362 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15
2363 };
2364
2365 // UYVY shuf 4 UV to 8 UV.
2366 static const vec8 kShuffleUYVYUV = {
2367 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14
2368 };
2369
2370 // Read 4 UYVY with 8 Y and upsample 4 UV to 8 UV.
2371 #define READUYVY __asm { \
2372 __asm movdqu xmm4, [eax] /* UYVY */ \
2373 __asm pshufb xmm4, xmmword ptr kShuffleUYVYY \
2374 __asm movdqu xmm0, [eax] /* UV */ \
2375 __asm pshufb xmm0, xmmword ptr kShuffleUYVYUV \
2376 __asm lea eax, [eax + 16] \
2315 } 2377 }
2316 2378
2317 // Convert 8 pixels: 8 UV and 8 Y. 2379 // Convert 8 pixels: 8 UV and 8 Y.
2318 #define YUVTORGB(YuvConstants) __asm { \ 2380 #define YUVTORGB(YuvConstants) __asm { \
2319 __asm movdqa xmm1, xmm0 \ 2381 __asm movdqa xmm1, xmm0 \
2320 __asm movdqa xmm2, xmm0 \ 2382 __asm movdqa xmm2, xmm0 \
2321 __asm movdqa xmm3, xmm0 \ 2383 __asm movdqa xmm3, xmm0 \
2322 __asm movdqa xmm0, xmmword ptr [YuvConstants + KUVBIASB] \ 2384 __asm movdqa xmm0, xmmword ptr [YuvConstants + KUVBIASB] \
2323 __asm pmaddubsw xmm1, xmmword ptr [YuvConstants + KUVTOB] \ 2385 __asm pmaddubsw xmm1, xmmword ptr [YuvConstants + KUVTOB] \
2324 __asm psubw xmm0, xmm1 \ 2386 __asm psubw xmm0, xmm1 \
2325 __asm movdqa xmm1, xmmword ptr [YuvConstants + KUVBIASG] \ 2387 __asm movdqa xmm1, xmmword ptr [YuvConstants + KUVBIASG] \
2326 __asm pmaddubsw xmm2, xmmword ptr [YuvConstants + KUVTOG] \ 2388 __asm pmaddubsw xmm2, xmmword ptr [YuvConstants + KUVTOG] \
2327 __asm psubw xmm1, xmm2 \ 2389 __asm psubw xmm1, xmm2 \
2328 __asm movdqa xmm2, xmmword ptr [YuvConstants + KUVBIASR] \ 2390 __asm movdqa xmm2, xmmword ptr [YuvConstants + KUVBIASR] \
2329 __asm pmaddubsw xmm3, xmmword ptr [YuvConstants + KUVTOR] \ 2391 __asm pmaddubsw xmm3, xmmword ptr [YuvConstants + KUVTOR] \
2330 __asm psubw xmm2, xmm3 \ 2392 __asm psubw xmm2, xmm3 \
2331 __asm movq xmm3, qword ptr [eax] \ 2393 __asm pmulhuw xmm4, xmmword ptr [YuvConstants + KYTORGB] \
2332 __asm lea eax, [eax + 8] \ 2394 __asm paddsw xmm0, xmm4 /* B += Y */ \
2333 __asm punpcklbw xmm3, xmm3 \ 2395 __asm paddsw xmm1, xmm4 /* G += Y */ \
2334 __asm pmulhuw xmm3, xmmword ptr [YuvConstants + KYTORGB] \ 2396 __asm paddsw xmm2, xmm4 /* R += Y */ \
2335 __asm paddsw xmm0, xmm3 /* B += Y */ \
2336 __asm paddsw xmm1, xmm3 /* G += Y */ \
2337 __asm paddsw xmm2, xmm3 /* R += Y */ \
2338 __asm psraw xmm0, 6 \ 2397 __asm psraw xmm0, 6 \
2339 __asm psraw xmm1, 6 \ 2398 __asm psraw xmm1, 6 \
2340 __asm psraw xmm2, 6 \ 2399 __asm psraw xmm2, 6 \
2341 __asm packuswb xmm0, xmm0 /* B */ \ 2400 __asm packuswb xmm0, xmm0 /* B */ \
2342 __asm packuswb xmm1, xmm1 /* G */ \ 2401 __asm packuswb xmm1, xmm1 /* G */ \
2343 __asm packuswb xmm2, xmm2 /* R */ \ 2402 __asm packuswb xmm2, xmm2 /* R */ \
2344 } 2403 }
2345 2404
2346 // Store 8 ARGB values. 2405 // Store 8 ARGB values.
2347 #define STOREARGB __asm { \ 2406 #define STOREARGB __asm { \
(...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after
2473 struct YuvConstants* yuvconstants, 2532 struct YuvConstants* yuvconstants,
2474 int width) { 2533 int width) {
2475 __asm { 2534 __asm {
2476 push esi 2535 push esi
2477 push edi 2536 push edi
2478 push ebp 2537 push ebp
2479 mov eax, [esp + 12 + 4] // Y 2538 mov eax, [esp + 12 + 4] // Y
2480 mov esi, [esp + 12 + 8] // U 2539 mov esi, [esp + 12 + 8] // U
2481 mov edi, [esp + 12 + 12] // V 2540 mov edi, [esp + 12 + 12] // V
2482 mov edx, [esp + 12 + 16] // argb 2541 mov edx, [esp + 12 + 16] // argb
2483 mov ebp, [esp + 12 + 20] // YuvConstants 2542 mov ebp, [esp + 12 + 20] // yuvconstants
2484 mov ecx, [esp + 12 + 24] // width 2543 mov ecx, [esp + 12 + 24] // width
2485 sub edi, esi 2544 sub edi, esi
2486 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 2545 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
2487 2546
2488 convertloop: 2547 convertloop:
2489 READYUV444 2548 READYUV444
2490 YUVTORGB(ebp) 2549 YUVTORGB(ebp)
2491 STOREARGB 2550 STOREARGB
2492 2551
2493 sub ecx, 8 2552 sub ecx, 8
(...skipping 16 matching lines...) Expand all
2510 struct YuvConstants* yuvconstants, 2569 struct YuvConstants* yuvconstants,
2511 int width) { 2570 int width) {
2512 __asm { 2571 __asm {
2513 push esi 2572 push esi
2514 push edi 2573 push edi
2515 push ebp 2574 push ebp
2516 mov eax, [esp + 12 + 4] // Y 2575 mov eax, [esp + 12 + 4] // Y
2517 mov esi, [esp + 12 + 8] // U 2576 mov esi, [esp + 12 + 8] // U
2518 mov edi, [esp + 12 + 12] // V 2577 mov edi, [esp + 12 + 12] // V
2519 mov edx, [esp + 12 + 16] // abgr 2578 mov edx, [esp + 12 + 16] // abgr
2520 mov ebp, [esp + 12 + 20] // YuvConstants 2579 mov ebp, [esp + 12 + 20] // yuvconstants
2521 mov ecx, [esp + 12 + 24] // width 2580 mov ecx, [esp + 12 + 24] // width
2522 sub edi, esi 2581 sub edi, esi
2523 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 2582 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
2524 2583
2525 convertloop: 2584 convertloop:
2526 READYUV444 2585 READYUV444
2527 YUVTORGB(ebp) 2586 YUVTORGB(ebp)
2528 STOREABGR 2587 STOREABGR
2529 2588
2530 sub ecx, 8 2589 sub ecx, 8
(...skipping 16 matching lines...) Expand all
2547 struct YuvConstants* yuvconstants, 2606 struct YuvConstants* yuvconstants,
2548 int width) { 2607 int width) {
2549 __asm { 2608 __asm {
2550 push esi 2609 push esi
2551 push edi 2610 push edi
2552 push ebp 2611 push ebp
2553 mov eax, [esp + 12 + 4] // Y 2612 mov eax, [esp + 12 + 4] // Y
2554 mov esi, [esp + 12 + 8] // U 2613 mov esi, [esp + 12 + 8] // U
2555 mov edi, [esp + 12 + 12] // V 2614 mov edi, [esp + 12 + 12] // V
2556 mov edx, [esp + 12 + 16] // argb 2615 mov edx, [esp + 12 + 16] // argb
2557 mov ebp, [esp + 12 + 20] // YuvConstants 2616 mov ebp, [esp + 12 + 20] // yuvconstants
2558 mov ecx, [esp + 12 + 24] // width 2617 mov ecx, [esp + 12 + 24] // width
2559 sub edi, esi 2618 sub edi, esi
2560 movdqa xmm5, xmmword ptr kShuffleMaskARGBToRGB24_0 2619 movdqa xmm5, xmmword ptr kShuffleMaskARGBToRGB24_0
2561 movdqa xmm6, xmmword ptr kShuffleMaskARGBToRGB24 2620 movdqa xmm6, xmmword ptr kShuffleMaskARGBToRGB24
2562 2621
2563 convertloop: 2622 convertloop:
2564 READYUV422 2623 READYUV422
2565 YUVTORGB(ebp) 2624 YUVTORGB(ebp)
2566 STORERGB24 2625 STORERGB24
2567 2626
(...skipping 17 matching lines...) Expand all
2585 struct YuvConstants* yuvconstants, 2644 struct YuvConstants* yuvconstants,
2586 int width) { 2645 int width) {
2587 __asm { 2646 __asm {
2588 push esi 2647 push esi
2589 push edi 2648 push edi
2590 push ebp 2649 push ebp
2591 mov eax, [esp + 12 + 4] // Y 2650 mov eax, [esp + 12 + 4] // Y
2592 mov esi, [esp + 12 + 8] // U 2651 mov esi, [esp + 12 + 8] // U
2593 mov edi, [esp + 12 + 12] // V 2652 mov edi, [esp + 12 + 12] // V
2594 mov edx, [esp + 12 + 16] // argb 2653 mov edx, [esp + 12 + 16] // argb
2595 mov ebp, [esp + 12 + 20] // YuvConstants 2654 mov ebp, [esp + 12 + 20] // yuvconstants
2596 mov ecx, [esp + 12 + 24] // width 2655 mov ecx, [esp + 12 + 24] // width
2597 sub edi, esi 2656 sub edi, esi
2598 movdqa xmm5, xmmword ptr kShuffleMaskARGBToRAW_0 2657 movdqa xmm5, xmmword ptr kShuffleMaskARGBToRAW_0
2599 movdqa xmm6, xmmword ptr kShuffleMaskARGBToRAW 2658 movdqa xmm6, xmmword ptr kShuffleMaskARGBToRAW
2600 2659
2601 convertloop: 2660 convertloop:
2602 READYUV422 2661 READYUV422
2603 YUVTORGB(ebp) 2662 YUVTORGB(ebp)
2604 STORERAW 2663 STORERAW
2605 2664
(...skipping 17 matching lines...) Expand all
2623 struct YuvConstants* yuvconstants, 2682 struct YuvConstants* yuvconstants,
2624 int width) { 2683 int width) {
2625 __asm { 2684 __asm {
2626 push esi 2685 push esi
2627 push edi 2686 push edi
2628 push ebp 2687 push ebp
2629 mov eax, [esp + 12 + 4] // Y 2688 mov eax, [esp + 12 + 4] // Y
2630 mov esi, [esp + 12 + 8] // U 2689 mov esi, [esp + 12 + 8] // U
2631 mov edi, [esp + 12 + 12] // V 2690 mov edi, [esp + 12 + 12] // V
2632 mov edx, [esp + 12 + 16] // argb 2691 mov edx, [esp + 12 + 16] // argb
2633 mov ebp, [esp + 12 + 20] // YuvConstants 2692 mov ebp, [esp + 12 + 20] // yuvconstants
2634 mov ecx, [esp + 12 + 24] // width 2693 mov ecx, [esp + 12 + 24] // width
2635 sub edi, esi 2694 sub edi, esi
2636 pcmpeqb xmm5, xmm5 // generate mask 0x0000001f 2695 pcmpeqb xmm5, xmm5 // generate mask 0x0000001f
2637 psrld xmm5, 27 2696 psrld xmm5, 27
2638 pcmpeqb xmm6, xmm6 // generate mask 0x000007e0 2697 pcmpeqb xmm6, xmm6 // generate mask 0x000007e0
2639 psrld xmm6, 26 2698 psrld xmm6, 26
2640 pslld xmm6, 5 2699 pslld xmm6, 5
2641 pcmpeqb xmm7, xmm7 // generate mask 0xfffff800 2700 pcmpeqb xmm7, xmm7 // generate mask 0xfffff800
2642 pslld xmm7, 11 2701 pslld xmm7, 11
2643 2702
(...skipping 22 matching lines...) Expand all
2666 struct YuvConstants* yuvconstants, 2725 struct YuvConstants* yuvconstants,
2667 int width) { 2726 int width) {
2668 __asm { 2727 __asm {
2669 push esi 2728 push esi
2670 push edi 2729 push edi
2671 push ebp 2730 push ebp
2672 mov eax, [esp + 12 + 4] // Y 2731 mov eax, [esp + 12 + 4] // Y
2673 mov esi, [esp + 12 + 8] // U 2732 mov esi, [esp + 12 + 8] // U
2674 mov edi, [esp + 12 + 12] // V 2733 mov edi, [esp + 12 + 12] // V
2675 mov edx, [esp + 12 + 16] // argb 2734 mov edx, [esp + 12 + 16] // argb
2676 mov ebp, [esp + 12 + 20] // YuvConstants 2735 mov ebp, [esp + 12 + 20] // yuvconstants
2677 mov ecx, [esp + 12 + 24] // width 2736 mov ecx, [esp + 12 + 24] // width
2678 sub edi, esi 2737 sub edi, esi
2679 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 2738 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
2680 2739
2681 convertloop: 2740 convertloop:
2682 READYUV422 2741 READYUV422
2683 YUVTORGB(ebp) 2742 YUVTORGB(ebp)
2684 STOREARGB 2743 STOREARGB
2685 2744
2686 sub ecx, 8 2745 sub ecx, 8
(...skipping 17 matching lines...) Expand all
2704 struct YuvConstants* yuvconstants, 2763 struct YuvConstants* yuvconstants,
2705 int width) { 2764 int width) {
2706 __asm { 2765 __asm {
2707 push esi 2766 push esi
2708 push edi 2767 push edi
2709 push ebp 2768 push ebp
2710 mov eax, [esp + 12 + 4] // Y 2769 mov eax, [esp + 12 + 4] // Y
2711 mov esi, [esp + 12 + 8] // U 2770 mov esi, [esp + 12 + 8] // U
2712 mov edi, [esp + 12 + 12] // V 2771 mov edi, [esp + 12 + 12] // V
2713 mov edx, [esp + 12 + 16] // argb 2772 mov edx, [esp + 12 + 16] // argb
2714 mov ebp, [esp + 12 + 20] // YuvConstants 2773 mov ebp, [esp + 12 + 20] // yuvconstants
2715 mov ecx, [esp + 12 + 24] // width 2774 mov ecx, [esp + 12 + 24] // width
2716 sub edi, esi 2775 sub edi, esi
2717 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 2776 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
2718 2777
2719 convertloop: 2778 convertloop:
2720 READYUV411 2779 READYUV411
2721 YUVTORGB(ebp) 2780 YUVTORGB(ebp)
2722 STOREARGB 2781 STOREARGB
2723 2782
2724 sub ecx, 8 2783 sub ecx, 8
(...skipping 13 matching lines...) Expand all
2738 const uint8* uv_buf, 2797 const uint8* uv_buf,
2739 uint8* dst_argb, 2798 uint8* dst_argb,
2740 struct YuvConstants* yuvconstants, 2799 struct YuvConstants* yuvconstants,
2741 int width) { 2800 int width) {
2742 __asm { 2801 __asm {
2743 push esi 2802 push esi
2744 push ebp 2803 push ebp
2745 mov eax, [esp + 8 + 4] // Y 2804 mov eax, [esp + 8 + 4] // Y
2746 mov esi, [esp + 8 + 8] // UV 2805 mov esi, [esp + 8 + 8] // UV
2747 mov edx, [esp + 8 + 12] // argb 2806 mov edx, [esp + 8 + 12] // argb
2748 mov ebp, [esp + 8 + 16] // YuvConstants 2807 mov ebp, [esp + 8 + 16] // yuvconstants
2749 mov ecx, [esp + 8 + 20] // width 2808 mov ecx, [esp + 8 + 20] // width
2750 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 2809 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
2751 2810
2752 convertloop: 2811 convertloop:
2753 READNV12 2812 READNV12
2754 YUVTORGB(ebp) 2813 YUVTORGB(ebp)
2755 STOREARGB 2814 STOREARGB
2756 2815
2757 sub ecx, 8 2816 sub ecx, 8
2758 jg convertloop 2817 jg convertloop
2759 2818
2760 pop ebp 2819 pop ebp
2761 pop esi 2820 pop esi
2762 ret 2821 ret
2763 } 2822 }
2764 } 2823 }
2765 2824
2825 // Converts a row of packed YUY2 to ARGB, 8 pixels per loop iteration.
2826 // 4 YUY2 values with 8 Y and 4 UV producing 8 ARGB (32 bytes).
2827 __declspec(naked)
2828 void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
2829 uint8* dst_argb,
2830 struct YuvConstants* yuvconstants,
2831 int width) {
2832 __asm {
2833 push ebp // save ebp; it is reloaded with yuvconstants below
2834 mov eax, [esp + 4 + 4] // src_yuy2 (the first 4 skips the ebp pushed above)
2835 mov edx, [esp + 4 + 8] // dst_argb
2836 mov ebp, [esp + 4 + 12] // yuvconstants, passed to YUVTORGB in the loop
2837 mov ecx, [esp + 4 + 16] // width, counted down by 8 per iteration
2838 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha (xmm5 = all ones)
2839
2840 convertloop:
2841 READYUY2 // READYUY2, YUVTORGB and STOREARGB are macros defined outside this view
2842 YUVTORGB(ebp)
2843 STOREARGB
2844
2845 sub ecx, 8 // 8 pixels handled this iteration
2846 jg convertloop // loop while ecx > 0. NOTE(review): a width that is not a multiple of 8 still runs a full final iteration - presumably callers guarantee this; confirm
2847
2848 pop ebp // restore caller's ebp
2849 ret // naked function, so the return is written explicitly
2850 }
2851 }
2852
2853 // Converts a row of packed UYVY to ARGB, 8 pixels per loop iteration.
2854 // 4 UYVY values with 8 Y and 4 UV producing 8 ARGB (32 bytes).
2855 __declspec(naked)
2856 void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
2857 uint8* dst_argb,
2858 struct YuvConstants* yuvconstants,
2859 int width) {
2860 __asm {
2861 push ebp // save ebp; it is reloaded with yuvconstants below
2862 mov eax, [esp + 4 + 4] // src_uyvy (the first 4 skips the ebp pushed above)
2863 mov edx, [esp + 4 + 8] // dst_argb
2864 mov ebp, [esp + 4 + 12] // yuvconstants, passed to YUVTORGB in the loop
2865 mov ecx, [esp + 4 + 16] // width, counted down by 8 per iteration
2866 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha (xmm5 = all ones)
2867
2868 convertloop:
2869 READUYVY // READUYVY, YUVTORGB and STOREARGB are macros defined outside this view
2870 YUVTORGB(ebp)
2871 STOREARGB
2872
2873 sub ecx, 8 // 8 pixels handled this iteration
2874 jg convertloop // loop while ecx > 0. NOTE(review): a width that is not a multiple of 8 still runs a full final iteration - presumably callers guarantee this; confirm
2875
2876 pop ebp // restore caller's ebp
2877 ret // naked function, so the return is written explicitly
2878 }
2879 }
2880
2766 __declspec(naked) 2881 __declspec(naked)
2767 void I422ToBGRARow_SSSE3(const uint8* y_buf, 2882 void I422ToBGRARow_SSSE3(const uint8* y_buf,
2768 const uint8* u_buf, 2883 const uint8* u_buf,
2769 const uint8* v_buf, 2884 const uint8* v_buf,
2770 uint8* dst_bgra, 2885 uint8* dst_bgra,
2771 struct YuvConstants* yuvconstants, 2886 struct YuvConstants* yuvconstants,
2772 int width) { 2887 int width) {
2773 __asm { 2888 __asm {
2774 push esi 2889 push esi
2775 push edi 2890 push edi
2776 push ebp 2891 push ebp
2777 mov eax, [esp + 12 + 4] // Y 2892 mov eax, [esp + 12 + 4] // Y
2778 mov esi, [esp + 12 + 8] // U 2893 mov esi, [esp + 12 + 8] // U
2779 mov edi, [esp + 12 + 12] // V 2894 mov edi, [esp + 12 + 12] // V
2780 mov edx, [esp + 12 + 16] // bgra 2895 mov edx, [esp + 12 + 16] // bgra
2781 mov ebp, [esp + 12 + 20] // YuvConstants 2896 mov ebp, [esp + 12 + 20] // yuvconstants
2782 mov ecx, [esp + 12 + 24] // width 2897 mov ecx, [esp + 12 + 24] // width
2783 sub edi, esi 2898 sub edi, esi
2784 2899
2785 convertloop: 2900 convertloop:
2786 READYUV422 2901 READYUV422
2787 YUVTORGB(ebp) 2902 YUVTORGB(ebp)
2788 STOREBGRA 2903 STOREBGRA
2789 2904
2790 sub ecx, 8 2905 sub ecx, 8
2791 jg convertloop 2906 jg convertloop
(...skipping 13 matching lines...) Expand all
2805 struct YuvConstants* yuvconstants, 2920 struct YuvConstants* yuvconstants,
2806 int width) { 2921 int width) {
2807 __asm { 2922 __asm {
2808 push esi 2923 push esi
2809 push edi 2924 push edi
2810 push ebp 2925 push ebp
2811 mov eax, [esp + 12 + 4] // Y 2926 mov eax, [esp + 12 + 4] // Y
2812 mov esi, [esp + 12 + 8] // U 2927 mov esi, [esp + 12 + 8] // U
2813 mov edi, [esp + 12 + 12] // V 2928 mov edi, [esp + 12 + 12] // V
2814 mov edx, [esp + 12 + 16] // argb 2929 mov edx, [esp + 12 + 16] // argb
2815 mov ebp, [esp + 12 + 20] // YuvConstants 2930 mov ebp, [esp + 12 + 20] // yuvconstants
2816 mov ecx, [esp + 12 + 24] // width 2931 mov ecx, [esp + 12 + 24] // width
2817 sub edi, esi 2932 sub edi, esi
2818 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 2933 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
2819 2934
2820 convertloop: 2935 convertloop:
2821 READYUV422 2936 READYUV422
2822 YUVTORGB(ebp) 2937 YUVTORGB(ebp)
2823 STOREABGR 2938 STOREABGR
2824 2939
2825 sub ecx, 8 2940 sub ecx, 8
(...skipping 14 matching lines...) Expand all
2840 struct YuvConstants* yuvconstants, 2955 struct YuvConstants* yuvconstants,
2841 int width) { 2956 int width) {
2842 __asm { 2957 __asm {
2843 push esi 2958 push esi
2844 push edi 2959 push edi
2845 push ebp 2960 push ebp
2846 mov eax, [esp + 12 + 4] // Y 2961 mov eax, [esp + 12 + 4] // Y
2847 mov esi, [esp + 12 + 8] // U 2962 mov esi, [esp + 12 + 8] // U
2848 mov edi, [esp + 12 + 12] // V 2963 mov edi, [esp + 12 + 12] // V
2849 mov edx, [esp + 12 + 16] // argb 2964 mov edx, [esp + 12 + 16] // argb
2850 mov ebp, [esp + 12 + 20] // YuvConstants 2965 mov ebp, [esp + 12 + 20] // yuvconstants
2851 mov ecx, [esp + 12 + 24] // width 2966 mov ecx, [esp + 12 + 24] // width
2852 sub edi, esi 2967 sub edi, esi
2853 2968
2854 convertloop: 2969 convertloop:
2855 READYUV422 2970 READYUV422
2856 YUVTORGB(ebp) 2971 YUVTORGB(ebp)
2857 STORERGBA 2972 STORERGBA
2858 2973
2859 sub ecx, 8 2974 sub ecx, 8
2860 jg convertloop 2975 jg convertloop
(...skipping 644 matching lines...) Expand 10 before | Expand all | Expand 10 after
3505 mov ecx, [esp + 12] // count 3620 mov ecx, [esp + 12] // count
3506 rep stosd 3621 rep stosd
3507 mov edi, edx 3622 mov edi, edx
3508 ret 3623 ret
3509 } 3624 }
3510 } 3625 }
3511 #endif // HAS_SETROW_X86 3626 #endif // HAS_SETROW_X86
3512 3627
3513 #ifdef HAS_YUY2TOYROW_AVX2 3628 #ifdef HAS_YUY2TOYROW_AVX2
3514 __declspec(naked) 3629 __declspec(naked)
3515 void YUY2ToYRow_AVX2(const uint8* src_yuy2, 3630 void YUY2ToYRow_AVX2(const uint8* src_yuy2, uint8* dst_y, int pix) {
3516 uint8* dst_y, int pix) {
3517 __asm { 3631 __asm {
3518 mov eax, [esp + 4] // src_yuy2 3632 mov eax, [esp + 4] // src_yuy2
3519 mov edx, [esp + 8] // dst_y 3633 mov edx, [esp + 8] // dst_y
3520 mov ecx, [esp + 12] // pix 3634 mov ecx, [esp + 12] // pix
3521 vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff 3635 vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff
3522 vpsrlw ymm5, ymm5, 8 3636 vpsrlw ymm5, ymm5, 8
3523 3637
3524 convertloop: 3638 convertloop:
3525 vmovdqu ymm0, [eax] 3639 vmovdqu ymm0, [eax]
3526 vmovdqu ymm1, [eax + 32] 3640 vmovdqu ymm1, [eax + 32]
(...skipping 2733 matching lines...) Expand 10 before | Expand all | Expand 10 after
6260 } 6374 }
6261 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 6375 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
6262 6376
6263 #endif // defined(_M_X64) 6377 #endif // defined(_M_X64)
6264 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) 6378 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64))
6265 6379
6266 #ifdef __cplusplus 6380 #ifdef __cplusplus
6267 } // extern "C" 6381 } // extern "C"
6268 } // namespace libyuv 6382 } // namespace libyuv
6269 #endif 6383 #endif
OLDNEW
« no previous file with comments | « source/row_gcc.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698