OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
76 xmm2 = _mm_unpacklo_epi8(xmm2, xmm1); \ | 76 xmm2 = _mm_unpacklo_epi8(xmm2, xmm1); \ |
77 xmm0 = _mm_unpacklo_epi8(xmm0, xmm5); \ | 77 xmm0 = _mm_unpacklo_epi8(xmm0, xmm5); \ |
78 xmm1 = _mm_loadu_si128(&xmm2); \ | 78 xmm1 = _mm_loadu_si128(&xmm2); \ |
79 xmm2 = _mm_unpacklo_epi16(xmm2, xmm0); \ | 79 xmm2 = _mm_unpacklo_epi16(xmm2, xmm0); \ |
80 xmm1 = _mm_unpackhi_epi16(xmm1, xmm0); \ | 80 xmm1 = _mm_unpackhi_epi16(xmm1, xmm0); \ |
81 _mm_storeu_si128((__m128i *)dst_argb, xmm2); \ | 81 _mm_storeu_si128((__m128i *)dst_argb, xmm2); \ |
82 _mm_storeu_si128((__m128i *)(dst_argb + 16), xmm1); \ | 82 _mm_storeu_si128((__m128i *)(dst_argb + 16), xmm1); \ |
83 dst_argb += 32; | 83 dst_argb += 32; |
84 | 84 |
85 | 85 |
86 #if defined(HAS_I422TOARGBMATRIXROW_SSSE3) | 86 #if defined(HAS_I422TOARGBROW_SSSE3) |
87 void I422ToARGBMatrixRow_SSSE3(const uint8* y_buf, | 87 void I422ToARGBRow_SSSE3(const uint8* y_buf, |
88 const uint8* u_buf, | 88 const uint8* u_buf, |
89 const uint8* v_buf, | 89 const uint8* v_buf, |
90 uint8* dst_argb, | 90 uint8* dst_argb, |
91 struct YuvConstants* YuvConstants, | 91 struct YuvConstants* yuvconstants, |
92 int width) { | 92 int width) { |
93 __m128i xmm0, xmm1, xmm2, xmm3; | 93 __m128i xmm0, xmm1, xmm2, xmm3; |
94 const __m128i xmm5 = _mm_set1_epi8(-1); | 94 const __m128i xmm5 = _mm_set1_epi8(-1); |
95 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; | 95 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; |
96 while (width > 0) { | 96 while (width > 0) { |
97 READYUV422 | 97 READYUV422 |
98 YUVTORGB(YuvConstants) | 98 YUVTORGB(yuvconstants) |
99 STOREARGB | 99 STOREARGB |
100 width -= 8; | 100 width -= 8; |
101 } | 101 } |
102 } | 102 } |
103 #endif | 103 #endif |
104 | 104 |
105 #if defined(HAS_I422TOABGRMATRIXROW_SSSE3) | 105 #if defined(HAS_I422TOABGRROW_SSSE3) |
106 void I422ToABGRMatrixRow_SSSE3(const uint8* y_buf, | 106 void I422ToABGRRow_SSSE3(const uint8* y_buf, |
107 const uint8* u_buf, | 107 const uint8* u_buf, |
108 const uint8* v_buf, | 108 const uint8* v_buf, |
109 uint8* dst_argb, | 109 uint8* dst_argb, |
110 struct YuvConstants* YuvConstants, | 110 struct YuvConstants* yuvconstants, |
111 int width) { | 111 int width) { |
112 __m128i xmm0, xmm1, xmm2, xmm3; | 112 __m128i xmm0, xmm1, xmm2, xmm3; |
113 const __m128i xmm5 = _mm_set1_epi8(-1); | 113 const __m128i xmm5 = _mm_set1_epi8(-1); |
114 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; | 114 const ptrdiff_t offset = (uint8*)v_buf - (uint8*)u_buf; |
115 while (width > 0) { | 115 while (width > 0) { |
116 READYUV422 | 116 READYUV422 |
117 YUVTORGB(YuvConstants) | 117 YUVTORGB(yuvconstants) |
118 STOREABGR | 118 STOREABGR |
119 width -= 8; | 119 width -= 8; |
120 } | 120 } |
121 } | 121 } |
(...skipping 1834 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1956 __asm vpermq ymm1, ymm1, 0xd8 \ | 1956 __asm vpermq ymm1, ymm1, 0xd8 \ |
1957 __asm vpunpcklbw ymm2, ymm0, ymm5 /* BA */ \ | 1957 __asm vpunpcklbw ymm2, ymm0, ymm5 /* BA */ \ |
1958 __asm vpermq ymm2, ymm2, 0xd8 \ | 1958 __asm vpermq ymm2, ymm2, 0xd8 \ |
1959 __asm vpunpcklwd ymm0, ymm1, ymm2 /* RGBA first 8 pixels */ \ | 1959 __asm vpunpcklwd ymm0, ymm1, ymm2 /* RGBA first 8 pixels */ \ |
1960 __asm vpunpckhwd ymm1, ymm1, ymm2 /* RGBA next 8 pixels */ \ | 1960 __asm vpunpckhwd ymm1, ymm1, ymm2 /* RGBA next 8 pixels */ \ |
1961 __asm vmovdqu [edx], ymm0 \ | 1961 __asm vmovdqu [edx], ymm0 \ |
1962 __asm vmovdqu [edx + 32], ymm1 \ | 1962 __asm vmovdqu [edx + 32], ymm1 \ |
1963 __asm lea edx, [edx + 64] \ | 1963 __asm lea edx, [edx + 64] \ |
1964 } | 1964 } |
1965 | 1965 |
1966 #ifdef HAS_I422TOARGBMATRIXROW_AVX2 | 1966 #ifdef HAS_I422TOARGBROW_AVX2 |
1967 // 16 pixels | 1967 // 16 pixels |
1968 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). | 1968 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). |
1969 __declspec(naked) | 1969 __declspec(naked) |
1970 void I422ToARGBMatrixRow_AVX2(const uint8* y_buf, | 1970 void I422ToARGBRow_AVX2(const uint8* y_buf, |
1971 const uint8* u_buf, | 1971 const uint8* u_buf, |
1972 const uint8* v_buf, | 1972 const uint8* v_buf, |
1973 uint8* dst_argb, | 1973 uint8* dst_argb, |
1974 struct YuvConstants* YuvConstants, | 1974 struct YuvConstants* yuvconstants, |
1975 int width) { | 1975 int width) { |
1976 __asm { | 1976 __asm { |
1977 push esi | 1977 push esi |
1978 push edi | 1978 push edi |
1979 push ebp | 1979 push ebp |
1980 mov eax, [esp + 12 + 4] // Y | 1980 mov eax, [esp + 12 + 4] // Y |
1981 mov esi, [esp + 12 + 8] // U | 1981 mov esi, [esp + 12 + 8] // U |
1982 mov edi, [esp + 12 + 12] // V | 1982 mov edi, [esp + 12 + 12] // V |
1983 mov edx, [esp + 12 + 16] // argb | 1983 mov edx, [esp + 12 + 16] // argb |
1984 mov ebp, [esp + 12 + 20] // YuvConstants | 1984 mov ebp, [esp + 12 + 20] // YuvConstants |
1985 mov ecx, [esp + 12 + 24] // width | 1985 mov ecx, [esp + 12 + 24] // width |
1986 sub edi, esi | 1986 sub edi, esi |
1987 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha | 1987 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha |
1988 | 1988 |
1989 convertloop: | 1989 convertloop: |
1990 READYUV422_AVX2 | 1990 READYUV422_AVX2 |
1991 YUVTORGB_AVX2(ebp) | 1991 YUVTORGB_AVX2(ebp) |
1992 STOREARGB_AVX2 | 1992 STOREARGB_AVX2 |
1993 | 1993 |
1994 sub ecx, 16 | 1994 sub ecx, 16 |
1995 jg convertloop | 1995 jg convertloop |
1996 | 1996 |
1997 pop ebp | 1997 pop ebp |
1998 pop edi | 1998 pop edi |
1999 pop esi | 1999 pop esi |
2000 vzeroupper | 2000 vzeroupper |
2001 ret | 2001 ret |
2002 } | 2002 } |
2003 } | 2003 } |
2004 #endif // HAS_I422TOARGBMATRIXROW_AVX2 | 2004 #endif // HAS_I422TOARGBROW_AVX2 |
2005 | 2005 |
2006 #ifdef HAS_I444TOARGBMATRIXROW_AVX2 | 2006 #ifdef HAS_I444TOARGBROW_AVX2 |
2007 // 16 pixels | 2007 // 16 pixels |
2008 // 16 UV values with 16 Y producing 16 ARGB (64 bytes). | 2008 // 16 UV values with 16 Y producing 16 ARGB (64 bytes). |
2009 __declspec(naked) | 2009 __declspec(naked) |
2010 void I444ToARGBMatrixRow_AVX2(const uint8* y_buf, | 2010 void I444ToARGBRow_AVX2(const uint8* y_buf, |
2011 const uint8* u_buf, | 2011 const uint8* u_buf, |
2012 const uint8* v_buf, | 2012 const uint8* v_buf, |
2013 uint8* dst_argb, | 2013 uint8* dst_argb, |
2014 struct YuvConstants* YuvConstants, | 2014 struct YuvConstants* yuvconstants, |
2015 int width) { | 2015 int width) { |
2016 __asm { | 2016 __asm { |
2017 push esi | 2017 push esi |
2018 push edi | 2018 push edi |
2019 push ebp | 2019 push ebp |
2020 mov eax, [esp + 12 + 4] // Y | 2020 mov eax, [esp + 12 + 4] // Y |
2021 mov esi, [esp + 12 + 8] // U | 2021 mov esi, [esp + 12 + 8] // U |
2022 mov edi, [esp + 12 + 12] // V | 2022 mov edi, [esp + 12 + 12] // V |
2023 mov edx, [esp + 12 + 16] // argb | 2023 mov edx, [esp + 12 + 16] // argb |
2024 mov ebp, [esp + 12 + 20] // YuvConstants | 2024 mov ebp, [esp + 12 + 20] // YuvConstants |
2025 mov ecx, [esp + 12 + 24] // width | 2025 mov ecx, [esp + 12 + 24] // width |
2026 sub edi, esi | 2026 sub edi, esi |
2027 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha | 2027 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha |
2028 convertloop: | 2028 convertloop: |
2029 READYUV444_AVX2 | 2029 READYUV444_AVX2 |
2030 YUVTORGB_AVX2(ebp) | 2030 YUVTORGB_AVX2(ebp) |
2031 STOREARGB_AVX2 | 2031 STOREARGB_AVX2 |
2032 | 2032 |
2033 sub ecx, 16 | 2033 sub ecx, 16 |
2034 jg convertloop | 2034 jg convertloop |
2035 | 2035 |
2036 pop ebp | 2036 pop ebp |
2037 pop edi | 2037 pop edi |
2038 pop esi | 2038 pop esi |
2039 vzeroupper | 2039 vzeroupper |
2040 ret | 2040 ret |
2041 } | 2041 } |
2042 } | 2042 } |
2043 #endif // HAS_I444TOARGBMATRIXROW_AVX2 | 2043 #endif // HAS_I444TOARGBROW_AVX2 |
2044 | 2044 |
2045 #ifdef HAS_I444TOABGRMATRIXROW_AVX2 | 2045 #ifdef HAS_I444TOABGRROW_AVX2 |
2046 // 16 pixels | 2046 // 16 pixels |
2047 // 16 UV values with 16 Y producing 16 ABGR (64 bytes). | 2047 // 16 UV values with 16 Y producing 16 ABGR (64 bytes). |
2048 __declspec(naked) | 2048 __declspec(naked) |
2049 void I444ToABGRMatrixRow_AVX2(const uint8* y_buf, | 2049 void I444ToABGRRow_AVX2(const uint8* y_buf, |
2050 const uint8* u_buf, | 2050 const uint8* u_buf, |
2051 const uint8* v_buf, | 2051 const uint8* v_buf, |
2052 uint8* dst_abgr, | 2052 uint8* dst_abgr, |
2053 struct YuvConstants* YuvConstants, | 2053 struct YuvConstants* yuvconstants, |
2054 int width) { | 2054 int width) { |
2055 __asm { | 2055 __asm { |
2056 push esi | 2056 push esi |
2057 push edi | 2057 push edi |
2058 push ebp | 2058 push ebp |
2059 mov eax, [esp + 12 + 4] // Y | 2059 mov eax, [esp + 12 + 4] // Y |
2060 mov esi, [esp + 12 + 8] // U | 2060 mov esi, [esp + 12 + 8] // U |
2061 mov edi, [esp + 12 + 12] // V | 2061 mov edi, [esp + 12 + 12] // V |
2062 mov edx, [esp + 12 + 16] // abgr | 2062 mov edx, [esp + 12 + 16] // abgr |
2063 mov ebp, [esp + 12 + 20] // YuvConstants | 2063 mov ebp, [esp + 12 + 20] // YuvConstants |
2064 mov ecx, [esp + 12 + 24] // width | 2064 mov ecx, [esp + 12 + 24] // width |
2065 sub edi, esi | 2065 sub edi, esi |
2066 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha | 2066 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha |
2067 convertloop: | 2067 convertloop: |
2068 READYUV444_AVX2 | 2068 READYUV444_AVX2 |
2069 YUVTORGB_AVX2(ebp) | 2069 YUVTORGB_AVX2(ebp) |
2070 STOREABGR_AVX2 | 2070 STOREABGR_AVX2 |
2071 | 2071 |
2072 sub ecx, 16 | 2072 sub ecx, 16 |
2073 jg convertloop | 2073 jg convertloop |
2074 | 2074 |
2075 pop ebp | 2075 pop ebp |
2076 pop edi | 2076 pop edi |
2077 pop esi | 2077 pop esi |
2078 vzeroupper | 2078 vzeroupper |
2079 ret | 2079 ret |
2080 } | 2080 } |
2081 } | 2081 } |
2082 #endif // HAS_I444TOABGRMATRIXROW_AVX2 | 2082 #endif // HAS_I444TOABGRROW_AVX2 |
2083 | 2083 |
2084 #ifdef HAS_I411TOARGBROW_AVX2 | 2084 #ifdef HAS_I411TOARGBROW_AVX2 |
2085 // 16 pixels | 2085 // 16 pixels |
2086 // 4 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). | 2086 // 4 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). |
2087 __declspec(naked) | 2087 __declspec(naked) |
2088 void I411ToARGBRow_AVX2(const uint8* y_buf, | 2088 void I411ToARGBRow_AVX2(const uint8* y_buf, |
2089 const uint8* u_buf, | 2089 const uint8* u_buf, |
2090 const uint8* v_buf, | 2090 const uint8* v_buf, |
2091 uint8* dst_argb, | 2091 uint8* dst_argb, |
| 2092 struct YuvConstants* yuvconstants, |
2092 int width) { | 2093 int width) { |
2093 __asm { | 2094 __asm { |
2094 push esi | 2095 push esi |
2095 push edi | 2096 push edi |
2096 mov eax, [esp + 8 + 4] // Y | 2097 push ebp |
2097 mov esi, [esp + 8 + 8] // U | 2098 mov eax, [esp + 12 + 4] // Y |
2098 mov edi, [esp + 8 + 12] // V | 2099 mov esi, [esp + 12 + 8] // U |
2099 mov edx, [esp + 8 + 16] // argb | 2100 mov edi, [esp + 12 + 12] // V |
2100 mov ecx, [esp + 8 + 20] // width | 2101 mov edx, [esp + 12 + 16] // argb |
| 2102 mov ebp, [esp + 12 + 20] // YuvConstants |
| 2103 mov ecx, [esp + 12 + 24] // width |
2101 sub edi, esi | 2104 sub edi, esi |
2102 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha | 2105 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha |
2103 | 2106 |
2104 convertloop: | 2107 convertloop: |
2105 READYUV411_AVX2 | 2108 READYUV411_AVX2 |
2106 YUVTORGB_AVX2(kYuvConstants) | 2109 YUVTORGB_AVX2(ebp) |
2107 STOREARGB_AVX2 | 2110 STOREARGB_AVX2 |
2108 | 2111 |
2109 sub ecx, 16 | 2112 sub ecx, 16 |
2110 jg convertloop | 2113 jg convertloop |
2111 | 2114 |
| 2115 pop ebp |
2112 pop edi | 2116 pop edi |
2113 pop esi | 2117 pop esi |
2114 vzeroupper | 2118 vzeroupper |
2115 ret | 2119 ret |
2116 } | 2120 } |
2117 } | 2121 } |
2118 #endif // HAS_I411TOARGBROW_AVX2 | 2122 #endif // HAS_I411TOARGBROW_AVX2 |
2119 | 2123 |
2120 #ifdef HAS_NV12TOARGBROW_AVX2 | 2124 #ifdef HAS_NV12TOARGBROW_AVX2 |
2121 // 16 pixels. | 2125 // 16 pixels. |
2122 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). | 2126 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). |
2123 __declspec(naked) | 2127 __declspec(naked) |
2124 void NV12ToARGBRow_AVX2(const uint8* y_buf, | 2128 void NV12ToARGBRow_AVX2(const uint8* y_buf, |
2125 const uint8* uv_buf, | 2129 const uint8* uv_buf, |
2126 uint8* dst_argb, | 2130 uint8* dst_argb, |
| 2131 struct YuvConstants* yuvconstants, |
2127 int width) { | 2132 int width) { |
2128 __asm { | 2133 __asm { |
2129 push esi | 2134 push esi |
2130 mov eax, [esp + 4 + 4] // Y | 2135 push ebp |
2131 mov esi, [esp + 4 + 8] // UV | 2136 mov eax, [esp + 8 + 4] // Y |
2132 mov edx, [esp + 4 + 12] // argb | 2137 mov esi, [esp + 8 + 8] // UV |
2133 mov ecx, [esp + 4 + 16] // width | 2138 mov edx, [esp + 8 + 12] // argb |
| 2139 mov ebp, [esp + 8 + 16] // YuvConstants |
| 2140 mov ecx, [esp + 8 + 20] // width |
2134 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha | 2141 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha |
2135 | 2142 |
2136 convertloop: | 2143 convertloop: |
2137 READNV12_AVX2 | 2144 READNV12_AVX2 |
2138 YUVTORGB_AVX2(kYuvConstants) | 2145 YUVTORGB_AVX2(ebp) |
2139 STOREARGB_AVX2 | 2146 STOREARGB_AVX2 |
2140 | 2147 |
2141 sub ecx, 16 | 2148 sub ecx, 16 |
2142 jg convertloop | 2149 jg convertloop |
2143 | 2150 |
| 2151 pop ebp |
2144 pop esi | 2152 pop esi |
2145 vzeroupper | 2153 vzeroupper |
2146 ret | 2154 ret |
2147 } | 2155 } |
2148 } | 2156 } |
2149 #endif // HAS_NV12TOARGBROW_AVX2 | 2157 #endif // HAS_NV12TOARGBROW_AVX2 |
2150 | 2158 |
2151 #ifdef HAS_NV21TOARGBROW_AVX2 | |
2152 // 16 pixels. | |
2153 // 8 VU values upsampled to 16 VU, mixed with 16 Y producing 16 ARGB (64 bytes). | |
2154 __declspec(naked) | |
2155 void NV21ToARGBRow_AVX2(const uint8* y_buf, | |
2156 const uint8* uv_buf, | |
2157 uint8* dst_argb, | |
2158 int width) { | |
2159 __asm { | |
2160 push esi | |
2161 mov eax, [esp + 4 + 4] // Y | |
2162 mov esi, [esp + 4 + 8] // UV | |
2163 mov edx, [esp + 4 + 12] // argb | |
2164 mov ecx, [esp + 4 + 16] // width | |
2165 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha | |
2166 | |
2167 convertloop: | |
2168 READNV12_AVX2 | |
2169 YUVTORGB_AVX2(kYvuConstants) | |
2170 STOREARGB_AVX2 | |
2171 | |
2172 sub ecx, 16 | |
2173 jg convertloop | |
2174 | |
2175 pop esi | |
2176 vzeroupper | |
2177 ret | |
2178 } | |
2179 } | |
2180 #endif // HAS_NV21TOARGBROW_AVX2 | |
2181 | |
2182 #ifdef HAS_I422TOBGRAROW_AVX2 | 2159 #ifdef HAS_I422TOBGRAROW_AVX2 |
2183 // 16 pixels | 2160 // 16 pixels |
2184 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes). | 2161 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes). |
2185 // TODO(fbarchard): Use macros to reduce duplicate code. See SSSE3. | 2162 // TODO(fbarchard): Use macros to reduce duplicate code. See SSSE3. |
2186 __declspec(naked) | 2163 __declspec(naked) |
2187 void I422ToBGRARow_AVX2(const uint8* y_buf, | 2164 void I422ToBGRARow_AVX2(const uint8* y_buf, |
2188 const uint8* u_buf, | 2165 const uint8* u_buf, |
2189 const uint8* v_buf, | 2166 const uint8* v_buf, |
2190 uint8* dst_argb, | 2167 uint8* dst_argb, |
| 2168 struct YuvConstants* yuvconstants, |
2191 int width) { | 2169 int width) { |
2192 __asm { | 2170 __asm { |
2193 push esi | 2171 push esi |
2194 push edi | 2172 push edi |
2195 mov eax, [esp + 8 + 4] // Y | 2173 push ebp |
2196 mov esi, [esp + 8 + 8] // U | 2174 mov eax, [esp + 12 + 4] // Y |
2197 mov edi, [esp + 8 + 12] // V | 2175 mov esi, [esp + 12 + 8] // U |
2198 mov edx, [esp + 8 + 16] // argb | 2176 mov edi, [esp + 12 + 12] // V |
2199 mov ecx, [esp + 8 + 20] // width | 2177 mov edx, [esp + 12 + 16] // argb |
| 2178 mov ebp, [esp + 12 + 20] // YuvConstants |
| 2179 mov ecx, [esp + 12 + 24] // width |
2200 sub edi, esi | 2180 sub edi, esi |
2201 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha | 2181 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha |
2202 | 2182 |
2203 convertloop: | 2183 convertloop: |
2204 READYUV422_AVX2 | 2184 READYUV422_AVX2 |
2205 YUVTORGB_AVX2(kYuvConstants) | 2185 YUVTORGB_AVX2(ebp) |
2206 STOREBGRA_AVX2 | 2186 STOREBGRA_AVX2 |
2207 | 2187 |
2208 sub ecx, 16 | 2188 sub ecx, 16 |
2209 jg convertloop | 2189 jg convertloop |
2210 | 2190 |
| 2191 pop ebp |
2211 pop edi | 2192 pop edi |
2212 pop esi | 2193 pop esi |
2213 vzeroupper | 2194 vzeroupper |
2214 ret | 2195 ret |
2215 } | 2196 } |
2216 } | 2197 } |
2217 #endif // HAS_I422TOBGRAROW_AVX2 | 2198 #endif // HAS_I422TOBGRAROW_AVX2 |
2218 | 2199 |
2219 #ifdef HAS_I422TORGBAROW_AVX2 | 2200 #ifdef HAS_I422TORGBAROW_AVX2 |
2220 // 16 pixels | 2201 // 16 pixels |
2221 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes). | 2202 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes). |
2222 __declspec(naked) | 2203 __declspec(naked) |
2223 void I422ToRGBARow_AVX2(const uint8* y_buf, | 2204 void I422ToRGBARow_AVX2(const uint8* y_buf, |
2224 const uint8* u_buf, | 2205 const uint8* u_buf, |
2225 const uint8* v_buf, | 2206 const uint8* v_buf, |
2226 uint8* dst_argb, | 2207 uint8* dst_argb, |
| 2208 struct YuvConstants* yuvconstants, |
2227 int width) { | 2209 int width) { |
2228 __asm { | 2210 __asm { |
2229 push esi | 2211 push esi |
2230 push edi | 2212 push edi |
2231 mov eax, [esp + 8 + 4] // Y | 2213 push ebp |
2232 mov esi, [esp + 8 + 8] // U | 2214 mov eax, [esp + 12 + 4] // Y |
2233 mov edi, [esp + 8 + 12] // V | 2215 mov esi, [esp + 12 + 8] // U |
2234 mov edx, [esp + 8 + 16] // argb | 2216 mov edi, [esp + 12 + 12] // V |
2235 mov ecx, [esp + 8 + 20] // width | 2217 mov edx, [esp + 12 + 16] // argb |
| 2218 mov ebp, [esp + 12 + 20] // YuvConstants |
| 2219 mov ecx, [esp + 12 + 24] // width |
2236 sub edi, esi | 2220 sub edi, esi |
2237 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha | 2221 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha |
2238 | 2222 |
2239 convertloop: | 2223 convertloop: |
2240 READYUV422_AVX2 | 2224 READYUV422_AVX2 |
2241 YUVTORGB_AVX2(kYuvConstants) | 2225 YUVTORGB_AVX2(ebp) |
2242 STORERGBA_AVX2 | 2226 STORERGBA_AVX2 |
2243 | 2227 |
2244 sub ecx, 16 | 2228 sub ecx, 16 |
2245 jg convertloop | 2229 jg convertloop |
2246 | 2230 |
| 2231 pop ebp |
2247 pop edi | 2232 pop edi |
2248 pop esi | 2233 pop esi |
2249 vzeroupper | 2234 vzeroupper |
2250 ret | 2235 ret |
2251 } | 2236 } |
2252 } | 2237 } |
2253 #endif // HAS_I422TORGBAROW_AVX2 | 2238 #endif // HAS_I422TORGBAROW_AVX2 |
2254 | 2239 |
2255 #ifdef HAS_I422TOABGRROW_AVX2 | 2240 #ifdef HAS_I422TOABGRROW_AVX2 |
2256 // 16 pixels | 2241 // 16 pixels |
2257 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes). | 2242 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes). |
2258 __declspec(naked) | 2243 __declspec(naked) |
2259 void I422ToABGRMatrixRow_AVX2(const uint8* y_buf, | 2244 void I422ToABGRRow_AVX2(const uint8* y_buf, |
2260 const uint8* u_buf, | 2245 const uint8* u_buf, |
2261 const uint8* v_buf, | 2246 const uint8* v_buf, |
2262 uint8* dst_argb, | 2247 uint8* dst_argb, |
2263 struct YuvConstants* YuvConstants, | 2248 struct YuvConstants* yuvconstants, |
2264 int width) { | 2249 int width) { |
2265 __asm { | 2250 __asm { |
2266 push esi | 2251 push esi |
2267 push edi | 2252 push edi |
2268 push ebp | 2253 push ebp |
2269 mov eax, [esp + 12 + 4] // Y | 2254 mov eax, [esp + 12 + 4] // Y |
2270 mov esi, [esp + 12 + 8] // U | 2255 mov esi, [esp + 12 + 8] // U |
2271 mov edi, [esp + 12 + 12] // V | 2256 mov edi, [esp + 12 + 12] // V |
2272 mov edx, [esp + 12 + 16] // argb | 2257 mov edx, [esp + 12 + 16] // argb |
2273 mov ebp, [esp + 12 + 20] // YuvConstants | 2258 mov ebp, [esp + 12 + 20] // YuvConstants |
2274 mov ecx, [esp + 12 + 24] // width | 2259 mov ecx, [esp + 12 + 24] // width |
(...skipping 199 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
2474 __asm por xmm3, xmm2 /* BG */ \ | 2459 __asm por xmm3, xmm2 /* BG */ \ |
2475 __asm por xmm1, xmm3 /* BGR */ \ | 2460 __asm por xmm1, xmm3 /* BGR */ \ |
2476 __asm packssdw xmm0, xmm1 \ | 2461 __asm packssdw xmm0, xmm1 \ |
2477 __asm movdqu 0[edx], xmm0 /* store 8 pixels of RGB565 */ \ | 2462 __asm movdqu 0[edx], xmm0 /* store 8 pixels of RGB565 */ \ |
2478 __asm lea edx, [edx + 16] \ | 2463 __asm lea edx, [edx + 16] \ |
2479 } | 2464 } |
2480 | 2465 |
2481 // 8 pixels. | 2466 // 8 pixels. |
2482 // 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes). | 2467 // 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes). |
2483 __declspec(naked) | 2468 __declspec(naked) |
2484 void I444ToARGBMatrixRow_SSSE3(const uint8* y_buf, | 2469 void I444ToARGBRow_SSSE3(const uint8* y_buf, |
2485 const uint8* u_buf, | 2470 const uint8* u_buf, |
2486 const uint8* v_buf, | 2471 const uint8* v_buf, |
2487 uint8* dst_argb, | 2472 uint8* dst_argb, |
2488 struct YuvConstants* YuvConstants, | 2473 struct YuvConstants* yuvconstants, |
2489 int width) { | 2474 int width) { |
2490 __asm { | 2475 __asm { |
2491 push esi | 2476 push esi |
2492 push edi | 2477 push edi |
2493 push ebp | 2478 push ebp |
2494 mov eax, [esp + 12 + 4] // Y | 2479 mov eax, [esp + 12 + 4] // Y |
2495 mov esi, [esp + 12 + 8] // U | 2480 mov esi, [esp + 12 + 8] // U |
2496 mov edi, [esp + 12 + 12] // V | 2481 mov edi, [esp + 12 + 12] // V |
2497 mov edx, [esp + 12 + 16] // argb | 2482 mov edx, [esp + 12 + 16] // argb |
2498 mov ebp, [esp + 12 + 20] // YuvConstants | 2483 mov ebp, [esp + 12 + 20] // YuvConstants |
2499 mov ecx, [esp + 12 + 24] // width | 2484 mov ecx, [esp + 12 + 24] // width |
(...skipping 11 matching lines...) Expand all Loading... |
2511 pop ebp | 2496 pop ebp |
2512 pop edi | 2497 pop edi |
2513 pop esi | 2498 pop esi |
2514 ret | 2499 ret |
2515 } | 2500 } |
2516 } | 2501 } |
2517 | 2502 |
2518 // 8 pixels. | 2503 // 8 pixels. |
2519 // 8 UV values, mixed with 8 Y producing 8 ABGR (32 bytes). | 2504 // 8 UV values, mixed with 8 Y producing 8 ABGR (32 bytes). |
2520 __declspec(naked) | 2505 __declspec(naked) |
2521 void I444ToABGRMatrixRow_SSSE3(const uint8* y_buf, | 2506 void I444ToABGRRow_SSSE3(const uint8* y_buf, |
2522 const uint8* u_buf, | 2507 const uint8* u_buf, |
2523 const uint8* v_buf, | 2508 const uint8* v_buf, |
2524 uint8* dst_abgr, | 2509 uint8* dst_abgr, |
2525 struct YuvConstants* YuvConstants, | 2510 struct YuvConstants* yuvconstants, |
2526 int width) { | 2511 int width) { |
2527 __asm { | 2512 __asm { |
2528 push esi | 2513 push esi |
2529 push edi | 2514 push edi |
2530 push ebp | 2515 push ebp |
2531 mov eax, [esp + 12 + 4] // Y | 2516 mov eax, [esp + 12 + 4] // Y |
2532 mov esi, [esp + 12 + 8] // U | 2517 mov esi, [esp + 12 + 8] // U |
2533 mov edi, [esp + 12 + 12] // V | 2518 mov edi, [esp + 12 + 12] // V |
2534 mov edx, [esp + 12 + 16] // abgr | 2519 mov edx, [esp + 12 + 16] // abgr |
2535 mov ebp, [esp + 12 + 20] // YuvConstants | 2520 mov ebp, [esp + 12 + 20] // YuvConstants |
2536 mov ecx, [esp + 12 + 24] // width | 2521 mov ecx, [esp + 12 + 24] // width |
(...skipping 15 matching lines...) Expand all Loading... |
2552 } | 2537 } |
2553 } | 2538 } |
2554 | 2539 |
2555 // 8 pixels. | 2540 // 8 pixels. |
2556 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB24 (24 bytes). | 2541 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB24 (24 bytes). |
2557 __declspec(naked) | 2542 __declspec(naked) |
2558 void I422ToRGB24Row_SSSE3(const uint8* y_buf, | 2543 void I422ToRGB24Row_SSSE3(const uint8* y_buf, |
2559 const uint8* u_buf, | 2544 const uint8* u_buf, |
2560 const uint8* v_buf, | 2545 const uint8* v_buf, |
2561 uint8* dst_rgb24, | 2546 uint8* dst_rgb24, |
| 2547 struct YuvConstants* yuvconstants, |
2562 int width) { | 2548 int width) { |
2563 __asm { | 2549 __asm { |
2564 push esi | 2550 push esi |
2565 push edi | 2551 push edi |
2566 mov eax, [esp + 8 + 4] // Y | 2552 push ebp |
2567 mov esi, [esp + 8 + 8] // U | 2553 mov eax, [esp + 12 + 4] // Y |
2568 mov edi, [esp + 8 + 12] // V | 2554 mov esi, [esp + 12 + 8] // U |
2569 mov edx, [esp + 8 + 16] // rgb24 | 2555 mov edi, [esp + 12 + 12] // V |
2570 mov ecx, [esp + 8 + 20] // width | 2556 mov edx, [esp + 12 + 16] // rgb24 |
| 2557 mov ebp, [esp + 12 + 20] // YuvConstants |
| 2558 mov ecx, [esp + 12 + 24] // width |
2571 sub edi, esi | 2559 sub edi, esi |
2572 movdqa xmm5, xmmword ptr kShuffleMaskARGBToRGB24_0 | 2560 movdqa xmm5, xmmword ptr kShuffleMaskARGBToRGB24_0 |
2573 movdqa xmm6, xmmword ptr kShuffleMaskARGBToRGB24 | 2561 movdqa xmm6, xmmword ptr kShuffleMaskARGBToRGB24 |
2574 | 2562 |
2575 convertloop: | 2563 convertloop: |
2576 READYUV422 | 2564 READYUV422 |
2577 YUVTORGB(kYuvConstants) | 2565 YUVTORGB(ebp) |
2578 STORERGB24 | 2566 STORERGB24 |
2579 | 2567 |
2580 sub ecx, 8 | 2568 sub ecx, 8 |
2581 jg convertloop | 2569 jg convertloop |
2582 | 2570 |
| 2571 pop ebp |
2583 pop edi | 2572 pop edi |
2584 pop esi | 2573 pop esi |
2585 ret | 2574 ret |
2586 } | 2575 } |
2587 } | 2576 } |
2588 | 2577 |
2589 // 8 pixels. | 2578 // 8 pixels. |
2590 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RAW (24 bytes). | 2579 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RAW (24 bytes). |
2591 __declspec(naked) | 2580 __declspec(naked) |
2592 void I422ToRAWRow_SSSE3(const uint8* y_buf, | 2581 void I422ToRAWRow_SSSE3(const uint8* y_buf, |
2593 const uint8* u_buf, | 2582 const uint8* u_buf, |
2594 const uint8* v_buf, | 2583 const uint8* v_buf, |
2595 uint8* dst_raw, | 2584 uint8* dst_raw, |
| 2585 struct YuvConstants* yuvconstants, |
2596 int width) { | 2586 int width) { |
2597 __asm { | 2587 __asm { |
2598 push esi | 2588 push esi |
2599 push edi | 2589 push edi |
2600 mov eax, [esp + 8 + 4] // Y | 2590 push ebp |
2601 mov esi, [esp + 8 + 8] // U | 2591 mov eax, [esp + 12 + 4] // Y |
2602 mov edi, [esp + 8 + 12] // V | 2592 mov esi, [esp + 12 + 8] // U |
2603 mov edx, [esp + 8 + 16] // raw | 2593 mov edi, [esp + 12 + 12] // V |
2604 mov ecx, [esp + 8 + 20] // width | 2594 mov edx, [esp + 12 + 16] // raw |
| 2595 mov ebp, [esp + 12 + 20] // YuvConstants |
| 2596 mov ecx, [esp + 12 + 24] // width |
2605 sub edi, esi | 2597 sub edi, esi |
2606 movdqa xmm5, xmmword ptr kShuffleMaskARGBToRAW_0 | 2598 movdqa xmm5, xmmword ptr kShuffleMaskARGBToRAW_0 |
2607 movdqa xmm6, xmmword ptr kShuffleMaskARGBToRAW | 2599 movdqa xmm6, xmmword ptr kShuffleMaskARGBToRAW |
2608 | 2600 |
2609 convertloop: | 2601 convertloop: |
2610 READYUV422 | 2602 READYUV422 |
2611 YUVTORGB(kYuvConstants) | 2603 YUVTORGB(ebp) |
2612 STORERAW | 2604 STORERAW |
2613 | 2605 |
2614 sub ecx, 8 | 2606 sub ecx, 8 |
2615 jg convertloop | 2607 jg convertloop |
2616 | 2608 |
| 2609 pop ebp |
2617 pop edi | 2610 pop edi |
2618 pop esi | 2611 pop esi |
2619 ret | 2612 ret |
2620 } | 2613 } |
2621 } | 2614 } |
2622 | 2615 |
2623 // 8 pixels | 2616 // 8 pixels |
2624 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB565 (16 bytes). | 2617 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB565 (16 bytes). |
2625 __declspec(naked) | 2618 __declspec(naked) |
2626 void I422ToRGB565Row_SSSE3(const uint8* y_buf, | 2619 void I422ToRGB565Row_SSSE3(const uint8* y_buf, |
2627 const uint8* u_buf, | 2620 const uint8* u_buf, |
2628 const uint8* v_buf, | 2621 const uint8* v_buf, |
2629 uint8* rgb565_buf, | 2622 uint8* rgb565_buf, |
| 2623 struct YuvConstants* yuvconstants, |
2630 int width) { | 2624 int width) { |
2631 __asm { | 2625 __asm { |
2632 push esi | 2626 push esi |
2633 push edi | 2627 push edi |
2634 mov eax, [esp + 8 + 4] // Y | 2628 push ebp |
2635 mov esi, [esp + 8 + 8] // U | 2629 mov eax, [esp + 12 + 4] // Y |
2636 mov edi, [esp + 8 + 12] // V | 2630 mov esi, [esp + 12 + 8] // U |
2637 mov edx, [esp + 8 + 16] // rgb565 | 2631 mov edi, [esp + 12 + 12] // V |
2638 mov ecx, [esp + 8 + 20] // width | 2632 mov edx, [esp + 12 + 16] // rgb565 |
| 2633 mov ebp, [esp + 12 + 20] // YuvConstants |
| 2634 mov ecx, [esp + 12 + 24] // width |
2639 sub edi, esi | 2635 sub edi, esi |
2640 pcmpeqb xmm5, xmm5 // generate mask 0x0000001f | 2636 pcmpeqb xmm5, xmm5 // generate mask 0x0000001f |
2641 psrld xmm5, 27 | 2637 psrld xmm5, 27 |
2642 pcmpeqb xmm6, xmm6 // generate mask 0x000007e0 | 2638 pcmpeqb xmm6, xmm6 // generate mask 0x000007e0 |
2643 psrld xmm6, 26 | 2639 psrld xmm6, 26 |
2644 pslld xmm6, 5 | 2640 pslld xmm6, 5 |
2645 pcmpeqb xmm7, xmm7 // generate mask 0xfffff800 | 2641 pcmpeqb xmm7, xmm7 // generate mask 0xfffff800 |
2646 pslld xmm7, 11 | 2642 pslld xmm7, 11 |
2647 | 2643 |
2648 convertloop: | 2644 convertloop: |
2649 READYUV422 | 2645 READYUV422 |
2650 YUVTORGB(kYuvConstants) | 2646 YUVTORGB(ebp) |
2651 STORERGB565 | 2647 STORERGB565 |
2652 | 2648 |
2653 sub ecx, 8 | 2649 sub ecx, 8 |
2654 jg convertloop | 2650 jg convertloop |
2655 | 2651 |
| 2652 pop ebp |
2656 pop edi | 2653 pop edi |
2657 pop esi | 2654 pop esi |
2658 ret | 2655 ret |
2659 } | 2656 } |
2660 } | 2657 } |
2661 | 2658 |
2662 // 8 pixels. | 2659 // 8 pixels. |
2663 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). | 2660 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). |
2664 __declspec(naked) | 2661 __declspec(naked) |
2665 void I422ToARGBMatrixRow_SSSE3(const uint8* y_buf, | 2662 void I422ToARGBRow_SSSE3(const uint8* y_buf, |
2666 const uint8* u_buf, | 2663 const uint8* u_buf, |
2667 const uint8* v_buf, | 2664 const uint8* v_buf, |
2668 uint8* dst_argb, | 2665 uint8* dst_argb, |
2669 struct YuvConstants* YuvConstants, | 2666 struct YuvConstants* yuvconstants, |
2670 int width) { | 2667 int width) { |
2671 __asm { | 2668 __asm { |
2672 push esi | 2669 push esi |
2673 push edi | 2670 push edi |
2674 push ebp | 2671 push ebp |
2675 mov eax, [esp + 12 + 4] // Y | 2672 mov eax, [esp + 12 + 4] // Y |
2676 mov esi, [esp + 12 + 8] // U | 2673 mov esi, [esp + 12 + 8] // U |
2677 mov edi, [esp + 12 + 12] // V | 2674 mov edi, [esp + 12 + 12] // V |
2678 mov edx, [esp + 12 + 16] // argb | 2675 mov edx, [esp + 12 + 16] // argb |
2679 mov ebp, [esp + 12 + 20] // YuvConstants | 2676 mov ebp, [esp + 12 + 20] // YuvConstants |
2680 mov ecx, [esp + 12 + 24] // width | 2677 mov ecx, [esp + 12 + 24] // width |
(...skipping 16 matching lines...) Expand all Loading... |
2697 } | 2694 } |
2698 | 2695 |
2699 // 8 pixels. | 2696 // 8 pixels. |
2700 // 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). | 2697 // 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). |
2701 // Similar to I420 but duplicate UV once more. | 2698 // Similar to I420 but duplicate UV once more. |
// Each pass of convertloop runs READYUV411, YUVTORGB and STOREARGB, then
// subtracts 8 from the width in ecx and repeats while the result is > 0.
// New column: three registers are pushed (esi, edi, ebp), so arguments are
// read at [esp + 12 + N]; yuvconstants is loaded into ebp and handed to
// YUVTORGB in place of the fixed kYuvConstants symbol.
// NOTE(review): the old column saved/restored ebx because READYUV411 was
// annotated "modifies EBX". The new column no longer pushes/pops ebx.
// READYUV411 is not visible here -- confirm it no longer writes ebx,
// otherwise a register the caller expects to be preserved is clobbered.
2702 __declspec(naked) | 2699 __declspec(naked) |
2703 void I411ToARGBRow_SSSE3(const uint8* y_buf, | 2700 void I411ToARGBRow_SSSE3(const uint8* y_buf, |
2704 const uint8* u_buf, | 2701 const uint8* u_buf, |
2705 const uint8* v_buf, | 2702 const uint8* v_buf, |
2706 uint8* dst_argb, | 2703 uint8* dst_argb, |
| 2704 struct YuvConstants* yuvconstants, |
2707 int width) { | 2705 int width) { |
2708 __asm { | 2706 __asm { |
2709 push ebx | |
2710 push esi | 2707 push esi |
2711 push edi | 2708 push edi |
| 2709 push ebp |
2712 mov eax, [esp + 12 + 4] // Y | 2710 mov eax, [esp + 12 + 4] // Y |
2713 mov esi, [esp + 12 + 8] // U | 2711 mov esi, [esp + 12 + 8] // U |
2714 mov edi, [esp + 12 + 12] // V | 2712 mov edi, [esp + 12 + 12] // V |
2715 mov edx, [esp + 12 + 16] // argb | 2713 mov edx, [esp + 12 + 16] // argb |
2716 mov ecx, [esp + 12 + 20] // width | 2714 mov ebp, [esp + 12 + 20] // YuvConstants |
| 2715 mov ecx, [esp + 12 + 24] // width |
2717 sub edi, esi | 2716 sub edi, esi |
2718 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha | 2717 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha |
2719 | 2718 |
2720 convertloop: | 2719 convertloop: |
2721 READYUV411 // modifies EBX | 2720 READYUV411 |
2722 YUVTORGB(kYuvConstants) | 2721 YUVTORGB(ebp) |
2723 STOREARGB | 2722 STOREARGB |
2724 | 2723 |
2725 sub ecx, 8 | 2724 sub ecx, 8 |
2726 jg convertloop | 2725 jg convertloop |
2727 | 2726 |
| 2727 pop ebp |
2728 pop edi | 2728 pop edi |
2729 pop esi | 2729 pop esi |
2730 pop ebx | |
2731 ret | 2730 ret |
2732 } | 2731 } |
2733 } | 2732 } |
2734 | 2733 |
2735 // 8 pixels. | 2734 // 8 pixels. |
2736 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). | 2735 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). |
// Each pass of convertloop runs READNV12, YUVTORGB and STOREARGB, then
// subtracts 8 from the width in ecx and repeats while the result is > 0.
// New column: a yuvconstants argument is added; it is loaded into ebp, which
// is pushed on entry and popped before ret. With two registers pushed (esi,
// ebp) the arguments are read at [esp + 8 + N] instead of [esp + 4 + N].
// The old-only NV21ToARGBRow_SSSE3 further down (READNV12 followed by
// YUVTORGB(kYvuConstants)) has no counterpart in the new column.
2737 __declspec(naked) | 2736 __declspec(naked) |
2738 void NV12ToARGBRow_SSSE3(const uint8* y_buf, | 2737 void NV12ToARGBRow_SSSE3(const uint8* y_buf, |
2739 const uint8* uv_buf, | 2738 const uint8* uv_buf, |
2740 uint8* dst_argb, | 2739 uint8* dst_argb, |
| 2740 struct YuvConstants* yuvconstants, |
2741 int width) { | 2741 int width) { |
2742 __asm { | 2742 __asm { |
2743 push esi | 2743 push esi |
2744 mov eax, [esp + 4 + 4] // Y | 2744 push ebp |
2745 mov esi, [esp + 4 + 8] // UV | 2745 mov eax, [esp + 8 + 4] // Y |
2746 mov edx, [esp + 4 + 12] // argb | 2746 mov esi, [esp + 8 + 8] // UV |
2747 mov ecx, [esp + 4 + 16] // width | 2747 mov edx, [esp + 8 + 12] // argb |
| 2748 mov ebp, [esp + 8 + 16] // YuvConstants |
| 2749 mov ecx, [esp + 8 + 20] // width |
2748 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha | 2750 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha |
2749 | 2751 |
2750 convertloop: | 2752 convertloop: |
2751 READNV12 | 2753 READNV12 |
2752 YUVTORGB(kYuvConstants) | 2754 YUVTORGB(ebp) |
2753 STOREARGB | 2755 STOREARGB |
2754 | 2756 |
2755 sub ecx, 8 | 2757 sub ecx, 8 |
2756 jg convertloop | 2758 jg convertloop |
2757 | 2759 |
| 2760 pop ebp |
2758 pop esi | 2761 pop esi |
2759 ret | 2762 ret |
2760 } | 2763 } |
2761 } | |
2762 | |
2763 // 8 pixels. | |
2764 // 4 VU values upsampled to 8 VU, mixed with 8 Y producing 8 ARGB (32 bytes). | |
2765 __declspec(naked) | |
2766 void NV21ToARGBRow_SSSE3(const uint8* y_buf, | |
2767 const uint8* uv_buf, | |
2768 uint8* dst_argb, | |
2769 int width) { | |
2770 __asm { | |
2771 push esi | |
2772 mov eax, [esp + 4 + 4] // Y | |
2773 mov esi, [esp + 4 + 8] // UV | |
2774 mov edx, [esp + 4 + 12] // argb | |
2775 mov ecx, [esp + 4 + 16] // width | |
2776 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha | |
2777 | |
2778 convertloop: | |
2779 READNV12 | |
2780 YUVTORGB(kYvuConstants) | |
2781 STOREARGB | |
2782 | |
2783 sub ecx, 8 | |
2784 jg convertloop | |
2785 | |
2786 pop esi | |
2787 ret | |
2788 } | |
2789 } | 2764 } |
2790 | 2765 |
// Each pass of convertloop runs READYUV422, YUVTORGB and STOREBGRA, then
// subtracts 8 from the width in ecx and repeats while the result is > 0.
// New column: a yuvconstants argument is added and kept in ebp (pushed on
// entry, popped before ret); with esi, edi and ebp pushed, arguments are read
// at [esp + 12 + N] instead of [esp + 8 + N].
2791 __declspec(naked) | 2766 __declspec(naked) |
2792 void I422ToBGRARow_SSSE3(const uint8* y_buf, | 2767 void I422ToBGRARow_SSSE3(const uint8* y_buf, |
2793 const uint8* u_buf, | 2768 const uint8* u_buf, |
2794 const uint8* v_buf, | 2769 const uint8* v_buf, |
2795 uint8* dst_bgra, | 2770 uint8* dst_bgra, |
| 2771 struct YuvConstants* yuvconstants, |
2796 int width) { | 2772 int width) { |
2797 __asm { | 2773 __asm { |
2798 push esi | 2774 push esi |
2799 push edi | 2775 push edi |
2800 mov eax, [esp + 8 + 4] // Y | 2776 push ebp |
2801 mov esi, [esp + 8 + 8] // U | 2777 mov eax, [esp + 12 + 4] // Y |
2802 mov edi, [esp + 8 + 12] // V | 2778 mov esi, [esp + 12 + 8] // U |
2803 mov edx, [esp + 8 + 16] // bgra | 2779 mov edi, [esp + 12 + 12] // V |
2804 mov ecx, [esp + 8 + 20] // width | 2780 mov edx, [esp + 12 + 16] // bgra |
| 2781 mov ebp, [esp + 12 + 20] // YuvConstants |
| 2782 mov ecx, [esp + 12 + 24] // width |
2805 sub edi, esi | 2783 sub edi, esi |
2806 | 2784 |
2807 convertloop: | 2785 convertloop: |
2808 READYUV422 | 2786 READYUV422 |
2809 YUVTORGB(kYuvConstants) | 2787 YUVTORGB(ebp) |
2810 STOREBGRA | 2788 STOREBGRA |
2811 | 2789 |
2812 sub ecx, 8 | 2790 sub ecx, 8 |
2813 jg convertloop | 2791 jg convertloop |
2814 | 2792 |
| 2793 pop ebp |
2815 pop edi | 2794 pop edi |
2816 pop esi | 2795 pop esi |
2817 ret | 2796 ret |
2818 } | 2797 } |
2819 } | 2798 } |
2820 | 2799 |
2821 __declspec(naked) | 2800 __declspec(naked) |
2822 void I422ToABGRMatrixRow_SSSE3(const uint8* y_buf, | 2801 void I422ToABGRRow_SSSE3(const uint8* y_buf, |
2823 const uint8* u_buf, | 2802 const uint8* u_buf, |
2824 const uint8* v_buf, | 2803 const uint8* v_buf, |
2825 uint8* dst_abgr, | 2804 uint8* dst_abgr, |
2826 struct YuvConstants* YuvConstants, | 2805 struct YuvConstants* yuvconstants, |
2827 int width) { | 2806 int width) { |
2828 __asm { | 2807 __asm { |
2829 push esi | 2808 push esi |
2830 push edi | 2809 push edi |
2831 push ebp | 2810 push ebp |
2832 mov eax, [esp + 12 + 4] // Y | 2811 mov eax, [esp + 12 + 4] // Y |
2833 mov esi, [esp + 12 + 8] // U | 2812 mov esi, [esp + 12 + 8] // U |
2834 mov edi, [esp + 12 + 12] // V | 2813 mov edi, [esp + 12 + 12] // V |
2835 mov edx, [esp + 12 + 16] // argb | 2814 mov edx, [esp + 12 + 16] // argb |
2836 mov ebp, [esp + 12 + 20] // YuvConstants | 2815 mov ebp, [esp + 12 + 20] // YuvConstants |
2837 mov ecx, [esp + 12 + 24] // width | 2816 mov ecx, [esp + 12 + 24] // width |
(...skipping 13 matching lines...) Expand all Loading... |
2851 pop esi | 2830 pop esi |
2852 ret | 2831 ret |
2853 } | 2832 } |
2854 } | 2833 } |
2855 | 2834 |
// Each pass of convertloop runs READYUV422, YUVTORGB and STORERGBA, then
// subtracts 8 from the width in ecx and repeats while the result is > 0.
// New column: a yuvconstants argument is added and kept in ebp (pushed on
// entry, popped before ret); with esi, edi and ebp pushed, arguments are read
// at [esp + 12 + N] instead of [esp + 8 + N].
2856 __declspec(naked) | 2835 __declspec(naked) |
2857 void I422ToRGBARow_SSSE3(const uint8* y_buf, | 2836 void I422ToRGBARow_SSSE3(const uint8* y_buf, |
2858 const uint8* u_buf, | 2837 const uint8* u_buf, |
2859 const uint8* v_buf, | 2838 const uint8* v_buf, |
2860 uint8* dst_rgba, | 2839 uint8* dst_rgba, |
| 2840 struct YuvConstants* yuvconstants, |
2861 int width) { | 2841 int width) { |
2862 __asm { | 2842 __asm { |
2863 push esi | 2843 push esi |
2864 push edi | 2844 push edi |
2865 mov eax, [esp + 8 + 4] // Y | 2845 push ebp |
2866 mov esi, [esp + 8 + 8] // U | 2846 mov eax, [esp + 12 + 4] // Y |
2867 mov edi, [esp + 8 + 12] // V | 2847 mov esi, [esp + 12 + 8] // U |
2868 mov edx, [esp + 8 + 16] // rgba | 2848 mov edi, [esp + 12 + 12] // V |
2869 mov ecx, [esp + 8 + 20] // width | 2849 mov edx, [esp + 12 + 16] // rgba |
| 2850 mov ebp, [esp + 12 + 20] // YuvConstants |
| 2851 mov ecx, [esp + 12 + 24] // width |
2870 sub edi, esi | 2852 sub edi, esi |
2871 | 2853 |
2872 convertloop: | 2854 convertloop: |
2873 READYUV422 | 2855 READYUV422 |
2874 YUVTORGB(kYuvConstants) | 2856 YUVTORGB(ebp) |
2875 STORERGBA | 2857 STORERGBA |
2876 | 2858 |
2877 sub ecx, 8 | 2859 sub ecx, 8 |
2878 jg convertloop | 2860 jg convertloop |
2879 | 2861 |
| 2862 pop ebp |
2880 pop edi | 2863 pop edi |
2881 pop esi | 2864 pop esi |
2882 ret | 2865 ret |
2883 } | 2866 } |
2884 } | 2867 } |
2885 | |
2886 #endif // HAS_I422TOARGBROW_SSSE3 | 2868 #endif // HAS_I422TOARGBROW_SSSE3 |
2887 | 2869 |
2888 #ifdef HAS_I400TOARGBROW_SSE2 | 2870 #ifdef HAS_I400TOARGBROW_SSE2 |
2889 // 8 pixels of Y converted to 8 pixels of ARGB (32 bytes). | 2871 // 8 pixels of Y converted to 8 pixels of ARGB (32 bytes). |
2890 __declspec(naked) | 2872 __declspec(naked) |
2891 void I400ToARGBRow_SSE2(const uint8* y_buf, | 2873 void I400ToARGBRow_SSE2(const uint8* y_buf, |
2892 uint8* rgb_buf, | 2874 uint8* rgb_buf, |
2893 int width) { | 2875 int width) { |
2894 __asm { | 2876 __asm { |
2895 mov eax, 0x4a354a35 // 4a35 = 18997 = round(1.164 * 64 * 256) | 2877 mov eax, 0x4a354a35 // 4a35 = 18997 = round(1.164 * 64 * 256) |
(...skipping 3382 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
6278 } | 6260 } |
6279 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 | 6261 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 |
6280 | 6262 |
6281 #endif // defined(_M_X64) | 6263 #endif // defined(_M_X64) |
6282 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) | 6264 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) |
6283 | 6265 |
6284 #ifdef __cplusplus | 6266 #ifdef __cplusplus |
6285 } // extern "C" | 6267 } // extern "C" |
6286 } // namespace libyuv | 6268 } // namespace libyuv |
6287 #endif | 6269 #endif |
OLD | NEW |