Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(9)

Side by Side Diff: source/row_win.cc

Issue 1365003003: switch from ebp to ebx. (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: add version Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « include/libyuv/version.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 22 matching lines...) Expand all
33 xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); \ 33 xmm0 = _mm_cvtsi32_si128(*(uint32*)u_buf); \
34 xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \ 34 xmm1 = _mm_cvtsi32_si128(*(uint32*)(u_buf + offset)); \
35 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ 35 xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \
36 xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \ 36 xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \
37 u_buf += 4; \ 37 u_buf += 4; \
38 xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \ 38 xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \
39 xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \ 39 xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \
40 y_buf += 8; \ 40 y_buf += 8; \
41 41
42 // Convert 8 pixels: 8 UV and 8 Y. 42 // Convert 8 pixels: 8 UV and 8 Y.
43 #define YUVTORGB(YuvConstants) \ 43 #define YUVTORGB(yuvconstants) \
44 xmm1 = _mm_loadu_si128(&xmm0); \ 44 xmm1 = _mm_loadu_si128(&xmm0); \
45 xmm2 = _mm_loadu_si128(&xmm0); \ 45 xmm2 = _mm_loadu_si128(&xmm0); \
46 xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)YuvConstants->kUVToB); \ 46 xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)yuvconstants->kUVToB); \
47 xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)YuvConstants->kUVToG); \ 47 xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)yuvconstants->kUVToG); \
48 xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)YuvConstants->kUVToR); \ 48 xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)yuvconstants->kUVToR); \
49 xmm0 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasB, xmm0); \ 49 xmm0 = _mm_sub_epi16(*(__m128i*)yuvconstants->kUVBiasB, xmm0); \
50 xmm1 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasG, xmm1); \ 50 xmm1 = _mm_sub_epi16(*(__m128i*)yuvconstants->kUVBiasG, xmm1); \
51 xmm2 = _mm_sub_epi16(*(__m128i*)YuvConstants->kUVBiasR, xmm2); \ 51 xmm2 = _mm_sub_epi16(*(__m128i*)yuvconstants->kUVBiasR, xmm2); \
52 xmm4 = _mm_mulhi_epu16(xmm4, *(__m128i*)YuvConstants->kYToRgb); \ 52 xmm4 = _mm_mulhi_epu16(xmm4, *(__m128i*)yuvconstants->kYToRgb); \
53 xmm0 = _mm_adds_epi16(xmm0, xmm4); \ 53 xmm0 = _mm_adds_epi16(xmm0, xmm4); \
54 xmm1 = _mm_adds_epi16(xmm1, xmm4); \ 54 xmm1 = _mm_adds_epi16(xmm1, xmm4); \
55 xmm2 = _mm_adds_epi16(xmm2, xmm4); \ 55 xmm2 = _mm_adds_epi16(xmm2, xmm4); \
56 xmm0 = _mm_srai_epi16(xmm0, 6); \ 56 xmm0 = _mm_srai_epi16(xmm0, 6); \
57 xmm1 = _mm_srai_epi16(xmm1, 6); \ 57 xmm1 = _mm_srai_epi16(xmm1, 6); \
58 xmm2 = _mm_srai_epi16(xmm2, 6); \ 58 xmm2 = _mm_srai_epi16(xmm2, 6); \
59 xmm0 = _mm_packus_epi16(xmm0, xmm0); \ 59 xmm0 = _mm_packus_epi16(xmm0, xmm0); \
60 xmm1 = _mm_packus_epi16(xmm1, xmm1); \ 60 xmm1 = _mm_packus_epi16(xmm1, xmm1); \
61 xmm2 = _mm_packus_epi16(xmm2, xmm2); 61 xmm2 = _mm_packus_epi16(xmm2, xmm2);
62 62
(...skipping 1960 matching lines...) Expand 10 before | Expand all | Expand 10 after
2023 __declspec(naked) 2023 __declspec(naked)
2024 void I422ToARGBRow_AVX2(const uint8* y_buf, 2024 void I422ToARGBRow_AVX2(const uint8* y_buf,
2025 const uint8* u_buf, 2025 const uint8* u_buf,
2026 const uint8* v_buf, 2026 const uint8* v_buf,
2027 uint8* dst_argb, 2027 uint8* dst_argb,
2028 struct YuvConstants* yuvconstants, 2028 struct YuvConstants* yuvconstants,
2029 int width) { 2029 int width) {
2030 __asm { 2030 __asm {
2031 push esi 2031 push esi
2032 push edi 2032 push edi
2033 push ebp 2033 push ebx
2034 mov eax, [esp + 12 + 4] // Y 2034 mov eax, [esp + 12 + 4] // Y
2035 mov esi, [esp + 12 + 8] // U 2035 mov esi, [esp + 12 + 8] // U
2036 mov edi, [esp + 12 + 12] // V 2036 mov edi, [esp + 12 + 12] // V
2037 mov edx, [esp + 12 + 16] // argb 2037 mov edx, [esp + 12 + 16] // argb
2038 mov ebp, [esp + 12 + 20] // yuvconstants 2038 mov ebx, [esp + 12 + 20] // yuvconstants
2039 mov ecx, [esp + 12 + 24] // width 2039 mov ecx, [esp + 12 + 24] // width
2040 sub edi, esi 2040 sub edi, esi
2041 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha 2041 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
2042 2042
2043 convertloop: 2043 convertloop:
2044 READYUV422_AVX2 2044 READYUV422_AVX2
2045 YUVTORGB_AVX2(ebp) 2045 YUVTORGB_AVX2(ebx)
2046 STOREARGB_AVX2 2046 STOREARGB_AVX2
2047 2047
2048 sub ecx, 16 2048 sub ecx, 16
2049 jg convertloop 2049 jg convertloop
2050 2050
2051 pop ebp 2051 pop ebx
2052 pop edi 2052 pop edi
2053 pop esi 2053 pop esi
2054 vzeroupper 2054 vzeroupper
2055 ret 2055 ret
2056 } 2056 }
2057 } 2057 }
2058 #endif // HAS_I422TOARGBROW_AVX2 2058 #endif // HAS_I422TOARGBROW_AVX2
2059 2059
2060 #ifdef HAS_I444TOARGBROW_AVX2 2060 #ifdef HAS_I444TOARGBROW_AVX2
2061 // 16 pixels 2061 // 16 pixels
2062 // 16 UV values with 16 Y producing 16 ARGB (64 bytes). 2062 // 16 UV values with 16 Y producing 16 ARGB (64 bytes).
2063 __declspec(naked) 2063 __declspec(naked)
2064 void I444ToARGBRow_AVX2(const uint8* y_buf, 2064 void I444ToARGBRow_AVX2(const uint8* y_buf,
2065 const uint8* u_buf, 2065 const uint8* u_buf,
2066 const uint8* v_buf, 2066 const uint8* v_buf,
2067 uint8* dst_argb, 2067 uint8* dst_argb,
2068 struct YuvConstants* yuvconstants, 2068 struct YuvConstants* yuvconstants,
2069 int width) { 2069 int width) {
2070 __asm { 2070 __asm {
2071 push esi 2071 push esi
2072 push edi 2072 push edi
2073 push ebp 2073 push ebx
2074 mov eax, [esp + 12 + 4] // Y 2074 mov eax, [esp + 12 + 4] // Y
2075 mov esi, [esp + 12 + 8] // U 2075 mov esi, [esp + 12 + 8] // U
2076 mov edi, [esp + 12 + 12] // V 2076 mov edi, [esp + 12 + 12] // V
2077 mov edx, [esp + 12 + 16] // argb 2077 mov edx, [esp + 12 + 16] // argb
2078 mov ebp, [esp + 12 + 20] // yuvconstants 2078 mov ebx, [esp + 12 + 20] // yuvconstants
2079 mov ecx, [esp + 12 + 24] // width 2079 mov ecx, [esp + 12 + 24] // width
2080 sub edi, esi 2080 sub edi, esi
2081 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha 2081 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
2082 convertloop: 2082 convertloop:
2083 READYUV444_AVX2 2083 READYUV444_AVX2
2084 YUVTORGB_AVX2(ebp) 2084 YUVTORGB_AVX2(ebx)
2085 STOREARGB_AVX2 2085 STOREARGB_AVX2
2086 2086
2087 sub ecx, 16 2087 sub ecx, 16
2088 jg convertloop 2088 jg convertloop
2089 2089
2090 pop ebp 2090 pop ebx
2091 pop edi 2091 pop edi
2092 pop esi 2092 pop esi
2093 vzeroupper 2093 vzeroupper
2094 ret 2094 ret
2095 } 2095 }
2096 } 2096 }
2097 #endif // HAS_I444TOARGBROW_AVX2 2097 #endif // HAS_I444TOARGBROW_AVX2
2098 2098
2099 #ifdef HAS_I444TOABGRROW_AVX2 2099 #ifdef HAS_I444TOABGRROW_AVX2
2100 // 16 pixels 2100 // 16 pixels
2101 // 16 UV values with 16 Y producing 16 ABGR (64 bytes). 2101 // 16 UV values with 16 Y producing 16 ABGR (64 bytes).
2102 __declspec(naked) 2102 __declspec(naked)
2103 void I444ToABGRRow_AVX2(const uint8* y_buf, 2103 void I444ToABGRRow_AVX2(const uint8* y_buf,
2104 const uint8* u_buf, 2104 const uint8* u_buf,
2105 const uint8* v_buf, 2105 const uint8* v_buf,
2106 uint8* dst_abgr, 2106 uint8* dst_abgr,
2107 struct YuvConstants* yuvconstants, 2107 struct YuvConstants* yuvconstants,
2108 int width) { 2108 int width) {
2109 __asm { 2109 __asm {
2110 push esi 2110 push esi
2111 push edi 2111 push edi
2112 push ebp 2112 push ebx
2113 mov eax, [esp + 12 + 4] // Y 2113 mov eax, [esp + 12 + 4] // Y
2114 mov esi, [esp + 12 + 8] // U 2114 mov esi, [esp + 12 + 8] // U
2115 mov edi, [esp + 12 + 12] // V 2115 mov edi, [esp + 12 + 12] // V
2116 mov edx, [esp + 12 + 16] // abgr 2116 mov edx, [esp + 12 + 16] // abgr
2117 mov ebp, [esp + 12 + 20] // yuvconstants 2117 mov ebx, [esp + 12 + 20] // yuvconstants
2118 mov ecx, [esp + 12 + 24] // width 2118 mov ecx, [esp + 12 + 24] // width
2119 sub edi, esi 2119 sub edi, esi
2120 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha 2120 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
2121 convertloop: 2121 convertloop:
2122 READYUV444_AVX2 2122 READYUV444_AVX2
2123 YUVTORGB_AVX2(ebp) 2123 YUVTORGB_AVX2(ebx)
2124 STOREABGR_AVX2 2124 STOREABGR_AVX2
2125 2125
2126 sub ecx, 16 2126 sub ecx, 16
2127 jg convertloop 2127 jg convertloop
2128 2128
2129 pop ebp 2129 pop ebx
2130 pop edi 2130 pop edi
2131 pop esi 2131 pop esi
2132 vzeroupper 2132 vzeroupper
2133 ret 2133 ret
2134 } 2134 }
2135 } 2135 }
2136 #endif // HAS_I444TOABGRROW_AVX2 2136 #endif // HAS_I444TOABGRROW_AVX2
2137 2137
2138 #ifdef HAS_I411TOARGBROW_AVX2 2138 #ifdef HAS_I411TOARGBROW_AVX2
2139 // 16 pixels 2139 // 16 pixels
2140 // 4 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). 2140 // 4 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
2141 __declspec(naked) 2141 __declspec(naked)
2142 void I411ToARGBRow_AVX2(const uint8* y_buf, 2142 void I411ToARGBRow_AVX2(const uint8* y_buf,
2143 const uint8* u_buf, 2143 const uint8* u_buf,
2144 const uint8* v_buf, 2144 const uint8* v_buf,
2145 uint8* dst_argb, 2145 uint8* dst_argb,
2146 struct YuvConstants* yuvconstants, 2146 struct YuvConstants* yuvconstants,
2147 int width) { 2147 int width) {
2148 __asm { 2148 __asm {
2149 push esi 2149 push esi
2150 push edi 2150 push edi
2151 push ebp 2151 push ebx
2152 mov eax, [esp + 12 + 4] // Y 2152 mov eax, [esp + 12 + 4] // Y
2153 mov esi, [esp + 12 + 8] // U 2153 mov esi, [esp + 12 + 8] // U
2154 mov edi, [esp + 12 + 12] // V 2154 mov edi, [esp + 12 + 12] // V
2155 mov edx, [esp + 12 + 16] // abgr 2155 mov edx, [esp + 12 + 16] // abgr
2156 mov ebp, [esp + 12 + 20] // yuvconstants 2156 mov ebx, [esp + 12 + 20] // yuvconstants
2157 mov ecx, [esp + 12 + 24] // width 2157 mov ecx, [esp + 12 + 24] // width
2158 sub edi, esi 2158 sub edi, esi
2159 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha 2159 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
2160 2160
2161 convertloop: 2161 convertloop:
2162 READYUV411_AVX2 2162 READYUV411_AVX2
2163 YUVTORGB_AVX2(ebp) 2163 YUVTORGB_AVX2(ebx)
2164 STOREARGB_AVX2 2164 STOREARGB_AVX2
2165 2165
2166 sub ecx, 16 2166 sub ecx, 16
2167 jg convertloop 2167 jg convertloop
2168 2168
2169 pop ebp 2169 pop ebx
2170 pop edi 2170 pop edi
2171 pop esi 2171 pop esi
2172 vzeroupper 2172 vzeroupper
2173 ret 2173 ret
2174 } 2174 }
2175 } 2175 }
2176 #endif // HAS_I411TOARGBROW_AVX2 2176 #endif // HAS_I411TOARGBROW_AVX2
2177 2177
2178 #ifdef HAS_NV12TOARGBROW_AVX2 2178 #ifdef HAS_NV12TOARGBROW_AVX2
2179 // 16 pixels. 2179 // 16 pixels.
2180 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). 2180 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
2181 __declspec(naked) 2181 __declspec(naked)
2182 void NV12ToARGBRow_AVX2(const uint8* y_buf, 2182 void NV12ToARGBRow_AVX2(const uint8* y_buf,
2183 const uint8* uv_buf, 2183 const uint8* uv_buf,
2184 uint8* dst_argb, 2184 uint8* dst_argb,
2185 struct YuvConstants* yuvconstants, 2185 struct YuvConstants* yuvconstants,
2186 int width) { 2186 int width) {
2187 __asm { 2187 __asm {
2188 push esi 2188 push esi
2189 push ebp 2189 push ebx
2190 mov eax, [esp + 8 + 4] // Y 2190 mov eax, [esp + 8 + 4] // Y
2191 mov esi, [esp + 8 + 8] // UV 2191 mov esi, [esp + 8 + 8] // UV
2192 mov edx, [esp + 8 + 12] // argb 2192 mov edx, [esp + 8 + 12] // argb
2193 mov ebp, [esp + 8 + 16] // yuvconstants 2193 mov ebx, [esp + 8 + 16] // yuvconstants
2194 mov ecx, [esp + 8 + 20] // width 2194 mov ecx, [esp + 8 + 20] // width
2195 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha 2195 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
2196 2196
2197 convertloop: 2197 convertloop:
2198 READNV12_AVX2 2198 READNV12_AVX2
2199 YUVTORGB_AVX2(ebp) 2199 YUVTORGB_AVX2(ebx)
2200 STOREARGB_AVX2 2200 STOREARGB_AVX2
2201 2201
2202 sub ecx, 16 2202 sub ecx, 16
2203 jg convertloop 2203 jg convertloop
2204 2204
2205 pop ebp 2205 pop ebx
2206 pop esi 2206 pop esi
2207 vzeroupper 2207 vzeroupper
2208 ret 2208 ret
2209 } 2209 }
2210 } 2210 }
2211 #endif // HAS_NV12TOARGBROW_AVX2 2211 #endif // HAS_NV12TOARGBROW_AVX2
2212 2212
2213 // 16 pixels. 2213 // 16 pixels.
2214 // 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes). 2214 // 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes).
2215 __declspec(naked) 2215 __declspec(naked)
2216 void YUY2ToARGBRow_AVX2(const uint8* src_yuy2, 2216 void YUY2ToARGBRow_AVX2(const uint8* src_yuy2,
2217 uint8* dst_argb, 2217 uint8* dst_argb,
2218 struct YuvConstants* yuvconstants, 2218 struct YuvConstants* yuvconstants,
2219 int width) { 2219 int width) {
2220 __asm { 2220 __asm {
2221 push ebp 2221 push ebx
2222 mov eax, [esp + 4 + 4] // yuy2 2222 mov eax, [esp + 4 + 4] // yuy2
2223 mov edx, [esp + 4 + 8] // argb 2223 mov edx, [esp + 4 + 8] // argb
2224 mov ebp, [esp + 4 + 12] // yuvconstants 2224 mov ebx, [esp + 4 + 12] // yuvconstants
2225 mov ecx, [esp + 4 + 16] // width 2225 mov ecx, [esp + 4 + 16] // width
2226 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha 2226 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
2227 2227
2228 convertloop: 2228 convertloop:
2229 READYUY2_AVX2 2229 READYUY2_AVX2
2230 YUVTORGB_AVX2(ebp) 2230 YUVTORGB_AVX2(ebx)
2231 STOREARGB_AVX2 2231 STOREARGB_AVX2
2232 2232
2233 sub ecx, 16 2233 sub ecx, 16
2234 jg convertloop 2234 jg convertloop
2235 2235
2236 pop ebp 2236 pop ebx
2237 vzeroupper 2237 vzeroupper
2238 ret 2238 ret
2239 } 2239 }
2240 } 2240 }
2241 2241
2242 // 16 pixels. 2242 // 16 pixels.
2243 // 8 UYVY values with 16 Y and 8 UV producing 16 ARGB (64 bytes). 2243 // 8 UYVY values with 16 Y and 8 UV producing 16 ARGB (64 bytes).
2244 __declspec(naked) 2244 __declspec(naked)
2245 void UYVYToARGBRow_AVX2(const uint8* src_uyvy, 2245 void UYVYToARGBRow_AVX2(const uint8* src_uyvy,
2246 uint8* dst_argb, 2246 uint8* dst_argb,
2247 struct YuvConstants* yuvconstants, 2247 struct YuvConstants* yuvconstants,
2248 int width) { 2248 int width) {
2249 __asm { 2249 __asm {
2250 push ebp 2250 push ebx
2251 mov eax, [esp + 4 + 4] // uyvy 2251 mov eax, [esp + 4 + 4] // uyvy
2252 mov edx, [esp + 4 + 8] // argb 2252 mov edx, [esp + 4 + 8] // argb
2253 mov ebp, [esp + 4 + 12] // yuvconstants 2253 mov ebx, [esp + 4 + 12] // yuvconstants
2254 mov ecx, [esp + 4 + 16] // width 2254 mov ecx, [esp + 4 + 16] // width
2255 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha 2255 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
2256 2256
2257 convertloop: 2257 convertloop:
2258 READUYVY_AVX2 2258 READUYVY_AVX2
2259 YUVTORGB_AVX2(ebp) 2259 YUVTORGB_AVX2(ebx)
2260 STOREARGB_AVX2 2260 STOREARGB_AVX2
2261 2261
2262 sub ecx, 16 2262 sub ecx, 16
2263 jg convertloop 2263 jg convertloop
2264 2264
2265 pop ebp 2265 pop ebx
2266 vzeroupper 2266 vzeroupper
2267 ret 2267 ret
2268 } 2268 }
2269 } 2269 }
2270 2270
2271 2271
2272 #ifdef HAS_I422TOBGRAROW_AVX2 2272 #ifdef HAS_I422TOBGRAROW_AVX2
2273 // 16 pixels 2273 // 16 pixels
2274 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes). 2274 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 BGRA (64 bytes).
2275 // TODO(fbarchard): Use macros to reduce duplicate code. See SSSE3. 2275 // TODO(fbarchard): Use macros to reduce duplicate code. See SSSE3.
2276 __declspec(naked) 2276 __declspec(naked)
2277 void I422ToBGRARow_AVX2(const uint8* y_buf, 2277 void I422ToBGRARow_AVX2(const uint8* y_buf,
2278 const uint8* u_buf, 2278 const uint8* u_buf,
2279 const uint8* v_buf, 2279 const uint8* v_buf,
2280 uint8* dst_argb, 2280 uint8* dst_argb,
2281 struct YuvConstants* yuvconstants, 2281 struct YuvConstants* yuvconstants,
2282 int width) { 2282 int width) {
2283 __asm { 2283 __asm {
2284 push esi 2284 push esi
2285 push edi 2285 push edi
2286 push ebp 2286 push ebx
2287 mov eax, [esp + 12 + 4] // Y 2287 mov eax, [esp + 12 + 4] // Y
2288 mov esi, [esp + 12 + 8] // U 2288 mov esi, [esp + 12 + 8] // U
2289 mov edi, [esp + 12 + 12] // V 2289 mov edi, [esp + 12 + 12] // V
2290 mov edx, [esp + 12 + 16] // abgr 2290 mov edx, [esp + 12 + 16] // abgr
2291 mov ebp, [esp + 12 + 20] // yuvconstants 2291 mov ebx, [esp + 12 + 20] // yuvconstants
2292 mov ecx, [esp + 12 + 24] // width 2292 mov ecx, [esp + 12 + 24] // width
2293 sub edi, esi 2293 sub edi, esi
2294 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha 2294 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
2295 2295
2296 convertloop: 2296 convertloop:
2297 READYUV422_AVX2 2297 READYUV422_AVX2
2298 YUVTORGB_AVX2(ebp) 2298 YUVTORGB_AVX2(ebx)
2299 STOREBGRA_AVX2 2299 STOREBGRA_AVX2
2300 2300
2301 sub ecx, 16 2301 sub ecx, 16
2302 jg convertloop 2302 jg convertloop
2303 2303
2304 pop ebp 2304 pop ebx
2305 pop edi 2305 pop edi
2306 pop esi 2306 pop esi
2307 vzeroupper 2307 vzeroupper
2308 ret 2308 ret
2309 } 2309 }
2310 } 2310 }
2311 #endif // HAS_I422TOBGRAROW_AVX2 2311 #endif // HAS_I422TOBGRAROW_AVX2
2312 2312
2313 #ifdef HAS_I422TORGBAROW_AVX2 2313 #ifdef HAS_I422TORGBAROW_AVX2
2314 // 16 pixels 2314 // 16 pixels
2315 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes). 2315 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes).
2316 __declspec(naked) 2316 __declspec(naked)
2317 void I422ToRGBARow_AVX2(const uint8* y_buf, 2317 void I422ToRGBARow_AVX2(const uint8* y_buf,
2318 const uint8* u_buf, 2318 const uint8* u_buf,
2319 const uint8* v_buf, 2319 const uint8* v_buf,
2320 uint8* dst_argb, 2320 uint8* dst_argb,
2321 struct YuvConstants* yuvconstants, 2321 struct YuvConstants* yuvconstants,
2322 int width) { 2322 int width) {
2323 __asm { 2323 __asm {
2324 push esi 2324 push esi
2325 push edi 2325 push edi
2326 push ebp 2326 push ebx
2327 mov eax, [esp + 12 + 4] // Y 2327 mov eax, [esp + 12 + 4] // Y
2328 mov esi, [esp + 12 + 8] // U 2328 mov esi, [esp + 12 + 8] // U
2329 mov edi, [esp + 12 + 12] // V 2329 mov edi, [esp + 12 + 12] // V
2330 mov edx, [esp + 12 + 16] // abgr 2330 mov edx, [esp + 12 + 16] // abgr
2331 mov ebp, [esp + 12 + 20] // yuvconstants 2331 mov ebx, [esp + 12 + 20] // yuvconstants
2332 mov ecx, [esp + 12 + 24] // width 2332 mov ecx, [esp + 12 + 24] // width
2333 sub edi, esi 2333 sub edi, esi
2334 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha 2334 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
2335 2335
2336 convertloop: 2336 convertloop:
2337 READYUV422_AVX2 2337 READYUV422_AVX2
2338 YUVTORGB_AVX2(ebp) 2338 YUVTORGB_AVX2(ebx)
2339 STORERGBA_AVX2 2339 STORERGBA_AVX2
2340 2340
2341 sub ecx, 16 2341 sub ecx, 16
2342 jg convertloop 2342 jg convertloop
2343 2343
2344 pop ebp 2344 pop ebx
2345 pop edi 2345 pop edi
2346 pop esi 2346 pop esi
2347 vzeroupper 2347 vzeroupper
2348 ret 2348 ret
2349 } 2349 }
2350 } 2350 }
2351 #endif // HAS_I422TORGBAROW_AVX2 2351 #endif // HAS_I422TORGBAROW_AVX2
2352 2352
2353 #ifdef HAS_I422TOABGRROW_AVX2 2353 #ifdef HAS_I422TOABGRROW_AVX2
2354 // 16 pixels 2354 // 16 pixels
2355 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes). 2355 // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ABGR (64 bytes).
2356 __declspec(naked) 2356 __declspec(naked)
2357 void I422ToABGRRow_AVX2(const uint8* y_buf, 2357 void I422ToABGRRow_AVX2(const uint8* y_buf,
2358 const uint8* u_buf, 2358 const uint8* u_buf,
2359 const uint8* v_buf, 2359 const uint8* v_buf,
2360 uint8* dst_argb, 2360 uint8* dst_argb,
2361 struct YuvConstants* yuvconstants, 2361 struct YuvConstants* yuvconstants,
2362 int width) { 2362 int width) {
2363 __asm { 2363 __asm {
2364 push esi 2364 push esi
2365 push edi 2365 push edi
2366 push ebp 2366 push ebx
2367 mov eax, [esp + 12 + 4] // Y 2367 mov eax, [esp + 12 + 4] // Y
2368 mov esi, [esp + 12 + 8] // U 2368 mov esi, [esp + 12 + 8] // U
2369 mov edi, [esp + 12 + 12] // V 2369 mov edi, [esp + 12 + 12] // V
2370 mov edx, [esp + 12 + 16] // argb 2370 mov edx, [esp + 12 + 16] // argb
2371 mov ebp, [esp + 12 + 20] // yuvconstants 2371 mov ebx, [esp + 12 + 20] // yuvconstants
2372 mov ecx, [esp + 12 + 24] // width 2372 mov ecx, [esp + 12 + 24] // width
2373 sub edi, esi 2373 sub edi, esi
2374 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha 2374 vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha
2375 2375
2376 convertloop: 2376 convertloop:
2377 READYUV422_AVX2 2377 READYUV422_AVX2
2378 YUVTORGB_AVX2(ebp) 2378 YUVTORGB_AVX2(ebx)
2379 STOREABGR_AVX2 2379 STOREABGR_AVX2
2380 2380
2381 sub ecx, 16 2381 sub ecx, 16
2382 jg convertloop 2382 jg convertloop
2383 2383
2384 pop ebp 2384 pop ebx
2385 pop edi 2385 pop edi
2386 pop esi 2386 pop esi
2387 vzeroupper 2387 vzeroupper
2388 ret 2388 ret
2389 } 2389 }
2390 } 2390 }
2391 #endif // HAS_I422TOABGRROW_AVX2 2391 #endif // HAS_I422TOABGRROW_AVX2
2392 2392
2393 #if defined(HAS_I422TOARGBROW_SSSE3) 2393 #if defined(HAS_I422TOARGBROW_SSSE3)
2394 // TODO(fbarchard): Read that does half size on Y and treats 420 as 444. 2394 // TODO(fbarchard): Read that does half size on Y and treats 420 as 444.
(...skipping 213 matching lines...) Expand 10 before | Expand all | Expand 10 after
2608 __declspec(naked) 2608 __declspec(naked)
2609 void I444ToARGBRow_SSSE3(const uint8* y_buf, 2609 void I444ToARGBRow_SSSE3(const uint8* y_buf,
2610 const uint8* u_buf, 2610 const uint8* u_buf,
2611 const uint8* v_buf, 2611 const uint8* v_buf,
2612 uint8* dst_argb, 2612 uint8* dst_argb,
2613 struct YuvConstants* yuvconstants, 2613 struct YuvConstants* yuvconstants,
2614 int width) { 2614 int width) {
2615 __asm { 2615 __asm {
2616 push esi 2616 push esi
2617 push edi 2617 push edi
2618 push ebp 2618 push ebx
2619 mov eax, [esp + 12 + 4] // Y 2619 mov eax, [esp + 12 + 4] // Y
2620 mov esi, [esp + 12 + 8] // U 2620 mov esi, [esp + 12 + 8] // U
2621 mov edi, [esp + 12 + 12] // V 2621 mov edi, [esp + 12 + 12] // V
2622 mov edx, [esp + 12 + 16] // argb 2622 mov edx, [esp + 12 + 16] // argb
2623 mov ebp, [esp + 12 + 20] // yuvconstants 2623 mov ebx, [esp + 12 + 20] // yuvconstants
2624 mov ecx, [esp + 12 + 24] // width 2624 mov ecx, [esp + 12 + 24] // width
2625 sub edi, esi 2625 sub edi, esi
2626 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 2626 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
2627 2627
2628 convertloop: 2628 convertloop:
2629 READYUV444 2629 READYUV444
2630 YUVTORGB(ebp) 2630 YUVTORGB(ebx)
2631 STOREARGB 2631 STOREARGB
2632 2632
2633 sub ecx, 8 2633 sub ecx, 8
2634 jg convertloop 2634 jg convertloop
2635 2635
2636 pop ebp 2636 pop ebx
2637 pop edi 2637 pop edi
2638 pop esi 2638 pop esi
2639 ret 2639 ret
2640 } 2640 }
2641 } 2641 }
2642 2642
2643 // 8 pixels. 2643 // 8 pixels.
2644 // 8 UV values, mixed with 8 Y producing 8 ABGR (32 bytes). 2644 // 8 UV values, mixed with 8 Y producing 8 ABGR (32 bytes).
2645 __declspec(naked) 2645 __declspec(naked)
2646 void I444ToABGRRow_SSSE3(const uint8* y_buf, 2646 void I444ToABGRRow_SSSE3(const uint8* y_buf,
2647 const uint8* u_buf, 2647 const uint8* u_buf,
2648 const uint8* v_buf, 2648 const uint8* v_buf,
2649 uint8* dst_abgr, 2649 uint8* dst_abgr,
2650 struct YuvConstants* yuvconstants, 2650 struct YuvConstants* yuvconstants,
2651 int width) { 2651 int width) {
2652 __asm { 2652 __asm {
2653 push esi 2653 push esi
2654 push edi 2654 push edi
2655 push ebp 2655 push ebx
2656 mov eax, [esp + 12 + 4] // Y 2656 mov eax, [esp + 12 + 4] // Y
2657 mov esi, [esp + 12 + 8] // U 2657 mov esi, [esp + 12 + 8] // U
2658 mov edi, [esp + 12 + 12] // V 2658 mov edi, [esp + 12 + 12] // V
2659 mov edx, [esp + 12 + 16] // abgr 2659 mov edx, [esp + 12 + 16] // abgr
2660 mov ebp, [esp + 12 + 20] // yuvconstants 2660 mov ebx, [esp + 12 + 20] // yuvconstants
2661 mov ecx, [esp + 12 + 24] // width 2661 mov ecx, [esp + 12 + 24] // width
2662 sub edi, esi 2662 sub edi, esi
2663 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 2663 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
2664 2664
2665 convertloop: 2665 convertloop:
2666 READYUV444 2666 READYUV444
2667 YUVTORGB(ebp) 2667 YUVTORGB(ebx)
2668 STOREABGR 2668 STOREABGR
2669 2669
2670 sub ecx, 8 2670 sub ecx, 8
2671 jg convertloop 2671 jg convertloop
2672 2672
2673 pop ebp 2673 pop ebx
2674 pop edi 2674 pop edi
2675 pop esi 2675 pop esi
2676 ret 2676 ret
2677 } 2677 }
2678 } 2678 }
2679 2679
2680 // 8 pixels. 2680 // 8 pixels.
2681 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB24 (24 bytes). 2681 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB24 (24 bytes).
2682 __declspec(naked) 2682 __declspec(naked)
2683 void I422ToRGB24Row_SSSE3(const uint8* y_buf, 2683 void I422ToRGB24Row_SSSE3(const uint8* y_buf,
2684 const uint8* u_buf, 2684 const uint8* u_buf,
2685 const uint8* v_buf, 2685 const uint8* v_buf,
2686 uint8* dst_rgb24, 2686 uint8* dst_rgb24,
2687 struct YuvConstants* yuvconstants, 2687 struct YuvConstants* yuvconstants,
2688 int width) { 2688 int width) {
2689 __asm { 2689 __asm {
2690 push esi 2690 push esi
2691 push edi 2691 push edi
2692 push ebp 2692 push ebx
2693 mov eax, [esp + 12 + 4] // Y 2693 mov eax, [esp + 12 + 4] // Y
2694 mov esi, [esp + 12 + 8] // U 2694 mov esi, [esp + 12 + 8] // U
2695 mov edi, [esp + 12 + 12] // V 2695 mov edi, [esp + 12 + 12] // V
2696 mov edx, [esp + 12 + 16] // argb 2696 mov edx, [esp + 12 + 16] // argb
2697 mov ebp, [esp + 12 + 20] // yuvconstants 2697 mov ebx, [esp + 12 + 20] // yuvconstants
2698 mov ecx, [esp + 12 + 24] // width 2698 mov ecx, [esp + 12 + 24] // width
2699 sub edi, esi 2699 sub edi, esi
2700 movdqa xmm5, xmmword ptr kShuffleMaskARGBToRGB24_0 2700 movdqa xmm5, xmmword ptr kShuffleMaskARGBToRGB24_0
2701 movdqa xmm6, xmmword ptr kShuffleMaskARGBToRGB24 2701 movdqa xmm6, xmmword ptr kShuffleMaskARGBToRGB24
2702 2702
2703 convertloop: 2703 convertloop:
2704 READYUV422 2704 READYUV422
2705 YUVTORGB(ebp) 2705 YUVTORGB(ebx)
2706 STORERGB24 2706 STORERGB24
2707 2707
2708 sub ecx, 8 2708 sub ecx, 8
2709 jg convertloop 2709 jg convertloop
2710 2710
2711 pop ebp 2711 pop ebx
2712 pop edi 2712 pop edi
2713 pop esi 2713 pop esi
2714 ret 2714 ret
2715 } 2715 }
2716 } 2716 }
2717 2717
2718 // 8 pixels. 2718 // 8 pixels.
2719 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RAW (24 bytes). 2719 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RAW (24 bytes).
2720 __declspec(naked) 2720 __declspec(naked)
2721 void I422ToRAWRow_SSSE3(const uint8* y_buf, 2721 void I422ToRAWRow_SSSE3(const uint8* y_buf,
2722 const uint8* u_buf, 2722 const uint8* u_buf,
2723 const uint8* v_buf, 2723 const uint8* v_buf,
2724 uint8* dst_raw, 2724 uint8* dst_raw,
2725 struct YuvConstants* yuvconstants, 2725 struct YuvConstants* yuvconstants,
2726 int width) { 2726 int width) {
2727 __asm { 2727 __asm {
2728 push esi 2728 push esi
2729 push edi 2729 push edi
2730 push ebp 2730 push ebx
2731 mov eax, [esp + 12 + 4] // Y 2731 mov eax, [esp + 12 + 4] // Y
2732 mov esi, [esp + 12 + 8] // U 2732 mov esi, [esp + 12 + 8] // U
2733 mov edi, [esp + 12 + 12] // V 2733 mov edi, [esp + 12 + 12] // V
2734 mov edx, [esp + 12 + 16] // argb 2734 mov edx, [esp + 12 + 16] // argb
2735 mov ebp, [esp + 12 + 20] // yuvconstants 2735 mov ebx, [esp + 12 + 20] // yuvconstants
2736 mov ecx, [esp + 12 + 24] // width 2736 mov ecx, [esp + 12 + 24] // width
2737 sub edi, esi 2737 sub edi, esi
2738 movdqa xmm5, xmmword ptr kShuffleMaskARGBToRAW_0 2738 movdqa xmm5, xmmword ptr kShuffleMaskARGBToRAW_0
2739 movdqa xmm6, xmmword ptr kShuffleMaskARGBToRAW 2739 movdqa xmm6, xmmword ptr kShuffleMaskARGBToRAW
2740 2740
2741 convertloop: 2741 convertloop:
2742 READYUV422 2742 READYUV422
2743 YUVTORGB(ebp) 2743 YUVTORGB(ebx)
2744 STORERAW 2744 STORERAW
2745 2745
2746 sub ecx, 8 2746 sub ecx, 8
2747 jg convertloop 2747 jg convertloop
2748 2748
2749 pop ebp 2749 pop ebx
2750 pop edi 2750 pop edi
2751 pop esi 2751 pop esi
2752 ret 2752 ret
2753 } 2753 }
2754 } 2754 }
2755 2755
2756 // 8 pixels 2756 // 8 pixels
2757 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB565 (16 bytes). 2757 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB565 (16 bytes).
2758 __declspec(naked) 2758 __declspec(naked)
2759 void I422ToRGB565Row_SSSE3(const uint8* y_buf, 2759 void I422ToRGB565Row_SSSE3(const uint8* y_buf,
2760 const uint8* u_buf, 2760 const uint8* u_buf,
2761 const uint8* v_buf, 2761 const uint8* v_buf,
2762 uint8* rgb565_buf, 2762 uint8* rgb565_buf,
2763 struct YuvConstants* yuvconstants, 2763 struct YuvConstants* yuvconstants,
2764 int width) { 2764 int width) {
2765 __asm { 2765 __asm {
2766 push esi 2766 push esi
2767 push edi 2767 push edi
2768 push ebp 2768 push ebx
2769 mov eax, [esp + 12 + 4] // Y 2769 mov eax, [esp + 12 + 4] // Y
2770 mov esi, [esp + 12 + 8] // U 2770 mov esi, [esp + 12 + 8] // U
2771 mov edi, [esp + 12 + 12] // V 2771 mov edi, [esp + 12 + 12] // V
2772 mov edx, [esp + 12 + 16] // argb 2772 mov edx, [esp + 12 + 16] // argb
2773 mov ebp, [esp + 12 + 20] // yuvconstants 2773 mov ebx, [esp + 12 + 20] // yuvconstants
2774 mov ecx, [esp + 12 + 24] // width 2774 mov ecx, [esp + 12 + 24] // width
2775 sub edi, esi 2775 sub edi, esi
2776 pcmpeqb xmm5, xmm5 // generate mask 0x0000001f 2776 pcmpeqb xmm5, xmm5 // generate mask 0x0000001f
2777 psrld xmm5, 27 2777 psrld xmm5, 27
2778 pcmpeqb xmm6, xmm6 // generate mask 0x000007e0 2778 pcmpeqb xmm6, xmm6 // generate mask 0x000007e0
2779 psrld xmm6, 26 2779 psrld xmm6, 26
2780 pslld xmm6, 5 2780 pslld xmm6, 5
2781 pcmpeqb xmm7, xmm7 // generate mask 0xfffff800 2781 pcmpeqb xmm7, xmm7 // generate mask 0xfffff800
2782 pslld xmm7, 11 2782 pslld xmm7, 11
2783 2783
2784 convertloop: 2784 convertloop:
2785 READYUV422 2785 READYUV422
2786 YUVTORGB(ebp) 2786 YUVTORGB(ebx)
2787 STORERGB565 2787 STORERGB565
2788 2788
2789 sub ecx, 8 2789 sub ecx, 8
2790 jg convertloop 2790 jg convertloop
2791 2791
2792 pop ebp 2792 pop ebx
2793 pop edi 2793 pop edi
2794 pop esi 2794 pop esi
2795 ret 2795 ret
2796 } 2796 }
2797 } 2797 }
2798 2798
2799 // 8 pixels. 2799 // 8 pixels.
2800 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). 2800 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
2801 __declspec(naked) 2801 __declspec(naked)
2802 void I422ToARGBRow_SSSE3(const uint8* y_buf, 2802 void I422ToARGBRow_SSSE3(const uint8* y_buf,
2803 const uint8* u_buf, 2803 const uint8* u_buf,
2804 const uint8* v_buf, 2804 const uint8* v_buf,
2805 uint8* dst_argb, 2805 uint8* dst_argb,
2806 struct YuvConstants* yuvconstants, 2806 struct YuvConstants* yuvconstants,
2807 int width) { 2807 int width) {
2808 __asm { 2808 __asm {
2809 push esi 2809 push esi
2810 push edi 2810 push edi
2811 push ebp 2811 push ebx
2812 mov eax, [esp + 12 + 4] // Y 2812 mov eax, [esp + 12 + 4] // Y
2813 mov esi, [esp + 12 + 8] // U 2813 mov esi, [esp + 12 + 8] // U
2814 mov edi, [esp + 12 + 12] // V 2814 mov edi, [esp + 12 + 12] // V
2815 mov edx, [esp + 12 + 16] // argb 2815 mov edx, [esp + 12 + 16] // argb
2816 mov ebp, [esp + 12 + 20] // yuvconstants 2816 mov ebx, [esp + 12 + 20] // yuvconstants
2817 mov ecx, [esp + 12 + 24] // width 2817 mov ecx, [esp + 12 + 24] // width
2818 sub edi, esi 2818 sub edi, esi
2819 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 2819 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
2820 2820
2821 convertloop: 2821 convertloop:
2822 READYUV422 2822 READYUV422
2823 YUVTORGB(ebp) 2823 YUVTORGB(ebx)
2824 STOREARGB 2824 STOREARGB
2825 2825
2826 sub ecx, 8 2826 sub ecx, 8
2827 jg convertloop 2827 jg convertloop
2828 2828
2829 pop ebp 2829 pop ebx
2830 pop edi 2830 pop edi
2831 pop esi 2831 pop esi
2832 ret 2832 ret
2833 } 2833 }
2834 } 2834 }
2835 2835
2836 // 8 pixels. 2836 // 8 pixels.
2837 // 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). 2837 // 2 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
2838 // Similar to I420 but duplicate UV once more. 2838 // Similar to I420 but duplicate UV once more.
2839 __declspec(naked) 2839 __declspec(naked)
2840 void I411ToARGBRow_SSSE3(const uint8* y_buf, 2840 void I411ToARGBRow_SSSE3(const uint8* y_buf,
2841 const uint8* u_buf, 2841 const uint8* u_buf,
2842 const uint8* v_buf, 2842 const uint8* v_buf,
2843 uint8* dst_argb, 2843 uint8* dst_argb,
2844 struct YuvConstants* yuvconstants, 2844 struct YuvConstants* yuvconstants,
2845 int width) { 2845 int width) {
2846 __asm { 2846 __asm {
2847 push esi 2847 push esi
2848 push edi 2848 push edi
2849 push ebp 2849 push ebx
2850 mov eax, [esp + 12 + 4] // Y 2850 mov eax, [esp + 12 + 4] // Y
2851 mov esi, [esp + 12 + 8] // U 2851 mov esi, [esp + 12 + 8] // U
2852 mov edi, [esp + 12 + 12] // V 2852 mov edi, [esp + 12 + 12] // V
2853 mov edx, [esp + 12 + 16] // argb 2853 mov edx, [esp + 12 + 16] // argb
2854 mov ebp, [esp + 12 + 20] // yuvconstants 2854 mov ebx, [esp + 12 + 20] // yuvconstants
2855 mov ecx, [esp + 12 + 24] // width 2855 mov ecx, [esp + 12 + 24] // width
2856 sub edi, esi 2856 sub edi, esi
2857 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 2857 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
2858 2858
2859 convertloop: 2859 convertloop:
2860 READYUV411 2860 READYUV411
2861 YUVTORGB(ebp) 2861 YUVTORGB(ebx)
2862 STOREARGB 2862 STOREARGB
2863 2863
2864 sub ecx, 8 2864 sub ecx, 8
2865 jg convertloop 2865 jg convertloop
2866 2866
2867 pop ebp 2867 pop ebx
2868 pop edi 2868 pop edi
2869 pop esi 2869 pop esi
2870 ret 2870 ret
2871 } 2871 }
2872 } 2872 }
2873 2873
2874 // 8 pixels. 2874 // 8 pixels.
2875 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). 2875 // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes).
2876 __declspec(naked) 2876 __declspec(naked)
2877 void NV12ToARGBRow_SSSE3(const uint8* y_buf, 2877 void NV12ToARGBRow_SSSE3(const uint8* y_buf,
2878 const uint8* uv_buf, 2878 const uint8* uv_buf,
2879 uint8* dst_argb, 2879 uint8* dst_argb,
2880 struct YuvConstants* yuvconstants, 2880 struct YuvConstants* yuvconstants,
2881 int width) { 2881 int width) {
2882 __asm { 2882 __asm {
2883 push esi 2883 push esi
2884 push ebp 2884 push ebx
2885 mov eax, [esp + 8 + 4] // Y 2885 mov eax, [esp + 8 + 4] // Y
2886 mov esi, [esp + 8 + 8] // UV 2886 mov esi, [esp + 8 + 8] // UV
2887 mov edx, [esp + 8 + 12] // argb 2887 mov edx, [esp + 8 + 12] // argb
2888 mov ebp, [esp + 8 + 16] // yuvconstants 2888 mov ebx, [esp + 8 + 16] // yuvconstants
2889 mov ecx, [esp + 8 + 20] // width 2889 mov ecx, [esp + 8 + 20] // width
2890 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 2890 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
2891 2891
2892 convertloop: 2892 convertloop:
2893 READNV12 2893 READNV12
2894 YUVTORGB(ebp) 2894 YUVTORGB(ebx)
2895 STOREARGB 2895 STOREARGB
2896 2896
2897 sub ecx, 8 2897 sub ecx, 8
2898 jg convertloop 2898 jg convertloop
2899 2899
2900 pop ebp 2900 pop ebx
2901 pop esi 2901 pop esi
2902 ret 2902 ret
2903 } 2903 }
2904 } 2904 }
2905 2905
2906 // 8 pixels. 2906 // 8 pixels.
2907 // 4 YUY2 values with 8 Y and 4 UV producing 8 ARGB (32 bytes). 2907 // 4 YUY2 values with 8 Y and 4 UV producing 8 ARGB (32 bytes).
2908 __declspec(naked) 2908 __declspec(naked)
2909 void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2, 2909 void YUY2ToARGBRow_SSSE3(const uint8* src_yuy2,
2910 uint8* dst_argb, 2910 uint8* dst_argb,
2911 struct YuvConstants* yuvconstants, 2911 struct YuvConstants* yuvconstants,
2912 int width) { 2912 int width) {
2913 __asm { 2913 __asm {
2914 push ebp 2914 push ebx
2915 mov eax, [esp + 4 + 4] // yuy2 2915 mov eax, [esp + 4 + 4] // yuy2
2916 mov edx, [esp + 4 + 8] // argb 2916 mov edx, [esp + 4 + 8] // argb
2917 mov ebp, [esp + 4 + 12] // yuvconstants 2917 mov ebx, [esp + 4 + 12] // yuvconstants
2918 mov ecx, [esp + 4 + 16] // width 2918 mov ecx, [esp + 4 + 16] // width
2919 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 2919 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
2920 2920
2921 convertloop: 2921 convertloop:
2922 READYUY2 2922 READYUY2
2923 YUVTORGB(ebp) 2923 YUVTORGB(ebx)
2924 STOREARGB 2924 STOREARGB
2925 2925
2926 sub ecx, 8 2926 sub ecx, 8
2927 jg convertloop 2927 jg convertloop
2928 2928
2929 pop ebp 2929 pop ebx
2930 ret 2930 ret
2931 } 2931 }
2932 } 2932 }
2933 2933
2934 // 8 pixels. 2934 // 8 pixels.
2935 // 4 UYVY values with 8 Y and 4 UV producing 8 ARGB (32 bytes). 2935 // 4 UYVY values with 8 Y and 4 UV producing 8 ARGB (32 bytes).
2936 __declspec(naked) 2936 __declspec(naked)
2937 void UYVYToARGBRow_SSSE3(const uint8* src_uyvy, 2937 void UYVYToARGBRow_SSSE3(const uint8* src_uyvy,
2938 uint8* dst_argb, 2938 uint8* dst_argb,
2939 struct YuvConstants* yuvconstants, 2939 struct YuvConstants* yuvconstants,
2940 int width) { 2940 int width) {
2941 __asm { 2941 __asm {
2942 push ebp 2942 push ebx
2943 mov eax, [esp + 4 + 4] // uyvy 2943 mov eax, [esp + 4 + 4] // uyvy
2944 mov edx, [esp + 4 + 8] // argb 2944 mov edx, [esp + 4 + 8] // argb
2945 mov ebp, [esp + 4 + 12] // yuvconstants 2945 mov ebx, [esp + 4 + 12] // yuvconstants
2946 mov ecx, [esp + 4 + 16] // width 2946 mov ecx, [esp + 4 + 16] // width
2947 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 2947 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
2948 2948
2949 convertloop: 2949 convertloop:
2950 READUYVY 2950 READUYVY
2951 YUVTORGB(ebp) 2951 YUVTORGB(ebx)
2952 STOREARGB 2952 STOREARGB
2953 2953
2954 sub ecx, 8 2954 sub ecx, 8
2955 jg convertloop 2955 jg convertloop
2956 2956
2957 pop ebp 2957 pop ebx
2958 ret 2958 ret
2959 } 2959 }
2960 } 2960 }
2961 2961
2962 __declspec(naked) 2962 __declspec(naked)
2963 void I422ToBGRARow_SSSE3(const uint8* y_buf, 2963 void I422ToBGRARow_SSSE3(const uint8* y_buf,
2964 const uint8* u_buf, 2964 const uint8* u_buf,
2965 const uint8* v_buf, 2965 const uint8* v_buf,
2966 uint8* dst_bgra, 2966 uint8* dst_bgra,
2967 struct YuvConstants* yuvconstants, 2967 struct YuvConstants* yuvconstants,
2968 int width) { 2968 int width) {
2969 __asm { 2969 __asm {
2970 push esi 2970 push esi
2971 push edi 2971 push edi
2972 push ebp 2972 push ebx
2973 mov eax, [esp + 12 + 4] // Y 2973 mov eax, [esp + 12 + 4] // Y
2974 mov esi, [esp + 12 + 8] // U 2974 mov esi, [esp + 12 + 8] // U
2975 mov edi, [esp + 12 + 12] // V 2975 mov edi, [esp + 12 + 12] // V
2976 mov edx, [esp + 12 + 16] // bgra 2976 mov edx, [esp + 12 + 16] // bgra
2977 mov ebp, [esp + 12 + 20] // yuvconstants 2977 mov ebx, [esp + 12 + 20] // yuvconstants
2978 mov ecx, [esp + 12 + 24] // width 2978 mov ecx, [esp + 12 + 24] // width
2979 sub edi, esi 2979 sub edi, esi
2980 2980
2981 convertloop: 2981 convertloop:
2982 READYUV422 2982 READYUV422
2983 YUVTORGB(ebp) 2983 YUVTORGB(ebx)
2984 STOREBGRA 2984 STOREBGRA
2985 2985
2986 sub ecx, 8 2986 sub ecx, 8
2987 jg convertloop 2987 jg convertloop
2988 2988
2989 pop ebp 2989 pop ebx
2990 pop edi 2990 pop edi
2991 pop esi 2991 pop esi
2992 ret 2992 ret
2993 } 2993 }
2994 } 2994 }
2995 2995
2996 __declspec(naked) 2996 __declspec(naked)
2997 void I422ToABGRRow_SSSE3(const uint8* y_buf, 2997 void I422ToABGRRow_SSSE3(const uint8* y_buf,
2998 const uint8* u_buf, 2998 const uint8* u_buf,
2999 const uint8* v_buf, 2999 const uint8* v_buf,
3000 uint8* dst_abgr, 3000 uint8* dst_abgr,
3001 struct YuvConstants* yuvconstants, 3001 struct YuvConstants* yuvconstants,
3002 int width) { 3002 int width) {
3003 __asm { 3003 __asm {
3004 push esi 3004 push esi
3005 push edi 3005 push edi
3006 push ebp 3006 push ebx
3007 mov eax, [esp + 12 + 4] // Y 3007 mov eax, [esp + 12 + 4] // Y
3008 mov esi, [esp + 12 + 8] // U 3008 mov esi, [esp + 12 + 8] // U
3009 mov edi, [esp + 12 + 12] // V 3009 mov edi, [esp + 12 + 12] // V
3010 mov edx, [esp + 12 + 16] // abgr 3010 mov edx, [esp + 12 + 16] // abgr
3011 mov ebp, [esp + 12 + 20] // yuvconstants 3011 mov ebx, [esp + 12 + 20] // yuvconstants
3012 mov ecx, [esp + 12 + 24] // width 3012 mov ecx, [esp + 12 + 24] // width
3013 sub edi, esi 3013 sub edi, esi
3014 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha 3014 pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha
3015 3015
3016 convertloop: 3016 convertloop:
3017 READYUV422 3017 READYUV422
3018 YUVTORGB(ebp) 3018 YUVTORGB(ebx)
3019 STOREABGR 3019 STOREABGR
3020 3020
3021 sub ecx, 8 3021 sub ecx, 8
3022 jg convertloop 3022 jg convertloop
3023 3023
3024 pop ebp 3024 pop ebx
3025 pop edi 3025 pop edi
3026 pop esi 3026 pop esi
3027 ret 3027 ret
3028 } 3028 }
3029 } 3029 }
3030 3030
3031 __declspec(naked) 3031 __declspec(naked)
3032 void I422ToRGBARow_SSSE3(const uint8* y_buf, 3032 void I422ToRGBARow_SSSE3(const uint8* y_buf,
3033 const uint8* u_buf, 3033 const uint8* u_buf,
3034 const uint8* v_buf, 3034 const uint8* v_buf,
3035 uint8* dst_rgba, 3035 uint8* dst_rgba,
3036 struct YuvConstants* yuvconstants, 3036 struct YuvConstants* yuvconstants,
3037 int width) { 3037 int width) {
3038 __asm { 3038 __asm {
3039 push esi 3039 push esi
3040 push edi 3040 push edi
3041 push ebp 3041 push ebx
3042 mov eax, [esp + 12 + 4] // Y 3042 mov eax, [esp + 12 + 4] // Y
3043 mov esi, [esp + 12 + 8] // U 3043 mov esi, [esp + 12 + 8] // U
3044 mov edi, [esp + 12 + 12] // V 3044 mov edi, [esp + 12 + 12] // V
3045 mov edx, [esp + 12 + 16] // rgba 3045 mov edx, [esp + 12 + 16] // rgba
3046 mov ebp, [esp + 12 + 20] // yuvconstants 3046 mov ebx, [esp + 12 + 20] // yuvconstants
3047 mov ecx, [esp + 12 + 24] // width 3047 mov ecx, [esp + 12 + 24] // width
3048 sub edi, esi 3048 sub edi, esi
3049 3049
3050 convertloop: 3050 convertloop:
3051 READYUV422 3051 READYUV422
3052 YUVTORGB(ebp) 3052 YUVTORGB(ebx)
3053 STORERGBA 3053 STORERGBA
3054 3054
3055 sub ecx, 8 3055 sub ecx, 8
3056 jg convertloop 3056 jg convertloop
3057 3057
3058 pop ebp 3058 pop ebx
3059 pop edi 3059 pop edi
3060 pop esi 3060 pop esi
3061 ret 3061 ret
3062 } 3062 }
3063 } 3063 }
3064 #endif // HAS_I422TOARGBROW_SSSE3 3064 #endif // HAS_I422TOARGBROW_SSSE3
3065 3065
3066 #ifdef HAS_I400TOARGBROW_SSE2 3066 #ifdef HAS_I400TOARGBROW_SSE2
3067 // 8 pixels of Y converted to 8 pixels of ARGB (32 bytes). 3067 // 8 pixels of Y converted to 8 pixels of ARGB (32 bytes).
3068 __declspec(naked) 3068 __declspec(naked)
(...skipping 3386 matching lines...) Expand 10 before | Expand all | Expand 10 after
6455 } 6455 }
6456 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 6456 #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3
6457 6457
6458 #endif // defined(_M_X64) 6458 #endif // defined(_M_X64)
6459 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64)) 6459 #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64))
6460 6460
6461 #ifdef __cplusplus 6461 #ifdef __cplusplus
6462 } // extern "C" 6462 } // extern "C"
6463 } // namespace libyuv 6463 } // namespace libyuv
6464 #endif 6464 #endif
OLDNEW
« no previous file with comments | « include/libyuv/version.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698