Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(218)

Side by Side Diff: source/row_neon64.cc

Issue 1363503002: yuvconstants for all YUV to RGB conversion functions. (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: mips dspr2 add constants parameter Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_neon.cc ('k') | source/row_win.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 /* 1 /*
2 * Copyright 2014 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2014 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after
84 84
85 // Read 8 UYVY 85 // Read 8 UYVY
86 #define READUYVY \ 86 #define READUYVY \
87 MEMACCESS(0) \ 87 MEMACCESS(0) \
88 "ld2 {v2.8b, v3.8b}, [%0], #16 \n" \ 88 "ld2 {v2.8b, v3.8b}, [%0], #16 \n" \
89 "orr v0.8b, v3.8b, v3.8b \n" \ 89 "orr v0.8b, v3.8b, v3.8b \n" \
90 "uzp1 v1.8b, v2.8b, v2.8b \n" \ 90 "uzp1 v1.8b, v2.8b, v2.8b \n" \
91 "uzp2 v3.8b, v2.8b, v2.8b \n" \ 91 "uzp2 v3.8b, v2.8b, v2.8b \n" \
92 "ins v1.s[1], v3.s[0] \n" 92 "ins v1.s[1], v3.s[0] \n"
93 93
94 #define YUV422TORGB_SETUP_REG \ 94 // TODO(fbarchard): replace movi with constants from struct.
95 #define YUVTORGB_SETUP \
95 "ld1r {v24.8h}, [%[kUVBiasBGR]], #2 \n" \ 96 "ld1r {v24.8h}, [%[kUVBiasBGR]], #2 \n" \
96 "ld1r {v25.8h}, [%[kUVBiasBGR]], #2 \n" \ 97 "ld1r {v25.8h}, [%[kUVBiasBGR]], #2 \n" \
97 "ld1r {v26.8h}, [%[kUVBiasBGR]] \n" \ 98 "ld1r {v26.8h}, [%[kUVBiasBGR]] \n" \
98 "ld1r {v31.4s}, [%[kYToRgb]] \n" \ 99 "ld1r {v31.4s}, [%[kYToRgb]] \n" \
99 "movi v27.8h, #128 \n" \ 100 "movi v27.8h, #128 \n" \
100 "movi v28.8h, #102 \n" \ 101 "movi v28.8h, #102 \n" \
101 "movi v29.8h, #25 \n" \ 102 "movi v29.8h, #25 \n" \
102 "movi v30.8h, #52 \n" 103 "movi v30.8h, #52 \n"
103 104
104 #define YUV422TORGB(vR, vG, vB) \ 105 #define YUVTORGB(vR, vG, vB) \
105 "uxtl v0.8h, v0.8b \n" /* Extract Y */ \ 106 "uxtl v0.8h, v0.8b \n" /* Extract Y */ \
106 "shll v2.8h, v1.8b, #8 \n" /* Replicate UV */ \ 107 "shll v2.8h, v1.8b, #8 \n" /* Replicate UV */ \
107 "ushll2 v3.4s, v0.8h, #0 \n" /* Y */ \ 108 "ushll2 v3.4s, v0.8h, #0 \n" /* Y */ \
108 "ushll v0.4s, v0.4h, #0 \n" \ 109 "ushll v0.4s, v0.4h, #0 \n" \
109 "mul v3.4s, v3.4s, v31.4s \n" \ 110 "mul v3.4s, v3.4s, v31.4s \n" \
110 "mul v0.4s, v0.4s, v31.4s \n" \ 111 "mul v0.4s, v0.4s, v31.4s \n" \
111 "sqshrun v0.4h, v0.4s, #16 \n" \ 112 "sqshrun v0.4h, v0.4s, #16 \n" \
112 "sqshrun2 v0.8h, v3.4s, #16 \n" /* Y */ \ 113 "sqshrun2 v0.8h, v3.4s, #16 \n" /* Y */ \
113 "uaddw v1.8h, v2.8h, v1.8b \n" /* Replicate UV */ \ 114 "uaddw v1.8h, v2.8h, v1.8b \n" /* Replicate UV */ \
114 "mov v2.d[0], v1.d[1] \n" /* Extract V */ \ 115 "mov v2.d[0], v1.d[1] \n" /* Extract V */ \
(...skipping 21 matching lines...) Expand all
136 "movi v22.8h, #19, lsl #0 \n" /* UR coefficient (-0.2969) / 2 */ \ 137 "movi v22.8h, #19, lsl #0 \n" /* UR coefficient (-0.2969) / 2 */ \
137 "movi v23.8h, #9, lsl #0 \n" /* VB coefficient (-0.1406) / 2 */ \ 138 "movi v23.8h, #9, lsl #0 \n" /* VB coefficient (-0.1406) / 2 */ \
138 "movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \ 139 "movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \
139 "movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */ 140 "movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */
140 141
141 #ifdef HAS_I444TOARGBROW_NEON 142 #ifdef HAS_I444TOARGBROW_NEON
142 void I444ToARGBRow_NEON(const uint8* src_y, 143 void I444ToARGBRow_NEON(const uint8* src_y,
143 const uint8* src_u, 144 const uint8* src_u,
144 const uint8* src_v, 145 const uint8* src_v,
145 uint8* dst_argb, 146 uint8* dst_argb,
147 struct YuvConstants* yuvconstants,
146 int width) { 148 int width) {
147 asm volatile ( 149 asm volatile (
148 YUV422TORGB_SETUP_REG 150 YUVTORGB_SETUP
149 "1: \n" 151 "1: \n"
150 READYUV444 152 READYUV444
151 YUV422TORGB(v22, v21, v20) 153 YUVTORGB(v22, v21, v20)
152 "subs %w4, %w4, #8 \n" 154 "subs %w4, %w4, #8 \n"
153 "movi v23.8b, #255 \n" /* A */ 155 "movi v23.8b, #255 \n" /* A */
154 MEMACCESS(3) 156 MEMACCESS(3)
155 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" 157 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n"
156 "b.gt 1b \n" 158 "b.gt 1b \n"
157 : "+r"(src_y), // %0 159 : "+r"(src_y), // %0
158 "+r"(src_u), // %1 160 "+r"(src_u), // %1
159 "+r"(src_v), // %2 161 "+r"(src_v), // %2
160 "+r"(dst_argb), // %3 162 "+r"(dst_argb), // %3
161 "+r"(width) // %4 163 "+r"(width) // %4
162 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), 164 : [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
163 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) 165 [kYToRgb]"r"(&yuvconstants->kYToRgb)
164 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 166 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
165 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 167 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
166 ); 168 );
167 } 169 }
168 #endif // HAS_I444TOARGBROW_NEON 170 #endif // HAS_I444TOARGBROW_NEON
169 171
170 // TODO(fbarchard): Switch to Matrix version of this function. 172 // TODO(fbarchard): Switch to Matrix version of this function.
171 #ifdef HAS_I422TOARGBROW_NEON 173 #ifdef HAS_I422TOARGBROW_NEON
172 void I422ToARGBRow_NEON(const uint8* src_y, 174 void I422ToARGBRow_NEON(const uint8* src_y,
173 const uint8* src_u, 175 const uint8* src_u,
174 const uint8* src_v, 176 const uint8* src_v,
175 uint8* dst_argb, 177 uint8* dst_argb,
178 struct YuvConstants* yuvconstants,
176 int width) { 179 int width) {
177 asm volatile ( 180 asm volatile (
178 YUV422TORGB_SETUP_REG 181 YUVTORGB_SETUP
179 "1: \n" 182 "1: \n"
180 READYUV422 183 READYUV422
181 YUV422TORGB(v22, v21, v20) 184 YUVTORGB(v22, v21, v20)
182 "subs %w4, %w4, #8 \n" 185 "subs %w4, %w4, #8 \n"
183 "movi v23.8b, #255 \n" /* A */ 186 "movi v23.8b, #255 \n" /* A */
184 MEMACCESS(3) 187 MEMACCESS(3)
185 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" 188 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n"
186 "b.gt 1b \n" 189 "b.gt 1b \n"
187 : "+r"(src_y), // %0 190 : "+r"(src_y), // %0
188 "+r"(src_u), // %1 191 "+r"(src_u), // %1
189 "+r"(src_v), // %2 192 "+r"(src_v), // %2
190 "+r"(dst_argb), // %3 193 "+r"(dst_argb), // %3
191 "+r"(width) // %4 194 "+r"(width) // %4
192 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), 195 : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
193 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) 196 [kYToRgb]"r"(&kYuvConstants.kYToRgb)
194 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 197 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
195 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 198 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
196 ); 199 );
197 } 200 }
198 #endif // HAS_I422TOARGBROW_NEON 201 #endif // HAS_I422TOARGBROW_NEON
199 202
200 #ifdef HAS_I411TOARGBROW_NEON 203 #ifdef HAS_I411TOARGBROW_NEON
201 void I411ToARGBRow_NEON(const uint8* src_y, 204 void I411ToARGBRow_NEON(const uint8* src_y,
202 const uint8* src_u, 205 const uint8* src_u,
203 const uint8* src_v, 206 const uint8* src_v,
204 uint8* dst_argb, 207 uint8* dst_argb,
208 struct YuvConstants* yuvconstants,
205 int width) { 209 int width) {
206 asm volatile ( 210 asm volatile (
207 YUV422TORGB_SETUP_REG 211 YUVTORGB_SETUP
208 "1: \n" 212 "1: \n"
209 READYUV411 213 READYUV411
210 YUV422TORGB(v22, v21, v20) 214 YUVTORGB(v22, v21, v20)
211 "subs %w4, %w4, #8 \n" 215 "subs %w4, %w4, #8 \n"
212 "movi v23.8b, #255 \n" /* A */ 216 "movi v23.8b, #255 \n" /* A */
213 MEMACCESS(3) 217 MEMACCESS(3)
214 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" 218 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n"
215 "b.gt 1b \n" 219 "b.gt 1b \n"
216 : "+r"(src_y), // %0 220 : "+r"(src_y), // %0
217 "+r"(src_u), // %1 221 "+r"(src_u), // %1
218 "+r"(src_v), // %2 222 "+r"(src_v), // %2
219 "+r"(dst_argb), // %3 223 "+r"(dst_argb), // %3
220 "+r"(width) // %4 224 "+r"(width) // %4
221 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), 225 : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
222 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) 226 [kYToRgb]"r"(&kYuvConstants.kYToRgb)
223 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 227 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
224 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 228 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
225 ); 229 );
226 } 230 }
227 #endif // HAS_I411TOARGBROW_NEON 231 #endif // HAS_I411TOARGBROW_NEON
228 232
229 #ifdef HAS_I422TOBGRAROW_NEON 233 #ifdef HAS_I422TOBGRAROW_NEON
230 void I422ToBGRARow_NEON(const uint8* src_y, 234 void I422ToBGRARow_NEON(const uint8* src_y,
231 const uint8* src_u, 235 const uint8* src_u,
232 const uint8* src_v, 236 const uint8* src_v,
233 uint8* dst_bgra, 237 uint8* dst_bgra,
238 struct YuvConstants* yuvconstants,
234 int width) { 239 int width) {
235 asm volatile ( 240 asm volatile (
236 YUV422TORGB_SETUP_REG 241 YUVTORGB_SETUP
237 "1: \n" 242 "1: \n"
238 READYUV422 243 READYUV422
239 YUV422TORGB(v21, v22, v23) 244 YUVTORGB(v21, v22, v23)
240 "subs %w4, %w4, #8 \n" 245 "subs %w4, %w4, #8 \n"
241 "movi v20.8b, #255 \n" /* A */ 246 "movi v20.8b, #255 \n" /* A */
242 MEMACCESS(3) 247 MEMACCESS(3)
243 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" 248 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n"
244 "b.gt 1b \n" 249 "b.gt 1b \n"
245 : "+r"(src_y), // %0 250 : "+r"(src_y), // %0
246 "+r"(src_u), // %1 251 "+r"(src_u), // %1
247 "+r"(src_v), // %2 252 "+r"(src_v), // %2
248 "+r"(dst_bgra), // %3 253 "+r"(dst_bgra), // %3
249 "+r"(width) // %4 254 "+r"(width) // %4
250 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), 255 : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
251 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) 256 [kYToRgb]"r"(&kYuvConstants.kYToRgb)
252 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 257 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
253 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 258 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
254 ); 259 );
255 } 260 }
256 #endif // HAS_I422TOBGRAROW_NEON 261 #endif // HAS_I422TOBGRAROW_NEON
257 262
258 // TODO(fbarchard): Switch to Matrix version of this function. 263 // TODO(fbarchard): Switch to Matrix version of this function.
259 #ifdef HAS_I422TOABGRROW_NEON 264 #ifdef HAS_I422TOABGRROW_NEON
260 void I422ToABGRRow_NEON(const uint8* src_y, 265 void I422ToABGRRow_NEON(const uint8* src_y,
261 const uint8* src_u, 266 const uint8* src_u,
262 const uint8* src_v, 267 const uint8* src_v,
263 uint8* dst_abgr, 268 uint8* dst_abgr,
269 struct YuvConstants* yuvconstants,
264 int width) { 270 int width) {
265 asm volatile ( 271 asm volatile (
266 YUV422TORGB_SETUP_REG 272 YUVTORGB_SETUP
267 "1: \n" 273 "1: \n"
268 READYUV422 274 READYUV422
269 YUV422TORGB(v20, v21, v22) 275 YUVTORGB(v20, v21, v22)
270 "subs %w4, %w4, #8 \n" 276 "subs %w4, %w4, #8 \n"
271 "movi v23.8b, #255 \n" /* A */ 277 "movi v23.8b, #255 \n" /* A */
272 MEMACCESS(3) 278 MEMACCESS(3)
273 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" 279 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n"
274 "b.gt 1b \n" 280 "b.gt 1b \n"
275 : "+r"(src_y), // %0 281 : "+r"(src_y), // %0
276 "+r"(src_u), // %1 282 "+r"(src_u), // %1
277 "+r"(src_v), // %2 283 "+r"(src_v), // %2
278 "+r"(dst_abgr), // %3 284 "+r"(dst_abgr), // %3
279 "+r"(width) // %4 285 "+r"(width) // %4
280 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), 286 : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
281 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) 287 [kYToRgb]"r"(&kYuvConstants.kYToRgb)
282 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 288 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
283 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 289 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
284 ); 290 );
285 } 291 }
286 #endif // HAS_I422TOABGRROW_NEON 292 #endif // HAS_I422TOABGRROW_NEON
287 293
288 #ifdef HAS_I422TORGBAROW_NEON 294 #ifdef HAS_I422TORGBAROW_NEON
289 void I422ToRGBARow_NEON(const uint8* src_y, 295 void I422ToRGBARow_NEON(const uint8* src_y,
290 const uint8* src_u, 296 const uint8* src_u,
291 const uint8* src_v, 297 const uint8* src_v,
292 uint8* dst_rgba, 298 uint8* dst_rgba,
299 struct YuvConstants* yuvconstants,
293 int width) { 300 int width) {
294 asm volatile ( 301 asm volatile (
295 YUV422TORGB_SETUP_REG 302 YUVTORGB_SETUP
296 "1: \n" 303 "1: \n"
297 READYUV422 304 READYUV422
298 YUV422TORGB(v23, v22, v21) 305 YUVTORGB(v23, v22, v21)
299 "subs %w4, %w4, #8 \n" 306 "subs %w4, %w4, #8 \n"
300 "movi v20.8b, #255 \n" /* A */ 307 "movi v20.8b, #255 \n" /* A */
301 MEMACCESS(3) 308 MEMACCESS(3)
302 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" 309 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n"
303 "b.gt 1b \n" 310 "b.gt 1b \n"
304 : "+r"(src_y), // %0 311 : "+r"(src_y), // %0
305 "+r"(src_u), // %1 312 "+r"(src_u), // %1
306 "+r"(src_v), // %2 313 "+r"(src_v), // %2
307 "+r"(dst_rgba), // %3 314 "+r"(dst_rgba), // %3
308 "+r"(width) // %4 315 "+r"(width) // %4
309 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), 316 : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
310 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) 317 [kYToRgb]"r"(&kYuvConstants.kYToRgb)
311 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 318 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
312 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 319 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
313 ); 320 );
314 } 321 }
315 #endif // HAS_I422TORGBAROW_NEON 322 #endif // HAS_I422TORGBAROW_NEON
316 323
317 #ifdef HAS_I422TORGB24ROW_NEON 324 #ifdef HAS_I422TORGB24ROW_NEON
318 void I422ToRGB24Row_NEON(const uint8* src_y, 325 void I422ToRGB24Row_NEON(const uint8* src_y,
319 const uint8* src_u, 326 const uint8* src_u,
320 const uint8* src_v, 327 const uint8* src_v,
321 uint8* dst_rgb24, 328 uint8* dst_rgb24,
329 struct YuvConstants* yuvconstants,
322 int width) { 330 int width) {
323 asm volatile ( 331 asm volatile (
324 YUV422TORGB_SETUP_REG 332 YUVTORGB_SETUP
325 "1: \n" 333 "1: \n"
326 READYUV422 334 READYUV422
327 YUV422TORGB(v22, v21, v20) 335 YUVTORGB(v22, v21, v20)
328 "subs %w4, %w4, #8 \n" 336 "subs %w4, %w4, #8 \n"
329 MEMACCESS(3) 337 MEMACCESS(3)
330 "st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n" 338 "st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n"
331 "b.gt 1b \n" 339 "b.gt 1b \n"
332 : "+r"(src_y), // %0 340 : "+r"(src_y), // %0
333 "+r"(src_u), // %1 341 "+r"(src_u), // %1
334 "+r"(src_v), // %2 342 "+r"(src_v), // %2
335 "+r"(dst_rgb24), // %3 343 "+r"(dst_rgb24), // %3
336 "+r"(width) // %4 344 "+r"(width) // %4
337 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), 345 : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
338 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) 346 [kYToRgb]"r"(&kYuvConstants.kYToRgb)
339 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 347 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
340 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 348 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
341 ); 349 );
342 } 350 }
343 #endif // HAS_I422TORGB24ROW_NEON 351 #endif // HAS_I422TORGB24ROW_NEON
344 352
345 #ifdef HAS_I422TORAWROW_NEON 353 #ifdef HAS_I422TORAWROW_NEON
346 void I422ToRAWRow_NEON(const uint8* src_y, 354 void I422ToRAWRow_NEON(const uint8* src_y,
347 const uint8* src_u, 355 const uint8* src_u,
348 const uint8* src_v, 356 const uint8* src_v,
349 uint8* dst_raw, 357 uint8* dst_raw,
358 struct YuvConstants* yuvconstants,
350 int width) { 359 int width) {
351 asm volatile ( 360 asm volatile (
352 YUV422TORGB_SETUP_REG 361 YUVTORGB_SETUP
353 "1: \n" 362 "1: \n"
354 READYUV422 363 READYUV422
355 YUV422TORGB(v20, v21, v22) 364 YUVTORGB(v20, v21, v22)
356 "subs %w4, %w4, #8 \n" 365 "subs %w4, %w4, #8 \n"
357 MEMACCESS(3) 366 MEMACCESS(3)
358 "st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n" 367 "st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n"
359 "b.gt 1b \n" 368 "b.gt 1b \n"
360 : "+r"(src_y), // %0 369 : "+r"(src_y), // %0
361 "+r"(src_u), // %1 370 "+r"(src_u), // %1
362 "+r"(src_v), // %2 371 "+r"(src_v), // %2
363 "+r"(dst_raw), // %3 372 "+r"(dst_raw), // %3
364 "+r"(width) // %4 373 "+r"(width) // %4
365 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), 374 : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
366 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) 375 [kYToRgb]"r"(&kYuvConstants.kYToRgb)
367 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 376 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
368 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 377 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
369 ); 378 );
370 } 379 }
371 #endif // HAS_I422TORAWROW_NEON 380 #endif // HAS_I422TORAWROW_NEON
372 381
373 #define ARGBTORGB565 \ 382 #define ARGBTORGB565 \
374 "shll v0.8h, v22.8b, #8 \n" /* R */ \ 383 "shll v0.8h, v22.8b, #8 \n" /* R */ \
375 "shll v20.8h, v20.8b, #8 \n" /* B */ \ 384 "shll v20.8h, v20.8b, #8 \n" /* B */ \
376 "shll v21.8h, v21.8b, #8 \n" /* G */ \ 385 "shll v21.8h, v21.8b, #8 \n" /* G */ \
377 "sri v0.8h, v21.8h, #5 \n" /* RG */ \ 386 "sri v0.8h, v21.8h, #5 \n" /* RG */ \
378 "sri v0.8h, v20.8h, #11 \n" /* RGB */ 387 "sri v0.8h, v20.8h, #11 \n" /* RGB */
379 388
380 #ifdef HAS_I422TORGB565ROW_NEON 389 #ifdef HAS_I422TORGB565ROW_NEON
381 void I422ToRGB565Row_NEON(const uint8* src_y, 390 void I422ToRGB565Row_NEON(const uint8* src_y,
382 const uint8* src_u, 391 const uint8* src_u,
383 const uint8* src_v, 392 const uint8* src_v,
384 uint8* dst_rgb565, 393 uint8* dst_rgb565,
394 struct YuvConstants* yuvconstants,
385 int width) { 395 int width) {
386 asm volatile ( 396 asm volatile (
387 YUV422TORGB_SETUP_REG 397 YUVTORGB_SETUP
388 "1: \n" 398 "1: \n"
389 READYUV422 399 READYUV422
390 YUV422TORGB(v22, v21, v20) 400 YUVTORGB(v22, v21, v20)
391 "subs %w4, %w4, #8 \n" 401 "subs %w4, %w4, #8 \n"
392 ARGBTORGB565 402 ARGBTORGB565
393 MEMACCESS(3) 403 MEMACCESS(3)
394 "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels RGB565. 404 "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels RGB565.
395 "b.gt 1b \n" 405 "b.gt 1b \n"
396 : "+r"(src_y), // %0 406 : "+r"(src_y), // %0
397 "+r"(src_u), // %1 407 "+r"(src_u), // %1
398 "+r"(src_v), // %2 408 "+r"(src_v), // %2
399 "+r"(dst_rgb565), // %3 409 "+r"(dst_rgb565), // %3
400 "+r"(width) // %4 410 "+r"(width) // %4
401 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), 411 : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
402 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) 412 [kYToRgb]"r"(&kYuvConstants.kYToRgb)
403 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 413 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
404 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 414 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
405 ); 415 );
406 } 416 }
407 #endif // HAS_I422TORGB565ROW_NEON 417 #endif // HAS_I422TORGB565ROW_NEON
408 418
409 #define ARGBTOARGB1555 \ 419 #define ARGBTOARGB1555 \
410 "shll v0.8h, v23.8b, #8 \n" /* A */ \ 420 "shll v0.8h, v23.8b, #8 \n" /* A */ \
411 "shll v22.8h, v22.8b, #8 \n" /* R */ \ 421 "shll v22.8h, v22.8b, #8 \n" /* R */ \
412 "shll v20.8h, v20.8b, #8 \n" /* B */ \ 422 "shll v20.8h, v20.8b, #8 \n" /* B */ \
413 "shll v21.8h, v21.8b, #8 \n" /* G */ \ 423 "shll v21.8h, v21.8b, #8 \n" /* G */ \
414 "sri v0.8h, v22.8h, #1 \n" /* AR */ \ 424 "sri v0.8h, v22.8h, #1 \n" /* AR */ \
415 "sri v0.8h, v21.8h, #6 \n" /* ARG */ \ 425 "sri v0.8h, v21.8h, #6 \n" /* ARG */ \
416 "sri v0.8h, v20.8h, #11 \n" /* ARGB */ 426 "sri v0.8h, v20.8h, #11 \n" /* ARGB */
417 427
418 #ifdef HAS_I422TOARGB1555ROW_NEON 428 #ifdef HAS_I422TOARGB1555ROW_NEON
419 void I422ToARGB1555Row_NEON(const uint8* src_y, 429 void I422ToARGB1555Row_NEON(const uint8* src_y,
420 const uint8* src_u, 430 const uint8* src_u,
421 const uint8* src_v, 431 const uint8* src_v,
422 uint8* dst_argb1555, 432 uint8* dst_argb1555,
433 struct YuvConstants* yuvconstants,
423 int width) { 434 int width) {
424 asm volatile ( 435 asm volatile (
425 YUV422TORGB_SETUP_REG 436 YUVTORGB_SETUP
426 "1: \n" 437 "1: \n"
427 READYUV422 438 READYUV422
428 YUV422TORGB(v22, v21, v20) 439 YUVTORGB(v22, v21, v20)
429 "subs %w4, %w4, #8 \n" 440 "subs %w4, %w4, #8 \n"
430 "movi v23.8b, #255 \n" 441 "movi v23.8b, #255 \n"
431 ARGBTOARGB1555 442 ARGBTOARGB1555
432 MEMACCESS(3) 443 MEMACCESS(3)
433 "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels ARGB1555. 444 "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels ARGB1555.
434 "b.gt 1b \n" 445 "b.gt 1b \n"
435 : "+r"(src_y), // %0 446 : "+r"(src_y), // %0
436 "+r"(src_u), // %1 447 "+r"(src_u), // %1
437 "+r"(src_v), // %2 448 "+r"(src_v), // %2
438 "+r"(dst_argb1555), // %3 449 "+r"(dst_argb1555), // %3
439 "+r"(width) // %4 450 "+r"(width) // %4
440 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), 451 : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
441 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) 452 [kYToRgb]"r"(&kYuvConstants.kYToRgb)
442 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 453 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
443 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 454 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
444 ); 455 );
445 } 456 }
446 #endif // HAS_I422TOARGB1555ROW_NEON 457 #endif // HAS_I422TOARGB1555ROW_NEON
447 458
448 #define ARGBTOARGB4444 \ 459 #define ARGBTOARGB4444 \
449 /* Input v20.8b<=B, v21.8b<=G, v22.8b<=R, v23.8b<=A, v4.8b<=0x0f */ \ 460 /* Input v20.8b<=B, v21.8b<=G, v22.8b<=R, v23.8b<=A, v4.8b<=0x0f */ \
450 "ushr v20.8b, v20.8b, #4 \n" /* B */ \ 461 "ushr v20.8b, v20.8b, #4 \n" /* B */ \
451 "bic v21.8b, v21.8b, v4.8b \n" /* G */ \ 462 "bic v21.8b, v21.8b, v4.8b \n" /* G */ \
452 "ushr v22.8b, v22.8b, #4 \n" /* R */ \ 463 "ushr v22.8b, v22.8b, #4 \n" /* R */ \
453 "bic v23.8b, v23.8b, v4.8b \n" /* A */ \ 464 "bic v23.8b, v23.8b, v4.8b \n" /* A */ \
454 "orr v0.8b, v20.8b, v21.8b \n" /* BG */ \ 465 "orr v0.8b, v20.8b, v21.8b \n" /* BG */ \
455 "orr v1.8b, v22.8b, v23.8b \n" /* RA */ \ 466 "orr v1.8b, v22.8b, v23.8b \n" /* RA */ \
456 "zip1 v0.16b, v0.16b, v1.16b \n" /* BGRA */ 467 "zip1 v0.16b, v0.16b, v1.16b \n" /* BGRA */
457 468
458 #ifdef HAS_I422TOARGB4444ROW_NEON 469 #ifdef HAS_I422TOARGB4444ROW_NEON
459 void I422ToARGB4444Row_NEON(const uint8* src_y, 470 void I422ToARGB4444Row_NEON(const uint8* src_y,
460 const uint8* src_u, 471 const uint8* src_u,
461 const uint8* src_v, 472 const uint8* src_v,
462 uint8* dst_argb4444, 473 uint8* dst_argb4444,
474 struct YuvConstants* yuvconstants,
463 int width) { 475 int width) {
464 asm volatile ( 476 asm volatile (
465 YUV422TORGB_SETUP_REG 477 YUVTORGB_SETUP
466 "movi v4.16b, #0x0f \n" // bits to clear with vbic. 478 "movi v4.16b, #0x0f \n" // bits to clear with vbic.
467 "1: \n" 479 "1: \n"
468 READYUV422 480 READYUV422
469 YUV422TORGB(v22, v21, v20) 481 YUVTORGB(v22, v21, v20)
470 "subs %w4, %w4, #8 \n" 482 "subs %w4, %w4, #8 \n"
471 "movi v23.8b, #255 \n" 483 "movi v23.8b, #255 \n"
472 ARGBTOARGB4444 484 ARGBTOARGB4444
473 MEMACCESS(3) 485 MEMACCESS(3)
474 "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels ARGB4444. 486 "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels ARGB4444.
475 "b.gt 1b \n" 487 "b.gt 1b \n"
476 : "+r"(src_y), // %0 488 : "+r"(src_y), // %0
477 "+r"(src_u), // %1 489 "+r"(src_u), // %1
478 "+r"(src_v), // %2 490 "+r"(src_v), // %2
479 "+r"(dst_argb4444), // %3 491 "+r"(dst_argb4444), // %3
480 "+r"(width) // %4 492 "+r"(width) // %4
481 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), 493 : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
482 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) 494 [kYToRgb]"r"(&kYuvConstants.kYToRgb)
483 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 495 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
484 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 496 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
485 ); 497 );
486 } 498 }
487 #endif // HAS_I422TOARGB4444ROW_NEON 499 #endif // HAS_I422TOARGB4444ROW_NEON
488 500
489 #ifdef HAS_I400TOARGBROW_NEON 501 #ifdef HAS_I400TOARGBROW_NEON
490 void I400ToARGBRow_NEON(const uint8* src_y, 502 void I400ToARGBRow_NEON(const uint8* src_y,
491 uint8* dst_argb, 503 uint8* dst_argb,
492 int width) { 504 int width) {
493 int64 width64 = (int64)(width); 505 int64 width64 = (int64)(width);
494 asm volatile ( 506 asm volatile (
495 YUV422TORGB_SETUP_REG 507 YUVTORGB_SETUP
496 "1: \n" 508 "1: \n"
497 READYUV400 509 READYUV400
498 YUV422TORGB(v22, v21, v20) 510 YUVTORGB(v22, v21, v20)
499 "subs %w2, %w2, #8 \n" 511 "subs %w2, %w2, #8 \n"
500 "movi v23.8b, #255 \n" 512 "movi v23.8b, #255 \n"
501 MEMACCESS(1) 513 MEMACCESS(1)
502 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" 514 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n"
503 "b.gt 1b \n" 515 "b.gt 1b \n"
504 : "+r"(src_y), // %0 516 : "+r"(src_y), // %0
505 "+r"(dst_argb), // %1 517 "+r"(dst_argb), // %1
506 "+r"(width64) // %2 518 "+r"(width64) // %2
507 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), 519 : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
508 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) 520 [kYToRgb]"r"(&kYuvConstants.kYToRgb)
509 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 521 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
510 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 522 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
511 ); 523 );
512 } 524 }
513 #endif // HAS_I400TOARGBROW_NEON 525 #endif // HAS_I400TOARGBROW_NEON
514 526
515 #ifdef HAS_J400TOARGBROW_NEON 527 #ifdef HAS_J400TOARGBROW_NEON
516 void J400ToARGBRow_NEON(const uint8* src_y, 528 void J400ToARGBRow_NEON(const uint8* src_y,
517 uint8* dst_argb, 529 uint8* dst_argb,
518 int width) { 530 int width) {
(...skipping 14 matching lines...) Expand all
533 : 545 :
534 : "cc", "memory", "v20", "v21", "v22", "v23" 546 : "cc", "memory", "v20", "v21", "v22", "v23"
535 ); 547 );
536 } 548 }
537 #endif // HAS_J400TOARGBROW_NEON 549 #endif // HAS_J400TOARGBROW_NEON
538 550
539 #ifdef HAS_NV12TOARGBROW_NEON 551 #ifdef HAS_NV12TOARGBROW_NEON
540 void NV12ToARGBRow_NEON(const uint8* src_y, 552 void NV12ToARGBRow_NEON(const uint8* src_y,
541 const uint8* src_uv, 553 const uint8* src_uv,
542 uint8* dst_argb, 554 uint8* dst_argb,
555 struct YuvConstants* yuvconstants,
543 int width) { 556 int width) {
544 asm volatile ( 557 asm volatile (
545 YUV422TORGB_SETUP_REG 558 YUVTORGB_SETUP
546 "1: \n" 559 "1: \n"
547 READNV12 560 READNV12
548 YUV422TORGB(v22, v21, v20) 561 YUVTORGB(v22, v21, v20)
549 "subs %w3, %w3, #8 \n" 562 "subs %w3, %w3, #8 \n"
550 "movi v23.8b, #255 \n" 563 "movi v23.8b, #255 \n"
551 MEMACCESS(2) 564 MEMACCESS(2)
552 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n" 565 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n"
553 "b.gt 1b \n" 566 "b.gt 1b \n"
554 : "+r"(src_y), // %0 567 : "+r"(src_y), // %0
555 "+r"(src_uv), // %1 568 "+r"(src_uv), // %1
556 "+r"(dst_argb), // %2 569 "+r"(dst_argb), // %2
557 "+r"(width) // %3 570 "+r"(width) // %3
558 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), 571 : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
559 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) 572 [kYToRgb]"r"(&kYuvConstants.kYToRgb)
560 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 573 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
561 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 574 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
562 ); 575 );
563 } 576 }
564 #endif // HAS_NV12TOARGBROW_NEON 577 #endif // HAS_NV12TOARGBROW_NEON
565 578
566 #ifdef HAS_NV21TOARGBROW_NEON
567 void NV21ToARGBRow_NEON(const uint8* src_y,
568 const uint8* src_uv,
569 uint8* dst_argb,
570 int width) {
571 asm volatile (
572 YUV422TORGB_SETUP_REG
573 "1: \n"
574 READNV21
575 YUV422TORGB(v22, v21, v20)
576 "subs %w3, %w3, #8 \n"
577 "movi v23.8b, #255 \n"
578 MEMACCESS(2)
579 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n"
580 "b.gt 1b \n"
581 : "+r"(src_y), // %0
582 "+r"(src_uv), // %1
583 "+r"(dst_argb), // %2
584 "+r"(width) // %3
585 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
586 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
587 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
588 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
589 );
590 }
591 #endif // HAS_NV21TOARGBROW_NEON
592
593 #ifdef HAS_NV12TORGB565ROW_NEON 579 #ifdef HAS_NV12TORGB565ROW_NEON
594 void NV12ToRGB565Row_NEON(const uint8* src_y, 580 void NV12ToRGB565Row_NEON(const uint8* src_y,
595 const uint8* src_uv, 581 const uint8* src_uv,
596 uint8* dst_rgb565, 582 uint8* dst_rgb565,
583 struct YuvConstants* yuvconstants,
597 int width) { 584 int width) {
598 asm volatile ( 585 asm volatile (
599 YUV422TORGB_SETUP_REG 586 YUVTORGB_SETUP
600 "1: \n" 587 "1: \n"
601 READNV12 588 READNV12
602 YUV422TORGB(v22, v21, v20) 589 YUVTORGB(v22, v21, v20)
603 "subs %w3, %w3, #8 \n" 590 "subs %w3, %w3, #8 \n"
604 ARGBTORGB565 591 ARGBTORGB565
605 MEMACCESS(2) 592 MEMACCESS(2)
606 "st1 {v0.8h}, [%2], 16 \n" // store 8 pixels RGB565. 593 "st1 {v0.8h}, [%2], 16 \n" // store 8 pixels RGB565.
607 "b.gt 1b \n" 594 "b.gt 1b \n"
608 : "+r"(src_y), // %0 595 : "+r"(src_y), // %0
609 "+r"(src_uv), // %1 596 "+r"(src_uv), // %1
610 "+r"(dst_rgb565), // %2 597 "+r"(dst_rgb565), // %2
611 "+r"(width) // %3 598 "+r"(width) // %3
612 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), 599 : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
613 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) 600 [kYToRgb]"r"(&kYuvConstants.kYToRgb)
614 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 601 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
615 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 602 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
616 ); 603 );
617 } 604 }
618 #endif // HAS_NV12TORGB565ROW_NEON 605 #endif // HAS_NV12TORGB565ROW_NEON
619 606
620 #ifdef HAS_NV21TORGB565ROW_NEON
621 void NV21ToRGB565Row_NEON(const uint8* src_y,
622 const uint8* src_uv,
623 uint8* dst_rgb565,
624 int width) {
625 asm volatile (
626 YUV422TORGB_SETUP_REG
627 "1: \n"
628 READNV21
629 YUV422TORGB(v22, v21, v20)
630 "subs %w3, %w3, #8 \n"
631 ARGBTORGB565
632 MEMACCESS(2)
633 "st1 {v0.8h}, [%2], 16 \n" // store 8 pixels RGB565.
634 "b.gt 1b \n"
635 : "+r"(src_y), // %0
636 "+r"(src_uv), // %1
637 "+r"(dst_rgb565), // %2
638 "+r"(width) // %3
639 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR),
640 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb)
641 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
642 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
643 );
644 }
645 #endif // HAS_NV21TORGB565ROW_NEON
646
647 #ifdef HAS_YUY2TOARGBROW_NEON 607 #ifdef HAS_YUY2TOARGBROW_NEON
648 void YUY2ToARGBRow_NEON(const uint8* src_yuy2, 608 void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
649 uint8* dst_argb, 609 uint8* dst_argb,
610 struct YuvConstants* yuvconstants,
650 int width) { 611 int width) {
651 int64 width64 = (int64)(width); 612 int64 width64 = (int64)(width);
652 asm volatile ( 613 asm volatile (
653 YUV422TORGB_SETUP_REG 614 YUVTORGB_SETUP
654 "1: \n" 615 "1: \n"
655 READYUY2 616 READYUY2
656 YUV422TORGB(v22, v21, v20) 617 YUVTORGB(v22, v21, v20)
657 "subs %w2, %w2, #8 \n" 618 "subs %w2, %w2, #8 \n"
658 "movi v23.8b, #255 \n" 619 "movi v23.8b, #255 \n"
659 MEMACCESS(1) 620 MEMACCESS(1)
660 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" 621 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n"
661 "b.gt 1b \n" 622 "b.gt 1b \n"
662 : "+r"(src_yuy2), // %0 623 : "+r"(src_yuy2), // %0
663 "+r"(dst_argb), // %1 624 "+r"(dst_argb), // %1
664 "+r"(width64) // %2 625 "+r"(width64) // %2
665 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), 626 : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
666 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) 627 [kYToRgb]"r"(&kYuvConstants.kYToRgb)
667 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 628 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
668 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 629 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
669 ); 630 );
670 } 631 }
671 #endif // HAS_YUY2TOARGBROW_NEON 632 #endif // HAS_YUY2TOARGBROW_NEON
672 633
673 #ifdef HAS_UYVYTOARGBROW_NEON 634 #ifdef HAS_UYVYTOARGBROW_NEON
674 void UYVYToARGBRow_NEON(const uint8* src_uyvy, 635 void UYVYToARGBRow_NEON(const uint8* src_uyvy,
675 uint8* dst_argb, 636 uint8* dst_argb,
637 struct YuvConstants* yuvconstants,
676 int width) { 638 int width) {
677 int64 width64 = (int64)(width); 639 int64 width64 = (int64)(width);
678 asm volatile ( 640 asm volatile (
679 YUV422TORGB_SETUP_REG 641 YUVTORGB_SETUP
680 "1: \n" 642 "1: \n"
681 READUYVY 643 READUYVY
682 YUV422TORGB(v22, v21, v20) 644 YUVTORGB(v22, v21, v20)
683 "subs %w2, %w2, #8 \n" 645 "subs %w2, %w2, #8 \n"
684 "movi v23.8b, #255 \n" 646 "movi v23.8b, #255 \n"
685 MEMACCESS(1) 647 MEMACCESS(1)
686 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], 32 \n" 648 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], 32 \n"
687 "b.gt 1b \n" 649 "b.gt 1b \n"
688 : "+r"(src_uyvy), // %0 650 : "+r"(src_uyvy), // %0
689 "+r"(dst_argb), // %1 651 "+r"(dst_argb), // %1
690 "+r"(width64) // %2 652 "+r"(width64) // %2
691 : [kUVBiasBGR]"r"(&kYuvConstantsNEON.kUVBiasBGR), 653 : [kUVBiasBGR]"r"(&kYuvConstants.kUVBiasBGR),
692 [kYToRgb]"r"(&kYuvConstantsNEON.kYToRgb) 654 [kYToRgb]"r"(&kYuvConstants.kYToRgb)
693 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 655 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
694 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 656 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
695 ); 657 );
696 } 658 }
697 #endif // HAS_UYVYTOARGBROW_NEON 659 #endif // HAS_UYVYTOARGBROW_NEON
698 660
699 // Reads 16 pairs of UV and writes even values to dst_u and odd to dst_v. 661 // Reads 16 pairs of UV and writes even values to dst_u and odd to dst_v.
700 #ifdef HAS_SPLITUVROW_NEON 662 #ifdef HAS_SPLITUVROW_NEON
701 void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, 663 void SplitUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
702 int width) { 664 int width) {
(...skipping 2348 matching lines...) Expand 10 before | Expand all | Expand 10 after
3051 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List 3013 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
3052 ); 3014 );
3053 } 3015 }
3054 #endif // HAS_SOBELYROW_NEON 3016 #endif // HAS_SOBELYROW_NEON
3055 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) 3017 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
3056 3018
3057 #ifdef __cplusplus 3019 #ifdef __cplusplus
3058 } // extern "C" 3020 } // extern "C"
3059 } // namespace libyuv 3021 } // namespace libyuv
3060 #endif 3022 #endif
OLDNEW
« no previous file with comments | « source/row_neon.cc ('k') | source/row_win.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698