Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1213)

Side by Side Diff: source/row_neon64.cc

Issue 1413763017: ARMv7 Neon version of I420AlphaToARGB (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: port i422alphatoargb to neon64 Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_neon.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2014 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2014 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 109 matching lines...) Expand 10 before | Expand all | Expand 10 after
120 "sqadd " #vB ".8h, v24.8h, v0.8h \n" /* B */ \ 120 "sqadd " #vB ".8h, v24.8h, v0.8h \n" /* B */ \
121 "sqadd " #vG ".8h, v25.8h, v0.8h \n" /* G */ \ 121 "sqadd " #vG ".8h, v25.8h, v0.8h \n" /* G */ \
122 "sqadd " #vR ".8h, v26.8h, v0.8h \n" /* R */ \ 122 "sqadd " #vR ".8h, v26.8h, v0.8h \n" /* R */ \
123 "sqadd " #vB ".8h, " #vB ".8h, v3.8h \n" /* B */ \ 123 "sqadd " #vB ".8h, " #vB ".8h, v3.8h \n" /* B */ \
124 "sqsub " #vG ".8h, " #vG ".8h, v6.8h \n" /* G */ \ 124 "sqsub " #vG ".8h, " #vG ".8h, v6.8h \n" /* G */ \
125 "sqadd " #vR ".8h, " #vR ".8h, v7.8h \n" /* R */ \ 125 "sqadd " #vR ".8h, " #vR ".8h, v7.8h \n" /* R */ \
126 "sqshrun " #vB ".8b, " #vB ".8h, #6 \n" /* B */ \ 126 "sqshrun " #vB ".8b, " #vB ".8h, #6 \n" /* B */ \
127 "sqshrun " #vG ".8b, " #vG ".8h, #6 \n" /* G */ \ 127 "sqshrun " #vG ".8b, " #vG ".8h, #6 \n" /* G */ \
128 "sqshrun " #vR ".8b, " #vR ".8h, #6 \n" /* R */ \ 128 "sqshrun " #vR ".8b, " #vR ".8h, #6 \n" /* R */ \
129 129
130 // TODO(fbarchard): Use structure for constants like 32 bit code.
131 #define RGBTOUV_SETUP_REG \
132 "movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \
133 "movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2 */ \
134 "movi v22.8h, #19, lsl #0 \n" /* UR coefficient (-0.2969) / 2 */ \
135 "movi v23.8h, #9, lsl #0 \n" /* VB coefficient (-0.1406) / 2 */ \
136 "movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \
137 "movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */
138
139 #ifdef HAS_I444TOARGBROW_NEON 130 #ifdef HAS_I444TOARGBROW_NEON
140 void I444ToARGBRow_NEON(const uint8* src_y, 131 void I444ToARGBRow_NEON(const uint8* src_y,
141 const uint8* src_u, 132 const uint8* src_u,
142 const uint8* src_v, 133 const uint8* src_v,
143 uint8* dst_argb, 134 uint8* dst_argb,
144 const struct YuvConstants* yuvconstants, 135 const struct YuvConstants* yuvconstants,
145 int width) { 136 int width) {
146 asm volatile ( 137 asm volatile (
147 YUVTORGB_SETUP 138 YUVTORGB_SETUP
139 "movi v23.8b, #255 \n" /* A */
148 "1: \n" 140 "1: \n"
149 READYUV444 141 READYUV444
150 YUVTORGB(v22, v21, v20) 142 YUVTORGB(v22, v21, v20)
151 "subs %w4, %w4, #8 \n" 143 "subs %w4, %w4, #8 \n"
152 "movi v23.8b, #255 \n" /* A */
153 MEMACCESS(3) 144 MEMACCESS(3)
154 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" 145 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n"
155 "b.gt 1b \n" 146 "b.gt 1b \n"
156 : "+r"(src_y), // %0 147 : "+r"(src_y), // %0
157 "+r"(src_u), // %1 148 "+r"(src_u), // %1
158 "+r"(src_v), // %2 149 "+r"(src_v), // %2
159 "+r"(dst_argb), // %3 150 "+r"(dst_argb), // %3
160 "+r"(width) // %4 151 "+r"(width) // %4
161 : [kUVToRB]"r"(&yuvconstants->kUVToRB), 152 : [kUVToRB]"r"(&yuvconstants->kUVToRB),
162 [kUVToG]"r"(&yuvconstants->kUVToG), 153 [kUVToG]"r"(&yuvconstants->kUVToG),
163 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), 154 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
164 [kYToRgb]"r"(&yuvconstants->kYToRgb) 155 [kYToRgb]"r"(&yuvconstants->kYToRgb)
165 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 156 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
166 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 157 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
167 ); 158 );
168 } 159 }
169 #endif // HAS_I444TOARGBROW_NEON 160 #endif // HAS_I444TOARGBROW_NEON
170 161
171 // TODO(fbarchard): Switch to Matrix version of this function.
172 #ifdef HAS_I422TOARGBROW_NEON 162 #ifdef HAS_I422TOARGBROW_NEON
173 void I422ToARGBRow_NEON(const uint8* src_y, 163 void I422ToARGBRow_NEON(const uint8* src_y,
174 const uint8* src_u, 164 const uint8* src_u,
175 const uint8* src_v, 165 const uint8* src_v,
176 uint8* dst_argb, 166 uint8* dst_argb,
177 const struct YuvConstants* yuvconstants, 167 const struct YuvConstants* yuvconstants,
178 int width) { 168 int width) {
179 asm volatile ( 169 asm volatile (
180 YUVTORGB_SETUP 170 YUVTORGB_SETUP
171 "movi v23.8b, #255 \n" /* A */
181 "1: \n" 172 "1: \n"
182 READYUV422 173 READYUV422
183 YUVTORGB(v22, v21, v20) 174 YUVTORGB(v22, v21, v20)
184 "subs %w4, %w4, #8 \n" 175 "subs %w4, %w4, #8 \n"
185 "movi v23.8b, #255 \n" /* A */
186 MEMACCESS(3) 176 MEMACCESS(3)
187 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" 177 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n"
188 "b.gt 1b \n" 178 "b.gt 1b \n"
189 : "+r"(src_y), // %0 179 : "+r"(src_y), // %0
190 "+r"(src_u), // %1 180 "+r"(src_u), // %1
191 "+r"(src_v), // %2 181 "+r"(src_v), // %2
192 "+r"(dst_argb), // %3 182 "+r"(dst_argb), // %3
193 "+r"(width) // %4 183 "+r"(width) // %4
194 : [kUVToRB]"r"(&yuvconstants->kUVToRB), 184 : [kUVToRB]"r"(&yuvconstants->kUVToRB),
195 [kUVToG]"r"(&yuvconstants->kUVToG), 185 [kUVToG]"r"(&yuvconstants->kUVToG),
196 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), 186 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
197 [kYToRgb]"r"(&yuvconstants->kYToRgb) 187 [kYToRgb]"r"(&yuvconstants->kYToRgb)
198 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 188 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
199 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 189 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
200 ); 190 );
201 } 191 }
202 #endif // HAS_I422TOARGBROW_NEON 192 #endif // HAS_I422TOARGBROW_NEON
203 193
194 #ifdef HAS_I422ALPHATOARGBROW_NEON
195 void I422AlphaToARGBRow_NEON(const uint8* src_y,
196 const uint8* src_u,
197 const uint8* src_v,
198 const uint8* src_a,
199 uint8* dst_argb,
200 const struct YuvConstants* yuvconstants,
201 int width) {
202 asm volatile (
203 YUVTORGB_SETUP
204 "1: \n"
205 READYUV422
206 YUVTORGB(v22, v21, v20)
207 MEMACCESS(3)
208 "ld1 {v23.8b}, [%3], #8 \n"
209 "subs %w5, %w5, #8 \n"
210 MEMACCESS(4)
211 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%4], #32 \n"
212 "b.gt 1b \n"
213 : "+r"(src_y), // %0
214 "+r"(src_u), // %1
215 "+r"(src_v), // %2
216 "+r"(src_a), // %3
217 "+r"(dst_argb), // %4
218 "+r"(width) // %5
219 : [kUVToRB]"r"(&yuvconstants->kUVToRB),
220 [kUVToG]"r"(&yuvconstants->kUVToG),
221 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
222 [kYToRgb]"r"(&yuvconstants->kYToRgb)
223 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
224 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
225 );
226 }
227 #endif // HAS_I422ALPHATOARGBROW_NEON
228
204 #ifdef HAS_I411TOARGBROW_NEON 229 #ifdef HAS_I411TOARGBROW_NEON
205 void I411ToARGBRow_NEON(const uint8* src_y, 230 void I411ToARGBRow_NEON(const uint8* src_y,
206 const uint8* src_u, 231 const uint8* src_u,
207 const uint8* src_v, 232 const uint8* src_v,
208 uint8* dst_argb, 233 uint8* dst_argb,
209 const struct YuvConstants* yuvconstants, 234 const struct YuvConstants* yuvconstants,
210 int width) { 235 int width) {
211 asm volatile ( 236 asm volatile (
212 YUVTORGB_SETUP 237 YUVTORGB_SETUP
238 "movi v23.8b, #255 \n" /* A */
213 "1: \n" 239 "1: \n"
214 READYUV411 240 READYUV411
215 YUVTORGB(v22, v21, v20) 241 YUVTORGB(v22, v21, v20)
216 "subs %w4, %w4, #8 \n" 242 "subs %w4, %w4, #8 \n"
217 "movi v23.8b, #255 \n" /* A */
218 MEMACCESS(3) 243 MEMACCESS(3)
219 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" 244 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n"
220 "b.gt 1b \n" 245 "b.gt 1b \n"
221 : "+r"(src_y), // %0 246 : "+r"(src_y), // %0
222 "+r"(src_u), // %1 247 "+r"(src_u), // %1
223 "+r"(src_v), // %2 248 "+r"(src_v), // %2
224 "+r"(dst_argb), // %3 249 "+r"(dst_argb), // %3
225 "+r"(width) // %4 250 "+r"(width) // %4
226 : [kUVToRB]"r"(&yuvconstants->kUVToRB), 251 : [kUVToRB]"r"(&yuvconstants->kUVToRB),
227 [kUVToG]"r"(&yuvconstants->kUVToG), 252 [kUVToG]"r"(&yuvconstants->kUVToG),
228 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), 253 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
229 [kYToRgb]"r"(&yuvconstants->kYToRgb) 254 [kYToRgb]"r"(&yuvconstants->kYToRgb)
230 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 255 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
231 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 256 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
232 ); 257 );
233 } 258 }
234 #endif // HAS_I411TOARGBROW_NEON 259 #endif // HAS_I411TOARGBROW_NEON
235 260
236 #ifdef HAS_I422TORGBAROW_NEON 261 #ifdef HAS_I422TORGBAROW_NEON
237 void I422ToRGBARow_NEON(const uint8* src_y, 262 void I422ToRGBARow_NEON(const uint8* src_y,
238 const uint8* src_u, 263 const uint8* src_u,
239 const uint8* src_v, 264 const uint8* src_v,
240 uint8* dst_rgba, 265 uint8* dst_rgba,
241 const struct YuvConstants* yuvconstants, 266 const struct YuvConstants* yuvconstants,
242 int width) { 267 int width) {
243 asm volatile ( 268 asm volatile (
244 YUVTORGB_SETUP 269 YUVTORGB_SETUP
270 "movi v20.8b, #255 \n" /* A */
245 "1: \n" 271 "1: \n"
246 READYUV422 272 READYUV422
247 YUVTORGB(v23, v22, v21) 273 YUVTORGB(v23, v22, v21)
248 "subs %w4, %w4, #8 \n" 274 "subs %w4, %w4, #8 \n"
249 "movi v20.8b, #255 \n" /* A */
250 MEMACCESS(3) 275 MEMACCESS(3)
251 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" 276 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n"
252 "b.gt 1b \n" 277 "b.gt 1b \n"
253 : "+r"(src_y), // %0 278 : "+r"(src_y), // %0
254 "+r"(src_u), // %1 279 "+r"(src_u), // %1
255 "+r"(src_v), // %2 280 "+r"(src_v), // %2
256 "+r"(dst_rgba), // %3 281 "+r"(dst_rgba), // %3
257 "+r"(width) // %4 282 "+r"(width) // %4
258 : [kUVToRB]"r"(&yuvconstants->kUVToRB), 283 : [kUVToRB]"r"(&yuvconstants->kUVToRB),
259 [kUVToG]"r"(&yuvconstants->kUVToG), 284 [kUVToG]"r"(&yuvconstants->kUVToG),
(...skipping 86 matching lines...) Expand 10 before | Expand all | Expand 10 after
346 371
347 #ifdef HAS_I422TOARGB1555ROW_NEON 372 #ifdef HAS_I422TOARGB1555ROW_NEON
348 void I422ToARGB1555Row_NEON(const uint8* src_y, 373 void I422ToARGB1555Row_NEON(const uint8* src_y,
349 const uint8* src_u, 374 const uint8* src_u,
350 const uint8* src_v, 375 const uint8* src_v,
351 uint8* dst_argb1555, 376 uint8* dst_argb1555,
352 const struct YuvConstants* yuvconstants, 377 const struct YuvConstants* yuvconstants,
353 int width) { 378 int width) {
354 asm volatile ( 379 asm volatile (
355 YUVTORGB_SETUP 380 YUVTORGB_SETUP
381 "movi v23.8b, #255 \n"
356 "1: \n" 382 "1: \n"
357 READYUV422 383 READYUV422
358 YUVTORGB(v22, v21, v20) 384 YUVTORGB(v22, v21, v20)
359 "subs %w4, %w4, #8 \n" 385 "subs %w4, %w4, #8 \n"
360 "movi v23.8b, #255 \n"
361 ARGBTOARGB1555 386 ARGBTOARGB1555
362 MEMACCESS(3) 387 MEMACCESS(3)
363 "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels ARGB1555. 388 "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels ARGB1555.
364 "b.gt 1b \n" 389 "b.gt 1b \n"
365 : "+r"(src_y), // %0 390 : "+r"(src_y), // %0
366 "+r"(src_u), // %1 391 "+r"(src_u), // %1
367 "+r"(src_v), // %2 392 "+r"(src_v), // %2
368 "+r"(dst_argb1555), // %3 393 "+r"(dst_argb1555), // %3
369 "+r"(width) // %4 394 "+r"(width) // %4
370 : [kUVToRB]"r"(&yuvconstants->kUVToRB), 395 : [kUVToRB]"r"(&yuvconstants->kUVToRB),
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after
421 } 446 }
422 #endif // HAS_I422TOARGB4444ROW_NEON 447 #endif // HAS_I422TOARGB4444ROW_NEON
423 448
424 #ifdef HAS_I400TOARGBROW_NEON 449 #ifdef HAS_I400TOARGBROW_NEON
425 void I400ToARGBRow_NEON(const uint8* src_y, 450 void I400ToARGBRow_NEON(const uint8* src_y,
426 uint8* dst_argb, 451 uint8* dst_argb,
427 int width) { 452 int width) {
428 int64 width64 = (int64)(width); 453 int64 width64 = (int64)(width);
429 asm volatile ( 454 asm volatile (
430 YUVTORGB_SETUP 455 YUVTORGB_SETUP
456 "movi v23.8b, #255 \n"
431 "1: \n" 457 "1: \n"
432 READYUV400 458 READYUV400
433 YUVTORGB(v22, v21, v20) 459 YUVTORGB(v22, v21, v20)
434 "subs %w2, %w2, #8 \n" 460 "subs %w2, %w2, #8 \n"
435 "movi v23.8b, #255 \n"
436 MEMACCESS(1) 461 MEMACCESS(1)
437 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" 462 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n"
438 "b.gt 1b \n" 463 "b.gt 1b \n"
439 : "+r"(src_y), // %0 464 : "+r"(src_y), // %0
440 "+r"(dst_argb), // %1 465 "+r"(dst_argb), // %1
441 "+r"(width64) // %2 466 "+r"(width64) // %2
442 : [kUVToRB]"r"(&kYuvI601Constants.kUVToRB), 467 : [kUVToRB]"r"(&kYuvI601Constants.kUVToRB),
443 [kUVToG]"r"(&kYuvI601Constants.kUVToG), 468 [kUVToG]"r"(&kYuvI601Constants.kUVToG),
444 [kUVBiasBGR]"r"(&kYuvI601Constants.kUVBiasBGR), 469 [kUVBiasBGR]"r"(&kYuvI601Constants.kUVBiasBGR),
445 [kYToRgb]"r"(&kYuvI601Constants.kYToRgb) 470 [kYToRgb]"r"(&kYuvI601Constants.kYToRgb)
(...skipping 28 matching lines...) Expand all
474 #endif // HAS_J400TOARGBROW_NEON 499 #endif // HAS_J400TOARGBROW_NEON
475 500
476 #ifdef HAS_NV12TOARGBROW_NEON 501 #ifdef HAS_NV12TOARGBROW_NEON
477 void NV12ToARGBRow_NEON(const uint8* src_y, 502 void NV12ToARGBRow_NEON(const uint8* src_y,
478 const uint8* src_uv, 503 const uint8* src_uv,
479 uint8* dst_argb, 504 uint8* dst_argb,
480 const struct YuvConstants* yuvconstants, 505 const struct YuvConstants* yuvconstants,
481 int width) { 506 int width) {
482 asm volatile ( 507 asm volatile (
483 YUVTORGB_SETUP 508 YUVTORGB_SETUP
509 "movi v23.8b, #255 \n"
484 "1: \n" 510 "1: \n"
485 READNV12 511 READNV12
486 YUVTORGB(v22, v21, v20) 512 YUVTORGB(v22, v21, v20)
487 "subs %w3, %w3, #8 \n" 513 "subs %w3, %w3, #8 \n"
488 "movi v23.8b, #255 \n"
489 MEMACCESS(2) 514 MEMACCESS(2)
490 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n" 515 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n"
491 "b.gt 1b \n" 516 "b.gt 1b \n"
492 : "+r"(src_y), // %0 517 : "+r"(src_y), // %0
493 "+r"(src_uv), // %1 518 "+r"(src_uv), // %1
494 "+r"(dst_argb), // %2 519 "+r"(dst_argb), // %2
495 "+r"(width) // %3 520 "+r"(width) // %3
496 : [kUVToRB]"r"(&yuvconstants->kUVToRB), 521 : [kUVToRB]"r"(&yuvconstants->kUVToRB),
497 [kUVToG]"r"(&yuvconstants->kUVToG), 522 [kUVToG]"r"(&yuvconstants->kUVToG),
498 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), 523 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
499 [kYToRgb]"r"(&yuvconstants->kYToRgb) 524 [kYToRgb]"r"(&yuvconstants->kYToRgb)
500 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 525 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
501 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 526 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
502 ); 527 );
503 } 528 }
504 #endif // HAS_NV12TOARGBROW_NEON 529 #endif // HAS_NV12TOARGBROW_NEON
505 530
506 #ifdef HAS_NV12TOARGBROW_NEON 531 #ifdef HAS_NV12TOARGBROW_NEON
507 void NV21ToARGBRow_NEON(const uint8* src_y, 532 void NV21ToARGBRow_NEON(const uint8* src_y,
508 const uint8* src_vu, 533 const uint8* src_vu,
509 uint8* dst_argb, 534 uint8* dst_argb,
510 const struct YuvConstants* yuvconstants, 535 const struct YuvConstants* yuvconstants,
511 int width) { 536 int width) {
512 asm volatile ( 537 asm volatile (
513 YUVTORGB_SETUP 538 YUVTORGB_SETUP
539 "movi v23.8b, #255 \n"
514 "1: \n" 540 "1: \n"
515 READNV21 541 READNV21
516 YUVTORGB(v22, v21, v20) 542 YUVTORGB(v22, v21, v20)
517 "subs %w3, %w3, #8 \n" 543 "subs %w3, %w3, #8 \n"
518 "movi v23.8b, #255 \n"
519 MEMACCESS(2) 544 MEMACCESS(2)
520 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n" 545 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n"
521 "b.gt 1b \n" 546 "b.gt 1b \n"
522 : "+r"(src_y), // %0 547 : "+r"(src_y), // %0
523 "+r"(src_vu), // %1 548 "+r"(src_vu), // %1
524 "+r"(dst_argb), // %2 549 "+r"(dst_argb), // %2
525 "+r"(width) // %3 550 "+r"(width) // %3
526 : [kUVToRB]"r"(&yuvconstants->kUVToRB), 551 : [kUVToRB]"r"(&yuvconstants->kUVToRB),
527 [kUVToG]"r"(&yuvconstants->kUVToG), 552 [kUVToG]"r"(&yuvconstants->kUVToG),
528 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), 553 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
564 #endif // HAS_NV12TORGB565ROW_NEON 589 #endif // HAS_NV12TORGB565ROW_NEON
565 590
566 #ifdef HAS_YUY2TOARGBROW_NEON 591 #ifdef HAS_YUY2TOARGBROW_NEON
567 void YUY2ToARGBRow_NEON(const uint8* src_yuy2, 592 void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
568 uint8* dst_argb, 593 uint8* dst_argb,
569 const struct YuvConstants* yuvconstants, 594 const struct YuvConstants* yuvconstants,
570 int width) { 595 int width) {
571 int64 width64 = (int64)(width); 596 int64 width64 = (int64)(width);
572 asm volatile ( 597 asm volatile (
573 YUVTORGB_SETUP 598 YUVTORGB_SETUP
599 "movi v23.8b, #255 \n"
574 "1: \n" 600 "1: \n"
575 READYUY2 601 READYUY2
576 YUVTORGB(v22, v21, v20) 602 YUVTORGB(v22, v21, v20)
577 "subs %w2, %w2, #8 \n" 603 "subs %w2, %w2, #8 \n"
578 "movi v23.8b, #255 \n"
579 MEMACCESS(1) 604 MEMACCESS(1)
580 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" 605 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n"
581 "b.gt 1b \n" 606 "b.gt 1b \n"
582 : "+r"(src_yuy2), // %0 607 : "+r"(src_yuy2), // %0
583 "+r"(dst_argb), // %1 608 "+r"(dst_argb), // %1
584 "+r"(width64) // %2 609 "+r"(width64) // %2
585 : [kUVToRB]"r"(&yuvconstants->kUVToRB), 610 : [kUVToRB]"r"(&yuvconstants->kUVToRB),
586 [kUVToG]"r"(&yuvconstants->kUVToG), 611 [kUVToG]"r"(&yuvconstants->kUVToG),
587 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), 612 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
588 [kYToRgb]"r"(&yuvconstants->kYToRgb) 613 [kYToRgb]"r"(&yuvconstants->kYToRgb)
589 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 614 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
590 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 615 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
591 ); 616 );
592 } 617 }
593 #endif // HAS_YUY2TOARGBROW_NEON 618 #endif // HAS_YUY2TOARGBROW_NEON
594 619
595 #ifdef HAS_UYVYTOARGBROW_NEON 620 #ifdef HAS_UYVYTOARGBROW_NEON
596 void UYVYToARGBRow_NEON(const uint8* src_uyvy, 621 void UYVYToARGBRow_NEON(const uint8* src_uyvy,
597 uint8* dst_argb, 622 uint8* dst_argb,
598 const struct YuvConstants* yuvconstants, 623 const struct YuvConstants* yuvconstants,
599 int width) { 624 int width) {
600 int64 width64 = (int64)(width); 625 int64 width64 = (int64)(width);
601 asm volatile ( 626 asm volatile (
602 YUVTORGB_SETUP 627 YUVTORGB_SETUP
628 "movi v23.8b, #255 \n"
603 "1: \n" 629 "1: \n"
604 READUYVY 630 READUYVY
605 YUVTORGB(v22, v21, v20) 631 YUVTORGB(v22, v21, v20)
606 "subs %w2, %w2, #8 \n" 632 "subs %w2, %w2, #8 \n"
607 "movi v23.8b, #255 \n"
608 MEMACCESS(1) 633 MEMACCESS(1)
609 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], 32 \n" 634 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], 32 \n"
610 "b.gt 1b \n" 635 "b.gt 1b \n"
611 : "+r"(src_uyvy), // %0 636 : "+r"(src_uyvy), // %0
612 "+r"(dst_argb), // %1 637 "+r"(dst_argb), // %1
613 "+r"(width64) // %2 638 "+r"(width64) // %2
614 : [kUVToRB]"r"(&yuvconstants->kUVToRB), 639 : [kUVToRB]"r"(&yuvconstants->kUVToRB),
615 [kUVToG]"r"(&yuvconstants->kUVToG), 640 [kUVToG]"r"(&yuvconstants->kUVToG),
616 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), 641 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
617 [kYToRgb]"r"(&yuvconstants->kYToRgb) 642 [kYToRgb]"r"(&yuvconstants->kYToRgb)
(...skipping 819 matching lines...) Expand 10 before | Expand all | Expand 10 after
1437 "+r"(dst_u), // %1 1462 "+r"(dst_u), // %1
1438 "+r"(dst_v), // %2 1463 "+r"(dst_v), // %2
1439 "+r"(width) // %3 1464 "+r"(width) // %3
1440 : 1465 :
1441 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", 1466 : "cc", "memory", "v0", "v1", "v2", "v3", "v4",
1442 "v24", "v25", "v26", "v27", "v28", "v29" 1467 "v24", "v25", "v26", "v27", "v28", "v29"
1443 ); 1468 );
1444 } 1469 }
1445 #endif // HAS_ARGBTOUV444ROW_NEON 1470 #endif // HAS_ARGBTOUV444ROW_NEON
1446 1471
1472 #define RGBTOUV_SETUP_REG \
1473 "movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \
1474 "movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2 */ \
1475 "movi v22.8h, #19, lsl #0 \n" /* UR coefficient (-0.2969) / 2 */ \
1476 "movi v23.8h, #9, lsl #0 \n" /* VB coefficient (-0.1406) / 2 */ \
1477 "movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \
1478 "movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */
1479
1447 // 16x1 pixels -> 8x1. width is number of argb pixels. e.g. 16. 1480 // 16x1 pixels -> 8x1. width is number of argb pixels. e.g. 16.
1448 #ifdef HAS_ARGBTOUV422ROW_NEON 1481 #ifdef HAS_ARGBTOUV422ROW_NEON
1449 void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v, 1482 void ARGBToUV422Row_NEON(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
1450 int width) { 1483 int width) {
1451 asm volatile ( 1484 asm volatile (
1452 RGBTOUV_SETUP_REG 1485 RGBTOUV_SETUP_REG
1453 "1: \n" 1486 "1: \n"
1454 MEMACCESS(0) 1487 MEMACCESS(0)
1455 "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. 1488 "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels.
1456 1489
(...skipping 1538 matching lines...) Expand 10 before | Expand all | Expand 10 after
2995 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List 3028 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
2996 ); 3029 );
2997 } 3030 }
2998 #endif // HAS_SOBELYROW_NEON 3031 #endif // HAS_SOBELYROW_NEON
2999 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) 3032 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
3000 3033
3001 #ifdef __cplusplus 3034 #ifdef __cplusplus
3002 } // extern "C" 3035 } // extern "C"
3003 } // namespace libyuv 3036 } // namespace libyuv
3004 #endif 3037 #endif
OLDNEW
« no previous file with comments | « source/row_neon.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698