Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(414)

Side by Side Diff: source/row_neon.cc

Issue 1413763017: ARMv7 Neon version of I420AlphaToARGB (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: port i422alphatoargb to neon64 Created 5 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_any.cc ('k') | source/row_neon64.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 /* 1 /*
2 * Copyright 2011 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2011 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after
135 "vqshrun.s16 d21, q0, #6 \n" /* G */ 135 "vqshrun.s16 d21, q0, #6 \n" /* G */
136 136
137 void I444ToARGBRow_NEON(const uint8* src_y, 137 void I444ToARGBRow_NEON(const uint8* src_y,
138 const uint8* src_u, 138 const uint8* src_u,
139 const uint8* src_v, 139 const uint8* src_v,
140 uint8* dst_argb, 140 uint8* dst_argb,
141 const struct YuvConstants* yuvconstants, 141 const struct YuvConstants* yuvconstants,
142 int width) { 142 int width) {
143 asm volatile ( 143 asm volatile (
144 YUVTORGB_SETUP 144 YUVTORGB_SETUP
145 "vmov.u8 d23, #255 \n"
145 "1: \n" 146 "1: \n"
146 READYUV444 147 READYUV444
147 YUVTORGB 148 YUVTORGB
148 "subs %4, %4, #8 \n" 149 "subs %4, %4, #8 \n"
149 "vmov.u8 d23, #255 \n"
150 MEMACCESS(3) 150 MEMACCESS(3)
151 "vst4.8 {d20, d21, d22, d23}, [%3]! \n" 151 "vst4.8 {d20, d21, d22, d23}, [%3]! \n"
152 "bgt 1b \n" 152 "bgt 1b \n"
153 : "+r"(src_y), // %0 153 : "+r"(src_y), // %0
154 "+r"(src_u), // %1 154 "+r"(src_u), // %1
155 "+r"(src_v), // %2 155 "+r"(src_v), // %2
156 "+r"(dst_argb), // %3 156 "+r"(dst_argb), // %3
157 "+r"(width) // %4 157 "+r"(width) // %4
158 : [kUVToRB]"r"(&yuvconstants->kUVToRB), 158 : [kUVToRB]"r"(&yuvconstants->kUVToRB),
159 [kUVToG]"r"(&yuvconstants->kUVToG), 159 [kUVToG]"r"(&yuvconstants->kUVToG),
160 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), 160 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
161 [kYToRgb]"r"(&yuvconstants->kYToRgb) 161 [kYToRgb]"r"(&yuvconstants->kYToRgb)
162 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", 162 : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
163 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 163 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
164 ); 164 );
165 } 165 }
166 166
167 void I422ToARGBRow_NEON(const uint8* src_y, 167 void I422ToARGBRow_NEON(const uint8* src_y,
168 const uint8* src_u, 168 const uint8* src_u,
169 const uint8* src_v, 169 const uint8* src_v,
170 uint8* dst_argb, 170 uint8* dst_argb,
171 const struct YuvConstants* yuvconstants, 171 const struct YuvConstants* yuvconstants,
172 int width) { 172 int width) {
173 asm volatile ( 173 asm volatile (
174 YUVTORGB_SETUP 174 YUVTORGB_SETUP
175 "vmov.u8 d23, #255 \n"
175 "1: \n" 176 "1: \n"
176 READYUV422 177 READYUV422
177 YUVTORGB 178 YUVTORGB
178 "subs %4, %4, #8 \n" 179 "subs %4, %4, #8 \n"
179 "vmov.u8 d23, #255 \n"
180 MEMACCESS(3) 180 MEMACCESS(3)
181 "vst4.8 {d20, d21, d22, d23}, [%3]! \n" 181 "vst4.8 {d20, d21, d22, d23}, [%3]! \n"
182 "bgt 1b \n" 182 "bgt 1b \n"
183 : "+r"(src_y), // %0 183 : "+r"(src_y), // %0
184 "+r"(src_u), // %1 184 "+r"(src_u), // %1
185 "+r"(src_v), // %2 185 "+r"(src_v), // %2
186 "+r"(dst_argb), // %3 186 "+r"(dst_argb), // %3
187 "+r"(width) // %4 187 "+r"(width) // %4
188 : [kUVToRB]"r"(&yuvconstants->kUVToRB), 188 : [kUVToRB]"r"(&yuvconstants->kUVToRB),
189 [kUVToG]"r"(&yuvconstants->kUVToG), 189 [kUVToG]"r"(&yuvconstants->kUVToG),
190 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), 190 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
191 [kYToRgb]"r"(&yuvconstants->kYToRgb) 191 [kYToRgb]"r"(&yuvconstants->kYToRgb)
192 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", 192 : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
193 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 193 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
194 ); 194 );
195 } 195 }
196 196
197 void I422AlphaToARGBRow_NEON(const uint8* src_y,
198 const uint8* src_u,
199 const uint8* src_v,
200 const uint8* src_a,
201 uint8* dst_argb,
202 const struct YuvConstants* yuvconstants,
203 int width) {
204 asm volatile (
205 YUVTORGB_SETUP
206 "1: \n"
207 READYUV422
208 YUVTORGB
209 "subs %5, %5, #8 \n"
210 MEMACCESS(3)
211 "vld1.8 {d23}, [%3]! \n"
212 MEMACCESS(4)
213 "vst4.8 {d20, d21, d22, d23}, [%4]! \n"
214 "bgt 1b \n"
215 : "+r"(src_y), // %0
216 "+r"(src_u), // %1
217 "+r"(src_v), // %2
218 "+r"(src_a), // %3
219 "+r"(dst_argb), // %4
220 "+r"(width) // %5
221 : [kUVToRB]"r"(&yuvconstants->kUVToRB),
222 [kUVToG]"r"(&yuvconstants->kUVToG),
223 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
224 [kYToRgb]"r"(&yuvconstants->kYToRgb)
225 : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
226 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
227 );
228 }
229
197 void I411ToARGBRow_NEON(const uint8* src_y, 230 void I411ToARGBRow_NEON(const uint8* src_y,
198 const uint8* src_u, 231 const uint8* src_u,
199 const uint8* src_v, 232 const uint8* src_v,
200 uint8* dst_argb, 233 uint8* dst_argb,
201 const struct YuvConstants* yuvconstants, 234 const struct YuvConstants* yuvconstants,
202 int width) { 235 int width) {
203 asm volatile ( 236 asm volatile (
204 YUVTORGB_SETUP 237 YUVTORGB_SETUP
238 "vmov.u8 d23, #255 \n"
205 "1: \n" 239 "1: \n"
206 READYUV411 240 READYUV411
207 YUVTORGB 241 YUVTORGB
208 "subs %4, %4, #8 \n" 242 "subs %4, %4, #8 \n"
209 "vmov.u8 d23, #255 \n"
210 MEMACCESS(3) 243 MEMACCESS(3)
211 "vst4.8 {d20, d21, d22, d23}, [%3]! \n" 244 "vst4.8 {d20, d21, d22, d23}, [%3]! \n"
212 "bgt 1b \n" 245 "bgt 1b \n"
213 : "+r"(src_y), // %0 246 : "+r"(src_y), // %0
214 "+r"(src_u), // %1 247 "+r"(src_u), // %1
215 "+r"(src_v), // %2 248 "+r"(src_v), // %2
216 "+r"(dst_argb), // %3 249 "+r"(dst_argb), // %3
217 "+r"(width) // %4 250 "+r"(width) // %4
218 : [kUVToRB]"r"(&yuvconstants->kUVToRB), 251 : [kUVToRB]"r"(&yuvconstants->kUVToRB),
219 [kUVToG]"r"(&yuvconstants->kUVToG), 252 [kUVToG]"r"(&yuvconstants->kUVToG),
220 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), 253 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
221 [kYToRgb]"r"(&yuvconstants->kYToRgb) 254 [kYToRgb]"r"(&yuvconstants->kYToRgb)
222 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", 255 : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
223 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 256 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
224 ); 257 );
225 } 258 }
226 259
227 void I422ToRGBARow_NEON(const uint8* src_y, 260 void I422ToRGBARow_NEON(const uint8* src_y,
228 const uint8* src_u, 261 const uint8* src_u,
229 const uint8* src_v, 262 const uint8* src_v,
230 uint8* dst_rgba, 263 uint8* dst_rgba,
231 const struct YuvConstants* yuvconstants, 264 const struct YuvConstants* yuvconstants,
232 int width) { 265 int width) {
233 asm volatile ( 266 asm volatile (
234 YUVTORGB_SETUP 267 YUVTORGB_SETUP
268 "vmov.u8 d19, #255 \n"
235 "1: \n" 269 "1: \n"
236 READYUV422 270 READYUV422
237 YUVTORGB 271 YUVTORGB
238 "subs %4, %4, #8 \n" 272 "subs %4, %4, #8 \n"
239 "vmov.u8 d19, #255 \n"
240 MEMACCESS(3) 273 MEMACCESS(3)
241 "vst4.8 {d19, d20, d21, d22}, [%3]! \n" 274 "vst4.8 {d19, d20, d21, d22}, [%3]! \n"
242 "bgt 1b \n" 275 "bgt 1b \n"
243 : "+r"(src_y), // %0 276 : "+r"(src_y), // %0
244 "+r"(src_u), // %1 277 "+r"(src_u), // %1
245 "+r"(src_v), // %2 278 "+r"(src_v), // %2
246 "+r"(dst_rgba), // %3 279 "+r"(dst_rgba), // %3
247 "+r"(width) // %4 280 "+r"(width) // %4
248 : [kUVToRB]"r"(&yuvconstants->kUVToRB), 281 : [kUVToRB]"r"(&yuvconstants->kUVToRB),
249 [kUVToG]"r"(&yuvconstants->kUVToG), 282 [kUVToG]"r"(&yuvconstants->kUVToG),
(...skipping 160 matching lines...) Expand 10 before | Expand all | Expand 10 after
410 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", 443 : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
411 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 444 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
412 ); 445 );
413 } 446 }
414 447
415 void I400ToARGBRow_NEON(const uint8* src_y, 448 void I400ToARGBRow_NEON(const uint8* src_y,
416 uint8* dst_argb, 449 uint8* dst_argb,
417 int width) { 450 int width) {
418 asm volatile ( 451 asm volatile (
419 YUVTORGB_SETUP 452 YUVTORGB_SETUP
453 "vmov.u8 d23, #255 \n"
420 "1: \n" 454 "1: \n"
421 READYUV400 455 READYUV400
422 YUVTORGB 456 YUVTORGB
423 "subs %2, %2, #8 \n" 457 "subs %2, %2, #8 \n"
424 "vmov.u8 d23, #255 \n"
425 MEMACCESS(1) 458 MEMACCESS(1)
426 "vst4.8 {d20, d21, d22, d23}, [%1]! \n" 459 "vst4.8 {d20, d21, d22, d23}, [%1]! \n"
427 "bgt 1b \n" 460 "bgt 1b \n"
428 : "+r"(src_y), // %0 461 : "+r"(src_y), // %0
429 "+r"(dst_argb), // %1 462 "+r"(dst_argb), // %1
430 "+r"(width) // %2 463 "+r"(width) // %2
431 : [kUVToRB]"r"(&kYuvI601Constants.kUVToRB), 464 : [kUVToRB]"r"(&kYuvI601Constants.kUVToRB),
432 [kUVToG]"r"(&kYuvI601Constants.kUVToG), 465 [kUVToG]"r"(&kYuvI601Constants.kUVToG),
433 [kUVBiasBGR]"r"(&kYuvI601Constants.kUVBiasBGR), 466 [kUVBiasBGR]"r"(&kYuvI601Constants.kUVBiasBGR),
434 [kYToRgb]"r"(&kYuvI601Constants.kYToRgb) 467 [kYToRgb]"r"(&kYuvI601Constants.kYToRgb)
(...skipping 24 matching lines...) Expand all
459 ); 492 );
460 } 493 }
461 494
462 void NV12ToARGBRow_NEON(const uint8* src_y, 495 void NV12ToARGBRow_NEON(const uint8* src_y,
463 const uint8* src_uv, 496 const uint8* src_uv,
464 uint8* dst_argb, 497 uint8* dst_argb,
465 const struct YuvConstants* yuvconstants, 498 const struct YuvConstants* yuvconstants,
466 int width) { 499 int width) {
467 asm volatile ( 500 asm volatile (
468 YUVTORGB_SETUP 501 YUVTORGB_SETUP
502 "vmov.u8 d23, #255 \n"
469 "1: \n" 503 "1: \n"
470 READNV12 504 READNV12
471 YUVTORGB 505 YUVTORGB
472 "subs %3, %3, #8 \n" 506 "subs %3, %3, #8 \n"
473 "vmov.u8 d23, #255 \n"
474 MEMACCESS(2) 507 MEMACCESS(2)
475 "vst4.8 {d20, d21, d22, d23}, [%2]! \n" 508 "vst4.8 {d20, d21, d22, d23}, [%2]! \n"
476 "bgt 1b \n" 509 "bgt 1b \n"
477 : "+r"(src_y), // %0 510 : "+r"(src_y), // %0
478 "+r"(src_uv), // %1 511 "+r"(src_uv), // %1
479 "+r"(dst_argb), // %2 512 "+r"(dst_argb), // %2
480 "+r"(width) // %3 513 "+r"(width) // %3
481 : [kUVToRB]"r"(&yuvconstants->kUVToRB), 514 : [kUVToRB]"r"(&yuvconstants->kUVToRB),
482 [kUVToG]"r"(&yuvconstants->kUVToG), 515 [kUVToG]"r"(&yuvconstants->kUVToG),
483 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), 516 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
484 [kYToRgb]"r"(&yuvconstants->kYToRgb) 517 [kYToRgb]"r"(&yuvconstants->kYToRgb)
485 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", 518 : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
486 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 519 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
487 ); 520 );
488 } 521 }
489 522
490 void NV21ToARGBRow_NEON(const uint8* src_y, 523 void NV21ToARGBRow_NEON(const uint8* src_y,
491 const uint8* src_vu, 524 const uint8* src_vu,
492 uint8* dst_argb, 525 uint8* dst_argb,
493 const struct YuvConstants* yuvconstants, 526 const struct YuvConstants* yuvconstants,
494 int width) { 527 int width) {
495 asm volatile ( 528 asm volatile (
496 YUVTORGB_SETUP 529 YUVTORGB_SETUP
530 "vmov.u8 d23, #255 \n"
497 "1: \n" 531 "1: \n"
498 READNV21 532 READNV21
499 YUVTORGB 533 YUVTORGB
500 "subs %3, %3, #8 \n" 534 "subs %3, %3, #8 \n"
501 "vmov.u8 d23, #255 \n"
502 MEMACCESS(2) 535 MEMACCESS(2)
503 "vst4.8 {d20, d21, d22, d23}, [%2]! \n" 536 "vst4.8 {d20, d21, d22, d23}, [%2]! \n"
504 "bgt 1b \n" 537 "bgt 1b \n"
505 : "+r"(src_y), // %0 538 : "+r"(src_y), // %0
506 "+r"(src_vu), // %1 539 "+r"(src_vu), // %1
507 "+r"(dst_argb), // %2 540 "+r"(dst_argb), // %2
508 "+r"(width) // %3 541 "+r"(width) // %3
509 : [kUVToRB]"r"(&yuvconstants->kUVToRB), 542 : [kUVToRB]"r"(&yuvconstants->kUVToRB),
510 [kUVToG]"r"(&yuvconstants->kUVToG), 543 [kUVToG]"r"(&yuvconstants->kUVToG),
511 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), 544 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
(...skipping 30 matching lines...) Expand all
542 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 575 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
543 ); 576 );
544 } 577 }
545 578
546 void YUY2ToARGBRow_NEON(const uint8* src_yuy2, 579 void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
547 uint8* dst_argb, 580 uint8* dst_argb,
548 const struct YuvConstants* yuvconstants, 581 const struct YuvConstants* yuvconstants,
549 int width) { 582 int width) {
550 asm volatile ( 583 asm volatile (
551 YUVTORGB_SETUP 584 YUVTORGB_SETUP
585 "vmov.u8 d23, #255 \n"
552 "1: \n" 586 "1: \n"
553 READYUY2 587 READYUY2
554 YUVTORGB 588 YUVTORGB
555 "subs %2, %2, #8 \n" 589 "subs %2, %2, #8 \n"
556 "vmov.u8 d23, #255 \n"
557 MEMACCESS(1) 590 MEMACCESS(1)
558 "vst4.8 {d20, d21, d22, d23}, [%1]! \n" 591 "vst4.8 {d20, d21, d22, d23}, [%1]! \n"
559 "bgt 1b \n" 592 "bgt 1b \n"
560 : "+r"(src_yuy2), // %0 593 : "+r"(src_yuy2), // %0
561 "+r"(dst_argb), // %1 594 "+r"(dst_argb), // %1
562 "+r"(width) // %2 595 "+r"(width) // %2
563 : [kUVToRB]"r"(&yuvconstants->kUVToRB), 596 : [kUVToRB]"r"(&yuvconstants->kUVToRB),
564 [kUVToG]"r"(&yuvconstants->kUVToG), 597 [kUVToG]"r"(&yuvconstants->kUVToG),
565 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), 598 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
566 [kYToRgb]"r"(&yuvconstants->kYToRgb) 599 [kYToRgb]"r"(&yuvconstants->kYToRgb)
567 : "cc", "memory", "q0", "q1", "q2", "q3", "q4", 600 : "cc", "memory", "q0", "q1", "q2", "q3", "q4",
568 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" 601 "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"
569 ); 602 );
570 } 603 }
571 604
572 void UYVYToARGBRow_NEON(const uint8* src_uyvy, 605 void UYVYToARGBRow_NEON(const uint8* src_uyvy,
573 uint8* dst_argb, 606 uint8* dst_argb,
574 const struct YuvConstants* yuvconstants, 607 const struct YuvConstants* yuvconstants,
575 int width) { 608 int width) {
576 asm volatile ( 609 asm volatile (
577 YUVTORGB_SETUP 610 YUVTORGB_SETUP
611 "vmov.u8 d23, #255 \n"
578 "1: \n" 612 "1: \n"
579 READUYVY 613 READUYVY
580 YUVTORGB 614 YUVTORGB
581 "subs %2, %2, #8 \n" 615 "subs %2, %2, #8 \n"
582 "vmov.u8 d23, #255 \n"
583 MEMACCESS(1) 616 MEMACCESS(1)
584 "vst4.8 {d20, d21, d22, d23}, [%1]! \n" 617 "vst4.8 {d20, d21, d22, d23}, [%1]! \n"
585 "bgt 1b \n" 618 "bgt 1b \n"
586 : "+r"(src_uyvy), // %0 619 : "+r"(src_uyvy), // %0
587 "+r"(dst_argb), // %1 620 "+r"(dst_argb), // %1
588 "+r"(width) // %2 621 "+r"(width) // %2
589 : [kUVToRB]"r"(&yuvconstants->kUVToRB), 622 : [kUVToRB]"r"(&yuvconstants->kUVToRB),
590 [kUVToG]"r"(&yuvconstants->kUVToG), 623 [kUVToG]"r"(&yuvconstants->kUVToG),
591 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), 624 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
592 [kYToRgb]"r"(&yuvconstants->kYToRgb) 625 [kYToRgb]"r"(&yuvconstants->kYToRgb)
(...skipping 2284 matching lines...) Expand 10 before | Expand all | Expand 10 after
2877 "r"(6) // %5 2910 "r"(6) // %5
2878 : "cc", "memory", "q0", "q1" // Clobber List 2911 : "cc", "memory", "q0", "q1" // Clobber List
2879 ); 2912 );
2880 } 2913 }
2881 #endif // defined(__ARM_NEON__) && !defined(__aarch64__) 2914 #endif // defined(__ARM_NEON__) && !defined(__aarch64__)
2882 2915
2883 #ifdef __cplusplus 2916 #ifdef __cplusplus
2884 } // extern "C" 2917 } // extern "C"
2885 } // namespace libyuv 2918 } // namespace libyuv
2886 #endif 2919 #endif
OLD NEW
« no previous file with comments | « source/row_any.cc ('k') | source/row_neon64.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698