Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(10)

Side by Side Diff: source/libvpx/vp9/common/arm/neon/vp9_reconintra_neon.asm

Issue 168343002: libvpx: Pull from upstream (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/libvpx/
Patch Set: libvpx: Pull from upstream Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 ; 1 ;
2 ; Copyright (c) 2014 The WebM project authors. All Rights Reserved. 2 ; Copyright (c) 2014 The WebM project authors. All Rights Reserved.
3 ; 3 ;
4 ; Use of this source code is governed by a BSD-style license 4 ; Use of this source code is governed by a BSD-style license
5 ; that can be found in the LICENSE file in the root of the source 5 ; that can be found in the LICENSE file in the root of the source
6 ; tree. An additional intellectual property rights grant can be found 6 ; tree. An additional intellectual property rights grant can be found
7 ; in the file PATENTS. All contributing project authors may 7 ; in the file PATENTS. All contributing project authors may
8 ; be found in the AUTHORS file in the root of the source tree. 8 ; be found in the AUTHORS file in the root of the source tree.
9 ; 9 ;
10 10
(...skipping 331 matching lines...) Expand 10 before | Expand all | Expand 10 after
342 ; r2 const uint8_t *above 342 ; r2 const uint8_t *above
343 ; r3 const uint8_t *left 343 ; r3 const uint8_t *left
344 344
345 |vp9_tm_predictor_8x8_neon| PROC 345 |vp9_tm_predictor_8x8_neon| PROC
346 ; Load ytop_left = above[-1]; 346 ; Load ytop_left = above[-1];
347 sub r12, r2, #1 347 sub r12, r2, #1
348 ldrb r12, [r12] 348 ldrb r12, [r12]
349 vdup.u8 d0, r12 349 vdup.u8 d0, r12
350 350
351 ; preload 8 left 351 ; preload 8 left
352 vld1.8 d30, [r3] 352 vld1.8 {d30}, [r3]
353 353
354 ; Load above 8 pixels 354 ; Load above 8 pixels
355 vld1.64 {d2}, [r2] 355 vld1.64 {d2}, [r2]
356 356
357 vmovl.u8 q10, d30 357 vmovl.u8 q10, d30
358 358
359 ; Compute above - ytop_left 359 ; Compute above - ytop_left
360 vsubl.u8 q3, d2, d0 360 vsubl.u8 q3, d2, d0
361 361
362 ; Load left row by row and compute left + (above - ytop_left) 362 ; Load left row by row and compute left + (above - ytop_left)
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after
415 ; r2 const uint8_t *above 415 ; r2 const uint8_t *above
416 ; r3 const uint8_t *left 416 ; r3 const uint8_t *left
417 417
418 |vp9_tm_predictor_16x16_neon| PROC 418 |vp9_tm_predictor_16x16_neon| PROC
419 ; Load ytop_left = above[-1]; 419 ; Load ytop_left = above[-1];
420 sub r12, r2, #1 420 sub r12, r2, #1
421 ldrb r12, [r12] 421 ldrb r12, [r12]
422 vdup.u8 q0, r12 422 vdup.u8 q0, r12
423 423
424 ; Load above 16 pixels 424 ; Load above 16 pixels
425 vld1.8 q1, [r2] 425 vld1.8 {q1}, [r2]
426 426
427 ; preload 8 left into d18 427 ; preload 8 left into d18
428 vld1.8 d18, [r3]! 428 vld1.8 {d18}, [r3]!
429 429
430 ; Compute above - ytop_left 430 ; Compute above - ytop_left
431 vsubl.u8 q2, d2, d0 431 vsubl.u8 q2, d2, d0
432 vsubl.u8 q3, d3, d1 432 vsubl.u8 q3, d3, d1
433 433
434 vmovl.u8 q10, d18 434 vmovl.u8 q10, d18
435 435
436 ; Load left row by row and compute left + (above - ytop_left) 436 ; Load left row by row and compute left + (above - ytop_left)
437 ; Process 8 rows in each single loop and loop 2 times to process 16 rows. 437 ; Process 8 rows in each single loop and loop 2 times to process 16 rows.
438 mov r2, #2 438 mov r2, #2
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
485 vadd.s16 q1, q0, q2 485 vadd.s16 q1, q0, q2
486 vadd.s16 q0, q0, q3 486 vadd.s16 q0, q0, q3
487 vadd.s16 q11, q8, q2 487 vadd.s16 q11, q8, q2
488 vadd.s16 q8, q8, q3 488 vadd.s16 q8, q8, q3
489 vqshrun.s16 d2, q1, #0 489 vqshrun.s16 d2, q1, #0
490 vqshrun.s16 d3, q0, #0 490 vqshrun.s16 d3, q0, #0
491 vqshrun.s16 d22, q11, #0 491 vqshrun.s16 d22, q11, #0
492 vqshrun.s16 d23, q8, #0 492 vqshrun.s16 d23, q8, #0
493 vdup.16 q0, d20[2] 493 vdup.16 q0, d20[2]
494 vdup.16 q8, d20[3] 494 vdup.16 q8, d20[3]
495 vld1.8 d18, [r3]! ; preload 8 left into d18 495 vld1.8 {d18}, [r3]! ; preload 8 left into d18
496 vmovl.u8 q10, d18 496 vmovl.u8 q10, d18
497 vst1.64 {d2,d3}, [r0], r1 497 vst1.64 {d2,d3}, [r0], r1
498 vst1.64 {d22,d23}, [r0], r1 498 vst1.64 {d22,d23}, [r0], r1
499 499
500 subs r2, r2, #1 500 subs r2, r2, #1
501 bgt loop_16x16_neon 501 bgt loop_16x16_neon
502 502
503 bx lr 503 bx lr
504 ENDP ; |vp9_tm_predictor_16x16_neon| 504 ENDP ; |vp9_tm_predictor_16x16_neon|
505 505
506 ;void vp9_tm_predictor_32x32_neon (uint8_t *dst, ptrdiff_t y_stride, 506 ;void vp9_tm_predictor_32x32_neon (uint8_t *dst, ptrdiff_t y_stride,
507 ; const uint8_t *above, 507 ; const uint8_t *above,
508 ; const uint8_t *left) 508 ; const uint8_t *left)
509 ; r0 uint8_t *dst 509 ; r0 uint8_t *dst
510 ; r1 ptrdiff_t y_stride 510 ; r1 ptrdiff_t y_stride
511 ; r2 const uint8_t *above 511 ; r2 const uint8_t *above
512 ; r3 const uint8_t *left 512 ; r3 const uint8_t *left
513 513
514 |vp9_tm_predictor_32x32_neon| PROC 514 |vp9_tm_predictor_32x32_neon| PROC
515 ; Load ytop_left = above[-1]; 515 ; Load ytop_left = above[-1];
516 sub r12, r2, #1 516 sub r12, r2, #1
517 ldrb r12, [r12] 517 ldrb r12, [r12]
518 vdup.u8 q0, r12 518 vdup.u8 q0, r12
519 519
520 ; Load above 32 pixels 520 ; Load above 32 pixels
521 vld1.8 q1, [r2]! 521 vld1.8 {q1}, [r2]!
522 vld1.8 q2, [r2] 522 vld1.8 {q2}, [r2]
523 523
524 ; preload 8 left pixels 524 ; preload 8 left pixels
525 vld1.8 d26, [r3]! 525 vld1.8 {d26}, [r3]!
526 526
527 ; Compute above - ytop_left 527 ; Compute above - ytop_left
528 vsubl.u8 q8, d2, d0 528 vsubl.u8 q8, d2, d0
529 vsubl.u8 q9, d3, d1 529 vsubl.u8 q9, d3, d1
530 vsubl.u8 q10, d4, d0 530 vsubl.u8 q10, d4, d0
531 vsubl.u8 q11, d5, d1 531 vsubl.u8 q11, d5, d1
532 532
533 vmovl.u8 q3, d26 533 vmovl.u8 q3, d26
534 534
535 ; Load left row by row and compute left + (above - ytop_left) 535 ; Load left row by row and compute left + (above - ytop_left)
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after
614 vqshrun.s16 d1, q13, #0 614 vqshrun.s16 d1, q13, #0
615 vadd.s16 q12, q2, q8 615 vadd.s16 q12, q2, q8
616 vadd.s16 q13, q2, q9 616 vadd.s16 q13, q2, q9
617 vqshrun.s16 d2, q14, #0 617 vqshrun.s16 d2, q14, #0
618 vqshrun.s16 d3, q15, #0 618 vqshrun.s16 d3, q15, #0
619 vadd.s16 q14, q2, q10 619 vadd.s16 q14, q2, q10
620 vadd.s16 q15, q2, q11 620 vadd.s16 q15, q2, q11
621 vst1.64 {d0-d3}, [r0], r1 621 vst1.64 {d0-d3}, [r0], r1
622 vqshrun.s16 d24, q12, #0 622 vqshrun.s16 d24, q12, #0
623 vqshrun.s16 d25, q13, #0 623 vqshrun.s16 d25, q13, #0
624 vld1.8 d0, [r3]! ; preload 8 left pixels 624 vld1.8 {d0}, [r3]! ; preload 8 left pixels
625 vqshrun.s16 d26, q14, #0 625 vqshrun.s16 d26, q14, #0
626 vqshrun.s16 d27, q15, #0 626 vqshrun.s16 d27, q15, #0
627 vmovl.u8 q3, d0 627 vmovl.u8 q3, d0
628 vst1.64 {d24-d27}, [r0], r1 628 vst1.64 {d24-d27}, [r0], r1
629 629
630 subs r2, r2, #1 630 subs r2, r2, #1
631 bgt loop_32x32_neon 631 bgt loop_32x32_neon
632 632
633 bx lr 633 bx lr
634 ENDP ; |vp9_tm_predictor_32x32_neon| 634 ENDP ; |vp9_tm_predictor_32x32_neon|
635 635
636 END 636 END
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698