Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(196)

Side by Side Diff: source/row_neon64.cc

Issue 2043073003: neon64 use width int directly. (Closed) Base URL: https://chromium.googlesource.com/libyuv/libyuv@master
Patch Set: remove trailing tab Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « include/libyuv/version.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2014 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2014 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 545 matching lines...) Expand 10 before | Expand all | Expand 10 after
556 [kYToRgb]"r"(&yuvconstants->kYToRgb) 556 [kYToRgb]"r"(&yuvconstants->kYToRgb)
557 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 557 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
558 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 558 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
559 ); 559 );
560 } 560 }
561 561
562 void YUY2ToARGBRow_NEON(const uint8* src_yuy2, 562 void YUY2ToARGBRow_NEON(const uint8* src_yuy2,
563 uint8* dst_argb, 563 uint8* dst_argb,
564 const struct YuvConstants* yuvconstants, 564 const struct YuvConstants* yuvconstants,
565 int width) { 565 int width) {
566 int64 width64 = (int64)(width);
567 asm volatile ( 566 asm volatile (
568 YUVTORGB_SETUP 567 YUVTORGB_SETUP
569 "movi v23.8b, #255 \n" 568 "movi v23.8b, #255 \n"
570 "1: \n" 569 "1: \n"
571 READYUY2 570 READYUY2
572 YUVTORGB(v22, v21, v20) 571 YUVTORGB(v22, v21, v20)
573 "subs %w2, %w2, #8 \n" 572 "subs %w2, %w2, #8 \n"
574 MEMACCESS(1) 573 MEMACCESS(1)
575 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" 574 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n"
576 "b.gt 1b \n" 575 "b.gt 1b \n"
577 : "+r"(src_yuy2), // %0 576 : "+r"(src_yuy2), // %0
578 "+r"(dst_argb), // %1 577 "+r"(dst_argb), // %1
579 "+r"(width64) // %2 578 "+r"(width) // %2
580 : [kUVToRB]"r"(&yuvconstants->kUVToRB), 579 : [kUVToRB]"r"(&yuvconstants->kUVToRB),
581 [kUVToG]"r"(&yuvconstants->kUVToG), 580 [kUVToG]"r"(&yuvconstants->kUVToG),
582 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), 581 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
583 [kYToRgb]"r"(&yuvconstants->kYToRgb) 582 [kYToRgb]"r"(&yuvconstants->kYToRgb)
584 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 583 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
585 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 584 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
586 ); 585 );
587 } 586 }
588 587
589 void UYVYToARGBRow_NEON(const uint8* src_uyvy, 588 void UYVYToARGBRow_NEON(const uint8* src_uyvy,
590 uint8* dst_argb, 589 uint8* dst_argb,
591 const struct YuvConstants* yuvconstants, 590 const struct YuvConstants* yuvconstants,
592 int width) { 591 int width) {
593 int64 width64 = (int64)(width);
594 asm volatile ( 592 asm volatile (
595 YUVTORGB_SETUP 593 YUVTORGB_SETUP
596 "movi v23.8b, #255 \n" 594 "movi v23.8b, #255 \n"
597 "1: \n" 595 "1: \n"
598 READUYVY 596 READUYVY
599 YUVTORGB(v22, v21, v20) 597 YUVTORGB(v22, v21, v20)
600 "subs %w2, %w2, #8 \n" 598 "subs %w2, %w2, #8 \n"
601 MEMACCESS(1) 599 MEMACCESS(1)
602 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], 32 \n" 600 "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], 32 \n"
603 "b.gt 1b \n" 601 "b.gt 1b \n"
604 : "+r"(src_uyvy), // %0 602 : "+r"(src_uyvy), // %0
605 "+r"(dst_argb), // %1 603 "+r"(dst_argb), // %1
606 "+r"(width64) // %2 604 "+r"(width) // %2
607 : [kUVToRB]"r"(&yuvconstants->kUVToRB), 605 : [kUVToRB]"r"(&yuvconstants->kUVToRB),
608 [kUVToG]"r"(&yuvconstants->kUVToG), 606 [kUVToG]"r"(&yuvconstants->kUVToG),
609 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), 607 [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR),
610 [kYToRgb]"r"(&yuvconstants->kYToRgb) 608 [kYToRgb]"r"(&yuvconstants->kYToRgb)
611 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", 609 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20",
612 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" 610 "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"
613 ); 611 );
614 } 612 }
615 613
616 // Reads 16 pairs of UV and writes even values to dst_u and odd to dst_v. 614 // Reads 16 pairs of UV and writes even values to dst_u and odd to dst_v.
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
674 : // Input registers 672 : // Input registers
675 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List 673 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
676 ); 674 );
677 } 675 }
678 676
679 // SetRow writes 'count' bytes, each set to the same 8-bit value. 677 // SetRow writes 'count' bytes, each set to the same 8-bit value.
680 void SetRow_NEON(uint8* dst, uint8 v8, int count) { 678 void SetRow_NEON(uint8* dst, uint8 v8, int count) {
681 asm volatile ( 679 asm volatile (
682 "dup v0.16b, %w2 \n" // duplicate 16 bytes 680 "dup v0.16b, %w2 \n" // duplicate 16 bytes
683 "1: \n" 681 "1: \n"
684 "subs %w1, %w1, #16 \n" // 16 bytes per loop 682 "subs %w1, %w1, #16 \n" // 16 bytes per loop
685 MEMACCESS(0) 683 MEMACCESS(0)
686 "st1 {v0.16b}, [%0], #16 \n" // store 684 "st1 {v0.16b}, [%0], #16 \n" // store
687 "b.gt 1b \n" 685 "b.gt 1b \n"
688 : "+r"(dst), // %0 686 : "+r"(dst), // %0
689 "+r"(count) // %1 687 "+r"(count) // %1
690 : "r"(v8) // %2 688 : "r"(v8) // %2
691 : "cc", "memory", "v0" 689 : "cc", "memory", "v0"
692 ); 690 );
693 } 691 }
694 692
695 void ARGBSetRow_NEON(uint8* dst, uint32 v32, int count) { 693 void ARGBSetRow_NEON(uint8* dst, uint32 v32, int count) {
696 asm volatile ( 694 asm volatile (
697 "dup v0.4s, %w2 \n" // duplicate 4 ints 695 "dup v0.4s, %w2 \n" // duplicate 4 ints
698 "1: \n" 696 "1: \n"
699 "subs %w1, %w1, #4 \n" // 4 ints per loop 697 "subs %w1, %w1, #4 \n" // 4 ints per loop
700 MEMACCESS(0) 698 MEMACCESS(0)
701 "st1 {v0.16b}, [%0], #16 \n" // store 699 "st1 {v0.16b}, [%0], #16 \n" // store
702 "b.gt 1b \n" 700 "b.gt 1b \n"
703 : "+r"(dst), // %0 701 : "+r"(dst), // %0
704 "+r"(count) // %1 702 "+r"(count) // %1
705 : "r"(v32) // %2 703 : "r"(v32) // %2
706 : "cc", "memory", "v0" 704 : "cc", "memory", "v0"
707 ); 705 );
708 } 706 }
709 707
710 void MirrorRow_NEON(const uint8* src, uint8* dst, int width) { 708 void MirrorRow_NEON(const uint8* src, uint8* dst, int width) {
711 int64 width64 = (int64) width;
712 asm volatile ( 709 asm volatile (
713 // Start at end of source row. 710 // Start at end of source row.
714 "add %0, %0, %2 \n" 711 "add %0, %0, %w2, sxtw \n"
715 "sub %0, %0, #16 \n" 712 "sub %0, %0, #16 \n"
716
717 "1: \n" 713 "1: \n"
718 MEMACCESS(0) 714 MEMACCESS(0)
719 "ld1 {v0.16b}, [%0], %3 \n" // src -= 16 715 "ld1 {v0.16b}, [%0], %3 \n" // src -= 16
720 "subs %2, %2, #16 \n" // 16 pixels per loop. 716 "subs %w2, %w2, #16 \n" // 16 pixels per loop.
721 "rev64 v0.16b, v0.16b \n" 717 "rev64 v0.16b, v0.16b \n"
722 MEMACCESS(1) 718 MEMACCESS(1)
723 "st1 {v0.D}[1], [%1], #8 \n" // dst += 16 719 "st1 {v0.D}[1], [%1], #8 \n" // dst += 16
724 MEMACCESS(1) 720 MEMACCESS(1)
725 "st1 {v0.D}[0], [%1], #8 \n" 721 "st1 {v0.D}[0], [%1], #8 \n"
726 "b.gt 1b \n" 722 "b.gt 1b \n"
727 : "+r"(src), // %0 723 : "+r"(src), // %0
728 "+r"(dst), // %1 724 "+r"(dst), // %1
729 "+r"(width64) // %2 725 "+r"(width) // %2
730 : "r"((ptrdiff_t)-16) // %3 726 : "r"((ptrdiff_t)-16) // %3
731 : "cc", "memory", "v0" 727 : "cc", "memory", "v0"
732 ); 728 );
733 } 729 }
734 730
735 void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v, 731 void MirrorUVRow_NEON(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
736 int width) { 732 int width) {
737 int64 width64 = (int64) width;
738 asm volatile ( 733 asm volatile (
739 // Start at end of source row. 734 // Start at end of source row.
740 "add %0, %0, %3, lsl #1 \n" 735 "add %0, %0, %w3, sxtw #1 \n"
741 "sub %0, %0, #16 \n" 736 "sub %0, %0, #16 \n"
742
743 "1: \n" 737 "1: \n"
744 MEMACCESS(0) 738 MEMACCESS(0)
745 "ld2 {v0.8b, v1.8b}, [%0], %4 \n" // src -= 16 739 "ld2 {v0.8b, v1.8b}, [%0], %4 \n" // src -= 16
746 "subs %3, %3, #8 \n" // 8 pixels per loop. 740 "subs %w3, %w3, #8 \n" // 8 pixels per loop.
747 "rev64 v0.8b, v0.8b \n" 741 "rev64 v0.8b, v0.8b \n"
748 "rev64 v1.8b, v1.8b \n" 742 "rev64 v1.8b, v1.8b \n"
749 MEMACCESS(1) 743 MEMACCESS(1)
750 "st1 {v0.8b}, [%1], #8 \n" // dst += 8 744 "st1 {v0.8b}, [%1], #8 \n" // dst += 8
751 MEMACCESS(2) 745 MEMACCESS(2)
752 "st1 {v1.8b}, [%2], #8 \n" 746 "st1 {v1.8b}, [%2], #8 \n"
753 "b.gt 1b \n" 747 "b.gt 1b \n"
754 : "+r"(src_uv), // %0 748 : "+r"(src_uv), // %0
755 "+r"(dst_u), // %1 749 "+r"(dst_u), // %1
756 "+r"(dst_v), // %2 750 "+r"(dst_v), // %2
757 "+r"(width64) // %3 751 "+r"(width) // %3
758 : "r"((ptrdiff_t)-16) // %4 752 : "r"((ptrdiff_t)-16) // %4
759 : "cc", "memory", "v0", "v1" 753 : "cc", "memory", "v0", "v1"
760 ); 754 );
761 } 755 }
762 756
763 void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) { 757 void ARGBMirrorRow_NEON(const uint8* src, uint8* dst, int width) {
764 int64 width64 = (int64) width;
765 asm volatile ( 758 asm volatile (
766 // Start at end of source row. 759 // Start at end of source row.
767 "add %0, %0, %2, lsl #2 \n" 760 "add %0, %0, %w2, sxtw #2 \n"
768 "sub %0, %0, #16 \n" 761 "sub %0, %0, #16 \n"
769
770 "1: \n" 762 "1: \n"
771 MEMACCESS(0) 763 MEMACCESS(0)
772 "ld1 {v0.16b}, [%0], %3 \n" // src -= 16 764 "ld1 {v0.16b}, [%0], %3 \n" // src -= 16
773 "subs %2, %2, #4 \n" // 4 pixels per loop. 765 "subs %w2, %w2, #4 \n" // 4 pixels per loop.
774 "rev64 v0.4s, v0.4s \n" 766 "rev64 v0.4s, v0.4s \n"
775 MEMACCESS(1) 767 MEMACCESS(1)
776 "st1 {v0.D}[1], [%1], #8 \n" // dst += 16 768 "st1 {v0.D}[1], [%1], #8 \n" // dst += 16
777 MEMACCESS(1) 769 MEMACCESS(1)
778 "st1 {v0.D}[0], [%1], #8 \n" 770 "st1 {v0.D}[0], [%1], #8 \n"
779 "b.gt 1b \n" 771 "b.gt 1b \n"
780 : "+r"(src), // %0 772 : "+r"(src), // %0
781 "+r"(dst), // %1 773 "+r"(dst), // %1
782 "+r"(width64) // %2 774 "+r"(width) // %2
783 : "r"((ptrdiff_t)-16) // %3 775 : "r"((ptrdiff_t)-16) // %3
784 : "cc", "memory", "v0" 776 : "cc", "memory", "v0"
785 ); 777 );
786 } 778 }
787 779
788 void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int width) { 780 void RGB24ToARGBRow_NEON(const uint8* src_rgb24, uint8* dst_argb, int width) {
789 asm volatile ( 781 asm volatile (
790 "movi v4.8b, #255 \n" // Alpha 782 "movi v4.8b, #255 \n" // Alpha
791 "1: \n" 783 "1: \n"
792 MEMACCESS(0) 784 MEMACCESS(0)
793 "ld3 {v1.8b,v2.8b,v3.8b}, [%0], #24 \n" // load 8 pixels of RGB24. 785 "ld3 {v1.8b,v2.8b,v3.8b}, [%0], #24 \n" // load 8 pixels of RGB24.
794 "subs %w2, %w2, #8 \n" // 8 processed per loop. 786 "subs %w2, %w2, #8 \n" // 8 processed per loop.
795 MEMACCESS(1) 787 MEMACCESS(1)
796 "st4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%1], #32 \n" // store 8 ARGB pixels 788 "st4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%1], #32 \n" // store 8 ARGB pixels
797 "b.gt 1b \n" 789 "b.gt 1b \n"
798 : "+r"(src_rgb24), // %0 790 : "+r"(src_rgb24), // %0
799 "+r"(dst_argb), // %1 791 "+r"(dst_argb), // %1
800 "+r"(width) // %2 792 "+r"(width) // %2
801 : 793 :
802 : "cc", "memory", "v1", "v2", "v3", "v4" // Clobber List 794 : "cc", "memory", "v1", "v2", "v3", "v4" // Clobber List
803 ); 795 );
804 } 796 }
805 797
806 void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int width) { 798 void RAWToARGBRow_NEON(const uint8* src_raw, uint8* dst_argb, int width) {
807 asm volatile ( 799 asm volatile (
808 "movi v5.8b, #255 \n" // Alpha 800 "movi v5.8b, #255 \n" // Alpha
809 "1: \n" 801 "1: \n"
810 MEMACCESS(0) 802 MEMACCESS(0)
811 "ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // read r g b 803 "ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // read r g b
812 "subs %w2, %w2, #8 \n" // 8 processed per loop. 804 "subs %w2, %w2, #8 \n" // 8 processed per loop.
813 "orr v3.8b, v1.8b, v1.8b \n" // move g 805 "orr v3.8b, v1.8b, v1.8b \n" // move g
814 "orr v4.8b, v0.8b, v0.8b \n" // move r 806 "orr v4.8b, v0.8b, v0.8b \n" // move r
815 MEMACCESS(1) 807 MEMACCESS(1)
816 "st4 {v2.8b,v3.8b,v4.8b,v5.8b}, [%1], #32 \n" // store b g r a 808 "st4 {v2.8b,v3.8b,v4.8b,v5.8b}, [%1], #32 \n" // store b g r a
817 "b.gt 1b \n" 809 "b.gt 1b \n"
818 : "+r"(src_raw), // %0 810 : "+r"(src_raw), // %0
819 "+r"(dst_argb), // %1 811 "+r"(dst_argb), // %1
820 "+r"(width) // %2 812 "+r"(width) // %2
821 : 813 :
822 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5" // Clobber List 814 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5" // Clobber List
823 ); 815 );
824 } 816 }
825 817
826 void RAWToRGB24Row_NEON(const uint8* src_raw, uint8* dst_rgb24, int width) { 818 void RAWToRGB24Row_NEON(const uint8* src_raw, uint8* dst_rgb24, int width) {
827 asm volatile ( 819 asm volatile (
828 "1: \n" 820 "1: \n"
829 MEMACCESS(0) 821 MEMACCESS(0)
830 "ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // read r g b 822 "ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // read r g b
(...skipping 1977 matching lines...) Expand 10 before | Expand all | Expand 10 after
2808 "r"(6LL) // %5 2800 "r"(6LL) // %5
2809 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List 2801 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List
2810 ); 2802 );
2811 } 2803 }
2812 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) 2804 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
2813 2805
2814 #ifdef __cplusplus 2806 #ifdef __cplusplus
2815 } // extern "C" 2807 } // extern "C"
2816 } // namespace libyuv 2808 } // namespace libyuv
2817 #endif 2809 #endif
OLDNEW
« no previous file with comments | « include/libyuv/version.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698