Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1011)

Side by Side Diff: third_party/WebKit/Source/platform/graphics/cpu/mips/WebGLImageConversionMSA.h

Issue 2392443003: Add MSA (MIPS SIMD Arch) optimized color conversion functions (Closed)
Patch Set: Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef WebGLImageConversionMSA_h 5 #ifndef WebGLImageConversionMSA_h
6 #define WebGLImageConversionMSA_h 6 #define WebGLImageConversionMSA_h
7 7
8 #if HAVE(MIPS_MSA_INTRINSICS) 8 #if HAVE(MIPS_MSA_INTRINSICS)
9 9
10 #include "platform/cpu/mips/CommonMacrosMSA.h" 10 #include "platform/cpu/mips/CommonMacrosMSA.h"
(...skipping 697 matching lines...) Expand 10 before | Expand all | Expand 10 after
708 src0r = (v16u8)__msa_binsri_b((v16u8)src0r, (v16u8)src0g, 2); 708 src0r = (v16u8)__msa_binsri_b((v16u8)src0r, (v16u8)src0g, 2);
709 src0b = (v16u8)__msa_binsri_b((v16u8)src0gt, (v16u8)src0b, 4); 709 src0b = (v16u8)__msa_binsri_b((v16u8)src0gt, (v16u8)src0b, 4);
710 dst0 = (v8u16)__msa_ilvev_b((v16i8)src0r, (v16i8)src0b); 710 dst0 = (v8u16)__msa_ilvev_b((v16i8)src0r, (v16i8)src0b);
711 ST_UH(dst0, destination); 711 ST_UH(dst0, destination);
712 destination += 8; 712 destination += 8;
713 } 713 }
714 } 714 }
715 715
716 pixelsPerRow &= 7; 716 pixelsPerRow &= 7;
717 } 717 }
718
719 ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort4444MSA(
720 const uint8_t*& source,
721 uint16_t*& destination,
722 unsigned& pixelsPerRow) {
723 unsigned i;
724 v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
725 v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
726 v8u16 dst0, dst1, dst2, dst3;
727
728 for (i = (pixelsPerRow >> 5); i--;) {
729 LD_UB8(source, 16, src0, src1, src2, src3, src4, src5, src6, src7);
730 SRLI_H4_UB(src0, src1, src2, src3, vec0, vec1, vec2, vec3, 12);
731 SRLI_H4_UB(src4, src5, src6, src7, vec4, vec5, vec6, vec7, 12);
732 BINSLI_B2_UB(vec0, src0, vec1, src1, vec0, vec1, 3);
733 BINSLI_B2_UB(vec2, src2, vec3, src3, vec2, vec3, 3);
734 BINSLI_B2_UB(vec4, src4, vec5, src5, vec4, vec5, 3);
735 BINSLI_B2_UB(vec6, src6, vec7, src7, vec6, vec7, 3);
736 PCKEV_B4_UH(vec1, vec0, vec3, vec2, vec5, vec4, vec7, vec6, dst0, dst1,
737 dst2, dst3);
738 SHF_B4_UH(dst0, dst1, dst2, dst3, 177);
739 ST_UH4(dst0, dst1, dst2, dst3, destination, 8);
740 }
741
742 if (pixelsPerRow & 31) {
743 if (pixelsPerRow & 16) {
744 if ((pixelsPerRow & 8) && (pixelsPerRow & 4)) {
745 LD_UB7(source, 16, src0, src1, src2, src3, src4, src5, src6);
746 SRLI_H4_UB(src0, src1, src2, src3, vec0, vec1, vec2, vec3, 12);
747 SRLI_H2_UB(src4, src5, vec4, vec5, 12);
748 vec6 = (v16u8)SRLI_H(src6, 12);
749 BINSLI_B2_UB(vec0, src0, vec1, src1, vec0, vec1, 3);
750 BINSLI_B2_UB(vec2, src2, vec3, src3, vec2, vec3, 3);
751 BINSLI_B2_UB(vec4, src4, vec5, src5, vec4, vec5, 3);
752 vec6 = (v16u8)__msa_binsli_b((v16u8)vec6, (v16u8)src6, 3);
753 PCKEV_B2_UH(vec1, vec0, vec3, vec2, dst0, dst1);
754 PCKEV_B2_UH(vec5, vec4, vec6, vec6, dst2, dst3);
755 SHF_B4_UH(dst0, dst1, dst2, dst3, 177);
756 ST_UH3(dst0, dst1, dst2, destination, 8);
757 ST8x1_UB(dst3, destination);
758 destination += 4;
759 } else if (pixelsPerRow & 8) {
760 LD_UB6(source, 16, src0, src1, src2, src3, src4, src5);
761 SRLI_H4_UB(src0, src1, src2, src3, vec0, vec1, vec2, vec3, 12);
762 SRLI_H2_UB(src4, src5, vec4, vec5, 12);
763 BINSLI_B2_UB(vec0, src0, vec1, src1, vec0, vec1, 3);
764 BINSLI_B2_UB(vec2, src2, vec3, src3, vec2, vec3, 3);
765 BINSLI_B2_UB(vec4, src4, vec5, src5, vec4, vec5, 3);
766 PCKEV_B3_UH(vec1, vec0, vec3, vec2, vec5, vec4, dst0, dst1, dst2);
767 SHF_B3_UH(dst0, dst1, dst2, 177);
768 ST_UH3(dst0, dst1, dst2, destination, 8);
769 } else if (pixelsPerRow & 4) {
770 LD_UB5(source, 16, src0, src1, src2, src3, src4);
771 SRLI_H4_UB(src0, src1, src2, src3, vec0, vec1, vec2, vec3, 12);
772 vec4 = (v16u8)SRLI_H(src4, 12);
773 BINSLI_B2_UB(vec0, src0, vec1, src1, vec0, vec1, 3);
774 BINSLI_B2_UB(vec2, src2, vec3, src3, vec2, vec3, 3);
775 vec4 = (v16u8)__msa_binsli_b((v16u8)vec4, (v16u8)src4, 3);
776 PCKEV_B3_UH(vec1, vec0, vec3, vec2, vec4, vec4, dst0, dst1, dst2);
777 SHF_B3_UH(dst0, dst1, dst2, 177);
778 ST_UH2(dst0, dst1, destination, 8);
779 ST8x1_UB(dst2, destination);
780 destination += 4;
781 } else {
782 LD_UB4(source, 16, src0, src1, src2, src3);
783 SRLI_H4_UB(src0, src1, src2, src3, vec0, vec1, vec2, vec3, 12);
784 BINSLI_B2_UB(vec0, src0, vec1, src1, vec0, vec1, 3);
785 BINSLI_B2_UB(vec2, src2, vec3, src3, vec2, vec3, 3);
786 PCKEV_B2_UH(vec1, vec0, vec3, vec2, dst0, dst1);
787 SHF_B2_UH(dst0, dst1, 177);
788 ST_UH2(dst0, dst1, destination, 8);
789 }
790 } else if ((pixelsPerRow & 8) && (pixelsPerRow & 4)) {
791 LD_UB3(source, 16, src0, src1, src2);
792 SRLI_H2_UB(src0, src1, vec0, vec1, 12);
793 vec2 = (v16u8)SRLI_H(src2, 12);
794 BINSLI_B2_UB(vec0, src0, vec1, src1, vec0, vec1, 3);
795 vec2 = (v16u8)__msa_binsli_b((v16u8)vec2, (v16u8)src2, 3);
796 PCKEV_B2_UH(vec1, vec0, vec2, vec2, dst0, dst1);
797 SHF_B2_UH(dst0, dst1, 177);
798 ST_UH(dst0, destination);
799 destination += 8;
800 ST8x1_UB(dst1, destination);
801 destination += 4;
802 } else if (pixelsPerRow & 16) {
803 LD_UB4(source, 16, src0, src1, src2, src3);
804 SRLI_H4_UB(src0, src1, src2, src3, vec0, vec1, vec2, vec3, 12);
805 BINSLI_B2_UB(vec0, src0, vec1, src1, vec0, vec1, 3);
806 BINSLI_B2_UB(vec2, src2, vec3, src3, vec2, vec3, 3);
807 PCKEV_B2_UH(vec1, vec0, vec3, vec2, dst0, dst1);
808 SHF_B2_UH(dst0, dst1, 177);
809 ST_UH2(dst0, dst1, destination, 8);
810 } else if (pixelsPerRow & 8) {
811 LD_UB2(source, 16, src0, src1);
812 SRLI_H2_UB(src0, src1, vec0, vec1, 12);
813 BINSLI_B2_UB(vec0, src0, vec1, src1, vec0, vec1, 3);
814 dst0 = (v8u16)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
815 dst0 = (v8u16)__msa_shf_b((v16i8)dst0, 177);
816 ST_UH(dst0, destination);
817 destination += 8;
818 } else if (pixelsPerRow & 4) {
819 src0 = LD_UB(source);
820 source += 16;
821 vec0 = (v16u8)SRLI_H(src0, 12);
822 vec0 = (v16u8)__msa_binsli_b((v16u8)vec0, (v16u8)src0, 3);
823 dst0 = (v8u16)__msa_pckev_b((v16i8)vec0, (v16i8)vec0);
824 dst0 = (v8u16)__msa_shf_b((v16i8)dst0, 177);
825 ST8x1_UB(dst0, destination);
826 destination += 4;
827 }
828 }
829
830 pixelsPerRow &= 3;
831 }
832
// Converts one row of little-endian RGBA8 pixels to R8, scaling the red
// channel by 255/alpha: R' = R * (255.0f / A). Zero alpha bytes are first
// replaced with 255, so A == 0 yields a scale factor of 1.0 --
// NOTE(review): this looks like the alpha-unmultiply path's special-casing
// of zero alpha; confirm against the scalar fallback in
// WebGLImageConversion.cpp.
// Processes 32 pixels per main-loop iteration, then 24/16/8-pixel tails;
// the trailing `pixelsPerRow &= 7` leaves 0-7 pixels for the caller's
// scalar fallback. source and destination are advanced past all pixels
// consumed here (the LD_/ST_ multi-vector macros advance the pointers).
833 ALWAYS_INLINE void packOneRowOfRGBA8LittleToR8MSA(const uint8_t*& source,
834 uint8_t*& destination,
835 unsigned& pixelsPerRow) {
836 unsigned i;
837 v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
// srcNA: alpha working vectors; srcNR: red working vectors;
// fsrc*/fdst*: per-pixel float copies (one f32 per pixel lane).
838 v16u8 src0A, src1A, src2A, src3A, src4A, src5A, src6A, src7A;
839 v16u8 src0R, src1R, src2R, src3R, src4R, src5R, src6R, src7R;
840 v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
841 v4f32 fsrc0A, fsrc1A, fsrc2A, fsrc3A, fsrc4A, fsrc5A, fsrc6A, fsrc7A;
842 v4f32 fsrc0R, fsrc1R, fsrc2R, fsrc3R, fsrc4R, fsrc5R, fsrc6R, fsrc7R;
843 v4f32 fdst0R, fdst1R, fdst2R, fdst3R, fdst4R, fdst5R, fdst6R, fdst7R;
// Selects the alpha byte (offset 3) of each little-endian RGBA pixel.
844 const v16u8 alphaMask = {0, 0, 0, 255, 0, 0, 0, 255,
845 0, 0, 0, 255, 0, 0, 0, 255};
// 255 replicated across all four 32-bit lanes, and its float counterpart.
846 const v4u32 vCnst255 = (v4u32)__msa_ldi_w(255);
847 const v4f32 vfCnst255 = __msa_ffint_u_w(vCnst255);
848
// Main loop: 8 vectors x 16 bytes = 32 RGBA pixels per iteration.
849 for (i = (pixelsPerRow >> 5); i--;) {
850 LD_UB8(source, 16, src0, src1, src2, src3, src4, src5, src6, src7);
// Per-byte mask that is all-ones wherever a source byte equals zero.
851 CEQI_B4_UB(src0, src1, src2, src3, 0, src0A, src1A, src2A, src3A);
852 CEQI_B4_UB(src4, src5, src6, src7, 0, src4A, src5A, src6A, src7A);
// Blend in alphaMask bytes where the source byte was zero, so a zero
// alpha becomes 255 (only the alpha byte survives the AND below).
853 src0A = __msa_bmnz_v(src0, alphaMask, src0A);
854 src1A = __msa_bmnz_v(src1, alphaMask, src1A);
855 src2A = __msa_bmnz_v(src2, alphaMask, src2A);
856 src3A = __msa_bmnz_v(src3, alphaMask, src3A);
857 src4A = __msa_bmnz_v(src4, alphaMask, src4A);
858 src5A = __msa_bmnz_v(src5, alphaMask, src5A);
859 src6A = __msa_bmnz_v(src6, alphaMask, src6A);
860 src7A = __msa_bmnz_v(src7, alphaMask, src7A);
// Keep only the alpha byte of each pixel.
861 AND_V4_UB(src0A, src1A, src2A, src3A, alphaMask, src0A, src1A, src2A,
862 src3A);
863 AND_V4_UB(src4A, src5A, src6A, src7A, alphaMask, src4A, src5A, src6A,
864 src7A);
// Byte-shift by 3 so each alpha lands in the low byte of its 32-bit
// lane, i.e. one u32 alpha value per pixel.
865 src0A = SLDI_UB(src0A, src0A, 3);
866 src1A = SLDI_UB(src1A, src1A, 3);
867 src2A = SLDI_UB(src2A, src2A, 3);
868 src3A = SLDI_UB(src3A, src3A, 3);
869 src4A = SLDI_UB(src4A, src4A, 3);
870 src5A = SLDI_UB(src5A, src5A, 3);
871 src6A = SLDI_UB(src6A, src6A, 3);
872 src7A = SLDI_UB(src7A, src7A, 3);
// Keep only the red byte (low byte of each 32-bit lane).
873 AND_V4_UB(src0, src1, src2, src3, vCnst255, src0R, src1R, src2R, src3R);
874 AND_V4_UB(src4, src5, src6, src7, vCnst255, src4R, src5R, src6R, src7R);
// Convert per-pixel alpha and red u32 values to float.
875 FFINTU_W4_SP(src0A, src1A, src2A, src3A, fsrc0A, fsrc1A, fsrc2A, fsrc3A);
876 FFINTU_W4_SP(src4A, src5A, src6A, src7A, fsrc4A, fsrc5A, fsrc6A, fsrc7A);
877 FFINTU_W4_SP(src0R, src1R, src2R, src3R, fsrc0R, fsrc1R, fsrc2R, fsrc3R);
878 FFINTU_W4_SP(src4R, src5R, src6R, src7R, fsrc4R, fsrc5R, fsrc6R, fsrc7R);
// Scale factor per pixel: 255.0f / alpha (alpha is never 0 here).
879 DIV4(vfCnst255, fsrc0A, vfCnst255, fsrc1A, vfCnst255, fsrc2A, vfCnst255,
880 fsrc3A, fsrc0A, fsrc1A, fsrc2A, fsrc3A);
881 DIV4(vfCnst255, fsrc4A, vfCnst255, fsrc5A, vfCnst255, fsrc6A, vfCnst255,
882 fsrc7A, fsrc4A, fsrc5A, fsrc6A, fsrc7A);
// R' = R * (255 / A), then truncate back to unsigned integers.
883 MUL4(fsrc0R, fsrc0A, fsrc1R, fsrc1A, fsrc2R, fsrc2A, fsrc3R, fsrc3A, fdst0R,
884 fdst1R, fdst2R, fdst3R);
885 MUL4(fsrc4R, fsrc4A, fsrc5R, fsrc5A, fsrc6R, fsrc6A, fsrc7R, fsrc7A, fdst4R,
886 fdst5R, fdst6R, fdst7R);
887 FTRUNCU_W4_UB(fdst0R, fdst1R, fdst2R, fdst3R, dst0, dst1, dst2, dst3);
888 FTRUNCU_W4_UB(fdst4R, fdst5R, fdst6R, fdst7R, dst4, dst5, dst6, dst7);
// Narrow the per-pixel u32 results to bytes and store 32 R8 bytes.
889 PCKEV_H4_UB(dst1, dst0, dst3, dst2, dst5, dst4, dst7, dst6, dst0, dst2,
890 dst4, dst6);
891 PCKEV_B2_UB(dst2, dst0, dst6, dst4, dst0, dst1);
892 ST_UB2(dst0, dst1, destination, 16);
893 }
894
// Tail handling: 24, 16, or 8 remaining pixels (same algorithm as above,
// on fewer vectors); anything below 8 is left to the scalar fallback.
895 if (pixelsPerRow & 31) {
// 24 remaining pixels (6 vectors); the final pack produces 16 + 8 bytes.
896 if ((pixelsPerRow & 16) && (pixelsPerRow & 8)) {
897 LD_UB6(source, 16, src0, src1, src2, src3, src4, src5);
898 CEQI_B4_UB(src0, src1, src2, src3, 0, src0A, src1A, src2A, src3A);
899 CEQI_B2_UB(src4, src5, 0, src4A, src5A);
900 src0A = __msa_bmnz_v(src0, alphaMask, src0A);
901 src1A = __msa_bmnz_v(src1, alphaMask, src1A);
902 src2A = __msa_bmnz_v(src2, alphaMask, src2A);
903 src3A = __msa_bmnz_v(src3, alphaMask, src3A);
904 src4A = __msa_bmnz_v(src4, alphaMask, src4A);
905 src5A = __msa_bmnz_v(src5, alphaMask, src5A);
906 AND_V4_UB(src0A, src1A, src2A, src3A, alphaMask, src0A, src1A, src2A,
907 src3A);
908 AND_V2_UB(src4A, src5A, alphaMask, src4A, src5A);
909 src0A = SLDI_UB(src0A, src0A, 3);
910 src1A = SLDI_UB(src1A, src1A, 3);
911 src2A = SLDI_UB(src2A, src2A, 3);
912 src3A = SLDI_UB(src3A, src3A, 3);
913 src4A = SLDI_UB(src4A, src4A, 3);
914 src5A = SLDI_UB(src5A, src5A, 3);
915 AND_V4_UB(src0, src1, src2, src3, vCnst255, src0R, src1R, src2R, src3R);
916 AND_V2_UB(src4, src5, vCnst255, src4R, src5R);
917 FFINTU_W4_SP(src0A, src1A, src2A, src3A, fsrc0A, fsrc1A, fsrc2A, fsrc3A);
918 FFINTU_W2_SP(src4A, src5A, fsrc4A, fsrc5A);
919 FFINTU_W4_SP(src0R, src1R, src2R, src3R, fsrc0R, fsrc1R, fsrc2R, fsrc3R);
920 FFINTU_W2_SP(src4R, src5R, fsrc4R, fsrc5R);
921 DIV4(vfCnst255, fsrc0A, vfCnst255, fsrc1A, vfCnst255, fsrc2A, vfCnst255,
922 fsrc3A, fsrc0A, fsrc1A, fsrc2A, fsrc3A);
923 DIV2(vfCnst255, fsrc4A, vfCnst255, fsrc5A, fsrc4A, fsrc5A);
924 MUL4(fsrc0R, fsrc0A, fsrc1R, fsrc1A, fsrc2R, fsrc2A, fsrc3R, fsrc3A,
925 fdst0R, fdst1R, fdst2R, fdst3R);
926 MUL2(fsrc4R, fsrc4A, fsrc5R, fsrc5A, fdst4R, fdst5R);
927 FTRUNCU_W4_UB(fdst0R, fdst1R, fdst2R, fdst3R, dst0, dst1, dst2, dst3);
928 FTRUNCU_W2_UB(fdst4R, fdst5R, dst4, dst5);
929 PCKEV_H3_UB(dst1, dst0, dst3, dst2, dst5, dst4, dst0, dst2, dst4);
930 PCKEV_B2_UB(dst2, dst0, dst4, dst4, dst0, dst1);
931 ST_UB(dst0, destination);
932 destination += 16;
933 ST8x1_UB(dst1, destination);
934 destination += 8;
// 16 remaining pixels (4 vectors).
935 } else if (pixelsPerRow & 16) {
936 LD_UB4(source, 16, src0, src1, src2, src3);
937 CEQI_B4_UB(src0, src1, src2, src3, 0, src0A, src1A, src2A, src3A);
938 src0A = __msa_bmnz_v(src0, alphaMask, src0A);
939 src1A = __msa_bmnz_v(src1, alphaMask, src1A);
940 src2A = __msa_bmnz_v(src2, alphaMask, src2A);
941 src3A = __msa_bmnz_v(src3, alphaMask, src3A);
942 AND_V4_UB(src0A, src1A, src2A, src3A, alphaMask, src0A, src1A, src2A,
943 src3A);
944 src0A = SLDI_UB(src0A, src0A, 3);
945 src1A = SLDI_UB(src1A, src1A, 3);
946 src2A = SLDI_UB(src2A, src2A, 3);
947 src3A = SLDI_UB(src3A, src3A, 3);
948 AND_V4_UB(src0, src1, src2, src3, vCnst255, src0R, src1R, src2R, src3R);
949 FFINTU_W4_SP(src0A, src1A, src2A, src3A, fsrc0A, fsrc1A, fsrc2A, fsrc3A);
950 FFINTU_W4_SP(src0R, src1R, src2R, src3R, fsrc0R, fsrc1R, fsrc2R, fsrc3R);
951 DIV4(vfCnst255, fsrc0A, vfCnst255, fsrc1A, vfCnst255, fsrc2A, vfCnst255,
952 fsrc3A, fsrc0A, fsrc1A, fsrc2A, fsrc3A);
953 MUL4(fsrc0R, fsrc0A, fsrc1R, fsrc1A, fsrc2R, fsrc2A, fsrc3R, fsrc3A,
954 fdst0R, fdst1R, fdst2R, fdst3R);
955 FTRUNCU_W4_UB(fdst0R, fdst1R, fdst2R, fdst3R, dst0, dst1, dst2, dst3);
956 PCKEV_H2_UB(dst1, dst0, dst3, dst2, dst0, dst2);
957 dst0 = (v16u8)__msa_pckev_b((v16i8)dst2, (v16i8)dst0);
958 ST_UB(dst0, destination);
959 destination += 16;
// 8 remaining pixels (2 vectors).
960 } else if (pixelsPerRow & 8) {
961 LD_UB2(source, 16, src0, src1);
962 CEQI_B2_UB(src0, src1, 0, src0A, src1A);
963 src0A = __msa_bmnz_v(src0, alphaMask, src0A);
964 src1A = __msa_bmnz_v(src1, alphaMask, src1A);
965 AND_V2_UB(src0A, src1A, alphaMask, src0A, src1A);
966 src0A = SLDI_UB(src0A, src0A, 3);
967 src1A = SLDI_UB(src1A, src1A, 3);
968 AND_V2_UB(src0, src1, vCnst255, src0R, src1R);
969 FFINTU_W2_SP(src0A, src1A, fsrc0A, fsrc1A);
970 FFINTU_W2_SP(src0R, src1R, fsrc0R, fsrc1R);
971 DIV2(vfCnst255, fsrc0A, vfCnst255, fsrc1A, fsrc0A, fsrc1A);
972 MUL2(fsrc0R, fsrc0A, fsrc1R, fsrc1A, fdst0R, fdst1R);
973 FTRUNCU_W2_UB(fdst0R, fdst1R, dst0, dst1);
974 dst0 = (v16u8)__msa_pckev_h((v8i16)dst1, (v8i16)dst0);
975 dst0 = (v16u8)__msa_pckev_b((v16i8)dst0, (v16i8)dst0);
976 ST8x1_UB(dst0, destination);
977 destination += 8;
978 }
979 }
980
// Leave the last 0-7 pixels for the caller's scalar path.
981 pixelsPerRow &= 7;
982 }
983
// Converts one row of little-endian RGBA8 pixels to RA8 (two bytes per
// pixel: red then alpha). Red is scaled by 255/alpha -- R' = R * (255.0f /
// A) -- while the alpha byte is copied through unchanged from the source.
// Zero alpha bytes are replaced with 255 before the division, so A == 0
// yields a scale of 1.0 -- NOTE(review): this looks like the
// alpha-unmultiply path's zero-alpha special case; confirm against the
// scalar fallback in WebGLImageConversion.cpp.
// Processes 32 pixels per main-loop iteration, then 24/16/8-pixel tails;
// the trailing `pixelsPerRow &= 7` leaves 0-7 pixels for the caller's
// scalar fallback. source and destination are advanced past all pixels
// consumed here (the LD_/ST_ multi-vector macros advance the pointers).
984 ALWAYS_INLINE void packOneRowOfRGBA8LittleToRA8MSA(const uint8_t*& source,
985 uint8_t*& destination,
986 unsigned& pixelsPerRow) {
987 unsigned i;
988 v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
// srcNA: alpha working vectors; srcNR: red working vectors;
// fsrc*/fdst*: per-pixel float copies (one f32 per pixel lane).
989 v16u8 src0A, src1A, src2A, src3A, src4A, src5A, src6A, src7A;
990 v16u8 src0R, src1R, src2R, src3R, src4R, src5R, src6R, src7R;
991 v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
992 v4f32 fsrc0A, fsrc1A, fsrc2A, fsrc3A, fsrc4A, fsrc5A, fsrc6A, fsrc7A;
993 v4f32 fsrc0R, fsrc1R, fsrc2R, fsrc3R, fsrc4R, fsrc5R, fsrc6R, fsrc7R;
994 v4f32 fdst0R, fdst1R, fdst2R, fdst3R, fdst4R, fdst5R, fdst6R, fdst7R;
// Selects the alpha byte (offset 3) of each little-endian RGBA pixel.
995 const v16u8 alphaMask = {0, 0, 0, 255, 0, 0, 0, 255,
996 0, 0, 0, 255, 0, 0, 0, 255};
// VSHF control vector: interleaves the scaled red bytes (indices 0,4,8,12
// of one operand) with the original alpha bytes (indices 3,7,11,15 of the
// other, i.e. 19/23/27/31) into 8 R,A output bytes.
997 const v16i8 vshfm = {0, 19, 4, 23, 8, 27, 12, 31, 0, 0, 0, 0, 0, 0, 0, 0};
// 255 replicated across all four 32-bit lanes, and its float counterpart.
998 const v4u32 vCnst255 = (v4u32)__msa_ldi_w(255);
999 const v4f32 vfCnst255 = __msa_ffint_u_w(vCnst255);
1000
// Main loop: 8 vectors x 16 bytes = 32 RGBA pixels per iteration.
1001 for (i = (pixelsPerRow >> 5); i--;) {
1002 LD_UB8(source, 16, src0, src1, src2, src3, src4, src5, src6, src7);
// Per-byte mask that is all-ones wherever a source byte equals zero.
1003 CEQI_B4_UB(src0, src1, src2, src3, 0, src0A, src1A, src2A, src3A);
1004 CEQI_B4_UB(src4, src5, src6, src7, 0, src4A, src5A, src6A, src7A);
// Blend in alphaMask bytes where the source byte was zero, so a zero
// alpha becomes 255 for the division below.
1005 src0A = __msa_bmnz_v(src0, alphaMask, src0A);
1006 src1A = __msa_bmnz_v(src1, alphaMask, src1A);
1007 src2A = __msa_bmnz_v(src2, alphaMask, src2A);
1008 src3A = __msa_bmnz_v(src3, alphaMask, src3A);
1009 src4A = __msa_bmnz_v(src4, alphaMask, src4A);
1010 src5A = __msa_bmnz_v(src5, alphaMask, src5A);
1011 src6A = __msa_bmnz_v(src6, alphaMask, src6A);
1012 src7A = __msa_bmnz_v(src7, alphaMask, src7A);
// Keep only the alpha byte of each pixel.
1013 AND_V4_UB(src0A, src1A, src2A, src3A, alphaMask, src0A, src1A, src2A,
1014 src3A);
1015 AND_V4_UB(src4A, src5A, src6A, src7A, alphaMask, src4A, src5A, src6A,
1016 src7A);
// Byte-shift by 3 so each alpha lands in the low byte of its 32-bit
// lane, i.e. one u32 alpha value per pixel.
1017 src0A = SLDI_UB(src0A, src0A, 3);
1018 src1A = SLDI_UB(src1A, src1A, 3);
1019 src2A = SLDI_UB(src2A, src2A, 3);
1020 src3A = SLDI_UB(src3A, src3A, 3);
1021 src4A = SLDI_UB(src4A, src4A, 3);
1022 src5A = SLDI_UB(src5A, src5A, 3);
1023 src6A = SLDI_UB(src6A, src6A, 3);
1024 src7A = SLDI_UB(src7A, src7A, 3);
// Keep only the red byte (low byte of each 32-bit lane).
1025 AND_V4_UB(src0, src1, src2, src3, vCnst255, src0R, src1R, src2R, src3R);
1026 AND_V4_UB(src4, src5, src6, src7, vCnst255, src4R, src5R, src6R, src7R);
// Convert per-pixel alpha and red u32 values to float.
1027 FFINTU_W4_SP(src0A, src1A, src2A, src3A, fsrc0A, fsrc1A, fsrc2A, fsrc3A);
1028 FFINTU_W4_SP(src4A, src5A, src6A, src7A, fsrc4A, fsrc5A, fsrc6A, fsrc7A);
1029 FFINTU_W4_SP(src0R, src1R, src2R, src3R, fsrc0R, fsrc1R, fsrc2R, fsrc3R);
1030 FFINTU_W4_SP(src4R, src5R, src6R, src7R, fsrc4R, fsrc5R, fsrc6R, fsrc7R);
// Scale factor per pixel: 255.0f / alpha (alpha is never 0 here).
1031 DIV4(vfCnst255, fsrc0A, vfCnst255, fsrc1A, vfCnst255, fsrc2A, vfCnst255,
1032 fsrc3A, fsrc0A, fsrc1A, fsrc2A, fsrc3A);
1033 DIV4(vfCnst255, fsrc4A, vfCnst255, fsrc5A, vfCnst255, fsrc6A, vfCnst255,
1034 fsrc7A, fsrc4A, fsrc5A, fsrc6A, fsrc7A);
// R' = R * (255 / A), then truncate back to unsigned integers.
1035 MUL4(fsrc0R, fsrc0A, fsrc1R, fsrc1A, fsrc2R, fsrc2A, fsrc3R, fsrc3A, fdst0R,
1036 fdst1R, fdst2R, fdst3R);
1037 MUL4(fsrc4R, fsrc4A, fsrc5R, fsrc5A, fsrc6R, fsrc6A, fsrc7R, fsrc7A, fdst4R,
1038 fdst5R, fdst6R, fdst7R);
1039 FTRUNCU_W4_UB(fdst0R, fdst1R, fdst2R, fdst3R, dst0, dst1, dst2, dst3);
1040 FTRUNCU_W4_UB(fdst4R, fdst5R, fdst6R, fdst7R, dst4, dst5, dst6, dst7);
// Interleave scaled red with the original (pass-through) alpha bytes:
// each shuffle yields 8 bytes of R,A pairs in the low half.
1041 dst0 = VSHF_UB(dst0, src0, vshfm);
1042 dst1 = VSHF_UB(dst1, src1, vshfm);
1043 dst2 = VSHF_UB(dst2, src2, vshfm);
1044 dst3 = VSHF_UB(dst3, src3, vshfm);
1045 dst4 = VSHF_UB(dst4, src4, vshfm);
1046 dst5 = VSHF_UB(dst5, src5, vshfm);
1047 dst6 = VSHF_UB(dst6, src6, vshfm);
1048 dst7 = VSHF_UB(dst7, src7, vshfm);
// Merge pairs of 8-byte results into full vectors and store 64 bytes.
1049 ILVR_D4_UB(dst1, dst0, dst3, dst2, dst5, dst4, dst7, dst6, dst0, dst1, dst2,
1050 dst3);
1051 ST_UB4(dst0, dst1, dst2, dst3, destination, 16);
1052 }
1053
// Tail handling: 24, 16, or 8 remaining pixels (same algorithm as above,
// on fewer vectors); anything below 8 is left to the scalar fallback.
1054 if (pixelsPerRow & 31) {
// 24 remaining pixels (6 vectors).
1055 if ((pixelsPerRow & 16) && (pixelsPerRow & 8)) {
1056 LD_UB6(source, 16, src0, src1, src2, src3, src4, src5);
1057 CEQI_B4_UB(src0, src1, src2, src3, 0, src0A, src1A, src2A, src3A);
1058 CEQI_B2_UB(src4, src5, 0, src4A, src5A);
1059 src0A = __msa_bmnz_v(src0, alphaMask, src0A);
1060 src1A = __msa_bmnz_v(src1, alphaMask, src1A);
1061 src2A = __msa_bmnz_v(src2, alphaMask, src2A);
1062 src3A = __msa_bmnz_v(src3, alphaMask, src3A);
1063 src4A = __msa_bmnz_v(src4, alphaMask, src4A);
1064 src5A = __msa_bmnz_v(src5, alphaMask, src5A);
1065 AND_V4_UB(src0A, src1A, src2A, src3A, alphaMask, src0A, src1A, src2A,
1066 src3A);
1067 AND_V2_UB(src4A, src5A, alphaMask, src4A, src5A);
1068 src0A = SLDI_UB(src0A, src0A, 3);
1069 src1A = SLDI_UB(src1A, src1A, 3);
1070 src2A = SLDI_UB(src2A, src2A, 3);
1071 src3A = SLDI_UB(src3A, src3A, 3);
1072 src4A = SLDI_UB(src4A, src4A, 3);
1073 src5A = SLDI_UB(src5A, src5A, 3);
1074 AND_V4_UB(src0, src1, src2, src3, vCnst255, src0R, src1R, src2R, src3R);
1075 AND_V2_UB(src4, src5, vCnst255, src4R, src5R);
1076 FFINTU_W4_SP(src0A, src1A, src2A, src3A, fsrc0A, fsrc1A, fsrc2A, fsrc3A);
1077 FFINTU_W2_SP(src4A, src5A, fsrc4A, fsrc5A);
1078 FFINTU_W4_SP(src0R, src1R, src2R, src3R, fsrc0R, fsrc1R, fsrc2R, fsrc3R);
1079 FFINTU_W2_SP(src4R, src5R, fsrc4R, fsrc5R);
1080 DIV4(vfCnst255, fsrc0A, vfCnst255, fsrc1A, vfCnst255, fsrc2A, vfCnst255,
1081 fsrc3A, fsrc0A, fsrc1A, fsrc2A, fsrc3A);
1082 DIV2(vfCnst255, fsrc4A, vfCnst255, fsrc5A, fsrc4A, fsrc5A);
1083 MUL4(fsrc0R, fsrc0A, fsrc1R, fsrc1A, fsrc2R, fsrc2A, fsrc3R, fsrc3A,
1084 fdst0R, fdst1R, fdst2R, fdst3R);
1085 MUL2(fsrc4R, fsrc4A, fsrc5R, fsrc5A, fdst4R, fdst5R);
1086 FTRUNCU_W4_UB(fdst0R, fdst1R, fdst2R, fdst3R, dst0, dst1, dst2, dst3);
1087 FTRUNCU_W2_UB(fdst4R, fdst5R, dst4, dst5);
1088 dst0 = VSHF_UB(dst0, src0, vshfm);
1089 dst1 = VSHF_UB(dst1, src1, vshfm);
1090 dst2 = VSHF_UB(dst2, src2, vshfm);
1091 dst3 = VSHF_UB(dst3, src3, vshfm);
1092 dst4 = VSHF_UB(dst4, src4, vshfm);
1093 dst5 = VSHF_UB(dst5, src5, vshfm);
1094 ILVR_D3_UB(dst1, dst0, dst3, dst2, dst5, dst4, dst0, dst1, dst2);
1095 ST_UB3(dst0, dst1, dst2, destination, 16);
// 16 remaining pixels (4 vectors).
1096 } else if (pixelsPerRow & 16) {
1097 LD_UB4(source, 16, src0, src1, src2, src3);
1098 CEQI_B4_UB(src0, src1, src2, src3, 0, src0A, src1A, src2A, src3A);
1099 src0A = __msa_bmnz_v(src0, alphaMask, src0A);
1100 src1A = __msa_bmnz_v(src1, alphaMask, src1A);
1101 src2A = __msa_bmnz_v(src2, alphaMask, src2A);
1102 src3A = __msa_bmnz_v(src3, alphaMask, src3A);
1103 AND_V4_UB(src0A, src1A, src2A, src3A, alphaMask, src0A, src1A, src2A,
1104 src3A);
1105 src0A = SLDI_UB(src0A, src0A, 3);
1106 src1A = SLDI_UB(src1A, src1A, 3);
1107 src2A = SLDI_UB(src2A, src2A, 3);
1108 src3A = SLDI_UB(src3A, src3A, 3);
1109 AND_V4_UB(src0, src1, src2, src3, vCnst255, src0R, src1R, src2R, src3R);
1110 FFINTU_W4_SP(src0A, src1A, src2A, src3A, fsrc0A, fsrc1A, fsrc2A, fsrc3A);
1111 FFINTU_W4_SP(src0R, src1R, src2R, src3R, fsrc0R, fsrc1R, fsrc2R, fsrc3R);
1112 DIV4(vfCnst255, fsrc0A, vfCnst255, fsrc1A, vfCnst255, fsrc2A, vfCnst255,
1113 fsrc3A, fsrc0A, fsrc1A, fsrc2A, fsrc3A);
1114 MUL4(fsrc0R, fsrc0A, fsrc1R, fsrc1A, fsrc2R, fsrc2A, fsrc3R, fsrc3A,
1115 fdst0R, fdst1R, fdst2R, fdst3R);
1116 FTRUNCU_W4_UB(fdst0R, fdst1R, fdst2R, fdst3R, dst0, dst1, dst2, dst3);
1117 dst0 = VSHF_UB(dst0, src0, vshfm);
1118 dst1 = VSHF_UB(dst1, src1, vshfm);
1119 dst2 = VSHF_UB(dst2, src2, vshfm);
1120 dst3 = VSHF_UB(dst3, src3, vshfm);
1121 ILVR_D2_UB(dst1, dst0, dst3, dst2, dst0, dst1);
1122 ST_UB2(dst0, dst1, destination, 16);
// 8 remaining pixels (2 vectors).
1123 } else if (pixelsPerRow & 8) {
1124 LD_UB2(source, 16, src0, src1);
1125 CEQI_B2_UB(src0, src1, 0, src0A, src1A);
1126 src0A = __msa_bmnz_v(src0, alphaMask, src0A);
1127 src1A = __msa_bmnz_v(src1, alphaMask, src1A);
1128 AND_V2_UB(src0A, src1A, alphaMask, src0A, src1A);
1129 src0A = SLDI_UB(src0A, src0A, 3);
1130 src1A = SLDI_UB(src1A, src1A, 3);
1131 AND_V2_UB(src0, src1, vCnst255, src0R, src1R);
1132 FFINTU_W2_SP(src0A, src1A, fsrc0A, fsrc1A);
1133 FFINTU_W2_SP(src0R, src1R, fsrc0R, fsrc1R);
1134 DIV2(vfCnst255, fsrc0A, vfCnst255, fsrc1A, fsrc0A, fsrc1A);
1135 MUL2(fsrc0R, fsrc0A, fsrc1R, fsrc1A, fdst0R, fdst1R);
1136 FTRUNCU_W2_UB(fdst0R, fdst1R, dst0, dst1);
1137 dst0 = VSHF_UB(dst0, src0, vshfm);
1138 dst1 = VSHF_UB(dst1, src1, vshfm);
1139 dst0 = (v16u8)__msa_ilvr_d((v2i64)dst1, (v2i64)dst0);
1140 ST_UB(dst0, destination);
1141 destination += 16;
1142 }
1143 }
1144
// Leave the last 0-7 pixels for the caller's scalar path.
1145 pixelsPerRow &= 7;
1146 }
1147
718 } // namespace SIMD 1148 } // namespace SIMD
719 1149
720 } // namespace blink 1150 } // namespace blink
721 1151
722 #endif // HAVE(MIPS_MSA_INTRINSICS) 1152 #endif // HAVE(MIPS_MSA_INTRINSICS)
723 1153
724 #endif // WebGLImageConversionMSA_h 1154 #endif // WebGLImageConversionMSA_h
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698