| OLD | NEW |
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef WebGLImageConversionMSA_h | 5 #ifndef WebGLImageConversionMSA_h |
| 6 #define WebGLImageConversionMSA_h | 6 #define WebGLImageConversionMSA_h |
| 7 | 7 |
| 8 #if HAVE(MIPS_MSA_INTRINSICS) | 8 #if HAVE(MIPS_MSA_INTRINSICS) |
| 9 | 9 |
| 10 #include "platform/cpu/mips/CommonMacrosMSA.h" | 10 #include "platform/cpu/mips/CommonMacrosMSA.h" |
| (...skipping 697 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 708 src0r = (v16u8)__msa_binsri_b((v16u8)src0r, (v16u8)src0g, 2); | 708 src0r = (v16u8)__msa_binsri_b((v16u8)src0r, (v16u8)src0g, 2); |
| 709 src0b = (v16u8)__msa_binsri_b((v16u8)src0gt, (v16u8)src0b, 4); | 709 src0b = (v16u8)__msa_binsri_b((v16u8)src0gt, (v16u8)src0b, 4); |
| 710 dst0 = (v8u16)__msa_ilvev_b((v16i8)src0r, (v16i8)src0b); | 710 dst0 = (v8u16)__msa_ilvev_b((v16i8)src0r, (v16i8)src0b); |
| 711 ST_UH(dst0, destination); | 711 ST_UH(dst0, destination); |
| 712 destination += 8; | 712 destination += 8; |
| 713 } | 713 } |
| 714 } | 714 } |
| 715 | 715 |
| 716 pixelsPerRow &= 7; | 716 pixelsPerRow &= 7; |
| 717 } | 717 } |
| 718 |
| 719 ALWAYS_INLINE void packOneRowOfRGBA8ToUnsignedShort4444MSA( |
| 720 const uint8_t*& source, |
| 721 uint16_t*& destination, |
| 722 unsigned& pixelsPerRow) { |
| 723 unsigned i; |
| 724 v16u8 src0, src1, src2, src3, src4, src5, src6, src7; |
| 725 v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; |
| 726 v8u16 dst0, dst1, dst2, dst3; |
| 727 |
| 728 for (i = (pixelsPerRow >> 5); i--;) { |
| 729 LD_UB8(source, 16, src0, src1, src2, src3, src4, src5, src6, src7); |
| 730 SRLI_H4_UB(src0, src1, src2, src3, vec0, vec1, vec2, vec3, 12); |
| 731 SRLI_H4_UB(src4, src5, src6, src7, vec4, vec5, vec6, vec7, 12); |
| 732 BINSLI_B2_UB(vec0, src0, vec1, src1, vec0, vec1, 3); |
| 733 BINSLI_B2_UB(vec2, src2, vec3, src3, vec2, vec3, 3); |
| 734 BINSLI_B2_UB(vec4, src4, vec5, src5, vec4, vec5, 3); |
| 735 BINSLI_B2_UB(vec6, src6, vec7, src7, vec6, vec7, 3); |
| 736 PCKEV_B4_UH(vec1, vec0, vec3, vec2, vec5, vec4, vec7, vec6, dst0, dst1, |
| 737 dst2, dst3); |
| 738 SHF_B4_UH(dst0, dst1, dst2, dst3, 177); |
| 739 ST_UH4(dst0, dst1, dst2, dst3, destination, 8); |
| 740 } |
| 741 |
| 742 if (pixelsPerRow & 31) { |
| 743 if (pixelsPerRow & 16) { |
| 744 if ((pixelsPerRow & 8) && (pixelsPerRow & 4)) { |
| 745 LD_UB7(source, 16, src0, src1, src2, src3, src4, src5, src6); |
| 746 SRLI_H4_UB(src0, src1, src2, src3, vec0, vec1, vec2, vec3, 12); |
| 747 SRLI_H2_UB(src4, src5, vec4, vec5, 12); |
| 748 vec6 = (v16u8)SRLI_H(src6, 12); |
| 749 BINSLI_B2_UB(vec0, src0, vec1, src1, vec0, vec1, 3); |
| 750 BINSLI_B2_UB(vec2, src2, vec3, src3, vec2, vec3, 3); |
| 751 BINSLI_B2_UB(vec4, src4, vec5, src5, vec4, vec5, 3); |
| 752 vec6 = (v16u8)__msa_binsli_b((v16u8)vec6, (v16u8)src6, 3); |
| 753 PCKEV_B2_UH(vec1, vec0, vec3, vec2, dst0, dst1); |
| 754 PCKEV_B2_UH(vec5, vec4, vec6, vec6, dst2, dst3); |
| 755 SHF_B4_UH(dst0, dst1, dst2, dst3, 177); |
| 756 ST_UH3(dst0, dst1, dst2, destination, 8); |
| 757 ST8x1_UB(dst3, destination); |
| 758 destination += 4; |
| 759 } else if (pixelsPerRow & 8) { |
| 760 LD_UB6(source, 16, src0, src1, src2, src3, src4, src5); |
| 761 SRLI_H4_UB(src0, src1, src2, src3, vec0, vec1, vec2, vec3, 12); |
| 762 SRLI_H2_UB(src4, src5, vec4, vec5, 12); |
| 763 BINSLI_B2_UB(vec0, src0, vec1, src1, vec0, vec1, 3); |
| 764 BINSLI_B2_UB(vec2, src2, vec3, src3, vec2, vec3, 3); |
| 765 BINSLI_B2_UB(vec4, src4, vec5, src5, vec4, vec5, 3); |
| 766 PCKEV_B3_UH(vec1, vec0, vec3, vec2, vec5, vec4, dst0, dst1, dst2); |
| 767 SHF_B3_UH(dst0, dst1, dst2, 177); |
| 768 ST_UH3(dst0, dst1, dst2, destination, 8); |
| 769 } else if (pixelsPerRow & 4) { |
| 770 LD_UB5(source, 16, src0, src1, src2, src3, src4); |
| 771 SRLI_H4_UB(src0, src1, src2, src3, vec0, vec1, vec2, vec3, 12); |
| 772 vec4 = (v16u8)SRLI_H(src4, 12); |
| 773 BINSLI_B2_UB(vec0, src0, vec1, src1, vec0, vec1, 3); |
| 774 BINSLI_B2_UB(vec2, src2, vec3, src3, vec2, vec3, 3); |
| 775 vec4 = (v16u8)__msa_binsli_b((v16u8)vec4, (v16u8)src4, 3); |
| 776 PCKEV_B3_UH(vec1, vec0, vec3, vec2, vec4, vec4, dst0, dst1, dst2); |
| 777 SHF_B3_UH(dst0, dst1, dst2, 177); |
| 778 ST_UH2(dst0, dst1, destination, 8); |
| 779 ST8x1_UB(dst2, destination); |
| 780 destination += 4; |
| 781 } else { |
| 782 LD_UB4(source, 16, src0, src1, src2, src3); |
| 783 SRLI_H4_UB(src0, src1, src2, src3, vec0, vec1, vec2, vec3, 12); |
| 784 BINSLI_B2_UB(vec0, src0, vec1, src1, vec0, vec1, 3); |
| 785 BINSLI_B2_UB(vec2, src2, vec3, src3, vec2, vec3, 3); |
| 786 PCKEV_B2_UH(vec1, vec0, vec3, vec2, dst0, dst1); |
| 787 SHF_B2_UH(dst0, dst1, 177); |
| 788 ST_UH2(dst0, dst1, destination, 8); |
| 789 } |
| 790 } else if ((pixelsPerRow & 8) && (pixelsPerRow & 4)) { |
| 791 LD_UB3(source, 16, src0, src1, src2); |
| 792 SRLI_H2_UB(src0, src1, vec0, vec1, 12); |
| 793 vec2 = (v16u8)SRLI_H(src2, 12); |
| 794 BINSLI_B2_UB(vec0, src0, vec1, src1, vec0, vec1, 3); |
| 795 vec2 = (v16u8)__msa_binsli_b((v16u8)vec2, (v16u8)src2, 3); |
| 796 PCKEV_B2_UH(vec1, vec0, vec2, vec2, dst0, dst1); |
| 797 SHF_B2_UH(dst0, dst1, 177); |
| 798 ST_UH(dst0, destination); |
| 799 destination += 8; |
| 800 ST8x1_UB(dst1, destination); |
| 801 destination += 4; |
| 802 } else if (pixelsPerRow & 16) { |
| 803 LD_UB4(source, 16, src0, src1, src2, src3); |
| 804 SRLI_H4_UB(src0, src1, src2, src3, vec0, vec1, vec2, vec3, 12); |
| 805 BINSLI_B2_UB(vec0, src0, vec1, src1, vec0, vec1, 3); |
| 806 BINSLI_B2_UB(vec2, src2, vec3, src3, vec2, vec3, 3); |
| 807 PCKEV_B2_UH(vec1, vec0, vec3, vec2, dst0, dst1); |
| 808 SHF_B2_UH(dst0, dst1, 177); |
| 809 ST_UH2(dst0, dst1, destination, 8); |
| 810 } else if (pixelsPerRow & 8) { |
| 811 LD_UB2(source, 16, src0, src1); |
| 812 SRLI_H2_UB(src0, src1, vec0, vec1, 12); |
| 813 BINSLI_B2_UB(vec0, src0, vec1, src1, vec0, vec1, 3); |
| 814 dst0 = (v8u16)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); |
| 815 dst0 = (v8u16)__msa_shf_b((v16i8)dst0, 177); |
| 816 ST_UH(dst0, destination); |
| 817 destination += 8; |
| 818 } else if (pixelsPerRow & 4) { |
| 819 src0 = LD_UB(source); |
| 820 source += 16; |
| 821 vec0 = (v16u8)SRLI_H(src0, 12); |
| 822 vec0 = (v16u8)__msa_binsli_b((v16u8)vec0, (v16u8)src0, 3); |
| 823 dst0 = (v8u16)__msa_pckev_b((v16i8)vec0, (v16i8)vec0); |
| 824 dst0 = (v8u16)__msa_shf_b((v16i8)dst0, 177); |
| 825 ST8x1_UB(dst0, destination); |
| 826 destination += 4; |
| 827 } |
| 828 } |
| 829 |
| 830 pixelsPerRow &= 3; |
| 831 } |
| 832 |
// Packs one row of little-endian RGBA8 pixels to single-channel R8,
// applying an alpha un-multiply: each red value is scaled by 255/alpha
// (see the DIV/MUL pair below). Alpha bytes equal to 0 are first replaced
// with 255, so the scale factor degenerates to 1.0 for fully transparent
// pixels and the division never sees a zero.
//
// Consumes 32 pixels per main-loop iteration, then 24/16/8-pixel vector
// tails, advancing |source| and |destination| in place; the scalar
// remainder (pixelsPerRow & 7) is left for the caller's generic code.
// NOTE(review): the LD_* helpers are assumed to advance |source| past the
// bytes they consume — confirm against CommonMacrosMSA.h.
ALWAYS_INLINE void packOneRowOfRGBA8LittleToR8MSA(const uint8_t*& source,
                                                  uint8_t*& destination,
                                                  unsigned& pixelsPerRow) {
  unsigned i;
  v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
  v16u8 src0A, src1A, src2A, src3A, src4A, src5A, src6A, src7A;
  v16u8 src0R, src1R, src2R, src3R, src4R, src5R, src6R, src7R;
  v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
  v4f32 fsrc0A, fsrc1A, fsrc2A, fsrc3A, fsrc4A, fsrc5A, fsrc6A, fsrc7A;
  v4f32 fsrc0R, fsrc1R, fsrc2R, fsrc3R, fsrc4R, fsrc5R, fsrc6R, fsrc7R;
  v4f32 fdst0R, fdst1R, fdst2R, fdst3R, fdst4R, fdst5R, fdst6R, fdst7R;
  // Selects the alpha byte (offset 3) of each 4-byte little-endian pixel.
  const v16u8 alphaMask = {0, 0, 0, 255, 0, 0, 0, 255,
                           0, 0, 0, 255, 0, 0, 0, 255};
  const v4u32 vCnst255 = (v4u32)__msa_ldi_w(255);
  const v4f32 vfCnst255 = __msa_ffint_u_w(vCnst255);

  // Main loop: 8 vectors x 16 bytes = 32 RGBA pixels per iteration.
  for (i = (pixelsPerRow >> 5); i--;) {
    LD_UB8(source, 16, src0, src1, src2, src3, src4, src5, src6, src7);
    // CEQI yields an all-ones byte wherever a source byte is 0; BMNZ then
    // substitutes the corresponding alphaMask byte (255 in alpha lanes),
    // and the ANDs below isolate the alpha lane of each pixel.
    CEQI_B4_UB(src0, src1, src2, src3, 0, src0A, src1A, src2A, src3A);
    CEQI_B4_UB(src4, src5, src6, src7, 0, src4A, src5A, src6A, src7A);
    src0A = __msa_bmnz_v(src0, alphaMask, src0A);
    src1A = __msa_bmnz_v(src1, alphaMask, src1A);
    src2A = __msa_bmnz_v(src2, alphaMask, src2A);
    src3A = __msa_bmnz_v(src3, alphaMask, src3A);
    src4A = __msa_bmnz_v(src4, alphaMask, src4A);
    src5A = __msa_bmnz_v(src5, alphaMask, src5A);
    src6A = __msa_bmnz_v(src6, alphaMask, src6A);
    src7A = __msa_bmnz_v(src7, alphaMask, src7A);
    AND_V4_UB(src0A, src1A, src2A, src3A, alphaMask, src0A, src1A, src2A,
              src3A);
    AND_V4_UB(src4A, src5A, src6A, src7A, alphaMask, src4A, src5A, src6A,
              src7A);
    // Rotate each vector by 3 bytes so every alpha lands in the low byte
    // of its 32-bit word, ready for integer-to-float conversion.
    src0A = SLDI_UB(src0A, src0A, 3);
    src1A = SLDI_UB(src1A, src1A, 3);
    src2A = SLDI_UB(src2A, src2A, 3);
    src3A = SLDI_UB(src3A, src3A, 3);
    src4A = SLDI_UB(src4A, src4A, 3);
    src5A = SLDI_UB(src5A, src5A, 3);
    src6A = SLDI_UB(src6A, src6A, 3);
    src7A = SLDI_UB(src7A, src7A, 3);
    // Isolate the red byte (low byte of each little-endian pixel word).
    AND_V4_UB(src0, src1, src2, src3, vCnst255, src0R, src1R, src2R, src3R);
    AND_V4_UB(src4, src5, src6, src7, vCnst255, src4R, src5R, src6R, src7R);
    FFINTU_W4_SP(src0A, src1A, src2A, src3A, fsrc0A, fsrc1A, fsrc2A, fsrc3A);
    FFINTU_W4_SP(src4A, src5A, src6A, src7A, fsrc4A, fsrc5A, fsrc6A, fsrc7A);
    FFINTU_W4_SP(src0R, src1R, src2R, src3R, fsrc0R, fsrc1R, fsrc2R, fsrc3R);
    FFINTU_W4_SP(src4R, src5R, src6R, src7R, fsrc4R, fsrc5R, fsrc6R, fsrc7R);
    // R' = R * (255 / A), computed in single precision then truncated.
    DIV4(vfCnst255, fsrc0A, vfCnst255, fsrc1A, vfCnst255, fsrc2A, vfCnst255,
         fsrc3A, fsrc0A, fsrc1A, fsrc2A, fsrc3A);
    DIV4(vfCnst255, fsrc4A, vfCnst255, fsrc5A, vfCnst255, fsrc6A, vfCnst255,
         fsrc7A, fsrc4A, fsrc5A, fsrc6A, fsrc7A);
    MUL4(fsrc0R, fsrc0A, fsrc1R, fsrc1A, fsrc2R, fsrc2A, fsrc3R, fsrc3A, fdst0R,
         fdst1R, fdst2R, fdst3R);
    MUL4(fsrc4R, fsrc4A, fsrc5R, fsrc5A, fsrc6R, fsrc6A, fsrc7R, fsrc7A, fdst4R,
         fdst5R, fdst6R, fdst7R);
    FTRUNCU_W4_UB(fdst0R, fdst1R, fdst2R, fdst3R, dst0, dst1, dst2, dst3);
    FTRUNCU_W4_UB(fdst4R, fdst5R, fdst6R, fdst7R, dst4, dst5, dst6, dst7);
    // Narrow the word results to bytes and store 32 R8 values.
    PCKEV_H4_UB(dst1, dst0, dst3, dst2, dst5, dst4, dst7, dst6, dst0, dst2,
                dst4, dst6);
    PCKEV_B2_UB(dst2, dst0, dst6, dst4, dst0, dst1);
    ST_UB2(dst0, dst1, destination, 16);
  }

  if (pixelsPerRow & 31) {
    if ((pixelsPerRow & 16) && (pixelsPerRow & 8)) {
      // 24-pixel tail (6 source vectors -> 16 + 8 output bytes).
      LD_UB6(source, 16, src0, src1, src2, src3, src4, src5);
      CEQI_B4_UB(src0, src1, src2, src3, 0, src0A, src1A, src2A, src3A);
      CEQI_B2_UB(src4, src5, 0, src4A, src5A);
      src0A = __msa_bmnz_v(src0, alphaMask, src0A);
      src1A = __msa_bmnz_v(src1, alphaMask, src1A);
      src2A = __msa_bmnz_v(src2, alphaMask, src2A);
      src3A = __msa_bmnz_v(src3, alphaMask, src3A);
      src4A = __msa_bmnz_v(src4, alphaMask, src4A);
      src5A = __msa_bmnz_v(src5, alphaMask, src5A);
      AND_V4_UB(src0A, src1A, src2A, src3A, alphaMask, src0A, src1A, src2A,
                src3A);
      AND_V2_UB(src4A, src5A, alphaMask, src4A, src5A);
      src0A = SLDI_UB(src0A, src0A, 3);
      src1A = SLDI_UB(src1A, src1A, 3);
      src2A = SLDI_UB(src2A, src2A, 3);
      src3A = SLDI_UB(src3A, src3A, 3);
      src4A = SLDI_UB(src4A, src4A, 3);
      src5A = SLDI_UB(src5A, src5A, 3);
      AND_V4_UB(src0, src1, src2, src3, vCnst255, src0R, src1R, src2R, src3R);
      AND_V2_UB(src4, src5, vCnst255, src4R, src5R);
      FFINTU_W4_SP(src0A, src1A, src2A, src3A, fsrc0A, fsrc1A, fsrc2A, fsrc3A);
      FFINTU_W2_SP(src4A, src5A, fsrc4A, fsrc5A);
      FFINTU_W4_SP(src0R, src1R, src2R, src3R, fsrc0R, fsrc1R, fsrc2R, fsrc3R);
      FFINTU_W2_SP(src4R, src5R, fsrc4R, fsrc5R);
      DIV4(vfCnst255, fsrc0A, vfCnst255, fsrc1A, vfCnst255, fsrc2A, vfCnst255,
           fsrc3A, fsrc0A, fsrc1A, fsrc2A, fsrc3A);
      DIV2(vfCnst255, fsrc4A, vfCnst255, fsrc5A, fsrc4A, fsrc5A);
      MUL4(fsrc0R, fsrc0A, fsrc1R, fsrc1A, fsrc2R, fsrc2A, fsrc3R, fsrc3A,
           fdst0R, fdst1R, fdst2R, fdst3R);
      MUL2(fsrc4R, fsrc4A, fsrc5R, fsrc5A, fdst4R, fdst5R);
      FTRUNCU_W4_UB(fdst0R, fdst1R, fdst2R, fdst3R, dst0, dst1, dst2, dst3);
      FTRUNCU_W2_UB(fdst4R, fdst5R, dst4, dst5);
      PCKEV_H3_UB(dst1, dst0, dst3, dst2, dst5, dst4, dst0, dst2, dst4);
      PCKEV_B2_UB(dst2, dst0, dst4, dst4, dst0, dst1);
      ST_UB(dst0, destination);
      destination += 16;
      ST8x1_UB(dst1, destination);
      destination += 8;
    } else if (pixelsPerRow & 16) {
      // 16-pixel tail.
      LD_UB4(source, 16, src0, src1, src2, src3);
      CEQI_B4_UB(src0, src1, src2, src3, 0, src0A, src1A, src2A, src3A);
      src0A = __msa_bmnz_v(src0, alphaMask, src0A);
      src1A = __msa_bmnz_v(src1, alphaMask, src1A);
      src2A = __msa_bmnz_v(src2, alphaMask, src2A);
      src3A = __msa_bmnz_v(src3, alphaMask, src3A);
      AND_V4_UB(src0A, src1A, src2A, src3A, alphaMask, src0A, src1A, src2A,
                src3A);
      src0A = SLDI_UB(src0A, src0A, 3);
      src1A = SLDI_UB(src1A, src1A, 3);
      src2A = SLDI_UB(src2A, src2A, 3);
      src3A = SLDI_UB(src3A, src3A, 3);
      AND_V4_UB(src0, src1, src2, src3, vCnst255, src0R, src1R, src2R, src3R);
      FFINTU_W4_SP(src0A, src1A, src2A, src3A, fsrc0A, fsrc1A, fsrc2A, fsrc3A);
      FFINTU_W4_SP(src0R, src1R, src2R, src3R, fsrc0R, fsrc1R, fsrc2R, fsrc3R);
      DIV4(vfCnst255, fsrc0A, vfCnst255, fsrc1A, vfCnst255, fsrc2A, vfCnst255,
           fsrc3A, fsrc0A, fsrc1A, fsrc2A, fsrc3A);
      MUL4(fsrc0R, fsrc0A, fsrc1R, fsrc1A, fsrc2R, fsrc2A, fsrc3R, fsrc3A,
           fdst0R, fdst1R, fdst2R, fdst3R);
      FTRUNCU_W4_UB(fdst0R, fdst1R, fdst2R, fdst3R, dst0, dst1, dst2, dst3);
      PCKEV_H2_UB(dst1, dst0, dst3, dst2, dst0, dst2);
      dst0 = (v16u8)__msa_pckev_b((v16i8)dst2, (v16i8)dst0);
      ST_UB(dst0, destination);
      destination += 16;
    } else if (pixelsPerRow & 8) {
      // 8-pixel tail.
      LD_UB2(source, 16, src0, src1);
      CEQI_B2_UB(src0, src1, 0, src0A, src1A);
      src0A = __msa_bmnz_v(src0, alphaMask, src0A);
      src1A = __msa_bmnz_v(src1, alphaMask, src1A);
      AND_V2_UB(src0A, src1A, alphaMask, src0A, src1A);
      src0A = SLDI_UB(src0A, src0A, 3);
      src1A = SLDI_UB(src1A, src1A, 3);
      AND_V2_UB(src0, src1, vCnst255, src0R, src1R);
      FFINTU_W2_SP(src0A, src1A, fsrc0A, fsrc1A);
      FFINTU_W2_SP(src0R, src1R, fsrc0R, fsrc1R);
      DIV2(vfCnst255, fsrc0A, vfCnst255, fsrc1A, fsrc0A, fsrc1A);
      MUL2(fsrc0R, fsrc0A, fsrc1R, fsrc1A, fdst0R, fdst1R);
      FTRUNCU_W2_UB(fdst0R, fdst1R, dst0, dst1);
      dst0 = (v16u8)__msa_pckev_h((v8i16)dst1, (v8i16)dst0);
      dst0 = (v16u8)__msa_pckev_b((v16i8)dst0, (v16i8)dst0);
      ST8x1_UB(dst0, destination);
      destination += 8;
    }
  }

  pixelsPerRow &= 7;
}
| 983 |
// Packs one row of little-endian RGBA8 pixels to two-channel RA8 (one red
// and one alpha byte per pixel), applying the same 255/alpha un-multiply
// to the red channel as packOneRowOfRGBA8LittleToR8MSA; the alpha byte is
// copied through unchanged. Zero alpha bytes are treated as 255 so the
// scale factor degenerates to 1.0.
//
// Consumes 32 pixels per main-loop iteration, then 24/16/8-pixel vector
// tails, advancing |source| and |destination| in place; the scalar
// remainder (pixelsPerRow & 7) is left for the caller's generic code.
ALWAYS_INLINE void packOneRowOfRGBA8LittleToRA8MSA(const uint8_t*& source,
                                                   uint8_t*& destination,
                                                   unsigned& pixelsPerRow) {
  unsigned i;
  v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
  v16u8 src0A, src1A, src2A, src3A, src4A, src5A, src6A, src7A;
  v16u8 src0R, src1R, src2R, src3R, src4R, src5R, src6R, src7R;
  v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;
  v4f32 fsrc0A, fsrc1A, fsrc2A, fsrc3A, fsrc4A, fsrc5A, fsrc6A, fsrc7A;
  v4f32 fsrc0R, fsrc1R, fsrc2R, fsrc3R, fsrc4R, fsrc5R, fsrc6R, fsrc7R;
  v4f32 fdst0R, fdst1R, fdst2R, fdst3R, fdst4R, fdst5R, fdst6R, fdst7R;
  // Selects the alpha byte (offset 3) of each 4-byte little-endian pixel.
  const v16u8 alphaMask = {0, 0, 0, 255, 0, 0, 0, 255,
                           0, 0, 0, 255, 0, 0, 0, 255};
  // Shuffle control interleaving the low byte of each scaled-red word
  // (indices 0,4,8,12) with the original alpha bytes (indices 19,23,27,31,
  // i.e. bytes 3,7,11,15 of the other shuffle operand). Only the low 8
  // result bytes (four R,A pairs) are meaningful; ILVR_D later merges two
  // such halves into a full 16-byte store.
  const v16i8 vshfm = {0, 19, 4, 23, 8, 27, 12, 31, 0, 0, 0, 0, 0, 0, 0, 0};
  const v4u32 vCnst255 = (v4u32)__msa_ldi_w(255);
  const v4f32 vfCnst255 = __msa_ffint_u_w(vCnst255);

  // Main loop: 8 vectors x 16 bytes = 32 RGBA pixels per iteration.
  for (i = (pixelsPerRow >> 5); i--;) {
    LD_UB8(source, 16, src0, src1, src2, src3, src4, src5, src6, src7);
    // Replace zero alpha bytes with 255 and isolate the alpha lane
    // (CEQI marks zero bytes, BMNZ substitutes alphaMask there, AND masks).
    CEQI_B4_UB(src0, src1, src2, src3, 0, src0A, src1A, src2A, src3A);
    CEQI_B4_UB(src4, src5, src6, src7, 0, src4A, src5A, src6A, src7A);
    src0A = __msa_bmnz_v(src0, alphaMask, src0A);
    src1A = __msa_bmnz_v(src1, alphaMask, src1A);
    src2A = __msa_bmnz_v(src2, alphaMask, src2A);
    src3A = __msa_bmnz_v(src3, alphaMask, src3A);
    src4A = __msa_bmnz_v(src4, alphaMask, src4A);
    src5A = __msa_bmnz_v(src5, alphaMask, src5A);
    src6A = __msa_bmnz_v(src6, alphaMask, src6A);
    src7A = __msa_bmnz_v(src7, alphaMask, src7A);
    AND_V4_UB(src0A, src1A, src2A, src3A, alphaMask, src0A, src1A, src2A,
              src3A);
    AND_V4_UB(src4A, src5A, src6A, src7A, alphaMask, src4A, src5A, src6A,
              src7A);
    // Rotate by 3 bytes: alpha moves to the low byte of each 32-bit word.
    src0A = SLDI_UB(src0A, src0A, 3);
    src1A = SLDI_UB(src1A, src1A, 3);
    src2A = SLDI_UB(src2A, src2A, 3);
    src3A = SLDI_UB(src3A, src3A, 3);
    src4A = SLDI_UB(src4A, src4A, 3);
    src5A = SLDI_UB(src5A, src5A, 3);
    src6A = SLDI_UB(src6A, src6A, 3);
    src7A = SLDI_UB(src7A, src7A, 3);
    // Isolate the red byte (low byte of each pixel word).
    AND_V4_UB(src0, src1, src2, src3, vCnst255, src0R, src1R, src2R, src3R);
    AND_V4_UB(src4, src5, src6, src7, vCnst255, src4R, src5R, src6R, src7R);
    FFINTU_W4_SP(src0A, src1A, src2A, src3A, fsrc0A, fsrc1A, fsrc2A, fsrc3A);
    FFINTU_W4_SP(src4A, src5A, src6A, src7A, fsrc4A, fsrc5A, fsrc6A, fsrc7A);
    FFINTU_W4_SP(src0R, src1R, src2R, src3R, fsrc0R, fsrc1R, fsrc2R, fsrc3R);
    FFINTU_W4_SP(src4R, src5R, src6R, src7R, fsrc4R, fsrc5R, fsrc6R, fsrc7R);
    // R' = R * (255 / A), computed in single precision then truncated.
    DIV4(vfCnst255, fsrc0A, vfCnst255, fsrc1A, vfCnst255, fsrc2A, vfCnst255,
         fsrc3A, fsrc0A, fsrc1A, fsrc2A, fsrc3A);
    DIV4(vfCnst255, fsrc4A, vfCnst255, fsrc5A, vfCnst255, fsrc6A, vfCnst255,
         fsrc7A, fsrc4A, fsrc5A, fsrc6A, fsrc7A);
    MUL4(fsrc0R, fsrc0A, fsrc1R, fsrc1A, fsrc2R, fsrc2A, fsrc3R, fsrc3A, fdst0R,
         fdst1R, fdst2R, fdst3R);
    MUL4(fsrc4R, fsrc4A, fsrc5R, fsrc5A, fsrc6R, fsrc6A, fsrc7R, fsrc7A, fdst4R,
         fdst5R, fdst6R, fdst7R);
    FTRUNCU_W4_UB(fdst0R, fdst1R, fdst2R, fdst3R, dst0, dst1, dst2, dst3);
    FTRUNCU_W4_UB(fdst4R, fdst5R, fdst6R, fdst7R, dst4, dst5, dst6, dst7);
    // Interleave scaled red bytes with the original alpha bytes (R,A per
    // pixel), then merge the 8-byte halves pairwise for full-width stores.
    dst0 = VSHF_UB(dst0, src0, vshfm);
    dst1 = VSHF_UB(dst1, src1, vshfm);
    dst2 = VSHF_UB(dst2, src2, vshfm);
    dst3 = VSHF_UB(dst3, src3, vshfm);
    dst4 = VSHF_UB(dst4, src4, vshfm);
    dst5 = VSHF_UB(dst5, src5, vshfm);
    dst6 = VSHF_UB(dst6, src6, vshfm);
    dst7 = VSHF_UB(dst7, src7, vshfm);
    ILVR_D4_UB(dst1, dst0, dst3, dst2, dst5, dst4, dst7, dst6, dst0, dst1, dst2,
               dst3);
    ST_UB4(dst0, dst1, dst2, dst3, destination, 16);
  }

  if (pixelsPerRow & 31) {
    if ((pixelsPerRow & 16) && (pixelsPerRow & 8)) {
      // 24-pixel tail (6 source vectors -> 48 output bytes).
      LD_UB6(source, 16, src0, src1, src2, src3, src4, src5);
      CEQI_B4_UB(src0, src1, src2, src3, 0, src0A, src1A, src2A, src3A);
      CEQI_B2_UB(src4, src5, 0, src4A, src5A);
      src0A = __msa_bmnz_v(src0, alphaMask, src0A);
      src1A = __msa_bmnz_v(src1, alphaMask, src1A);
      src2A = __msa_bmnz_v(src2, alphaMask, src2A);
      src3A = __msa_bmnz_v(src3, alphaMask, src3A);
      src4A = __msa_bmnz_v(src4, alphaMask, src4A);
      src5A = __msa_bmnz_v(src5, alphaMask, src5A);
      AND_V4_UB(src0A, src1A, src2A, src3A, alphaMask, src0A, src1A, src2A,
                src3A);
      AND_V2_UB(src4A, src5A, alphaMask, src4A, src5A);
      src0A = SLDI_UB(src0A, src0A, 3);
      src1A = SLDI_UB(src1A, src1A, 3);
      src2A = SLDI_UB(src2A, src2A, 3);
      src3A = SLDI_UB(src3A, src3A, 3);
      src4A = SLDI_UB(src4A, src4A, 3);
      src5A = SLDI_UB(src5A, src5A, 3);
      AND_V4_UB(src0, src1, src2, src3, vCnst255, src0R, src1R, src2R, src3R);
      AND_V2_UB(src4, src5, vCnst255, src4R, src5R);
      FFINTU_W4_SP(src0A, src1A, src2A, src3A, fsrc0A, fsrc1A, fsrc2A, fsrc3A);
      FFINTU_W2_SP(src4A, src5A, fsrc4A, fsrc5A);
      FFINTU_W4_SP(src0R, src1R, src2R, src3R, fsrc0R, fsrc1R, fsrc2R, fsrc3R);
      FFINTU_W2_SP(src4R, src5R, fsrc4R, fsrc5R);
      DIV4(vfCnst255, fsrc0A, vfCnst255, fsrc1A, vfCnst255, fsrc2A, vfCnst255,
           fsrc3A, fsrc0A, fsrc1A, fsrc2A, fsrc3A);
      DIV2(vfCnst255, fsrc4A, vfCnst255, fsrc5A, fsrc4A, fsrc5A);
      MUL4(fsrc0R, fsrc0A, fsrc1R, fsrc1A, fsrc2R, fsrc2A, fsrc3R, fsrc3A,
           fdst0R, fdst1R, fdst2R, fdst3R);
      MUL2(fsrc4R, fsrc4A, fsrc5R, fsrc5A, fdst4R, fdst5R);
      FTRUNCU_W4_UB(fdst0R, fdst1R, fdst2R, fdst3R, dst0, dst1, dst2, dst3);
      FTRUNCU_W2_UB(fdst4R, fdst5R, dst4, dst5);
      dst0 = VSHF_UB(dst0, src0, vshfm);
      dst1 = VSHF_UB(dst1, src1, vshfm);
      dst2 = VSHF_UB(dst2, src2, vshfm);
      dst3 = VSHF_UB(dst3, src3, vshfm);
      dst4 = VSHF_UB(dst4, src4, vshfm);
      dst5 = VSHF_UB(dst5, src5, vshfm);
      ILVR_D3_UB(dst1, dst0, dst3, dst2, dst5, dst4, dst0, dst1, dst2);
      ST_UB3(dst0, dst1, dst2, destination, 16);
    } else if (pixelsPerRow & 16) {
      // 16-pixel tail.
      LD_UB4(source, 16, src0, src1, src2, src3);
      CEQI_B4_UB(src0, src1, src2, src3, 0, src0A, src1A, src2A, src3A);
      src0A = __msa_bmnz_v(src0, alphaMask, src0A);
      src1A = __msa_bmnz_v(src1, alphaMask, src1A);
      src2A = __msa_bmnz_v(src2, alphaMask, src2A);
      src3A = __msa_bmnz_v(src3, alphaMask, src3A);
      AND_V4_UB(src0A, src1A, src2A, src3A, alphaMask, src0A, src1A, src2A,
                src3A);
      src0A = SLDI_UB(src0A, src0A, 3);
      src1A = SLDI_UB(src1A, src1A, 3);
      src2A = SLDI_UB(src2A, src2A, 3);
      src3A = SLDI_UB(src3A, src3A, 3);
      AND_V4_UB(src0, src1, src2, src3, vCnst255, src0R, src1R, src2R, src3R);
      FFINTU_W4_SP(src0A, src1A, src2A, src3A, fsrc0A, fsrc1A, fsrc2A, fsrc3A);
      FFINTU_W4_SP(src0R, src1R, src2R, src3R, fsrc0R, fsrc1R, fsrc2R, fsrc3R);
      DIV4(vfCnst255, fsrc0A, vfCnst255, fsrc1A, vfCnst255, fsrc2A, vfCnst255,
           fsrc3A, fsrc0A, fsrc1A, fsrc2A, fsrc3A);
      MUL4(fsrc0R, fsrc0A, fsrc1R, fsrc1A, fsrc2R, fsrc2A, fsrc3R, fsrc3A,
           fdst0R, fdst1R, fdst2R, fdst3R);
      FTRUNCU_W4_UB(fdst0R, fdst1R, fdst2R, fdst3R, dst0, dst1, dst2, dst3);
      dst0 = VSHF_UB(dst0, src0, vshfm);
      dst1 = VSHF_UB(dst1, src1, vshfm);
      dst2 = VSHF_UB(dst2, src2, vshfm);
      dst3 = VSHF_UB(dst3, src3, vshfm);
      ILVR_D2_UB(dst1, dst0, dst3, dst2, dst0, dst1);
      ST_UB2(dst0, dst1, destination, 16);
    } else if (pixelsPerRow & 8) {
      // 8-pixel tail.
      LD_UB2(source, 16, src0, src1);
      CEQI_B2_UB(src0, src1, 0, src0A, src1A);
      src0A = __msa_bmnz_v(src0, alphaMask, src0A);
      src1A = __msa_bmnz_v(src1, alphaMask, src1A);
      AND_V2_UB(src0A, src1A, alphaMask, src0A, src1A);
      src0A = SLDI_UB(src0A, src0A, 3);
      src1A = SLDI_UB(src1A, src1A, 3);
      AND_V2_UB(src0, src1, vCnst255, src0R, src1R);
      FFINTU_W2_SP(src0A, src1A, fsrc0A, fsrc1A);
      FFINTU_W2_SP(src0R, src1R, fsrc0R, fsrc1R);
      DIV2(vfCnst255, fsrc0A, vfCnst255, fsrc1A, fsrc0A, fsrc1A);
      MUL2(fsrc0R, fsrc0A, fsrc1R, fsrc1A, fdst0R, fdst1R);
      FTRUNCU_W2_UB(fdst0R, fdst1R, dst0, dst1);
      dst0 = VSHF_UB(dst0, src0, vshfm);
      dst1 = VSHF_UB(dst1, src1, vshfm);
      dst0 = (v16u8)__msa_ilvr_d((v2i64)dst1, (v2i64)dst0);
      ST_UB(dst0, destination);
      destination += 16;
    }
  }

  pixelsPerRow &= 7;
}
| 1147 |
| 718 } // namespace SIMD | 1148 } // namespace SIMD |
| 719 | 1149 |
| 720 } // namespace blink | 1150 } // namespace blink |
| 721 | 1151 |
| 722 #endif // HAVE(MIPS_MSA_INTRINSICS) | 1152 #endif // HAVE(MIPS_MSA_INTRINSICS) |
| 723 | 1153 |
| 724 #endif // WebGLImageConversionMSA_h | 1154 #endif // WebGLImageConversionMSA_h |
| OLD | NEW |