OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 770 matching lines...) | |
781 dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)src1, (v16i8)src0); | 781 dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)src1, (v16i8)src0); |
782 dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)src2, (v16i8)src1); | 782 dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)src2, (v16i8)src1); |
783 dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)src3, (v16i8)src2); | 783 dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)src3, (v16i8)src2); |
784 ST_UB2(dst0, dst1, dst_rgb, 16); | 784 ST_UB2(dst0, dst1, dst_rgb, 16); |
785 ST_UB(dst2, (dst_rgb + 32)); | 785 ST_UB(dst2, (dst_rgb + 32)); |
786 src_argb += 64; | 786 src_argb += 64; |
787 dst_rgb += 48; | 787 dst_rgb += 48; |
788 } | 788 } |
789 } | 789 } |
790 | 790 |
791 void ARGBToRGB565Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width) { | |
792 int x; | |
793 v16u8 src0, src1, dst0; | |
794 v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; | |
795 v16i8 zero = {0}; | |
796 | |
797 for (x = 0; x < width; x += 8) { | |
798 src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0); | |
799 src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16); | |
800 vec0 = (v16u8)__msa_srai_b((v16i8)src0, 3); | |
801 vec1 = (v16u8)__msa_slli_b((v16i8)src0, 3); | |
802 vec2 = (v16u8)__msa_srai_b((v16i8)src0, 5); | |
803 vec4 = (v16u8)__msa_srai_b((v16i8)src1, 3); | |
804 vec5 = (v16u8)__msa_slli_b((v16i8)src1, 3); | |
805 vec6 = (v16u8)__msa_srai_b((v16i8)src1, 5); | |
806 vec1 = (v16u8)__msa_sldi_b(zero, (v16i8)vec1, 1); | |
807 vec2 = (v16u8)__msa_sldi_b(zero, (v16i8)vec2, 1); | |
808 vec5 = (v16u8)__msa_sldi_b(zero, (v16i8)vec5, 1); | |
809 vec6 = (v16u8)__msa_sldi_b(zero, (v16i8)vec6, 1); | |
810 vec3 = (v16u8)__msa_sldi_b(zero, (v16i8)src0, 2); | |
811 vec7 = (v16u8)__msa_sldi_b(zero, (v16i8)src1, 2); | |
812 vec0 = __msa_binsli_b(vec0, vec1, 2); | |
813 vec1 = __msa_binsli_b(vec2, vec3, 4); | |
814 vec4 = __msa_binsli_b(vec4, vec5, 2); | |
815 vec5 = __msa_binsli_b(vec6, vec7, 4); | |
816 vec0 = (v16u8)__msa_ilvev_b((v16i8)vec1, (v16i8)vec0); | |
817 vec4 = (v16u8)__msa_ilvev_b((v16i8)vec5, (v16i8)vec4); | |
818 dst0 = (v16u8)__msa_pckev_h((v8i16)vec4, (v8i16)vec0); | |
819 ST_UB(dst0, dst_rgb); | |
820 src_argb += 32; | |
821 dst_rgb += 16; | |
822 } | |
823 } | |
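Note (not part of the patch): the shift/binsli/interleave sequence above packs each BGRA pixel down to 16 bits. A minimal scalar sketch of the packing this row is intended to produce, assuming it matches libyuv's C reference row (ARGBToRGB565Row_C) and using libyuv's uint8/uint16 typedefs:

    // Scalar sketch (assumption): per-pixel RGB565 packing as in libyuv's
    // C reference row. ARGB bytes are laid out B, G, R, A in memory; the
    // result is a little-endian 16-bit value with blue in the low 5 bits.
    static inline uint16 ARGBPixelToRGB565_Sketch(uint8 b, uint8 g, uint8 r) {
      return (uint16)((b >> 3) | ((g >> 2) << 5) | ((r >> 3) << 11));
    }
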
824 | |
825 void ARGBToARGB1555Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width) { | |
826 int x; | |
827 v16u8 src0, src1, dst0; | |
828 v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9; | |
829 v16i8 zero = {0}; | |
830 | |
831 for (x = 0; x < width; x += 8) { | |
832 src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0); | |
833 src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16); | |
834 vec0 = (v16u8)__msa_srai_b((v16i8)src0, 3); | |
835 vec1 = (v16u8)__msa_slli_b((v16i8)src0, 2); | |
836 vec2 = (v16u8)__msa_srai_b((v16i8)vec0, 3); | |
837 vec1 = (v16u8)__msa_sldi_b(zero, (v16i8)vec1, 1); | |
838 vec2 = (v16u8)__msa_sldi_b(zero, (v16i8)vec2, 1); | |
839 vec3 = (v16u8)__msa_srai_b((v16i8)src0, 1); | |
840 vec5 = (v16u8)__msa_srai_b((v16i8)src1, 3); | |
841 vec6 = (v16u8)__msa_slli_b((v16i8)src1, 2); | |
842 vec7 = (v16u8)__msa_srai_b((v16i8)vec5, 3); | |
843 vec6 = (v16u8)__msa_sldi_b(zero, (v16i8)vec6, 1); | |
844 vec7 = (v16u8)__msa_sldi_b(zero, (v16i8)vec7, 1); | |
845 vec8 = (v16u8)__msa_srai_b((v16i8)src1, 1); | |
846 vec3 = (v16u8)__msa_sldi_b(zero, (v16i8)vec3, 2); | |
847 vec8 = (v16u8)__msa_sldi_b(zero, (v16i8)vec8, 2); | |
848 vec4 = (v16u8)__msa_sldi_b(zero, (v16i8)src0, 3); | |
849 vec9 = (v16u8)__msa_sldi_b(zero, (v16i8)src1, 3); | |
850 vec0 = __msa_binsli_b(vec0, vec1, 2); | |
851 vec5 = __msa_binsli_b(vec5, vec6, 2); | |
852 vec1 = __msa_binsli_b(vec2, vec3, 5); | |
853 vec6 = __msa_binsli_b(vec7, vec8, 5); | |
854 vec1 = __msa_binsli_b(vec1, vec4, 0); | |
855 vec6 = __msa_binsli_b(vec6, vec9, 0); | |
856 vec0 = (v16u8)__msa_ilvev_b((v16i8)vec1, (v16i8)vec0); | |
857 vec1 = (v16u8)__msa_ilvev_b((v16i8)vec6, (v16i8)vec5); | |
858 dst0 = (v16u8)__msa_pckev_h((v8i16)vec1, (v8i16)vec0); | |
859 ST_UB(dst0, dst_rgb); | |
860 src_argb += 32; | |
861 dst_rgb += 16; | |
862 } | |
863 } | |
864 | |
865 void ARGBToARGB4444Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width) { | |
866 int x; | |
867 v16u8 src0, src1; | |
868 v16u8 vec0, vec1; | |
869 v16u8 dst0; | |
870 v16i8 zero = {0}; | |
871 | |
872 for (x = 0; x < width; x += 8) { | |
873 src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0); | |
874 src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16); | |
875 vec0 = (v16u8)__msa_srai_b((v16i8)src0, 4); | |
876 vec1 = (v16u8)__msa_srai_b((v16i8)src1, 4); | |
877 src0 = (v16u8)__msa_sldi_b(zero, (v16i8)src0, 1); | |
878 src1 = (v16u8)__msa_sldi_b(zero, (v16i8)src1, 1); | |
879 vec0 = __msa_binsli_b(vec0, src0, 3); | |
880 vec1 = __msa_binsli_b(vec1, src1, 3); | |
881 dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); | |
882 ST_UB(dst0, dst_rgb); | |
883 src_argb += 32; | |
884 dst_rgb += 16; | |
885 } | |
886 } | |
887 | |
888 void ARGBToUV444Row_MSA(const uint8* src_argb, | |
889 uint8* dst_u, | |
890 uint8* dst_v, | |
891 int32 width) { | |
892 int32 x; | |
893 v16u8 src0, src1, src2, src3, reg0, reg1, reg2, reg3, dst0, dst1; | |
894 v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; | |
895 v8u16 vec8, vec9, vec10, vec11; | |
896 v8u16 const_112 = (v8u16)__msa_ldi_h(112); | |
fbarchard1 2016/11/22 18:46:15: note these constants will need to change for other | |
897 v8u16 const_74 = (v8u16)__msa_ldi_h(74); | |
898 v8u16 const_38 = (v8u16)__msa_ldi_h(38); | |
899 v8u16 const_94 = (v8u16)__msa_ldi_h(94); | |
900 v8u16 const_18 = (v8u16)__msa_ldi_h(18); | |
901 v8u16 const_32896 = (v8u16)__msa_fill_h(32896); | |
902 v16i8 zero = {0}; | |
903 | |
904 for (x = width; x > 0; x -= 16) { | |
905 src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0); | |
906 src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16); | |
907 src2 = (v16u8)__msa_ld_b((v16i8*)src_argb, 32); | |
908 src3 = (v16u8)__msa_ld_b((v16i8*)src_argb, 48); | |
909 reg0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); | |
910 reg1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); | |
911 reg2 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); | |
912 reg3 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); | |
913 src0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0); | |
914 src1 = (v16u8)__msa_pckev_b((v16i8)reg3, (v16i8)reg2); | |
915 src2 = (v16u8)__msa_pckod_b((v16i8)reg1, (v16i8)reg0); | |
916 vec0 = (v8u16)__msa_ilvr_b(zero, (v16i8)src0); | |
917 vec1 = (v8u16)__msa_ilvl_b(zero, (v16i8)src0); | |
918 vec2 = (v8u16)__msa_ilvr_b(zero, (v16i8)src1); | |
919 vec3 = (v8u16)__msa_ilvl_b(zero, (v16i8)src1); | |
920 vec4 = (v8u16)__msa_ilvr_b(zero, (v16i8)src2); | |
921 vec5 = (v8u16)__msa_ilvl_b(zero, (v16i8)src2); | |
922 vec10 = vec0 * const_18; | |
923 vec11 = vec1 * const_18; | |
924 vec8 = vec2 * const_94; | |
925 vec9 = vec3 * const_94; | |
926 vec6 = vec4 * const_112; | |
927 vec7 = vec5 * const_112; | |
928 vec0 *= const_112; | |
929 vec1 *= const_112; | |
930 vec2 *= const_74; | |
931 vec3 *= const_74; | |
932 vec4 *= const_38; | |
933 vec5 *= const_38; | |
934 vec8 += vec10; | |
935 vec9 += vec11; | |
936 vec6 += const_32896; | |
937 vec7 += const_32896; | |
938 vec0 += const_32896; | |
939 vec1 += const_32896; | |
940 vec2 += vec4; | |
941 vec3 += vec5; | |
942 vec0 -= vec2; | |
943 vec1 -= vec3; | |
944 vec6 -= vec8; | |
945 vec7 -= vec9; | |
946 vec0 = (v8u16)__msa_srai_h((v8i16)vec0, 8); | |
947 vec1 = (v8u16)__msa_srai_h((v8i16)vec1, 8); | |
948 vec6 = (v8u16)__msa_srai_h((v8i16)vec6, 8); | |
949 vec7 = (v8u16)__msa_srai_h((v8i16)vec7, 8); | |
950 dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); | |
951 dst1 = (v16u8)__msa_pckev_b((v16i8)vec7, (v16i8)vec6); | |
952 ST_UB(dst0, dst_u); | |
953 ST_UB(dst1, dst_v); | |
954 src_argb += 64; | |
955 dst_u += 16; | |
956 dst_v += 16; | |
957 } | |
958 } | |
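Note (not part of the patch): the constants above (112, 74, 38, 94, 18, and 0x8080 = 32896) are the U/V conversion coefficients that fbarchard1's comment flags as needing to change for other coefficient sets. A minimal scalar sketch of the per-pixel math they are assumed to implement, given the B, G, R, A byte order and libyuv's uint8 typedef:

    // Scalar sketch (assumption): per-pixel U/V as computed by the vectorized
    // loop above. 0x8080 = (128 << 8) + 128, i.e. the 128 bias plus rounding
    // applied before the final >> 8.
    static inline void ARGBPixelToUV444_Sketch(uint8 b, uint8 g, uint8 r,
                                               uint8* u, uint8* v) {
      *u = (uint8)((112 * b - 74 * g - 38 * r + 0x8080) >> 8);
      *v = (uint8)((112 * r - 94 * g - 18 * b + 0x8080) >> 8);
    }

A different coefficient set would swap out these multipliers, which is what the review comment is pointing at.
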
959 | |
791 void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444, | 960 void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444, |
792 uint8* dst_argb, | 961 uint8* dst_argb, |
793 int width) { | 962 int width) { |
794 int x; | 963 int x; |
795 v16u8 src0, src1; | 964 v16u8 src0, src1; |
796 v8u16 vec0, vec1, vec2, vec3; | 965 v8u16 vec0, vec1, vec2, vec3; |
797 v16u8 dst0, dst1, dst2, dst3; | 966 v16u8 dst0, dst1, dst2, dst3; |
798 | 967 |
799 for (x = 0; x < width; x += 16) { | 968 for (x = 0; x < width; x += 16) { |
800 src0 = (v16u8)__msa_ld_b((v16u8*)src_argb4444, 0); | 969 src0 = (v16u8)__msa_ld_b((v16u8*)src_argb4444, 0); |
(...skipping 15 matching lines...) | |
816 dst_argb += 64; | 985 dst_argb += 64; |
817 } | 986 } |
818 } | 987 } |
819 | 988 |
820 #ifdef __cplusplus | 989 #ifdef __cplusplus |
821 } // extern "C" | 990 } // extern "C" |
822 } // namespace libyuv | 991 } // namespace libyuv |
823 #endif | 992 #endif |
824 | 993 |
825 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) | 994 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) |