Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(595)

Side by Side Diff: source/row_msa.cc

Issue 2520003004: Add MSA optimized ARGBToRGB565Row_MSA, ARGBToARGB1555Row_MSA, ARGBToARGB4444Row_MSA, ARGBToUV444Row… (Closed)
Patch Set: Clang format Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/row_any.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved.
3 * 3 *
4 * Use of this source code is governed by a BSD-style license 4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source 5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found 6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may 7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree. 8 * be found in the AUTHORS file in the root of the source tree.
9 */ 9 */
10 10
(...skipping 770 matching lines...) Expand 10 before | Expand all | Expand 10 after
781 dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)src1, (v16i8)src0); 781 dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)src1, (v16i8)src0);
782 dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)src2, (v16i8)src1); 782 dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)src2, (v16i8)src1);
783 dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)src3, (v16i8)src2); 783 dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)src3, (v16i8)src2);
784 ST_UB2(dst0, dst1, dst_rgb, 16); 784 ST_UB2(dst0, dst1, dst_rgb, 16);
785 ST_UB(dst2, (dst_rgb + 32)); 785 ST_UB(dst2, (dst_rgb + 32));
786 src_argb += 64; 786 src_argb += 64;
787 dst_rgb += 48; 787 dst_rgb += 48;
788 } 788 }
789 } 789 }
790 790
// Converts ARGB pixels (B,G,R,A byte order in memory) to RGB565
// (little-endian 16-bit pixels: 5 bits blue in the low bits, 6 bits green,
// 5 bits red) using MSA per-byte shifts, byte slides and bit-inserts.
// See ARGBToRGB565Row_C in row_common.cc for the scalar reference layout.
//
// Processes 8 pixels per iteration (32 input bytes -> 16 output bytes).
// NOTE(review): assumes width is a positive multiple of 8 — presumably the
// ANY wrappers in row_any.cc handle remainders; confirm at the callers.
void ARGBToRGB565Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width) {
  int x;
  v16u8 src0, src1, dst0;
  v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
  v16i8 zero = {0};

  for (x = 0; x < width; x += 8) {
    // Load 8 ARGB pixels (two 16-byte vectors, 4 pixels each).
    src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0);
    src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16);
    // Per-byte shifts position each channel's significant bits.  srai
    // (arithmetic) is safe here even for bytes >= 0x80: the sign-propagated
    // high bits are overwritten by the binsli inserts below.
    vec0 = (v16u8)__msa_srai_b((v16i8)src0, 3);
    vec1 = (v16u8)__msa_slli_b((v16i8)src0, 3);
    vec2 = (v16u8)__msa_srai_b((v16i8)src0, 5);
    vec4 = (v16u8)__msa_srai_b((v16i8)src1, 3);
    vec5 = (v16u8)__msa_slli_b((v16i8)src1, 3);
    vec6 = (v16u8)__msa_srai_b((v16i8)src1, 5);
    // Slide whole vectors down by 1 or 2 bytes so each pixel's G and R
    // bytes line up with its B byte for the cross-channel bit merges.
    vec1 = (v16u8)__msa_sldi_b(zero, (v16i8)vec1, 1);
    vec2 = (v16u8)__msa_sldi_b(zero, (v16i8)vec2, 1);
    vec5 = (v16u8)__msa_sldi_b(zero, (v16i8)vec5, 1);
    vec6 = (v16u8)__msa_sldi_b(zero, (v16i8)vec6, 1);
    vec3 = (v16u8)__msa_sldi_b(zero, (v16i8)src0, 2);
    vec7 = (v16u8)__msa_sldi_b(zero, (v16i8)src1, 2);
    // Bit-insert merges the aligned fields into the low and high bytes of
    // each RGB565 word (top 3 bits of G into the B byte; top 5 bits of R
    // above the remaining G bits).
    vec0 = __msa_binsli_b(vec0, vec1, 2);
    vec1 = __msa_binsli_b(vec2, vec3, 4);
    vec4 = __msa_binsli_b(vec4, vec5, 2);
    vec5 = __msa_binsli_b(vec6, vec7, 4);
    // Interleave low/high result bytes, then pack even halfwords to emit
    // 8 consecutive 16-bit RGB565 pixels.
    vec0 = (v16u8)__msa_ilvev_b((v16i8)vec1, (v16i8)vec0);
    vec4 = (v16u8)__msa_ilvev_b((v16i8)vec5, (v16i8)vec4);
    dst0 = (v16u8)__msa_pckev_h((v8i16)vec4, (v8i16)vec0);
    ST_UB(dst0, dst_rgb);
    src_argb += 32;
    dst_rgb += 16;
  }
}
824
// Converts ARGB pixels (B,G,R,A byte order in memory) to ARGB1555
// (little-endian 16-bit pixels: 5 bits blue in the low bits, 5 bits green,
// 5 bits red, alpha in the top bit).  See ARGBToARGB1555Row_C in
// row_common.cc for the scalar reference layout.
//
// Processes 8 pixels per iteration (32 input bytes -> 16 output bytes).
// NOTE(review): assumes width is a positive multiple of 8 — presumably the
// ANY wrappers in row_any.cc handle remainders; confirm at the callers.
void ARGBToARGB1555Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width) {
  int x;
  v16u8 src0, src1, dst0;
  v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9;
  v16i8 zero = {0};

  for (x = 0; x < width; x += 8) {
    // Load 8 ARGB pixels (two 16-byte vectors, 4 pixels each).
    src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0);
    src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16);
    // Position each channel's top 5 bits (and alpha's top bit) with per-byte
    // shifts.  srai (arithmetic) is safe here even for bytes >= 0x80: the
    // sign-propagated high bits are overwritten by the binsli inserts below.
    vec0 = (v16u8)__msa_srai_b((v16i8)src0, 3);
    vec1 = (v16u8)__msa_slli_b((v16i8)src0, 2);
    vec2 = (v16u8)__msa_srai_b((v16i8)vec0, 3);
    // Slide so the G (and below, R and A) contributions of each pixel line
    // up with its B byte for the cross-channel merges.
    vec1 = (v16u8)__msa_sldi_b(zero, (v16i8)vec1, 1);
    vec2 = (v16u8)__msa_sldi_b(zero, (v16i8)vec2, 1);
    vec3 = (v16u8)__msa_srai_b((v16i8)src0, 1);
    vec5 = (v16u8)__msa_srai_b((v16i8)src1, 3);
    vec6 = (v16u8)__msa_slli_b((v16i8)src1, 2);
    vec7 = (v16u8)__msa_srai_b((v16i8)vec5, 3);
    vec6 = (v16u8)__msa_sldi_b(zero, (v16i8)vec6, 1);
    vec7 = (v16u8)__msa_sldi_b(zero, (v16i8)vec7, 1);
    vec8 = (v16u8)__msa_srai_b((v16i8)src1, 1);
    vec3 = (v16u8)__msa_sldi_b(zero, (v16i8)vec3, 2);
    vec8 = (v16u8)__msa_sldi_b(zero, (v16i8)vec8, 2);
    vec4 = (v16u8)__msa_sldi_b(zero, (v16i8)src0, 3);
    vec9 = (v16u8)__msa_sldi_b(zero, (v16i8)src1, 3);
    // Bit-insert builds the two bytes of each ARGB1555 word: low byte is
    // G's low 3 bits over B, high byte is A | R over G's high 2 bits.
    vec0 = __msa_binsli_b(vec0, vec1, 2);
    vec5 = __msa_binsli_b(vec5, vec6, 2);
    vec1 = __msa_binsli_b(vec2, vec3, 5);
    vec6 = __msa_binsli_b(vec7, vec8, 5);
    // Insert the alpha top bit (immediate 0 = insert 1 leftmost bit).
    vec1 = __msa_binsli_b(vec1, vec4, 0);
    vec6 = __msa_binsli_b(vec6, vec9, 0);
    // Interleave low/high result bytes, then pack even halfwords to emit
    // 8 consecutive 16-bit ARGB1555 pixels.
    vec0 = (v16u8)__msa_ilvev_b((v16i8)vec1, (v16i8)vec0);
    vec1 = (v16u8)__msa_ilvev_b((v16i8)vec6, (v16i8)vec5);
    dst0 = (v16u8)__msa_pckev_h((v8i16)vec1, (v8i16)vec0);
    ST_UB(dst0, dst_rgb);
    src_argb += 32;
    dst_rgb += 16;
  }
}
864
865 void ARGBToARGB4444Row_MSA(const uint8* src_argb, uint8* dst_rgb, int width) {
866 int x;
867 v16u8 src0, src1;
868 v16u8 vec0, vec1;
869 v16u8 dst0;
870 v16i8 zero = {0};
871
872 for (x = 0; x < width; x += 8) {
873 src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0);
874 src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16);
875 vec0 = (v16u8)__msa_srai_b((v16i8)src0, 4);
876 vec1 = (v16u8)__msa_srai_b((v16i8)src1, 4);
877 src0 = (v16u8)__msa_sldi_b(zero, (v16i8)src0, 1);
878 src1 = (v16u8)__msa_sldi_b(zero, (v16i8)src1, 1);
879 vec0 = __msa_binsli_b(vec0, src0, 3);
880 vec1 = __msa_binsli_b(vec1, src1, 3);
881 dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
882 ST_UB(dst0, dst_rgb);
883 src_argb += 32;
884 dst_rgb += 16;
885 }
886 }
887
// Computes full-resolution (non-subsampled) U and V planes from ARGB
// (B,G,R,A byte order in memory) using 16-bit fixed-point arithmetic:
//   U = (112*B - 74*G - 38*R + 0x8080) >> 8
//   V = (112*R - 94*G - 18*B + 0x8080) >> 8
// NOTE(review): formula inferred from the coefficients and the 0x8080
// (32896) bias; confirm against ARGBToUV444Row_C in row_common.cc.
// NOTE(review, fbarchard): these constants will need to change for other
// colorspaces.
//
// Processes 16 pixels per iteration (64 input bytes, 16 bytes written to
// each of dst_u and dst_v); assumes width is a positive multiple of 16.
void ARGBToUV444Row_MSA(const uint8* src_argb,
                        uint8* dst_u,
                        uint8* dst_v,
                        int32 width) {
  int32 x;
  v16u8 src0, src1, src2, src3, reg0, reg1, reg2, reg3, dst0, dst1;
  v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;
  v8u16 vec8, vec9, vec10, vec11;
  v8u16 const_112 = (v8u16)__msa_ldi_h(112);
  v8u16 const_74 = (v8u16)__msa_ldi_h(74);
  v8u16 const_38 = (v8u16)__msa_ldi_h(38);
  v8u16 const_94 = (v8u16)__msa_ldi_h(94);
  v8u16 const_18 = (v8u16)__msa_ldi_h(18);
  // 0x8080: per-byte rounding/bias constant; too wide for ldi_h, so fill_h.
  v8u16 const_32896 = (v8u16)__msa_fill_h(32896);
  v16i8 zero = {0};

  for (x = width; x > 0; x -= 16) {
    // Load 16 ARGB pixels.
    src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0);
    src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16);
    src2 = (v16u8)__msa_ld_b((v16i8*)src_argb, 32);
    src3 = (v16u8)__msa_ld_b((v16i8*)src_argb, 48);
    // De-interleave the channels: after the pck sequence, src0 holds the 16
    // B bytes, src1 the 16 G bytes and src2 the 16 R bytes.  (A is dropped.)
    reg0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0);
    reg1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2);
    reg2 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0);
    reg3 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2);
    src0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0);
    src1 = (v16u8)__msa_pckev_b((v16i8)reg3, (v16i8)reg2);
    src2 = (v16u8)__msa_pckod_b((v16i8)reg1, (v16i8)reg0);
    // Widen each channel's bytes to unsigned 16-bit lanes (low/high halves).
    vec0 = (v8u16)__msa_ilvr_b(zero, (v16i8)src0);  // B low 8
    vec1 = (v8u16)__msa_ilvl_b(zero, (v16i8)src0);  // B high 8
    vec2 = (v8u16)__msa_ilvr_b(zero, (v16i8)src1);  // G low 8
    vec3 = (v8u16)__msa_ilvl_b(zero, (v16i8)src1);  // G high 8
    vec4 = (v8u16)__msa_ilvr_b(zero, (v16i8)src2);  // R low 8
    vec5 = (v8u16)__msa_ilvl_b(zero, (v16i8)src2);  // R high 8
    // V numerator pieces: 18*B and 94*G (to subtract), 112*R (to keep).
    vec10 = vec0 * const_18;
    vec11 = vec1 * const_18;
    vec8 = vec2 * const_94;
    vec9 = vec3 * const_94;
    vec6 = vec4 * const_112;
    vec7 = vec5 * const_112;
    // U numerator pieces: 112*B (to keep), 74*G and 38*R (to subtract).
    vec0 *= const_112;
    vec1 *= const_112;
    vec2 *= const_74;
    vec3 *= const_74;
    vec4 *= const_38;
    vec5 *= const_38;
    // Combine: vec8/9 = 94*G + 18*B, vec2/3 = 74*G + 38*R; add the 0x8080
    // bias to the positive terms before subtracting (keeps the u16 lanes
    // non-negative: 112*255 + 0x8080 = 61456 fits, and the subtracted sum
    // never exceeds 112*255).
    vec8 += vec10;
    vec9 += vec11;
    vec6 += const_32896;
    vec7 += const_32896;
    vec0 += const_32896;
    vec1 += const_32896;
    vec2 += vec4;
    vec3 += vec5;
    vec0 -= vec2;  // U*256 in each lane
    vec1 -= vec3;
    vec6 -= vec8;  // V*256 in each lane
    vec7 -= vec9;
    // >>8 then keep the even (low) bytes of each halfword; srai vs srli is
    // immaterial here because pckev_b discards the shifted-in high byte.
    vec0 = (v8u16)__msa_srai_h((v8i16)vec0, 8);
    vec1 = (v8u16)__msa_srai_h((v8i16)vec1, 8);
    vec6 = (v8u16)__msa_srai_h((v8i16)vec6, 8);
    vec7 = (v8u16)__msa_srai_h((v8i16)vec7, 8);
    dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
    dst1 = (v16u8)__msa_pckev_b((v16i8)vec7, (v16i8)vec6);
    ST_UB(dst0, dst_u);
    ST_UB(dst1, dst_v);
    src_argb += 64;
    dst_u += 16;
    dst_v += 16;
  }
}
959
791 void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444, 960 void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444,
792 uint8* dst_argb, 961 uint8* dst_argb,
793 int width) { 962 int width) {
794 int x; 963 int x;
795 v16u8 src0, src1; 964 v16u8 src0, src1;
796 v8u16 vec0, vec1, vec2, vec3; 965 v8u16 vec0, vec1, vec2, vec3;
797 v16u8 dst0, dst1, dst2, dst3; 966 v16u8 dst0, dst1, dst2, dst3;
798 967
799 for (x = 0; x < width; x += 16) { 968 for (x = 0; x < width; x += 16) {
800 src0 = (v16u8)__msa_ld_b((v16u8*)src_argb4444, 0); 969 src0 = (v16u8)__msa_ld_b((v16u8*)src_argb4444, 0);
(...skipping 15 matching lines...) Expand all
816 dst_argb += 64; 985 dst_argb += 64;
817 } 986 }
818 } 987 }
819 988
820 #ifdef __cplusplus 989 #ifdef __cplusplus
821 } // extern "C" 990 } // extern "C"
822 } // namespace libyuv 991 } // namespace libyuv
823 #endif 992 #endif
824 993
825 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) 994 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
OLDNEW
« no previous file with comments | « source/row_any.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698