OLD | NEW |
1 /* | 1 /* |
2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 939 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
950 dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); | 950 dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); |
951 dst1 = (v16u8)__msa_pckev_b((v16i8)vec7, (v16i8)vec6); | 951 dst1 = (v16u8)__msa_pckev_b((v16i8)vec7, (v16i8)vec6); |
952 ST_UB(dst0, dst_u); | 952 ST_UB(dst0, dst_u); |
953 ST_UB(dst1, dst_v); | 953 ST_UB(dst1, dst_v); |
954 src_argb += 64; | 954 src_argb += 64; |
955 dst_u += 16; | 955 dst_u += 16; |
956 dst_v += 16; | 956 dst_v += 16; |
957 } | 957 } |
958 } | 958 } |
959 | 959 |
// Multiplies two ARGB rows channel-by-channel, approximating
// dst = (src0 * src1) / 255 per byte. Processes 4 pixels (16 bytes)
// per iteration; width is assumed to be a multiple of 4 (standard
// libyuv row-function contract — any remainder is handled by the
// caller via the _Any wrappers).
//
// Trick: interleaving a byte b with itself yields the 16-bit value
// b * 256 + b = b * 257, so (b * 257 * c) >> 16 ~= (b * c) / 255.
void ARGBMultiplyRow_MSA(const uint8* src_argb0,
                         const uint8* src_argb1,
                         uint8* dst_argb,
                         int width) {
  int x;
  v16u8 src0, src1, dst0;
  v8u16 vec0, vec1, vec2, vec3;
  v4u32 reg0, reg1, reg2, reg3;
  v8i16 zero = {0};

  for (x = 0; x < width; x += 4) {
    // Load 4 ARGB pixels from each source row.
    src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0);
    src1 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 0);
    // src0 bytes duplicated into 16-bit lanes: each lane = b * 257.
    vec0 = (v8u16)__msa_ilvr_b((v16i8)src0, (v16i8)src0);
    vec1 = (v8u16)__msa_ilvl_b((v16i8)src0, (v16i8)src0);
    // src1 bytes zero-extended to 16-bit lanes.
    vec2 = (v8u16)__msa_ilvr_b((v16i8)zero, (v16i8)src1);
    vec3 = (v8u16)__msa_ilvl_b((v16i8)zero, (v16i8)src1);
    // Widen the b*257 lanes to 32 bits (zero-extend via interleave).
    reg0 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec0);
    reg1 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec0);
    reg2 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec1);
    reg3 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec1);
    // 32-bit products: (b * 257) * c. Max is 65535 * 255 < 2^31, so the
    // arithmetic shift below never sees a negative sign bit.
    reg0 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec2);
    reg1 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec2);
    reg2 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec3);
    reg3 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec3);
    // >> 16 completes the (b * c) / 255 approximation.
    reg0 = (v4u32)__msa_srai_w((v4i32)reg0, 16);
    reg1 = (v4u32)__msa_srai_w((v4i32)reg1, 16);
    reg2 = (v4u32)__msa_srai_w((v4i32)reg2, 16);
    reg3 = (v4u32)__msa_srai_w((v4i32)reg3, 16);
    // Narrow 32 -> 16 -> 8 bits and store 16 result bytes.
    vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0);
    vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2);
    dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
    ST_UB(dst0, dst_argb);
    src_argb0 += 16;
    src_argb1 += 16;
    dst_argb += 16;
  }
}
| 998 |
| 999 void ARGBAddRow_MSA(const uint8* src_argb0, |
| 1000 const uint8* src_argb1, |
| 1001 uint8* dst_argb, |
| 1002 int width) { |
| 1003 int x; |
| 1004 v16u8 src0, src1, src2, src3, dst0, dst1; |
| 1005 |
| 1006 for (x = 0; x < width; x += 8) { |
| 1007 src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0); |
| 1008 src1 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 16); |
| 1009 src2 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 0); |
| 1010 src3 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 16); |
| 1011 dst0 = __msa_adds_u_b(src0, src2); |
| 1012 dst1 = __msa_adds_u_b(src1, src3); |
| 1013 ST_UB2(dst0, dst1, dst_argb, 16); |
| 1014 src_argb0 += 32; |
| 1015 src_argb1 += 32; |
| 1016 dst_argb += 32; |
| 1017 } |
| 1018 } |
| 1019 |
| 1020 void ARGBSubtractRow_MSA(const uint8* src_argb0, |
| 1021 const uint8* src_argb1, |
| 1022 uint8* dst_argb, |
| 1023 int width) { |
| 1024 int x; |
| 1025 v16u8 src0, src1, src2, src3, dst0, dst1; |
| 1026 |
| 1027 for (x = 0; x < width; x += 8) { |
| 1028 src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0); |
| 1029 src1 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 16); |
| 1030 src2 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 0); |
| 1031 src3 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 16); |
| 1032 dst0 = __msa_subs_u_b(src0, src2); |
| 1033 dst1 = __msa_subs_u_b(src1, src3); |
| 1034 ST_UB2(dst0, dst1, dst_argb, 16); |
| 1035 src_argb0 += 32; |
| 1036 src_argb1 += 32; |
| 1037 dst_argb += 32; |
| 1038 } |
| 1039 } |
| 1040 |
960 void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444, | 1041 void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444, |
961 uint8* dst_argb, | 1042 uint8* dst_argb, |
962 int width) { | 1043 int width) { |
963 int x; | 1044 int x; |
964 v16u8 src0, src1; | 1045 v16u8 src0, src1; |
965 v8u16 vec0, vec1, vec2, vec3; | 1046 v8u16 vec0, vec1, vec2, vec3; |
966 v16u8 dst0, dst1, dst2, dst3; | 1047 v16u8 dst0, dst1, dst2, dst3; |
967 | 1048 |
968 for (x = 0; x < width; x += 16) { | 1049 for (x = 0; x < width; x += 16) { |
969 src0 = (v16u8)__msa_ld_b((v16u8*)src_argb4444, 0); | 1050 src0 = (v16u8)__msa_ld_b((v16u8*)src_argb4444, 0); |
(...skipping 15 matching lines...) Expand all Loading... |
985 dst_argb += 64; | 1066 dst_argb += 64; |
986 } | 1067 } |
987 } | 1068 } |
988 | 1069 |
989 #ifdef __cplusplus | 1070 #ifdef __cplusplus |
990 } // extern "C" | 1071 } // extern "C" |
991 } // namespace libyuv | 1072 } // namespace libyuv |
992 #endif | 1073 #endif |
993 | 1074 |
994 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) | 1075 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) |
OLD | NEW |