| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 939 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 950 dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); | 950 dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); |
| 951 dst1 = (v16u8)__msa_pckev_b((v16i8)vec7, (v16i8)vec6); | 951 dst1 = (v16u8)__msa_pckev_b((v16i8)vec7, (v16i8)vec6); |
| 952 ST_UB(dst0, dst_u); | 952 ST_UB(dst0, dst_u); |
| 953 ST_UB(dst1, dst_v); | 953 ST_UB(dst1, dst_v); |
| 954 src_argb += 64; | 954 src_argb += 64; |
| 955 dst_u += 16; | 955 dst_u += 16; |
| 956 dst_v += 16; | 956 dst_v += 16; |
| 957 } | 957 } |
| 958 } | 958 } |
| 959 | 959 |
// Multiplies two ARGB rows channel-by-channel using MIPS MSA vectors,
// producing approximately dst = (src0 * src1) / 255 per 8-bit channel.
// Processes 4 ARGB pixels (16 bytes) per iteration; width is expected to
// be a multiple of 4 -- TODO confirm alignment/width contract with callers.
void ARGBMultiplyRow_MSA(const uint8* src_argb0,
                         const uint8* src_argb1,
                         uint8* dst_argb,
                         int width) {
  int x;
  v16u8 src0, src1, dst0;
  v8u16 vec0, vec1, vec2, vec3;
  v4u32 reg0, reg1, reg2, reg3;
  v8i16 zero = {0};

  for (x = 0; x < width; x += 4) {
    // Load 16 bytes (4 ARGB pixels) from each source row.
    src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0);
    src1 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 0);
    // Interleave src0 with itself: each byte b widens to the 16-bit value
    // (b << 8) | b == b * 257.  Multiplying by this and shifting right 16
    // approximates division by 255 (257/65536 ~= 1/255).
    vec0 = (v8u16)__msa_ilvr_b((v16i8)src0, (v16i8)src0);
    vec1 = (v8u16)__msa_ilvl_b((v16i8)src0, (v16i8)src0);
    // Zero-extend src1 bytes to 16 bits.
    vec2 = (v8u16)__msa_ilvr_b((v16i8)zero, (v16i8)src1);
    vec3 = (v8u16)__msa_ilvl_b((v16i8)zero, (v16i8)src1);
    // Zero-extend the src0-derived halfwords to 32 bits so the products
    // below cannot overflow a lane.
    reg0 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec0);
    reg1 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec0);
    reg2 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec1);
    reg3 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec1);
    // 32-bit lane-wise multiply: (b * 257) * b'.  Max product is
    // 65535 * 255 < 2^24, so the lanes stay well below the sign bit.
    reg0 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec2);
    reg1 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec2);
    reg2 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec3);
    reg3 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec3);
    // Shift the products down by 16.  Arithmetic shift is safe here because
    // the sign bit is never set (see bound above), so srai == srli.
    reg0 = (v4u32)__msa_srai_w((v4i32)reg0, 16);
    reg1 = (v4u32)__msa_srai_w((v4i32)reg1, 16);
    reg2 = (v4u32)__msa_srai_w((v4i32)reg2, 16);
    reg3 = (v4u32)__msa_srai_w((v4i32)reg3, 16);
    // Narrow 32 -> 16 -> 8 bits by keeping the even (low) elements, then
    // store 4 multiplied ARGB pixels.
    vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0);
    vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2);
    dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0);
    ST_UB(dst0, dst_argb);
    src_argb0 += 16;
    src_argb1 += 16;
    dst_argb += 16;
  }
}
| 998 |
| 999 void ARGBAddRow_MSA(const uint8* src_argb0, |
| 1000 const uint8* src_argb1, |
| 1001 uint8* dst_argb, |
| 1002 int width) { |
| 1003 int x; |
| 1004 v16u8 src0, src1, src2, src3, dst0, dst1; |
| 1005 |
| 1006 for (x = 0; x < width; x += 8) { |
| 1007 src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0); |
| 1008 src1 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 16); |
| 1009 src2 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 0); |
| 1010 src3 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 16); |
| 1011 dst0 = __msa_adds_u_b(src0, src2); |
| 1012 dst1 = __msa_adds_u_b(src1, src3); |
| 1013 ST_UB2(dst0, dst1, dst_argb, 16); |
| 1014 src_argb0 += 32; |
| 1015 src_argb1 += 32; |
| 1016 dst_argb += 32; |
| 1017 } |
| 1018 } |
| 1019 |
| 1020 void ARGBSubtractRow_MSA(const uint8* src_argb0, |
| 1021 const uint8* src_argb1, |
| 1022 uint8* dst_argb, |
| 1023 int width) { |
| 1024 int x; |
| 1025 v16u8 src0, src1, src2, src3, dst0, dst1; |
| 1026 |
| 1027 for (x = 0; x < width; x += 8) { |
| 1028 src0 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 0); |
| 1029 src1 = (v16u8)__msa_ld_b((v16i8*)src_argb0, 16); |
| 1030 src2 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 0); |
| 1031 src3 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 16); |
| 1032 dst0 = __msa_subs_u_b(src0, src2); |
| 1033 dst1 = __msa_subs_u_b(src1, src3); |
| 1034 ST_UB2(dst0, dst1, dst_argb, 16); |
| 1035 src_argb0 += 32; |
| 1036 src_argb1 += 32; |
| 1037 dst_argb += 32; |
| 1038 } |
| 1039 } |
| 1040 |
| 960 void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444, | 1041 void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444, |
| 961 uint8* dst_argb, | 1042 uint8* dst_argb, |
| 962 int width) { | 1043 int width) { |
| 963 int x; | 1044 int x; |
| 964 v16u8 src0, src1; | 1045 v16u8 src0, src1; |
| 965 v8u16 vec0, vec1, vec2, vec3; | 1046 v8u16 vec0, vec1, vec2, vec3; |
| 966 v16u8 dst0, dst1, dst2, dst3; | 1047 v16u8 dst0, dst1, dst2, dst3; |
| 967 | 1048 |
| 968 for (x = 0; x < width; x += 16) { | 1049 for (x = 0; x < width; x += 16) { |
| 969 src0 = (v16u8)__msa_ld_b((v16u8*)src_argb4444, 0); | 1050 src0 = (v16u8)__msa_ld_b((v16u8*)src_argb4444, 0); |
| (...skipping 15 matching lines...) Expand all Loading... |
| 985 dst_argb += 64; | 1066 dst_argb += 64; |
| 986 } | 1067 } |
| 987 } | 1068 } |
| 988 | 1069 |
| 989 #ifdef __cplusplus | 1070 #ifdef __cplusplus |
| 990 } // extern "C" | 1071 } // extern "C" |
| 991 } // namespace libyuv | 1072 } // namespace libyuv |
| 992 #endif | 1073 #endif |
| 993 | 1074 |
| 994 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) | 1075 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) |
| OLD | NEW |