Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 /* | 1 /* |
| 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 1020 matching lines...) | |
| 1031 src3 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 16); | 1031 src3 = (v16u8)__msa_ld_b((v16i8*)src_argb1, 16); |
| 1032 dst0 = __msa_subs_u_b(src0, src2); | 1032 dst0 = __msa_subs_u_b(src0, src2); |
| 1033 dst1 = __msa_subs_u_b(src1, src3); | 1033 dst1 = __msa_subs_u_b(src1, src3); |
| 1034 ST_UB2(dst0, dst1, dst_argb, 16); | 1034 ST_UB2(dst0, dst1, dst_argb, 16); |
| 1035 src_argb0 += 32; | 1035 src_argb0 += 32; |
| 1036 src_argb1 += 32; | 1036 src_argb1 += 32; |
| 1037 dst_argb += 32; | 1037 dst_argb += 32; |
| 1038 } | 1038 } |
| 1039 } | 1039 } |
| 1040 | 1040 |
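
A quick scalar reference for the loop above (the head of ARGBSubtractRow_MSA falls in the skipped region): __msa_subs_u_b is a per-byte unsigned saturating subtract, so the per-pixel effect is as sketched below. The ARGBSubtractRow_Ref name and plain-C form are illustrative only, not libyuv's actual C fallback.

```c
#include <stdint.h>

/* Illustrative scalar sketch: each byte of src_argb1 is subtracted from
 * src_argb0 and clamped at 0, which is what __msa_subs_u_b does 16 bytes
 * at a time. Hypothetical _Ref name, not a libyuv symbol. */
static void ARGBSubtractRow_Ref(const uint8_t* src_argb0,
                                const uint8_t* src_argb1,
                                uint8_t* dst_argb,
                                int width) {
  for (int i = 0; i < 4 * width; ++i) { /* 4 bytes (B,G,R,A) per pixel */
    int d = src_argb0[i] - src_argb1[i];
    dst_argb[i] = (uint8_t)(d < 0 ? 0 : d);
  }
}
```
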
| 1041 void ARGBAttenuateRow_MSA(const uint8* src_argb, uint8* dst_argb, int width) { | |
| 1042 int x; | |
| 1043 v16u8 src0, src1, dst0, dst1; | |
| 1044 v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9; | |
| 1045 v4u32 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7; | |
| 1046 v8i16 zero = {0}; | |
| 1047 v16u8 mask = {0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255}; | |
| 1048 | |
| 1049 for (x = 0; x < width; x += 8) { | |
| 1050 src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0); | |

fbarchard1 2016/12/12 20:09:03: Can this function be further optimized? Suggest a…
manojkumar.bhosale 2016/12/15 06:35:21: Acknowledged.
| 1051 src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16); | |
| 1052 vec0 = (v8u16)__msa_ilvr_b((v16i8)src0, (v16i8)src0); | |
| 1053 vec1 = (v8u16)__msa_ilvl_b((v16i8)src0, (v16i8)src0); | |
| 1054 vec2 = (v8u16)__msa_ilvr_b((v16i8)src1, (v16i8)src1); | |
| 1055 vec3 = (v8u16)__msa_ilvl_b((v16i8)src1, (v16i8)src1); | |
| 1056 vec4 = (v8u16)__msa_fill_h(vec0[3]); | |
| 1057 vec5 = (v8u16)__msa_fill_h(vec0[7]); | |
| 1058 vec6 = (v8u16)__msa_fill_h(vec1[3]); | |
| 1059 vec7 = (v8u16)__msa_fill_h(vec1[7]); | |
| 1060 vec4 = (v8u16)__msa_pckev_d((v2i64)vec5, (v2i64)vec4); | |
| 1061 vec5 = (v8u16)__msa_pckev_d((v2i64)vec7, (v2i64)vec6); | |
| 1062 vec6 = (v8u16)__msa_fill_h(vec2[3]); | |
| 1063 vec7 = (v8u16)__msa_fill_h(vec2[7]); | |
| 1064 vec8 = (v8u16)__msa_fill_h(vec3[3]); | |
| 1065 vec9 = (v8u16)__msa_fill_h(vec3[7]); | |
| 1066 vec6 = (v8u16)__msa_pckev_d((v2i64)vec7, (v2i64)vec6); | |
| 1067 vec7 = (v8u16)__msa_pckev_d((v2i64)vec9, (v2i64)vec8); | |
| 1068 reg0 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec4); | |
| 1069 reg1 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec4); | |
| 1070 reg2 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec5); | |
| 1071 reg3 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec5); | |
| 1072 reg4 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec6); | |
| 1073 reg5 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec6); | |
| 1074 reg6 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec7); | |
| 1075 reg7 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec7); | |
| 1076 reg0 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec0); | |
| 1077 reg1 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec0); | |
| 1078 reg2 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec1); | |
| 1079 reg3 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec1); | |
| 1080 reg4 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec2); | |
| 1081 reg5 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec2); | |
| 1082 reg6 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec3); | |
| 1083 reg7 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec3); | |
| 1084 reg0 = (v4u32)__msa_srai_w((v4i32)reg0, 24); | |
| 1085 reg1 = (v4u32)__msa_srai_w((v4i32)reg1, 24); | |
| 1086 reg2 = (v4u32)__msa_srai_w((v4i32)reg2, 24); | |
| 1087 reg3 = (v4u32)__msa_srai_w((v4i32)reg3, 24); | |
| 1088 reg4 = (v4u32)__msa_srai_w((v4i32)reg4, 24); | |
| 1089 reg5 = (v4u32)__msa_srai_w((v4i32)reg5, 24); | |
| 1090 reg6 = (v4u32)__msa_srai_w((v4i32)reg6, 24); | |
| 1091 reg7 = (v4u32)__msa_srai_w((v4i32)reg7, 24); | |
| 1092 vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0); | |
| 1093 vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2); | |
| 1094 vec2 = (v8u16)__msa_pckev_h((v8i16)reg5, (v8i16)reg4); | |
| 1095 vec3 = (v8u16)__msa_pckev_h((v8i16)reg7, (v8i16)reg6); | |
| 1096 dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); | |
| 1097 dst1 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); | |
| 1098 dst0 = __msa_bmnz_v(dst0, src0, mask); | |
| 1099 dst1 = __msa_bmnz_v(dst1, src1, mask); | |
| 1100 ST_UB2(dst0, dst1, dst_argb, 16); | |
| 1101 src_argb += 32; | |
| 1102 dst_argb += 32; | |
| 1103 } | |
| 1104 } | |
| 1105 | |
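
For reference alongside fbarchard1's optimization question: the vector code widens each channel and the alpha by byte duplication (c becomes c*0x101), multiplies, and shifts right by 24, which approximates c*a/255; the final bmnz keeps the original alpha bytes. A minimal scalar sketch of that math, with an illustrative _Ref name (not libyuv's C fallback):

```c
#include <stdint.h>

/* Illustrative scalar sketch of ARGBAttenuateRow_MSA's per-pixel math.
 * (c * 0x101) * (a * 0x101) >> 24 approximates c * a / 255; the product
 * stays below 2^32 since 65535 * 65535 < 4294967296. */
static void ARGBAttenuateRow_Ref(const uint8_t* src_argb, uint8_t* dst_argb,
                                 int width) {
  for (int i = 0; i < width; ++i) {
    uint32_t a = src_argb[3];
    for (int c = 0; c < 3; ++c) {
      dst_argb[c] = (uint8_t)(((src_argb[c] * 0x101u) * (a * 0x101u)) >> 24);
    }
    dst_argb[3] = (uint8_t)a; /* alpha passes through (the bmnz mask) */
    src_argb += 4;
    dst_argb += 4;
  }
}
```
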
| 1106 void ARGBToRGB565DitherRow_MSA(const uint8* src_argb, | |
| 1107 uint8* dst_rgb, | |
| 1108 uint32 dither4, | |
| 1109 int width) { | |
| 1110 int x; | |
| 1111 v16u8 src0, src1, dst0, vec0, vec1; | |
| 1112 v8i16 vec_d0; | |
| 1113 v8i16 reg0, reg1, reg2; | |
| 1114 v16i8 zero = {0}; | |
| 1115 v8i16 max = __msa_ldi_h(0xFF); | |
| 1116 | |
| 1117 vec_d0 = (v8i16)__msa_fill_w(dither4); | |
| 1118 vec_d0 = (v8i16)__msa_ilvr_b(zero, (v16i8)vec_d0); | |
| 1119 | |
| 1120 for (x = 0; x < width; x += 8) { | |
| 1121 src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0); | |
| 1122 src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16); | |
| 1123 vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); | |
| 1124 vec1 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); | |
| 1125 reg0 = (v8i16)__msa_ilvev_b(zero, (v16i8)vec0); | |
| 1126 reg1 = (v8i16)__msa_ilvev_b(zero, (v16i8)vec1); | |
| 1127 reg2 = (v8i16)__msa_ilvod_b(zero, (v16i8)vec0); | |
| 1128 reg0 += vec_d0; | |
| 1129 reg1 += vec_d0; | |
| 1130 reg2 += vec_d0; | |
| 1131 reg0 = __msa_maxi_s_h((v8i16)reg0, 0); | |
| 1132 reg1 = __msa_maxi_s_h((v8i16)reg1, 0); | |
| 1133 reg2 = __msa_maxi_s_h((v8i16)reg2, 0); | |
| 1134 reg0 = __msa_min_s_h((v8i16)max, (v8i16)reg0); | |
| 1135 reg1 = __msa_min_s_h((v8i16)max, (v8i16)reg1); | |
| 1136 reg2 = __msa_min_s_h((v8i16)max, (v8i16)reg2); | |
| 1137 reg0 = __msa_srai_h(reg0, 3); | |
| 1138 reg2 = __msa_srai_h(reg2, 3); | |
| 1139 reg1 = __msa_srai_h(reg1, 2); | |
| 1140 reg2 = __msa_slli_h(reg2, 11); | |
| 1141 reg1 = __msa_slli_h(reg1, 5); | |
| 1142 reg0 |= reg1; | |
| 1143 dst0 = (v16u8)(reg0 | reg2); | |
| 1144 ST_UB(dst0, dst_rgb); | |
| 1145 src_argb += 32; | |
| 1146 dst_rgb += 16; | |
| 1147 } | |
| 1148 } | |
| 1149 | |
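
The dither row above unpacks four per-column dither bytes from dither4, adds the matching byte to each channel, clamps to [0, 255], and packs to 5:6:5. A one-pixel scalar sketch; the ToRGB565Dither helper name is illustrative only:

```c
#include <stdint.h>

/* Illustrative one-pixel sketch of ARGBToRGB565DitherRow_MSA: the dither
 * byte for this pixel's column (x & 3) is added to B, G and R, the sums
 * clamped (the vector code also clamps at 0 defensively), then packed. */
static uint16_t ToRGB565Dither(const uint8_t* argb, uint8_t dither) {
  int b = argb[0] + dither;
  int g = argb[1] + dither;
  int r = argb[2] + dither;
  if (b > 255) b = 255;
  if (g > 255) g = 255;
  if (r > 255) r = 255;
  return (uint16_t)(((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3));
}
```
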
| 1150 void ARGBShuffleRow_MSA(const uint8* src_argb, | |
| 1151 uint8* dst_argb, | |
| 1152 const uint8* shuffler, | |
| 1153 int width) { | |
| 1154 int x; | |
| 1155 v16u8 src0, src1, dst0, dst1; | |
| 1156 v16i8 vec0; | |
| 1157 v16i8 shuffler_vec = {0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12}; | |
| 1158 int32 val = LW((int32*)shuffler); | |
| 1159 | |
| 1160 vec0 = (v16i8)__msa_fill_w(val); | |
| 1161 shuffler_vec += vec0; | |
| 1162 | |
| 1163 for (x = 0; x < width; x += 8) { | |
| 1164 src0 = (v16u8)__msa_ld_b((v16u8*)src_argb, 0); | |
| 1165 src1 = (v16u8)__msa_ld_b((v16u8*)src_argb, 16); | |
| 1166 dst0 = (v16u8)__msa_vshf_b(shuffler_vec, (v16i8)src0, (v16i8)src0); | |
| 1167 dst1 = (v16u8)__msa_vshf_b(shuffler_vec, (v16i8)src1, (v16i8)src1); | |
| 1168 ST_UB2(dst0, dst1, dst_argb, 16); | |
| 1169 src_argb += 32; | |
| 1170 dst_argb += 32; | |
| 1171 } | |
| 1172 } | |
| 1173 | |
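
The shuffle row builds its vshf control by replicating the 4-byte shuffler across the register and adding per-pixel base offsets {0,0,0,0,4,4,4,4,...}, so lane i selects source byte (i & ~3) + shuffler[i & 3]. A scalar sketch of the resulting per-pixel permutation, with an illustrative _Ref name:

```c
#include <stdint.h>

/* Illustrative scalar sketch of ARGBShuffleRow_MSA: shuffler holds four
 * byte indices into each pixel, e.g. {2, 1, 0, 3} swaps B and R. */
static void ARGBShuffleRow_Ref(const uint8_t* src_argb, uint8_t* dst_argb,
                               const uint8_t* shuffler, int width) {
  for (int i = 0; i < width; ++i) {
    dst_argb[0] = src_argb[shuffler[0]];
    dst_argb[1] = src_argb[shuffler[1]];
    dst_argb[2] = src_argb[shuffler[2]];
    dst_argb[3] = src_argb[shuffler[3]];
    src_argb += 4;
    dst_argb += 4;
  }
}
```
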
| 1174 void ARGBShadeRow_MSA(const uint8* src_argb, | |
| 1175 uint8* dst_argb, | |
| 1176 int width, | |
| 1177 uint32 value) { | |
| 1178 int x; | |
| 1179 v16u8 src0, dst0; | |
| 1180 v8u16 vec0, vec1; | |
| 1181 v4u32 reg0, reg1, reg2, reg3, rgba_scale; | |
| 1182 v8i16 zero = {0}; | |
| 1183 | |
| 1184 rgba_scale[0] = value; | |
| 1185 rgba_scale = (v4u32)__msa_ilvr_b((v16i8)rgba_scale, (v16i8)rgba_scale); | |
| 1186 rgba_scale = (v4u32)__msa_ilvr_h(zero, (v8i16)rgba_scale); | |
| 1187 | |
| 1188 for (x = 0; x < width; x += 4) { | |
| 1189 src0 = (v16u8)__msa_ld_b((v16u8*)src_argb, 0); | |
| 1190 vec0 = (v8u16)__msa_ilvr_b((v16i8)src0, (v16i8)src0); | |
| 1191 vec1 = (v8u16)__msa_ilvl_b((v16i8)src0, (v16i8)src0); | |
| 1192 reg0 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec0); | |
| 1193 reg1 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec0); | |
| 1194 reg2 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec1); | |
| 1195 reg3 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec1); | |
| 1196 reg0 *= rgba_scale; | |
| 1197 reg1 *= rgba_scale; | |
| 1198 reg2 *= rgba_scale; | |
| 1199 reg3 *= rgba_scale; | |
| 1200 reg0 = (v4u32)__msa_srai_w((v4i32)reg0, 24); | |
| 1201 reg1 = (v4u32)__msa_srai_w((v4i32)reg1, 24); | |
| 1202 reg2 = (v4u32)__msa_srai_w((v4i32)reg2, 24); | |
| 1203 reg3 = (v4u32)__msa_srai_w((v4i32)reg3, 24); | |
| 1204 vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0); | |
| 1205 vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2); | |
| 1206 dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); | |
| 1207 ST_UB(dst0, dst_argb); | |
| 1208 src_argb += 16; | |
| 1209 dst_argb += 16; | |
| 1210 } | |
| 1211 } | |
| 1212 | |
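
ARGBShadeRow applies the same 0x101-widening trick as the attenuate row, but against four per-channel scale bytes unpacked from value (alpha included). A scalar sketch, assuming little-endian byte order for the scale word; the _Ref name is illustrative:

```c
#include <stdint.h>

/* Illustrative scalar sketch of ARGBShadeRow_MSA, assuming little-endian
 * layout of `value` (byte 0 scales B, byte 3 scales A). */
static void ARGBShadeRow_Ref(const uint8_t* src_argb, uint8_t* dst_argb,
                             int width, uint32_t value) {
  for (int i = 0; i < width; ++i) {
    for (int c = 0; c < 4; ++c) {
      uint32_t s = (value >> (8 * c)) & 0xffu;
      dst_argb[c] = (uint8_t)(((src_argb[c] * 0x101u) * (s * 0x101u)) >> 24);
    }
    src_argb += 4;
    dst_argb += 4;
  }
}
```
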
| 1213 void ARGBGrayRow_MSA(const uint8* src_argb, uint8* dst_argb, int width) { | |
| 1214 int x; | |
| 1215 v16u8 src0, src1, vec0, vec1, dst0, dst1; | |
| 1216 v8u16 reg0; | |
| 1217 v16u8 const_0x26 = (v16u8)__msa_ldi_h(0x26); | |
| 1218 v16u8 const_0x4B0F = (v16u8)__msa_fill_h(0x4B0F); | |

fbarchard1 2016/12/12 20:09:03: note these constants may need to be passed in as p…
manojkumar.bhosale 2016/12/15 06:35:21: Acknowledged.
| 1219 | |
| 1220 for (x = 0; x < width; x += 8) { | |
| 1221 src0 = (v16u8)__msa_ld_b((v16u8*)src_argb, 0); | |
| 1222 src1 = (v16u8)__msa_ld_b((v16u8*)src_argb, 16); | |
| 1223 vec0 = (v16u8)__msa_pckev_h((v8i16)src1, (v8i16)src0); | |
| 1224 vec1 = (v16u8)__msa_pckod_h((v8i16)src1, (v8i16)src0); | |
| 1225 reg0 = __msa_dotp_u_h(vec0, const_0x4B0F); | |
| 1226 reg0 = __msa_dpadd_u_h(reg0, vec1, const_0x26); | |
| 1227 reg0 = (v8u16)__msa_srari_h((v8i16)reg0, 7); | |
| 1228 vec0 = (v16u8)__msa_ilvev_b((v16i8)reg0, (v16i8)reg0); | |
| 1229 vec1 = (v16u8)__msa_ilvod_b((v16i8)vec1, (v16i8)vec0); | |
| 1230 dst0 = (v16u8)__msa_ilvr_b((v16i8)vec1, (v16i8)vec0); | |
| 1231 dst1 = (v16u8)__msa_ilvl_b((v16i8)vec1, (v16i8)vec0); | |
| 1232 ST_UB2(dst0, dst1, dst_argb, 16); | |
| 1233 src_argb += 32; | |
| 1234 dst_argb += 32; | |
| 1235 } | |
| 1236 } | |
| 1237 | |
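
To make the flagged constants concrete: const_0x4B0F supplies the byte pair (0x0F, 0x4B) for the B,G dot product and const_0x26 adds the R term, so the row computes the luma (15*B + 75*G + 38*R + 64) >> 7, with srari_h doing the rounding add. A scalar sketch with an illustrative _Ref name:

```c
#include <stdint.h>

/* Illustrative scalar sketch of ARGBGrayRow_MSA: the weights sum to 128,
 * so >> 7 normalizes; alpha is carried through unchanged. */
static void ARGBGrayRow_Ref(const uint8_t* src_argb, uint8_t* dst_argb,
                            int width) {
  for (int i = 0; i < width; ++i) {
    uint8_t y = (uint8_t)((15 * src_argb[0] + 75 * src_argb[1] +
                           38 * src_argb[2] + 64) >> 7);
    dst_argb[0] = y;
    dst_argb[1] = y;
    dst_argb[2] = y;
    dst_argb[3] = src_argb[3];
    src_argb += 4;
    dst_argb += 4;
  }
}
```
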
| 1238 void ARGBSepiaRow_MSA(uint8* dst_argb, int width) { | |
| 1239 int x; | |
| 1240 v16u8 src0, src1, dst0, dst1, vec0, vec1, vec2, vec3, vec4, vec5; | |
| 1241 v8u16 reg0, reg1, reg2; | |
| 1242 v16u8 const_0x4411 = (v16u8)__msa_fill_h(0x4411); | |
| 1243 v16u8 const_0x23 = (v16u8)__msa_ldi_h(0x23); | |
| 1244 v16u8 const_0x5816 = (v16u8)__msa_fill_h(0x5816); | |
| 1245 v16u8 const_0x2D = (v16u8)__msa_ldi_h(0x2D); | |
| 1246 v16u8 const_0x6218 = (v16u8)__msa_fill_h(0x6218); | |
| 1247 v16u8 const_0x32 = (v16u8)__msa_ldi_h(0x32); | |
| 1248 v8u16 const_0xFF = (v8u16)__msa_ldi_h(0xFF); | |
| 1249 | |
| 1250 for (x = 0; x < width; x += 8) { | |
| 1251 src0 = (v16u8)__msa_ld_b((v16u8*)dst_argb, 0); | |
| 1252 src1 = (v16u8)__msa_ld_b((v16u8*)dst_argb, 16); | |
| 1253 vec0 = (v16u8)__msa_pckev_h((v8i16)src1, (v8i16)src0); | |
| 1254 vec1 = (v16u8)__msa_pckod_h((v8i16)src1, (v8i16)src0); | |
| 1255 vec3 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec1); | |
| 1256 reg0 = (v8u16)__msa_dotp_u_h(vec0, const_0x4411); | |
| 1257 reg1 = (v8u16)__msa_dotp_u_h(vec0, const_0x5816); | |
| 1258 reg2 = (v8u16)__msa_dotp_u_h(vec0, const_0x6218); | |
| 1259 reg0 = (v8u16)__msa_dpadd_u_h(reg0, vec1, const_0x23); | |
| 1260 reg1 = (v8u16)__msa_dpadd_u_h(reg1, vec1, const_0x2D); | |
| 1261 reg2 = (v8u16)__msa_dpadd_u_h(reg2, vec1, const_0x32); | |
| 1262 reg0 = (v8u16)__msa_srai_h((v8i16)reg0, 7); | |
| 1263 reg1 = (v8u16)__msa_srai_h((v8i16)reg1, 7); | |
| 1264 reg2 = (v8u16)__msa_srai_h((v8i16)reg2, 7); | |
| 1265 reg1 = (v8u16)__msa_min_u_h((v8u16)reg1, const_0xFF); | |
| 1266 reg2 = (v8u16)__msa_min_u_h((v8u16)reg2, const_0xFF); | |
| 1267 vec0 = (v16u8)__msa_pckev_b((v16i8)reg0, (v16i8)reg0); | |
| 1268 vec1 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg1); | |
| 1269 vec2 = (v16u8)__msa_pckev_b((v16i8)reg2, (v16i8)reg2); | |
| 1270 vec4 = (v16u8)__msa_ilvr_b((v16i8)vec2, (v16i8)vec0); | |
| 1271 vec5 = (v16u8)__msa_ilvr_b((v16i8)vec3, (v16i8)vec1); | |
| 1272 dst0 = (v16u8)__msa_ilvr_b((v16i8)vec5, (v16i8)vec4); | |
| 1273 dst1 = (v16u8)__msa_ilvl_b((v16i8)vec5, (v16i8)vec4); | |
| 1274 ST_UB2(dst0, dst1, dst_argb, 16); | |
| 1275 dst_argb += 32; | |
| 1276 } | |
| 1277 } | |
| 1278 | |
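
The sepia constants decode the same way: each fill_h packs a B,G weight pair and each ldi_h the matching R weight, giving the three dot products below. Only the G and R sums can exceed 255 (their weights total 155 and 172 versus 120 for B), which is why only reg1 and reg2 are clamped. A scalar sketch with an illustrative _Ref name:

```c
#include <stdint.h>

/* Illustrative in-place scalar sketch of ARGBSepiaRow_MSA. */
static void ARGBSepiaRow_Ref(uint8_t* dst_argb, int width) {
  for (int i = 0; i < width; ++i) {
    int b = dst_argb[0], g = dst_argb[1], r = dst_argb[2];
    int sb = (17 * b + 68 * g + 35 * r) >> 7; /* max 239, no clamp needed */
    int sg = (22 * b + 88 * g + 45 * r) >> 7;
    int sr = (24 * b + 98 * g + 50 * r) >> 7;
    dst_argb[0] = (uint8_t)sb;
    dst_argb[1] = (uint8_t)(sg > 255 ? 255 : sg);
    dst_argb[2] = (uint8_t)(sr > 255 ? 255 : sr);
    /* dst_argb[3] (alpha) is untouched */
    dst_argb += 4;
  }
}
```
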
| 1041 void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444, | 1279 void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444, |
| 1042 uint8* dst_argb, | 1280 uint8* dst_argb, |
| 1043 int width) { | 1281 int width) { |
| 1044 int x; | 1282 int x; |
| 1045 v16u8 src0, src1; | 1283 v16u8 src0, src1; |
| 1046 v8u16 vec0, vec1, vec2, vec3; | 1284 v8u16 vec0, vec1, vec2, vec3; |
| 1047 v16u8 dst0, dst1, dst2, dst3; | 1285 v16u8 dst0, dst1, dst2, dst3; |
| 1048 | 1286 |
| 1049 for (x = 0; x < width; x += 16) { | 1287 for (x = 0; x < width; x += 16) { |
| 1050 src0 = (v16u8)__msa_ld_b((v16u8*)src_argb4444, 0); | 1288 src0 = (v16u8)__msa_ld_b((v16u8*)src_argb4444, 0); |
| (...skipping 15 matching lines...) | |
| 1066 dst_argb += 64; | 1304 dst_argb += 64; |
| 1067 } | 1305 } |
| 1068 } | 1306 } |
| 1069 | 1307 |
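
Since the middle of the ARGB4444 body is elided here, a hedged scalar sketch of what the conversion computes: each 4-bit channel is widened to 8 bits by nibble duplication (x becomes x * 0x11, so 0xF maps to 0xFF). The _Ref name and form are illustrative; the skipped MSA lines are assumed to implement the same expansion:

```c
#include <stdint.h>

/* Illustrative scalar sketch of ARGB4444 -> ARGB expansion. */
static void ARGB4444ToARGBRow_Ref(const uint8_t* src_argb4444,
                                  uint8_t* dst_argb, int width) {
  for (int i = 0; i < width; ++i) {
    uint8_t b = src_argb4444[0] & 0x0f;
    uint8_t g = src_argb4444[0] >> 4;
    uint8_t r = src_argb4444[1] & 0x0f;
    uint8_t a = src_argb4444[1] >> 4;
    dst_argb[0] = (uint8_t)(b * 0x11);
    dst_argb[1] = (uint8_t)(g * 0x11);
    dst_argb[2] = (uint8_t)(r * 0x11);
    dst_argb[3] = (uint8_t)(a * 0x11);
    src_argb4444 += 2;
    dst_argb += 4;
  }
}
```
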
| 1070 #ifdef __cplusplus | 1308 #ifdef __cplusplus |
| 1071 } // extern "C" | 1309 } // extern "C" |
| 1072 } // namespace libyuv | 1310 } // namespace libyuv |
| 1073 #endif | 1311 #endif |
| 1074 | 1312 |
| 1075 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) | 1313 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) |