OLD | NEW |
---|---|
1 /* | 1 /* |
2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. | 2 * Copyright 2016 The LibYuv Project Authors. All rights reserved. |
3 * | 3 * |
4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
9 */ | 9 */ |
10 | 10 |
(...skipping 203 matching lines...) | |
214 dst0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0); | 214 dst0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0); |
215 dst1 = (v16u8) __msa_pckod_b((v16i8) src1, (v16i8) src0); | 215 dst1 = (v16u8) __msa_pckod_b((v16i8) src1, (v16i8) src0); |
216 ST_UB(dst0, dst_u); | 216 ST_UB(dst0, dst_u); |
217 ST_UB(dst1, dst_v); | 217 ST_UB(dst1, dst_v); |
218 src_uyvy += 64; | 218 src_uyvy += 64; |
219 dst_u += 16; | 219 dst_u += 16; |
220 dst_v += 16; | 220 dst_v += 16; |
221 } | 221 } |
222 } | 222 } |
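Note: __msa_pckev_b packs the even-indexed bytes of its two vector operands and __msa_pckod_b the odd-indexed ones, which is how the tail above splits the interleaved chroma bytes into separate U and V planes. A scalar model of pckev_b, assuming the MSA convention that the first operand fills the upper half of the result (illustrative sketch only, not the intrinsic itself):

    /* Scalar model of __msa_pckev_b: the low half of the result takes the
       even-indexed bytes of the second operand, the high half takes the
       even-indexed bytes of the first; pckod_b is identical but reads
       bytes 2 * i + 1. */
    static void pckev_b_model(uint8 dst[16],
                              const uint8 ws[16], const uint8 wt[16]) {
      int i;
      for (i = 0; i < 8; ++i) {
        dst[i] = wt[2 * i];      /* low half: even bytes of second operand */
        dst[i + 8] = ws[2 * i];  /* high half: even bytes of first operand */
      }
    }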
223 | 223 |
224 void ARGBToYRow_MSA(const uint8* src_argb0, uint8* dst_y, int width) { | |
225 int x; | |
226 v16u8 src0, src1, src2, src3, vec0, vec1, vec2, vec3, dst0; | |
227 v8u16 reg0, reg1, reg2, reg3, reg4, reg5; | |
228 v16i8 zero = { 0 }; | |
229 v8u16 const_0x19 = (v8u16) __msa_ldi_h(0x19); | |
230 v8u16 const_0x81 = (v8u16) __msa_ldi_h(0x81); | |
231 v8u16 const_0x42 = (v8u16) __msa_ldi_h(0x42); | |
232 v8u16 const_0x1080 = (v8u16) __msa_fill_h(0x1080); | |
233 | |
234 for (x = 0; x < width; x += 16) { | |
235 src0 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 0); | |
236 src1 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 16); | |
237 src2 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 32); | |
238 src3 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 48); | |
239 vec0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0); | |
240 vec1 = (v16u8) __msa_pckev_b((v16i8) src3, (v16i8) src2); | |
241 vec2 = (v16u8) __msa_pckod_b((v16i8) src1, (v16i8) src0); | |
242 vec3 = (v16u8) __msa_pckod_b((v16i8) src3, (v16i8) src2); | |
243 reg0 = (v8u16) __msa_ilvev_b(zero, (v16i8) vec0); | |
244 reg1 = (v8u16) __msa_ilvev_b(zero, (v16i8) vec1); | |
245 reg2 = (v8u16) __msa_ilvev_b(zero, (v16i8) vec2); | |
246 reg3 = (v8u16) __msa_ilvev_b(zero, (v16i8) vec3); | |
247 reg4 = (v8u16) __msa_ilvod_b(zero, (v16i8) vec0); | |
248 reg5 = (v8u16) __msa_ilvod_b(zero, (v16i8) vec1); | |
249 reg0 *= const_0x19; | |
250 reg1 *= const_0x19; | |
251 reg2 *= const_0x81; | |
252 reg3 *= const_0x81; | |
253 reg4 *= const_0x42; | |
254 reg5 *= const_0x42; | |
255 reg0 += reg2; | |
256 reg1 += reg3; | |
257 reg0 += reg4; | |
258 reg1 += reg5; | |
259 reg0 += const_0x1080; | |
260 reg1 += const_0x1080; | |
261 reg0 = (v8u16) __msa_srai_h((v8i16) reg0, 8); | |
262 reg1 = (v8u16) __msa_srai_h((v8i16) reg1, 8); | |
263 dst0 = (v16u8) __msa_pckev_b((v16i8) reg1, (v16i8) reg0); | |
264 ST_UB(dst0, dst_y); | |
265 src_argb0 += 64; | |
266 dst_y += 16; | |
267 } | |
268 } | |
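The multiply/add sequence above is the standard libyuv BT.601 luma computation, vectorized 16 pixels at a time: the pckev/pckod and ilvev/ilvod shuffles separate the B, G and R bytes into 16-bit lanes, and the constants 0x19, 0x81 and 0x42 are the coefficients 25, 129 and 66 in decimal. Per pixel it reduces to the following scalar sketch (hypothetical helper name, for illustration only):

    /* Scalar equivalent of one Y sample from the loop above:
       0x1080 = 16.5 * 256 folds the +16 luma offset and the +0.5
       rounding term into a single bias applied before the >> 8. */
    static uint8 ARGBToY_scalar(uint8 b, uint8 g, uint8 r) {
      return (uint8)((25 * b + 129 * g + 66 * r + 0x1080) >> 8);
    }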
269 | |
270 void ARGBToUVRow_MSA(const uint8* src_argb0, int src_stride_argb, | |
271 uint8* dst_u, uint8* dst_v, int width) { | |
fbarchard1 2016/10/19 18:10:33: this is kinda HUGE! But I see it does a 16 byte s
272 int x; | |
273 const uint8* src_argb0_next = src_argb0 + src_stride_argb; | |
274 v16u8 src0, src1, src2, src3, src4, src5, src6, src7; | |
275 v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9; | |
276 v8u16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9; | |
277 v16u8 dst0, dst1; | |
278 v8u16 const_0x70 = (v8u16) __msa_ldi_h(0x70); | |
279 v8u16 const_0x4A = (v8u16) __msa_ldi_h(0x4A); | |
280 v8u16 const_0x26 = (v8u16) __msa_ldi_h(0x26); | |
281 v8u16 const_0x5E = (v8u16) __msa_ldi_h(0x5E); | |
282 v8u16 const_0x12 = (v8u16) __msa_ldi_h(0x12); | |
283 v8u16 const_0x8080 = (v8u16) __msa_fill_h(0x8080); | |
284 | |
285 for (x = 0; x < width; x += 32) { | |
286 src0 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 0); | |
287 src1 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 16); | |
288 src2 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 32); | |
289 src3 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 48); | |
290 src4 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 64); | |
291 src5 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 80); | |
292 src6 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 96); | |
293 src7 = (v16u8) __msa_ld_b((v16u8*) src_argb0, 112); | |
294 vec0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0); | |
295 vec1 = (v16u8) __msa_pckev_b((v16i8) src3, (v16i8) src2); | |
296 vec2 = (v16u8) __msa_pckev_b((v16i8) src5, (v16i8) src4); | |
297 vec3 = (v16u8) __msa_pckev_b((v16i8) src7, (v16i8) src6); | |
298 vec4 = (v16u8) __msa_pckod_b((v16i8) src1, (v16i8) src0); | |
299 vec5 = (v16u8) __msa_pckod_b((v16i8) src3, (v16i8) src2); | |
300 vec6 = (v16u8) __msa_pckod_b((v16i8) src5, (v16i8) src4); | |
301 vec7 = (v16u8) __msa_pckod_b((v16i8) src7, (v16i8) src6); | |
302 vec8 = (v16u8) __msa_pckev_b((v16i8) vec1, (v16i8) vec0); | |
303 vec9 = (v16u8) __msa_pckev_b((v16i8) vec3, (v16i8) vec2); | |
304 vec4 = (v16u8) __msa_pckev_b((v16i8) vec5, (v16i8) vec4); | |
305 vec5 = (v16u8) __msa_pckev_b((v16i8) vec7, (v16i8) vec6); | |
306 vec0 = (v16u8) __msa_pckod_b((v16i8) vec1, (v16i8) vec0); | |
307 vec1 = (v16u8) __msa_pckod_b((v16i8) vec3, (v16i8) vec2); | |
308 reg0 = __msa_hadd_u_h(vec8, vec8); | |
309 reg1 = __msa_hadd_u_h(vec9, vec9); | |
310 reg2 = __msa_hadd_u_h(vec4, vec4); | |
311 reg3 = __msa_hadd_u_h(vec5, vec5); | |
312 reg4 = __msa_hadd_u_h(vec0, vec0); | |
313 reg5 = __msa_hadd_u_h(vec1, vec1); | |
314 src0 = (v16u8) __msa_ld_b((v16u8*) src_argb0_next, 0); | |
315 src1 = (v16u8) __msa_ld_b((v16u8*) src_argb0_next, 16); | |
316 src2 = (v16u8) __msa_ld_b((v16u8*) src_argb0_next, 32); | |
317 src3 = (v16u8) __msa_ld_b((v16u8*) src_argb0_next, 48); | |
318 src4 = (v16u8) __msa_ld_b((v16u8*) src_argb0_next, 64); | |
319 src5 = (v16u8) __msa_ld_b((v16u8*) src_argb0_next, 80); | |
320 src6 = (v16u8) __msa_ld_b((v16u8*) src_argb0_next, 96); | |
321 src7 = (v16u8) __msa_ld_b((v16u8*) src_argb0_next, 112); | |
322 vec0 = (v16u8) __msa_pckev_b((v16i8) src1, (v16i8) src0); | |
323 vec1 = (v16u8) __msa_pckev_b((v16i8) src3, (v16i8) src2); | |
324 vec2 = (v16u8) __msa_pckev_b((v16i8) src5, (v16i8) src4); | |
325 vec3 = (v16u8) __msa_pckev_b((v16i8) src7, (v16i8) src6); | |
326 vec4 = (v16u8) __msa_pckod_b((v16i8) src1, (v16i8) src0); | |
327 vec5 = (v16u8) __msa_pckod_b((v16i8) src3, (v16i8) src2); | |
328 vec6 = (v16u8) __msa_pckod_b((v16i8) src5, (v16i8) src4); | |
329 vec7 = (v16u8) __msa_pckod_b((v16i8) src7, (v16i8) src6); | |
330 vec8 = (v16u8) __msa_pckev_b((v16i8) vec1, (v16i8) vec0); | |
331 vec9 = (v16u8) __msa_pckev_b((v16i8) vec3, (v16i8) vec2); | |
332 vec4 = (v16u8) __msa_pckev_b((v16i8) vec5, (v16i8) vec4); | |
333 vec5 = (v16u8) __msa_pckev_b((v16i8) vec7, (v16i8) vec6); | |
334 vec0 = (v16u8) __msa_pckod_b((v16i8) vec1, (v16i8) vec0); | |
335 vec1 = (v16u8) __msa_pckod_b((v16i8) vec3, (v16i8) vec2); | |
336 reg0 += __msa_hadd_u_h(vec8, vec8); | |
337 reg1 += __msa_hadd_u_h(vec9, vec9); | |
338 reg2 += __msa_hadd_u_h(vec4, vec4); | |
339 reg3 += __msa_hadd_u_h(vec5, vec5); | |
340 reg4 += __msa_hadd_u_h(vec0, vec0); | |
341 reg5 += __msa_hadd_u_h(vec1, vec1); | |
342 reg0 = (v8u16) __msa_srai_h((v8i16) reg0, 2); | |
343 reg1 = (v8u16) __msa_srai_h((v8i16) reg1, 2); | |
344 reg2 = (v8u16) __msa_srai_h((v8i16) reg2, 2); | |
345 reg3 = (v8u16) __msa_srai_h((v8i16) reg3, 2); | |
346 reg4 = (v8u16) __msa_srai_h((v8i16) reg4, 2); | |
347 reg5 = (v8u16) __msa_srai_h((v8i16) reg5, 2); | |
348 reg6 = reg0 * const_0x70; | |
349 reg7 = reg1 * const_0x70; | |
350 reg8 = reg2 * const_0x4A; | |
351 reg9 = reg3 * const_0x4A; | |
352 reg6 += const_0x8080; | |
353 reg7 += const_0x8080; | |
354 reg8 += reg4 * const_0x26; | |
355 reg9 += reg5 * const_0x26; | |
356 reg0 *= const_0x12; | |
357 reg1 *= const_0x12; | |
358 reg2 *= const_0x5E; | |
359 reg3 *= const_0x5E; | |
360 reg4 *= const_0x70; | |
361 reg5 *= const_0x70; | |
362 reg2 += reg0; | |
363 reg3 += reg1; | |
364 reg4 += const_0x8080; | |
365 reg5 += const_0x8080; | |
366 reg6 -= reg8; | |
367 reg7 -= reg9; | |
368 reg4 -= reg2; | |
369 reg5 -= reg3; | |
370 reg6 = (v8u16) __msa_srai_h((v8i16) reg6, 8); | |
371 reg7 = (v8u16) __msa_srai_h((v8i16) reg7, 8); | |
372 reg4 = (v8u16) __msa_srai_h((v8i16) reg4, 8); | |
373 reg5 = (v8u16) __msa_srai_h((v8i16) reg5, 8); | |
374 dst0 = (v16u8) __msa_pckev_b((v16i8) reg7, (v16i8) reg6); | |
375 dst1 = (v16u8) __msa_pckev_b((v16i8) reg5, (v16i8) reg4); | |
376 ST_UB(dst0, dst_u); | |
377 ST_UB(dst1, dst_v); | |
378 src_argb0 += 128; | |
379 src_argb0_next += 128; | |
380 dst_u += 16; | |
381 dst_v += 16; | |
382 } | |
383 } | |
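For all its length, the function is a straightforward 2x2 subsample: the pck shuffles in each row gather the B, G and R bytes, __msa_hadd_u_h sums horizontally adjacent pixels, the second row is accumulated on top, and the >> 2 turns each four-pixel sum into an average before the BT.601 chroma weights (0x70 = 112, 0x4A = 74, 0x26 = 38, 0x5E = 94, 0x12 = 18) are applied. Per 2x2 block it reduces to this scalar sketch (hypothetical helper name, illustration only):

    /* Scalar equivalent of one U/V pair, where b, g, r are the 2x2-averaged
       channel values; 0x8080 folds in the +128 chroma bias and +0.5 rounding. */
    static void ARGBToUV_scalar(int b, int g, int r, uint8* u, uint8* v) {
      *u = (uint8)((112 * b - 74 * g - 38 * r + 0x8080) >> 8);
      *v = (uint8)((112 * r - 94 * g - 18 * b + 0x8080) >> 8);
    }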
384 | |
385 void ARGB4444ToARGBRow_MSA(const uint8* src_argb4444, uint8* dst_argb, | |
386 int width) { | |
387 int x; | |
388 v16u8 src0, src1; | |
389 v8u16 vec0, vec1, vec2, vec3; | |
390 v16u8 dst0, dst1, dst2, dst3; | |
391 | |
392 for (x = 0; x < width; x += 16) { | |
393 src0 = (v16u8) __msa_ld_b((v16u8*) src_argb4444, 0); | |
394 src1 = (v16u8) __msa_ld_b((v16u8*) src_argb4444, 16); | |
395 vec0 = (v8u16) __msa_andi_b(src0, 0x0F); | |
396 vec1 = (v8u16) __msa_andi_b(src1, 0x0F); | |
397 vec2 = (v8u16) __msa_andi_b(src0, 0xF0); | |
398 vec3 = (v8u16) __msa_andi_b(src1, 0xF0); | |
399 vec0 |= (v8u16) __msa_slli_b((v16i8) vec0, 4); | |
400 vec1 |= (v8u16) __msa_slli_b((v16i8) vec1, 4); | |
401 vec2 |= (v8u16) __msa_srli_b((v16i8) vec2, 4); | |
402 vec3 |= (v8u16) __msa_srli_b((v16i8) vec3, 4); | |
403 dst0 = (v16u8) __msa_ilvr_b((v16i8) vec2, (v16i8) vec0); | |
404 dst1 = (v16u8) __msa_ilvl_b((v16i8) vec2, (v16i8) vec0); | |
405 dst2 = (v16u8) __msa_ilvr_b((v16i8) vec3, (v16i8) vec1); | |
406 dst3 = (v16u8) __msa_ilvl_b((v16i8) vec3, (v16i8) vec1); | |
407 ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); | |
408 src_argb4444 += 32; | |
409 dst_argb += 64; | |
410 } | |
411 } | |
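The andi/slli/srli pairs above expand each 4-bit channel c to 8 bits as (c << 4) | c, i.e. c * 17, so 0x0 maps to 0x00 and 0xF to 0xFF, and the ilvr/ilvl interleaves then restore the B, G, R, A byte order. A scalar model of the expansion (illustration only):

    /* Replicate a 4-bit channel into both nibbles of the output byte,
       mapping 0x0..0xF onto the full 0x00..0xFF range. */
    static uint8 Expand4To8(uint8 nibble) {    /* nibble in [0, 15] */
      return (uint8)((nibble << 4) | nibble);  /* same as nibble * 17 */
    }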
412 | |
224 #ifdef __cplusplus | 413 #ifdef __cplusplus |
225 } // extern "C" | 414 } // extern "C" |
226 } // namespace libyuv | 415 } // namespace libyuv |
227 #endif | 416 #endif |
228 | 417 |
229 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) | 418 #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) |