Chromium Code Reviews| Index: source/row_msa.cc |
| diff --git a/source/row_msa.cc b/source/row_msa.cc |
| index 6dd6f5f3b7a4c581ea02369f9cf1af4606a9bf98..29e913b53281436d979dbb85cb1902dee696fd27 100644 |
| --- a/source/row_msa.cc |
| +++ b/source/row_msa.cc |
| @@ -37,6 +37,24 @@ void MirrorRow_MSA(const uint8* src, uint8* dst, int width) { |
| src -= 64; |
| } |
| } |
| + |
| +void ARGBMirrorRow_MSA(const uint8* src, uint8* dst, int width) { |
| + int count; |
|
fbarchard1
2016/09/26 18:04:34
Prefer naming the horizontal loop counter `int x` (instead of `count`), for consistency.
|
| + v16u8 src0, src1, src2, src3; |
| + v16u8 dst0, dst1, dst2, dst3; |
| + v4i32 mask = { 3, 2, 1, 0 }; |
| + |
| + src += width * 4 - 64; |
| + |
| + for (count = 0; count < width; count += 16) { |
| + LD_UB4(src, 16, src3, src2, src1, src0); |
| + VSHF_W4_UB(src0, src0, src1, src1, src2, src2, src3, src3, |
|
fbarchard1
2016/09/26 18:04:34
Consider less unrolling (measure performance to confirm).
F
|
| + mask, mask, mask, mask, dst0, dst1, dst2, dst3); |
| + ST_UB4(dst0, dst1, dst2, dst3, dst, 16); |
| + dst += 64; |
| + src -= 64; |
| + } |
| +} |
| #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) |
| #ifdef __cplusplus |