Index: source/row_msa.cc
diff --git a/source/row_msa.cc b/source/row_msa.cc
index 6dd6f5f3b7a4c581ea02369f9cf1af4606a9bf98..29e913b53281436d979dbb85cb1902dee696fd27 100644
--- a/source/row_msa.cc
+++ b/source/row_msa.cc
@@ -37,6 +37,24 @@ void MirrorRow_MSA(const uint8* src, uint8* dst, int width) {
     src -= 64;
   }
 }
+
+void ARGBMirrorRow_MSA(const uint8* src, uint8* dst, int width) {
+  int count;
fbarchard1
2016/09/26 18:04:34
prefer int x for horizontal counts for consistency
+  v16u8 src0, src1, src2, src3;
+  v16u8 dst0, dst1, dst2, dst3;
+  v4i32 mask = { 3, 2, 1, 0 };
+
+  src += width * 4 - 64;
+
+  for (count = 0; count < width; count += 16) {
+    LD_UB4(src, 16, src3, src2, src1, src0);
+    VSHF_W4_UB(src0, src0, src1, src1, src2, src2, src3, src3,
+               mask, mask, mask, mask, dst0, dst1, dst2, dst3);

fbarchard1
2016/09/26 18:04:34
consider less unrolling. (measure performance)

+    ST_UB4(dst0, dst1, dst2, dst3, dst, 16);
+    dst += 64;
+    src -= 64;
+  }
+}
 #endif  // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
 #ifdef __cplusplus
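
On fbarchard1's unrolling comment above: below is a minimal sketch of what a less-unrolled variant could look like for performance comparison, processing 4 ARGB pixels (16 bytes) per iteration instead of 16. The name ARGBMirrorRow_MSA_Sketch is hypothetical; the sketch assumes the single-vector LD_UB/ST_UB macros from libyuv's macros_msa.h (alongside the LD_UB4/ST_UB4 macros the patch already uses) and follows the reviewer's int x naming suggestion. This is illustrative only, not the patch's implementation.

// Hypothetical less-unrolled variant for measurement, not part of the patch.
// Assumes libyuv's macros_msa.h (LD_UB, ST_UB, v16u8, v4i32) and
// basic_types.h (uint8) are included, as in row_msa.cc.
void ARGBMirrorRow_MSA_Sketch(const uint8* src, uint8* dst, int width) {
  int x;
  v16u8 src0, dst0;
  v4i32 mask = { 3, 2, 1, 0 };  // reverse the four 32-bit pixel lanes

  src += width * 4 - 16;  // start at the last 4 pixels of the row

  for (x = 0; x < width; x += 4) {
    src0 = LD_UB(src);  // load 4 ARGB pixels
    dst0 = (v16u8)__msa_vshf_w(mask, (v4i32)src0, (v4i32)src0);
    ST_UB(dst0, dst);  // store them in reversed pixel order
    dst += 16;
    src -= 16;
  }
}

The patch's 4x-unrolled loop issues four independent load/shuffle/store chains per iteration, which can hide memory latency better; the sketch trades that for smaller code. As the reviewer notes, only measurement on the target core can settle which wins.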