Chromium Code Reviews| Index: source/row_msa.cc |
| diff --git a/source/row_msa.cc b/source/row_msa.cc |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..4221a26dc4357a2ad8e08e4a68b97af3f8a2ef42 |
| --- /dev/null |
| +++ b/source/row_msa.cc |
| @@ -0,0 +1,139 @@ |
| +/* |
| + * Copyright 2016 The LibYuv Project Authors. All rights reserved. |
| + * |
| + * Use of this source code is governed by a BSD-style license |
| + * that can be found in the LICENSE file in the root of the source |
| + * tree. An additional intellectual property rights grant can be found |
| + * in the file PATENTS. All contributing project authors may |
| + * be found in the AUTHORS file in the root of the source tree. |
| + */ |
| + |
| +#include "libyuv/row.h" |
| + |
| +#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_msa) |
| +#include "libyuv/cpu/mips/macros_msa.h" |
| +#endif |
| + |
| +#ifdef __cplusplus |
| +namespace libyuv { |
| +extern "C" { |
| +#endif |
| + |
| +#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_msa) |
| +void MirrorRow_MSA(const uint8* src, uint8* dst, int width) /* Mirrors a row: consumes src backwards starting from src + width while writing dst forwards, passing every 16-byte vector through a shuffle whose control mask lists byte indices in descending order (presumably a per-vector byte reversal - confirm against __msa_vshf_b semantics). Bits of width below 0x10 are never examined, so a trailing (width & 15) bytes are not processed here - presumably width is a multiple of 16 or the caller handles the remainder; confirm. */ |
| +{ |

fbarchard1
2016/09/14 01:48:05
{ should be on same row as void MirrorRow_MSA()
manojkumar.bhosale
2016/09/14 12:45:30
Done.
|
| + int count; |
| + v16u8 src0, src1, src2, src3, src4, src5, src6, src7; |
| + v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; |
| + v16i8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; /* shuffle control shared by every shuffle below: byte indices 15 down to 0 */ |
| + |
| + src += width; /* point one past the last input byte; the row is consumed from its end */ |
| + |
| + for (count = 0; count < (width >> 7); count++) /* main loop: one iteration per full 128 bytes of width */ |
| + { |

fbarchard1
2016/09/14 01:48:05
{ should be on same line as for ()
manojkumar.bhosale
2016/09/14 12:45:30
Done.
|
| + src -= 128; /* step back over the next 128 input bytes */ |
| + LD_UB8(src, 16, src7, src6, src5, src4, src3, src2, src1, src0); /* registers are listed src7..src0; assuming LD_UB8 fills its outputs from ascending addresses at a 16-byte stride, src0 receives the highest-address vector - confirm in macros_msa.h */ |
| + |
| + VSHF_B2_UB(src7, src7, src6, src6, mask, mask, dst7, dst6); /* each source is passed as both shuffle inputs, so every selected byte comes from that one vector */ |

fbarchard1
2016/09/14 01:48:04
I wouldnt normally unroll this much. 2 or 4 vecto
manojkumar.bhosale
2016/09/14 12:45:30
Done with 4 vectors
|
| + VSHF_B2_UB(src5, src5, src4, src4, mask, mask, dst5, dst4); |
| + VSHF_B2_UB(src3, src3, src2, src2, mask, mask, dst3, dst2); |
| + VSHF_B2_UB(src1, src1, src0, src0, mask, mask, dst1, dst0); |
| + |
| + ST_UB8(dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst, 16); /* outputs are listed dst0..dst7, the opposite of the register order used for the load */ |
| + dst += 128; /* output advances forwards while input walks backwards */ |
| + } |
| + |
| + if (width & 0x40) /* tail: bits 0x40, 0x20 and 0x10 of width select exactly one branch, which handles 112, 96, 80, 64, 48, 32 or 16 further bytes */ |

fbarchard1
2016/09/14 01:48:04
suggest removing everything from here down to make
manojkumar.bhosale
2016/09/14 12:45:30
Done.
|
| + { |
| + if (width & 0x20) |
| + { |
| + if (width & 0x10) /* 0x40 | 0x20 | 0x10 set: 112 bytes, 7 vectors */ |
| + { |
| + src -= 112; |
| + LD_UB7(src, 16, src6, src5, src4, src3, src2, src1, src0); |
| + |
| + VSHF_B2_UB(src0, src0, src1, src1, mask, mask, dst0, dst1); |
| + VSHF_B2_UB(src2, src2, src3, src3, mask, mask, dst2, dst3); |
| + VSHF_B2_UB(src4, src4, src5, src5, mask, mask, dst4, dst5); |
| + dst6 = (v16u8) __msa_vshf_b(mask, (v16i8) src6, (v16i8) src6); /* odd vector count: the last vector is shuffled with the intrinsic directly instead of the two-output macro */ |
| + |
| + ST_UB7(dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst, 16); |
| + dst += 112; |
| + } |
| + else /* 0x40 | 0x20 set, 0x10 clear: 96 bytes, 6 vectors */ |
| + { |
| + src -= 96; |
| + LD_UB6(src, 16, src5, src4, src3, src2, src1, src0); |
| + |
| + VSHF_B2_UB(src0, src0, src1, src1, mask, mask, dst0, dst1); |
| + VSHF_B2_UB(src2, src2, src3, src3, mask, mask, dst2, dst3); |
| + VSHF_B2_UB(src4, src4, src5, src5, mask, mask, dst4, dst5); |
| + |
| + ST_UB6(dst0, dst1, dst2, dst3, dst4, dst5, dst, 16); |
| + dst += 96; |
| + } |
| + } |
| + else if (width & 0x10) /* 0x40 | 0x10 set, 0x20 clear: 80 bytes, 5 vectors */ |
| + { |
| + src -= 80; |
| + LD_UB5(src, 16, src4, src3, src2, src1, src0); |
| + |
| + VSHF_B2_UB(src0, src0, src1, src1, mask, mask, dst0, dst1); |
| + VSHF_B2_UB(src2, src2, src3, src3, mask, mask, dst2, dst3); |
| + dst4 = (v16u8) __msa_vshf_b(mask, (v16i8) src4, (v16i8) src4); |
| + |
| + ST_UB5(dst0, dst1, dst2, dst3, dst4, dst, 16); |
| + dst += 80; |
| + } |
| + else /* only 0x40 set: 64 bytes, 4 vectors */ |
| + { |
| + src -= 64; |
| + LD_UB4(src, 16, src3, src2, src1, src0); |
| + |
| + VSHF_B2_UB(src0, src0, src1, src1, mask, mask, dst0, dst1); |
| + VSHF_B2_UB(src2, src2, src3, src3, mask, mask, dst2, dst3); |
| + |
| + ST_UB4(dst0, dst1, dst2, dst3, dst, 16); |
| + dst += 64; |
| + } |
| + } |
| + else if (width & 0x20) |
| + { |
| + if (width & 0x10) /* 0x20 | 0x10 set, 0x40 clear: 48 bytes, 3 vectors */ |
| + { |
| + src -= 48; |
| + LD_UB3(src, 16, src2, src1, src0); |
| + |
| + VSHF_B2_UB(src0, src0, src1, src1, mask, mask, dst0, dst1); |
| + dst2 = (v16u8) __msa_vshf_b(mask, (v16i8) src2, (v16i8) src2); |
| + |
| + ST_UB3(dst0, dst1, dst2, dst, 16); |
| + dst += 48; |
| + } |
| + else /* only 0x20 set: 32 bytes, 2 vectors */ |
| + { |
| + src -= 32; |
| + LD_UB2(src, 16, src1, src0); |
| + |
| + VSHF_B2_UB(src0, src0, src1, src1, mask, mask, dst0, dst1); |
| + |
| + ST_UB2(dst0, dst1, dst, 16); |
| + dst += 32; |
| + } |
| + } |
| + else if (width & 0x10) /* only 0x10 set: a single 16-byte vector */ |
| + { |
| + src -= 16; |
| + src0 = LD_UB(src); |
| + |
| + dst0 = (v16u8) __msa_vshf_b(mask, (v16i8) src0, (v16i8) src0); |
| + ST_UB(dst0, dst); |
| + dst += 16; |
| + } |
| +} |
| +#endif // !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_msa) |
| + |
| +#ifdef __cplusplus |
| +} // extern "C" |
| +} // namespace libyuv |
| +#endif |