Chromium Code Reviews| Index: source/row_msa.cc | 
| diff --git a/source/row_msa.cc b/source/row_msa.cc | 
| new file mode 100644 | 
| index 0000000000000000000000000000000000000000..7d517fa46d9876fe9d69f5070bed05fc61f8b74e | 
| --- /dev/null | 
| +++ b/source/row_msa.cc | 
| @@ -0,0 +1,181 @@ | 
| +/* | 
| + * Copyright 2016 The LibYuv Project Authors. All rights reserved. | 
| + * | 
| + * Use of this source code is governed by a BSD-style license | 
| + * that can be found in the LICENSE file in the root of the source | 
| + * tree. An additional intellectual property rights grant can be found | 
| + * in the file PATENTS. All contributing project authors may | 
| + * be found in the AUTHORS file in the root of the source tree. | 
| + */ | 
| + | 
| +#include "libyuv/row.h" | 
| +#include "libyuv/macros_msa.h" | 
| 
 
fbarchard1
2016/08/31 01:20:42
This header won't compile on intel/arm as is.  May
 
 | 
| + | 
| +#ifdef __cplusplus | 
| +namespace libyuv { | 
| +extern "C" { | 
| +#endif | 
| + | 
| +#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_msa) | 
| + | 
| +void MirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width) | 
| +{ /* Copies width bytes from src to dst, reading src backward (starting at | 
| + * src + width) while writing dst forward. The scalar tail loops at the | 
| + * bottom visibly emit the bytes in reversed order; the vector paths are | 
| + * presumed to do the same through the 15..0 shuffle mask. | 
| + * Work is split by the bits of width: (width >> 7) iterations of 128 | 
| + * bytes, then optional 16-, 32- and 64-byte vector steps (bits 4, 5, 6), | 
| + * then (width >> 2) & 3 scalar groups of four bytes, then width & 3 | 
| + * single bytes. | 
| + * NOTE(review): LD_UB*, VSHF_B2_UB and ST_UB* are not defined in this | 
| + * file (presumably libyuv/macros_msa.h); they look like 16-byte vector | 
| + * load / shuffle / store helpers - confirm their argument order there. */ | 
| + int32_t count; | 
| + uint8_t val0, val1, val2, val3; /* scratch bytes for the scalar tail loops */ | 
| + v16u8 src0, src1, src2, src3, src4, src5, src6, src7; | 
| 
 
fbarchard1
2016/08/31 01:20:43
prefer uvec8 be defined as v16u8 in row.h if that
 
 | 
| + v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; | 
| + v16i8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; /* control operand of every shuffle below; presumably reverses the 16 bytes of a vector - confirm */ | 
| + int32_t cnt64, cnt32, cnt16; | 
| 
 
fbarchard1
2016/08/31 01:20:43
prefer int32 defined in libyuv/basic_types.h
or if
 
 | 
| + | 
| + src += width; /* one past the last source byte; src is only decremented from here on */ | 
| + | 
| + for (count = 0; count < (width >> 7); count++) /* one iteration per full 128 bytes */ | 
| + { | 
| + src -= 128; | 
| + LD_UB8(src, 16, src7, src6, src5, src4, src3, src2, src1, src0); | 
| + VSHF_B2_UB(src7, src7, src6, src6, mask, mask, dst7, dst6); | 
| + VSHF_B2_UB(src5, src5, src4, src4, mask, mask, dst5, dst4); | 
| + VSHF_B2_UB(src3, src3, src2, src2, mask, mask, dst3, dst2); | 
| + VSHF_B2_UB(src1, src1, src0, src0, mask, mask, dst1, dst0); | 
| + ST_UB8(dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst, 16); | 
| + dst += 128; | 
| + } | 
| + | 
| + if ((width >> 4) & 0x7) /* any of bits 4..6 set: 16 to 112 bytes are handled by the branches below */ | 
| 
 
fbarchard1
2016/08/31 01:20:42
its not necessary to hand odd widths in row functi
 
 | 
| + { | 
| + cnt64 = (width >> 6) & 0x1; /* bit 6: a 64-byte step is needed */ | 
| + cnt32 = (width >> 5) & 0x1; /* bit 5: a 32-byte step is needed */ | 
| + cnt16 = (width >> 4) & 0x1; /* bit 4: a 16-byte step is needed */ | 
| + | 
| + if (cnt16) /* the 16-byte step is taken first, i.e. from the highest remaining addresses */ | 
| + { | 
| + src -= 16; | 
| + src0 = LD_UB(src); | 
| + | 
| + if (cnt32) | 
| + { | 
| + src -= 32; | 
| + LD_UB2(src, 16, src2, src1); | 
| + | 
| + if (cnt64) /* 16 + 32 + 64 bytes */ | 
| + { | 
| + src -= 64; | 
| + LD_UB4(src, 16, src6, src5, src4, src3); | 
| + | 
| + dst0 = (v16u8) __msa_vshf_b(mask, (v16i8) src0, | 
| + (v16i8) src0); | 
| + VSHF_B2_UB(src2, src2, src1, src1, mask, mask, dst2, dst1); | 
| + VSHF_B2_UB(src6, src6, src5, src5, mask, mask, dst6, dst5); | 
| + VSHF_B2_UB(src4, src4, src3, src3, mask, mask, dst4, dst3); | 
| + | 
| + ST_UB(dst0, dst); | 
| + dst += 16; | 
| + ST_UB2(dst1, dst2, dst, 16); | 
| + dst += 32; | 
| + ST_UB4(dst3, dst4, dst5, dst6, dst, 16); | 
| + dst += 64; | 
| + } | 
| + else /* 16 + 32 bytes */ | 
| + { | 
| + dst0 = (v16u8) __msa_vshf_b(mask, (v16i8) src0, | 
| + (v16i8) src0); | 
| + VSHF_B2_UB(src2, src2, src1, src1, mask, mask, dst2, dst1); | 
| + | 
| + ST_UB(dst0, dst); | 
| + dst += 16; | 
| + ST_UB2(dst1, dst2, dst, 16); | 
| + dst += 32; | 
| + } | 
| + } | 
| + else if (cnt64) /* 16 + 64 bytes */ | 
| + { | 
| + src -= 64; | 
| + LD_UB4(src, 16, src6, src5, src4, src3); | 
| + | 
| + dst0 = (v16u8) __msa_vshf_b(mask, (v16i8) src0, | 
| + (v16i8) src0); | 
| + VSHF_B2_UB(src6, src6, src5, src5, mask, mask, dst4, dst3); | 
| + VSHF_B2_UB(src4, src4, src3, src3, mask, mask, dst2, dst1); | 
| + | 
| + ST_UB(dst0, dst); | 
| + dst += 16; | 
| + ST_UB4(dst1, dst2, dst3, dst4, dst, 16); | 
| + dst += 64; | 
| + } | 
| + else /* 16 bytes only */ | 
| + { | 
| + dst0 = (v16u8) __msa_vshf_b(mask, (v16i8) src0, | 
| + (v16i8) src0); | 
| + | 
| + ST_UB(dst0, dst); | 
| + dst += 16; | 
| + } | 
| + } | 
| + else if (cnt32) | 
| + { | 
| + src -= 32; | 
| + LD_UB2(src, 16, src2, src1); | 
| + | 
| + if (cnt64) /* 32 + 64 bytes */ | 
| + { | 
| + src -= 64; | 
| + LD_UB4(src, 16, src6, src5, src4, src3); | 
| + | 
| + VSHF_B2_UB(src2, src2, src1, src1, mask, mask, dst1, dst0); | 
| + VSHF_B2_UB(src6, src6, src5, src5, mask, mask, dst5, dst4); | 
| + VSHF_B2_UB(src4, src4, src3, src3, mask, mask, dst3, dst2); | 
| + | 
| + ST_UB2(dst0, dst1, dst, 16); | 
| + dst += 32; | 
| + ST_UB4(dst2, dst3, dst4, dst5, dst, 16); | 
| + dst += 64; | 
| + } | 
| + else /* 32 bytes only */ | 
| + { | 
| + VSHF_B2_UB(src2, src2, src1, src1, mask, mask, dst1, dst0); | 
| + | 
| + ST_UB2(dst0, dst1, dst, 16); | 
| + dst += 32; | 
| + } | 
| + } | 
| + else if (cnt64) /* 64 bytes only */ | 
| + { | 
| + src -= 64; | 
| + LD_UB4(src, 16, src6, src5, src4, src3); | 
| + | 
| + VSHF_B2_UB(src6, src6, src5, src5, mask, mask, dst3, dst2); | 
| + VSHF_B2_UB(src4, src4, src3, src3, mask, mask, dst1, dst0); | 
| + | 
| + ST_UB4(dst0, dst1, dst2, dst3, dst, 16); | 
| + dst += 64; | 
| + } | 
| + } | 
| + | 
| + for (count = 0; count < ((width >> 2) & 0x3); count++) /* 0..3 groups of four bytes, reversed with scalar loads and stores */ | 
| + { | 
| + src -= 4; | 
| + val0 = src[3]; | 
| + val1 = src[2]; | 
| + val2 = src[1]; | 
| + val3 = src[0]; | 
| + dst[0] = val0; | 
| + dst[1] = val1; | 
| + dst[2] = val2; | 
| + dst[3] = val3; | 
| + dst += 4; | 
| + } | 
| + | 
| + for (count = 0; count < (width & 0x3); count++) /* final 0..3 single bytes */ | 
| + { | 
| + val0 = src[-1]; | 
| + src--; | 
| + dst[0] = val0; | 
| + dst++; | 
| + } | 
| +} | 
| +#endif // !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_msa) | 
| + | 
| +#ifdef __cplusplus | 
| +} // extern "C" | 
| +} // namespace libyuv | 
| +#endif |