Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(16)

Unified Diff: source/row_msa.cc

Issue 2285683002: Add MIPS SIMD Arch (MSA) optimized MirrorRow function (Closed)
Patch Set: Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« include/libyuv/macros_msa.h ('K') | « libyuv_test.gyp ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: source/row_msa.cc
diff --git a/source/row_msa.cc b/source/row_msa.cc
new file mode 100644
index 0000000000000000000000000000000000000000..7d517fa46d9876fe9d69f5070bed05fc61f8b74e
--- /dev/null
+++ b/source/row_msa.cc
@@ -0,0 +1,181 @@
+/*
+ * Copyright 2016 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/row.h"
+#include "libyuv/macros_msa.h"
fbarchard1 2016/08/31 01:20:42 This header won't compile on Intel/ARM as is. May
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_msa)
+
+void MirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width)  // Mirrors a row: reads `width` bytes backwards from the end of src and writes them forwards into dst.
+{  // Three stages: 128-byte vector blocks, then 64/32/16-byte vector pieces, then scalar 4-byte and 1-byte tails. Vector stages rely on LD_UB*/VSHF_B2_UB/ST_UB* from libyuv/macros_msa.h (not visible here).
+ int32_t count;  // loop counter reused by the three loops below
+ uint8_t val0, val1, val2, val3;  // scalar temporaries for the tail loops
+ v16u8 src0, src1, src2, src3, src4, src5, src6, src7;  // 16-byte input vectors
fbarchard1 2016/08/31 01:20:43 prefer uvec8 be defined as v16u8 in row.h if that
+ v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;  // shuffled output vectors
+ v16i8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };  // shuffle indices 15..0, i.e. byte order reversed within one vector
+ int32_t cnt64, cnt32, cnt16;  // flags: remainder contains a 64- / 32- / 16-byte piece
fbarchard1 2016/08/31 01:20:42 prefer int32 defined in libyuv/basic_types.h or if
+
+ src += width;  // point one past the last source byte; src only moves backwards from here
+
+ for (count = 0; count < (width >> 7); count++)  // one iteration per full 128-byte block
+ {
+ src -= 128;  // step back to the start of the next (lower-address) 128-byte block
+ LD_UB8(src, 16, src7, src6, src5, src4, src3, src2, src1, src0);  // NOTE(review): presumably 8 loads at a 16-byte stride, lowest address into src7 - confirm in macros_msa.h
+ VSHF_B2_UB(src7, src7, src6, src6, mask, mask, dst7, dst6);  // NOTE(review): presumably two __msa_vshf_b shuffles per call; same vector passed twice so mask picks from one source
+ VSHF_B2_UB(src5, src5, src4, src4, mask, mask, dst5, dst4);
+ VSHF_B2_UB(src3, src3, src2, src2, mask, mask, dst3, dst2);
+ VSHF_B2_UB(src1, src1, src0, src0, mask, mask, dst1, dst0);
+ ST_UB8(dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst, 16);  // dst0 (from src0, the last-listed load) is listed first, so vector order is reversed too
+ dst += 128;
+ }
+
+ if ((width >> 4) & 0x7)  // bits 4..6 of width: any whole 16-byte pieces left after the 128-byte blocks?
fbarchard1 2016/08/31 01:20:42 its not necessary to hand odd widths in row functi
+ {
+ cnt64 = (width >> 6) & 0x1;  // bit 6 of width
+ cnt32 = (width >> 5) & 0x1;  // bit 5 of width
+ cnt16 = (width >> 4) & 0x1;  // bit 4 of width
+
+ if (cnt16)  // pieces are taken smallest-first from the end of src: 16, then 32, then 64 bytes
+ {
+ src -= 16;
+ src0 = LD_UB(src);
+
+ if (cnt32)
+ {
+ src -= 32;
+ LD_UB2(src, 16, src2, src1);
+
+ if (cnt64)  // 16 + 32 + 64 = 112 bytes
+ {
+ src -= 64;
+ LD_UB4(src, 16, src6, src5, src4, src3);
+
+ dst0 = (v16u8) __msa_vshf_b(mask, (v16i8) src0,
+ (v16i8) src0);
+ VSHF_B2_UB(src2, src2, src1, src1, mask, mask, dst2, dst1);
+ VSHF_B2_UB(src6, src6, src5, src5, mask, mask, dst6, dst5);
+ VSHF_B2_UB(src4, src4, src3, src3, mask, mask, dst4, dst3);
+
+ ST_UB(dst0, dst);  // stores are issued in the same 16/32/64 order the pieces were loaded
+ dst += 16;
+ ST_UB2(dst1, dst2, dst, 16);
+ dst += 32;
+ ST_UB4(dst3, dst4, dst5, dst6, dst, 16);
+ dst += 64;
+ }
+ else  // 16 + 32 = 48 bytes
+ {
+ dst0 = (v16u8) __msa_vshf_b(mask, (v16i8) src0,
+ (v16i8) src0);
+ VSHF_B2_UB(src2, src2, src1, src1, mask, mask, dst2, dst1);
+
+ ST_UB(dst0, dst);
+ dst += 16;
+ ST_UB2(dst1, dst2, dst, 16);
+ dst += 32;
+ }
+ }
+ else if (cnt64)  // 16 + 64 = 80 bytes (no 32-byte piece)
+ {
+ src -= 64;
+ LD_UB4(src, 16, src6, src5, src4, src3);
+
+ dst0 = (v16u8) __msa_vshf_b(mask, (v16i8) src0,
+ (v16i8) src0);
+ VSHF_B2_UB(src6, src6, src5, src5, mask, mask, dst4, dst3);  // note: dst numbering differs from the 112-byte branch; dst1..dst4 hold the 64-byte piece here
+ VSHF_B2_UB(src4, src4, src3, src3, mask, mask, dst2, dst1);
+
+ ST_UB(dst0, dst);
+ dst += 16;
+ ST_UB4(dst1, dst2, dst3, dst4, dst, 16);
+ dst += 64;
+ }
+ else  // 16 bytes only
+ {
+ dst0 = (v16u8) __msa_vshf_b(mask, (v16i8) src0,
+ (v16i8) src0);
+
+ ST_UB(dst0, dst);
+ dst += 16;
+ }
+ }
+ else if (cnt32)  // no 16-byte piece; start with the 32-byte piece
+ {
+ src -= 32;
+ LD_UB2(src, 16, src2, src1);
+
+ if (cnt64)  // 32 + 64 = 96 bytes
+ {
+ src -= 64;
+ LD_UB4(src, 16, src6, src5, src4, src3);
+
+ VSHF_B2_UB(src2, src2, src1, src1, mask, mask, dst1, dst0);
+ VSHF_B2_UB(src6, src6, src5, src5, mask, mask, dst5, dst4);
+ VSHF_B2_UB(src4, src4, src3, src3, mask, mask, dst3, dst2);
+
+ ST_UB2(dst0, dst1, dst, 16);
+ dst += 32;
+ ST_UB4(dst2, dst3, dst4, dst5, dst, 16);
+ dst += 64;
+ }
+ else  // 32 bytes only
+ {
+ VSHF_B2_UB(src2, src2, src1, src1, mask, mask, dst1, dst0);
+
+ ST_UB2(dst0, dst1, dst, 16);
+ dst += 32;
+ }
+ }
+ else if (cnt64)  // 64 bytes only
+ {
+ src -= 64;
+ LD_UB4(src, 16, src6, src5, src4, src3);
+
+ VSHF_B2_UB(src6, src6, src5, src5, mask, mask, dst3, dst2);
+ VSHF_B2_UB(src4, src4, src3, src3, mask, mask, dst1, dst0);
+
+ ST_UB4(dst0, dst1, dst2, dst3, dst, 16);
+ dst += 64;
+ }
+ }
+
+ for (count = 0; count < ((width >> 2) & 0x3); count++)  // bits 2..3 of width: 0..3 whole 4-byte groups, reversed with scalar copies
+ {
+ src -= 4;
+ val0 = src[3];  // last byte of the group becomes the first output byte
+ val1 = src[2];
+ val2 = src[1];
+ val3 = src[0];
+ dst[0] = val0;
+ dst[1] = val1;
+ dst[2] = val2;
+ dst[3] = val3;
+ dst += 4;
+ }
+
+ for (count = 0; count < (width & 0x3); count++)  // final 0..3 bytes, one at a time
+ {
+ val0 = src[-1];  // byte just before the current position
+ src--;
+ dst[0] = val0;
+ dst++;
+ }
+}
+#endif // !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_msa)
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
« include/libyuv/macros_msa.h ('K') | « libyuv_test.gyp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698