Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(386)

Unified Diff: source/row_msa.cc

Issue 2285683002: Add MIPS SIMD Arch (MSA) optimized MirrorRow function (Closed)
Patch Set: Changes as per review comments Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« source/cpu_id.cc ('K') | « source/row_any.cc ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: source/row_msa.cc
diff --git a/source/row_msa.cc b/source/row_msa.cc
new file mode 100644
index 0000000000000000000000000000000000000000..4221a26dc4357a2ad8e08e4a68b97af3f8a2ef42
--- /dev/null
+++ b/source/row_msa.cc
@@ -0,0 +1,139 @@
+/*
+ * Copyright 2016 The LibYuv Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "libyuv/row.h"
+
+#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_msa)
+#include "libyuv/cpu/mips/macros_msa.h"
+#endif
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_msa)
+void MirrorRow_MSA(const uint8* src, uint8* dst, int width)  // Writes bytes taken from the end of src to the start of dst, in 16-byte vectors.
+{  // Bits 0-3 of width are never examined here, so a remainder under 16 bytes is not processed by this function.
fbarchard1 2016/09/14 01:48:05 { should be on same row as void MirrorRow_MSA()
manojkumar.bhosale 2016/09/14 12:45:30 Done.
+ int count;  // Iteration counter for the 128-byte main loop.
+ v16u8 src0, src1, src2, src3, src4, src5, src6, src7;  // Loaded input vectors.
+ v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;  // Shuffled vectors to be stored.
+ v16i8 mask = { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };  // Shuffle control 15..0; presumably reverses byte order within a vector -- confirm against vshf.b semantics.
+
+ src += width;  // Point one past the last source byte; src is stepped backwards from here.
+
+ for (count = 0; count < (width >> 7); count++)  // One pass per full 128 bytes of width.
+ {
fbarchard1 2016/09/14 01:48:05 { should be on same line as for ()
manojkumar.bhosale 2016/09/14 12:45:30 Done.
+ src -= 128;  // Step back to the start of the next 128-byte source chunk.
+ LD_UB8(src, 16, src7, src6, src5, src4, src3, src2, src1, src0);  // Outputs listed src7..src0; presumably src7 receives the lowest address (macro not visible here -- verify).
+
+ VSHF_B2_UB(src7, src7, src6, src6, mask, mask, dst7, dst6);  // Each vector is passed as both shuffle sources, with the same mask.
fbarchard1 2016/09/14 01:48:04 I wouldnt normally unroll this much. 2 or 4 vecto
manojkumar.bhosale 2016/09/14 12:45:30 Done with 4 vectors
+ VSHF_B2_UB(src5, src5, src4, src4, mask, mask, dst5, dst4);
+ VSHF_B2_UB(src3, src3, src2, src2, mask, mask, dst3, dst2);
+ VSHF_B2_UB(src1, src1, src0, src0, mask, mask, dst1, dst0);
+
+ ST_UB8(dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst, 16);  // Listed dst0..dst7, the reverse of the load listing.
+ dst += 128;  // Advance the output past the 128 bytes just written.
+ }
+
+ if (width & 0x40)  // Tail: bits 0x40/0x20/0x10 of width select the leftover 64/32/16 bytes; each combination is handled in one branch.
fbarchard1 2016/09/14 01:48:04 suggest removing everything from here down to make
manojkumar.bhosale 2016/09/14 12:45:30 Done.
+ {
+ if (width & 0x20)
+ {
+ if (width & 0x10)  // 0x40 + 0x20 + 0x10 set: 112 bytes, 7 vectors.
+ {
+ src -= 112;
+ LD_UB7(src, 16, src6, src5, src4, src3, src2, src1, src0);
+
+ VSHF_B2_UB(src0, src0, src1, src1, mask, mask, dst0, dst1);  // Operands listed ascending here (main loop lists them descending); the srcN -> dstN pairing is the same.
+ VSHF_B2_UB(src2, src2, src3, src3, mask, mask, dst2, dst3);
+ VSHF_B2_UB(src4, src4, src5, src5, mask, mask, dst4, dst5);
+ dst6 = (v16u8) __msa_vshf_b(mask, (v16i8) src6, (v16i8) src6);  // Odd vector count: the last vector is shuffled on its own.
+
+ ST_UB7(dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst, 16);
+ dst += 112;
+ }
+ else  // 0x40 + 0x20 set: 96 bytes, 6 vectors.
+ {
+ src -= 96;
+ LD_UB6(src, 16, src5, src4, src3, src2, src1, src0);
+
+ VSHF_B2_UB(src0, src0, src1, src1, mask, mask, dst0, dst1);
+ VSHF_B2_UB(src2, src2, src3, src3, mask, mask, dst2, dst3);
+ VSHF_B2_UB(src4, src4, src5, src5, mask, mask, dst4, dst5);
+
+ ST_UB6(dst0, dst1, dst2, dst3, dst4, dst5, dst, 16);
+ dst += 96;
+ }
+ }
+ else if (width & 0x10)  // 0x40 + 0x10 set: 80 bytes, 5 vectors.
+ {
+ src -= 80;
+ LD_UB5(src, 16, src4, src3, src2, src1, src0);
+
+ VSHF_B2_UB(src0, src0, src1, src1, mask, mask, dst0, dst1);
+ VSHF_B2_UB(src2, src2, src3, src3, mask, mask, dst2, dst3);
+ dst4 = (v16u8) __msa_vshf_b(mask, (v16i8) src4, (v16i8) src4);  // Odd vector count: the last vector is shuffled on its own.
+
+ ST_UB5(dst0, dst1, dst2, dst3, dst4, dst, 16);
+ dst += 80;
+ }
+ else  // 0x40 only: 64 bytes, 4 vectors.
+ {
+ src -= 64;
+ LD_UB4(src, 16, src3, src2, src1, src0);
+
+ VSHF_B2_UB(src0, src0, src1, src1, mask, mask, dst0, dst1);
+ VSHF_B2_UB(src2, src2, src3, src3, mask, mask, dst2, dst3);
+
+ ST_UB4(dst0, dst1, dst2, dst3, dst, 16);
+ dst += 64;
+ }
+ }
+ else if (width & 0x20)
+ {
+ if (width & 0x10)  // 0x20 + 0x10 set: 48 bytes, 3 vectors.
+ {
+ src -= 48;
+ LD_UB3(src, 16, src2, src1, src0);
+
+ VSHF_B2_UB(src0, src0, src1, src1, mask, mask, dst0, dst1);
+ dst2 = (v16u8) __msa_vshf_b(mask, (v16i8) src2, (v16i8) src2);  // Odd vector count: the last vector is shuffled on its own.
+
+ ST_UB3(dst0, dst1, dst2, dst, 16);
+ dst += 48;
+ }
+ else  // 0x20 only: 32 bytes, 2 vectors.
+ {
+ src -= 32;
+ LD_UB2(src, 16, src1, src0);
+
+ VSHF_B2_UB(src0, src0, src1, src1, mask, mask, dst0, dst1);
+
+ ST_UB2(dst0, dst1, dst, 16);
+ dst += 32;
+ }
+ }
+ else if (width & 0x10)  // 0x10 only: 16 bytes, 1 vector.
+ {
+ src -= 16;
+ src0 = LD_UB(src);
+
+ dst0 = (v16u8) __msa_vshf_b(mask, (v16i8) src0, (v16i8) src0);
+ ST_UB(dst0, dst);
+ dst += 16;
+ }
+}
+#endif // !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_msa)
+
+#ifdef __cplusplus
+} // extern "C"
+} // namespace libyuv
+#endif
« source/cpu_id.cc ('K') | « source/row_any.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698