Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1020)

Unified Diff: third_party/WebKit/Source/platform/audio/VectorMath.cpp

Issue 2340583003: Add MSA (MIPS SIMD Arch) optimized VectorMath functions (Closed)
Patch Set: Incorporate review comments Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/WebKit/Source/platform/audio/VectorMath.cpp
diff --git a/third_party/WebKit/Source/platform/audio/VectorMath.cpp b/third_party/WebKit/Source/platform/audio/VectorMath.cpp
index bf0d2ada982376de5c77ecf0b10d5bf76052cd51..7cf6eec3b2b9bd075d64ecbc2d9b1663fc9825ee 100644
--- a/third_party/WebKit/Source/platform/audio/VectorMath.cpp
+++ b/third_party/WebKit/Source/platform/audio/VectorMath.cpp
@@ -41,6 +41,10 @@
#include <arm_neon.h>
#endif
+#if HAVE(MIPS_MSA_INTRINSICS)
+#include "platform/cpu/mips/CommonMacrosMSA.h"
+#endif
+
#include <math.h>
#include <algorithm>
@@ -229,6 +233,23 @@ void vsma(const float* sourceP,
}
n = tailFrames;
}
+#elif HAVE(MIPS_MSA_INTRINSICS)
+ if ((sourceStride == 1) && (destStride == 1)) {
+ float* destPCopy = destP;
+ const v4f32 vScale = (v4f32)__msa_fill_w(FLOAT2INT(*scale));
+ v4f32 vSrc0, vSrc1, vSrc2, vSrc3, vSrc4, vSrc5, vSrc6, vSrc7;
+ v4f32 vDst0, vDst1, vDst2, vDst3, vDst4, vDst5, vDst6, vDst7;
+
+ for (; n >= 32; n -= 32) {
+ LD_SP8(sourceP, 4, vSrc0, vSrc1, vSrc2, vSrc3, vSrc4, vSrc5, vSrc6,
+ vSrc7);
+ LD_SP8(destPCopy, 4, vDst0, vDst1, vDst2, vDst3, vDst4, vDst5, vDst6,
+ vDst7);
+ VSMA4(vSrc0, vSrc1, vSrc2, vSrc3, vDst0, vDst1, vDst2, vDst3, vScale);
+ VSMA4(vSrc4, vSrc5, vSrc6, vSrc7, vDst4, vDst5, vDst6, vDst7, vScale);
+ ST_SP8(vDst0, vDst1, vDst2, vDst3, vDst4, vDst5, vDst6, vDst7, destP, 4);
+ }
+ }
#endif
while (n) {
*destP += *sourceP * *scale;
@@ -310,6 +331,20 @@ void vsmul(const float* sourceP,
}
n = tailFrames;
}
+#elif HAVE(MIPS_MSA_INTRINSICS)
+ if ((sourceStride == 1) && (destStride == 1)) {
+ const v4f32 vScale = (v4f32)__msa_fill_w(FLOAT2INT(*scale));
+ v4f32 vSrc0, vSrc1, vSrc2, vSrc3, vSrc4, vSrc5, vSrc6, vSrc7;
+ v4f32 vDst0, vDst1, vDst2, vDst3, vDst4, vDst5, vDst6, vDst7;
+
+ for (; n >= 32; n -= 32) {
+ LD_SP8(sourceP, 4, vSrc0, vSrc1, vSrc2, vSrc3, vSrc4, vSrc5, vSrc6,
+ vSrc7);
+ VSMUL4(vSrc0, vSrc1, vSrc2, vSrc3, vDst0, vDst1, vDst2, vDst3, vScale);
+ VSMUL4(vSrc4, vSrc5, vSrc6, vSrc7, vDst4, vDst5, vDst6, vDst7, vScale);
+ ST_SP8(vDst0, vDst1, vDst2, vDst3, vDst4, vDst5, vDst6, vDst7, destP, 4);
+ }
+ }
#endif
float k = *scale;
while (n--) {
@@ -431,6 +466,26 @@ void vadd(const float* source1P,
}
n = tailFrames;
}
+#elif HAVE(MIPS_MSA_INTRINSICS)
+ if ((sourceStride1 == 1) && (sourceStride2 == 1) && (destStride == 1)) {
+ v4f32 vSrc1P0, vSrc1P1, vSrc1P2, vSrc1P3, vSrc1P4, vSrc1P5, vSrc1P6,
+ vSrc1P7;
+ v4f32 vSrc2P0, vSrc2P1, vSrc2P2, vSrc2P3, vSrc2P4, vSrc2P5, vSrc2P6,
+ vSrc2P7;
+ v4f32 vDst0, vDst1, vDst2, vDst3, vDst4, vDst5, vDst6, vDst7;
+
+ for (; n >= 32; n -= 32) {
+ LD_SP8(source1P, 4, vSrc1P0, vSrc1P1, vSrc1P2, vSrc1P3, vSrc1P4, vSrc1P5,
+ vSrc1P6, vSrc1P7);
+ LD_SP8(source2P, 4, vSrc2P0, vSrc2P1, vSrc2P2, vSrc2P3, vSrc2P4, vSrc2P5,
+ vSrc2P6, vSrc2P7);
+ ADD4(vSrc1P0, vSrc2P0, vSrc1P1, vSrc2P1, vSrc1P2, vSrc2P2, vSrc1P3,
+ vSrc2P3, vDst0, vDst1, vDst2, vDst3);
+ ADD4(vSrc1P4, vSrc2P4, vSrc1P5, vSrc2P5, vSrc1P6, vSrc2P6, vSrc1P7,
+ vSrc2P7, vDst4, vDst5, vDst6, vDst7);
+ ST_SP8(vDst0, vDst1, vDst2, vDst3, vDst4, vDst5, vDst6, vDst7, destP, 4);
+ }
+ }
#endif
while (n--) {
*destP = *source1P + *source2P;
@@ -514,6 +569,26 @@ void vmul(const float* source1P,
}
n = tailFrames;
}
+#elif HAVE(MIPS_MSA_INTRINSICS)
+ if ((sourceStride1 == 1) && (sourceStride2 == 1) && (destStride == 1)) {
+ v4f32 vSrc1P0, vSrc1P1, vSrc1P2, vSrc1P3, vSrc1P4, vSrc1P5, vSrc1P6,
+ vSrc1P7;
+ v4f32 vSrc2P0, vSrc2P1, vSrc2P2, vSrc2P3, vSrc2P4, vSrc2P5, vSrc2P6,
+ vSrc2P7;
+ v4f32 vDst0, vDst1, vDst2, vDst3, vDst4, vDst5, vDst6, vDst7;
+
+ for (; n >= 32; n -= 32) {
+ LD_SP8(source1P, 4, vSrc1P0, vSrc1P1, vSrc1P2, vSrc1P3, vSrc1P4, vSrc1P5,
+ vSrc1P6, vSrc1P7);
+ LD_SP8(source2P, 4, vSrc2P0, vSrc2P1, vSrc2P2, vSrc2P3, vSrc2P4, vSrc2P5,
+ vSrc2P6, vSrc2P7);
+ MUL4(vSrc1P0, vSrc2P0, vSrc1P1, vSrc2P1, vSrc1P2, vSrc2P2, vSrc1P3,
+ vSrc2P3, vDst0, vDst1, vDst2, vDst3);
+ MUL4(vSrc1P4, vSrc2P4, vSrc1P5, vSrc2P5, vSrc1P6, vSrc2P6, vSrc1P7,
+ vSrc2P7, vDst4, vDst5, vDst6, vDst7);
+ ST_SP8(vDst0, vDst1, vDst2, vDst3, vDst4, vDst5, vDst6, vDst7, destP, 4);
+ }
+ }
#endif
while (n) {
*destP = *source1P * *source2P;
@@ -716,6 +791,28 @@ void vmaxmgv(const float* sourceP,
n = tailFrames;
}
+#elif HAVE(MIPS_MSA_INTRINSICS)
+ if (sourceStride == 1) {
+ v4f32 vMax = {
+ 0,
+ };
+ v4f32 vSrc0, vSrc1, vSrc2, vSrc3, vSrc4, vSrc5, vSrc6, vSrc7;
+ const v16i8 vSignBitMask = (v16i8)__msa_fill_w(0x7FFFFFFF);
+
+ for (; n >= 32; n -= 32) {
+ LD_SP8(sourceP, 4, vSrc0, vSrc1, vSrc2, vSrc3, vSrc4, vSrc5, vSrc6,
+ vSrc7);
+ AND_W4_SP(vSrc0, vSrc1, vSrc2, vSrc3, vSignBitMask);
+ VMAX_W4_SP(vSrc0, vSrc1, vSrc2, vSrc3, vMax);
+ AND_W4_SP(vSrc4, vSrc5, vSrc6, vSrc7, vSignBitMask);
+ VMAX_W4_SP(vSrc4, vSrc5, vSrc6, vSrc7, vMax);
+ }
+
+ max = std::max(max, vMax[0]);
+ max = std::max(max, vMax[1]);
+ max = std::max(max, vMax[2]);
+ max = std::max(max, vMax[3]);
+ }
#endif
while (n--) {
@@ -754,6 +851,23 @@ void vclip(const float* sourceP,
}
n = tailFrames;
}
+#elif HAVE(MIPS_MSA_INTRINSICS)
+ if ((sourceStride == 1) && (destStride == 1)) {
+ v4f32 vSrc0, vSrc1, vSrc2, vSrc3, vSrc4, vSrc5, vSrc6, vSrc7;
+ v4f32 vDst0, vDst1, vDst2, vDst3, vDst4, vDst5, vDst6, vDst7;
+ const v4f32 vLowThr = (v4f32)__msa_fill_w(FLOAT2INT(lowThreshold));
Raymond Toy 2016/10/05 17:26:53 Isn't this some kind of gcc/clang extension? FLOA
Prashant.Patil 2016/10/06 08:27:35 I will remove this macro usage.
+ const v4f32 vHighThr = (v4f32)__msa_fill_w(FLOAT2INT(highThreshold));
+
+ for (; n >= 32; n -= 32) {
+ LD_SP8(sourceP, 4, vSrc0, vSrc1, vSrc2, vSrc3, vSrc4, vSrc5, vSrc6,
+ vSrc7);
+ VCLIP4(vSrc0, vSrc1, vSrc2, vSrc3, vLowThr, vHighThr, vDst0, vDst1, vDst2,
+ vDst3);
+ VCLIP4(vSrc4, vSrc5, vSrc6, vSrc7, vLowThr, vHighThr, vDst4, vDst5, vDst6,
+ vDst7);
+ ST_SP8(vDst0, vDst1, vDst2, vDst3, vDst4, vDst5, vDst6, vDst7, destP, 4);
+ }
+ }
#endif
while (n--) {
*destP = clampTo(*sourceP, lowThreshold, highThreshold);

Powered by Google App Engine
This is Rietveld 408576698