Chromium Code Reviews| Index: third_party/WebKit/Source/platform/audio/VectorMath.cpp |
| diff --git a/third_party/WebKit/Source/platform/audio/VectorMath.cpp b/third_party/WebKit/Source/platform/audio/VectorMath.cpp |
| index bf0d2ada982376de5c77ecf0b10d5bf76052cd51..c7cf211312fdcb6969a8a9376a91134b3e1cd27a 100644 |
| --- a/third_party/WebKit/Source/platform/audio/VectorMath.cpp |
| +++ b/third_party/WebKit/Source/platform/audio/VectorMath.cpp |
| @@ -41,6 +41,10 @@ |
| #include <arm_neon.h> |
| #endif |
| +#if HAVE(MIPS_MSA_INTRINSICS) |
| +#include "platform/cpu/mips/CommonMacrosMSA.h" |
| +#endif |
| + |
| #include <math.h> |
| #include <algorithm> |
| @@ -229,6 +233,23 @@ void vsma(const float* sourceP, |
| } |
| n = tailFrames; |
| } |
| +#elif HAVE(MIPS_MSA_INTRINSICS) |
| + if ((sourceStride == 1) && (destStride == 1)) { |
| + float* destPCopy = destP; |
| + const v4f32 vScale = (v4f32)__msa_fill_w(*((int32_t*)scale)); |
| + v4f32 vSrc0, vSrc1, vSrc2, vSrc3, vSrc4, vSrc5, vSrc6, vSrc7; |
| + v4f32 vDst0, vDst1, vDst2, vDst3, vDst4, vDst5, vDst6, vDst7; |
| + |
| + for (; n >= 32; n -= 32) { |
| + LD_SP8(sourceP, 4, vSrc0, vSrc1, vSrc2, vSrc3, vSrc4, vSrc5, vSrc6, |
| + vSrc7); |
| + LD_SP8(destPCopy, 4, vDst0, vDst1, vDst2, vDst3, vDst4, vDst5, vDst6, |
| + vDst7); |
| + VSMA4(vSrc0, vSrc1, vSrc2, vSrc3, vDst0, vDst1, vDst2, vDst3, vScale); |
| + VSMA4(vSrc4, vSrc5, vSrc6, vSrc7, vDst4, vDst5, vDst6, vDst7, vScale); |
| + ST_SP8(vDst0, vDst1, vDst2, vDst3, vDst4, vDst5, vDst6, vDst7, destP, 4); |
| + } |
| + } |
| #endif |
| while (n) { |
| *destP += *sourceP * *scale; |
| @@ -310,6 +331,20 @@ void vsmul(const float* sourceP, |
| } |
| n = tailFrames; |
| } |
| +#elif HAVE(MIPS_MSA_INTRINSICS) |
| + if ((sourceStride == 1) && (destStride == 1)) { |
| + const v4f32 vScale = (v4f32)__msa_fill_w(*((int32_t*)scale)); |
| + v4f32 vSrc0, vSrc1, vSrc2, vSrc3, vSrc4, vSrc5, vSrc6, vSrc7; |
| + v4f32 vDst0, vDst1, vDst2, vDst3, vDst4, vDst5, vDst6, vDst7; |
| + |
| + for (; n >= 32; n -= 32) { |
| + LD_SP8(sourceP, 4, vSrc0, vSrc1, vSrc2, vSrc3, vSrc4, vSrc5, vSrc6, |
| + vSrc7); |
| + VSMUL4(vSrc0, vSrc1, vSrc2, vSrc3, vDst0, vDst1, vDst2, vDst3, vScale); |
| + VSMUL4(vSrc4, vSrc5, vSrc6, vSrc7, vDst4, vDst5, vDst6, vDst7, vScale); |
| + ST_SP8(vDst0, vDst1, vDst2, vDst3, vDst4, vDst5, vDst6, vDst7, destP, 4); |
| + } |
| + } |
| #endif |
| float k = *scale; |
| while (n--) { |
| @@ -431,6 +466,26 @@ void vadd(const float* source1P, |
| } |
| n = tailFrames; |
| } |
| +#elif HAVE(MIPS_MSA_INTRINSICS) |
| + if ((sourceStride1 == 1) && (sourceStride2 == 1) && (destStride == 1)) { |
| + v4f32 vSrc1P0, vSrc1P1, vSrc1P2, vSrc1P3, vSrc1P4, vSrc1P5, vSrc1P6, |
| + vSrc1P7; |
| + v4f32 vSrc2P0, vSrc2P1, vSrc2P2, vSrc2P3, vSrc2P4, vSrc2P5, vSrc2P6, |
| + vSrc2P7; |
| + v4f32 vDst0, vDst1, vDst2, vDst3, vDst4, vDst5, vDst6, vDst7; |
| + |
| + for (; n >= 32; n -= 32) { |
| + LD_SP8(source1P, 4, vSrc1P0, vSrc1P1, vSrc1P2, vSrc1P3, vSrc1P4, vSrc1P5, |
| + vSrc1P6, vSrc1P7); |
| + LD_SP8(source2P, 4, vSrc2P0, vSrc2P1, vSrc2P2, vSrc2P3, vSrc2P4, vSrc2P5, |
| + vSrc2P6, vSrc2P7); |
| + ADD4(vSrc1P0, vSrc2P0, vSrc1P1, vSrc2P1, vSrc1P2, vSrc2P2, vSrc1P3, |
| + vSrc2P3, vDst0, vDst1, vDst2, vDst3); |
| + ADD4(vSrc1P4, vSrc2P4, vSrc1P5, vSrc2P5, vSrc1P6, vSrc2P6, vSrc1P7, |
| + vSrc2P7, vDst4, vDst5, vDst6, vDst7); |
| + ST_SP8(vDst0, vDst1, vDst2, vDst3, vDst4, vDst5, vDst6, vDst7, destP, 4); |
| + } |
| + } |
| #endif |
| while (n--) { |
| *destP = *source1P + *source2P; |
| @@ -514,6 +569,26 @@ void vmul(const float* source1P, |
| } |
| n = tailFrames; |
| } |
| +#elif HAVE(MIPS_MSA_INTRINSICS) |
| + if ((sourceStride1 == 1) && (sourceStride2 == 1) && (destStride == 1)) { |
| + v4f32 vSrc1P0, vSrc1P1, vSrc1P2, vSrc1P3, vSrc1P4, vSrc1P5, vSrc1P6, |
| + vSrc1P7; |
| + v4f32 vSrc2P0, vSrc2P1, vSrc2P2, vSrc2P3, vSrc2P4, vSrc2P5, vSrc2P6, |
| + vSrc2P7; |
| + v4f32 vDst0, vDst1, vDst2, vDst3, vDst4, vDst5, vDst6, vDst7; |
| + |
| + for (; n >= 32; n -= 32) { |
| + LD_SP8(source1P, 4, vSrc1P0, vSrc1P1, vSrc1P2, vSrc1P3, vSrc1P4, vSrc1P5, |
| + vSrc1P6, vSrc1P7); |
| + LD_SP8(source2P, 4, vSrc2P0, vSrc2P1, vSrc2P2, vSrc2P3, vSrc2P4, vSrc2P5, |
| + vSrc2P6, vSrc2P7); |
| + MUL4(vSrc1P0, vSrc2P0, vSrc1P1, vSrc2P1, vSrc1P2, vSrc2P2, vSrc1P3, |
| + vSrc2P3, vDst0, vDst1, vDst2, vDst3); |
| + MUL4(vSrc1P4, vSrc2P4, vSrc1P5, vSrc2P5, vSrc1P6, vSrc2P6, vSrc1P7, |
| + vSrc2P7, vDst4, vDst5, vDst6, vDst7); |
| + ST_SP8(vDst0, vDst1, vDst2, vDst3, vDst4, vDst5, vDst6, vDst7, destP, 4); |
| + } |
| + } |
| #endif |
| while (n) { |
| *destP = *source1P * *source2P; |
| @@ -716,6 +791,28 @@ void vmaxmgv(const float* sourceP, |
| n = tailFrames; |
| } |
| +#elif HAVE(MIPS_MSA_INTRINSICS) |
| + if (sourceStride == 1) { |
| + v4f32 vMax = { |
| + 0, |
| + }; |
| + v4f32 vSrc0, vSrc1, vSrc2, vSrc3, vSrc4, vSrc5, vSrc6, vSrc7; |
| + const v16i8 vSignBitMask = (v16i8)__msa_fill_w(0x7FFFFFFF); |
| + |
| + for (; n >= 32; n -= 32) { |
| + LD_SP8(sourceP, 4, vSrc0, vSrc1, vSrc2, vSrc3, vSrc4, vSrc5, vSrc6, |
| + vSrc7); |
| + AND_W4_SP(vSrc0, vSrc1, vSrc2, vSrc3, vSignBitMask); |
| + VMAX_W4_SP(vSrc0, vSrc1, vSrc2, vSrc3, vMax); |
| + AND_W4_SP(vSrc4, vSrc5, vSrc6, vSrc7, vSignBitMask); |
| + VMAX_W4_SP(vSrc4, vSrc5, vSrc6, vSrc7, vMax); |
| + } |
| + |
| + max = std::max(max, vMax[0]); |
| + max = std::max(max, vMax[1]); |
| + max = std::max(max, vMax[2]); |
| + max = std::max(max, vMax[3]); |
| + } |
| #endif |
| while (n--) { |
| @@ -754,6 +851,23 @@ void vclip(const float* sourceP, |
| } |
| n = tailFrames; |
| } |
| +#elif HAVE(MIPS_MSA_INTRINSICS) |
| + if ((sourceStride == 1) && (destStride == 1)) { |
| + v4f32 vSrc0, vSrc1, vSrc2, vSrc3, vSrc4, vSrc5, vSrc6, vSrc7; |
| + v4f32 vDst0, vDst1, vDst2, vDst3, vDst4, vDst5, vDst6, vDst7; |
| + const v4f32 vLowThr = (v4f32)__msa_fill_w(*((int32_t*)lowThresholdP)); |
| + const v4f32 vHighThr = (v4f32)__msa_fill_w(*((int32_t*)highThresholdP)); |
|
Raymond Toy
2016/10/04 15:48:07
Use C++ casting here instead of C.
However, is th
|
| + |
| + for (; n >= 32; n -= 32) { |
| + LD_SP8(sourceP, 4, vSrc0, vSrc1, vSrc2, vSrc3, vSrc4, vSrc5, vSrc6, |
| + vSrc7); |
| + VCLIP4(vSrc0, vSrc1, vSrc2, vSrc3, vLowThr, vHighThr, vDst0, vDst1, vDst2, |
| + vDst3); |
| + VCLIP4(vSrc4, vSrc5, vSrc6, vSrc7, vLowThr, vHighThr, vDst4, vDst5, vDst6, |
| + vDst7); |
| + ST_SP8(vDst0, vDst1, vDst2, vDst3, vDst4, vDst5, vDst6, vDst7, destP, 4); |
| + } |
| + } |
| #endif |
| while (n--) { |
| *destP = clampTo(*sourceP, lowThreshold, highThreshold); |