Source/platform/audio/cpu/arm/VectorMathNEON.cpp - Issue 715753002: [WIP] support arm_neon_optional flag in blink.

Unified Diff: Source/platform/audio/cpu/arm/VectorMathNEON.cpp

Issue 715753002: [WIP] support arm_neon_optional flag in blink. Base URL: https://chromium.googlesource.com/chromium/blink.git@master

Patch Set: Created 6 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: Source/platform/audio/cpu/arm/VectorMathNEON.cpp

diff --git a/Source/platform/audio/cpu/arm/VectorMathNEON.cpp b/Source/platform/audio/cpu/arm/VectorMathNEON.cpp

new file mode 100644

index 0000000000000000000000000000000000000000..643283debb62a1b55aae1aefebb031de20ecc246

--- /dev/null

+++ b/Source/platform/audio/cpu/arm/VectorMathNEON.cpp

@@ -0,0 +1,300 @@

+/*

+ *

+ * Redistribution and use in source and binary forms, with or without

+ * modification, are permitted provided that the following conditions

+ * are met:

+ * 1. Redistributions of source code must retain the above copyright

+ * notice, this list of conditions and the following disclaimer.

+ * 2. Redistributions in binary form must reproduce the above copyright

+ * notice, this list of conditions and the following disclaimer in the

+ * documentation and/or other materials provided with the distribution.

+ *

+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY

+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED

+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE

+ * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY

+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES

+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;

+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON

+ * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS

+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+ */

+#include "config.h"

+#if ENABLE(WEB_AUDIO)

+#include "platform/audio/cpu/arm/VectorMathNEON.h"

+#include "wtf/Assertions.h"

+#include "wtf/CPU.h"

+#include <algorithm>

+#include <arm_neon.h>

+namespace blink {

+namespace VectorMath {

+void vsma(const float* sourceP, int sourceStride, const float* scale, float* destP, int destStride, size_t framesToProcess)

+ int n = framesToProcess;

+ if (WTF_CPU_ARM_HAS_NEON()) {

+ if ((sourceStride == 1) && (destStride == 1)) {

+ int tailFrames = n % 4;

+ const float* endP = destP + n - tailFrames;

+ float32x4_t k = vdupq_n_f32(*scale);

+ while (destP < endP) {

+ float32x4_t source = vld1q_f32(sourceP);

+ float32x4_t dest = vld1q_f32(destP);

+ dest = vmlaq_f32(dest, source, k);

+ vst1q_f32(destP, dest);

+ sourceP += 4;

+ destP += 4;

+ }

+ n = tailFrames;

+ }

+ while (n) {

+ *destP += *sourceP * *scale;

+ sourceP += sourceStride;

+ destP += destStride;

+ n--;

+ }

+void vsmul(const float* sourceP, int sourceStride, const float* scale, float* destP, int destStride, size_t framesToProcess)

+ int n = framesToProcess;

+ if (WTF_CPU_ARM_HAS_NEON()) {

+ if ((sourceStride == 1) && (destStride == 1)) {

+ float k = *scale;

+ int tailFrames = n % 4;

+ const float* endP = destP + n - tailFrames;

+ while (destP < endP) {

+ float32x4_t source = vld1q_f32(sourceP);

+ vst1q_f32(destP, vmulq_n_f32(source, k));

+ sourceP += 4;

+ destP += 4;

+ }

+ n = tailFrames;

+ }

+ float k = *scale;

+ while (n--) {

+ *destP = k * *sourceP;

+ sourceP += sourceStride;

+ destP += destStride;

+ }

+void vadd(const float* source1P, int sourceStride1, const float* source2P, int sourceStride2, float* destP, int destStride, size_t framesToProcess)

+ int n = framesToProcess;

+ if (WTF_CPU_ARM_HAS_NEON()) {

+ if ((sourceStride1 ==1) && (sourceStride2 == 1) && (destStride == 1)) {

+ int tailFrames = n % 4;

+ const float* endP = destP + n - tailFrames;

+ while (destP < endP) {

+ float32x4_t source1 = vld1q_f32(source1P);

+ float32x4_t source2 = vld1q_f32(source2P);

+ vst1q_f32(destP, vaddq_f32(source1, source2));

+ source1P += 4;

+ source2P += 4;

+ destP += 4;

+ }

+ n = tailFrames;

+ }

+ while (n--) {

+ *destP = *source1P + *source2P;

+ source1P += sourceStride1;

+ source2P += sourceStride2;

+ destP += destStride;

+ }

+void vmul(const float* source1P, int sourceStride1, const float* source2P, int sourceStride2, float* destP, int destStride, size_t framesToProcess)

+ int n = framesToProcess;

+ if (WTF_CPU_ARM_HAS_NEON()) {

+ if ((sourceStride1 ==1) && (sourceStride2 == 1) && (destStride == 1)) {

+ int tailFrames = n % 4;

+ const float* endP = destP + n - tailFrames;

+ while (destP < endP) {

+ float32x4_t source1 = vld1q_f32(source1P);

+ float32x4_t source2 = vld1q_f32(source2P);

+ vst1q_f32(destP, vmulq_f32(source1, source2));

+ source1P += 4;

+ source2P += 4;

+ destP += 4;

+ }

+ n = tailFrames;

+ }

+ while (n) {

+ *destP = *source1P * *source2P;

+ source1P += sourceStride1;

+ source2P += sourceStride2;

+ destP += destStride;

+ n--;

+ }

+void zvmul(const float* real1P, const float* imag1P, const float* real2P, const float* imag2P, float* realDestP, float* imagDestP, size_t framesToProcess)

+ unsigned i = 0;

+ unsigned endSize = framesToProcess - framesToProcess % 4;

+ if (WTF_CPU_ARM_HAS_NEON()) {

+ while (i < endSize) {

+ float32x4_t real1 = vld1q_f32(real1P + i);

+ float32x4_t real2 = vld1q_f32(real2P + i);

+ float32x4_t imag1 = vld1q_f32(imag1P + i);

+ float32x4_t imag2 = vld1q_f32(imag2P + i);

+ float32x4_t realResult = vmlsq_f32(vmulq_f32(real1, real2), imag1, imag2);

+ float32x4_t imagResult = vmlaq_f32(vmulq_f32(real1, imag2), imag1, real2);

+ vst1q_f32(realDestP + i, realResult);

+ vst1q_f32(imagDestP + i, imagResult);

+ i += 4;

+ }

+ for (; i < framesToProcess; ++i) {

+ // Read and compute result before storing them, in case the

+ // destination is the same as one of the sources.

+ float realResult = real1P[i] * real2P[i] - imag1P[i] * imag2P[i];

+ float imagResult = real1P[i] * imag2P[i] + imag1P[i] * real2P[i];

+ realDestP[i] = realResult;

+ imagDestP[i] = imagResult;

+ }

+void vsvesq(const float* sourceP, int sourceStride, float* sumP, size_t framesToProcess)

+ int n = framesToProcess;

+ float sum = 0;

+ if (WTF_CPU_ARM_HAS_NEON()) {

+ if (sourceStride == 1) {

+ int tailFrames = n % 4;

+ const float* endP = sourceP + n - tailFrames;

+ float32x4_t fourSum = vdupq_n_f32(0);

+ while (sourceP < endP) {

+ float32x4_t source = vld1q_f32(sourceP);

+ fourSum = vmlaq_f32(fourSum, source, source);

+ sourceP += 4;

+ }

+ float32x2_t twoSum = vadd_f32(vget_low_f32(fourSum), vget_high_f32(fourSum));

+ float groupSum[2];

+ vst1_f32(groupSum, twoSum);

+ sum += groupSum[0] + groupSum[1];

+ n = tailFrames;

+ }

+ while (n--) {

+ float sample = *sourceP;

+ sum += sample * sample;

+ sourceP += sourceStride;

+ }

+ ASSERT(sumP);

+ *sumP = sum;

+void vmaxmgv(const float* sourceP, int sourceStride, float* maxP, size_t framesToProcess)

+ int n = framesToProcess;

+ float max = 0;

+ if (WTF_CPU_ARM_HAS_NEON()) {

+ if (sourceStride == 1) {

+ int tailFrames = n % 4;

+ const float* endP = sourceP + n - tailFrames;

+ float32x4_t fourMax = vdupq_n_f32(0);

+ while (sourceP < endP) {

+ float32x4_t source = vld1q_f32(sourceP);

+ fourMax = vmaxq_f32(fourMax, vabsq_f32(source));

+ sourceP += 4;

+ }

+ float32x2_t twoMax = vmax_f32(vget_low_f32(fourMax), vget_high_f32(fourMax));

+ float groupMax[2];

+ vst1_f32(groupMax, twoMax);

+ max = std::max(groupMax[0], groupMax[1]);

+ n = tailFrames;

+ }

+ while (n--) {

+ max = std::max(max, fabsf(*sourceP));

+ sourceP += sourceStride;

+ }

+ ASSERT(maxP);

+ *maxP = max;

+void vclip(const float* sourceP, int sourceStride, const float* lowThresholdP, const float* highThresholdP, float* destP, int destStride, size_t framesToProcess)

+ int n = framesToProcess;

+ float lowThreshold = *lowThresholdP;

+ float highThreshold = *highThresholdP;

+ if (WTF_CPU_ARM_HAS_NEON()) {

+ if ((sourceStride == 1) && (destStride == 1)) {

+ int tailFrames = n % 4;

+ const float* endP = destP + n - tailFrames;

+ float32x4_t low = vdupq_n_f32(lowThreshold);

+ float32x4_t high = vdupq_n_f32(highThreshold);

+ while (destP < endP) {

+ float32x4_t source = vld1q_f32(sourceP);

+ vst1q_f32(destP, vmaxq_f32(vminq_f32(source, high), low));

+ sourceP += 4;

+ destP += 4;

+ }

+ n = tailFrames;

+ }

+ while (n--) {

+ *destP = std::max(std::min(*sourceP, highThreshold), lowThreshold);

+ sourceP += sourceStride;

+ destP += destStride;

+ }

+} // namespace VectorMath

+} // namespace blink

+#endif // ENABLE(WEB_AUDIO)

« no previous file with comments | « Source/platform/audio/cpu/arm/VectorMathNEON.h ('k') | Source/platform/audio/mac/VectorMathMac.cpp » ('j') | no next file with comments »