| Index: Source/platform/audio/cpu/arm/VectorMathNEON.h
 | 
| diff --git a/Source/platform/audio/cpu/arm/VectorMathNEON.h b/Source/platform/audio/cpu/arm/VectorMathNEON.h
 | 
| new file mode 100644
 | 
| index 0000000000000000000000000000000000000000..0b015687d860df0da478fd4c1e8338aa1c02f526
 | 
| --- /dev/null
 | 
| +++ b/Source/platform/audio/cpu/arm/VectorMathNEON.h
 | 
| @@ -0,0 +1,281 @@
 | 
| +/*
 | 
| + * Copyright (C) 2010, Google Inc. All rights reserved.
 | 
| + *
 | 
| + * Redistribution and use in source and binary forms, with or without
 | 
| + * modification, are permitted provided that the following conditions
 | 
| + * are met:
 | 
| + * 1.  Redistributions of source code must retain the above copyright
 | 
| + *    notice, this list of conditions and the following disclaimer.
 | 
| + * 2.  Redistributions in binary form must reproduce the above copyright
 | 
| + *    notice, this list of conditions and the following disclaimer in the
 | 
| + *    documentation and/or other materials provided with the distribution.
 | 
| + *
 | 
| + * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
 | 
| + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 | 
| + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 | 
| + * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
 | 
| + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 | 
| + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 | 
| + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 | 
| + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 | 
| + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 | 
| + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
| + */
 | 
| +
 | 
| +#ifndef VectorMathNEON_h
 | 
| +#define VectorMathNEON_h
 | 
| +
 | 
| +#if HAVE(ARM_NEON_INTRINSICS)
 | 
| +
 | 
| +#include "platform/audio/VectorMath.h"
 | 
| +#include <arm_neon.h>
 | 
| +
 | 
| +namespace blink {
 | 
| +namespace VectorMath {
 | 
| +
 | 
| +void vsmaNEON(const float* sourceP, int sourceStride, const float* scale, float* destP, int destStride, size_t framesToProcess)
 | 
| +{
 | 
| +    int n = framesToProcess;
 | 
| +    if ((sourceStride == 1) && (destStride == 1)) {
 | 
| +        int tailFrames = n % 4;
 | 
| +        const float* endP = destP + n - tailFrames;
 | 
| +
 | 
| +        float32x4_t k = vdupq_n_f32(*scale);
 | 
| +        while (destP < endP) {
 | 
| +            float32x4_t source = vld1q_f32(sourceP);
 | 
| +            float32x4_t dest = vld1q_f32(destP);
 | 
| +
 | 
| +            dest = vmlaq_f32(dest, source, k);
 | 
| +            vst1q_f32(destP, dest);
 | 
| +
 | 
| +            sourceP += 4;
 | 
| +            destP += 4;
 | 
| +        }
 | 
| +        n = tailFrames;
 | 
| +    }
 | 
| +
 | 
| +    while (n) {
 | 
| +        *destP += *sourceP * *scale;
 | 
| +        sourceP += sourceStride;
 | 
| +        destP += destStride;
 | 
| +        n--;
 | 
| +    }
 | 
| +}
 | 
| +
 | 
| +void vsmulNEON(const float* sourceP, int sourceStride, const float* scale, float* destP, int destStride, size_t framesToProcess)
 | 
| +{
 | 
| +    int n = framesToProcess;
 | 
| +    if ((sourceStride == 1) && (destStride == 1)) {
 | 
| +        float k = *scale;
 | 
| +        int tailFrames = n % 4;
 | 
| +        const float* endP = destP + n - tailFrames;
 | 
| +
 | 
| +        while (destP < endP) {
 | 
| +            float32x4_t source = vld1q_f32(sourceP);
 | 
| +            vst1q_f32(destP, vmulq_n_f32(source, k));
 | 
| +
 | 
| +            sourceP += 4;
 | 
| +            destP += 4;
 | 
| +        }
 | 
| +        n = tailFrames;
 | 
| +    }
 | 
| +
 | 
| +    float k = *scale;
 | 
| +    while (n--) {
 | 
| +        *destP = k * *sourceP;
 | 
| +        sourceP += sourceStride;
 | 
| +        destP += destStride;
 | 
| +    }
 | 
| +}
 | 
| +
 | 
| +void vaddNEON(const float* source1P, int sourceStride1, const float* source2P, int sourceStride2, float* destP, int destStride, size_t framesToProcess)
 | 
| +{
 | 
| +    int n = framesToProcess;
 | 
| +
 | 
| +    if ((sourceStride1 ==1) && (sourceStride2 == 1) && (destStride == 1)) {
 | 
| +        int tailFrames = n % 4;
 | 
| +        const float* endP = destP + n - tailFrames;
 | 
| +
 | 
| +        while (destP < endP) {
 | 
| +            float32x4_t source1 = vld1q_f32(source1P);
 | 
| +            float32x4_t source2 = vld1q_f32(source2P);
 | 
| +            vst1q_f32(destP, vaddq_f32(source1, source2));
 | 
| +
 | 
| +            source1P += 4;
 | 
| +            source2P += 4;
 | 
| +            destP += 4;
 | 
| +        }
 | 
| +        n = tailFrames;
 | 
| +    }
 | 
| +
 | 
| +    while (n--) {
 | 
| +        *destP = *source1P + *source2P;
 | 
| +        source1P += sourceStride1;
 | 
| +        source2P += sourceStride2;
 | 
| +        destP += destStride;
 | 
| +    }
 | 
| +}
 | 
| +
 | 
| +void vmulNEON(const float* source1P, int sourceStride1, const float* source2P, int sourceStride2, float* destP, int destStride, size_t framesToProcess)
 | 
| +{
 | 
| +
 | 
| +    int n = framesToProcess;
 | 
| +
 | 
| +    if ((sourceStride1 ==1) && (sourceStride2 == 1) && (destStride == 1)) {
 | 
| +        int tailFrames = n % 4;
 | 
| +        const float* endP = destP + n - tailFrames;
 | 
| +
 | 
| +        while (destP < endP) {
 | 
| +            float32x4_t source1 = vld1q_f32(source1P);
 | 
| +            float32x4_t source2 = vld1q_f32(source2P);
 | 
| +            vst1q_f32(destP, vmulq_f32(source1, source2));
 | 
| +
 | 
| +            source1P += 4;
 | 
| +            source2P += 4;
 | 
| +            destP += 4;
 | 
| +        }
 | 
| +        n = tailFrames;
 | 
| +    }
 | 
| +
 | 
| +    while (n) {
 | 
| +        *destP = *source1P * *source2P;
 | 
| +        source1P += sourceStride1;
 | 
| +        source2P += sourceStride2;
 | 
| +        destP += destStride;
 | 
| +        n--;
 | 
| +    }
 | 
| +}
 | 
| +
 | 
| +void zvmulNEON(const float* real1P, const float* imag1P, const float* real2P, const float* imag2P, float* realDestP, float* imagDestP, size_t framesToProcess)
 | 
| +{
 | 
| +    unsigned i = 0;
 | 
| +    unsigned endSize = framesToProcess - framesToProcess % 4;
 | 
| +
 | 
| +    while (i < endSize) {
 | 
| +        float32x4_t real1 = vld1q_f32(real1P + i);
 | 
| +        float32x4_t real2 = vld1q_f32(real2P + i);
 | 
| +        float32x4_t imag1 = vld1q_f32(imag1P + i);
 | 
| +        float32x4_t imag2 = vld1q_f32(imag2P + i);
 | 
| +
 | 
| +        float32x4_t realResult = vmlsq_f32(vmulq_f32(real1, real2), imag1, imag2);
 | 
| +        float32x4_t imagResult = vmlaq_f32(vmulq_f32(real1, imag2), imag1, real2);
 | 
| +
 | 
| +        vst1q_f32(realDestP + i, realResult);
 | 
| +        vst1q_f32(imagDestP + i, imagResult);
 | 
| +
 | 
| +        i += 4;
 | 
| +    }
 | 
| +
 | 
| +    for (; i < framesToProcess; ++i) {
 | 
| +        // Read and compute result before storing them, in case the
 | 
| +        // destination is the same as one of the sources.
 | 
| +        float realResult = real1P[i] * real2P[i] - imag1P[i] * imag2P[i];
 | 
| +        float imagResult = real1P[i] * imag2P[i] + imag1P[i] * real2P[i];
 | 
| +
 | 
| +        realDestP[i] = realResult;
 | 
| +        imagDestP[i] = imagResult;
 | 
| +    }
 | 
| +}
 | 
| +
 | 
| +void vsvesqNEON(const float* sourceP, int sourceStride, float* sumP, size_t framesToProcess)
 | 
| +{
 | 
| +    int n = framesToProcess;
 | 
| +    float sum = 0;
 | 
| +
 | 
| +    if (sourceStride == 1) {
 | 
| +        int tailFrames = n % 4;
 | 
| +        const float* endP = sourceP + n - tailFrames;
 | 
| +
 | 
| +        float32x4_t fourSum = vdupq_n_f32(0);
 | 
| +        while (sourceP < endP) {
 | 
| +            float32x4_t source = vld1q_f32(sourceP);
 | 
| +            fourSum = vmlaq_f32(fourSum, source, source);
 | 
| +            sourceP += 4;
 | 
| +        }
 | 
| +        float32x2_t twoSum = vadd_f32(vget_low_f32(fourSum), vget_high_f32(fourSum));
 | 
| +
 | 
| +        float groupSum[2];
 | 
| +        vst1_f32(groupSum, twoSum);
 | 
| +        sum += groupSum[0] + groupSum[1];
 | 
| +
 | 
| +        n = tailFrames;
 | 
| +    }
 | 
| +
 | 
| +    while (n--) {
 | 
| +        float sample = *sourceP;
 | 
| +        sum += sample * sample;
 | 
| +        sourceP += sourceStride;
 | 
| +    }
 | 
| +
 | 
| +    ASSERT(sumP);
 | 
| +    *sumP = sum;
 | 
| +}
 | 
| +
 | 
| +void vmaxmgvNEON(const float* sourceP, int sourceStride, float* maxP, size_t framesToProcess)
 | 
| +{
 | 
| +    int n = framesToProcess;
 | 
| +    float max = 0;
 | 
| +
 | 
| +    if (sourceStride == 1) {
 | 
| +        int tailFrames = n % 4;
 | 
| +        const float* endP = sourceP + n - tailFrames;
 | 
| +
 | 
| +        float32x4_t fourMax = vdupq_n_f32(0);
 | 
| +        while (sourceP < endP) {
 | 
| +            float32x4_t source = vld1q_f32(sourceP);
 | 
| +            fourMax = vmaxq_f32(fourMax, vabsq_f32(source));
 | 
| +            sourceP += 4;
 | 
| +        }
 | 
| +        float32x2_t twoMax = vmax_f32(vget_low_f32(fourMax), vget_high_f32(fourMax));
 | 
| +
 | 
| +        float groupMax[2];
 | 
| +        vst1_f32(groupMax, twoMax);
 | 
| +        max = std::max(groupMax[0], groupMax[1]);
 | 
| +
 | 
| +        n = tailFrames;
 | 
| +    }
 | 
| +
 | 
| +    while (n--) {
 | 
| +        max = std::max(max, fabsf(*sourceP));
 | 
| +        sourceP += sourceStride;
 | 
| +    }
 | 
| +
 | 
| +    ASSERT(maxP);
 | 
| +    *maxP = max;
 | 
| +}
 | 
| +
 | 
| +void vclipNEON(const float* sourceP, int sourceStride, const float* lowThresholdP, const float* highThresholdP, float* destP, int destStride, size_t framesToProcess)
 | 
| +{
 | 
| +    int n = framesToProcess;
 | 
| +    float lowThreshold = *lowThresholdP;
 | 
| +    float highThreshold = *highThresholdP;
 | 
| +
 | 
| +    if ((sourceStride == 1) && (destStride == 1)) {
 | 
| +        int tailFrames = n % 4;
 | 
| +        const float* endP = destP + n - tailFrames;
 | 
| +
 | 
| +        float32x4_t low = vdupq_n_f32(lowThreshold);
 | 
| +        float32x4_t high = vdupq_n_f32(highThreshold);
 | 
| +        while (destP < endP) {
 | 
| +            float32x4_t source = vld1q_f32(sourceP);
 | 
| +            vst1q_f32(destP, vmaxq_f32(vminq_f32(source, high), low));
 | 
| +            sourceP += 4;
 | 
| +            destP += 4;
 | 
| +        }
 | 
| +        n = tailFrames;
 | 
| +    }
 | 
| +
 | 
| +    while (n--) {
 | 
| +        *destP = std::max(std::min(*sourceP, highThreshold), lowThreshold);
 | 
| +        sourceP += sourceStride;
 | 
| +        destP += destStride;
 | 
| +    }
 | 
| +}
 | 
| +
 | 
| +} // namespace VectorMath
 | 
| +
 | 
| +} // namespace blink
 | 
| +
 | 
| +#endif
 | 
| +
 | 
| +#endif // VectorMathNEON_h
 | 
| 
 |