Source/platform/audio/cpu/arm/VectorMathNEON.cpp - Issue 715753002: [WIP] support arm_neon_optional flag in blink.

Side by Side Diff: Source/platform/audio/cpu/arm/VectorMathNEON.cpp

Issue 715753002: [WIP] support arm_neon_optional flag in blink. Base URL: https://chromium.googlesource.com/chromium/blink.git@master

Patch Set: Created 6 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 /*

	2 * Copyright (C) 2010, Google Inc. All rights reserved.

	3 *

	4 * Redistribution and use in source and binary forms, with or without

	5 * modification, are permitted provided that the following conditions

	6 * are met:

	7 * 1. Redistributions of source code must retain the above copyright

	8 * notice, this list of conditions and the following disclaimer.

	9 * 2. Redistributions in binary form must reproduce the above copyright

	10 * notice, this list of conditions and the following disclaimer in the

	11 * documentation and/or other materials provided with the distribution.

	12 *

	13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND AN Y

	14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED

	15 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE

	16 * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR AN Y

	17 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES

	18 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;

	19 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND O N

	20 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

	21 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS

	22 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

	23 */

	24

	25 #include "config.h"

	26

	27 #if ENABLE(WEB_AUDIO)

	28

	29 #include "platform/audio/cpu/arm/VectorMathNEON.h"

	30

	31 #include "wtf/Assertions.h"

	32 #include "wtf/CPU.h"

	33 #include <algorithm>

	34 #include <arm_neon.h>

	35

	36 namespace blink {

	37

	38 namespace VectorMath {

	39

	40 void vsma(const float* sourceP, int sourceStride, const float* scale, float* des tP, int destStride, size_t framesToProcess)

	41 {

	42 int n = framesToProcess;

	43

	44 if (WTF_CPU_ARM_HAS_NEON()) {

	45 if ((sourceStride == 1) && (destStride == 1)) {

	46 int tailFrames = n % 4;

	47 const float* endP = destP + n - tailFrames;

	48

	49 float32x4_t k = vdupq_n_f32(*scale);

	50 while (destP < endP) {

	51 float32x4_t source = vld1q_f32(sourceP);

	52 float32x4_t dest = vld1q_f32(destP);

	53

	54 dest = vmlaq_f32(dest, source, k);

	55 vst1q_f32(destP, dest);

	56

	57 sourceP += 4;

	58 destP += 4;

	59 }

	60 n = tailFrames;

	61 }

	62 }

	63

	64 while (n) {

	65 destP += sourceP * *scale;

	66 sourceP += sourceStride;

	67 destP += destStride;

	68 n--;

	69 }

	70 }

	71

	72 void vsmul(const float* sourceP, int sourceStride, const float* scale, float* de stP, int destStride, size_t framesToProcess)

	73 {

	74 int n = framesToProcess;

	75

	76 if (WTF_CPU_ARM_HAS_NEON()) {

	77 if ((sourceStride == 1) && (destStride == 1)) {

	78 float k = *scale;

	79 int tailFrames = n % 4;

	80 const float* endP = destP + n - tailFrames;

	81

	82 while (destP < endP) {

	83 float32x4_t source = vld1q_f32(sourceP);

	84 vst1q_f32(destP, vmulq_n_f32(source, k));

	85

	86 sourceP += 4;

	87 destP += 4;

	88 }

	89 n = tailFrames;

	90 }

	91 }

	92

	93 float k = *scale;

	94 while (n--) {

	95 destP = k *sourceP;

	96 sourceP += sourceStride;

	97 destP += destStride;

	98 }

	99 }

	100

	101 void vadd(const float* source1P, int sourceStride1, const float* source2P, int s ourceStride2, float* destP, int destStride, size_t framesToProcess)

	102 {

	103 int n = framesToProcess;

	104

	105 if (WTF_CPU_ARM_HAS_NEON()) {

	106 if ((sourceStride1 ==1) && (sourceStride2 == 1) && (destStride == 1)) {

	107 int tailFrames = n % 4;

	108 const float* endP = destP + n - tailFrames;

	109

	110 while (destP < endP) {

	111 float32x4_t source1 = vld1q_f32(source1P);

	112 float32x4_t source2 = vld1q_f32(source2P);

	113 vst1q_f32(destP, vaddq_f32(source1, source2));

	114

	115 source1P += 4;

	116 source2P += 4;

	117 destP += 4;

	118 }

	119 n = tailFrames;

	120 }

	121 }

	122

	123 while (n--) {

	124 destP = source1P + *source2P;

	125 source1P += sourceStride1;

	126 source2P += sourceStride2;

	127 destP += destStride;

	128 }

	129 }

	130

	131 void vmul(const float* source1P, int sourceStride1, const float* source2P, int s ourceStride2, float* destP, int destStride, size_t framesToProcess)

	132 {

	133 int n = framesToProcess;

	134

	135 if (WTF_CPU_ARM_HAS_NEON()) {

	136 if ((sourceStride1 ==1) && (sourceStride2 == 1) && (destStride == 1)) {

	137 int tailFrames = n % 4;

	138 const float* endP = destP + n - tailFrames;

	139

	140 while (destP < endP) {

	141 float32x4_t source1 = vld1q_f32(source1P);

	142 float32x4_t source2 = vld1q_f32(source2P);

	143 vst1q_f32(destP, vmulq_f32(source1, source2));

	144

	145 source1P += 4;

	146 source2P += 4;

	147 destP += 4;

	148 }

	149 n = tailFrames;

	150 }

	151 }

	152

	153 while (n) {

	154 destP = source1P * *source2P;

	155 source1P += sourceStride1;

	156 source2P += sourceStride2;

	157 destP += destStride;

	158 n--;

	159 }

	160 }

	161

	162 void zvmul(const float* real1P, const float* imag1P, const float* real2P, const float* imag2P, float* realDestP, float* imagDestP, size_t framesToProcess)

	163 {

	164 unsigned i = 0;

	165

	166 unsigned endSize = framesToProcess - framesToProcess % 4;

	167

	168 if (WTF_CPU_ARM_HAS_NEON()) {

	169 while (i < endSize) {

	170 float32x4_t real1 = vld1q_f32(real1P + i);

	171 float32x4_t real2 = vld1q_f32(real2P + i);

	172 float32x4_t imag1 = vld1q_f32(imag1P + i);

	173 float32x4_t imag2 = vld1q_f32(imag2P + i);

	174

	175 float32x4_t realResult = vmlsq_f32(vmulq_f32(real1, real2), imag1, i mag2);

	176 float32x4_t imagResult = vmlaq_f32(vmulq_f32(real1, imag2), imag1, r eal2);

	177

	178 vst1q_f32(realDestP + i, realResult);

	179 vst1q_f32(imagDestP + i, imagResult);

	180

	181 i += 4;

	182 }

	183 }

	184 for (; i < framesToProcess; ++i) {

	185 // Read and compute result before storing them, in case the

	186 // destination is the same as one of the sources.

	187 float realResult = real1P[i] * real2P[i] - imag1P[i] * imag2P[i];

	188 float imagResult = real1P[i] * imag2P[i] + imag1P[i] * real2P[i];

	189

	190 realDestP[i] = realResult;

	191 imagDestP[i] = imagResult;

	192 }

	193 }

	194

	195 void vsvesq(const float* sourceP, int sourceStride, float* sumP, size_t framesTo Process)

	196 {

	197 int n = framesToProcess;

	198 float sum = 0;

	199

	200 if (WTF_CPU_ARM_HAS_NEON()) {

	201 if (sourceStride == 1) {

	202 int tailFrames = n % 4;

	203 const float* endP = sourceP + n - tailFrames;

	204

	205 float32x4_t fourSum = vdupq_n_f32(0);

	206 while (sourceP < endP) {

	207 float32x4_t source = vld1q_f32(sourceP);

	208 fourSum = vmlaq_f32(fourSum, source, source);

	209 sourceP += 4;

	210 }

	211 float32x2_t twoSum = vadd_f32(vget_low_f32(fourSum), vget_high_f32(f ourSum));

	212

	213 float groupSum[2];

	214 vst1_f32(groupSum, twoSum);

	215 sum += groupSum[0] + groupSum[1];

	216

	217 n = tailFrames;

	218 }

	219 }

	220

	221 while (n--) {

	222 float sample = *sourceP;

	223 sum += sample * sample;

	224 sourceP += sourceStride;

	225 }

	226

	227 ASSERT(sumP);

	228 *sumP = sum;

	229 }

	230

	231 void vmaxmgv(const float* sourceP, int sourceStride, float* maxP, size_t framesT oProcess)

	232 {

	233 int n = framesToProcess;

	234 float max = 0;

	235

	236 if (WTF_CPU_ARM_HAS_NEON()) {

	237 if (sourceStride == 1) {

	238 int tailFrames = n % 4;

	239 const float* endP = sourceP + n - tailFrames;

	240

	241 float32x4_t fourMax = vdupq_n_f32(0);

	242 while (sourceP < endP) {

	243 float32x4_t source = vld1q_f32(sourceP);

	244 fourMax = vmaxq_f32(fourMax, vabsq_f32(source));

	245 sourceP += 4;

	246 }

	247 float32x2_t twoMax = vmax_f32(vget_low_f32(fourMax), vget_high_f32(f ourMax));

	248

	249 float groupMax[2];

	250 vst1_f32(groupMax, twoMax);

	251 max = std::max(groupMax[0], groupMax[1]);

	252

	253 n = tailFrames;

	254 }

	255 }

	256

	257 while (n--) {

	258 max = std::max(max, fabsf(*sourceP));

	259 sourceP += sourceStride;

	260 }

	261

	262 ASSERT(maxP);

	263 *maxP = max;

	264 }

	265

	266 void vclip(const float* sourceP, int sourceStride, const float* lowThresholdP, c onst float* highThresholdP, float* destP, int destStride, size_t framesToProcess )

	267 {

	268 int n = framesToProcess;

	269 float lowThreshold = *lowThresholdP;

	270 float highThreshold = *highThresholdP;

	271

	272 if (WTF_CPU_ARM_HAS_NEON()) {

	273 if ((sourceStride == 1) && (destStride == 1)) {

	274 int tailFrames = n % 4;

	275 const float* endP = destP + n - tailFrames;

	276

	277 float32x4_t low = vdupq_n_f32(lowThreshold);

	278 float32x4_t high = vdupq_n_f32(highThreshold);

	279 while (destP < endP) {

	280 float32x4_t source = vld1q_f32(sourceP);

	281 vst1q_f32(destP, vmaxq_f32(vminq_f32(source, high), low));

	282 sourceP += 4;

	283 destP += 4;

	284 }

	285 n = tailFrames;

	286 }

	287 }

	288

	289 while (n--) {

	290 destP = std::max(std::min(sourceP, highThreshold), lowThreshold);

	291 sourceP += sourceStride;

	292 destP += destStride;

	293 }

	294 }

	295

	296 } // namespace VectorMath

	297

	298 } // namespace blink

	299

	300 #endif // ENABLE(WEB_AUDIO)

OLD	NEW

« no previous file with comments | « Source/platform/audio/cpu/arm/VectorMathNEON.h ('k') | Source/platform/audio/mac/VectorMathMac.cpp » ('j') | no next file with comments »