Source/platform/audio/cpu/arm/VectorMathNEON.h - Issue 604373003: [WIP] Supporting arm_neon_optional flag for blink platform.

Side by Side Diff: Source/platform/audio/cpu/arm/VectorMathNEON.h

Issue 604373003: [WIP] Supporting arm_neon_optional flag for blink platform. Base URL: https://chromium.googlesource.com/chromium/blink.git@master

Patch Set: Created 6 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 /*

	2 * Copyright (C) 2010, Google Inc. All rights reserved.

	3 *

	4 * Redistribution and use in source and binary forms, with or without

	5 * modification, are permitted provided that the following conditions

	6 * are met:

	7 * 1. Redistributions of source code must retain the above copyright

	8 * notice, this list of conditions and the following disclaimer.

	9 * 2. Redistributions in binary form must reproduce the above copyright

	10 * notice, this list of conditions and the following disclaimer in the

	11 * documentation and/or other materials provided with the distribution.

	12 *

	13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND AN Y

	14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED

	15 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE

	16 * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR AN Y

	17 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES

	18 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;

	19 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND O N

	20 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

	21 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS

	22 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

	23 */

	24

	25 #ifndef VectorMathNEON_h

	26 #define VectorMathNEON_h

	27

	28 #if HAVE(ARM_NEON_INTRINSICS)

	29

	30 #include "platform/audio/VectorMath.h"

	31 #include <arm_neon.h>

	32

	33 namespace blink {

	34 namespace VectorMath {

	35

	36 void vsmaNEON(const float* sourceP, int sourceStride, const float* scale, float* destP, int destStride, size_t framesToProcess)

	37 {

	38 int n = framesToProcess;

	39 if ((sourceStride == 1) && (destStride == 1)) {

	40 int tailFrames = n % 4;

	41 const float* endP = destP + n - tailFrames;

	42

	43 float32x4_t k = vdupq_n_f32(*scale);

	44 while (destP < endP) {

	45 float32x4_t source = vld1q_f32(sourceP);

	46 float32x4_t dest = vld1q_f32(destP);

	47

	48 dest = vmlaq_f32(dest, source, k);

	49 vst1q_f32(destP, dest);

	50

	51 sourceP += 4;

	52 destP += 4;

	53 }

	54 n = tailFrames;

	55 }

	56

	57 while (n) {

	58 destP += sourceP * *scale;

	59 sourceP += sourceStride;

	60 destP += destStride;

	61 n--;

	62 }

	63 }

	64

	65 void vsmulNEON(const float* sourceP, int sourceStride, const float* scale, float * destP, int destStride, size_t framesToProcess)

	66 {

	67 int n = framesToProcess;

	68 if ((sourceStride == 1) && (destStride == 1)) {

	69 float k = *scale;

	70 int tailFrames = n % 4;

	71 const float* endP = destP + n - tailFrames;

	72

	73 while (destP < endP) {

	74 float32x4_t source = vld1q_f32(sourceP);

	75 vst1q_f32(destP, vmulq_n_f32(source, k));

	76

	77 sourceP += 4;

	78 destP += 4;

	79 }

	80 n = tailFrames;

	81 }

	82

	83 float k = *scale;

	84 while (n--) {

	85 destP = k *sourceP;

	86 sourceP += sourceStride;

	87 destP += destStride;

	88 }

	89 }

	90

	91 void vaddNEON(const float* source1P, int sourceStride1, const float* source2P, i nt sourceStride2, float* destP, int destStride, size_t framesToProcess)

	92 {

	93 int n = framesToProcess;

	94

	95 if ((sourceStride1 ==1) && (sourceStride2 == 1) && (destStride == 1)) {

	96 int tailFrames = n % 4;

	97 const float* endP = destP + n - tailFrames;

	98

	99 while (destP < endP) {

	100 float32x4_t source1 = vld1q_f32(source1P);

	101 float32x4_t source2 = vld1q_f32(source2P);

	102 vst1q_f32(destP, vaddq_f32(source1, source2));

	103

	104 source1P += 4;

	105 source2P += 4;

	106 destP += 4;

	107 }

	108 n = tailFrames;

	109 }

	110

	111 while (n--) {

	112 destP = source1P + *source2P;

	113 source1P += sourceStride1;

	114 source2P += sourceStride2;

	115 destP += destStride;

	116 }

	117 }

	118

	119 void vmulNEON(const float* source1P, int sourceStride1, const float* source2P, i nt sourceStride2, float* destP, int destStride, size_t framesToProcess)

	120 {

	121

	122 int n = framesToProcess;

	123

	124 if ((sourceStride1 ==1) && (sourceStride2 == 1) && (destStride == 1)) {

	125 int tailFrames = n % 4;

	126 const float* endP = destP + n - tailFrames;

	127

	128 while (destP < endP) {

	129 float32x4_t source1 = vld1q_f32(source1P);

	130 float32x4_t source2 = vld1q_f32(source2P);

	131 vst1q_f32(destP, vmulq_f32(source1, source2));

	132

	133 source1P += 4;

	134 source2P += 4;

	135 destP += 4;

	136 }

	137 n = tailFrames;

	138 }

	139

	140 while (n) {

	141 destP = source1P * *source2P;

	142 source1P += sourceStride1;

	143 source2P += sourceStride2;

	144 destP += destStride;

	145 n--;

	146 }

	147 }

	148

	149 void zvmulNEON(const float* real1P, const float* imag1P, const float* real2P, co nst float* imag2P, float* realDestP, float* imagDestP, size_t framesToProcess)

	150 {

	151 unsigned i = 0;

	152 unsigned endSize = framesToProcess - framesToProcess % 4;

	153

	154 while (i < endSize) {

	155 float32x4_t real1 = vld1q_f32(real1P + i);

	156 float32x4_t real2 = vld1q_f32(real2P + i);

	157 float32x4_t imag1 = vld1q_f32(imag1P + i);

	158 float32x4_t imag2 = vld1q_f32(imag2P + i);

	159

	160 float32x4_t realResult = vmlsq_f32(vmulq_f32(real1, real2), imag1, imag2 );

	161 float32x4_t imagResult = vmlaq_f32(vmulq_f32(real1, imag2), imag1, real2 );

	162

	163 vst1q_f32(realDestP + i, realResult);

	164 vst1q_f32(imagDestP + i, imagResult);

	165

	166 i += 4;

	167 }

	168

	169 for (; i < framesToProcess; ++i) {

	170 // Read and compute result before storing them, in case the

	171 // destination is the same as one of the sources.

	172 float realResult = real1P[i] * real2P[i] - imag1P[i] * imag2P[i];

	173 float imagResult = real1P[i] * imag2P[i] + imag1P[i] * real2P[i];

	174

	175 realDestP[i] = realResult;

	176 imagDestP[i] = imagResult;

	177 }

	178 }

	179

	180 void vsvesqNEON(const float* sourceP, int sourceStride, float* sumP, size_t fram esToProcess)

	181 {

	182 int n = framesToProcess;

	183 float sum = 0;

	184

	185 if (sourceStride == 1) {

	186 int tailFrames = n % 4;

	187 const float* endP = sourceP + n - tailFrames;

	188

	189 float32x4_t fourSum = vdupq_n_f32(0);

	190 while (sourceP < endP) {

	191 float32x4_t source = vld1q_f32(sourceP);

	192 fourSum = vmlaq_f32(fourSum, source, source);

	193 sourceP += 4;

	194 }

	195 float32x2_t twoSum = vadd_f32(vget_low_f32(fourSum), vget_high_f32(fourS um));

	196

	197 float groupSum[2];

	198 vst1_f32(groupSum, twoSum);

	199 sum += groupSum[0] + groupSum[1];

	200

	201 n = tailFrames;

	202 }

	203

	204 while (n--) {

	205 float sample = *sourceP;

	206 sum += sample * sample;

	207 sourceP += sourceStride;

	208 }

	209

	210 ASSERT(sumP);

	211 *sumP = sum;

	212 }

	213

	214 void vmaxmgvNEON(const float* sourceP, int sourceStride, float* maxP, size_t fra mesToProcess)

	215 {

	216 int n = framesToProcess;

	217 float max = 0;

	218

	219 if (sourceStride == 1) {

	220 int tailFrames = n % 4;

	221 const float* endP = sourceP + n - tailFrames;

	222

	223 float32x4_t fourMax = vdupq_n_f32(0);

	224 while (sourceP < endP) {

	225 float32x4_t source = vld1q_f32(sourceP);

	226 fourMax = vmaxq_f32(fourMax, vabsq_f32(source));

	227 sourceP += 4;

	228 }

	229 float32x2_t twoMax = vmax_f32(vget_low_f32(fourMax), vget_high_f32(fourM ax));

	230

	231 float groupMax[2];

	232 vst1_f32(groupMax, twoMax);

	233 max = std::max(groupMax[0], groupMax[1]);

	234

	235 n = tailFrames;

	236 }

	237

	238 while (n--) {

	239 max = std::max(max, fabsf(*sourceP));

	240 sourceP += sourceStride;

	241 }

	242

	243 ASSERT(maxP);

	244 *maxP = max;

	245 }

	246

	247 void vclipNEON(const float* sourceP, int sourceStride, const float* lowThreshold P, const float* highThresholdP, float* destP, int destStride, size_t framesToPro cess)

	248 {

	249 int n = framesToProcess;

	250 float lowThreshold = *lowThresholdP;

	251 float highThreshold = *highThresholdP;

	252

	253 if ((sourceStride == 1) && (destStride == 1)) {

	254 int tailFrames = n % 4;

	255 const float* endP = destP + n - tailFrames;

	256

	257 float32x4_t low = vdupq_n_f32(lowThreshold);

	258 float32x4_t high = vdupq_n_f32(highThreshold);

	259 while (destP < endP) {

	260 float32x4_t source = vld1q_f32(sourceP);

	261 vst1q_f32(destP, vmaxq_f32(vminq_f32(source, high), low));

	262 sourceP += 4;

	263 destP += 4;

	264 }

	265 n = tailFrames;

	266 }

	267

	268 while (n--) {

	269 destP = std::max(std::min(sourceP, highThreshold), lowThreshold);

	270 sourceP += sourceStride;

	271 destP += destStride;

	272 }

	273 }

	274

	275 } // namespace VectorMath

	276

	277 } // namespace blink

	278

	279 #endif

	280

	281 #endif // VectorMathNEON_h

OLD	NEW

« no previous file with comments | « Source/platform/audio/VectorMath.cpp ('k') | Source/platform/blink_platform.gyp » ('j') | no next file with comments »