Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(376)

Side by Side Diff: Source/platform/audio/cpu/arm/VectorMathNEON.cpp

Issue 715753002: [WIP] support arm_neon_optional flag in blink. Base URL: https://chromium.googlesource.com/chromium/blink.git@master
Patch Set: Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * Copyright (C) 2010, Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND AN Y
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
15 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
16 * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR AN Y
17 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
18 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
19 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND O N
20 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
22 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23 */
24
25 #include "config.h"
26
27 #if ENABLE(WEB_AUDIO)
28
29 #include "platform/audio/cpu/arm/VectorMathNEON.h"
30
31 #include "wtf/Assertions.h"
32 #include "wtf/CPU.h"
33 #include <algorithm>
34 #include <arm_neon.h>
35
36 namespace blink {
37
38 namespace VectorMath {
39
40 void vsma(const float* sourceP, int sourceStride, const float* scale, float* des tP, int destStride, size_t framesToProcess)
41 {
42 int n = framesToProcess;
43
44 if (WTF_CPU_ARM_HAS_NEON()) {
45 if ((sourceStride == 1) && (destStride == 1)) {
46 int tailFrames = n % 4;
47 const float* endP = destP + n - tailFrames;
48
49 float32x4_t k = vdupq_n_f32(*scale);
50 while (destP < endP) {
51 float32x4_t source = vld1q_f32(sourceP);
52 float32x4_t dest = vld1q_f32(destP);
53
54 dest = vmlaq_f32(dest, source, k);
55 vst1q_f32(destP, dest);
56
57 sourceP += 4;
58 destP += 4;
59 }
60 n = tailFrames;
61 }
62 }
63
64 while (n) {
65 *destP += *sourceP * *scale;
66 sourceP += sourceStride;
67 destP += destStride;
68 n--;
69 }
70 }
71
72 void vsmul(const float* sourceP, int sourceStride, const float* scale, float* de stP, int destStride, size_t framesToProcess)
73 {
74 int n = framesToProcess;
75
76 if (WTF_CPU_ARM_HAS_NEON()) {
77 if ((sourceStride == 1) && (destStride == 1)) {
78 float k = *scale;
79 int tailFrames = n % 4;
80 const float* endP = destP + n - tailFrames;
81
82 while (destP < endP) {
83 float32x4_t source = vld1q_f32(sourceP);
84 vst1q_f32(destP, vmulq_n_f32(source, k));
85
86 sourceP += 4;
87 destP += 4;
88 }
89 n = tailFrames;
90 }
91 }
92
93 float k = *scale;
94 while (n--) {
95 *destP = k * *sourceP;
96 sourceP += sourceStride;
97 destP += destStride;
98 }
99 }
100
101 void vadd(const float* source1P, int sourceStride1, const float* source2P, int s ourceStride2, float* destP, int destStride, size_t framesToProcess)
102 {
103 int n = framesToProcess;
104
105 if (WTF_CPU_ARM_HAS_NEON()) {
106 if ((sourceStride1 ==1) && (sourceStride2 == 1) && (destStride == 1)) {
107 int tailFrames = n % 4;
108 const float* endP = destP + n - tailFrames;
109
110 while (destP < endP) {
111 float32x4_t source1 = vld1q_f32(source1P);
112 float32x4_t source2 = vld1q_f32(source2P);
113 vst1q_f32(destP, vaddq_f32(source1, source2));
114
115 source1P += 4;
116 source2P += 4;
117 destP += 4;
118 }
119 n = tailFrames;
120 }
121 }
122
123 while (n--) {
124 *destP = *source1P + *source2P;
125 source1P += sourceStride1;
126 source2P += sourceStride2;
127 destP += destStride;
128 }
129 }
130
131 void vmul(const float* source1P, int sourceStride1, const float* source2P, int s ourceStride2, float* destP, int destStride, size_t framesToProcess)
132 {
133 int n = framesToProcess;
134
135 if (WTF_CPU_ARM_HAS_NEON()) {
136 if ((sourceStride1 ==1) && (sourceStride2 == 1) && (destStride == 1)) {
137 int tailFrames = n % 4;
138 const float* endP = destP + n - tailFrames;
139
140 while (destP < endP) {
141 float32x4_t source1 = vld1q_f32(source1P);
142 float32x4_t source2 = vld1q_f32(source2P);
143 vst1q_f32(destP, vmulq_f32(source1, source2));
144
145 source1P += 4;
146 source2P += 4;
147 destP += 4;
148 }
149 n = tailFrames;
150 }
151 }
152
153 while (n) {
154 *destP = *source1P * *source2P;
155 source1P += sourceStride1;
156 source2P += sourceStride2;
157 destP += destStride;
158 n--;
159 }
160 }
161
162 void zvmul(const float* real1P, const float* imag1P, const float* real2P, const float* imag2P, float* realDestP, float* imagDestP, size_t framesToProcess)
163 {
164 unsigned i = 0;
165
166 unsigned endSize = framesToProcess - framesToProcess % 4;
167
168 if (WTF_CPU_ARM_HAS_NEON()) {
169 while (i < endSize) {
170 float32x4_t real1 = vld1q_f32(real1P + i);
171 float32x4_t real2 = vld1q_f32(real2P + i);
172 float32x4_t imag1 = vld1q_f32(imag1P + i);
173 float32x4_t imag2 = vld1q_f32(imag2P + i);
174
175 float32x4_t realResult = vmlsq_f32(vmulq_f32(real1, real2), imag1, i mag2);
176 float32x4_t imagResult = vmlaq_f32(vmulq_f32(real1, imag2), imag1, r eal2);
177
178 vst1q_f32(realDestP + i, realResult);
179 vst1q_f32(imagDestP + i, imagResult);
180
181 i += 4;
182 }
183 }
184 for (; i < framesToProcess; ++i) {
185 // Read and compute result before storing them, in case the
186 // destination is the same as one of the sources.
187 float realResult = real1P[i] * real2P[i] - imag1P[i] * imag2P[i];
188 float imagResult = real1P[i] * imag2P[i] + imag1P[i] * real2P[i];
189
190 realDestP[i] = realResult;
191 imagDestP[i] = imagResult;
192 }
193 }
194
195 void vsvesq(const float* sourceP, int sourceStride, float* sumP, size_t framesTo Process)
196 {
197 int n = framesToProcess;
198 float sum = 0;
199
200 if (WTF_CPU_ARM_HAS_NEON()) {
201 if (sourceStride == 1) {
202 int tailFrames = n % 4;
203 const float* endP = sourceP + n - tailFrames;
204
205 float32x4_t fourSum = vdupq_n_f32(0);
206 while (sourceP < endP) {
207 float32x4_t source = vld1q_f32(sourceP);
208 fourSum = vmlaq_f32(fourSum, source, source);
209 sourceP += 4;
210 }
211 float32x2_t twoSum = vadd_f32(vget_low_f32(fourSum), vget_high_f32(f ourSum));
212
213 float groupSum[2];
214 vst1_f32(groupSum, twoSum);
215 sum += groupSum[0] + groupSum[1];
216
217 n = tailFrames;
218 }
219 }
220
221 while (n--) {
222 float sample = *sourceP;
223 sum += sample * sample;
224 sourceP += sourceStride;
225 }
226
227 ASSERT(sumP);
228 *sumP = sum;
229 }
230
231 void vmaxmgv(const float* sourceP, int sourceStride, float* maxP, size_t framesT oProcess)
232 {
233 int n = framesToProcess;
234 float max = 0;
235
236 if (WTF_CPU_ARM_HAS_NEON()) {
237 if (sourceStride == 1) {
238 int tailFrames = n % 4;
239 const float* endP = sourceP + n - tailFrames;
240
241 float32x4_t fourMax = vdupq_n_f32(0);
242 while (sourceP < endP) {
243 float32x4_t source = vld1q_f32(sourceP);
244 fourMax = vmaxq_f32(fourMax, vabsq_f32(source));
245 sourceP += 4;
246 }
247 float32x2_t twoMax = vmax_f32(vget_low_f32(fourMax), vget_high_f32(f ourMax));
248
249 float groupMax[2];
250 vst1_f32(groupMax, twoMax);
251 max = std::max(groupMax[0], groupMax[1]);
252
253 n = tailFrames;
254 }
255 }
256
257 while (n--) {
258 max = std::max(max, fabsf(*sourceP));
259 sourceP += sourceStride;
260 }
261
262 ASSERT(maxP);
263 *maxP = max;
264 }
265
266 void vclip(const float* sourceP, int sourceStride, const float* lowThresholdP, c onst float* highThresholdP, float* destP, int destStride, size_t framesToProcess )
267 {
268 int n = framesToProcess;
269 float lowThreshold = *lowThresholdP;
270 float highThreshold = *highThresholdP;
271
272 if (WTF_CPU_ARM_HAS_NEON()) {
273 if ((sourceStride == 1) && (destStride == 1)) {
274 int tailFrames = n % 4;
275 const float* endP = destP + n - tailFrames;
276
277 float32x4_t low = vdupq_n_f32(lowThreshold);
278 float32x4_t high = vdupq_n_f32(highThreshold);
279 while (destP < endP) {
280 float32x4_t source = vld1q_f32(sourceP);
281 vst1q_f32(destP, vmaxq_f32(vminq_f32(source, high), low));
282 sourceP += 4;
283 destP += 4;
284 }
285 n = tailFrames;
286 }
287 }
288
289 while (n--) {
290 *destP = std::max(std::min(*sourceP, highThreshold), lowThreshold);
291 sourceP += sourceStride;
292 destP += destStride;
293 }
294 }
295
296 } // namespace VectorMath
297
298 } // namespace blink
299
300 #endif // ENABLE(WEB_AUDIO)
OLDNEW
« no previous file with comments | « Source/platform/audio/cpu/arm/VectorMathNEON.h ('k') | Source/platform/audio/mac/VectorMathMac.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698