OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright (C) 2010, Google Inc. All rights reserved. |
| 3 * |
| 4 * Redistribution and use in source and binary forms, with or without |
| 5 * modification, are permitted provided that the following conditions |
| 6 * are met: |
| 7 * 1. Redistributions of source code must retain the above copyright |
| 8 * notice, this list of conditions and the following disclaimer. |
| 9 * 2. Redistributions in binary form must reproduce the above copyright |
| 10 * notice, this list of conditions and the following disclaimer in the |
| 11 * documentation and/or other materials provided with the distribution. |
| 12 * |
| 13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND AN
Y |
| 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| 15 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| 16 * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR AN
Y |
| 17 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
| 18 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| 19 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND O
N |
| 20 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 21 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| 22 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 23 */ |
| 24 |
| 25 #ifndef VectorMathNEON_h |
| 26 #define VectorMathNEON_h |
| 27 |
| 28 #if HAVE(ARM_NEON_INTRINSICS) |
| 29 |
| 30 #include "platform/audio/VectorMath.h" |
| 31 #include <arm_neon.h> |
| 32 |
| 33 namespace blink { |
| 34 namespace VectorMath { |
| 35 |
| 36 void vsmaNEON(const float* sourceP, int sourceStride, const float* scale, float*
destP, int destStride, size_t framesToProcess) |
| 37 { |
| 38 int n = framesToProcess; |
| 39 if ((sourceStride == 1) && (destStride == 1)) { |
| 40 int tailFrames = n % 4; |
| 41 const float* endP = destP + n - tailFrames; |
| 42 |
| 43 float32x4_t k = vdupq_n_f32(*scale); |
| 44 while (destP < endP) { |
| 45 float32x4_t source = vld1q_f32(sourceP); |
| 46 float32x4_t dest = vld1q_f32(destP); |
| 47 |
| 48 dest = vmlaq_f32(dest, source, k); |
| 49 vst1q_f32(destP, dest); |
| 50 |
| 51 sourceP += 4; |
| 52 destP += 4; |
| 53 } |
| 54 n = tailFrames; |
| 55 } |
| 56 |
| 57 while (n) { |
| 58 *destP += *sourceP * *scale; |
| 59 sourceP += sourceStride; |
| 60 destP += destStride; |
| 61 n--; |
| 62 } |
| 63 } |
| 64 |
| 65 void vsmulNEON(const float* sourceP, int sourceStride, const float* scale, float
* destP, int destStride, size_t framesToProcess) |
| 66 { |
| 67 int n = framesToProcess; |
| 68 if ((sourceStride == 1) && (destStride == 1)) { |
| 69 float k = *scale; |
| 70 int tailFrames = n % 4; |
| 71 const float* endP = destP + n - tailFrames; |
| 72 |
| 73 while (destP < endP) { |
| 74 float32x4_t source = vld1q_f32(sourceP); |
| 75 vst1q_f32(destP, vmulq_n_f32(source, k)); |
| 76 |
| 77 sourceP += 4; |
| 78 destP += 4; |
| 79 } |
| 80 n = tailFrames; |
| 81 } |
| 82 |
| 83 float k = *scale; |
| 84 while (n--) { |
| 85 *destP = k * *sourceP; |
| 86 sourceP += sourceStride; |
| 87 destP += destStride; |
| 88 } |
| 89 } |
| 90 |
| 91 void vaddNEON(const float* source1P, int sourceStride1, const float* source2P, i
nt sourceStride2, float* destP, int destStride, size_t framesToProcess) |
| 92 { |
| 93 int n = framesToProcess; |
| 94 |
| 95 if ((sourceStride1 ==1) && (sourceStride2 == 1) && (destStride == 1)) { |
| 96 int tailFrames = n % 4; |
| 97 const float* endP = destP + n - tailFrames; |
| 98 |
| 99 while (destP < endP) { |
| 100 float32x4_t source1 = vld1q_f32(source1P); |
| 101 float32x4_t source2 = vld1q_f32(source2P); |
| 102 vst1q_f32(destP, vaddq_f32(source1, source2)); |
| 103 |
| 104 source1P += 4; |
| 105 source2P += 4; |
| 106 destP += 4; |
| 107 } |
| 108 n = tailFrames; |
| 109 } |
| 110 |
| 111 while (n--) { |
| 112 *destP = *source1P + *source2P; |
| 113 source1P += sourceStride1; |
| 114 source2P += sourceStride2; |
| 115 destP += destStride; |
| 116 } |
| 117 } |
| 118 |
| 119 void vmulNEON(const float* source1P, int sourceStride1, const float* source2P, i
nt sourceStride2, float* destP, int destStride, size_t framesToProcess) |
| 120 { |
| 121 |
| 122 int n = framesToProcess; |
| 123 |
| 124 if ((sourceStride1 ==1) && (sourceStride2 == 1) && (destStride == 1)) { |
| 125 int tailFrames = n % 4; |
| 126 const float* endP = destP + n - tailFrames; |
| 127 |
| 128 while (destP < endP) { |
| 129 float32x4_t source1 = vld1q_f32(source1P); |
| 130 float32x4_t source2 = vld1q_f32(source2P); |
| 131 vst1q_f32(destP, vmulq_f32(source1, source2)); |
| 132 |
| 133 source1P += 4; |
| 134 source2P += 4; |
| 135 destP += 4; |
| 136 } |
| 137 n = tailFrames; |
| 138 } |
| 139 |
| 140 while (n) { |
| 141 *destP = *source1P * *source2P; |
| 142 source1P += sourceStride1; |
| 143 source2P += sourceStride2; |
| 144 destP += destStride; |
| 145 n--; |
| 146 } |
| 147 } |
| 148 |
| 149 void zvmulNEON(const float* real1P, const float* imag1P, const float* real2P, co
nst float* imag2P, float* realDestP, float* imagDestP, size_t framesToProcess) |
| 150 { |
| 151 unsigned i = 0; |
| 152 unsigned endSize = framesToProcess - framesToProcess % 4; |
| 153 |
| 154 while (i < endSize) { |
| 155 float32x4_t real1 = vld1q_f32(real1P + i); |
| 156 float32x4_t real2 = vld1q_f32(real2P + i); |
| 157 float32x4_t imag1 = vld1q_f32(imag1P + i); |
| 158 float32x4_t imag2 = vld1q_f32(imag2P + i); |
| 159 |
| 160 float32x4_t realResult = vmlsq_f32(vmulq_f32(real1, real2), imag1, imag2
); |
| 161 float32x4_t imagResult = vmlaq_f32(vmulq_f32(real1, imag2), imag1, real2
); |
| 162 |
| 163 vst1q_f32(realDestP + i, realResult); |
| 164 vst1q_f32(imagDestP + i, imagResult); |
| 165 |
| 166 i += 4; |
| 167 } |
| 168 |
| 169 for (; i < framesToProcess; ++i) { |
| 170 // Read and compute result before storing them, in case the |
| 171 // destination is the same as one of the sources. |
| 172 float realResult = real1P[i] * real2P[i] - imag1P[i] * imag2P[i]; |
| 173 float imagResult = real1P[i] * imag2P[i] + imag1P[i] * real2P[i]; |
| 174 |
| 175 realDestP[i] = realResult; |
| 176 imagDestP[i] = imagResult; |
| 177 } |
| 178 } |
| 179 |
| 180 void vsvesqNEON(const float* sourceP, int sourceStride, float* sumP, size_t fram
esToProcess) |
| 181 { |
| 182 int n = framesToProcess; |
| 183 float sum = 0; |
| 184 |
| 185 if (sourceStride == 1) { |
| 186 int tailFrames = n % 4; |
| 187 const float* endP = sourceP + n - tailFrames; |
| 188 |
| 189 float32x4_t fourSum = vdupq_n_f32(0); |
| 190 while (sourceP < endP) { |
| 191 float32x4_t source = vld1q_f32(sourceP); |
| 192 fourSum = vmlaq_f32(fourSum, source, source); |
| 193 sourceP += 4; |
| 194 } |
| 195 float32x2_t twoSum = vadd_f32(vget_low_f32(fourSum), vget_high_f32(fourS
um)); |
| 196 |
| 197 float groupSum[2]; |
| 198 vst1_f32(groupSum, twoSum); |
| 199 sum += groupSum[0] + groupSum[1]; |
| 200 |
| 201 n = tailFrames; |
| 202 } |
| 203 |
| 204 while (n--) { |
| 205 float sample = *sourceP; |
| 206 sum += sample * sample; |
| 207 sourceP += sourceStride; |
| 208 } |
| 209 |
| 210 ASSERT(sumP); |
| 211 *sumP = sum; |
| 212 } |
| 213 |
| 214 void vmaxmgvNEON(const float* sourceP, int sourceStride, float* maxP, size_t fra
mesToProcess) |
| 215 { |
| 216 int n = framesToProcess; |
| 217 float max = 0; |
| 218 |
| 219 if (sourceStride == 1) { |
| 220 int tailFrames = n % 4; |
| 221 const float* endP = sourceP + n - tailFrames; |
| 222 |
| 223 float32x4_t fourMax = vdupq_n_f32(0); |
| 224 while (sourceP < endP) { |
| 225 float32x4_t source = vld1q_f32(sourceP); |
| 226 fourMax = vmaxq_f32(fourMax, vabsq_f32(source)); |
| 227 sourceP += 4; |
| 228 } |
| 229 float32x2_t twoMax = vmax_f32(vget_low_f32(fourMax), vget_high_f32(fourM
ax)); |
| 230 |
| 231 float groupMax[2]; |
| 232 vst1_f32(groupMax, twoMax); |
| 233 max = std::max(groupMax[0], groupMax[1]); |
| 234 |
| 235 n = tailFrames; |
| 236 } |
| 237 |
| 238 while (n--) { |
| 239 max = std::max(max, fabsf(*sourceP)); |
| 240 sourceP += sourceStride; |
| 241 } |
| 242 |
| 243 ASSERT(maxP); |
| 244 *maxP = max; |
| 245 } |
| 246 |
| 247 void vclipNEON(const float* sourceP, int sourceStride, const float* lowThreshold
P, const float* highThresholdP, float* destP, int destStride, size_t framesToPro
cess) |
| 248 { |
| 249 int n = framesToProcess; |
| 250 float lowThreshold = *lowThresholdP; |
| 251 float highThreshold = *highThresholdP; |
| 252 |
| 253 if ((sourceStride == 1) && (destStride == 1)) { |
| 254 int tailFrames = n % 4; |
| 255 const float* endP = destP + n - tailFrames; |
| 256 |
| 257 float32x4_t low = vdupq_n_f32(lowThreshold); |
| 258 float32x4_t high = vdupq_n_f32(highThreshold); |
| 259 while (destP < endP) { |
| 260 float32x4_t source = vld1q_f32(sourceP); |
| 261 vst1q_f32(destP, vmaxq_f32(vminq_f32(source, high), low)); |
| 262 sourceP += 4; |
| 263 destP += 4; |
| 264 } |
| 265 n = tailFrames; |
| 266 } |
| 267 |
| 268 while (n--) { |
| 269 *destP = std::max(std::min(*sourceP, highThreshold), lowThreshold); |
| 270 sourceP += sourceStride; |
| 271 destP += destStride; |
| 272 } |
| 273 } |
| 274 |
| 275 } // namespace VectorMath |
| 276 |
| 277 } // namespace blink |
| 278 |
| 279 #endif |
| 280 |
| 281 #endif // VectorMathNEON_h |
OLD | NEW |