Index: Source/core/platform/audio/VectorMath.cpp
diff --git a/Source/core/platform/audio/VectorMath.cpp b/Source/core/platform/audio/VectorMath.cpp
index 9788228ac8cfcc3ad50166cfd3a006cd05aa1304..6b20f0706b6a274f91340597220218b3f87c8ebd 100644
--- a/Source/core/platform/audio/VectorMath.cpp
+++ b/Source/core/platform/audio/VectorMath.cpp
@@ -157,9 +157,9 @@ void vsma(const float* sourceP, int sourceStride, const float* scale, float* des
destP += 4; \
}
- if (destAligned)
+ if (destAligned)
SSE2_MULT_ADD(load, store)
- else
+ else
SSE2_MULT_ADD(loadu, storeu)
n = tailFrames;
@@ -309,7 +309,7 @@ void vadd(const float* source1P, int sourceStride1, const float* source2P, int s
destP += 4;
}
- } else if (source2Aligned && !destAligned) { // source2 aligned but dest not aligned
+ } else if (source2Aligned && !destAligned) { // source2 aligned but dest not aligned
while (group--) {
pSource1 = reinterpret_cast<__m128*>(const_cast<float*>(source1P));
pSource2 = reinterpret_cast<__m128*>(const_cast<float*>(source2P));
@@ -321,7 +321,7 @@ void vadd(const float* source1P, int sourceStride1, const float* source2P, int s
destP += 4;
}
- } else if (!source2Aligned && destAligned) { // source2 not aligned but dest aligned
+ } else if (!source2Aligned && destAligned) { // source2 not aligned but dest aligned
while (group--) {
pSource1 = reinterpret_cast<__m128*>(const_cast<float*>(source1P));
source2 = _mm_loadu_ps(source2P);
@@ -332,7 +332,7 @@ void vadd(const float* source1P, int sourceStride1, const float* source2P, int s
source2P += 4;
destP += 4;
}
- } else if (!source2Aligned && !destAligned) { // both source2 and dest not aligned
+ } else if (!source2Aligned && !destAligned) { // both source2 and dest not aligned
while (group--) {
pSource1 = reinterpret_cast<__m128*>(const_cast<float*>(source1P));
source2 = _mm_loadu_ps(source2P);
@@ -462,7 +462,7 @@ void zvmul(const float* real1P, const float* imag1P, const float* real2P, const
{
unsigned i = 0;
#ifdef __SSE2__
- // Only use the SSE optimization in the very common case that all addresses are 16-byte aligned.
+ // Only use the SSE optimization in the very common case that all addresses are 16-byte aligned.
// Otherwise, fall through to the scalar code below.
if (!(reinterpret_cast<uintptr_t>(real1P) & 0x0F)
&& !(reinterpret_cast<uintptr_t>(imag1P) & 0x0F)
@@ -470,7 +470,7 @@ void zvmul(const float* real1P, const float* imag1P, const float* real2P, const
&& !(reinterpret_cast<uintptr_t>(imag2P) & 0x0F)
&& !(reinterpret_cast<uintptr_t>(realDestP) & 0x0F)
&& !(reinterpret_cast<uintptr_t>(imagDestP) & 0x0F)) {
-
+
unsigned endSize = framesToProcess - framesToProcess % 4;
while (i < endSize) {
__m128 real1 = _mm_load_ps(real1P + i);
@@ -519,35 +519,35 @@ void vsvesq(const float* sourceP, int sourceStride, float* sumP, size_t framesTo
int n = framesToProcess;
float sum = 0;
-#ifdef __SSE2__
- if (sourceStride == 1) {
- // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately.
- while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) {
- float sample = *sourceP;
- sum += sample * sample;
- sourceP++;
- n--;
- }
-
+#ifdef __SSE2__
+ if (sourceStride == 1) {
+ // If the sourceP address is not 16-byte aligned, the first several frames (at most three) should be processed separately.
+ while ((reinterpret_cast<uintptr_t>(sourceP) & 0x0F) && n) {
+ float sample = *sourceP;
+ sum += sample * sample;
+ sourceP++;
+ n--;
+ }
+
// Now the sourceP is aligned, use SSE.
- int tailFrames = n % 4;
- const float* endP = sourceP + n - tailFrames;
- __m128 source;
- __m128 mSum = _mm_setzero_ps();
-
- while (sourceP < endP) {
- source = _mm_load_ps(sourceP);
- source = _mm_mul_ps(source, source);
- mSum = _mm_add_ps(mSum, source);
- sourceP += 4;
- }
-
- // Summarize the SSE results.
- const float* groupSumP = reinterpret_cast<float*>(&mSum);
- sum += groupSumP[0] + groupSumP[1] + groupSumP[2] + groupSumP[3];
-
- n = tailFrames;
- }
+ int tailFrames = n % 4;
+ const float* endP = sourceP + n - tailFrames;
+ __m128 source;
+ __m128 mSum = _mm_setzero_ps();
+
+ while (sourceP < endP) {
+ source = _mm_load_ps(sourceP);
+ source = _mm_mul_ps(source, source);
+ mSum = _mm_add_ps(mSum, source);
+ sourceP += 4;
+ }
+
+ // Summarize the SSE results.
+ const float* groupSumP = reinterpret_cast<float*>(&mSum);
+ sum += groupSumP[0] + groupSumP[1] + groupSumP[2] + groupSumP[3];
+
+ n = tailFrames;
+ }
#elif HAVE(ARM_NEON_INTRINSICS)
if (sourceStride == 1) {
int tailFrames = n % 4;