Index: Source/modules/webaudio/AudioParamTimeline.cpp |
diff --git a/Source/modules/webaudio/AudioParamTimeline.cpp b/Source/modules/webaudio/AudioParamTimeline.cpp |
index 7482279a05b2498b21cbdcec0f28584a0b184bd8..6c6f731d870ae14385a751c07af8fb5222481fa6 100644 |
--- a/Source/modules/webaudio/AudioParamTimeline.cpp |
+++ b/Source/modules/webaudio/AudioParamTimeline.cpp |
@@ -349,6 +349,38 @@ float AudioParamTimeline::valuesForTimeRangeImpl( |
float timeConstant = event.timeConstant(); |
float discreteTimeConstant = static_cast<float>(AudioUtilities::discreteTimeConstantForSampleRate(timeConstant, controlRate)); |
+#if CPU(X86) || CPU(X86_64) |
+ // Resolve recursion by expanding constants to achieve a 4-step loop unrolling. |
Raymond Toy
2015/08/14 16:00:03
"Resorve"?
adrian.belgun
2015/08/17 07:58:14
Done. Sorry for that.
|
+ // v1 = v0 + (t - v0) * c |
+ // v2 = v1 + (t - v1) * c |
+ // v2 = v0 + (t - v0) * c + (t - (v0 + (t - v0) * c)) * c |
+ // v2 = v0 + (t - v0) * c + (t - v0) * c - (t - v0) * c * c |
+ // v2 = v0 + (t - v0) * (2c - c^2) |
+ // Thus c0 = c, c1 = 2c - c^2. The same logic applies to c2 and c3. |
+ const float c0 = discreteTimeConstant; |
+ const float c1 = 2 * c0 - c0 * c0; |
+ const float c2 = 3 * c0 - 3 * c0 * c0 + c0 * c0 * c0; |
+ const float c3 = 4 * c0 - 6 * c0 * c0 + 4 * c0 * c0 * c0 - c0 * c0 * c0 * c0; |
+ |
+ float delta; |
+ __m128 vC = _mm_set_ps(c2, c1, c0, 0); |
+ __m128 vDelta, vValue, vResult; |
+ |
+ // Process 4 loop steps. |
+ unsigned fillToFrameTrunc = writeIndex + ((fillToFrame - writeIndex) / 4) * 4; |
+ for (; writeIndex < fillToFrameTrunc; writeIndex += 4) { |
+ delta = target - value; |
+ vDelta = _mm_set_ps1(delta); |
+ vValue = _mm_set_ps1(value); |
+ |
+ vResult = _mm_add_ps(vValue, _mm_mul_ps(vDelta, vC)); |
+ _mm_storeu_ps(values + writeIndex, vResult); |
+ |
+ // Update value for next iteration. |
+ value += delta * c3; |
+ } |
+#endif |
+ // Serially process the remaining values (fewer than 4 on the vectorized path). |
for (; writeIndex < fillToFrame; ++writeIndex) { |
values[writeIndex] = value; |
value += (target - value) * discreteTimeConstant; |