| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 11 matching lines...) Expand all Loading... |
| 22 #include "webrtc/modules/audio_processing/aec/aec_rdft.h" | 22 #include "webrtc/modules/audio_processing/aec/aec_rdft.h" |
| 23 | 23 |
| 24 __inline static float MulRe(float aRe, float aIm, float bRe, float bIm) { | 24 __inline static float MulRe(float aRe, float aIm, float bRe, float bIm) { |
| 25 return aRe * bRe - aIm * bIm; | 25 return aRe * bRe - aIm * bIm; |
| 26 } | 26 } |
| 27 | 27 |
| 28 __inline static float MulIm(float aRe, float aIm, float bRe, float bIm) { | 28 __inline static float MulIm(float aRe, float aIm, float bRe, float bIm) { |
| 29 return aRe * bIm + aIm * bRe; | 29 return aRe * bIm + aIm * bRe; |
| 30 } | 30 } |
| 31 | 31 |
| 32 static void FilterFarSSE2(AecCore* aec, float yf[2][PART_LEN1]) { | 32 static void FilterFarSSE2(int num_partitions, |
| 33 int xfBufBlockPos, |
| 34 float xfBuf[2][kExtendedNumPartitions * PART_LEN1], |
| 35 float wfBuf[2][kExtendedNumPartitions * PART_LEN1], |
| 36 float yf[2][PART_LEN1]) { |
| 37 |
| 33 int i; | 38 int i; |
| 34 const int num_partitions = aec->num_partitions; | |
| 35 for (i = 0; i < num_partitions; i++) { | 39 for (i = 0; i < num_partitions; i++) { |
| 36 int j; | 40 int j; |
| 37 int xPos = (i + aec->xfBufBlockPos) * PART_LEN1; | 41 int xPos = (i + xfBufBlockPos) * PART_LEN1; |
| 38 int pos = i * PART_LEN1; | 42 int pos = i * PART_LEN1; |
| 39 // Check for wrap | 43 // Check for wrap |
| 40 if (i + aec->xfBufBlockPos >= num_partitions) { | 44 if (i + xfBufBlockPos >= num_partitions) { |
| 41 xPos -= num_partitions * (PART_LEN1); | 45 xPos -= num_partitions * (PART_LEN1); |
| 42 } | 46 } |
| 43 | 47 |
| 44 // vectorized code (four at once) | 48 // vectorized code (four at once) |
| 45 for (j = 0; j + 3 < PART_LEN1; j += 4) { | 49 for (j = 0; j + 3 < PART_LEN1; j += 4) { |
| 46 const __m128 xfBuf_re = _mm_loadu_ps(&aec->xfBuf[0][xPos + j]); | 50 const __m128 xfBuf_re = _mm_loadu_ps(&xfBuf[0][xPos + j]); |
| 47 const __m128 xfBuf_im = _mm_loadu_ps(&aec->xfBuf[1][xPos + j]); | 51 const __m128 xfBuf_im = _mm_loadu_ps(&xfBuf[1][xPos + j]); |
| 48 const __m128 wfBuf_re = _mm_loadu_ps(&aec->wfBuf[0][pos + j]); | 52 const __m128 wfBuf_re = _mm_loadu_ps(&wfBuf[0][pos + j]); |
| 49 const __m128 wfBuf_im = _mm_loadu_ps(&aec->wfBuf[1][pos + j]); | 53 const __m128 wfBuf_im = _mm_loadu_ps(&wfBuf[1][pos + j]); |
| 50 const __m128 yf_re = _mm_loadu_ps(&yf[0][j]); | 54 const __m128 yf_re = _mm_loadu_ps(&yf[0][j]); |
| 51 const __m128 yf_im = _mm_loadu_ps(&yf[1][j]); | 55 const __m128 yf_im = _mm_loadu_ps(&yf[1][j]); |
| 52 const __m128 a = _mm_mul_ps(xfBuf_re, wfBuf_re); | 56 const __m128 a = _mm_mul_ps(xfBuf_re, wfBuf_re); |
| 53 const __m128 b = _mm_mul_ps(xfBuf_im, wfBuf_im); | 57 const __m128 b = _mm_mul_ps(xfBuf_im, wfBuf_im); |
| 54 const __m128 c = _mm_mul_ps(xfBuf_re, wfBuf_im); | 58 const __m128 c = _mm_mul_ps(xfBuf_re, wfBuf_im); |
| 55 const __m128 d = _mm_mul_ps(xfBuf_im, wfBuf_re); | 59 const __m128 d = _mm_mul_ps(xfBuf_im, wfBuf_re); |
| 56 const __m128 e = _mm_sub_ps(a, b); | 60 const __m128 e = _mm_sub_ps(a, b); |
| 57 const __m128 f = _mm_add_ps(c, d); | 61 const __m128 f = _mm_add_ps(c, d); |
| 58 const __m128 g = _mm_add_ps(yf_re, e); | 62 const __m128 g = _mm_add_ps(yf_re, e); |
| 59 const __m128 h = _mm_add_ps(yf_im, f); | 63 const __m128 h = _mm_add_ps(yf_im, f); |
| 60 _mm_storeu_ps(&yf[0][j], g); | 64 _mm_storeu_ps(&yf[0][j], g); |
| 61 _mm_storeu_ps(&yf[1][j], h); | 65 _mm_storeu_ps(&yf[1][j], h); |
| 62 } | 66 } |
| 63 // scalar code for the remaining items. | 67 // scalar code for the remaining items. |
| 64 for (; j < PART_LEN1; j++) { | 68 for (; j < PART_LEN1; j++) { |
| 65 yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], | 69 yf[0][j] += MulRe(xfBuf[0][xPos + j], |
| 66 aec->xfBuf[1][xPos + j], | 70 xfBuf[1][xPos + j], |
| 67 aec->wfBuf[0][pos + j], | 71 wfBuf[0][pos + j], |
| 68 aec->wfBuf[1][pos + j]); | 72 wfBuf[1][pos + j]); |
| 69 yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], | 73 yf[1][j] += MulIm(xfBuf[0][xPos + j], |
| 70 aec->xfBuf[1][xPos + j], | 74 xfBuf[1][xPos + j], |
| 71 aec->wfBuf[0][pos + j], | 75 wfBuf[0][pos + j], |
| 72 aec->wfBuf[1][pos + j]); | 76 wfBuf[1][pos + j]); |
| 73 } | 77 } |
| 74 } | 78 } |
| 75 } | 79 } |
| 76 | 80 |
| 77 static void ScaleErrorSignalSSE2(int extended_filter_enabled, | 81 static void ScaleErrorSignalSSE2(int extended_filter_enabled, |
| 78 float normal_mu, | 82 float normal_mu, |
| 79 float normal_error_threshold, | 83 float normal_error_threshold, |
| 80 float *x_pow, | 84 float *x_pow, |
| 81 float ef[2][PART_LEN1]) { | 85 float ef[2][PART_LEN1]) { |
| 82 const __m128 k1e_10f = _mm_set1_ps(1e-10f); | 86 const __m128 k1e_10f = _mm_set1_ps(1e-10f); |
| (...skipping 643 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 726 } | 730 } |
| 727 } | 731 } |
| 728 | 732 |
| 729 void WebRtcAec_InitAec_SSE2(void) { | 733 void WebRtcAec_InitAec_SSE2(void) { |
| 730 WebRtcAec_FilterFar = FilterFarSSE2; | 734 WebRtcAec_FilterFar = FilterFarSSE2; |
| 731 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2; | 735 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2; |
| 732 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2; | 736 WebRtcAec_FilterAdaptation = FilterAdaptationSSE2; |
| 733 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2; | 737 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2; |
| 734 WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2; | 738 WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2; |
| 735 } | 739 } |
| OLD | NEW |