| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. | 2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
| 3 * | 3 * |
| 4 * Use of this source code is governed by a BSD-style license | 4 * Use of this source code is governed by a BSD-style license |
| 5 * that can be found in the LICENSE file in the root of the source | 5 * that can be found in the LICENSE file in the root of the source |
| 6 * tree. An additional intellectual property rights grant can be found | 6 * tree. An additional intellectual property rights grant can be found |
| 7 * in the file PATENTS. All contributing project authors may | 7 * in the file PATENTS. All contributing project authors may |
| 8 * be found in the AUTHORS file in the root of the source tree. | 8 * be found in the AUTHORS file in the root of the source tree. |
| 9 */ | 9 */ |
| 10 | 10 |
| (...skipping 16 matching lines...) Expand all Loading... |
| 27 enum { kFloatExponentShift = 23 }; | 27 enum { kFloatExponentShift = 23 }; |
| 28 | 28 |
| 29 __inline static float MulRe(float aRe, float aIm, float bRe, float bIm) { | 29 __inline static float MulRe(float aRe, float aIm, float bRe, float bIm) { |
| 30 return aRe * bRe - aIm * bIm; | 30 return aRe * bRe - aIm * bIm; |
| 31 } | 31 } |
| 32 | 32 |
| 33 __inline static float MulIm(float aRe, float aIm, float bRe, float bIm) { | 33 __inline static float MulIm(float aRe, float aIm, float bRe, float bIm) { |
| 34 return aRe * bIm + aIm * bRe; | 34 return aRe * bIm + aIm * bRe; |
| 35 } | 35 } |
| 36 | 36 |
| 37 static void FilterFarNEON(AecCore* aec, float yf[2][PART_LEN1]) { | 37 static void FilterFarNEON(int num_partitions, |
| 38 int xfBufBlockPos, |
| 39 float xfBuf[2][kExtendedNumPartitions * PART_LEN1], |
| 40 float wfBuf[2][kExtendedNumPartitions * PART_LEN1], |
| 41 float yf[2][PART_LEN1]) { |
| 38 int i; | 42 int i; |
| 39 const int num_partitions = aec->num_partitions; | |
| 40 for (i = 0; i < num_partitions; i++) { | 43 for (i = 0; i < num_partitions; i++) { |
| 41 int j; | 44 int j; |
| 42 int xPos = (i + aec->xfBufBlockPos) * PART_LEN1; | 45 int xPos = (i + xfBufBlockPos) * PART_LEN1; |
| 43 int pos = i * PART_LEN1; | 46 int pos = i * PART_LEN1; |
| 44 // Check for wrap | 47 // Check for wrap |
| 45 if (i + aec->xfBufBlockPos >= num_partitions) { | 48 if (i + xfBufBlockPos >= num_partitions) { |
| 46 xPos -= num_partitions * PART_LEN1; | 49 xPos -= num_partitions * PART_LEN1; |
| 47 } | 50 } |
| 48 | 51 |
| 49 // vectorized code (four at once) | 52 // vectorized code (four at once) |
| 50 for (j = 0; j + 3 < PART_LEN1; j += 4) { | 53 for (j = 0; j + 3 < PART_LEN1; j += 4) { |
| 51 const float32x4_t xfBuf_re = vld1q_f32(&aec->xfBuf[0][xPos + j]); | 54 const float32x4_t xfBuf_re = vld1q_f32(&xfBuf[0][xPos + j]); |
| 52 const float32x4_t xfBuf_im = vld1q_f32(&aec->xfBuf[1][xPos + j]); | 55 const float32x4_t xfBuf_im = vld1q_f32(&xfBuf[1][xPos + j]); |
| 53 const float32x4_t wfBuf_re = vld1q_f32(&aec->wfBuf[0][pos + j]); | 56 const float32x4_t wfBuf_re = vld1q_f32(&wfBuf[0][pos + j]); |
| 54 const float32x4_t wfBuf_im = vld1q_f32(&aec->wfBuf[1][pos + j]); | 57 const float32x4_t wfBuf_im = vld1q_f32(&wfBuf[1][pos + j]); |
| 55 const float32x4_t yf_re = vld1q_f32(&yf[0][j]); | 58 const float32x4_t yf_re = vld1q_f32(&yf[0][j]); |
| 56 const float32x4_t yf_im = vld1q_f32(&yf[1][j]); | 59 const float32x4_t yf_im = vld1q_f32(&yf[1][j]); |
| 57 const float32x4_t a = vmulq_f32(xfBuf_re, wfBuf_re); | 60 const float32x4_t a = vmulq_f32(xfBuf_re, wfBuf_re); |
| 58 const float32x4_t e = vmlsq_f32(a, xfBuf_im, wfBuf_im); | 61 const float32x4_t e = vmlsq_f32(a, xfBuf_im, wfBuf_im); |
| 59 const float32x4_t c = vmulq_f32(xfBuf_re, wfBuf_im); | 62 const float32x4_t c = vmulq_f32(xfBuf_re, wfBuf_im); |
| 60 const float32x4_t f = vmlaq_f32(c, xfBuf_im, wfBuf_re); | 63 const float32x4_t f = vmlaq_f32(c, xfBuf_im, wfBuf_re); |
| 61 const float32x4_t g = vaddq_f32(yf_re, e); | 64 const float32x4_t g = vaddq_f32(yf_re, e); |
| 62 const float32x4_t h = vaddq_f32(yf_im, f); | 65 const float32x4_t h = vaddq_f32(yf_im, f); |
| 63 vst1q_f32(&yf[0][j], g); | 66 vst1q_f32(&yf[0][j], g); |
| 64 vst1q_f32(&yf[1][j], h); | 67 vst1q_f32(&yf[1][j], h); |
| 65 } | 68 } |
| 66 // scalar code for the remaining items. | 69 // scalar code for the remaining items. |
| 67 for (; j < PART_LEN1; j++) { | 70 for (; j < PART_LEN1; j++) { |
| 68 yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], | 71 yf[0][j] += MulRe(xfBuf[0][xPos + j], |
| 69 aec->xfBuf[1][xPos + j], | 72 xfBuf[1][xPos + j], |
| 70 aec->wfBuf[0][pos + j], | 73 wfBuf[0][pos + j], |
| 71 aec->wfBuf[1][pos + j]); | 74 wfBuf[1][pos + j]); |
| 72 yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], | 75 yf[1][j] += MulIm(xfBuf[0][xPos + j], |
| 73 aec->xfBuf[1][xPos + j], | 76 xfBuf[1][xPos + j], |
| 74 aec->wfBuf[0][pos + j], | 77 wfBuf[0][pos + j], |
| 75 aec->wfBuf[1][pos + j]); | 78 wfBuf[1][pos + j]); |
| 76 } | 79 } |
| 77 } | 80 } |
| 78 } | 81 } |
| 79 | 82 |
| 80 // ARM64's arm_neon.h has already defined vdivq_f32 vsqrtq_f32. | 83 // ARM64's arm_neon.h has already defined vdivq_f32 vsqrtq_f32. |
| 81 #if !defined (WEBRTC_ARCH_ARM64) | 84 #if !defined (WEBRTC_ARCH_ARM64) |
| 82 static float32x4_t vdivq_f32(float32x4_t a, float32x4_t b) { | 85 static float32x4_t vdivq_f32(float32x4_t a, float32x4_t b) { |
| 83 int i; | 86 int i; |
| 84 float32x4_t x = vrecpeq_f32(b); | 87 float32x4_t x = vrecpeq_f32(b); |
| 85 // from arm documentation | 88 // from arm documentation |
| (...skipping 644 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 730 } | 733 } |
| 731 } | 734 } |
| 732 | 735 |
| 733 void WebRtcAec_InitAec_neon(void) { | 736 void WebRtcAec_InitAec_neon(void) { |
| 734 WebRtcAec_FilterFar = FilterFarNEON; | 737 WebRtcAec_FilterFar = FilterFarNEON; |
| 735 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; | 738 WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON; |
| 736 WebRtcAec_FilterAdaptation = FilterAdaptationNEON; | 739 WebRtcAec_FilterAdaptation = FilterAdaptationNEON; |
| 737 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; | 740 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON; |
| 738 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; | 741 WebRtcAec_SubbandCoherence = SubbandCoherenceNEON; |
| 739 } | 742 } |
| OLD | NEW |