Index: celt/celt_lpc.c |
diff --git a/celt/celt_lpc.c b/celt/celt_lpc.c |
index d2addbf24b2323e0368242df94605c8a3a46c12d..7ffe90a357ed482b98c7b0bcc94ccf3f96f9ed81 100644 |
--- a/celt/celt_lpc.c |
+++ b/celt/celt_lpc.c |
@@ -32,6 +32,7 @@ |
#include "celt_lpc.h" |
#include "stack_alloc.h" |
#include "mathops.h" |
+#include "pitch.h" |
void _celt_lpc( |
opus_val16 *_lpc, /* out: [0...p-1] LPC coefficients */ |
@@ -87,42 +88,71 @@ int p |
#endif |
} |
-void celt_fir(const opus_val16 *x, |
+void celt_fir(const opus_val16 *_x, /* FIR: _y[i] = _x[i] + sum_k num[k]*_x[i-1-k], products in SIG_SHIFT scaling */ |
const opus_val16 *num, |
- opus_val16 *y, |
+ opus_val16 *_y, /* out: N filtered samples */ |
int N, |
int ord, |
opus_val16 *mem) |
{ |
int i,j; |
+ VARDECL(opus_val16, rnum); /* num[] in reverse order */ |
+ VARDECL(opus_val16, x); /* ord history samples followed by the N inputs */ |
+ SAVE_STACK; |
+ ALLOC(rnum, ord, opus_val16); |
+ ALLOC(x, N+ord, opus_val16); |
+ for(i=0;i<ord;i++) |
+ rnum[i] = num[ord-i-1]; |
+ for(i=0;i<ord;i++) |
+ x[i] = mem[ord-i-1]; /* mem[0] is the newest past input, so history goes in oldest-first */ |
+ for (i=0;i<N;i++) |
+ x[i+ord]=_x[i]; |
+ for(i=0;i<ord;i++) |
+ mem[i] = _x[N-i-1]; /* save the last ord inputs, newest first; reads _x[N-ord..N-1], so needs N>=ord */ |
+#ifdef SMALL_FOOTPRINT /* one output sample per iteration */ |
for (i=0;i<N;i++) |
{ |
- opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT); |
+ opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT); /* start from the current input, then add the taps */ |
for (j=0;j<ord;j++) |
{ |
- sum += MULT16_16(num[j],mem[j]); |
- } |
- for (j=ord-1;j>=1;j--) |
- { |
- mem[j]=mem[j-1]; |
+ sum = MAC16_16(sum,rnum[j],x[i+j]); /* reversed taps against the contiguous history buffer */ |
} |
- mem[0] = x[i]; |
- y[i] = ROUND16(sum, SIG_SHIFT); |
+ _y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT)); |
} |
+#else /* four output samples per iteration */ |
+ for (i=0;i<N-3;i+=4) |
+ { |
+ opus_val32 sum[4]={0,0,0,0}; |
+ xcorr_kernel(rnum, x+i, sum, ord); /* presumably accumulates sum[k] += rnum[j]*x[i+j+k] for k=0..3 - confirm in pitch.h */ |
+ _y[i ] = SATURATE16(ADD32(EXTEND32(_x[i ]), PSHR32(sum[0], SIG_SHIFT))); |
+ _y[i+1] = SATURATE16(ADD32(EXTEND32(_x[i+1]), PSHR32(sum[1], SIG_SHIFT))); |
+ _y[i+2] = SATURATE16(ADD32(EXTEND32(_x[i+2]), PSHR32(sum[2], SIG_SHIFT))); |
+ _y[i+3] = SATURATE16(ADD32(EXTEND32(_x[i+3]), PSHR32(sum[3], SIG_SHIFT))); |
+ } |
+ for (;i<N;i++) /* leftover samples when N is not a multiple of 4 */ |
+ { |
+ opus_val32 sum = 0; |
+ for (j=0;j<ord;j++) |
+ sum = MAC16_16(sum,rnum[j],x[i+j]); |
+ _y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT))); |
+ } |
+#endif |
+ RESTORE_STACK; |
} |
-void celt_iir(const opus_val32 *x, |
+void celt_iir(const opus_val32 *_x, /* IIR: _y[n] = _x[n] - sum_k den[k]*ROUND16(_y[n-1-k]); mem[0] is the newest past output */ |
const opus_val16 *den, |
- opus_val32 *y, |
+ opus_val32 *_y, /* out: N filtered samples */ |
int N, |
int ord, |
opus_val16 *mem) |
{ |
+#ifdef SMALL_FOOTPRINT /* reference one-sample-at-a-time form */ |
int i,j; |
for (i=0;i<N;i++) |
{ |
- opus_val32 sum = x[i]; |
+ opus_val32 sum = _x[i]; |
for (j=0;j<ord;j++) |
{ |
sum -= MULT16_16(den[j],mem[j]); |
@@ -132,11 +162,65 @@ void celt_iir(const opus_val32 *x, |
mem[j]=mem[j-1]; |
} |
mem[0] = ROUND16(sum,SIG_SHIFT); |
- y[i] = sum; |
+ _y[i] = sum; |
} |
+#else /* four samples per iteration; requires ord to be a multiple of 4 */ |
+ int i,j; |
+ VARDECL(opus_val16, rden); /* den[] in reverse order */ |
+ VARDECL(opus_val16, y); /* NEGATED rounded outputs: ord history values, then the N new ones */ |
+ SAVE_STACK; |
+ |
+ celt_assert((ord&3)==0); |
+ ALLOC(rden, ord, opus_val16); |
+ ALLOC(y, N+ord, opus_val16); |
+ for(i=0;i<ord;i++) |
+ rden[i] = den[ord-i-1]; |
+ for(i=0;i<ord;i++) |
+ y[i] = -mem[ord-i-1]; /* negated so that accumulating rden[j]*y[] subtracts the feedback term */ |
+ for(;i<N+ord;i++) |
+ y[i]=0; /* not-yet-computed outputs contribute nothing to the kernel sums */ |
+ for (i=0;i<N-3;i+=4) |
+ { |
+ /* Unroll by 4 as if it were an FIR filter */ |
+ opus_val32 sum[4]; |
+ sum[0]=_x[i]; |
+ sum[1]=_x[i+1]; |
+ sum[2]=_x[i+2]; |
+ sum[3]=_x[i+3]; |
+ xcorr_kernel(rden, y+i, sum, ord); |
+ |
+ /* Patch up the result to compensate for the fact that this is an IIR */ |
+ y[i+ord ] = -ROUND16(sum[0],SIG_SHIFT); |
+ _y[i ] = sum[0]; |
+ sum[1] = MAC16_16(sum[1], y[i+ord ], den[0]); |
+ y[i+ord+1] = -ROUND16(sum[1],SIG_SHIFT); |
+ _y[i+1] = sum[1]; |
+ sum[2] = MAC16_16(sum[2], y[i+ord+1], den[0]); |
+ sum[2] = MAC16_16(sum[2], y[i+ord ], den[1]); |
+ y[i+ord+2] = -ROUND16(sum[2],SIG_SHIFT); |
+ _y[i+2] = sum[2]; |
+ |
+ sum[3] = MAC16_16(sum[3], y[i+ord+2], den[0]); |
+ sum[3] = MAC16_16(sum[3], y[i+ord+1], den[1]); |
+ sum[3] = MAC16_16(sum[3], y[i+ord ], den[2]); |
+ y[i+ord+3] = -ROUND16(sum[3],SIG_SHIFT); |
+ _y[i+3] = sum[3]; |
+ } |
+ for (;i<N;i++) /* leftover samples when N is not a multiple of 4 */ |
+ { |
+ opus_val32 sum = _x[i]; |
+ for (j=0;j<ord;j++) |
+ sum = MAC16_16(sum, rden[j], y[i+j]); /* y[] is negated, so adding subtracts the feedback */ |
+ y[i+ord] = -ROUND16(sum,SIG_SHIFT); /* keep the negated convention used everywhere else in y[] */ |
+ _y[i] = sum; |
+ } |
+ for(i=0;i<ord;i++) |
+ mem[i] = -y[N+ord-i-1]; /* rounded state, newest first, as in the SMALL_FOOTPRINT path; stays in bounds when N<ord */ |
+ RESTORE_STACK; |
+#endif |
} |
-void _celt_autocorr( |
+int _celt_autocorr( /* returns the net right-shift applied to ac[]; always 0 unless FIXED_POINT */ |
const opus_val16 *x, /* in: [0...n-1] samples x */ |
opus_val32 *ac, /* out: [0...lag-1] ac values */ |
const opus_val16 *window, |
@@ -146,43 +230,79 @@ void _celt_autocorr( |
) |
{ |
opus_val32 d; |
- int i; |
+ int i, k; |
+ int fastN=n-lag; /* length handed to celt_pitch_xcorr(); the loop after it adds the terms from i=k+fastN up */ |
+ int shift; |
+ const opus_val16 *xptr; /* signal actually correlated: x itself, or the windowed/scaled copy in xx */ |
VARDECL(opus_val16, xx); |
SAVE_STACK; |
ALLOC(xx, n, opus_val16); |
celt_assert(n>0); |
celt_assert(overlap>=0); |
- for (i=0;i<n;i++) |
- xx[i] = x[i]; |
- for (i=0;i<overlap;i++) |
+ if (overlap == 0) /* nothing to window: read x directly and skip the copy */ |
{ |
- xx[i] = MULT16_16_Q15(x[i],window[i]); |
- xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]); |
+ xptr = x; |
+ } else { |
+ for (i=0;i<n;i++) |
+ xx[i] = x[i]; |
+ for (i=0;i<overlap;i++) |
+ { |
+ xx[i] = MULT16_16_Q15(x[i],window[i]); /* taper the first overlap samples */ |
+ xx[n-i-1] = MULT16_16_Q15(x[n-i-1],window[i]); /* and mirror the same window onto the last overlap samples */ |
+ } |
+ xptr = xx; |
} |
+ shift=0; /* float builds never change this */ |
#ifdef FIXED_POINT |
{ |
- opus_val32 ac0=0; |
- int shift; |
- for(i=0;i<n;i++) |
- ac0 += SHR32(MULT16_16(xx[i],xx[i]),9); |
- ac0 += 1+n; |
+ opus_val32 ac0; /* energy estimate (each product shifted down by 9) used to pick the input scaling */ |
+ ac0 = 1+(n<<7); |
+ if (n&1) ac0 += SHR32(MULT16_16(xptr[0],xptr[0]),9); /* odd n: take sample 0 here so the loop below can go two at a time */ |
+ for(i=(n&1);i<n;i+=2) |
+ { |
+ ac0 += SHR32(MULT16_16(xptr[i],xptr[i]),9); |
+ ac0 += SHR32(MULT16_16(xptr[i+1],xptr[i+1]),9); |
+ } |
shift = celt_ilog2(ac0)-30+10; |
- shift = (shift+1)/2; |
- for(i=0;i<n;i++) |
- xx[i] = VSHR32(xx[i], shift); |
+ shift = (shift)/2; |
+ if (shift>0) /* only rescale (and switch to the xx copy) when a down-shift is needed */ |
+ { |
+ for(i=0;i<n;i++) |
+ xx[i] = PSHR32(xptr[i], shift); |
+ xptr = xx; |
+ } else |
+ shift = 0; |
} |
#endif |
- while (lag>=0) |
+ celt_pitch_xcorr(xptr, xptr, ac, fastN, lag+1); /* presumably fills ac[0..lag] with sums over the first fastN products - confirm in pitch.h */ |
+ for (k=0;k<=lag;k++) /* add the products celt_pitch_xcorr() did not cover */ |
{ |
- for (i = lag, d = 0; i < n; i++) |
- d += xx[i] * xx[i-lag]; |
- ac[lag] = d; |
- /*printf ("%f ", ac[lag]);*/ |
- lag--; |
+ for (i = k+fastN, d = 0; i < n; i++) |
+ d = MAC16_16(d, xptr[i], xptr[i-k]); |
+ ac[k] += d; |
} |
- /*printf ("\n");*/ |
- ac[0] += 10; |
+#ifdef FIXED_POINT |
+ shift = 2*shift; /* inputs were shifted by shift, so their products carry twice that */ |
+ if (shift<=0) |
+ ac[0] += SHL32((opus_int32)1, -shift); |
+ if (ac[0] < 268435456) /* below 2^28: scale ac[] up */ |
+ { |
+ int shift2 = 29 - EC_ILOG(ac[0]); |
+ for (i=0;i<=lag;i++) |
+ ac[i] = SHL32(ac[i], shift2); |
+ shift -= shift2; |
+ } else if (ac[0] >= 536870912) /* 2^29 or more: scale ac[] down by one or two bits */ |
+ { |
+ int shift2=1; |
+ if (ac[0] >= 1073741824) /* 2^30 */ |
+ shift2++; |
+ for (i=0;i<=lag;i++) |
+ ac[i] = SHR32(ac[i], shift2); |
+ shift += shift2; |
+ } |
+#endif |
RESTORE_STACK; |
+ return shift; |
} |