Index: celt/pitch.c |
diff --git a/celt/pitch.c b/celt/pitch.c |
index ca0f523e3c68fa78ec4f3d79bb4868fda98471d0..0352b30252467261741483bb951d750d8581b55e 100644 |
--- a/celt/pitch.c |
+++ b/celt/pitch.c |
@@ -102,13 +102,57 @@ static void find_best_pitch(opus_val32 *xcorr, opus_val16 *y, int len, |
} |
} |
+static void celt_fir5(const opus_val16 *x, /* input samples; x[0..N-1] are read */ |
+ const opus_val16 *num, /* filter taps; num[0..4] are read */ |
+ opus_val16 *y, /* output; y[0..N-1] are written */ |
+ int N, /* number of samples to process */ |
+ opus_val16 *mem) /* five-entry input history: read on entry, overwritten on exit */ |
+{ /* Fixed 5-tap FIR: each y[i] is built from x[i] plus num[k] applied to the five previous inputs */ |
+ int i; |
+ opus_val16 num0, num1, num2, num3, num4; |
+ opus_val32 mem0, mem1, mem2, mem3, mem4; |
+ num0=num[0]; /* taps and history are copied into locals before the loop */ |
+ num1=num[1]; |
+ num2=num[2]; |
+ num3=num[3]; |
+ num4=num[4]; |
+ mem0=mem[0]; /* mem0 pairs with num0, ..., mem4 pairs with num4 */ |
+ mem1=mem[1]; |
+ mem2=mem[2]; |
+ mem3=mem[3]; |
+ mem4=mem[4]; |
+ for (i=0;i<N;i++) |
+ { |
+ opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT); /* accumulator starts from the current input; presumably widened and shifted up by SIG_SHIFT - confirm macro definitions */ |
+ sum = MAC16_16(sum,num0,mem0); /* mem0 holds the most recent previous input, mem4 the oldest of the five */ |
+ sum = MAC16_16(sum,num1,mem1); |
+ sum = MAC16_16(sum,num2,mem2); |
+ sum = MAC16_16(sum,num3,mem3); |
+ sum = MAC16_16(sum,num4,mem4); |
+ mem4 = mem3; /* age the history by one sample */ |
+ mem3 = mem2; |
+ mem2 = mem1; |
+ mem1 = mem0; |
+ mem0 = x[i]; /* x[i] is read before y[i] is stored, which matters when y aliases x as in the pitch_downsample call */ |
+ y[i] = ROUND16(sum, SIG_SHIFT); /* presumably rounds the accumulator back down by SIG_SHIFT - confirm ROUND16 */ |
+ } |
+ mem[0]=mem0; /* store the updated history back into the caller's array */ |
+ mem[1]=mem1; |
+ mem[2]=mem2; |
+ mem[3]=mem3; |
+ mem[4]=mem4; |
+} |
+ |
+ |
void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x_lp, |
int len, int C) |
{ |
int i; |
opus_val32 ac[5]; |
opus_val16 tmp=Q15ONE; |
- opus_val16 lpc[4], mem[4]={0,0,0,0}; |
+ opus_val16 lpc[4], mem[5]={0,0,0,0,0}; |
+ opus_val16 lpc2[5]; |
+ opus_val16 c1 = QCONST16(.8f,15); |
#ifdef FIXED_POINT |
int shift; |
opus_val32 maxabs = celt_maxabs32(x[0], len); |
@@ -161,14 +205,89 @@ void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x |
tmp = MULT16_16_Q15(QCONST16(.9f,15), tmp); |
lpc[i] = MULT16_16_Q15(lpc[i], tmp); |
} |
- celt_fir(x_lp, lpc, x_lp, len>>1, 4, mem); |
+ /* Add a zero */ |
+ lpc2[0] = lpc[0] + QCONST16(.8f,SIG_SHIFT); |
+ lpc2[1] = lpc[1] + MULT16_16_Q15(c1,lpc[0]); |
+ lpc2[2] = lpc[2] + MULT16_16_Q15(c1,lpc[1]); |
+ lpc2[3] = lpc[3] + MULT16_16_Q15(c1,lpc[2]); |
+ lpc2[4] = MULT16_16_Q15(c1,lpc[3]); |
+ celt_fir5(x_lp, lpc2, x_lp, len>>1, mem); |
+} |
- mem[0]=0; |
- lpc[0]=QCONST16(.8f,12); |
- celt_fir(x_lp, lpc, x_lp, len>>1, 1, mem); |
+#if 0 /* This is a simple version of the pitch correlation that should work |
+ well on DSPs like Blackfin and TI C5x/C6x */ /* this variant is compiled out by the #if 0 */ |
+#ifdef FIXED_POINT |
+opus_val32 /* fixed-point build: returns the largest value stored in xcorr, but never less than 1 */ |
+#else |
+void /* other builds: nothing is returned */ |
+#endif |
+celt_pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int max_pitch) /* writes xcorr[0..max_pitch-1]; reads x[0..len-1] and y[0..len+max_pitch-2] */ |
+{ |
+ int i, j; |
+#ifdef FIXED_POINT |
+ opus_val32 maxcorr=1; /* running maximum; starting at 1 makes that the floor of the return value */ |
+#endif |
+ for (i=0;i<max_pitch;i++) /* one output per offset i into y */ |
+ { |
+ opus_val32 sum = 0; |
+ for (j=0;j<len;j++) |
+ sum = MAC16_16(sum, x[j],y[i+j]); /* accumulates x[j]*y[i+j], assuming MAC16_16(c,a,b) is c + a*b - confirm macro */ |
+ xcorr[i] = sum; |
+#ifdef FIXED_POINT |
+ maxcorr = MAX32(maxcorr, sum); |
+#endif |
+ } |
+#ifdef FIXED_POINT |
+ return maxcorr; |
+#endif |
} |
+#else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */ |
+ |
+#ifdef FIXED_POINT |
+opus_val32 /* fixed-point build: returns the largest value stored in xcorr, but never less than 1 */ |
+#else |
+void /* other builds: nothing is returned */ |
+#endif |
+celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch) /* writes xcorr[0..max_pitch-1] */ |
+{ |
+ int i,j; |
+#ifdef FIXED_POINT |
+ opus_val32 maxcorr=1; /* running maximum; starting at 1 makes that the floor of the return value */ |
+#endif |
+ for (i=0;i<max_pitch-3;i+=4) /* four offsets per iteration, while at least four remain */ |
+ { |
+ opus_val32 sum[4]={0,0,0,0}; |
+ xcorr_kernel(_x, _y+i, sum, len); /* presumably accumulates the correlations for offsets i..i+3 into sum[0..3]; xcorr_kernel is defined elsewhere - confirm */ |
+ xcorr[i]=sum[0]; |
+ xcorr[i+1]=sum[1]; |
+ xcorr[i+2]=sum[2]; |
+ xcorr[i+3]=sum[3]; |
+#ifdef FIXED_POINT |
+ sum[0] = MAX32(sum[0], sum[1]); /* sum[] is reused as scratch to reduce the four results to their maximum; xcorr was already written */ |
+ sum[2] = MAX32(sum[2], sum[3]); |
+ sum[0] = MAX32(sum[0], sum[2]); |
+ maxcorr = MAX32(maxcorr, sum[0]); |
+#endif |
+ } |
+ /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */ |
+ for (;i<max_pitch;i++) /* continues from the i left by the loop above */ |
+ { |
+ opus_val32 sum = 0; |
+ for (j=0;j<len;j++) |
+ sum = MAC16_16(sum, _x[j],_y[i+j]); /* accumulates _x[j]*_y[i+j], assuming MAC16_16(c,a,b) is c + a*b - confirm macro */ |
+ xcorr[i] = sum; |
+#ifdef FIXED_POINT |
+ maxcorr = MAX32(maxcorr, sum); |
+#endif |
+ } |
+#ifdef FIXED_POINT |
+ return maxcorr; |
+#endif |
+} |
+ |
+#endif |
void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y, |
int len, int max_pitch, int *pitch) |
{ |
@@ -179,8 +298,8 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR |
VARDECL(opus_val16, y_lp4); |
VARDECL(opus_val32, xcorr); |
#ifdef FIXED_POINT |
- opus_val32 maxcorr=1; |
- opus_val16 xmax, ymax; |
+ opus_val32 maxcorr; |
+ opus_val32 xmax, ymax; |
int shift=0; |
#endif |
int offset; |
@@ -204,7 +323,7 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR |
#ifdef FIXED_POINT |
xmax = celt_maxabs16(x_lp4, len>>2); |
ymax = celt_maxabs16(y_lp4, lag>>2); |
- shift = celt_ilog2(MAX16(1, MAX16(xmax, ymax)))-11; |
+ shift = celt_ilog2(MAX32(1, MAX32(xmax, ymax)))-11; |
if (shift>0) |
{ |
for (j=0;j<len>>2;j++) |
@@ -220,16 +339,11 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR |
/* Coarse search with 4x decimation */ |
- for (i=0;i<max_pitch>>2;i++) |
- { |
- opus_val32 sum = 0; |
- for (j=0;j<len>>2;j++) |
- sum = MAC16_16(sum, x_lp4[j],y_lp4[i+j]); |
- xcorr[i] = MAX32(-1, sum); |
#ifdef FIXED_POINT |
- maxcorr = MAX32(maxcorr, sum); |
+ maxcorr = |
#endif |
- } |
+ celt_pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2); |
+ |
find_best_pitch(xcorr, y_lp4, len>>2, max_pitch>>2, best_pitch |
#ifdef FIXED_POINT |
, 0, maxcorr |
@@ -287,11 +401,13 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, |
int k, i, T, T0; |
opus_val16 g, g0; |
opus_val16 pg; |
- opus_val32 xy,xx,yy; |
+ opus_val32 xy,xx,yy,xy2; |
opus_val32 xcorr[3]; |
opus_val32 best_xy, best_yy; |
int offset; |
int minperiod0; |
+ VARDECL(opus_val32, yy_lookup); |
+ SAVE_STACK; |
minperiod0 = minperiod; |
maxperiod /= 2; |
@@ -304,13 +420,16 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, |
*T0_=maxperiod-1; |
T = T0 = *T0_; |
- xx=xy=yy=0; |
- for (i=0;i<N;i++) |
+ ALLOC(yy_lookup, maxperiod+1, opus_val32); |
+ dual_inner_prod(x, x, x-T0, N, &xx, &xy); |
+ yy_lookup[0] = xx; |
+ yy=xx; |
+ for (i=1;i<=maxperiod;i++) |
{ |
- xy = MAC16_16(xy, x[i], x[i-T0]); |
- xx = MAC16_16(xx, x[i], x[i]); |
- yy = MAC16_16(yy, x[i-T0],x[i-T0]); |
+ yy = yy+MULT16_16(x[-i],x[-i])-MULT16_16(x[N-i],x[N-i]); |
+ yy_lookup[i] = MAX32(0, yy); |
} |
+ yy = yy_lookup[T0]; |
best_xy = xy; |
best_yy = yy; |
#ifdef FIXED_POINT |
@@ -331,6 +450,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, |
int T1, T1b; |
opus_val16 g1; |
opus_val16 cont=0; |
+ opus_val16 thresh; |
T1 = (2*T0+k)/(2*k); |
if (T1 < minperiod) |
break; |
@@ -345,15 +465,9 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, |
{ |
T1b = (2*second_check[k]*T0+k)/(2*k); |
} |
- xy=yy=0; |
- for (i=0;i<N;i++) |
- { |
- xy = MAC16_16(xy, x[i], x[i-T1]); |
- yy = MAC16_16(yy, x[i-T1], x[i-T1]); |
- |
- xy = MAC16_16(xy, x[i], x[i-T1b]); |
- yy = MAC16_16(yy, x[i-T1b], x[i-T1b]); |
- } |
+ dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2); |
+ xy += xy2; |
+ yy = yy_lookup[T1] + yy_lookup[T1b]; |
#ifdef FIXED_POINT |
{ |
opus_val32 x2y2; |
@@ -372,7 +486,14 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, |
cont = HALF32(prev_gain); |
else |
cont = 0; |
- if (g1 > QCONST16(.3f,15) + MULT16_16_Q15(QCONST16(.4f,15),g0)-cont) |
+ thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7f,15),g0)-cont); /* default threshold: the larger of .3 and .7*g0 - cont */ |
+ /* Bias against very high pitch (very short period) to avoid false-positives |
+ due to short-term correlation */ |
+ if (T1<3*minperiod) |
+ thresh = MAX16(QCONST16(.4f,15), MULT16_16_Q15(QCONST16(.85f,15),g0)-cont); |
+ else if (T1<2*minperiod) /* NOTE(review): never taken when minperiod >= 0, since T1<2*minperiod already satisfies the T1<3*minperiod test above; the .5/.9 threshold is dead code - confirm whether the two tests were meant to be in the opposite order */ |
+ thresh = MAX16(QCONST16(.5f,15), MULT16_16_Q15(QCONST16(.9f,15),g0)-cont); |
+ if (g1 > thresh) /* the candidate period is accepted only when its gain g1 exceeds the threshold chosen above */ |
{ |
best_xy = xy; |
best_yy = yy; |
@@ -406,5 +527,6 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, |
if (*T0_<minperiod0) |
*T0_=minperiod0; |
+ RESTORE_STACK; |
return pg; |
} |