| Index: celt/pitch.c
|
| diff --git a/celt/pitch.c b/celt/pitch.c
|
| index d2b305441dadb2aaae7f1d0a4a9c47115b644b34..43647030936454662b073b58e0d7ef271d440c26 100644
|
| --- a/celt/pitch.c
|
| +++ b/celt/pitch.c
|
| @@ -214,25 +214,35 @@ void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x
|
| celt_fir5(x_lp, lpc2, x_lp, len>>1, mem);
|
| }
|
|
|
| -#if 0 /* This is a simple version of the pitch correlation that should work
|
| - well on DSPs like Blackfin and TI C5x/C6x */
|
| -
|
| +/* Pure C implementation. */
|
| #ifdef FIXED_POINT
|
| opus_val32
|
| #else
|
| void
|
| #endif
|
| -celt_pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int max_pitch)
|
| +#if defined(OVERRIDE_PITCH_XCORR)
|
| +celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
|
| + opus_val32 *xcorr, int len, int max_pitch)
|
| +#else
|
| +celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y,
|
| + opus_val32 *xcorr, int len, int max_pitch, int arch)
|
| +#endif
|
| {
|
| +
|
| +#if 0 /* This is a simple version of the pitch correlation that should work
|
| + well on DSPs like Blackfin and TI C5x/C6x */
|
| int i, j;
|
| #ifdef FIXED_POINT
|
| opus_val32 maxcorr=1;
|
| #endif
|
| +#if !defined(OVERRIDE_PITCH_XCORR)
|
| + (void)arch;
|
| +#endif
|
| for (i=0;i<max_pitch;i++)
|
| {
|
| opus_val32 sum = 0;
|
| for (j=0;j<len;j++)
|
| - sum = MAC16_16(sum, x[j],y[i+j]);
|
| + sum = MAC16_16(sum, _x[j], _y[i+j]);
|
| xcorr[i] = sum;
|
| #ifdef FIXED_POINT
|
| maxcorr = MAX32(maxcorr, sum);
|
| @@ -241,30 +251,25 @@ celt_pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int m
|
| #ifdef FIXED_POINT
|
| return maxcorr;
|
| #endif
|
| -}
|
|
|
| #else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */
|
| -
|
| -#ifdef FIXED_POINT
|
| -opus_val32
|
| -#else
|
| -void
|
| -#endif
|
| -celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch)
|
| -{
|
| - int i,j;
|
| + int i;
|
| /*The EDSP version requires that max_pitch is at least 1, and that _x is
|
| 32-bit aligned.
|
| Since it's hard to put asserts in assembly, put them here.*/
|
| - celt_assert(max_pitch>0);
|
| - celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
|
| #ifdef FIXED_POINT
|
| opus_val32 maxcorr=1;
|
| #endif
|
| + celt_assert(max_pitch>0);
|
| + celt_assert((((unsigned char *)_x-(unsigned char *)NULL)&3)==0);
|
| for (i=0;i<max_pitch-3;i+=4)
|
| {
|
| opus_val32 sum[4]={0,0,0,0};
|
| - xcorr_kernel(_x, _y+i, sum, len);
|
| +#if defined(OVERRIDE_PITCH_XCORR)
|
| + xcorr_kernel_c(_x, _y+i, sum, len);
|
| +#else
|
| + xcorr_kernel(_x, _y+i, sum, len, arch);
|
| +#endif
|
| xcorr[i]=sum[0];
|
| xcorr[i+1]=sum[1];
|
| xcorr[i+2]=sum[2];
|
| @@ -279,9 +284,12 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr
|
| /* In case max_pitch isn't a multiple of 4, do non-unrolled version. */
|
| for (;i<max_pitch;i++)
|
| {
|
| - opus_val32 sum = 0;
|
| - for (j=0;j<len;j++)
|
| - sum = MAC16_16(sum, _x[j],_y[i+j]);
|
| + opus_val32 sum;
|
| +#if defined(OVERRIDE_PITCH_XCORR)
|
| + sum = celt_inner_prod_c(_x, _y+i, len);
|
| +#else
|
| + sum = celt_inner_prod(_x, _y+i, len, arch);
|
| +#endif
|
| xcorr[i] = sum;
|
| #ifdef FIXED_POINT
|
| maxcorr = MAX32(maxcorr, sum);
|
| @@ -290,9 +298,9 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y, opus_val32 *xcorr
|
| #ifdef FIXED_POINT
|
| return maxcorr;
|
| #endif
|
| +#endif
|
| }
|
|
|
| -#endif
|
| void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
|
| int len, int max_pitch, int *pitch, int arch)
|
| {
|
| @@ -361,12 +369,17 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
|
| #endif
|
| for (i=0;i<max_pitch>>1;i++)
|
| {
|
| - opus_val32 sum=0;
|
| + opus_val32 sum;
|
| xcorr[i] = 0;
|
| if (abs(i-2*best_pitch[0])>2 && abs(i-2*best_pitch[1])>2)
|
| continue;
|
| +#ifdef FIXED_POINT
|
| + sum = 0;
|
| for (j=0;j<len>>1;j++)
|
| sum += SHR32(MULT16_16(x_lp[j],y[i+j]), shift);
|
| +#else
|
| + sum = celt_inner_prod_c(x_lp, y+i, len>>1);
|
| +#endif
|
| xcorr[i] = MAX32(-1, sum);
|
| #ifdef FIXED_POINT
|
| maxcorr = MAX32(maxcorr, sum);
|
| @@ -401,7 +414,7 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
|
|
|
| static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2};
|
| opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
|
| - int N, int *T0_, int prev_period, opus_val16 prev_gain)
|
| + int N, int *T0_, int prev_period, opus_val16 prev_gain, int arch)
|
| {
|
| int k, i, T, T0;
|
| opus_val16 g, g0;
|
| @@ -456,7 +469,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
|
| opus_val16 g1;
|
| opus_val16 cont=0;
|
| opus_val16 thresh;
|
| - T1 = (2*T0+k)/(2*k);
|
| + T1 = celt_udiv(2*T0+k, 2*k);
|
| if (T1 < minperiod)
|
| break;
|
| /* Look for another strong correlation at T1b */
|
| @@ -468,7 +481,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
|
| T1b = T0+T1;
|
| } else
|
| {
|
| - T1b = (2*second_check[k]*T0+k)/(2*k);
|
| + T1b = celt_udiv(2*second_check[k]*T0+k, 2*k);
|
| }
|
| dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2);
|
| xy += xy2;
|
| @@ -513,13 +526,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
|
| pg = SHR32(frac_div32(best_xy,best_yy+1),16);
|
|
|
| for (k=0;k<3;k++)
|
| - {
|
| - int T1 = T+k-1;
|
| - xy = 0;
|
| - for (i=0;i<N;i++)
|
| - xy = MAC16_16(xy, x[i], x[i-T1]);
|
| - xcorr[k] = xy;
|
| - }
|
| + xcorr[k] = celt_inner_prod(x, x-(T+k-1), N, arch);
|
| if ((xcorr[2]-xcorr[0]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[0]))
|
| offset = 1;
|
| else if ((xcorr[0]-xcorr[2]) > MULT16_32_Q15(QCONST16(.7f,15),xcorr[1]-xcorr[2]))
|
|
|