| Index: celt/celt.c
|
| diff --git a/celt/celt.c b/celt/celt.c
|
| index 3e0ce6e6a550768ea603e05b6ac740b029fd55c6..c0a1e0dab91fe1cbb091390ae4a7fc6cb7ff50b4 100644
|
| --- a/celt/celt.c
|
| +++ b/celt/celt.c
|
| @@ -54,6 +54,10 @@
|
| #define PACKAGE_VERSION "unknown"
|
| #endif
|
|
|
| +#if defined(MIPSr1_ASM)
|
| +#include "mips/celt_mipsr1.h"
|
| +#endif
|
| +
|
|
|
| int resampling_factor(opus_int32 rate)
|
| {
|
| @@ -86,6 +90,63 @@ int resampling_factor(opus_int32 rate)
|
| }
|
|
|
| #ifndef OVERRIDE_COMB_FILTER_CONST
|
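| +/* This version should be faster on ARM: the main loop is unrolled by five so the five-sample history rotates through x0..x4 without per-sample copies. */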
|
| +#ifdef OPUS_ARM_ASM
|
| +static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,

| + opus_val16 g10, opus_val16 g11, opus_val16 g12)

| +{ /* Constant-gain 5-tap comb filter: y[i] starts from x[i] and accumulates, via MAC16_32_Q16, g10 with x[i-T], g11 with x[i-T+1]+x[i-T-1] and g12 with x[i-T+2]+x[i-T-2]. MAC16_32_Q16/SHL32/ADD32 are defined elsewhere; their exact scaling is not visible here. */

| + opus_val32 x0, x1, x2, x3, x4; /* five-sample history window around x[i-T], each value passed through SHL32(., 1) */

| + int i;

| + x4 = SHL32(x[-T-2], 1); /* prime the window with x[-T-2] .. x[-T+1]; x0 is loaded inside the loops */

| + x3 = SHL32(x[-T-1], 1);

| + x2 = SHL32(x[-T], 1);

| + x1 = SHL32(x[-T+1], 1);

| + for (i=0;i<N-4;i+=5) /* unrolled by five: each pass writes y[i] .. y[i+4]; stops once fewer than five samples remain */

| + {

| + opus_val32 t;

| + x0=SHL32(x[i-T+2],1); /* newest sample completes the window for y[i] */

| + t = MAC16_32_Q16(x[i], g10, x2); /* centre tap: x2 holds x[i-T] */

| + t = MAC16_32_Q16(t, g11, ADD32(x1,x3)); /* taps at distance 1 from the centre */

| + t = MAC16_32_Q16(t, g12, ADD32(x0,x4)); /* taps at distance 2 from the centre */

| + y[i] = t;

| + x4=SHL32(x[i-T+3],1); /* x4 held the oldest sample; overwrite it with the next one instead of shifting the others */

| + t = MAC16_32_Q16(x[i+1], g10, x1); /* centre is now x1 = x[i+1-T] */

| + t = MAC16_32_Q16(t, g11, ADD32(x0,x2));

| + t = MAC16_32_Q16(t, g12, ADD32(x4,x3));

| + y[i+1] = t;

| + x3=SHL32(x[i-T+4],1); /* x3 is now the oldest slot; reload it */

| + t = MAC16_32_Q16(x[i+2], g10, x0); /* centre is now x0 = x[i+2-T] */

| + t = MAC16_32_Q16(t, g11, ADD32(x4,x1));

| + t = MAC16_32_Q16(t, g12, ADD32(x3,x2));

| + y[i+2] = t;

| + x2=SHL32(x[i-T+5],1); /* x2 is now the oldest slot; reload it */

| + t = MAC16_32_Q16(x[i+3], g10, x4); /* centre is now x4 = x[i+3-T] */

| + t = MAC16_32_Q16(t, g11, ADD32(x3,x0));

| + t = MAC16_32_Q16(t, g12, ADD32(x2,x1));

| + y[i+3] = t;

| + x1=SHL32(x[i-T+6],1); /* x1 is now the oldest slot; reload it */

| + t = MAC16_32_Q16(x[i+4], g10, x3); /* centre is now x3 = x[i+4-T] */

| + t = MAC16_32_Q16(t, g11, ADD32(x2,x4));

| + t = MAC16_32_Q16(t, g12, ADD32(x1,x0));

| + y[i+4] = t;

| + } /* on exit x4,x3,x2,x1 again hold x[i-T-2] .. x[i-T+1] for the new i, matching the priming order above */

| +#ifdef CUSTOM_MODES

| + for (;i<N;i++) /* tail: remaining samples one at a time; compiled only with CUSTOM_MODES, otherwise a trailing N%5 samples are left unwritten. NOTE(review): presumably N is a multiple of 5 in non-custom builds -- confirm against callers */

| + {

| + opus_val32 t;

| + x0=SHL32(x[i-T+2],1);

| + t = MAC16_32_Q16(x[i], g10, x2);

| + t = MAC16_32_Q16(t, g11, ADD32(x1,x3));

| + t = MAC16_32_Q16(t, g12, ADD32(x0,x4));

| + y[i] = t;

| + x4=x3; /* slide the window forward by one sample */

| + x3=x2;

| + x2=x1;

| + x1=x0;

| + }

| +#endif

| +}
|
| +#else
|
| static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
|
| opus_val16 g10, opus_val16 g11, opus_val16 g12)
|
| {
|
| @@ -110,7 +171,9 @@ static void comb_filter_const(opus_val32 *y, opus_val32 *x, int T, int N,
|
|
|
| }
|
| #endif
|
| +#endif
|
|
|
| +#ifndef OVERRIDE_comb_filter
|
| void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
|
| opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
|
| const opus_val16 *window, int overlap)
|
| @@ -131,16 +194,19 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
|
| OPUS_MOVE(y, x, N);
|
| return;
|
| }
|
| - g00 = MULT16_16_Q15(g0, gains[tapset0][0]);
|
| - g01 = MULT16_16_Q15(g0, gains[tapset0][1]);
|
| - g02 = MULT16_16_Q15(g0, gains[tapset0][2]);
|
| - g10 = MULT16_16_Q15(g1, gains[tapset1][0]);
|
| - g11 = MULT16_16_Q15(g1, gains[tapset1][1]);
|
| - g12 = MULT16_16_Q15(g1, gains[tapset1][2]);
|
| + g00 = MULT16_16_P15(g0, gains[tapset0][0]);
|
| + g01 = MULT16_16_P15(g0, gains[tapset0][1]);
|
| + g02 = MULT16_16_P15(g0, gains[tapset0][2]);
|
| + g10 = MULT16_16_P15(g1, gains[tapset1][0]);
|
| + g11 = MULT16_16_P15(g1, gains[tapset1][1]);
|
| + g12 = MULT16_16_P15(g1, gains[tapset1][2]);
|
| x1 = x[-T1+1];
|
| x2 = x[-T1 ];
|
| x3 = x[-T1-1];
|
| x4 = x[-T1-2];
|
| + /* If the filter didn't change, we don't need the overlap */
|
| + if (g0==g1 && T0==T1 && tapset0==tapset1)
|
| + overlap=0;
|
| for (i=0;i<overlap;i++)
|
| {
|
| opus_val16 f;
|
| @@ -170,6 +236,7 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
|
| /* Compute the part with the constant filter. */
|
| comb_filter_const(y+i, x+i, T1, N-i, g10, g11, g12);
|
| }
|
| +#endif /* OVERRIDE_comb_filter */
|
|
|
| const signed char tf_select_table[4][8] = {
|
| {0, -1, 0, -1, 0,-1, 0,-1},
|
|
|