| Index: celt/kiss_fft.c
|
| diff --git a/celt/kiss_fft.c b/celt/kiss_fft.c
|
| index ad706c739717bc060df49df07b862b625bb50a60..cc487fcfb4862a371cafdaf69fc3d0db2ab5b7a7 100644
|
| --- a/celt/kiss_fft.c
|
| +++ b/celt/kiss_fft.c
|
| @@ -47,64 +47,56 @@
|
|
|
| static void kf_bfly2(
|
| kiss_fft_cpx * Fout,
|
| - const size_t fstride,
|
| - const kiss_fft_state *st,
|
| int m,
|
| - int N,
|
| - int mm
|
| + int N
|
| )
|
| {
|
| kiss_fft_cpx * Fout2;
|
| - const kiss_twiddle_cpx * tw1;
|
| - int i,j;
|
| - kiss_fft_cpx * Fout_beg = Fout;
|
| - for (i=0;i<N;i++)
|
| + int i;
|
| + (void)m;
|
| +#ifdef CUSTOM_MODES
|
| + if (m==1)
|
| {
|
| - Fout = Fout_beg + i*mm;
|
| - Fout2 = Fout + m;
|
| - tw1 = st->twiddles;
|
| - for(j=0;j<m;j++)
|
| + celt_assert(m==1);
|
| + for (i=0;i<N;i++)
|
| {
|
| kiss_fft_cpx t;
|
| - Fout->r = SHR32(Fout->r, 1);Fout->i = SHR32(Fout->i, 1);
|
| - Fout2->r = SHR32(Fout2->r, 1);Fout2->i = SHR32(Fout2->i, 1);
|
| - C_MUL (t, *Fout2 , *tw1);
|
| - tw1 += fstride;
|
| + Fout2 = Fout + 1;
|
| + t = *Fout2;
|
| C_SUB( *Fout2 , *Fout , t );
|
| C_ADDTO( *Fout , t );
|
| - ++Fout2;
|
| - ++Fout;
|
| + Fout += 2;
|
| }
|
| - }
|
| -}
|
| -
|
| -static void ki_bfly2(
|
| - kiss_fft_cpx * Fout,
|
| - const size_t fstride,
|
| - const kiss_fft_state *st,
|
| - int m,
|
| - int N,
|
| - int mm
|
| - )
|
| -{
|
| - kiss_fft_cpx * Fout2;
|
| - const kiss_twiddle_cpx * tw1;
|
| - kiss_fft_cpx t;
|
| - int i,j;
|
| - kiss_fft_cpx * Fout_beg = Fout;
|
| - for (i=0;i<N;i++)
|
| + } else
|
| +#endif
|
| {
|
| - Fout = Fout_beg + i*mm;
|
| - Fout2 = Fout + m;
|
| - tw1 = st->twiddles;
|
| - for(j=0;j<m;j++)
|
| + opus_val16 tw;
|
| + tw = QCONST16(0.7071067812f, 15);
|
| + /* We know that m==4 here because the radix-2 is just after a radix-4 */
|
| + celt_assert(m==4);
|
| + for (i=0;i<N;i++)
|
| {
|
| - C_MULC (t, *Fout2 , *tw1);
|
| - tw1 += fstride;
|
| - C_SUB( *Fout2 , *Fout , t );
|
| - C_ADDTO( *Fout , t );
|
| - ++Fout2;
|
| - ++Fout;
|
| + kiss_fft_cpx t;
|
| + Fout2 = Fout + 4;
|
| + t = Fout2[0];
|
| + C_SUB( Fout2[0] , Fout[0] , t );
|
| + C_ADDTO( Fout[0] , t );
|
| +
|
| + t.r = S_MUL(Fout2[1].r+Fout2[1].i, tw);
|
| + t.i = S_MUL(Fout2[1].i-Fout2[1].r, tw);
|
| + C_SUB( Fout2[1] , Fout[1] , t );
|
| + C_ADDTO( Fout[1] , t );
|
| +
|
| + t.r = Fout2[2].i;
|
| + t.i = -Fout2[2].r;
|
| + C_SUB( Fout2[2] , Fout[2] , t );
|
| + C_ADDTO( Fout[2] , t );
|
| +
|
| + t.r = S_MUL(Fout2[3].i-Fout2[3].r, tw);
|
| + t.i = S_MUL(-Fout2[3].i-Fout2[3].r, tw);
|
| + C_SUB( Fout2[3] , Fout[3] , t );
|
| + C_ADDTO( Fout[3] , t );
|
| + Fout += 8;
|
| }
|
| }
|
| }
|
| @@ -118,89 +110,67 @@ static void kf_bfly4(
|
| int mm
|
| )
|
| {
|
| - const kiss_twiddle_cpx *tw1,*tw2,*tw3;
|
| - kiss_fft_cpx scratch[6];
|
| - const size_t m2=2*m;
|
| - const size_t m3=3*m;
|
| - int i, j;
|
| + int i;
|
|
|
| - kiss_fft_cpx * Fout_beg = Fout;
|
| - for (i=0;i<N;i++)
|
| + if (m==1)
|
| {
|
| - Fout = Fout_beg + i*mm;
|
| - tw3 = tw2 = tw1 = st->twiddles;
|
| - for (j=0;j<m;j++)
|
| + /* Degenerate case where all the twiddles are 1. */
|
| + for (i=0;i<N;i++)
|
| {
|
| - C_MUL4(scratch[0],Fout[m] , *tw1 );
|
| - C_MUL4(scratch[1],Fout[m2] , *tw2 );
|
| - C_MUL4(scratch[2],Fout[m3] , *tw3 );
|
| -
|
| - Fout->r = PSHR32(Fout->r, 2);
|
| - Fout->i = PSHR32(Fout->i, 2);
|
| - C_SUB( scratch[5] , *Fout, scratch[1] );
|
| - C_ADDTO(*Fout, scratch[1]);
|
| - C_ADD( scratch[3] , scratch[0] , scratch[2] );
|
| - C_SUB( scratch[4] , scratch[0] , scratch[2] );
|
| - C_SUB( Fout[m2], *Fout, scratch[3] );
|
| - tw1 += fstride;
|
| - tw2 += fstride*2;
|
| - tw3 += fstride*3;
|
| - C_ADDTO( *Fout , scratch[3] );
|
| -
|
| - Fout[m].r = scratch[5].r + scratch[4].i;
|
| - Fout[m].i = scratch[5].i - scratch[4].r;
|
| - Fout[m3].r = scratch[5].r - scratch[4].i;
|
| - Fout[m3].i = scratch[5].i + scratch[4].r;
|
| - ++Fout;
|
| + kiss_fft_cpx scratch0, scratch1;
|
| +
|
| + C_SUB( scratch0 , *Fout, Fout[2] );
|
| + C_ADDTO(*Fout, Fout[2]);
|
| + C_ADD( scratch1 , Fout[1] , Fout[3] );
|
| + C_SUB( Fout[2], *Fout, scratch1 );
|
| + C_ADDTO( *Fout , scratch1 );
|
| + C_SUB( scratch1 , Fout[1] , Fout[3] );
|
| +
|
| + Fout[1].r = scratch0.r + scratch1.i;
|
| + Fout[1].i = scratch0.i - scratch1.r;
|
| + Fout[3].r = scratch0.r - scratch1.i;
|
| + Fout[3].i = scratch0.i + scratch1.r;
|
| + Fout+=4;
|
| }
|
| - }
|
| -}
|
| -
|
| -static void ki_bfly4(
|
| - kiss_fft_cpx * Fout,
|
| - const size_t fstride,
|
| - const kiss_fft_state *st,
|
| - int m,
|
| - int N,
|
| - int mm
|
| - )
|
| -{
|
| - const kiss_twiddle_cpx *tw1,*tw2,*tw3;
|
| - kiss_fft_cpx scratch[6];
|
| - const size_t m2=2*m;
|
| - const size_t m3=3*m;
|
| - int i, j;
|
| -
|
| - kiss_fft_cpx * Fout_beg = Fout;
|
| - for (i=0;i<N;i++)
|
| - {
|
| - Fout = Fout_beg + i*mm;
|
| - tw3 = tw2 = tw1 = st->twiddles;
|
| - for (j=0;j<m;j++)
|
| + } else {
|
| + int j;
|
| + kiss_fft_cpx scratch[6];
|
| + const kiss_twiddle_cpx *tw1,*tw2,*tw3;
|
| + const int m2=2*m;
|
| + const int m3=3*m;
|
| + kiss_fft_cpx * Fout_beg = Fout;
|
| + for (i=0;i<N;i++)
|
| {
|
| - C_MULC(scratch[0],Fout[m] , *tw1 );
|
| - C_MULC(scratch[1],Fout[m2] , *tw2 );
|
| - C_MULC(scratch[2],Fout[m3] , *tw3 );
|
| -
|
| - C_SUB( scratch[5] , *Fout, scratch[1] );
|
| - C_ADDTO(*Fout, scratch[1]);
|
| - C_ADD( scratch[3] , scratch[0] , scratch[2] );
|
| - C_SUB( scratch[4] , scratch[0] , scratch[2] );
|
| - C_SUB( Fout[m2], *Fout, scratch[3] );
|
| - tw1 += fstride;
|
| - tw2 += fstride*2;
|
| - tw3 += fstride*3;
|
| - C_ADDTO( *Fout , scratch[3] );
|
| -
|
| - Fout[m].r = scratch[5].r - scratch[4].i;
|
| - Fout[m].i = scratch[5].i + scratch[4].r;
|
| - Fout[m3].r = scratch[5].r + scratch[4].i;
|
| - Fout[m3].i = scratch[5].i - scratch[4].r;
|
| - ++Fout;
|
| + Fout = Fout_beg + i*mm;
|
| + tw3 = tw2 = tw1 = st->twiddles;
|
| + /* m is guaranteed to be a multiple of 4. */
|
| + for (j=0;j<m;j++)
|
| + {
|
| + C_MUL(scratch[0],Fout[m] , *tw1 );
|
| + C_MUL(scratch[1],Fout[m2] , *tw2 );
|
| + C_MUL(scratch[2],Fout[m3] , *tw3 );
|
| +
|
| + C_SUB( scratch[5] , *Fout, scratch[1] );
|
| + C_ADDTO(*Fout, scratch[1]);
|
| + C_ADD( scratch[3] , scratch[0] , scratch[2] );
|
| + C_SUB( scratch[4] , scratch[0] , scratch[2] );
|
| + C_SUB( Fout[m2], *Fout, scratch[3] );
|
| + tw1 += fstride;
|
| + tw2 += fstride*2;
|
| + tw3 += fstride*3;
|
| + C_ADDTO( *Fout , scratch[3] );
|
| +
|
| + Fout[m].r = scratch[5].r + scratch[4].i;
|
| + Fout[m].i = scratch[5].i - scratch[4].r;
|
| + Fout[m3].r = scratch[5].r - scratch[4].i;
|
| + Fout[m3].i = scratch[5].i + scratch[4].r;
|
| + ++Fout;
|
| + }
|
| }
|
| }
|
| }
|
|
|
| +
|
| #ifndef RADIX_TWO_ONLY
|
|
|
| static void kf_bfly3(
|
| @@ -220,14 +190,19 @@ static void kf_bfly3(
|
| kiss_twiddle_cpx epi3;
|
|
|
| kiss_fft_cpx * Fout_beg = Fout;
|
| +#ifdef FIXED_POINT
|
| + epi3.r = -16384;
|
| + epi3.i = -28378;
|
| +#else
|
| epi3 = st->twiddles[fstride*m];
|
| +#endif
|
| for (i=0;i<N;i++)
|
| {
|
| Fout = Fout_beg + i*mm;
|
| tw1=tw2=st->twiddles;
|
| + /* For non-custom modes, m is guaranteed to be a multiple of 4. */
|
| k=m;
|
| do {
|
| - C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
|
|
|
| C_MUL(scratch[1],Fout[m] , *tw1);
|
| C_MUL(scratch[2],Fout[m2] , *tw2);
|
| @@ -255,56 +230,8 @@ static void kf_bfly3(
|
| }
|
| }
|
|
|
| -static void ki_bfly3(
|
| - kiss_fft_cpx * Fout,
|
| - const size_t fstride,
|
| - const kiss_fft_state *st,
|
| - int m,
|
| - int N,
|
| - int mm
|
| - )
|
| -{
|
| - int i, k;
|
| - const size_t m2 = 2*m;
|
| - const kiss_twiddle_cpx *tw1,*tw2;
|
| - kiss_fft_cpx scratch[5];
|
| - kiss_twiddle_cpx epi3;
|
| -
|
| - kiss_fft_cpx * Fout_beg = Fout;
|
| - epi3 = st->twiddles[fstride*m];
|
| - for (i=0;i<N;i++)
|
| - {
|
| - Fout = Fout_beg + i*mm;
|
| - tw1=tw2=st->twiddles;
|
| - k=m;
|
| - do{
|
| -
|
| - C_MULC(scratch[1],Fout[m] , *tw1);
|
| - C_MULC(scratch[2],Fout[m2] , *tw2);
|
| -
|
| - C_ADD(scratch[3],scratch[1],scratch[2]);
|
| - C_SUB(scratch[0],scratch[1],scratch[2]);
|
| - tw1 += fstride;
|
| - tw2 += fstride*2;
|
| -
|
| - Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
|
| - Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
|
| -
|
| - C_MULBYSCALAR( scratch[0] , -epi3.i );
|
| -
|
| - C_ADDTO(*Fout,scratch[3]);
|
| -
|
| - Fout[m2].r = Fout[m].r + scratch[0].i;
|
| - Fout[m2].i = Fout[m].i - scratch[0].r;
|
| -
|
| - Fout[m].r -= scratch[0].i;
|
| - Fout[m].i += scratch[0].r;
|
| -
|
| - ++Fout;
|
| - }while(--k);
|
| - }
|
| -}
|
|
|
| +#ifndef OVERRIDE_kf_bfly5
|
| static void kf_bfly5(
|
| kiss_fft_cpx * Fout,
|
| const size_t fstride,
|
| @@ -317,13 +244,19 @@ static void kf_bfly5(
|
| kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
|
| int i, u;
|
| kiss_fft_cpx scratch[13];
|
| - const kiss_twiddle_cpx * twiddles = st->twiddles;
|
| const kiss_twiddle_cpx *tw;
|
| kiss_twiddle_cpx ya,yb;
|
| kiss_fft_cpx * Fout_beg = Fout;
|
|
|
| - ya = twiddles[fstride*m];
|
| - yb = twiddles[fstride*2*m];
|
| +#ifdef FIXED_POINT
|
| + ya.r = 10126;
|
| + ya.i = -31164;
|
| + yb.r = -26510;
|
| + yb.i = -19261;
|
| +#else
|
| + ya = st->twiddles[fstride*m];
|
| + yb = st->twiddles[fstride*2*m];
|
| +#endif
|
| tw=st->twiddles;
|
|
|
| for (i=0;i<N;i++)
|
| @@ -335,8 +268,8 @@ static void kf_bfly5(
|
| Fout3=Fout0+3*m;
|
| Fout4=Fout0+4*m;
|
|
|
| + /* For non-custom modes, m is guaranteed to be a multiple of 4. */
|
| for ( u=0; u<m; ++u ) {
|
| - C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
|
| scratch[0] = *Fout0;
|
|
|
| C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
|
| @@ -373,74 +306,8 @@ static void kf_bfly5(
|
| }
|
| }
|
| }
|
| +#endif /* OVERRIDE_kf_bfly5 */
|
|
|
| -static void ki_bfly5(
|
| - kiss_fft_cpx * Fout,
|
| - const size_t fstride,
|
| - const kiss_fft_state *st,
|
| - int m,
|
| - int N,
|
| - int mm
|
| - )
|
| -{
|
| - kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
|
| - int i, u;
|
| - kiss_fft_cpx scratch[13];
|
| - const kiss_twiddle_cpx * twiddles = st->twiddles;
|
| - const kiss_twiddle_cpx *tw;
|
| - kiss_twiddle_cpx ya,yb;
|
| - kiss_fft_cpx * Fout_beg = Fout;
|
| -
|
| - ya = twiddles[fstride*m];
|
| - yb = twiddles[fstride*2*m];
|
| - tw=st->twiddles;
|
| -
|
| - for (i=0;i<N;i++)
|
| - {
|
| - Fout = Fout_beg + i*mm;
|
| - Fout0=Fout;
|
| - Fout1=Fout0+m;
|
| - Fout2=Fout0+2*m;
|
| - Fout3=Fout0+3*m;
|
| - Fout4=Fout0+4*m;
|
| -
|
| - for ( u=0; u<m; ++u ) {
|
| - scratch[0] = *Fout0;
|
| -
|
| - C_MULC(scratch[1] ,*Fout1, tw[u*fstride]);
|
| - C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]);
|
| - C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]);
|
| - C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]);
|
| -
|
| - C_ADD( scratch[7],scratch[1],scratch[4]);
|
| - C_SUB( scratch[10],scratch[1],scratch[4]);
|
| - C_ADD( scratch[8],scratch[2],scratch[3]);
|
| - C_SUB( scratch[9],scratch[2],scratch[3]);
|
| -
|
| - Fout0->r += scratch[7].r + scratch[8].r;
|
| - Fout0->i += scratch[7].i + scratch[8].i;
|
| -
|
| - scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
|
| - scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
|
| -
|
| - scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i);
|
| - scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i);
|
| -
|
| - C_SUB(*Fout1,scratch[5],scratch[6]);
|
| - C_ADD(*Fout4,scratch[5],scratch[6]);
|
| -
|
| - scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
|
| - scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
|
| - scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i);
|
| - scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i);
|
| -
|
| - C_ADD(*Fout2,scratch[11],scratch[12]);
|
| - C_SUB(*Fout3,scratch[11],scratch[12]);
|
| -
|
| - ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
|
| - }
|
| - }
|
| -}
|
|
|
| #endif
|
|
|
| @@ -488,6 +355,9 @@ static
|
| int kf_factor(int n,opus_int16 * facbuf)
|
| {
|
| int p=4;
|
| + int i;
|
| + int stages=0;
|
| + int nbak = n;
|
|
|
| /*factor out powers of 4, powers of 2, then any remaining primes */
|
| do {
|
| @@ -509,9 +379,30 @@ int kf_factor(int n,opus_int16 * facbuf)
|
| {
|
| return 0;
|
| }
|
| - *facbuf++ = p;
|
| - *facbuf++ = n;
|
| + facbuf[2*stages] = p;
|
| + if (p==2 && stages > 1)
|
| + {
|
| + facbuf[2*stages] = 4;
|
| + facbuf[2] = 2;
|
| + }
|
| + stages++;
|
| } while (n > 1);
|
| + n = nbak;
|
| + /* Reverse the order to get the radix 4 at the end, so we can use the
|
| + fast degenerate case. It turns out that reversing the order also
|
| + improves the noise behaviour. */
|
| + for (i=0;i<stages/2;i++)
|
| + {
|
| + int tmp;
|
| + tmp = facbuf[2*i];
|
| + facbuf[2*i] = facbuf[2*(stages-i-1)];
|
| + facbuf[2*(stages-i-1)] = tmp;
|
| + }
|
| + for (i=0;i<stages;i++)
|
| + {
|
| + n /= facbuf[2*i];
|
| + facbuf[2*i+1] = n;
|
| + }
|
| return 1;
|
| }
|
|
|
| @@ -555,14 +446,20 @@ kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, co
|
| kiss_twiddle_cpx *twiddles;
|
|
|
| st->nfft=nfft;
|
| -#ifndef FIXED_POINT
|
| +#ifdef FIXED_POINT
|
| + st->scale_shift = celt_ilog2(st->nfft);
|
| + if (st->nfft == 1<<st->scale_shift)
|
| + st->scale = Q15ONE;
|
| + else
|
| + st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift);
|
| +#else
|
| st->scale = 1.f/nfft;
|
| #endif
|
| if (base != NULL)
|
| {
|
| st->twiddles = base->twiddles;
|
| st->shift = 0;
|
| - while (nfft<<st->shift != base->nfft && st->shift < 32)
|
| + while (st->shift < 32 && nfft<<st->shift != base->nfft)
|
| st->shift++;
|
| if (st->shift>=32)
|
| goto fail;
|
| @@ -606,7 +503,7 @@ void opus_fft_free(const kiss_fft_state *cfg)
|
|
|
| #endif /* CUSTOM_MODES */
|
|
|
| -void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
|
| +void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout)
|
| {
|
| int m2, m;
|
| int p;
|
| @@ -618,17 +515,6 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou
|
| /* st->shift can be -1 */
|
| shift = st->shift>0 ? st->shift : 0;
|
|
|
| - celt_assert2 (fin != fout, "In-place FFT not supported");
|
| - /* Bit-reverse the input */
|
| - for (i=0;i<st->nfft;i++)
|
| - {
|
| - fout[st->bitrev[i]] = fin[i];
|
| -#ifndef FIXED_POINT
|
| - fout[st->bitrev[i]].r *= st->scale;
|
| - fout[st->bitrev[i]].i *= st->scale;
|
| -#endif
|
| - }
|
| -
|
| fstride[0] = 1;
|
| L=0;
|
| do {
|
| @@ -647,7 +533,7 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou
|
| switch (st->factors[2*i])
|
| {
|
| case 2:
|
| - kf_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);
|
| + kf_bfly2(fout, m, fstride[i]);
|
| break;
|
| case 4:
|
| kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
|
| @@ -665,55 +551,41 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou
|
| }
|
| }
|
|
|
| -void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
|
| +void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
|
| {
|
| - int m2, m;
|
| - int p;
|
| - int L;
|
| - int fstride[MAXFACTORS];
|
| int i;
|
| - int shift;
|
| + opus_val16 scale;
|
| +#ifdef FIXED_POINT
|
| + /* Allows us to scale with MULT16_32_Q16(), which is faster than
|
| + MULT16_32_Q15() on ARM. */
|
| + int scale_shift = st->scale_shift-1;
|
| +#endif
|
| + scale = st->scale;
|
|
|
| - /* st->shift can be -1 */
|
| - shift = st->shift>0 ? st->shift : 0;
|
| celt_assert2 (fin != fout, "In-place FFT not supported");
|
| /* Bit-reverse the input */
|
| for (i=0;i<st->nfft;i++)
|
| - fout[st->bitrev[i]] = fin[i];
|
| -
|
| - fstride[0] = 1;
|
| - L=0;
|
| - do {
|
| - p = st->factors[2*L];
|
| - m = st->factors[2*L+1];
|
| - fstride[L+1] = fstride[L]*p;
|
| - L++;
|
| - } while(m!=1);
|
| - m = st->factors[2*L-1];
|
| - for (i=L-1;i>=0;i--)
|
| {
|
| - if (i!=0)
|
| - m2 = st->factors[2*i-1];
|
| - else
|
| - m2 = 1;
|
| - switch (st->factors[2*i])
|
| - {
|
| - case 2:
|
| - ki_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);
|
| - break;
|
| - case 4:
|
| - ki_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
|
| - break;
|
| -#ifndef RADIX_TWO_ONLY
|
| - case 3:
|
| - ki_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
|
| - break;
|
| - case 5:
|
| - ki_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
|
| - break;
|
| -#endif
|
| - }
|
| - m = m2;
|
| + kiss_fft_cpx x = fin[i];
|
| + fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift);
|
| + fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift);
|
| }
|
| + opus_fft_impl(st, fout);
|
| }
|
|
|
| +
|
| +#ifdef TEST_UNIT_DFT_C
|
| +void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
|
| +{
|
| + int i;
|
| + celt_assert2 (fin != fout, "In-place FFT not supported");
|
| + /* Bit-reverse the input */
|
| + for (i=0;i<st->nfft;i++)
|
| + fout[st->bitrev[i]] = fin[i];
|
| + for (i=0;i<st->nfft;i++)
|
| + fout[i].i = -fout[i].i;
|
| + opus_fft_impl(st, fout);
|
| + for (i=0;i<st->nfft;i++)
|
| + fout[i].i = -fout[i].i;
|
| +}
|
| +#endif
|
|
|