celt/kiss_fft.c - Issue 882843002: Update to opus-HEAD-66611f1.

Unified Diff: celt/kiss_fft.c

Issue 882843002: Update to opus-HEAD-66611f1. (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/opus.git@master

Patch Set: Add the contents of Makefile.mips back. Created 5 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: celt/kiss_fft.c

diff --git a/celt/kiss_fft.c b/celt/kiss_fft.c

index ad706c739717bc060df49df07b862b625bb50a60..cc487fcfb4862a371cafdaf69fc3d0db2ab5b7a7 100644

--- a/celt/kiss_fft.c

+++ b/celt/kiss_fft.c

@@ -47,64 +47,56 @@

static void kf_bfly2(

kiss_fft_cpx * Fout,

- const size_t fstride,

- const kiss_fft_state *st,

int m,

- int N,

- int mm

+ int N

)

{

kiss_fft_cpx * Fout2;

- const kiss_twiddle_cpx * tw1;

- int i,j;

- kiss_fft_cpx * Fout_beg = Fout;

- for (i=0;i<N;i++)

+ int i;

+ (void)m;

+#ifdef CUSTOM_MODES

+ if (m==1)

{

- Fout = Fout_beg + i*mm;

- Fout2 = Fout + m;

- tw1 = st->twiddles;

- for(j=0;j<m;j++)

+ celt_assert(m==1);

+ for (i=0;i<N;i++)

{

kiss_fft_cpx t;

- Fout->r = SHR32(Fout->r, 1);Fout->i = SHR32(Fout->i, 1);

- Fout2->r = SHR32(Fout2->r, 1);Fout2->i = SHR32(Fout2->i, 1);

- C_MUL (t, *Fout2 , *tw1);

- tw1 += fstride;

+ Fout2 = Fout + 1;

+ t = *Fout2;

C_SUB( *Fout2 , *Fout , t );

C_ADDTO( *Fout , t );

- ++Fout2;

- ++Fout;

+ Fout += 2;

}

- }

-static void ki_bfly2(

- kiss_fft_cpx * Fout,

- const size_t fstride,

- const kiss_fft_state *st,

- int m,

- int N,

- int mm

- )

- kiss_fft_cpx * Fout2;

- const kiss_twiddle_cpx * tw1;

- kiss_fft_cpx t;

- int i,j;

- kiss_fft_cpx * Fout_beg = Fout;

- for (i=0;i<N;i++)

+ } else

+#endif

{

- Fout = Fout_beg + i*mm;

- Fout2 = Fout + m;

- tw1 = st->twiddles;

- for(j=0;j<m;j++)

+ opus_val16 tw;

+ tw = QCONST16(0.7071067812f, 15);

+ /* We know that m==4 here because the radix-2 is just after a radix-4 */

+ celt_assert(m==4);

+ for (i=0;i<N;i++)

{

- C_MULC (t, *Fout2 , *tw1);

- tw1 += fstride;

- C_SUB( *Fout2 , *Fout , t );

- C_ADDTO( *Fout , t );

- ++Fout2;

- ++Fout;

+ kiss_fft_cpx t;

+ Fout2 = Fout + 4;

+ t = Fout2[0];

+ C_SUB( Fout2[0] , Fout[0] , t );

+ C_ADDTO( Fout[0] , t );

+ t.r = S_MUL(Fout2[1].r+Fout2[1].i, tw);

+ t.i = S_MUL(Fout2[1].i-Fout2[1].r, tw);

+ C_SUB( Fout2[1] , Fout[1] , t );

+ C_ADDTO( Fout[1] , t );

+ t.r = Fout2[2].i;

+ t.i = -Fout2[2].r;

+ C_SUB( Fout2[2] , Fout[2] , t );

+ C_ADDTO( Fout[2] , t );

+ t.r = S_MUL(Fout2[3].i-Fout2[3].r, tw);

+ t.i = S_MUL(-Fout2[3].i-Fout2[3].r, tw);

+ C_SUB( Fout2[3] , Fout[3] , t );

+ C_ADDTO( Fout[3] , t );

+ Fout += 8;

}

@@ -118,89 +110,67 @@ static void kf_bfly4(

int mm

)

{

- const kiss_twiddle_cpx *tw1,*tw2,*tw3;

- kiss_fft_cpx scratch[6];

- const size_t m2=2*m;

- const size_t m3=3*m;

- int i, j;

+ int i;

- kiss_fft_cpx * Fout_beg = Fout;

- for (i=0;i<N;i++)

+ if (m==1)

{

- Fout = Fout_beg + i*mm;

- tw3 = tw2 = tw1 = st->twiddles;

- for (j=0;j<m;j++)

+ /* Degenerate case where all the twiddles are 1. */

+ for (i=0;i<N;i++)

{

- C_MUL4(scratch[0],Fout[m] , *tw1 );

- C_MUL4(scratch[1],Fout[m2] , *tw2 );

- C_MUL4(scratch[2],Fout[m3] , *tw3 );

- Fout->r = PSHR32(Fout->r, 2);

- Fout->i = PSHR32(Fout->i, 2);

- C_SUB( scratch[5] , *Fout, scratch[1] );

- C_ADDTO(*Fout, scratch[1]);

- C_ADD( scratch[3] , scratch[0] , scratch[2] );

- C_SUB( scratch[4] , scratch[0] , scratch[2] );

- C_SUB( Fout[m2], *Fout, scratch[3] );

- tw1 += fstride;

- tw2 += fstride*2;

- tw3 += fstride*3;

- C_ADDTO( *Fout , scratch[3] );

- Fout[m].r = scratch[5].r + scratch[4].i;

- Fout[m].i = scratch[5].i - scratch[4].r;

- Fout[m3].r = scratch[5].r - scratch[4].i;

- Fout[m3].i = scratch[5].i + scratch[4].r;

- ++Fout;

+ kiss_fft_cpx scratch0, scratch1;

+ C_SUB( scratch0 , *Fout, Fout[2] );

+ C_ADDTO(*Fout, Fout[2]);

+ C_ADD( scratch1 , Fout[1] , Fout[3] );

+ C_SUB( Fout[2], *Fout, scratch1 );

+ C_ADDTO( *Fout , scratch1 );

+ C_SUB( scratch1 , Fout[1] , Fout[3] );

+ Fout[1].r = scratch0.r + scratch1.i;

+ Fout[1].i = scratch0.i - scratch1.r;

+ Fout[3].r = scratch0.r - scratch1.i;

+ Fout[3].i = scratch0.i + scratch1.r;

+ Fout+=4;

}

- }

-static void ki_bfly4(

- kiss_fft_cpx * Fout,

- const size_t fstride,

- const kiss_fft_state *st,

- int m,

- int N,

- int mm

- )

- const kiss_twiddle_cpx *tw1,*tw2,*tw3;

- kiss_fft_cpx scratch[6];

- const size_t m2=2*m;

- const size_t m3=3*m;

- int i, j;

- kiss_fft_cpx * Fout_beg = Fout;

- for (i=0;i<N;i++)

- {

- Fout = Fout_beg + i*mm;

- tw3 = tw2 = tw1 = st->twiddles;

- for (j=0;j<m;j++)

+ } else {

+ int j;

+ kiss_fft_cpx scratch[6];

+ const kiss_twiddle_cpx *tw1,*tw2,*tw3;

+ const int m2=2*m;

+ const int m3=3*m;

+ kiss_fft_cpx * Fout_beg = Fout;

+ for (i=0;i<N;i++)

{

- C_MULC(scratch[0],Fout[m] , *tw1 );

- C_MULC(scratch[1],Fout[m2] , *tw2 );

- C_MULC(scratch[2],Fout[m3] , *tw3 );

- C_SUB( scratch[5] , *Fout, scratch[1] );

- C_ADDTO(*Fout, scratch[1]);

- C_ADD( scratch[3] , scratch[0] , scratch[2] );

- C_SUB( scratch[4] , scratch[0] , scratch[2] );

- C_SUB( Fout[m2], *Fout, scratch[3] );

- tw1 += fstride;

- tw2 += fstride*2;

- tw3 += fstride*3;

- C_ADDTO( *Fout , scratch[3] );

- Fout[m].r = scratch[5].r - scratch[4].i;

- Fout[m].i = scratch[5].i + scratch[4].r;

- Fout[m3].r = scratch[5].r + scratch[4].i;

- Fout[m3].i = scratch[5].i - scratch[4].r;

- ++Fout;

+ Fout = Fout_beg + i*mm;

+ tw3 = tw2 = tw1 = st->twiddles;

+ /* m is guaranteed to be a multiple of 4. */

+ for (j=0;j<m;j++)

+ {

+ C_MUL(scratch[0],Fout[m] , *tw1 );

+ C_MUL(scratch[1],Fout[m2] , *tw2 );

+ C_MUL(scratch[2],Fout[m3] , *tw3 );

+ C_SUB( scratch[5] , *Fout, scratch[1] );

+ C_ADDTO(*Fout, scratch[1]);

+ C_ADD( scratch[3] , scratch[0] , scratch[2] );

+ C_SUB( scratch[4] , scratch[0] , scratch[2] );

+ C_SUB( Fout[m2], *Fout, scratch[3] );

+ tw1 += fstride;

+ tw2 += fstride*2;

+ tw3 += fstride*3;

+ C_ADDTO( *Fout , scratch[3] );

+ Fout[m].r = scratch[5].r + scratch[4].i;

+ Fout[m].i = scratch[5].i - scratch[4].r;

+ Fout[m3].r = scratch[5].r - scratch[4].i;

+ Fout[m3].i = scratch[5].i + scratch[4].r;

+ ++Fout;

+ }

}

#ifndef RADIX_TWO_ONLY

static void kf_bfly3(

@@ -220,14 +190,19 @@ static void kf_bfly3(

kiss_twiddle_cpx epi3;

kiss_fft_cpx * Fout_beg = Fout;

+#ifdef FIXED_POINT

+ epi3.r = -16384;

+ epi3.i = -28378;

+#else

epi3 = st->twiddles[fstride*m];

+#endif

for (i=0;i<N;i++)

{

Fout = Fout_beg + i*mm;

tw1=tw2=st->twiddles;

+ /* For non-custom modes, m is guaranteed to be a multiple of 4. */

k=m;

do {

- C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);

C_MUL(scratch[1],Fout[m] , *tw1);

C_MUL(scratch[2],Fout[m2] , *tw2);

@@ -255,56 +230,8 @@ static void kf_bfly3(

}

-static void ki_bfly3(

- kiss_fft_cpx * Fout,

- const size_t fstride,

- const kiss_fft_state *st,

- int m,

- int N,

- int mm

- )

- int i, k;

- const size_t m2 = 2*m;

- const kiss_twiddle_cpx *tw1,*tw2;

- kiss_fft_cpx scratch[5];

- kiss_twiddle_cpx epi3;

- kiss_fft_cpx * Fout_beg = Fout;

- epi3 = st->twiddles[fstride*m];

- for (i=0;i<N;i++)

- {

- Fout = Fout_beg + i*mm;

- tw1=tw2=st->twiddles;

- k=m;

- do{

- C_MULC(scratch[1],Fout[m] , *tw1);

- C_MULC(scratch[2],Fout[m2] , *tw2);

- C_ADD(scratch[3],scratch[1],scratch[2]);

- C_SUB(scratch[0],scratch[1],scratch[2]);

- tw1 += fstride;

- tw2 += fstride*2;

- Fout[m].r = Fout->r - HALF_OF(scratch[3].r);

- Fout[m].i = Fout->i - HALF_OF(scratch[3].i);

- C_MULBYSCALAR( scratch[0] , -epi3.i );

- C_ADDTO(*Fout,scratch[3]);

- Fout[m2].r = Fout[m].r + scratch[0].i;

- Fout[m2].i = Fout[m].i - scratch[0].r;

- Fout[m].r -= scratch[0].i;

- Fout[m].i += scratch[0].r;

- ++Fout;

- }while(--k);

- }

+#ifndef OVERRIDE_kf_bfly5

static void kf_bfly5(

kiss_fft_cpx * Fout,

const size_t fstride,

@@ -317,13 +244,19 @@ static void kf_bfly5(

kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;

int i, u;

kiss_fft_cpx scratch[13];

- const kiss_twiddle_cpx * twiddles = st->twiddles;

const kiss_twiddle_cpx *tw;

kiss_twiddle_cpx ya,yb;

kiss_fft_cpx * Fout_beg = Fout;

- ya = twiddles[fstride*m];

- yb = twiddles[fstride*2*m];

+#ifdef FIXED_POINT

+ ya.r = 10126;

+ ya.i = -31164;

+ yb.r = -26510;

+ yb.i = -19261;

+#else

+ ya = st->twiddles[fstride*m];

+ yb = st->twiddles[fstride*2*m];

+#endif

tw=st->twiddles;

for (i=0;i<N;i++)

@@ -335,8 +268,8 @@ static void kf_bfly5(

Fout3=Fout0+3*m;

Fout4=Fout0+4*m;

+ /* For non-custom modes, m is guaranteed to be a multiple of 4. */

for ( u=0; u<m; ++u ) {

- C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);

scratch[0] = *Fout0;

C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);

@@ -373,74 +306,8 @@ static void kf_bfly5(

}

+#endif /* OVERRIDE_kf_bfly5 */

-static void ki_bfly5(

- kiss_fft_cpx * Fout,

- const size_t fstride,

- const kiss_fft_state *st,

- int m,

- int N,

- int mm

- )

- kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;

- int i, u;

- kiss_fft_cpx scratch[13];

- const kiss_twiddle_cpx * twiddles = st->twiddles;

- const kiss_twiddle_cpx *tw;

- kiss_twiddle_cpx ya,yb;

- kiss_fft_cpx * Fout_beg = Fout;

- ya = twiddles[fstride*m];

- yb = twiddles[fstride*2*m];

- tw=st->twiddles;

- for (i=0;i<N;i++)

- {

- Fout = Fout_beg + i*mm;

- Fout0=Fout;

- Fout1=Fout0+m;

- Fout2=Fout0+2*m;

- Fout3=Fout0+3*m;

- Fout4=Fout0+4*m;

- for ( u=0; u<m; ++u ) {

- scratch[0] = *Fout0;

- C_MULC(scratch[1] ,*Fout1, tw[u*fstride]);

- C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]);

- C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]);

- C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]);

- C_ADD( scratch[7],scratch[1],scratch[4]);

- C_SUB( scratch[10],scratch[1],scratch[4]);

- C_ADD( scratch[8],scratch[2],scratch[3]);

- C_SUB( scratch[9],scratch[2],scratch[3]);

- Fout0->r += scratch[7].r + scratch[8].r;

- Fout0->i += scratch[7].i + scratch[8].i;

- scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);

- scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);

- scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i);

- scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i);

- C_SUB(*Fout1,scratch[5],scratch[6]);

- C_ADD(*Fout4,scratch[5],scratch[6]);

- scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);

- scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);

- scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i);

- scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i);

- C_ADD(*Fout2,scratch[11],scratch[12]);

- C_SUB(*Fout3,scratch[11],scratch[12]);

- ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;

- }

#endif

@@ -488,6 +355,9 @@ static

int kf_factor(int n,opus_int16 * facbuf)

{

int p=4;

+ int i;

+ int stages=0;

+ int nbak = n;

/*factor out powers of 4, powers of 2, then any remaining primes */

do {

@@ -509,9 +379,30 @@ int kf_factor(int n,opus_int16 * facbuf)

{

return 0;

}

- *facbuf++ = p;

- *facbuf++ = n;

+ facbuf[2*stages] = p;

+ if (p==2 && stages > 1)

+ {

+ facbuf[2*stages] = 4;

+ facbuf[2] = 2;

+ }

+ stages++;

} while (n > 1);

+ n = nbak;

+ /* Reverse the order to get the radix 4 at the end, so we can use the

+ fast degenerate case. It turns out that reversing the order also

+ improves the noise behaviour. */

+ for (i=0;i<stages/2;i++)

+ {

+ int tmp;

+ tmp = facbuf[2*i];

+ facbuf[2*i] = facbuf[2*(stages-i-1)];

+ facbuf[2*(stages-i-1)] = tmp;

+ }

+ for (i=0;i<stages;i++)

+ {

+ n /= facbuf[2*i];

+ facbuf[2*i+1] = n;

+ }

return 1;

}

@@ -555,14 +446,20 @@ kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, co

kiss_twiddle_cpx *twiddles;

st->nfft=nfft;

-#ifndef FIXED_POINT

+#ifdef FIXED_POINT

+ st->scale_shift = celt_ilog2(st->nfft);

+ if (st->nfft == 1<<st->scale_shift)

+ st->scale = Q15ONE;

+ else

+ st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift);

+#else

st->scale = 1.f/nfft;

#endif

if (base != NULL)

{

st->twiddles = base->twiddles;

st->shift = 0;

- while (nfft<<st->shift != base->nfft && st->shift < 32)

+ while (st->shift < 32 && nfft<<st->shift != base->nfft)

st->shift++;

if (st->shift>=32)

goto fail;

@@ -606,7 +503,7 @@ void opus_fft_free(const kiss_fft_state *cfg)

#endif /* CUSTOM_MODES */

-void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)

+void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout)

{

int m2, m;

int p;

@@ -618,17 +515,6 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou

/* st->shift can be -1 */

shift = st->shift>0 ? st->shift : 0;

- celt_assert2 (fin != fout, "In-place FFT not supported");

- /* Bit-reverse the input */

- for (i=0;i<st->nfft;i++)

- {

- fout[st->bitrev[i]] = fin[i];

-#ifndef FIXED_POINT

- fout[st->bitrev[i]].r *= st->scale;

- fout[st->bitrev[i]].i *= st->scale;

-#endif

- }

fstride[0] = 1;

L=0;

do {

@@ -647,7 +533,7 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou

switch (st->factors[2*i])

{

case 2:

- kf_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);

+ kf_bfly2(fout, m, fstride[i]);

break;

case 4:

kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);

@@ -665,55 +551,41 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou

}

-void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)

+void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)

{

- int m2, m;

- int p;

- int L;

- int fstride[MAXFACTORS];

int i;

- int shift;

+ opus_val16 scale;

+#ifdef FIXED_POINT

+ /* Allows us to scale with MULT16_32_Q16(), which is faster than

+ MULT16_32_Q15() on ARM. */

+ int scale_shift = st->scale_shift-1;

+#endif

+ scale = st->scale;

- /* st->shift can be -1 */

- shift = st->shift>0 ? st->shift : 0;

celt_assert2 (fin != fout, "In-place FFT not supported");

/* Bit-reverse the input */

for (i=0;i<st->nfft;i++)

- fout[st->bitrev[i]] = fin[i];

- fstride[0] = 1;

- L=0;

- do {

- p = st->factors[2*L];

- m = st->factors[2*L+1];

- fstride[L+1] = fstride[L]*p;

- L++;

- } while(m!=1);

- m = st->factors[2*L-1];

- for (i=L-1;i>=0;i--)

{

- if (i!=0)

- m2 = st->factors[2*i-1];

- else

- m2 = 1;

- switch (st->factors[2*i])

- {

- case 2:

- ki_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);

- break;

- case 4:

- ki_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);

- break;

-#ifndef RADIX_TWO_ONLY

- case 3:

- ki_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);

- break;

- case 5:

- ki_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);

- break;

-#endif

- }

- m = m2;

+ kiss_fft_cpx x = fin[i];

+ fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift);

+ fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift);

}

+ opus_fft_impl(st, fout);

}

+#ifdef TEST_UNIT_DFT_C

+void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)

+ int i;

+ celt_assert2 (fin != fout, "In-place FFT not supported");

+ /* Bit-reverse the input */

+ for (i=0;i<st->nfft;i++)

+ fout[st->bitrev[i]] = fin[i];

+ for (i=0;i<st->nfft;i++)

+ fout[i].i = -fout[i].i;

+ opus_fft_impl(st, fout);

+ for (i=0;i<st->nfft;i++)

+ fout[i].i = -fout[i].i;

+#endif

« no previous file with comments | « celt/kiss_fft.h ('k') | celt/mdct.c » ('j') | no next file with comments »