Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(3433)

Unified Diff: celt/kiss_fft.c

Issue 882843002: Update to opus-HEAD-66611f1. (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/opus.git@master
Patch Set: Add the contents of Makefile.mips back. Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « celt/kiss_fft.h ('k') | celt/mdct.c » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: celt/kiss_fft.c
diff --git a/celt/kiss_fft.c b/celt/kiss_fft.c
index ad706c739717bc060df49df07b862b625bb50a60..cc487fcfb4862a371cafdaf69fc3d0db2ab5b7a7 100644
--- a/celt/kiss_fft.c
+++ b/celt/kiss_fft.c
@@ -47,64 +47,56 @@
static void kf_bfly2(
kiss_fft_cpx * Fout,
- const size_t fstride,
- const kiss_fft_state *st,
int m,
- int N,
- int mm
+ int N
)
{
kiss_fft_cpx * Fout2;
- const kiss_twiddle_cpx * tw1;
- int i,j;
- kiss_fft_cpx * Fout_beg = Fout;
- for (i=0;i<N;i++)
+ int i;
+ (void)m;
+#ifdef CUSTOM_MODES
+ if (m==1)
{
- Fout = Fout_beg + i*mm;
- Fout2 = Fout + m;
- tw1 = st->twiddles;
- for(j=0;j<m;j++)
+ celt_assert(m==1);
+ for (i=0;i<N;i++)
{
kiss_fft_cpx t;
- Fout->r = SHR32(Fout->r, 1);Fout->i = SHR32(Fout->i, 1);
- Fout2->r = SHR32(Fout2->r, 1);Fout2->i = SHR32(Fout2->i, 1);
- C_MUL (t, *Fout2 , *tw1);
- tw1 += fstride;
+ Fout2 = Fout + 1;
+ t = *Fout2;
C_SUB( *Fout2 , *Fout , t );
C_ADDTO( *Fout , t );
- ++Fout2;
- ++Fout;
+ Fout += 2;
}
- }
-}
-
-static void ki_bfly2(
- kiss_fft_cpx * Fout,
- const size_t fstride,
- const kiss_fft_state *st,
- int m,
- int N,
- int mm
- )
-{
- kiss_fft_cpx * Fout2;
- const kiss_twiddle_cpx * tw1;
- kiss_fft_cpx t;
- int i,j;
- kiss_fft_cpx * Fout_beg = Fout;
- for (i=0;i<N;i++)
+ } else
+#endif
{
- Fout = Fout_beg + i*mm;
- Fout2 = Fout + m;
- tw1 = st->twiddles;
- for(j=0;j<m;j++)
+ opus_val16 tw;
+ tw = QCONST16(0.7071067812f, 15);
+ /* We know that m==4 here because the radix-2 is just after a radix-4 */
+ celt_assert(m==4);
+ for (i=0;i<N;i++)
{
- C_MULC (t, *Fout2 , *tw1);
- tw1 += fstride;
- C_SUB( *Fout2 , *Fout , t );
- C_ADDTO( *Fout , t );
- ++Fout2;
- ++Fout;
+ kiss_fft_cpx t;
+ Fout2 = Fout + 4;
+ t = Fout2[0];
+ C_SUB( Fout2[0] , Fout[0] , t );
+ C_ADDTO( Fout[0] , t );
+
+ t.r = S_MUL(Fout2[1].r+Fout2[1].i, tw);
+ t.i = S_MUL(Fout2[1].i-Fout2[1].r, tw);
+ C_SUB( Fout2[1] , Fout[1] , t );
+ C_ADDTO( Fout[1] , t );
+
+ t.r = Fout2[2].i;
+ t.i = -Fout2[2].r;
+ C_SUB( Fout2[2] , Fout[2] , t );
+ C_ADDTO( Fout[2] , t );
+
+ t.r = S_MUL(Fout2[3].i-Fout2[3].r, tw);
+ t.i = S_MUL(-Fout2[3].i-Fout2[3].r, tw);
+ C_SUB( Fout2[3] , Fout[3] , t );
+ C_ADDTO( Fout[3] , t );
+ Fout += 8;
}
}
}
@@ -118,89 +110,67 @@ static void kf_bfly4(
int mm
)
{
- const kiss_twiddle_cpx *tw1,*tw2,*tw3;
- kiss_fft_cpx scratch[6];
- const size_t m2=2*m;
- const size_t m3=3*m;
- int i, j;
+ int i;
- kiss_fft_cpx * Fout_beg = Fout;
- for (i=0;i<N;i++)
+ if (m==1)
{
- Fout = Fout_beg + i*mm;
- tw3 = tw2 = tw1 = st->twiddles;
- for (j=0;j<m;j++)
+ /* Degenerate case where all the twiddles are 1. */
+ for (i=0;i<N;i++)
{
- C_MUL4(scratch[0],Fout[m] , *tw1 );
- C_MUL4(scratch[1],Fout[m2] , *tw2 );
- C_MUL4(scratch[2],Fout[m3] , *tw3 );
-
- Fout->r = PSHR32(Fout->r, 2);
- Fout->i = PSHR32(Fout->i, 2);
- C_SUB( scratch[5] , *Fout, scratch[1] );
- C_ADDTO(*Fout, scratch[1]);
- C_ADD( scratch[3] , scratch[0] , scratch[2] );
- C_SUB( scratch[4] , scratch[0] , scratch[2] );
- C_SUB( Fout[m2], *Fout, scratch[3] );
- tw1 += fstride;
- tw2 += fstride*2;
- tw3 += fstride*3;
- C_ADDTO( *Fout , scratch[3] );
-
- Fout[m].r = scratch[5].r + scratch[4].i;
- Fout[m].i = scratch[5].i - scratch[4].r;
- Fout[m3].r = scratch[5].r - scratch[4].i;
- Fout[m3].i = scratch[5].i + scratch[4].r;
- ++Fout;
+ kiss_fft_cpx scratch0, scratch1;
+
+ C_SUB( scratch0 , *Fout, Fout[2] );
+ C_ADDTO(*Fout, Fout[2]);
+ C_ADD( scratch1 , Fout[1] , Fout[3] );
+ C_SUB( Fout[2], *Fout, scratch1 );
+ C_ADDTO( *Fout , scratch1 );
+ C_SUB( scratch1 , Fout[1] , Fout[3] );
+
+ Fout[1].r = scratch0.r + scratch1.i;
+ Fout[1].i = scratch0.i - scratch1.r;
+ Fout[3].r = scratch0.r - scratch1.i;
+ Fout[3].i = scratch0.i + scratch1.r;
+ Fout+=4;
}
- }
-}
-
-static void ki_bfly4(
- kiss_fft_cpx * Fout,
- const size_t fstride,
- const kiss_fft_state *st,
- int m,
- int N,
- int mm
- )
-{
- const kiss_twiddle_cpx *tw1,*tw2,*tw3;
- kiss_fft_cpx scratch[6];
- const size_t m2=2*m;
- const size_t m3=3*m;
- int i, j;
-
- kiss_fft_cpx * Fout_beg = Fout;
- for (i=0;i<N;i++)
- {
- Fout = Fout_beg + i*mm;
- tw3 = tw2 = tw1 = st->twiddles;
- for (j=0;j<m;j++)
+ } else {
+ int j;
+ kiss_fft_cpx scratch[6];
+ const kiss_twiddle_cpx *tw1,*tw2,*tw3;
+ const int m2=2*m;
+ const int m3=3*m;
+ kiss_fft_cpx * Fout_beg = Fout;
+ for (i=0;i<N;i++)
{
- C_MULC(scratch[0],Fout[m] , *tw1 );
- C_MULC(scratch[1],Fout[m2] , *tw2 );
- C_MULC(scratch[2],Fout[m3] , *tw3 );
-
- C_SUB( scratch[5] , *Fout, scratch[1] );
- C_ADDTO(*Fout, scratch[1]);
- C_ADD( scratch[3] , scratch[0] , scratch[2] );
- C_SUB( scratch[4] , scratch[0] , scratch[2] );
- C_SUB( Fout[m2], *Fout, scratch[3] );
- tw1 += fstride;
- tw2 += fstride*2;
- tw3 += fstride*3;
- C_ADDTO( *Fout , scratch[3] );
-
- Fout[m].r = scratch[5].r - scratch[4].i;
- Fout[m].i = scratch[5].i + scratch[4].r;
- Fout[m3].r = scratch[5].r + scratch[4].i;
- Fout[m3].i = scratch[5].i - scratch[4].r;
- ++Fout;
+ Fout = Fout_beg + i*mm;
+ tw3 = tw2 = tw1 = st->twiddles;
+ /* m is guaranteed to be a multiple of 4. */
+ for (j=0;j<m;j++)
+ {
+ C_MUL(scratch[0],Fout[m] , *tw1 );
+ C_MUL(scratch[1],Fout[m2] , *tw2 );
+ C_MUL(scratch[2],Fout[m3] , *tw3 );
+
+ C_SUB( scratch[5] , *Fout, scratch[1] );
+ C_ADDTO(*Fout, scratch[1]);
+ C_ADD( scratch[3] , scratch[0] , scratch[2] );
+ C_SUB( scratch[4] , scratch[0] , scratch[2] );
+ C_SUB( Fout[m2], *Fout, scratch[3] );
+ tw1 += fstride;
+ tw2 += fstride*2;
+ tw3 += fstride*3;
+ C_ADDTO( *Fout , scratch[3] );
+
+ Fout[m].r = scratch[5].r + scratch[4].i;
+ Fout[m].i = scratch[5].i - scratch[4].r;
+ Fout[m3].r = scratch[5].r - scratch[4].i;
+ Fout[m3].i = scratch[5].i + scratch[4].r;
+ ++Fout;
+ }
}
}
}
+
#ifndef RADIX_TWO_ONLY
static void kf_bfly3(
@@ -220,14 +190,19 @@ static void kf_bfly3(
kiss_twiddle_cpx epi3;
kiss_fft_cpx * Fout_beg = Fout;
+#ifdef FIXED_POINT
+ epi3.r = -16384;
+ epi3.i = -28378;
+#else
epi3 = st->twiddles[fstride*m];
+#endif
for (i=0;i<N;i++)
{
Fout = Fout_beg + i*mm;
tw1=tw2=st->twiddles;
+ /* For non-custom modes, m is guaranteed to be a multiple of 4. */
k=m;
do {
- C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
C_MUL(scratch[1],Fout[m] , *tw1);
C_MUL(scratch[2],Fout[m2] , *tw2);
@@ -255,56 +230,8 @@ static void kf_bfly3(
}
}
-static void ki_bfly3(
- kiss_fft_cpx * Fout,
- const size_t fstride,
- const kiss_fft_state *st,
- int m,
- int N,
- int mm
- )
-{
- int i, k;
- const size_t m2 = 2*m;
- const kiss_twiddle_cpx *tw1,*tw2;
- kiss_fft_cpx scratch[5];
- kiss_twiddle_cpx epi3;
-
- kiss_fft_cpx * Fout_beg = Fout;
- epi3 = st->twiddles[fstride*m];
- for (i=0;i<N;i++)
- {
- Fout = Fout_beg + i*mm;
- tw1=tw2=st->twiddles;
- k=m;
- do{
-
- C_MULC(scratch[1],Fout[m] , *tw1);
- C_MULC(scratch[2],Fout[m2] , *tw2);
-
- C_ADD(scratch[3],scratch[1],scratch[2]);
- C_SUB(scratch[0],scratch[1],scratch[2]);
- tw1 += fstride;
- tw2 += fstride*2;
-
- Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
- Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
-
- C_MULBYSCALAR( scratch[0] , -epi3.i );
-
- C_ADDTO(*Fout,scratch[3]);
-
- Fout[m2].r = Fout[m].r + scratch[0].i;
- Fout[m2].i = Fout[m].i - scratch[0].r;
-
- Fout[m].r -= scratch[0].i;
- Fout[m].i += scratch[0].r;
-
- ++Fout;
- }while(--k);
- }
-}
+#ifndef OVERRIDE_kf_bfly5
static void kf_bfly5(
kiss_fft_cpx * Fout,
const size_t fstride,
@@ -317,13 +244,19 @@ static void kf_bfly5(
kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
int i, u;
kiss_fft_cpx scratch[13];
- const kiss_twiddle_cpx * twiddles = st->twiddles;
const kiss_twiddle_cpx *tw;
kiss_twiddle_cpx ya,yb;
kiss_fft_cpx * Fout_beg = Fout;
- ya = twiddles[fstride*m];
- yb = twiddles[fstride*2*m];
+#ifdef FIXED_POINT
+ ya.r = 10126;
+ ya.i = -31164;
+ yb.r = -26510;
+ yb.i = -19261;
+#else
+ ya = st->twiddles[fstride*m];
+ yb = st->twiddles[fstride*2*m];
+#endif
tw=st->twiddles;
for (i=0;i<N;i++)
@@ -335,8 +268,8 @@ static void kf_bfly5(
Fout3=Fout0+3*m;
Fout4=Fout0+4*m;
+ /* For non-custom modes, m is guaranteed to be a multiple of 4. */
for ( u=0; u<m; ++u ) {
- C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
scratch[0] = *Fout0;
C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
@@ -373,74 +306,8 @@ static void kf_bfly5(
}
}
}
+#endif /* OVERRIDE_kf_bfly5 */
-static void ki_bfly5(
- kiss_fft_cpx * Fout,
- const size_t fstride,
- const kiss_fft_state *st,
- int m,
- int N,
- int mm
- )
-{
- kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
- int i, u;
- kiss_fft_cpx scratch[13];
- const kiss_twiddle_cpx * twiddles = st->twiddles;
- const kiss_twiddle_cpx *tw;
- kiss_twiddle_cpx ya,yb;
- kiss_fft_cpx * Fout_beg = Fout;
-
- ya = twiddles[fstride*m];
- yb = twiddles[fstride*2*m];
- tw=st->twiddles;
-
- for (i=0;i<N;i++)
- {
- Fout = Fout_beg + i*mm;
- Fout0=Fout;
- Fout1=Fout0+m;
- Fout2=Fout0+2*m;
- Fout3=Fout0+3*m;
- Fout4=Fout0+4*m;
-
- for ( u=0; u<m; ++u ) {
- scratch[0] = *Fout0;
-
- C_MULC(scratch[1] ,*Fout1, tw[u*fstride]);
- C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]);
- C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]);
- C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]);
-
- C_ADD( scratch[7],scratch[1],scratch[4]);
- C_SUB( scratch[10],scratch[1],scratch[4]);
- C_ADD( scratch[8],scratch[2],scratch[3]);
- C_SUB( scratch[9],scratch[2],scratch[3]);
-
- Fout0->r += scratch[7].r + scratch[8].r;
- Fout0->i += scratch[7].i + scratch[8].i;
-
- scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
- scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
-
- scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i);
- scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i);
-
- C_SUB(*Fout1,scratch[5],scratch[6]);
- C_ADD(*Fout4,scratch[5],scratch[6]);
-
- scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
- scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
- scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i);
- scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i);
-
- C_ADD(*Fout2,scratch[11],scratch[12]);
- C_SUB(*Fout3,scratch[11],scratch[12]);
-
- ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
- }
- }
-}
#endif
@@ -488,6 +355,9 @@ static
int kf_factor(int n,opus_int16 * facbuf)
{
int p=4;
+ int i;
+ int stages=0;
+ int nbak = n;
/*factor out powers of 4, powers of 2, then any remaining primes */
do {
@@ -509,9 +379,30 @@ int kf_factor(int n,opus_int16 * facbuf)
{
return 0;
}
- *facbuf++ = p;
- *facbuf++ = n;
+ facbuf[2*stages] = p;
+ if (p==2 && stages > 1)
+ {
+ facbuf[2*stages] = 4;
+ facbuf[2] = 2;
+ }
+ stages++;
} while (n > 1);
+ n = nbak;
+ /* Reverse the order to get the radix 4 at the end, so we can use the
+ fast degenerate case. It turns out that reversing the order also
+ improves the noise behaviour. */
+ for (i=0;i<stages/2;i++)
+ {
+ int tmp;
+ tmp = facbuf[2*i];
+ facbuf[2*i] = facbuf[2*(stages-i-1)];
+ facbuf[2*(stages-i-1)] = tmp;
+ }
+ for (i=0;i<stages;i++)
+ {
+ n /= facbuf[2*i];
+ facbuf[2*i+1] = n;
+ }
return 1;
}
@@ -555,14 +446,20 @@ kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, co
kiss_twiddle_cpx *twiddles;
st->nfft=nfft;
-#ifndef FIXED_POINT
+#ifdef FIXED_POINT
+ st->scale_shift = celt_ilog2(st->nfft);
+ if (st->nfft == 1<<st->scale_shift)
+ st->scale = Q15ONE;
+ else
+ st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift);
+#else
st->scale = 1.f/nfft;
#endif
if (base != NULL)
{
st->twiddles = base->twiddles;
st->shift = 0;
- while (nfft<<st->shift != base->nfft && st->shift < 32)
+ while (st->shift < 32 && nfft<<st->shift != base->nfft)
st->shift++;
if (st->shift>=32)
goto fail;
@@ -606,7 +503,7 @@ void opus_fft_free(const kiss_fft_state *cfg)
#endif /* CUSTOM_MODES */
-void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
+void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout)
{
int m2, m;
int p;
@@ -618,17 +515,6 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou
/* st->shift can be -1 */
shift = st->shift>0 ? st->shift : 0;
- celt_assert2 (fin != fout, "In-place FFT not supported");
- /* Bit-reverse the input */
- for (i=0;i<st->nfft;i++)
- {
- fout[st->bitrev[i]] = fin[i];
-#ifndef FIXED_POINT
- fout[st->bitrev[i]].r *= st->scale;
- fout[st->bitrev[i]].i *= st->scale;
-#endif
- }
-
fstride[0] = 1;
L=0;
do {
@@ -647,7 +533,7 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou
switch (st->factors[2*i])
{
case 2:
- kf_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);
+ kf_bfly2(fout, m, fstride[i]);
break;
case 4:
kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
@@ -665,55 +551,41 @@ void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou
}
}
-void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
+void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
{
- int m2, m;
- int p;
- int L;
- int fstride[MAXFACTORS];
int i;
- int shift;
+ opus_val16 scale;
+#ifdef FIXED_POINT
+ /* Allows us to scale with MULT16_32_Q16(), which is faster than
+ MULT16_32_Q15() on ARM. */
+ int scale_shift = st->scale_shift-1;
+#endif
+ scale = st->scale;
- /* st->shift can be -1 */
- shift = st->shift>0 ? st->shift : 0;
celt_assert2 (fin != fout, "In-place FFT not supported");
/* Bit-reverse the input */
for (i=0;i<st->nfft;i++)
- fout[st->bitrev[i]] = fin[i];
-
- fstride[0] = 1;
- L=0;
- do {
- p = st->factors[2*L];
- m = st->factors[2*L+1];
- fstride[L+1] = fstride[L]*p;
- L++;
- } while(m!=1);
- m = st->factors[2*L-1];
- for (i=L-1;i>=0;i--)
{
- if (i!=0)
- m2 = st->factors[2*i-1];
- else
- m2 = 1;
- switch (st->factors[2*i])
- {
- case 2:
- ki_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);
- break;
- case 4:
- ki_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
- break;
-#ifndef RADIX_TWO_ONLY
- case 3:
- ki_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
- break;
- case 5:
- ki_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
- break;
-#endif
- }
- m = m2;
+ kiss_fft_cpx x = fin[i];
+ fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift);
+ fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift);
}
+ opus_fft_impl(st, fout);
}
+
+#ifdef TEST_UNIT_DFT_C
+void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
+{
+ int i;
+ celt_assert2 (fin != fout, "In-place FFT not supported");
+ /* Bit-reverse the input */
+ for (i=0;i<st->nfft;i++)
+ fout[st->bitrev[i]] = fin[i];
+ for (i=0;i<st->nfft;i++)
+ fout[i].i = -fout[i].i;
+ opus_fft_impl(st, fout);
+ for (i=0;i<st->nfft;i++)
+ fout[i].i = -fout[i].i;
+}
+#endif
« no previous file with comments | « celt/kiss_fft.h ('k') | celt/mdct.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698