| OLD | NEW |
| 1 /*Copyright (c) 2003-2004, Mark Borgerding | 1 /*Copyright (c) 2003-2004, Mark Borgerding |
| 2 Lots of modifications by Jean-Marc Valin | 2 Lots of modifications by Jean-Marc Valin |
| 3 Copyright (c) 2005-2007, Xiph.Org Foundation | 3 Copyright (c) 2005-2007, Xiph.Org Foundation |
| 4 Copyright (c) 2008, Xiph.Org Foundation, CSIRO | 4 Copyright (c) 2008, Xiph.Org Foundation, CSIRO |
| 5 | 5 |
| 6 All rights reserved. | 6 All rights reserved. |
| 7 | 7 |
| 8 Redistribution and use in source and binary forms, with or without | 8 Redistribution and use in source and binary forms, with or without |
| 9 modification, are permitted provided that the following conditions are met: | 9 modification, are permitted provided that the following conditions are met: |
| 10 | 10 |
| (...skipping 64 matching lines...) |
| 75 /* We know that m==4 here because the radix-2 is just after a radix-4 */ | 75 /* We know that m==4 here because the radix-2 is just after a radix-4 */ |
| 76 celt_assert(m==4); | 76 celt_assert(m==4); |
| 77 for (i=0;i<N;i++) | 77 for (i=0;i<N;i++) |
| 78 { | 78 { |
| 79 kiss_fft_cpx t; | 79 kiss_fft_cpx t; |
| 80 Fout2 = Fout + 4; | 80 Fout2 = Fout + 4; |
| 81 t = Fout2[0]; | 81 t = Fout2[0]; |
| 82 C_SUB( Fout2[0] , Fout[0] , t ); | 82 C_SUB( Fout2[0] , Fout[0] , t ); |
| 83 C_ADDTO( Fout[0] , t ); | 83 C_ADDTO( Fout[0] , t ); |
| 84 | 84 |
| 85 t.r = S_MUL(Fout2[1].r+Fout2[1].i, tw); | 85 t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw); |
| 86 t.i = S_MUL(Fout2[1].i-Fout2[1].r, tw); | 86 t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw); |
| 87 C_SUB( Fout2[1] , Fout[1] , t ); | 87 C_SUB( Fout2[1] , Fout[1] , t ); |
| 88 C_ADDTO( Fout[1] , t ); | 88 C_ADDTO( Fout[1] , t ); |
| 89 | 89 |
| 90 t.r = Fout2[2].i; | 90 t.r = Fout2[2].i; |
| 91 t.i = -Fout2[2].r; | 91 t.i = -Fout2[2].r; |
| 92 C_SUB( Fout2[2] , Fout[2] , t ); | 92 C_SUB( Fout2[2] , Fout[2] , t ); |
| 93 C_ADDTO( Fout[2] , t ); | 93 C_ADDTO( Fout[2] , t ); |
| 94 | 94 |
| 95 t.r = S_MUL(Fout2[3].i-Fout2[3].r, tw); | 95 t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw); |
| 96 t.i = S_MUL(-Fout2[3].i-Fout2[3].r, tw); | 96 t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw); |
| 97 C_SUB( Fout2[3] , Fout[3] , t ); | 97 C_SUB( Fout2[3] , Fout[3] , t ); |
| 98 C_ADDTO( Fout[3] , t ); | 98 C_ADDTO( Fout[3] , t ); |
| 99 Fout += 8; | 99 Fout += 8; |
| 100 } | 100 } |
| 101 } | 101 } |
| 102 } | 102 } |
| 103 | 103 |
| 104 static void kf_bfly4( | 104 static void kf_bfly4( |
| 105 kiss_fft_cpx * Fout, | 105 kiss_fft_cpx * Fout, |
| 106 const size_t fstride, | 106 const size_t fstride, |
| (...skipping 12 matching lines...) |
| 119 { | 119 { |
| 120 kiss_fft_cpx scratch0, scratch1; | 120 kiss_fft_cpx scratch0, scratch1; |
| 121 | 121 |
| 122 C_SUB( scratch0 , *Fout, Fout[2] ); | 122 C_SUB( scratch0 , *Fout, Fout[2] ); |
| 123 C_ADDTO(*Fout, Fout[2]); | 123 C_ADDTO(*Fout, Fout[2]); |
| 124 C_ADD( scratch1 , Fout[1] , Fout[3] ); | 124 C_ADD( scratch1 , Fout[1] , Fout[3] ); |
| 125 C_SUB( Fout[2], *Fout, scratch1 ); | 125 C_SUB( Fout[2], *Fout, scratch1 ); |
| 126 C_ADDTO( *Fout , scratch1 ); | 126 C_ADDTO( *Fout , scratch1 ); |
| 127 C_SUB( scratch1 , Fout[1] , Fout[3] ); | 127 C_SUB( scratch1 , Fout[1] , Fout[3] ); |
| 128 | 128 |
| 129 Fout[1].r = scratch0.r + scratch1.i; | 129 Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i); |
| 130 Fout[1].i = scratch0.i - scratch1.r; | 130 Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r); |
| 131 Fout[3].r = scratch0.r - scratch1.i; | 131 Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i); |
| 132 Fout[3].i = scratch0.i + scratch1.r; | 132 Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r); |
| 133 Fout+=4; | 133 Fout+=4; |
| 134 } | 134 } |
| 135 } else { | 135 } else { |
| 136 int j; | 136 int j; |
| 137 kiss_fft_cpx scratch[6]; | 137 kiss_fft_cpx scratch[6]; |
| 138 const kiss_twiddle_cpx *tw1,*tw2,*tw3; | 138 const kiss_twiddle_cpx *tw1,*tw2,*tw3; |
| 139 const int m2=2*m; | 139 const int m2=2*m; |
| 140 const int m3=3*m; | 140 const int m3=3*m; |
| 141 kiss_fft_cpx * Fout_beg = Fout; | 141 kiss_fft_cpx * Fout_beg = Fout; |
| 142 for (i=0;i<N;i++) | 142 for (i=0;i<N;i++) |
| (...skipping 10 matching lines...) |
| 153 C_SUB( scratch[5] , *Fout, scratch[1] ); | 153 C_SUB( scratch[5] , *Fout, scratch[1] ); |
| 154 C_ADDTO(*Fout, scratch[1]); | 154 C_ADDTO(*Fout, scratch[1]); |
| 155 C_ADD( scratch[3] , scratch[0] , scratch[2] ); | 155 C_ADD( scratch[3] , scratch[0] , scratch[2] ); |
| 156 C_SUB( scratch[4] , scratch[0] , scratch[2] ); | 156 C_SUB( scratch[4] , scratch[0] , scratch[2] ); |
| 157 C_SUB( Fout[m2], *Fout, scratch[3] ); | 157 C_SUB( Fout[m2], *Fout, scratch[3] ); |
| 158 tw1 += fstride; | 158 tw1 += fstride; |
| 159 tw2 += fstride*2; | 159 tw2 += fstride*2; |
| 160 tw3 += fstride*3; | 160 tw3 += fstride*3; |
| 161 C_ADDTO( *Fout , scratch[3] ); | 161 C_ADDTO( *Fout , scratch[3] ); |
| 162 | 162 |
| 163 Fout[m].r = scratch[5].r + scratch[4].i; | 163 Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i); |
| 164 Fout[m].i = scratch[5].i - scratch[4].r; | 164 Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r); |
| 165 Fout[m3].r = scratch[5].r - scratch[4].i; | 165 Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i); |
| 166 Fout[m3].i = scratch[5].i + scratch[4].r; | 166 Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r); |
| 167 ++Fout; | 167 ++Fout; |
| 168 } | 168 } |
| 169 } | 169 } |
| 170 } | 170 } |
| 171 } | 171 } |
| 172 | 172 |
| 173 | 173 |
| 174 #ifndef RADIX_TWO_ONLY | 174 #ifndef RADIX_TWO_ONLY |
| 175 | 175 |
| 176 static void kf_bfly3( | 176 static void kf_bfly3( |
| (...skipping 28 matching lines...) |
| 205 do { | 205 do { |
| 206 | 206 |
| 207 C_MUL(scratch[1],Fout[m] , *tw1); | 207 C_MUL(scratch[1],Fout[m] , *tw1); |
| 208 C_MUL(scratch[2],Fout[m2] , *tw2); | 208 C_MUL(scratch[2],Fout[m2] , *tw2); |
| 209 | 209 |
| 210 C_ADD(scratch[3],scratch[1],scratch[2]); | 210 C_ADD(scratch[3],scratch[1],scratch[2]); |
| 211 C_SUB(scratch[0],scratch[1],scratch[2]); | 211 C_SUB(scratch[0],scratch[1],scratch[2]); |
| 212 tw1 += fstride; | 212 tw1 += fstride; |
| 213 tw2 += fstride*2; | 213 tw2 += fstride*2; |
| 214 | 214 |
| 215 Fout[m].r = Fout->r - HALF_OF(scratch[3].r); | 215 Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r)); |
| 216 Fout[m].i = Fout->i - HALF_OF(scratch[3].i); | 216 Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i)); |
| 217 | 217 |
| 218 C_MULBYSCALAR( scratch[0] , epi3.i ); | 218 C_MULBYSCALAR( scratch[0] , epi3.i ); |
| 219 | 219 |
| 220 C_ADDTO(*Fout,scratch[3]); | 220 C_ADDTO(*Fout,scratch[3]); |
| 221 | 221 |
| 222 Fout[m2].r = Fout[m].r + scratch[0].i; | 222 Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i); |
| 223 Fout[m2].i = Fout[m].i - scratch[0].r; | 223 Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r); |
| 224 | 224 |
| 225 Fout[m].r -= scratch[0].i; | 225 Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i); |
| 226 Fout[m].i += scratch[0].r; | 226 Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r); |
| 227 | 227 |
| 228 ++Fout; | 228 ++Fout; |
| 229 } while(--k); | 229 } while(--k); |
| 230 } | 230 } |
| 231 } | 231 } |
| 232 | 232 |
| 233 | 233 |
| 234 #ifndef OVERRIDE_kf_bfly5 | 234 #ifndef OVERRIDE_kf_bfly5 |
| 235 static void kf_bfly5( | 235 static void kf_bfly5( |
| 236 kiss_fft_cpx * Fout, | 236 kiss_fft_cpx * Fout, |
| (...skipping 38 matching lines...) |
| 275 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); | 275 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); |
| 276 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); | 276 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); |
| 277 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]); | 277 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]); |
| 278 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]); | 278 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]); |
| 279 | 279 |
| 280 C_ADD( scratch[7],scratch[1],scratch[4]); | 280 C_ADD( scratch[7],scratch[1],scratch[4]); |
| 281 C_SUB( scratch[10],scratch[1],scratch[4]); | 281 C_SUB( scratch[10],scratch[1],scratch[4]); |
| 282 C_ADD( scratch[8],scratch[2],scratch[3]); | 282 C_ADD( scratch[8],scratch[2],scratch[3]); |
| 283 C_SUB( scratch[9],scratch[2],scratch[3]); | 283 C_SUB( scratch[9],scratch[2],scratch[3]); |
| 284 | 284 |
| 285 Fout0->r += scratch[7].r + scratch[8].r; | 285 Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r)); |
| 286 Fout0->i += scratch[7].i + scratch[8].i; | 286 Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i)); |
| 287 | 287 |
| 288 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); | 288 scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r))); |
| 289 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); | 289 scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r))); |
| 290 | 290 |
| 291 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i); | 291 scratch[6].r = ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i)); |
| 292 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i); | 292 scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i))); |
| 293 | 293 |
| 294 C_SUB(*Fout1,scratch[5],scratch[6]); | 294 C_SUB(*Fout1,scratch[5],scratch[6]); |
| 295 C_ADD(*Fout4,scratch[5],scratch[6]); | 295 C_ADD(*Fout4,scratch[5],scratch[6]); |
| 296 | 296 |
| 297 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); | 297 scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r))); |
| 298 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); | 298 scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r))); |
| 299 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i); | 299 scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i)); |
| 300 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i); | 300 scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i)); |
| 301 | 301 |
| 302 C_ADD(*Fout2,scratch[11],scratch[12]); | 302 C_ADD(*Fout2,scratch[11],scratch[12]); |
| 303 C_SUB(*Fout3,scratch[11],scratch[12]); | 303 C_SUB(*Fout3,scratch[11],scratch[12]); |
| 304 | 304 |
| 305 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; | 305 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; |
| 306 } | 306 } |
| 307 } | 307 } |
| 308 } | 308 } |
| 309 #endif /* OVERRIDE_kf_bfly5 */ | 309 #endif /* OVERRIDE_kf_bfly5 */ |
| 310 | 310 |
| (...skipping 284 matching lines...) |
| 595 celt_assert2 (fin != fout, "In-place FFT not supported"); | 595 celt_assert2 (fin != fout, "In-place FFT not supported"); |
| 596 /* Bit-reverse the input */ | 596 /* Bit-reverse the input */ |
| 597 for (i=0;i<st->nfft;i++) | 597 for (i=0;i<st->nfft;i++) |
| 598 fout[st->bitrev[i]] = fin[i]; | 598 fout[st->bitrev[i]] = fin[i]; |
| 599 for (i=0;i<st->nfft;i++) | 599 for (i=0;i<st->nfft;i++) |
| 600 fout[i].i = -fout[i].i; | 600 fout[i].i = -fout[i].i; |
| 601 opus_fft_impl(st, fout); | 601 opus_fft_impl(st, fout); |
| 602 for (i=0;i<st->nfft;i++) | 602 for (i=0;i<st->nfft;i++) |
| 603 fout[i].i = -fout[i].i; | 603 fout[i].i = -fout[i].i; |
| 604 } | 604 } |
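The pattern throughout this change is mechanical: every fixed-point add, subtract, and negate in the FFT butterflies that can legitimately overflow is routed through ADD32_ovflw/SUB32_ovflw/NEG32_ovflw, so wrap-around happens in unsigned arithmetic (well defined in C) rather than as signed-overflow undefined behavior. Below is a minimal sketch of how such wrap-on-overflow macros can be defined, assuming definitions along the lines of those in CELT's arch.h for fixed-point builds; the opus_val32 typedef and the demo main are illustrative stand-ins, not part of this patch.

```c
#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for the fixed-point sample type (assumption). */
typedef int32_t opus_val32;

/* Wrap-on-overflow helpers in the spirit of the macros used above:
   the arithmetic is done in uint32_t, where overflow wraps modulo 2^32
   by definition, then the result is converted back to the signed type.
   On the two's-complement targets Opus runs on, that conversion yields
   the expected wrapped value and, unlike a signed overflow, is never
   undefined behavior. */
#define ADD32_ovflw(a,b) ((opus_val32)((uint32_t)(a) + (uint32_t)(b)))
#define SUB32_ovflw(a,b) ((opus_val32)((uint32_t)(a) - (uint32_t)(b)))
#define NEG32_ovflw(a)   ((opus_val32)(0u - (uint32_t)(a)))

int main(void)
{
    /* INT32_MAX + 1 would be UB as a plain signed add; here it wraps. */
    printf("%d\n", ADD32_ovflw(INT32_MAX, 1));  /* -2147483648 */
    printf("%d\n", SUB32_ovflw(INT32_MIN, 1));  /*  2147483647 */
    printf("%d\n", NEG32_ovflw(INT32_MIN));     /* -2147483648 */
    return 0;
}
```

This also explains two things about the diff: composite expressions such as a + b + c become nested two-operand calls, ADD32_ovflw(a, ADD32_ovflw(b, c)), and only the fixed-point arithmetic is rewritten, since in float builds these macros are expected to reduce to the plain operators.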