OLD | NEW |
1 /*Copyright (c) 2003-2004, Mark Borgerding | 1 /*Copyright (c) 2003-2004, Mark Borgerding |
2 Lots of modifications by Jean-Marc Valin | 2 Lots of modifications by Jean-Marc Valin |
3 Copyright (c) 2005-2007, Xiph.Org Foundation | 3 Copyright (c) 2005-2007, Xiph.Org Foundation |
4 Copyright (c) 2008, Xiph.Org Foundation, CSIRO | 4 Copyright (c) 2008, Xiph.Org Foundation, CSIRO |
5 | 5 |
6 All rights reserved. | 6 All rights reserved. |
7 | 7 |
8 Redistribution and use in source and binary forms, with or without | 8 Redistribution and use in source and binary forms, with or without |
9 modification, are permitted provided that the following conditions are met: | 9 modification, are permitted provided that the following conditions are met: |
10 | 10 |
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
75 /* We know that m==4 here because the radix-2 is just after a radix-4 */ | 75 /* We know that m==4 here because the radix-2 is just after a radix-4 */ |
76 celt_assert(m==4); | 76 celt_assert(m==4); |
77 for (i=0;i<N;i++) | 77 for (i=0;i<N;i++) |
78 { | 78 { |
79 kiss_fft_cpx t; | 79 kiss_fft_cpx t; |
80 Fout2 = Fout + 4; | 80 Fout2 = Fout + 4; |
81 t = Fout2[0]; | 81 t = Fout2[0]; |
82 C_SUB( Fout2[0] , Fout[0] , t ); | 82 C_SUB( Fout2[0] , Fout[0] , t ); |
83 C_ADDTO( Fout[0] , t ); | 83 C_ADDTO( Fout[0] , t ); |
84 | 84 |
85 t.r = S_MUL(Fout2[1].r+Fout2[1].i, tw); | 85 t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw); |
86 t.i = S_MUL(Fout2[1].i-Fout2[1].r, tw); | 86 t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw); |
87 C_SUB( Fout2[1] , Fout[1] , t ); | 87 C_SUB( Fout2[1] , Fout[1] , t ); |
88 C_ADDTO( Fout[1] , t ); | 88 C_ADDTO( Fout[1] , t ); |
89 | 89 |
90 t.r = Fout2[2].i; | 90 t.r = Fout2[2].i; |
91 t.i = -Fout2[2].r; | 91 t.i = -Fout2[2].r; |
92 C_SUB( Fout2[2] , Fout[2] , t ); | 92 C_SUB( Fout2[2] , Fout[2] , t ); |
93 C_ADDTO( Fout[2] , t ); | 93 C_ADDTO( Fout[2] , t ); |
94 | 94 |
95 t.r = S_MUL(Fout2[3].i-Fout2[3].r, tw); | 95 t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw); |
96 t.i = S_MUL(-Fout2[3].i-Fout2[3].r, tw); | 96 t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw); |
97 C_SUB( Fout2[3] , Fout[3] , t ); | 97 C_SUB( Fout2[3] , Fout[3] , t ); |
98 C_ADDTO( Fout[3] , t ); | 98 C_ADDTO( Fout[3] , t ); |
99 Fout += 8; | 99 Fout += 8; |
100 } | 100 } |
101 } | 101 } |
102 } | 102 } |
103 | 103 |
104 static void kf_bfly4( | 104 static void kf_bfly4( |
105 kiss_fft_cpx * Fout, | 105 kiss_fft_cpx * Fout, |
106 const size_t fstride, | 106 const size_t fstride, |
(...skipping 12 matching lines...) Expand all Loading... |
119 { | 119 { |
120 kiss_fft_cpx scratch0, scratch1; | 120 kiss_fft_cpx scratch0, scratch1; |
121 | 121 |
122 C_SUB( scratch0 , *Fout, Fout[2] ); | 122 C_SUB( scratch0 , *Fout, Fout[2] ); |
123 C_ADDTO(*Fout, Fout[2]); | 123 C_ADDTO(*Fout, Fout[2]); |
124 C_ADD( scratch1 , Fout[1] , Fout[3] ); | 124 C_ADD( scratch1 , Fout[1] , Fout[3] ); |
125 C_SUB( Fout[2], *Fout, scratch1 ); | 125 C_SUB( Fout[2], *Fout, scratch1 ); |
126 C_ADDTO( *Fout , scratch1 ); | 126 C_ADDTO( *Fout , scratch1 ); |
127 C_SUB( scratch1 , Fout[1] , Fout[3] ); | 127 C_SUB( scratch1 , Fout[1] , Fout[3] ); |
128 | 128 |
129 Fout[1].r = scratch0.r + scratch1.i; | 129 Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i); |
130 Fout[1].i = scratch0.i - scratch1.r; | 130 Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r); |
131 Fout[3].r = scratch0.r - scratch1.i; | 131 Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i); |
132 Fout[3].i = scratch0.i + scratch1.r; | 132 Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r); |
133 Fout+=4; | 133 Fout+=4; |
134 } | 134 } |
135 } else { | 135 } else { |
136 int j; | 136 int j; |
137 kiss_fft_cpx scratch[6]; | 137 kiss_fft_cpx scratch[6]; |
138 const kiss_twiddle_cpx *tw1,*tw2,*tw3; | 138 const kiss_twiddle_cpx *tw1,*tw2,*tw3; |
139 const int m2=2*m; | 139 const int m2=2*m; |
140 const int m3=3*m; | 140 const int m3=3*m; |
141 kiss_fft_cpx * Fout_beg = Fout; | 141 kiss_fft_cpx * Fout_beg = Fout; |
142 for (i=0;i<N;i++) | 142 for (i=0;i<N;i++) |
(...skipping 10 matching lines...) Expand all Loading... |
153 C_SUB( scratch[5] , *Fout, scratch[1] ); | 153 C_SUB( scratch[5] , *Fout, scratch[1] ); |
154 C_ADDTO(*Fout, scratch[1]); | 154 C_ADDTO(*Fout, scratch[1]); |
155 C_ADD( scratch[3] , scratch[0] , scratch[2] ); | 155 C_ADD( scratch[3] , scratch[0] , scratch[2] ); |
156 C_SUB( scratch[4] , scratch[0] , scratch[2] ); | 156 C_SUB( scratch[4] , scratch[0] , scratch[2] ); |
157 C_SUB( Fout[m2], *Fout, scratch[3] ); | 157 C_SUB( Fout[m2], *Fout, scratch[3] ); |
158 tw1 += fstride; | 158 tw1 += fstride; |
159 tw2 += fstride*2; | 159 tw2 += fstride*2; |
160 tw3 += fstride*3; | 160 tw3 += fstride*3; |
161 C_ADDTO( *Fout , scratch[3] ); | 161 C_ADDTO( *Fout , scratch[3] ); |
162 | 162 |
163 Fout[m].r = scratch[5].r + scratch[4].i; | 163 Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i); |
164 Fout[m].i = scratch[5].i - scratch[4].r; | 164 Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r); |
165 Fout[m3].r = scratch[5].r - scratch[4].i; | 165 Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i); |
166 Fout[m3].i = scratch[5].i + scratch[4].r; | 166 Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r); |
167 ++Fout; | 167 ++Fout; |
168 } | 168 } |
169 } | 169 } |
170 } | 170 } |
171 } | 171 } |
172 | 172 |
173 | 173 |
174 #ifndef RADIX_TWO_ONLY | 174 #ifndef RADIX_TWO_ONLY |
175 | 175 |
176 static void kf_bfly3( | 176 static void kf_bfly3( |
(...skipping 28 matching lines...) Expand all Loading... |
205 do { | 205 do { |
206 | 206 |
207 C_MUL(scratch[1],Fout[m] , *tw1); | 207 C_MUL(scratch[1],Fout[m] , *tw1); |
208 C_MUL(scratch[2],Fout[m2] , *tw2); | 208 C_MUL(scratch[2],Fout[m2] , *tw2); |
209 | 209 |
210 C_ADD(scratch[3],scratch[1],scratch[2]); | 210 C_ADD(scratch[3],scratch[1],scratch[2]); |
211 C_SUB(scratch[0],scratch[1],scratch[2]); | 211 C_SUB(scratch[0],scratch[1],scratch[2]); |
212 tw1 += fstride; | 212 tw1 += fstride; |
213 tw2 += fstride*2; | 213 tw2 += fstride*2; |
214 | 214 |
215 Fout[m].r = Fout->r - HALF_OF(scratch[3].r); | 215 Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r)); |
216 Fout[m].i = Fout->i - HALF_OF(scratch[3].i); | 216 Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i)); |
217 | 217 |
218 C_MULBYSCALAR( scratch[0] , epi3.i ); | 218 C_MULBYSCALAR( scratch[0] , epi3.i ); |
219 | 219 |
220 C_ADDTO(*Fout,scratch[3]); | 220 C_ADDTO(*Fout,scratch[3]); |
221 | 221 |
222 Fout[m2].r = Fout[m].r + scratch[0].i; | 222 Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i); |
223 Fout[m2].i = Fout[m].i - scratch[0].r; | 223 Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r); |
224 | 224 |
225 Fout[m].r -= scratch[0].i; | 225 Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i); |
226 Fout[m].i += scratch[0].r; | 226 Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r); |
227 | 227 |
228 ++Fout; | 228 ++Fout; |
229 } while(--k); | 229 } while(--k); |
230 } | 230 } |
231 } | 231 } |
232 | 232 |
233 | 233 |
234 #ifndef OVERRIDE_kf_bfly5 | 234 #ifndef OVERRIDE_kf_bfly5 |
235 static void kf_bfly5( | 235 static void kf_bfly5( |
236 kiss_fft_cpx * Fout, | 236 kiss_fft_cpx * Fout, |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
275 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); | 275 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); |
276 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); | 276 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); |
277 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]); | 277 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]); |
278 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]); | 278 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]); |
279 | 279 |
280 C_ADD( scratch[7],scratch[1],scratch[4]); | 280 C_ADD( scratch[7],scratch[1],scratch[4]); |
281 C_SUB( scratch[10],scratch[1],scratch[4]); | 281 C_SUB( scratch[10],scratch[1],scratch[4]); |
282 C_ADD( scratch[8],scratch[2],scratch[3]); | 282 C_ADD( scratch[8],scratch[2],scratch[3]); |
283 C_SUB( scratch[9],scratch[2],scratch[3]); | 283 C_SUB( scratch[9],scratch[2],scratch[3]); |
284 | 284 |
285 Fout0->r += scratch[7].r + scratch[8].r; | 285 Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r
)); |
286 Fout0->i += scratch[7].i + scratch[8].i; | 286 Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i
)); |
287 | 287 |
288 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[
8].r,yb.r); | 288 scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r
,ya.r), S_MUL(scratch[8].r,yb.r))); |
289 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[
8].i,yb.r); | 289 scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i
,ya.r), S_MUL(scratch[8].i,yb.r))); |
290 | 290 |
291 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i); | 291 scratch[6].r = ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9]
.i,yb.i)); |
292 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i); | 292 scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL
(scratch[9].r,yb.i))); |
293 | 293 |
294 C_SUB(*Fout1,scratch[5],scratch[6]); | 294 C_SUB(*Fout1,scratch[5],scratch[6]); |
295 C_ADD(*Fout4,scratch[5],scratch[6]); | 295 C_ADD(*Fout4,scratch[5],scratch[6]); |
296 | 296 |
297 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch
[8].r,ya.r); | 297 scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].
r,yb.r), S_MUL(scratch[8].r,ya.r))); |
298 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch
[8].i,ya.r); | 298 scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].
i,yb.r), S_MUL(scratch[8].i,ya.r))); |
299 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i); | 299 scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10]
.i,yb.i)); |
300 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i); | 300 scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9]
.r,ya.i)); |
301 | 301 |
302 C_ADD(*Fout2,scratch[11],scratch[12]); | 302 C_ADD(*Fout2,scratch[11],scratch[12]); |
303 C_SUB(*Fout3,scratch[11],scratch[12]); | 303 C_SUB(*Fout3,scratch[11],scratch[12]); |
304 | 304 |
305 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; | 305 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; |
306 } | 306 } |
307 } | 307 } |
308 } | 308 } |
309 #endif /* OVERRIDE_kf_bfly5 */ | 309 #endif /* OVERRIDE_kf_bfly5 */ |
310 | 310 |
(...skipping 284 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
595 celt_assert2 (fin != fout, "In-place FFT not supported"); | 595 celt_assert2 (fin != fout, "In-place FFT not supported"); |
596 /* Bit-reverse the input */ | 596 /* Bit-reverse the input */ |
597 for (i=0;i<st->nfft;i++) | 597 for (i=0;i<st->nfft;i++) |
598 fout[st->bitrev[i]] = fin[i]; | 598 fout[st->bitrev[i]] = fin[i]; |
599 for (i=0;i<st->nfft;i++) | 599 for (i=0;i<st->nfft;i++) |
600 fout[i].i = -fout[i].i; | 600 fout[i].i = -fout[i].i; |
601 opus_fft_impl(st, fout); | 601 opus_fft_impl(st, fout); |
602 for (i=0;i<st->nfft;i++) | 602 for (i=0;i<st->nfft;i++) |
603 fout[i].i = -fout[i].i; | 603 fout[i].i = -fout[i].i; |
604 } | 604 } |
OLD | NEW |