OLD | NEW |
1 /*Copyright (c) 2003-2004, Mark Borgerding | 1 /*Copyright (c) 2003-2004, Mark Borgerding |
2 Lots of modifications by Jean-Marc Valin | 2 Lots of modifications by Jean-Marc Valin |
3 Copyright (c) 2005-2007, Xiph.Org Foundation | 3 Copyright (c) 2005-2007, Xiph.Org Foundation |
4 Copyright (c) 2008, Xiph.Org Foundation, CSIRO | 4 Copyright (c) 2008, Xiph.Org Foundation, CSIRO |
5 | 5 |
6 All rights reserved. | 6 All rights reserved. |
7 | 7 |
8 Redistribution and use in source and binary forms, with or without | 8 Redistribution and use in source and binary forms, with or without |
9 modification, are permitted provided that the following conditions are met: | 9 modification, are permitted provided that the following conditions are met: |
10 | 10 |
(...skipping 29 matching lines...) |
40 #include "os_support.h" | 40 #include "os_support.h" |
41 #include "mathops.h" | 41 #include "mathops.h" |
42 #include "stack_alloc.h" | 42 #include "stack_alloc.h" |
43 | 43 |
44 /* The guts header contains all the multiplication and addition macros that are defined for | 44 /* The guts header contains all the multiplication and addition macros that are defined for |
45 complex numbers. It also declares the kf_ internal functions. | 45 complex numbers. It also declares the kf_ internal functions. |
46 */ | 46 */ |
47 | 47 |
48 static void kf_bfly2( | 48 static void kf_bfly2( |
49 kiss_fft_cpx * Fout, | 49 kiss_fft_cpx * Fout, |
50 const size_t fstride, | |
51 const kiss_fft_state *st, | |
52 int m, | 50 int m, |
53 int N, | 51 int N |
54 int mm | |
55 ) | 52 ) |
56 { | 53 { |
57 kiss_fft_cpx * Fout2; | 54 kiss_fft_cpx * Fout2; |
58 const kiss_twiddle_cpx * tw1; | 55 int i; |
59 int i,j; | 56 (void)m; |
60 kiss_fft_cpx * Fout_beg = Fout; | 57 #ifdef CUSTOM_MODES |
61 for (i=0;i<N;i++) | 58 if (m==1) |
62 { | 59 { |
63 Fout = Fout_beg + i*mm; | 60 celt_assert(m==1); |
64 Fout2 = Fout + m; | 61 for (i=0;i<N;i++) |
65 tw1 = st->twiddles; | |
66 for(j=0;j<m;j++) | |
67 { | 62 { |
68 kiss_fft_cpx t; | 63 kiss_fft_cpx t; |
69 Fout->r = SHR32(Fout->r, 1);Fout->i = SHR32(Fout->i, 1); | 64 Fout2 = Fout + 1; |
70 Fout2->r = SHR32(Fout2->r, 1);Fout2->i = SHR32(Fout2->i, 1); | 65 t = *Fout2; |
71 C_MUL (t, *Fout2 , *tw1); | |
72 tw1 += fstride; | |
73 C_SUB( *Fout2 , *Fout , t ); | 66 C_SUB( *Fout2 , *Fout , t ); |
74 C_ADDTO( *Fout , t ); | 67 C_ADDTO( *Fout , t ); |
75 ++Fout2; | 68 Fout += 2; |
76 ++Fout; | 69 } |
| 70 } else |
| 71 #endif |
| 72 { |
| 73 opus_val16 tw; |
| 74 tw = QCONST16(0.7071067812f, 15); |
| 75 /* We know that m==4 here because the radix-2 is just after a radix-4 */ |
| 76 celt_assert(m==4); |
| 77 for (i=0;i<N;i++) |
| 78 { |
| 79 kiss_fft_cpx t; |
| 80 Fout2 = Fout + 4; |
| 81 t = Fout2[0]; |
| 82 C_SUB( Fout2[0] , Fout[0] , t ); |
| 83 C_ADDTO( Fout[0] , t ); |
| 84 |
| 85 t.r = S_MUL(Fout2[1].r+Fout2[1].i, tw); |
| 86 t.i = S_MUL(Fout2[1].i-Fout2[1].r, tw); |
| 87 C_SUB( Fout2[1] , Fout[1] , t ); |
| 88 C_ADDTO( Fout[1] , t ); |
| 89 |
| 90 t.r = Fout2[2].i; |
| 91 t.i = -Fout2[2].r; |
| 92 C_SUB( Fout2[2] , Fout[2] , t ); |
| 93 C_ADDTO( Fout[2] , t ); |
| 94 |
| 95 t.r = S_MUL(Fout2[3].i-Fout2[3].r, tw); |
| 96 t.i = S_MUL(-Fout2[3].i-Fout2[3].r, tw); |
| 97 C_SUB( Fout2[3] , Fout[3] , t ); |
| 98 C_ADDTO( Fout[3] , t ); |
| 99 Fout += 8; |
77 } | 100 } |
78 } | 101 } |
79 } | 102 } |
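The unrolled branch above works because this radix-2 pass only ever runs right after a radix-4 stage (hence celt_assert(m==4)), so the four twiddles it needs are the eighth roots of unity W_8^k = exp(-2*pi*i*k/8): 1, (1-i)/sqrt(2), -i and -(1+i)/sqrt(2). QCONST16(0.7071067812f, 15) is 1/sqrt(2) in Q15 (roughly 23170). A minimal stand-alone sketch of that claim, not part of the patch:

/* Prints the four twiddles hard-coded by the unrolled radix-2 branch above. */
#include <math.h>
#include <stdio.h>

int main(void)
{
   static const double pi = 3.14159265358979323846;
   int k;
   for (k = 0; k < 4; k++)
      printf("W_8^%d = %+.7f %+.7fi\n", k, cos(-2*pi*k/8.), sin(-2*pi*k/8.));
   return 0;
}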
80 | |
81 static void ki_bfly2( | |
82 kiss_fft_cpx * Fout, | |
83 const size_t fstride, | |
84 const kiss_fft_state *st, | |
85 int m, | |
86 int N, | |
87 int mm | |
88 ) | |
89 { | |
90 kiss_fft_cpx * Fout2; | |
91 const kiss_twiddle_cpx * tw1; | |
92 kiss_fft_cpx t; | |
93 int i,j; | |
94 kiss_fft_cpx * Fout_beg = Fout; | |
95 for (i=0;i<N;i++) | |
96 { | |
97 Fout = Fout_beg + i*mm; | |
98 Fout2 = Fout + m; | |
99 tw1 = st->twiddles; | |
100 for(j=0;j<m;j++) | |
101 { | |
102 C_MULC (t, *Fout2 , *tw1); | |
103 tw1 += fstride; | |
104 C_SUB( *Fout2 , *Fout , t ); | |
105 C_ADDTO( *Fout , t ); | |
106 ++Fout2; | |
107 ++Fout; | |
108 } | |
109 } | |
110 } | |
111 | 103 |
112 static void kf_bfly4( | 104 static void kf_bfly4( |
113 kiss_fft_cpx * Fout, | 105 kiss_fft_cpx * Fout, |
114 const size_t fstride, | 106 const size_t fstride, |
115 const kiss_fft_state *st, | 107 const kiss_fft_state *st, |
116 int m, | 108 int m, |
117 int N, | 109 int N, |
118 int mm | 110 int mm |
119 ) | 111 ) |
120 { | 112 { |
121 const kiss_twiddle_cpx *tw1,*tw2,*tw3; | 113 int i; |
122 kiss_fft_cpx scratch[6]; | |
123 const size_t m2=2*m; | |
124 const size_t m3=3*m; | |
125 int i, j; | |
126 | 114 |
127 kiss_fft_cpx * Fout_beg = Fout; | 115 if (m==1) |
128 for (i=0;i<N;i++) | |
129 { | 116 { |
130 Fout = Fout_beg + i*mm; | 117 /* Degenerate case where all the twiddles are 1. */ |
131 tw3 = tw2 = tw1 = st->twiddles; | 118 for (i=0;i<N;i++) |
132 for (j=0;j<m;j++) | |
133 { | 119 { |
134 C_MUL4(scratch[0],Fout[m] , *tw1 ); | 120 kiss_fft_cpx scratch0, scratch1; |
135 C_MUL4(scratch[1],Fout[m2] , *tw2 ); | |
136 C_MUL4(scratch[2],Fout[m3] , *tw3 ); | |
137 | 121 |
138 Fout->r = PSHR32(Fout->r, 2); | 122 C_SUB( scratch0 , *Fout, Fout[2] ); |
139 Fout->i = PSHR32(Fout->i, 2); | 123 C_ADDTO(*Fout, Fout[2]); |
140 C_SUB( scratch[5] , *Fout, scratch[1] ); | 124 C_ADD( scratch1 , Fout[1] , Fout[3] ); |
141 C_ADDTO(*Fout, scratch[1]); | 125 C_SUB( Fout[2], *Fout, scratch1 ); |
142 C_ADD( scratch[3] , scratch[0] , scratch[2] ); | 126 C_ADDTO( *Fout , scratch1 ); |
143 C_SUB( scratch[4] , scratch[0] , scratch[2] ); | 127 C_SUB( scratch1 , Fout[1] , Fout[3] ); |
144 C_SUB( Fout[m2], *Fout, scratch[3] ); | |
145 tw1 += fstride; | |
146 tw2 += fstride*2; | |
147 tw3 += fstride*3; | |
148 C_ADDTO( *Fout , scratch[3] ); | |
149 | 128 |
150 Fout[m].r = scratch[5].r + scratch[4].i; | 129 Fout[1].r = scratch0.r + scratch1.i; |
151 Fout[m].i = scratch[5].i - scratch[4].r; | 130 Fout[1].i = scratch0.i - scratch1.r; |
152 Fout[m3].r = scratch[5].r - scratch[4].i; | 131 Fout[3].r = scratch0.r - scratch1.i; |
153 Fout[m3].i = scratch[5].i + scratch[4].r; | 132 Fout[3].i = scratch0.i + scratch1.r; |
154 ++Fout; | 133 Fout+=4; |
| 134 } |
| 135 } else { |
| 136 int j; |
| 137 kiss_fft_cpx scratch[6]; |
| 138 const kiss_twiddle_cpx *tw1,*tw2,*tw3; |
| 139 const int m2=2*m; |
| 140 const int m3=3*m; |
| 141 kiss_fft_cpx * Fout_beg = Fout; |
| 142 for (i=0;i<N;i++) |
| 143 { |
| 144 Fout = Fout_beg + i*mm; |
| 145 tw3 = tw2 = tw1 = st->twiddles; |
| 146 /* m is guaranteed to be a multiple of 4. */ |
| 147 for (j=0;j<m;j++) |
| 148 { |
| 149 C_MUL(scratch[0],Fout[m] , *tw1 ); |
| 150 C_MUL(scratch[1],Fout[m2] , *tw2 ); |
| 151 C_MUL(scratch[2],Fout[m3] , *tw3 ); |
| 152 |
| 153 C_SUB( scratch[5] , *Fout, scratch[1] ); |
| 154 C_ADDTO(*Fout, scratch[1]); |
| 155 C_ADD( scratch[3] , scratch[0] , scratch[2] ); |
| 156 C_SUB( scratch[4] , scratch[0] , scratch[2] ); |
| 157 C_SUB( Fout[m2], *Fout, scratch[3] ); |
| 158 tw1 += fstride; |
| 159 tw2 += fstride*2; |
| 160 tw3 += fstride*3; |
| 161 C_ADDTO( *Fout , scratch[3] ); |
| 162 |
| 163 Fout[m].r = scratch[5].r + scratch[4].i; |
| 164 Fout[m].i = scratch[5].i - scratch[4].r; |
| 165 Fout[m3].r = scratch[5].r - scratch[4].i; |
| 166 Fout[m3].i = scratch[5].i + scratch[4].r; |
| 167 ++Fout; |
| 168 } |
155 } | 169 } |
156 } | 170 } |
157 } | 171 } |
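For reference, the m==1 fast path above is the plain 4-point DFT with unit twiddles; writing Fout[0..3] as a, b, c, d (notation mine, not the source's), the outputs it computes are:

/* 4-point DFT with all twiddles equal to 1 (what the m==1 branch computes):
      F[0] = a + b + c + d
      F[1] = (a - c) - i*(b - d)
      F[2] = (a + c) - (b + d)
      F[3] = (a - c) + i*(b - d)
   scratch0 holds a - c; scratch1 holds first b + d, then b - d. */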
158 | 172 |
159 static void ki_bfly4( | |
160 kiss_fft_cpx * Fout, | |
161 const size_t fstride, | |
162 const kiss_fft_state *st, | |
163 int m, | |
164 int N, | |
165 int mm | |
166 ) | |
167 { | |
168 const kiss_twiddle_cpx *tw1,*tw2,*tw3; | |
169 kiss_fft_cpx scratch[6]; | |
170 const size_t m2=2*m; | |
171 const size_t m3=3*m; | |
172 int i, j; | |
173 | |
174 kiss_fft_cpx * Fout_beg = Fout; | |
175 for (i=0;i<N;i++) | |
176 { | |
177 Fout = Fout_beg + i*mm; | |
178 tw3 = tw2 = tw1 = st->twiddles; | |
179 for (j=0;j<m;j++) | |
180 { | |
181 C_MULC(scratch[0],Fout[m] , *tw1 ); | |
182 C_MULC(scratch[1],Fout[m2] , *tw2 ); | |
183 C_MULC(scratch[2],Fout[m3] , *tw3 ); | |
184 | |
185 C_SUB( scratch[5] , *Fout, scratch[1] ); | |
186 C_ADDTO(*Fout, scratch[1]); | |
187 C_ADD( scratch[3] , scratch[0] , scratch[2] ); | |
188 C_SUB( scratch[4] , scratch[0] , scratch[2] ); | |
189 C_SUB( Fout[m2], *Fout, scratch[3] ); | |
190 tw1 += fstride; | |
191 tw2 += fstride*2; | |
192 tw3 += fstride*3; | |
193 C_ADDTO( *Fout , scratch[3] ); | |
194 | |
195 Fout[m].r = scratch[5].r - scratch[4].i; | |
196 Fout[m].i = scratch[5].i + scratch[4].r; | |
197 Fout[m3].r = scratch[5].r + scratch[4].i; | |
198 Fout[m3].i = scratch[5].i - scratch[4].r; | |
199 ++Fout; | |
200 } | |
201 } | |
202 } | |
203 | 173 |
204 #ifndef RADIX_TWO_ONLY | 174 #ifndef RADIX_TWO_ONLY |
205 | 175 |
206 static void kf_bfly3( | 176 static void kf_bfly3( |
207 kiss_fft_cpx * Fout, | 177 kiss_fft_cpx * Fout, |
208 const size_t fstride, | 178 const size_t fstride, |
209 const kiss_fft_state *st, | 179 const kiss_fft_state *st, |
210 int m, | 180 int m, |
211 int N, | 181 int N, |
212 int mm | 182 int mm |
213 ) | 183 ) |
214 { | 184 { |
215 int i; | 185 int i; |
216 size_t k; | 186 size_t k; |
217 const size_t m2 = 2*m; | 187 const size_t m2 = 2*m; |
218 const kiss_twiddle_cpx *tw1,*tw2; | 188 const kiss_twiddle_cpx *tw1,*tw2; |
219 kiss_fft_cpx scratch[5]; | 189 kiss_fft_cpx scratch[5]; |
220 kiss_twiddle_cpx epi3; | 190 kiss_twiddle_cpx epi3; |
221 | 191 |
222 kiss_fft_cpx * Fout_beg = Fout; | 192 kiss_fft_cpx * Fout_beg = Fout; |
| 193 #ifdef FIXED_POINT |
| 194 epi3.r = -16384; |
| 195 epi3.i = -28378; |
| 196 #else |
223 epi3 = st->twiddles[fstride*m]; | 197 epi3 = st->twiddles[fstride*m]; |
| 198 #endif |
224 for (i=0;i<N;i++) | 199 for (i=0;i<N;i++) |
225 { | 200 { |
226 Fout = Fout_beg + i*mm; | 201 Fout = Fout_beg + i*mm; |
227 tw1=tw2=st->twiddles; | 202 tw1=tw2=st->twiddles; |
| 203 /* For non-custom modes, m is guaranteed to be a multiple of 4. */ |
228 k=m; | 204 k=m; |
229 do { | 205 do { |
230 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); | |
231 | 206 |
232 C_MUL(scratch[1],Fout[m] , *tw1); | 207 C_MUL(scratch[1],Fout[m] , *tw1); |
233 C_MUL(scratch[2],Fout[m2] , *tw2); | 208 C_MUL(scratch[2],Fout[m2] , *tw2); |
234 | 209 |
235 C_ADD(scratch[3],scratch[1],scratch[2]); | 210 C_ADD(scratch[3],scratch[1],scratch[2]); |
236 C_SUB(scratch[0],scratch[1],scratch[2]); | 211 C_SUB(scratch[0],scratch[1],scratch[2]); |
237 tw1 += fstride; | 212 tw1 += fstride; |
238 tw2 += fstride*2; | 213 tw2 += fstride*2; |
239 | 214 |
240 Fout[m].r = Fout->r - HALF_OF(scratch[3].r); | 215 Fout[m].r = Fout->r - HALF_OF(scratch[3].r); |
241 Fout[m].i = Fout->i - HALF_OF(scratch[3].i); | 216 Fout[m].i = Fout->i - HALF_OF(scratch[3].i); |
242 | 217 |
243 C_MULBYSCALAR( scratch[0] , epi3.i ); | 218 C_MULBYSCALAR( scratch[0] , epi3.i ); |
244 | 219 |
245 C_ADDTO(*Fout,scratch[3]); | 220 C_ADDTO(*Fout,scratch[3]); |
246 | 221 |
247 Fout[m2].r = Fout[m].r + scratch[0].i; | 222 Fout[m2].r = Fout[m].r + scratch[0].i; |
248 Fout[m2].i = Fout[m].i - scratch[0].r; | 223 Fout[m2].i = Fout[m].i - scratch[0].r; |
249 | 224 |
250 Fout[m].r -= scratch[0].i; | 225 Fout[m].r -= scratch[0].i; |
251 Fout[m].i += scratch[0].r; | 226 Fout[m].i += scratch[0].r; |
252 | 227 |
253 ++Fout; | 228 ++Fout; |
254 } while(--k); | 229 } while(--k); |
255 } | 230 } |
256 } | 231 } |
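In the FIXED_POINT branch above, epi3 is exp(-2*pi*i/3) pre-baked in Q15 instead of being read from the twiddle table; the arithmetic behind the two magic numbers (my annotation, not part of the patch):

/* epi3 = exp(-2*pi*i/3) in Q15:
      epi3.r = round(32768 * cos(2*pi/3))  = round(32768 * -0.5)       = -16384
      epi3.i = round(32768 * -sin(2*pi/3)) = round(32768 * -0.8660254) = -28378 */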
257 | 232 |
258 static void ki_bfly3( | |
259 kiss_fft_cpx * Fout, | |
260 const size_t fstride, | |
261 const kiss_fft_state *st, | |
262 int m, | |
263 int N, | |
264 int mm | |
265 ) | |
266 { | |
267 int i, k; | |
268 const size_t m2 = 2*m; | |
269 const kiss_twiddle_cpx *tw1,*tw2; | |
270 kiss_fft_cpx scratch[5]; | |
271 kiss_twiddle_cpx epi3; | |
272 | 233 |
273 kiss_fft_cpx * Fout_beg = Fout; | 234 #ifndef OVERRIDE_kf_bfly5 |
274 epi3 = st->twiddles[fstride*m]; | |
275 for (i=0;i<N;i++) | |
276 { | |
277 Fout = Fout_beg + i*mm; | |
278 tw1=tw2=st->twiddles; | |
279 k=m; | |
280 do{ | |
281 | |
282 C_MULC(scratch[1],Fout[m] , *tw1); | |
283 C_MULC(scratch[2],Fout[m2] , *tw2); | |
284 | |
285 C_ADD(scratch[3],scratch[1],scratch[2]); | |
286 C_SUB(scratch[0],scratch[1],scratch[2]); | |
287 tw1 += fstride; | |
288 tw2 += fstride*2; | |
289 | |
290 Fout[m].r = Fout->r - HALF_OF(scratch[3].r); | |
291 Fout[m].i = Fout->i - HALF_OF(scratch[3].i); | |
292 | |
293 C_MULBYSCALAR( scratch[0] , -epi3.i ); | |
294 | |
295 C_ADDTO(*Fout,scratch[3]); | |
296 | |
297 Fout[m2].r = Fout[m].r + scratch[0].i; | |
298 Fout[m2].i = Fout[m].i - scratch[0].r; | |
299 | |
300 Fout[m].r -= scratch[0].i; | |
301 Fout[m].i += scratch[0].r; | |
302 | |
303 ++Fout; | |
304 }while(--k); | |
305 } | |
306 } | |
307 | |
308 static void kf_bfly5( | 235 static void kf_bfly5( |
309 kiss_fft_cpx * Fout, | 236 kiss_fft_cpx * Fout, |
310 const size_t fstride, | 237 const size_t fstride, |
311 const kiss_fft_state *st, | 238 const kiss_fft_state *st, |
312 int m, | 239 int m, |
313 int N, | 240 int N, |
314 int mm | 241 int mm |
315 ) | 242 ) |
316 { | 243 { |
317 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; | 244 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; |
318 int i, u; | 245 int i, u; |
319 kiss_fft_cpx scratch[13]; | 246 kiss_fft_cpx scratch[13]; |
320 const kiss_twiddle_cpx * twiddles = st->twiddles; | |
321 const kiss_twiddle_cpx *tw; | 247 const kiss_twiddle_cpx *tw; |
322 kiss_twiddle_cpx ya,yb; | 248 kiss_twiddle_cpx ya,yb; |
323 kiss_fft_cpx * Fout_beg = Fout; | 249 kiss_fft_cpx * Fout_beg = Fout; |
324 | 250 |
325 ya = twiddles[fstride*m]; | 251 #ifdef FIXED_POINT |
326 yb = twiddles[fstride*2*m]; | 252 ya.r = 10126; |
| 253 ya.i = -31164; |
| 254 yb.r = -26510; |
| 255 yb.i = -19261; |
| 256 #else |
| 257 ya = st->twiddles[fstride*m]; |
| 258 yb = st->twiddles[fstride*2*m]; |
| 259 #endif |
327 tw=st->twiddles; | 260 tw=st->twiddles; |
328 | 261 |
329 for (i=0;i<N;i++) | 262 for (i=0;i<N;i++) |
330 { | 263 { |
331 Fout = Fout_beg + i*mm; | 264 Fout = Fout_beg + i*mm; |
332 Fout0=Fout; | 265 Fout0=Fout; |
333 Fout1=Fout0+m; | 266 Fout1=Fout0+m; |
334 Fout2=Fout0+2*m; | 267 Fout2=Fout0+2*m; |
335 Fout3=Fout0+3*m; | 268 Fout3=Fout0+3*m; |
336 Fout4=Fout0+4*m; | 269 Fout4=Fout0+4*m; |
337 | 270 |
| 271 /* For non-custom modes, m is guaranteed to be a multiple of 4. */ |
338 for ( u=0; u<m; ++u ) { | 272 for ( u=0; u<m; ++u ) { |
339 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5); | |
340 scratch[0] = *Fout0; | 273 scratch[0] = *Fout0; |
341 | 274 |
342 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); | 275 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); |
343 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); | 276 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); |
344 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]); | 277 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]); |
345 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]); | 278 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]); |
346 | 279 |
347 C_ADD( scratch[7],scratch[1],scratch[4]); | 280 C_ADD( scratch[7],scratch[1],scratch[4]); |
348 C_SUB( scratch[10],scratch[1],scratch[4]); | 281 C_SUB( scratch[10],scratch[1],scratch[4]); |
349 C_ADD( scratch[8],scratch[2],scratch[3]); | 282 C_ADD( scratch[8],scratch[2],scratch[3]); |
(...skipping 16 matching lines...) |
366 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i); | 299 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i); |
367 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i); | 300 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i); |
368 | 301 |
369 C_ADD(*Fout2,scratch[11],scratch[12]); | 302 C_ADD(*Fout2,scratch[11],scratch[12]); |
370 C_SUB(*Fout3,scratch[11],scratch[12]); | 303 C_SUB(*Fout3,scratch[11],scratch[12]); |
371 | 304 |
372 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; | 305 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; |
373 } | 306 } |
374 } | 307 } |
375 } | 308 } |
| 309 #endif /* OVERRIDE_kf_bfly5 */ |
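Likewise, the FIXED_POINT values of ya and yb in kf_bfly5 are exp(-2*pi*i/5) and exp(-4*pi*i/5) in Q15. A stand-alone sketch (not project code) that reproduces them:

/* Expected output: ya = {10126, -31164}, yb = {-26510, -19261} */
#include <math.h>
#include <stdio.h>

int main(void)
{
   static const double pi = 3.14159265358979323846;
   int k;
   for (k = 1; k <= 2; k++)
   {
      double ang = -2*pi*k/5;
      printf("y%c = {%.0f, %.0f}\n", k == 1 ? 'a' : 'b',
             floor(.5 + 32768*cos(ang)), floor(.5 + 32768*sin(ang)));
   }
   return 0;
}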
376 | 310 |
377 static void ki_bfly5( | |
378 kiss_fft_cpx * Fout, | |
379 const size_t fstride, | |
380 const kiss_fft_state *st, | |
381 int m, | |
382 int N, | |
383 int mm | |
384 ) | |
385 { | |
386 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; | |
387 int i, u; | |
388 kiss_fft_cpx scratch[13]; | |
389 const kiss_twiddle_cpx * twiddles = st->twiddles; | |
390 const kiss_twiddle_cpx *tw; | |
391 kiss_twiddle_cpx ya,yb; | |
392 kiss_fft_cpx * Fout_beg = Fout; | |
393 | |
394 ya = twiddles[fstride*m]; | |
395 yb = twiddles[fstride*2*m]; | |
396 tw=st->twiddles; | |
397 | |
398 for (i=0;i<N;i++) | |
399 { | |
400 Fout = Fout_beg + i*mm; | |
401 Fout0=Fout; | |
402 Fout1=Fout0+m; | |
403 Fout2=Fout0+2*m; | |
404 Fout3=Fout0+3*m; | |
405 Fout4=Fout0+4*m; | |
406 | |
407 for ( u=0; u<m; ++u ) { | |
408 scratch[0] = *Fout0; | |
409 | |
410 C_MULC(scratch[1] ,*Fout1, tw[u*fstride]); | |
411 C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]); | |
412 C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]); | |
413 C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]); | |
414 | |
415 C_ADD( scratch[7],scratch[1],scratch[4]); | |
416 C_SUB( scratch[10],scratch[1],scratch[4]); | |
417 C_ADD( scratch[8],scratch[2],scratch[3]); | |
418 C_SUB( scratch[9],scratch[2],scratch[3]); | |
419 | |
420 Fout0->r += scratch[7].r + scratch[8].r; | |
421 Fout0->i += scratch[7].i + scratch[8].i; | |
422 | |
423 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); | |
424 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); | |
425 | |
426 scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i); | |
427 scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i); | |
428 | |
429 C_SUB(*Fout1,scratch[5],scratch[6]); | |
430 C_ADD(*Fout4,scratch[5],scratch[6]); | |
431 | |
432 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); | |
433 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); | |
434 scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i); | |
435 scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i); | |
436 | |
437 C_ADD(*Fout2,scratch[11],scratch[12]); | |
438 C_SUB(*Fout3,scratch[11],scratch[12]); | |
439 | |
440 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; | |
441 } | |
442 } | |
443 } | |
444 | 311 |
445 #endif | 312 #endif |
446 | 313 |
447 | 314 |
448 #ifdef CUSTOM_MODES | 315 #ifdef CUSTOM_MODES |
449 | 316 |
450 static | 317 static |
451 void compute_bitrev_table( | 318 void compute_bitrev_table( |
452 int Fout, | 319 int Fout, |
453 opus_int16 *f, | 320 opus_int16 *f, |
(...skipping 27 matching lines...) |
481 } | 348 } |
482 | 349 |
483 /* facbuf is populated by p1,m1,p2,m2, ... | 350 /* facbuf is populated by p1,m1,p2,m2, ... |
484 where | 351 where |
485 p[i] * m[i] = m[i-1] | 352 p[i] * m[i] = m[i-1] |
486 m0 = n */ | 353 m0 = n */ |
487 static | 354 static |
488 int kf_factor(int n,opus_int16 * facbuf) | 355 int kf_factor(int n,opus_int16 * facbuf) |
489 { | 356 { |
490 int p=4; | 357 int p=4; |
| 358 int i; |
| 359 int stages=0; |
| 360 int nbak = n; |
491 | 361 |
492 /*factor out powers of 4, powers of 2, then any remaining primes */ | 362 /*factor out powers of 4, powers of 2, then any remaining primes */ |
493 do { | 363 do { |
494 while (n % p) { | 364 while (n % p) { |
495 switch (p) { | 365 switch (p) { |
496 case 4: p = 2; break; | 366 case 4: p = 2; break; |
497 case 2: p = 3; break; | 367 case 2: p = 3; break; |
498 default: p += 2; break; | 368 default: p += 2; break; |
499 } | 369 } |
500 if (p>32000 || (opus_int32)p*(opus_int32)p > n) | 370 if (p>32000 || (opus_int32)p*(opus_int32)p > n) |
501 p = n; /* no more factors, skip to end */ | 371 p = n; /* no more factors, skip to end */ |
502 } | 372 } |
503 n /= p; | 373 n /= p; |
504 #ifdef RADIX_TWO_ONLY | 374 #ifdef RADIX_TWO_ONLY |
505 if (p!=2 && p != 4) | 375 if (p!=2 && p != 4) |
506 #else | 376 #else |
507 if (p>5) | 377 if (p>5) |
508 #endif | 378 #endif |
509 { | 379 { |
510 return 0; | 380 return 0; |
511 } | 381 } |
512 *facbuf++ = p; | 382 facbuf[2*stages] = p; |
513 *facbuf++ = n; | 383 if (p==2 && stages > 1) |
| 384 { |
| 385 facbuf[2*stages] = 4; |
| 386 facbuf[2] = 2; |
| 387 } |
| 388 stages++; |
514 } while (n > 1); | 389 } while (n > 1); |
| 390 n = nbak; |
| 391 /* Reverse the order to get the radix 4 at the end, so we can use the |
| 392 fast degenerate case. It turns out that reversing the order also |
| 393 improves the noise behaviour. */ |
| 394 for (i=0;i<stages/2;i++) |
| 395 { |
| 396 int tmp; |
| 397 tmp = facbuf[2*i]; |
| 398 facbuf[2*i] = facbuf[2*(stages-i-1)]; |
| 399 facbuf[2*(stages-i-1)] = tmp; |
| 400 } |
| 401 for (i=0;i<stages;i++) |
| 402 { |
| 403 n /= facbuf[2*i]; |
| 404 facbuf[2*i+1] = n; |
| 405 } |
515 return 1; | 406 return 1; |
516 } | 407 } |
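To see what the reordering in the new kf_factor produces, a hand trace for nfft = 480 (annotation only; the values follow from the code above):

/* nfft = 480:
     the forward pass pulls out p = 4, 4, 2, 3, 5; the p==2 fix-up swaps the
     lone 2 with the second 4, giving p = {4, 2, 4, 3, 5};
     reversing the stage order then yields
        p:  5    3    4    2    4
        m:  96   32   8    4    1        (p[i] * m[i] = m[i-1], m0 = 480)
     i.e. facbuf = {5,96, 3,32, 4,8, 2,4, 4,1}, with the radix-4, m==1 stage at
     the end of the buffer where kf_bfly4's degenerate case can handle it. */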
517 | 408 |
518 static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft) | 409 static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft) |
519 { | 410 { |
520 int i; | 411 int i; |
521 #ifdef FIXED_POINT | 412 #ifdef FIXED_POINT |
522 for (i=0;i<nfft;++i) { | 413 for (i=0;i<nfft;++i) { |
523 opus_val32 phase = -i; | 414 opus_val32 phase = -i; |
524 kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft)); | 415 kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft)); |
(...skipping 23 matching lines...) |
548 }else{ | 439 }else{ |
549 if (mem != NULL && *lenmem >= memneeded) | 440 if (mem != NULL && *lenmem >= memneeded) |
550 st = (kiss_fft_state*)mem; | 441 st = (kiss_fft_state*)mem; |
551 *lenmem = memneeded; | 442 *lenmem = memneeded; |
552 } | 443 } |
553 if (st) { | 444 if (st) { |
554 opus_int16 *bitrev; | 445 opus_int16 *bitrev; |
555 kiss_twiddle_cpx *twiddles; | 446 kiss_twiddle_cpx *twiddles; |
556 | 447 |
557 st->nfft=nfft; | 448 st->nfft=nfft; |
558 #ifndef FIXED_POINT | 449 #ifdef FIXED_POINT |
| 450 st->scale_shift = celt_ilog2(st->nfft); |
| 451 if (st->nfft == 1<<st->scale_shift) |
| 452 st->scale = Q15ONE; |
| 453 else |
| 454 st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift); |
| 455 #else |
559 st->scale = 1.f/nfft; | 456 st->scale = 1.f/nfft; |
560 #endif | 457 #endif |
561 if (base != NULL) | 458 if (base != NULL) |
562 { | 459 { |
563 st->twiddles = base->twiddles; | 460 st->twiddles = base->twiddles; |
564 st->shift = 0; | 461 st->shift = 0; |
565 while (nfft<<st->shift != base->nfft && st->shift < 32) | 462 while (st->shift < 32 && nfft<<st->shift != base->nfft) |
566 st->shift++; | 463 st->shift++; |
567 if (st->shift>=32) | 464 if (st->shift>=32) |
568 goto fail; | 465 goto fail; |
569 } else { | 466 } else { |
570 st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft); | 467 st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft); |
571 compute_twiddles(twiddles, nfft); | 468 compute_twiddles(twiddles, nfft); |
572 st->shift = -1; | 469 st->shift = -1; |
573 } | 470 } |
574 if (!kf_factor(nfft,st->factors)) | 471 if (!kf_factor(nfft,st->factors)) |
575 { | 472 { |
(...skipping 23 matching lines...) |
599 { | 496 { |
600 opus_free((opus_int16*)cfg->bitrev); | 497 opus_free((opus_int16*)cfg->bitrev); |
601 if (cfg->shift < 0) | 498 if (cfg->shift < 0) |
602 opus_free((kiss_twiddle_cpx*)cfg->twiddles); | 499 opus_free((kiss_twiddle_cpx*)cfg->twiddles); |
603 opus_free((kiss_fft_state*)cfg); | 500 opus_free((kiss_fft_state*)cfg); |
604 } | 501 } |
605 } | 502 } |
606 | 503 |
607 #endif /* CUSTOM_MODES */ | 504 #endif /* CUSTOM_MODES */ |
608 | 505 |
609 void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) | 506 void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout) |
610 { | 507 { |
611 int m2, m; | 508 int m2, m; |
612 int p; | 509 int p; |
613 int L; | 510 int L; |
614 int fstride[MAXFACTORS]; | 511 int fstride[MAXFACTORS]; |
615 int i; | 512 int i; |
616 int shift; | 513 int shift; |
617 | 514 |
618 /* st->shift can be -1 */ | 515 /* st->shift can be -1 */ |
619 shift = st->shift>0 ? st->shift : 0; | 516 shift = st->shift>0 ? st->shift : 0; |
620 | 517 |
621 celt_assert2 (fin != fout, "In-place FFT not supported"); | |
622 /* Bit-reverse the input */ | |
623 for (i=0;i<st->nfft;i++) | |
624 { | |
625 fout[st->bitrev[i]] = fin[i]; | |
626 #ifndef FIXED_POINT | |
627 fout[st->bitrev[i]].r *= st->scale; | |
628 fout[st->bitrev[i]].i *= st->scale; | |
629 #endif | |
630 } | |
631 | |
632 fstride[0] = 1; | 518 fstride[0] = 1; |
633 L=0; | 519 L=0; |
634 do { | 520 do { |
635 p = st->factors[2*L]; | 521 p = st->factors[2*L]; |
636 m = st->factors[2*L+1]; | 522 m = st->factors[2*L+1]; |
637 fstride[L+1] = fstride[L]*p; | 523 fstride[L+1] = fstride[L]*p; |
638 L++; | 524 L++; |
639 } while(m!=1); | 525 } while(m!=1); |
640 m = st->factors[2*L-1]; | 526 m = st->factors[2*L-1]; |
641 for (i=L-1;i>=0;i--) | 527 for (i=L-1;i>=0;i--) |
642 { | 528 { |
643 if (i!=0) | 529 if (i!=0) |
644 m2 = st->factors[2*i-1]; | 530 m2 = st->factors[2*i-1]; |
645 else | 531 else |
646 m2 = 1; | 532 m2 = 1; |
647 switch (st->factors[2*i]) | 533 switch (st->factors[2*i]) |
648 { | 534 { |
649 case 2: | 535 case 2: |
650 kf_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2); | 536 kf_bfly2(fout, m, fstride[i]); |
651 break; | 537 break; |
652 case 4: | 538 case 4: |
653 kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2); | 539 kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2); |
654 break; | 540 break; |
655 #ifndef RADIX_TWO_ONLY | 541 #ifndef RADIX_TWO_ONLY |
656 case 3: | 542 case 3: |
657 kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2); | 543 kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2); |
658 break; | 544 break; |
659 case 5: | 545 case 5: |
660 kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2); | 546 kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2); |
661 break; | 547 break; |
662 #endif | 548 #endif |
663 } | 549 } |
664 m = m2; | 550 m = m2; |
665 } | 551 } |
666 } | 552 } |
667 | 553 |
| 554 void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) |
| 555 { |
| 556 int i; |
| 557 opus_val16 scale; |
| 558 #ifdef FIXED_POINT |
| 559 /* Allows us to scale with MULT16_32_Q16(), which is faster than |
| 560 MULT16_32_Q15() on ARM. */ |
| 561 int scale_shift = st->scale_shift-1; |
| 562 #endif |
| 563 scale = st->scale; |
| 564 |
| 565 celt_assert2 (fin != fout, "In-place FFT not supported"); |
| 566 /* Bit-reverse the input */ |
| 567 for (i=0;i<st->nfft;i++) |
| 568 { |
| 569 kiss_fft_cpx x = fin[i]; |
| 570 fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift); |
| 571 fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift); |
| 572 } |
| 573 opus_fft_impl(st, fout); |
| 574 } |
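With the FIXED_POINT scale set up in opus_fft_alloc_twiddles (a Q15 value of roughly 2^scale_shift/nfft), multiplying by MULT16_32_Q16(scale, x) and then shifting right by scale_shift-1 works out to approximately x/nfft. A rough stand-alone check, assuming MULT16_32_Q16(a,b) behaves as ((int64)a*b)>>16, SHR32 as a plain right shift, and celt_ilog2(nfft) as floor(log2(nfft)) (all assumptions of this sketch, not taken from the patch):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
   int nfft = 480;
   int scale_shift = 8;                 /* floor(log2(480)) */
   int32_t scale = (int32_t)((1073741824 + nfft/2)/nfft) >> (15 - scale_shift);
   int32_t x = 1000000;                 /* arbitrary sample value */
   int32_t y = (int32_t)(((int64_t)scale*x) >> 16) >> (scale_shift - 1);
   printf("scale = %d (~%f in Q15), y = %d, x/nfft = %d\n",
          (int)scale, scale/32768., (int)y, x/nfft);
   return 0;
}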
| 575 |
| 576 |
| 577 #ifdef TEST_UNIT_DFT_C |
668 void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) | 578 void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) |
669 { | 579 { |
670 int m2, m; | |
671 int p; | |
672 int L; | |
673 int fstride[MAXFACTORS]; | |
674 int i; | 580 int i; |
675 int shift; | |
676 | |
677 /* st->shift can be -1 */ | |
678 shift = st->shift>0 ? st->shift : 0; | |
679 celt_assert2 (fin != fout, "In-place FFT not supported"); | 581 celt_assert2 (fin != fout, "In-place FFT not supported"); |
680 /* Bit-reverse the input */ | 582 /* Bit-reverse the input */ |
681 for (i=0;i<st->nfft;i++) | 583 for (i=0;i<st->nfft;i++) |
682 fout[st->bitrev[i]] = fin[i]; | 584 fout[st->bitrev[i]] = fin[i]; |
683 | 585 for (i=0;i<st->nfft;i++) |
684 fstride[0] = 1; | 586 fout[i].i = -fout[i].i; |
685 L=0; | 587 opus_fft_impl(st, fout); |
686 do { | 588 for (i=0;i<st->nfft;i++) |
687 p = st->factors[2*L]; | 589 fout[i].i = -fout[i].i; |
688 m = st->factors[2*L+1]; | 590 } |
689 fstride[L+1] = fstride[L]*p; | |
690 L++; | |
691 } while(m!=1); | |
692 m = st->factors[2*L-1]; | |
693 for (i=L-1;i>=0;i--) | |
694 { | |
695 if (i!=0) | |
696 m2 = st->factors[2*i-1]; | |
697 else | |
698 m2 = 1; | |
699 switch (st->factors[2*i]) | |
700 { | |
701 case 2: | |
702 ki_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2); | |
703 break; | |
704 case 4: | |
705 ki_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2); | |
706 break; | |
707 #ifndef RADIX_TWO_ONLY | |
708 case 3: | |
709 ki_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2); | |
710 break; | |
711 case 5: | |
712 ki_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2); | |
713 break; | |
714 #endif | 591 #endif |
715 } | |
716 m = m2; | |
717 } | |
718 } | |
719 | |
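The reworked opus_ifft (now compiled only under TEST_UNIT_DFT_C) relies on the standard conjugation identity rather than separate inverse butterflies; as a note, not part of the patch:

/* IDFT(x) = conj( DFT( conj(x) ) )   (no 1/N scaling is applied here),
   so negating the imaginary parts before and after opus_fft_impl yields the
   unscaled inverse transform, which is why the ki_bfly* routines could go. */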