Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(21)

Side by Side Diff: celt/kiss_fft.c

Issue 882843002: Update to opus-HEAD-66611f1. (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/opus.git@master
Patch Set: Add the contents of Makefile.mips back. Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « celt/kiss_fft.h ('k') | celt/mdct.c » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /*Copyright (c) 2003-2004, Mark Borgerding 1 /*Copyright (c) 2003-2004, Mark Borgerding
2 Lots of modifications by Jean-Marc Valin 2 Lots of modifications by Jean-Marc Valin
3 Copyright (c) 2005-2007, Xiph.Org Foundation 3 Copyright (c) 2005-2007, Xiph.Org Foundation
4 Copyright (c) 2008, Xiph.Org Foundation, CSIRO 4 Copyright (c) 2008, Xiph.Org Foundation, CSIRO
5 5
6 All rights reserved. 6 All rights reserved.
7 7
8 Redistribution and use in source and binary forms, with or without 8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are met: 9 modification, are permitted provided that the following conditions are met:
10 10
(...skipping 29 matching lines...) Expand all
40 #include "os_support.h" 40 #include "os_support.h"
41 #include "mathops.h" 41 #include "mathops.h"
42 #include "stack_alloc.h" 42 #include "stack_alloc.h"
43 43
44 /* The guts header contains all the multiplication and addition macros that are defined for 44 /* The guts header contains all the multiplication and addition macros that are defined for
45 complex numbers. It also delares the kf_ internal functions. 45 complex numbers. It also delares the kf_ internal functions.
46 */ 46 */
47 47
48 static void kf_bfly2( 48 static void kf_bfly2(
49 kiss_fft_cpx * Fout, 49 kiss_fft_cpx * Fout,
50 const size_t fstride,
51 const kiss_fft_state *st,
52 int m, 50 int m,
53 int N, 51 int N
54 int mm
55 ) 52 )
56 { 53 {
57 kiss_fft_cpx * Fout2; 54 kiss_fft_cpx * Fout2;
58 const kiss_twiddle_cpx * tw1; 55 int i;
59 int i,j; 56 (void)m;
60 kiss_fft_cpx * Fout_beg = Fout; 57 #ifdef CUSTOM_MODES
61 for (i=0;i<N;i++) 58 if (m==1)
62 { 59 {
63 Fout = Fout_beg + i*mm; 60 celt_assert(m==1);
64 Fout2 = Fout + m; 61 for (i=0;i<N;i++)
65 tw1 = st->twiddles;
66 for(j=0;j<m;j++)
67 { 62 {
68 kiss_fft_cpx t; 63 kiss_fft_cpx t;
69 Fout->r = SHR32(Fout->r, 1);Fout->i = SHR32(Fout->i, 1); 64 Fout2 = Fout + 1;
70 Fout2->r = SHR32(Fout2->r, 1);Fout2->i = SHR32(Fout2->i, 1); 65 t = *Fout2;
71 C_MUL (t, *Fout2 , *tw1);
72 tw1 += fstride;
73 C_SUB( *Fout2 , *Fout , t ); 66 C_SUB( *Fout2 , *Fout , t );
74 C_ADDTO( *Fout , t ); 67 C_ADDTO( *Fout , t );
75 ++Fout2; 68 Fout += 2;
76 ++Fout; 69 }
70 } else
71 #endif
72 {
73 opus_val16 tw;
74 tw = QCONST16(0.7071067812f, 15);
75 /* We know that m==4 here because the radix-2 is just after a radix-4 */
76 celt_assert(m==4);
77 for (i=0;i<N;i++)
78 {
79 kiss_fft_cpx t;
80 Fout2 = Fout + 4;
81 t = Fout2[0];
82 C_SUB( Fout2[0] , Fout[0] , t );
83 C_ADDTO( Fout[0] , t );
84
85 t.r = S_MUL(Fout2[1].r+Fout2[1].i, tw);
86 t.i = S_MUL(Fout2[1].i-Fout2[1].r, tw);
87 C_SUB( Fout2[1] , Fout[1] , t );
88 C_ADDTO( Fout[1] , t );
89
90 t.r = Fout2[2].i;
91 t.i = -Fout2[2].r;
92 C_SUB( Fout2[2] , Fout[2] , t );
93 C_ADDTO( Fout[2] , t );
94
95 t.r = S_MUL(Fout2[3].i-Fout2[3].r, tw);
96 t.i = S_MUL(-Fout2[3].i-Fout2[3].r, tw);
97 C_SUB( Fout2[3] , Fout[3] , t );
98 C_ADDTO( Fout[3] , t );
99 Fout += 8;
77 } 100 }
78 } 101 }
79 } 102 }
80
81 static void ki_bfly2(
82 kiss_fft_cpx * Fout,
83 const size_t fstride,
84 const kiss_fft_state *st,
85 int m,
86 int N,
87 int mm
88 )
89 {
90 kiss_fft_cpx * Fout2;
91 const kiss_twiddle_cpx * tw1;
92 kiss_fft_cpx t;
93 int i,j;
94 kiss_fft_cpx * Fout_beg = Fout;
95 for (i=0;i<N;i++)
96 {
97 Fout = Fout_beg + i*mm;
98 Fout2 = Fout + m;
99 tw1 = st->twiddles;
100 for(j=0;j<m;j++)
101 {
102 C_MULC (t, *Fout2 , *tw1);
103 tw1 += fstride;
104 C_SUB( *Fout2 , *Fout , t );
105 C_ADDTO( *Fout , t );
106 ++Fout2;
107 ++Fout;
108 }
109 }
110 }
111 103
112 static void kf_bfly4( 104 static void kf_bfly4(
113 kiss_fft_cpx * Fout, 105 kiss_fft_cpx * Fout,
114 const size_t fstride, 106 const size_t fstride,
115 const kiss_fft_state *st, 107 const kiss_fft_state *st,
116 int m, 108 int m,
117 int N, 109 int N,
118 int mm 110 int mm
119 ) 111 )
120 { 112 {
121 const kiss_twiddle_cpx *tw1,*tw2,*tw3; 113 int i;
122 kiss_fft_cpx scratch[6];
123 const size_t m2=2*m;
124 const size_t m3=3*m;
125 int i, j;
126 114
127 kiss_fft_cpx * Fout_beg = Fout; 115 if (m==1)
128 for (i=0;i<N;i++)
129 { 116 {
130 Fout = Fout_beg + i*mm; 117 /* Degenerate case where all the twiddles are 1. */
131 tw3 = tw2 = tw1 = st->twiddles; 118 for (i=0;i<N;i++)
132 for (j=0;j<m;j++)
133 { 119 {
134 C_MUL4(scratch[0],Fout[m] , *tw1 ); 120 kiss_fft_cpx scratch0, scratch1;
135 C_MUL4(scratch[1],Fout[m2] , *tw2 );
136 C_MUL4(scratch[2],Fout[m3] , *tw3 );
137 121
138 Fout->r = PSHR32(Fout->r, 2); 122 C_SUB( scratch0 , *Fout, Fout[2] );
139 Fout->i = PSHR32(Fout->i, 2); 123 C_ADDTO(*Fout, Fout[2]);
140 C_SUB( scratch[5] , *Fout, scratch[1] ); 124 C_ADD( scratch1 , Fout[1] , Fout[3] );
141 C_ADDTO(*Fout, scratch[1]); 125 C_SUB( Fout[2], *Fout, scratch1 );
142 C_ADD( scratch[3] , scratch[0] , scratch[2] ); 126 C_ADDTO( *Fout , scratch1 );
143 C_SUB( scratch[4] , scratch[0] , scratch[2] ); 127 C_SUB( scratch1 , Fout[1] , Fout[3] );
144 C_SUB( Fout[m2], *Fout, scratch[3] );
145 tw1 += fstride;
146 tw2 += fstride*2;
147 tw3 += fstride*3;
148 C_ADDTO( *Fout , scratch[3] );
149 128
150 Fout[m].r = scratch[5].r + scratch[4].i; 129 Fout[1].r = scratch0.r + scratch1.i;
151 Fout[m].i = scratch[5].i - scratch[4].r; 130 Fout[1].i = scratch0.i - scratch1.r;
152 Fout[m3].r = scratch[5].r - scratch[4].i; 131 Fout[3].r = scratch0.r - scratch1.i;
153 Fout[m3].i = scratch[5].i + scratch[4].r; 132 Fout[3].i = scratch0.i + scratch1.r;
154 ++Fout; 133 Fout+=4;
134 }
135 } else {
136 int j;
137 kiss_fft_cpx scratch[6];
138 const kiss_twiddle_cpx *tw1,*tw2,*tw3;
139 const int m2=2*m;
140 const int m3=3*m;
141 kiss_fft_cpx * Fout_beg = Fout;
142 for (i=0;i<N;i++)
143 {
144 Fout = Fout_beg + i*mm;
145 tw3 = tw2 = tw1 = st->twiddles;
146 /* m is guaranteed to be a multiple of 4. */
147 for (j=0;j<m;j++)
148 {
149 C_MUL(scratch[0],Fout[m] , *tw1 );
150 C_MUL(scratch[1],Fout[m2] , *tw2 );
151 C_MUL(scratch[2],Fout[m3] , *tw3 );
152
153 C_SUB( scratch[5] , *Fout, scratch[1] );
154 C_ADDTO(*Fout, scratch[1]);
155 C_ADD( scratch[3] , scratch[0] , scratch[2] );
156 C_SUB( scratch[4] , scratch[0] , scratch[2] );
157 C_SUB( Fout[m2], *Fout, scratch[3] );
158 tw1 += fstride;
159 tw2 += fstride*2;
160 tw3 += fstride*3;
161 C_ADDTO( *Fout , scratch[3] );
162
163 Fout[m].r = scratch[5].r + scratch[4].i;
164 Fout[m].i = scratch[5].i - scratch[4].r;
165 Fout[m3].r = scratch[5].r - scratch[4].i;
166 Fout[m3].i = scratch[5].i + scratch[4].r;
167 ++Fout;
168 }
155 } 169 }
156 } 170 }
157 } 171 }
158 172
159 static void ki_bfly4(
160 kiss_fft_cpx * Fout,
161 const size_t fstride,
162 const kiss_fft_state *st,
163 int m,
164 int N,
165 int mm
166 )
167 {
168 const kiss_twiddle_cpx *tw1,*tw2,*tw3;
169 kiss_fft_cpx scratch[6];
170 const size_t m2=2*m;
171 const size_t m3=3*m;
172 int i, j;
173
174 kiss_fft_cpx * Fout_beg = Fout;
175 for (i=0;i<N;i++)
176 {
177 Fout = Fout_beg + i*mm;
178 tw3 = tw2 = tw1 = st->twiddles;
179 for (j=0;j<m;j++)
180 {
181 C_MULC(scratch[0],Fout[m] , *tw1 );
182 C_MULC(scratch[1],Fout[m2] , *tw2 );
183 C_MULC(scratch[2],Fout[m3] , *tw3 );
184
185 C_SUB( scratch[5] , *Fout, scratch[1] );
186 C_ADDTO(*Fout, scratch[1]);
187 C_ADD( scratch[3] , scratch[0] , scratch[2] );
188 C_SUB( scratch[4] , scratch[0] , scratch[2] );
189 C_SUB( Fout[m2], *Fout, scratch[3] );
190 tw1 += fstride;
191 tw2 += fstride*2;
192 tw3 += fstride*3;
193 C_ADDTO( *Fout , scratch[3] );
194
195 Fout[m].r = scratch[5].r - scratch[4].i;
196 Fout[m].i = scratch[5].i + scratch[4].r;
197 Fout[m3].r = scratch[5].r + scratch[4].i;
198 Fout[m3].i = scratch[5].i - scratch[4].r;
199 ++Fout;
200 }
201 }
202 }
203 173
204 #ifndef RADIX_TWO_ONLY 174 #ifndef RADIX_TWO_ONLY
205 175
206 static void kf_bfly3( 176 static void kf_bfly3(
207 kiss_fft_cpx * Fout, 177 kiss_fft_cpx * Fout,
208 const size_t fstride, 178 const size_t fstride,
209 const kiss_fft_state *st, 179 const kiss_fft_state *st,
210 int m, 180 int m,
211 int N, 181 int N,
212 int mm 182 int mm
213 ) 183 )
214 { 184 {
215 int i; 185 int i;
216 size_t k; 186 size_t k;
217 const size_t m2 = 2*m; 187 const size_t m2 = 2*m;
218 const kiss_twiddle_cpx *tw1,*tw2; 188 const kiss_twiddle_cpx *tw1,*tw2;
219 kiss_fft_cpx scratch[5]; 189 kiss_fft_cpx scratch[5];
220 kiss_twiddle_cpx epi3; 190 kiss_twiddle_cpx epi3;
221 191
222 kiss_fft_cpx * Fout_beg = Fout; 192 kiss_fft_cpx * Fout_beg = Fout;
193 #ifdef FIXED_POINT
194 epi3.r = -16384;
195 epi3.i = -28378;
196 #else
223 epi3 = st->twiddles[fstride*m]; 197 epi3 = st->twiddles[fstride*m];
198 #endif
224 for (i=0;i<N;i++) 199 for (i=0;i<N;i++)
225 { 200 {
226 Fout = Fout_beg + i*mm; 201 Fout = Fout_beg + i*mm;
227 tw1=tw2=st->twiddles; 202 tw1=tw2=st->twiddles;
203 /* For non-custom modes, m is guaranteed to be a multiple of 4. */
228 k=m; 204 k=m;
229 do { 205 do {
230 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
231 206
232 C_MUL(scratch[1],Fout[m] , *tw1); 207 C_MUL(scratch[1],Fout[m] , *tw1);
233 C_MUL(scratch[2],Fout[m2] , *tw2); 208 C_MUL(scratch[2],Fout[m2] , *tw2);
234 209
235 C_ADD(scratch[3],scratch[1],scratch[2]); 210 C_ADD(scratch[3],scratch[1],scratch[2]);
236 C_SUB(scratch[0],scratch[1],scratch[2]); 211 C_SUB(scratch[0],scratch[1],scratch[2]);
237 tw1 += fstride; 212 tw1 += fstride;
238 tw2 += fstride*2; 213 tw2 += fstride*2;
239 214
240 Fout[m].r = Fout->r - HALF_OF(scratch[3].r); 215 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
241 Fout[m].i = Fout->i - HALF_OF(scratch[3].i); 216 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
242 217
243 C_MULBYSCALAR( scratch[0] , epi3.i ); 218 C_MULBYSCALAR( scratch[0] , epi3.i );
244 219
245 C_ADDTO(*Fout,scratch[3]); 220 C_ADDTO(*Fout,scratch[3]);
246 221
247 Fout[m2].r = Fout[m].r + scratch[0].i; 222 Fout[m2].r = Fout[m].r + scratch[0].i;
248 Fout[m2].i = Fout[m].i - scratch[0].r; 223 Fout[m2].i = Fout[m].i - scratch[0].r;
249 224
250 Fout[m].r -= scratch[0].i; 225 Fout[m].r -= scratch[0].i;
251 Fout[m].i += scratch[0].r; 226 Fout[m].i += scratch[0].r;
252 227
253 ++Fout; 228 ++Fout;
254 } while(--k); 229 } while(--k);
255 } 230 }
256 } 231 }
257 232
258 static void ki_bfly3(
259 kiss_fft_cpx * Fout,
260 const size_t fstride,
261 const kiss_fft_state *st,
262 int m,
263 int N,
264 int mm
265 )
266 {
267 int i, k;
268 const size_t m2 = 2*m;
269 const kiss_twiddle_cpx *tw1,*tw2;
270 kiss_fft_cpx scratch[5];
271 kiss_twiddle_cpx epi3;
272 233
273 kiss_fft_cpx * Fout_beg = Fout; 234 #ifndef OVERRIDE_kf_bfly5
274 epi3 = st->twiddles[fstride*m];
275 for (i=0;i<N;i++)
276 {
277 Fout = Fout_beg + i*mm;
278 tw1=tw2=st->twiddles;
279 k=m;
280 do{
281
282 C_MULC(scratch[1],Fout[m] , *tw1);
283 C_MULC(scratch[2],Fout[m2] , *tw2);
284
285 C_ADD(scratch[3],scratch[1],scratch[2]);
286 C_SUB(scratch[0],scratch[1],scratch[2]);
287 tw1 += fstride;
288 tw2 += fstride*2;
289
290 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
291 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
292
293 C_MULBYSCALAR( scratch[0] , -epi3.i );
294
295 C_ADDTO(*Fout,scratch[3]);
296
297 Fout[m2].r = Fout[m].r + scratch[0].i;
298 Fout[m2].i = Fout[m].i - scratch[0].r;
299
300 Fout[m].r -= scratch[0].i;
301 Fout[m].i += scratch[0].r;
302
303 ++Fout;
304 }while(--k);
305 }
306 }
307
308 static void kf_bfly5( 235 static void kf_bfly5(
309 kiss_fft_cpx * Fout, 236 kiss_fft_cpx * Fout,
310 const size_t fstride, 237 const size_t fstride,
311 const kiss_fft_state *st, 238 const kiss_fft_state *st,
312 int m, 239 int m,
313 int N, 240 int N,
314 int mm 241 int mm
315 ) 242 )
316 { 243 {
317 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; 244 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
318 int i, u; 245 int i, u;
319 kiss_fft_cpx scratch[13]; 246 kiss_fft_cpx scratch[13];
320 const kiss_twiddle_cpx * twiddles = st->twiddles;
321 const kiss_twiddle_cpx *tw; 247 const kiss_twiddle_cpx *tw;
322 kiss_twiddle_cpx ya,yb; 248 kiss_twiddle_cpx ya,yb;
323 kiss_fft_cpx * Fout_beg = Fout; 249 kiss_fft_cpx * Fout_beg = Fout;
324 250
325 ya = twiddles[fstride*m]; 251 #ifdef FIXED_POINT
326 yb = twiddles[fstride*2*m]; 252 ya.r = 10126;
253 ya.i = -31164;
254 yb.r = -26510;
255 yb.i = -19261;
256 #else
257 ya = st->twiddles[fstride*m];
258 yb = st->twiddles[fstride*2*m];
259 #endif
327 tw=st->twiddles; 260 tw=st->twiddles;
328 261
329 for (i=0;i<N;i++) 262 for (i=0;i<N;i++)
330 { 263 {
331 Fout = Fout_beg + i*mm; 264 Fout = Fout_beg + i*mm;
332 Fout0=Fout; 265 Fout0=Fout;
333 Fout1=Fout0+m; 266 Fout1=Fout0+m;
334 Fout2=Fout0+2*m; 267 Fout2=Fout0+2*m;
335 Fout3=Fout0+3*m; 268 Fout3=Fout0+3*m;
336 Fout4=Fout0+4*m; 269 Fout4=Fout0+4*m;
337 270
271 /* For non-custom modes, m is guaranteed to be a multiple of 4. */
338 for ( u=0; u<m; ++u ) { 272 for ( u=0; u<m; ++u ) {
339 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV ( *Fout3,5); C_FIXDIV( *Fout4,5);
340 scratch[0] = *Fout0; 273 scratch[0] = *Fout0;
341 274
342 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); 275 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
343 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); 276 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
344 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]); 277 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
345 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]); 278 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
346 279
347 C_ADD( scratch[7],scratch[1],scratch[4]); 280 C_ADD( scratch[7],scratch[1],scratch[4]);
348 C_SUB( scratch[10],scratch[1],scratch[4]); 281 C_SUB( scratch[10],scratch[1],scratch[4]);
349 C_ADD( scratch[8],scratch[2],scratch[3]); 282 C_ADD( scratch[8],scratch[2],scratch[3]);
(...skipping 16 matching lines...) Expand all
366 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i); 299 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
367 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i); 300 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
368 301
369 C_ADD(*Fout2,scratch[11],scratch[12]); 302 C_ADD(*Fout2,scratch[11],scratch[12]);
370 C_SUB(*Fout3,scratch[11],scratch[12]); 303 C_SUB(*Fout3,scratch[11],scratch[12]);
371 304
372 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; 305 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
373 } 306 }
374 } 307 }
375 } 308 }
309 #endif /* OVERRIDE_kf_bfly5 */
376 310
377 static void ki_bfly5(
378 kiss_fft_cpx * Fout,
379 const size_t fstride,
380 const kiss_fft_state *st,
381 int m,
382 int N,
383 int mm
384 )
385 {
386 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
387 int i, u;
388 kiss_fft_cpx scratch[13];
389 const kiss_twiddle_cpx * twiddles = st->twiddles;
390 const kiss_twiddle_cpx *tw;
391 kiss_twiddle_cpx ya,yb;
392 kiss_fft_cpx * Fout_beg = Fout;
393
394 ya = twiddles[fstride*m];
395 yb = twiddles[fstride*2*m];
396 tw=st->twiddles;
397
398 for (i=0;i<N;i++)
399 {
400 Fout = Fout_beg + i*mm;
401 Fout0=Fout;
402 Fout1=Fout0+m;
403 Fout2=Fout0+2*m;
404 Fout3=Fout0+3*m;
405 Fout4=Fout0+4*m;
406
407 for ( u=0; u<m; ++u ) {
408 scratch[0] = *Fout0;
409
410 C_MULC(scratch[1] ,*Fout1, tw[u*fstride]);
411 C_MULC(scratch[2] ,*Fout2, tw[2*u*fstride]);
412 C_MULC(scratch[3] ,*Fout3, tw[3*u*fstride]);
413 C_MULC(scratch[4] ,*Fout4, tw[4*u*fstride]);
414
415 C_ADD( scratch[7],scratch[1],scratch[4]);
416 C_SUB( scratch[10],scratch[1],scratch[4]);
417 C_ADD( scratch[8],scratch[2],scratch[3]);
418 C_SUB( scratch[9],scratch[2],scratch[3]);
419
420 Fout0->r += scratch[7].r + scratch[8].r;
421 Fout0->i += scratch[7].i + scratch[8].i;
422
423 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[ 8].r,yb.r);
424 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[ 8].i,yb.r);
425
426 scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i);
427 scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i);
428
429 C_SUB(*Fout1,scratch[5],scratch[6]);
430 C_ADD(*Fout4,scratch[5],scratch[6]);
431
432 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch [8].r,ya.r);
433 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch [8].i,ya.r);
434 scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i);
435 scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i);
436
437 C_ADD(*Fout2,scratch[11],scratch[12]);
438 C_SUB(*Fout3,scratch[11],scratch[12]);
439
440 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
441 }
442 }
443 }
444 311
445 #endif 312 #endif
446 313
447 314
448 #ifdef CUSTOM_MODES 315 #ifdef CUSTOM_MODES
449 316
450 static 317 static
451 void compute_bitrev_table( 318 void compute_bitrev_table(
452 int Fout, 319 int Fout,
453 opus_int16 *f, 320 opus_int16 *f,
(...skipping 27 matching lines...) Expand all
481 } 348 }
482 349
483 /* facbuf is populated by p1,m1,p2,m2, ... 350 /* facbuf is populated by p1,m1,p2,m2, ...
484 where 351 where
485 p[i] * m[i] = m[i-1] 352 p[i] * m[i] = m[i-1]
486 m0 = n */ 353 m0 = n */
487 static 354 static
488 int kf_factor(int n,opus_int16 * facbuf) 355 int kf_factor(int n,opus_int16 * facbuf)
489 { 356 {
490 int p=4; 357 int p=4;
358 int i;
359 int stages=0;
360 int nbak = n;
491 361
492 /*factor out powers of 4, powers of 2, then any remaining primes */ 362 /*factor out powers of 4, powers of 2, then any remaining primes */
493 do { 363 do {
494 while (n % p) { 364 while (n % p) {
495 switch (p) { 365 switch (p) {
496 case 4: p = 2; break; 366 case 4: p = 2; break;
497 case 2: p = 3; break; 367 case 2: p = 3; break;
498 default: p += 2; break; 368 default: p += 2; break;
499 } 369 }
500 if (p>32000 || (opus_int32)p*(opus_int32)p > n) 370 if (p>32000 || (opus_int32)p*(opus_int32)p > n)
501 p = n; /* no more factors, skip to end */ 371 p = n; /* no more factors, skip to end */
502 } 372 }
503 n /= p; 373 n /= p;
504 #ifdef RADIX_TWO_ONLY 374 #ifdef RADIX_TWO_ONLY
505 if (p!=2 && p != 4) 375 if (p!=2 && p != 4)
506 #else 376 #else
507 if (p>5) 377 if (p>5)
508 #endif 378 #endif
509 { 379 {
510 return 0; 380 return 0;
511 } 381 }
512 *facbuf++ = p; 382 facbuf[2*stages] = p;
513 *facbuf++ = n; 383 if (p==2 && stages > 1)
384 {
385 facbuf[2*stages] = 4;
386 facbuf[2] = 2;
387 }
388 stages++;
514 } while (n > 1); 389 } while (n > 1);
390 n = nbak;
391 /* Reverse the order to get the radix 4 at the end, so we can use the
392 fast degenerate case. It turns out that reversing the order also
393 improves the noise behaviour. */
394 for (i=0;i<stages/2;i++)
395 {
396 int tmp;
397 tmp = facbuf[2*i];
398 facbuf[2*i] = facbuf[2*(stages-i-1)];
399 facbuf[2*(stages-i-1)] = tmp;
400 }
401 for (i=0;i<stages;i++)
402 {
403 n /= facbuf[2*i];
404 facbuf[2*i+1] = n;
405 }
515 return 1; 406 return 1;
516 } 407 }
517 408
518 static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft) 409 static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
519 { 410 {
520 int i; 411 int i;
521 #ifdef FIXED_POINT 412 #ifdef FIXED_POINT
522 for (i=0;i<nfft;++i) { 413 for (i=0;i<nfft;++i) {
523 opus_val32 phase = -i; 414 opus_val32 phase = -i;
524 kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft)); 415 kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft));
(...skipping 23 matching lines...) Expand all
548 }else{ 439 }else{
549 if (mem != NULL && *lenmem >= memneeded) 440 if (mem != NULL && *lenmem >= memneeded)
550 st = (kiss_fft_state*)mem; 441 st = (kiss_fft_state*)mem;
551 *lenmem = memneeded; 442 *lenmem = memneeded;
552 } 443 }
553 if (st) { 444 if (st) {
554 opus_int16 *bitrev; 445 opus_int16 *bitrev;
555 kiss_twiddle_cpx *twiddles; 446 kiss_twiddle_cpx *twiddles;
556 447
557 st->nfft=nfft; 448 st->nfft=nfft;
558 #ifndef FIXED_POINT 449 #ifdef FIXED_POINT
450 st->scale_shift = celt_ilog2(st->nfft);
451 if (st->nfft == 1<<st->scale_shift)
452 st->scale = Q15ONE;
453 else
454 st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift);
455 #else
559 st->scale = 1.f/nfft; 456 st->scale = 1.f/nfft;
560 #endif 457 #endif
561 if (base != NULL) 458 if (base != NULL)
562 { 459 {
563 st->twiddles = base->twiddles; 460 st->twiddles = base->twiddles;
564 st->shift = 0; 461 st->shift = 0;
565 while (nfft<<st->shift != base->nfft && st->shift < 32) 462 while (st->shift < 32 && nfft<<st->shift != base->nfft)
566 st->shift++; 463 st->shift++;
567 if (st->shift>=32) 464 if (st->shift>=32)
568 goto fail; 465 goto fail;
569 } else { 466 } else {
570 st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(k iss_twiddle_cpx)*nfft); 467 st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(k iss_twiddle_cpx)*nfft);
571 compute_twiddles(twiddles, nfft); 468 compute_twiddles(twiddles, nfft);
572 st->shift = -1; 469 st->shift = -1;
573 } 470 }
574 if (!kf_factor(nfft,st->factors)) 471 if (!kf_factor(nfft,st->factors))
575 { 472 {
(...skipping 23 matching lines...) Expand all
599 { 496 {
600 opus_free((opus_int16*)cfg->bitrev); 497 opus_free((opus_int16*)cfg->bitrev);
601 if (cfg->shift < 0) 498 if (cfg->shift < 0)
602 opus_free((kiss_twiddle_cpx*)cfg->twiddles); 499 opus_free((kiss_twiddle_cpx*)cfg->twiddles);
603 opus_free((kiss_fft_state*)cfg); 500 opus_free((kiss_fft_state*)cfg);
604 } 501 }
605 } 502 }
606 503
607 #endif /* CUSTOM_MODES */ 504 #endif /* CUSTOM_MODES */
608 505
609 void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou t) 506 void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout)
610 { 507 {
611 int m2, m; 508 int m2, m;
612 int p; 509 int p;
613 int L; 510 int L;
614 int fstride[MAXFACTORS]; 511 int fstride[MAXFACTORS];
615 int i; 512 int i;
616 int shift; 513 int shift;
617 514
618 /* st->shift can be -1 */ 515 /* st->shift can be -1 */
619 shift = st->shift>0 ? st->shift : 0; 516 shift = st->shift>0 ? st->shift : 0;
620 517
621 celt_assert2 (fin != fout, "In-place FFT not supported");
622 /* Bit-reverse the input */
623 for (i=0;i<st->nfft;i++)
624 {
625 fout[st->bitrev[i]] = fin[i];
626 #ifndef FIXED_POINT
627 fout[st->bitrev[i]].r *= st->scale;
628 fout[st->bitrev[i]].i *= st->scale;
629 #endif
630 }
631
632 fstride[0] = 1; 518 fstride[0] = 1;
633 L=0; 519 L=0;
634 do { 520 do {
635 p = st->factors[2*L]; 521 p = st->factors[2*L];
636 m = st->factors[2*L+1]; 522 m = st->factors[2*L+1];
637 fstride[L+1] = fstride[L]*p; 523 fstride[L+1] = fstride[L]*p;
638 L++; 524 L++;
639 } while(m!=1); 525 } while(m!=1);
640 m = st->factors[2*L-1]; 526 m = st->factors[2*L-1];
641 for (i=L-1;i>=0;i--) 527 for (i=L-1;i>=0;i--)
642 { 528 {
643 if (i!=0) 529 if (i!=0)
644 m2 = st->factors[2*i-1]; 530 m2 = st->factors[2*i-1];
645 else 531 else
646 m2 = 1; 532 m2 = 1;
647 switch (st->factors[2*i]) 533 switch (st->factors[2*i])
648 { 534 {
649 case 2: 535 case 2:
650 kf_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2); 536 kf_bfly2(fout, m, fstride[i]);
651 break; 537 break;
652 case 4: 538 case 4:
653 kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2); 539 kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
654 break; 540 break;
655 #ifndef RADIX_TWO_ONLY 541 #ifndef RADIX_TWO_ONLY
656 case 3: 542 case 3:
657 kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2); 543 kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
658 break; 544 break;
659 case 5: 545 case 5:
660 kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2); 546 kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
661 break; 547 break;
662 #endif 548 #endif
663 } 549 }
664 m = m2; 550 m = m2;
665 } 551 }
666 } 552 }
667 553
554 void opus_fft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fou t)
555 {
556 int i;
557 opus_val16 scale;
558 #ifdef FIXED_POINT
559 /* Allows us to scale with MULT16_32_Q16(), which is faster than
560 MULT16_32_Q15() on ARM. */
561 int scale_shift = st->scale_shift-1;
562 #endif
563 scale = st->scale;
564
565 celt_assert2 (fin != fout, "In-place FFT not supported");
566 /* Bit-reverse the input */
567 for (i=0;i<st->nfft;i++)
568 {
569 kiss_fft_cpx x = fin[i];
570 fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift);
571 fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift);
572 }
573 opus_fft_impl(st, fout);
574 }
575
576
577 #ifdef TEST_UNIT_DFT_C
668 void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fo ut) 578 void opus_ifft(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fo ut)
669 { 579 {
670 int m2, m;
671 int p;
672 int L;
673 int fstride[MAXFACTORS];
674 int i; 580 int i;
675 int shift;
676
677 /* st->shift can be -1 */
678 shift = st->shift>0 ? st->shift : 0;
679 celt_assert2 (fin != fout, "In-place FFT not supported"); 581 celt_assert2 (fin != fout, "In-place FFT not supported");
680 /* Bit-reverse the input */ 582 /* Bit-reverse the input */
681 for (i=0;i<st->nfft;i++) 583 for (i=0;i<st->nfft;i++)
682 fout[st->bitrev[i]] = fin[i]; 584 fout[st->bitrev[i]] = fin[i];
683 585 for (i=0;i<st->nfft;i++)
684 fstride[0] = 1; 586 fout[i].i = -fout[i].i;
685 L=0; 587 opus_fft_impl(st, fout);
686 do { 588 for (i=0;i<st->nfft;i++)
687 p = st->factors[2*L]; 589 fout[i].i = -fout[i].i;
688 m = st->factors[2*L+1]; 590 }
689 fstride[L+1] = fstride[L]*p;
690 L++;
691 } while(m!=1);
692 m = st->factors[2*L-1];
693 for (i=L-1;i>=0;i--)
694 {
695 if (i!=0)
696 m2 = st->factors[2*i-1];
697 else
698 m2 = 1;
699 switch (st->factors[2*i])
700 {
701 case 2:
702 ki_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);
703 break;
704 case 4:
705 ki_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
706 break;
707 #ifndef RADIX_TWO_ONLY
708 case 3:
709 ki_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
710 break;
711 case 5:
712 ki_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
713 break;
714 #endif 591 #endif
715 }
716 m = m2;
717 }
718 }
719
OLDNEW
« no previous file with comments | « celt/kiss_fft.h ('k') | celt/mdct.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698