OLD | NEW |
1 /* Copyright (c) 2007-2008 CSIRO | 1 /* Copyright (c) 2007-2008 CSIRO |
2 Copyright (c) 2007-2008 Xiph.Org Foundation | 2 Copyright (c) 2007-2008 Xiph.Org Foundation |
3 Written by Jean-Marc Valin */ | 3 Written by Jean-Marc Valin */ |
4 /* | 4 /* |
5 Redistribution and use in source and binary forms, with or without | 5 Redistribution and use in source and binary forms, with or without |
6 modification, are permitted provided that the following conditions | 6 modification, are permitted provided that the following conditions |
7 are met: | 7 are met: |
8 | 8 |
9 - Redistributions of source code must retain the above copyright | 9 - Redistributions of source code must retain the above copyright |
10 notice, this list of conditions and the following disclaimer. | 10 notice, this list of conditions and the following disclaimer. |
(...skipping 91 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
102 #endif /* CUSTOM_MODES */ | 102 #endif /* CUSTOM_MODES */ |
103 | 103 |
104 /* Forward MDCT trashes the input array */ | 104 /* Forward MDCT trashes the input array */ |
105 void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, | 105 void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, |
106 const opus_val16 *window, int overlap, int shift, int stride) | 106 const opus_val16 *window, int overlap, int shift, int stride) |
107 { | 107 { |
108 int i; | 108 int i; |
109 int N, N2, N4; | 109 int N, N2, N4; |
110 kiss_twiddle_scalar sine; | 110 kiss_twiddle_scalar sine; |
111 VARDECL(kiss_fft_scalar, f); | 111 VARDECL(kiss_fft_scalar, f); |
| 112 VARDECL(kiss_fft_scalar, f2); |
112 SAVE_STACK; | 113 SAVE_STACK; |
113 N = l->n; | 114 N = l->n; |
114 N >>= shift; | 115 N >>= shift; |
115 N2 = N>>1; | 116 N2 = N>>1; |
116 N4 = N>>2; | 117 N4 = N>>2; |
117 ALLOC(f, N2, kiss_fft_scalar); | 118 ALLOC(f, N2, kiss_fft_scalar); |
| 119 ALLOC(f2, N2, kiss_fft_scalar); |
118 /* sin(x) ~= x here */ | 120 /* sin(x) ~= x here */ |
119 #ifdef FIXED_POINT | 121 #ifdef FIXED_POINT |
120 sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; | 122 sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; |
121 #else | 123 #else |
122 sine = (kiss_twiddle_scalar)2*PI*(.125f)/N; | 124 sine = (kiss_twiddle_scalar)2*PI*(.125f)/N; |
123 #endif | 125 #endif |
124 | 126 |
125 /* Consider the input to be composed of four blocks: [a, b, c, d] */ | 127 /* Consider the input to be composed of four blocks: [a, b, c, d] */ |
126 /* Window, shuffle, fold */ | 128 /* Window, shuffle, fold */ |
127 { | 129 { |
128 /* Temp pointers to make it really clear to the compiler what we're doing */ | 130 /* Temp pointers to make it really clear to the compiler what we're doing */ |
129 const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1); | 131 const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1); |
130 const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1); | 132 const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1); |
131 kiss_fft_scalar * OPUS_RESTRICT yp = f; | 133 kiss_fft_scalar * OPUS_RESTRICT yp = f; |
132 const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1); | 134 const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1); |
133 const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1; | 135 const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1; |
134 for(i=0;i<(overlap>>2);i++) | 136 for(i=0;i<((overlap+3)>>2);i++) |
135 { | 137 { |
136 /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/ | 138 /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/ |
137 *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2); | 139 *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2); |
138 *yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]); | 140 *yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]); |
139 xp1+=2; | 141 xp1+=2; |
140 xp2-=2; | 142 xp2-=2; |
141 wp1+=2; | 143 wp1+=2; |
142 wp2-=2; | 144 wp2-=2; |
143 } | 145 } |
144 wp1 = window; | 146 wp1 = window; |
145 wp2 = window+overlap-1; | 147 wp2 = window+overlap-1; |
146 for(;i<N4-(overlap>>2);i++) | 148 for(;i<N4-((overlap+3)>>2);i++) |
147 { | 149 { |
148 /* Real part arranged as a-bR, Imag part arranged as -c-dR */ | 150 /* Real part arranged as a-bR, Imag part arranged as -c-dR */ |
149 *yp++ = *xp2; | 151 *yp++ = *xp2; |
150 *yp++ = *xp1; | 152 *yp++ = *xp1; |
151 xp1+=2; | 153 xp1+=2; |
152 xp2-=2; | 154 xp2-=2; |
153 } | 155 } |
154 for(;i<N4;i++) | 156 for(;i<N4;i++) |
155 { | 157 { |
156 /* Real part arranged as a-bR, Imag part arranged as -c-dR */ | 158 /* Real part arranged as a-bR, Imag part arranged as -c-dR */ |
(...skipping 16 matching lines...) Expand all Loading... |
173 im = yp[1]; | 175 im = yp[1]; |
174 yr = -S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]); | 176 yr = -S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]); |
175 yi = -S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]); | 177 yi = -S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]); |
176 /* works because the cos is nearly one */ | 178 /* works because the cos is nearly one */ |
177 *yp++ = yr + S_MUL(yi,sine); | 179 *yp++ = yr + S_MUL(yi,sine); |
178 *yp++ = yi - S_MUL(yr,sine); | 180 *yp++ = yi - S_MUL(yr,sine); |
179 } | 181 } |
180 } | 182 } |
181 | 183 |
182 /* N/4 complex FFT, down-scales by 4/N */ | 184 /* N/4 complex FFT, down-scales by 4/N */ |
183 opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)in); | 185 opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)f2); |
184 | 186 |
185 /* Post-rotate */ | 187 /* Post-rotate */ |
186 { | 188 { |
187 /* Temp pointers to make it really clear to the compiler what we're doing */ | 189 /* Temp pointers to make it really clear to the compiler what we're doing */ |
188 const kiss_fft_scalar * OPUS_RESTRICT fp = in; | 190 const kiss_fft_scalar * OPUS_RESTRICT fp = f2; |
189 kiss_fft_scalar * OPUS_RESTRICT yp1 = out; | 191 kiss_fft_scalar * OPUS_RESTRICT yp1 = out; |
190 kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); | 192 kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); |
191 const kiss_twiddle_scalar *t = &l->trig[0]; | 193 const kiss_twiddle_scalar *t = &l->trig[0]; |
192 /* Temp pointers to make it really clear to the compiler what we're doing */ | 194 /* Temp pointers to make it really clear to the compiler what we're doing */ |
193 for(i=0;i<N4;i++) | 195 for(i=0;i<N4;i++) |
194 { | 196 { |
195 kiss_fft_scalar yr, yi; | 197 kiss_fft_scalar yr, yi; |
196 yr = S_MUL(fp[1],t[(N4-i)<<shift]) + S_MUL(fp[0],t[i<<shift]); | 198 yr = S_MUL(fp[1],t[(N4-i)<<shift]) + S_MUL(fp[0],t[i<<shift]); |
197 yi = S_MUL(fp[0],t[(N4-i)<<shift]) - S_MUL(fp[1],t[i<<shift]); | 199 yi = S_MUL(fp[0],t[(N4-i)<<shift]) - S_MUL(fp[1],t[i<<shift]); |
198 /* works because the cos is nearly one */ | 200 /* works because the cos is nearly one */ |
199 *yp1 = yr - S_MUL(yi,sine); | 201 *yp1 = yr - S_MUL(yi,sine); |
200 *yp2 = yi + S_MUL(yr,sine);; | 202 *yp2 = yi + S_MUL(yr,sine);; |
201 fp += 2; | 203 fp += 2; |
202 yp1 += 2*stride; | 204 yp1 += 2*stride; |
203 yp2 -= 2*stride; | 205 yp2 -= 2*stride; |
204 } | 206 } |
205 } | 207 } |
206 RESTORE_STACK; | 208 RESTORE_STACK; |
207 } | 209 } |
208 | 210 |
209 void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, | 211 void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, |
210 const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride) | 212 const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride) |
211 { | 213 { |
212 int i; | 214 int i; |
213 int N, N2, N4; | 215 int N, N2, N4; |
214 kiss_twiddle_scalar sine; | 216 kiss_twiddle_scalar sine; |
215 VARDECL(kiss_fft_scalar, f); | |
216 VARDECL(kiss_fft_scalar, f2); | 217 VARDECL(kiss_fft_scalar, f2); |
217 SAVE_STACK; | 218 SAVE_STACK; |
218 N = l->n; | 219 N = l->n; |
219 N >>= shift; | 220 N >>= shift; |
220 N2 = N>>1; | 221 N2 = N>>1; |
221 N4 = N>>2; | 222 N4 = N>>2; |
222 ALLOC(f, N2, kiss_fft_scalar); | |
223 ALLOC(f2, N2, kiss_fft_scalar); | 223 ALLOC(f2, N2, kiss_fft_scalar); |
224 /* sin(x) ~= x here */ | 224 /* sin(x) ~= x here */ |
225 #ifdef FIXED_POINT | 225 #ifdef FIXED_POINT |
226 sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; | 226 sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; |
227 #else | 227 #else |
228 sine = (kiss_twiddle_scalar)2*PI*(.125f)/N; | 228 sine = (kiss_twiddle_scalar)2*PI*(.125f)/N; |
229 #endif | 229 #endif |
230 | 230 |
231 /* Pre-rotate */ | 231 /* Pre-rotate */ |
232 { | 232 { |
233 /* Temp pointers to make it really clear to the compiler what we're doing */ | 233 /* Temp pointers to make it really clear to the compiler what we're doing */ |
234 const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; | 234 const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; |
235 const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); | 235 const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); |
236 kiss_fft_scalar * OPUS_RESTRICT yp = f2; | 236 kiss_fft_scalar * OPUS_RESTRICT yp = f2; |
237 const kiss_twiddle_scalar *t = &l->trig[0]; | 237 const kiss_twiddle_scalar *t = &l->trig[0]; |
238 for(i=0;i<N4;i++) | 238 for(i=0;i<N4;i++) |
239 { | 239 { |
240 kiss_fft_scalar yr, yi; | 240 kiss_fft_scalar yr, yi; |
241 yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]); | 241 yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]); |
242 yi = -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]); | 242 yi = -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]); |
243 /* works because the cos is nearly one */ | 243 /* works because the cos is nearly one */ |
244 *yp++ = yr - S_MUL(yi,sine); | 244 *yp++ = yr - S_MUL(yi,sine); |
245 *yp++ = yi + S_MUL(yr,sine); | 245 *yp++ = yi + S_MUL(yr,sine); |
246 xp1+=2*stride; | 246 xp1+=2*stride; |
247 xp2-=2*stride; | 247 xp2-=2*stride; |
248 } | 248 } |
249 } | 249 } |
250 | 250 |
251 /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */ | 251 /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */ |
252 opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)f); | 252 opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)(out+(overlap>>1))); |
253 | 253 |
254 /* Post-rotate */ | 254 /* Post-rotate and de-shuffle from both ends of the buffer at once to make |
| 255 it in-place. */ |
255 { | 256 { |
256 kiss_fft_scalar * OPUS_RESTRICT fp = f; | 257 kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1); |
| 258 kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2; |
257 const kiss_twiddle_scalar *t = &l->trig[0]; | 259 const kiss_twiddle_scalar *t = &l->trig[0]; |
258 | 260 /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the |
259 for(i=0;i<N4;i++) | 261 middle pair will be computed twice. */ |
| 262 for(i=0;i<(N4+1)>>1;i++) |
260 { | 263 { |
261 kiss_fft_scalar re, im, yr, yi; | 264 kiss_fft_scalar re, im, yr, yi; |
262 re = fp[0]; | 265 kiss_twiddle_scalar t0, t1; |
263 im = fp[1]; | 266 re = yp0[0]; |
| 267 im = yp0[1]; |
| 268 t0 = t[i<<shift]; |
| 269 t1 = t[(N4-i)<<shift]; |
264 /* We'd scale up by 2 here, but instead it's done when mixing the windows */ | 270 /* We'd scale up by 2 here, but instead it's done when mixing the windows */ |
265 yr = S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]); | 271 yr = S_MUL(re,t0) - S_MUL(im,t1); |
266 yi = S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]); | 272 yi = S_MUL(im,t0) + S_MUL(re,t1); |
| 273 re = yp1[0]; |
| 274 im = yp1[1]; |
267 /* works because the cos is nearly one */ | 275 /* works because the cos is nearly one */ |
268 *fp++ = yr - S_MUL(yi,sine); | 276 yp0[0] = -(yr - S_MUL(yi,sine)); |
269 *fp++ = yi + S_MUL(yr,sine); | 277 yp1[1] = yi + S_MUL(yr,sine); |
| 278 |
| 279 t0 = t[(N4-i-1)<<shift]; |
| 280 t1 = t[(i+1)<<shift]; |
| 281 /* We'd scale up by 2 here, but instead it's done when mixing the windows */ |
| 282 yr = S_MUL(re,t0) - S_MUL(im,t1); |
| 283 yi = S_MUL(im,t0) + S_MUL(re,t1); |
| 284 /* works because the cos is nearly one */ |
| 285 yp1[0] = -(yr - S_MUL(yi,sine)); |
| 286 yp0[1] = yi + S_MUL(yr,sine); |
| 287 yp0 += 2; |
| 288 yp1 -= 2; |
270 } | 289 } |
271 } | 290 } |
272 /* De-shuffle the components for the middle of the window only */ | 291 |
273 { | |
274 const kiss_fft_scalar * OPUS_RESTRICT fp1 = f; | |
275 const kiss_fft_scalar * OPUS_RESTRICT fp2 = f+N2-1; | |
276 kiss_fft_scalar * OPUS_RESTRICT yp = f2; | |
277 for(i = 0; i < N4; i++) | |
278 { | |
279 *yp++ =-*fp1; | |
280 *yp++ = *fp2; | |
281 fp1 += 2; | |
282 fp2 -= 2; | |
283 } | |
284 } | |
285 out -= (N2-overlap)>>1; | |
286 /* Mirror on both sides for TDAC */ | 292 /* Mirror on both sides for TDAC */ |
287 { | 293 { |
288 kiss_fft_scalar * OPUS_RESTRICT fp1 = f2+N4-1; | 294 kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1; |
289 kiss_fft_scalar * OPUS_RESTRICT xp1 = out+N2-1; | 295 kiss_fft_scalar * OPUS_RESTRICT yp1 = out; |
290 kiss_fft_scalar * OPUS_RESTRICT yp1 = out+N4-overlap/2; | |
291 const opus_val16 * OPUS_RESTRICT wp1 = window; | 296 const opus_val16 * OPUS_RESTRICT wp1 = window; |
292 const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1; | 297 const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1; |
293 for(i = 0; i< N4-overlap/2; i++) | 298 |
| 299 for(i = 0; i < overlap/2; i++) |
294 { | 300 { |
295 *xp1 = *fp1; | 301 kiss_fft_scalar x1, x2; |
296 xp1--; | 302 x1 = *xp1; |
297 fp1--; | 303 x2 = *yp1; |
298 } | 304 *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1); |
299 for(; i < N4; i++) | 305 *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1); |
300 { | |
301 kiss_fft_scalar x1; | |
302 x1 = *fp1--; | |
303 *yp1++ +=-MULT16_32_Q15(*wp1, x1); | |
304 *xp1-- += MULT16_32_Q15(*wp2, x1); | |
305 wp1++; | 306 wp1++; |
306 wp2--; | 307 wp2--; |
307 } | 308 } |
308 } | |
309 { | |
310 kiss_fft_scalar * OPUS_RESTRICT fp2 = f2+N4; | |
311 kiss_fft_scalar * OPUS_RESTRICT xp2 = out+N2; | |
312 kiss_fft_scalar * OPUS_RESTRICT yp2 = out+N-1-(N4-overlap/2); | |
313 const opus_val16 * OPUS_RESTRICT wp1 = window; | |
314 const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1; | |
315 for(i = 0; i< N4-overlap/2; i++) | |
316 { | |
317 *xp2 = *fp2; | |
318 xp2++; | |
319 fp2++; | |
320 } | |
321 for(; i < N4; i++) | |
322 { | |
323 kiss_fft_scalar x2; | |
324 x2 = *fp2++; | |
325 *yp2-- = MULT16_32_Q15(*wp1, x2); | |
326 *xp2++ = MULT16_32_Q15(*wp2, x2); | |
327 wp1++; | |
328 wp2--; | |
329 } | |
330 } | 309 } |
331 RESTORE_STACK; | 310 RESTORE_STACK; |
332 } | 311 } |
OLD | NEW |