| OLD | NEW |
| 1 /* Copyright (c) 2007-2008 CSIRO | 1 /* Copyright (c) 2007-2008 CSIRO |
| 2 Copyright (c) 2007-2008 Xiph.Org Foundation | 2 Copyright (c) 2007-2008 Xiph.Org Foundation |
| 3 Written by Jean-Marc Valin */ | 3 Written by Jean-Marc Valin */ |
| 4 /* | 4 /* |
| 5 Redistribution and use in source and binary forms, with or without | 5 Redistribution and use in source and binary forms, with or without |
| 6 modification, are permitted provided that the following conditions | 6 modification, are permitted provided that the following conditions |
| 7 are met: | 7 are met: |
| 8 | 8 |
| 9 - Redistributions of source code must retain the above copyright | 9 - Redistributions of source code must retain the above copyright |
| 10 notice, this list of conditions and the following disclaimer. | 10 notice, this list of conditions and the following disclaimer. |
| (...skipping 91 matching lines...) |
| 102 #endif /* CUSTOM_MODES */ | 102 #endif /* CUSTOM_MODES */ |
| 103 | 103 |
| 104 /* Forward MDCT trashes the input array */ | 104 /* Forward MDCT trashes the input array */ |
| 105 void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, | 105 void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, |
| 106 const opus_val16 *window, int overlap, int shift, int stride) | 106 const opus_val16 *window, int overlap, int shift, int stride) |
| 107 { | 107 { |
| 108 int i; | 108 int i; |
| 109 int N, N2, N4; | 109 int N, N2, N4; |
| 110 kiss_twiddle_scalar sine; | 110 kiss_twiddle_scalar sine; |
| 111 VARDECL(kiss_fft_scalar, f); | 111 VARDECL(kiss_fft_scalar, f); |
| 112 VARDECL(kiss_fft_scalar, f2); |
| 112 SAVE_STACK; | 113 SAVE_STACK; |
| 113 N = l->n; | 114 N = l->n; |
| 114 N >>= shift; | 115 N >>= shift; |
| 115 N2 = N>>1; | 116 N2 = N>>1; |
| 116 N4 = N>>2; | 117 N4 = N>>2; |
| 117 ALLOC(f, N2, kiss_fft_scalar); | 118 ALLOC(f, N2, kiss_fft_scalar); |
| 119 ALLOC(f2, N2, kiss_fft_scalar); |
| 118 /* sin(x) ~= x here */ | 120 /* sin(x) ~= x here */ |
| 119 #ifdef FIXED_POINT | 121 #ifdef FIXED_POINT |
| 120 sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; | 122 sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; |
| 121 #else | 123 #else |
| 122 sine = (kiss_twiddle_scalar)2*PI*(.125f)/N; | 124 sine = (kiss_twiddle_scalar)2*PI*(.125f)/N; |
| 123 #endif | 125 #endif |
| 124 | 126 |
| 125 /* Consider the input to be composed of four blocks: [a, b, c, d] */ | 127 /* Consider the input to be composed of four blocks: [a, b, c, d] */ |
| 126 /* Window, shuffle, fold */ | 128 /* Window, shuffle, fold */ |
| 127 { | 129 { |
| 128 /* Temp pointers to make it really clear to the compiler what we're doing */ | 130 /* Temp pointers to make it really clear to the compiler what we're doing */ |
| 129 const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1); | 131 const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1); |
| 130 const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1); | 132 const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1); |
| 131 kiss_fft_scalar * OPUS_RESTRICT yp = f; | 133 kiss_fft_scalar * OPUS_RESTRICT yp = f; |
| 132 const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1); | 134 const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1); |
| 133 const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1; | 135 const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1; |
| 134 for(i=0;i<(overlap>>2);i++) | 136 for(i=0;i<((overlap+3)>>2);i++) |
| 135 { | 137 { |
| 136 /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/ | 138 /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/ |
| 137 *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2); | 139 *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2); |
| 138 *yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]); | 140 *yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]); |
| 139 xp1+=2; | 141 xp1+=2; |
| 140 xp2-=2; | 142 xp2-=2; |
| 141 wp1+=2; | 143 wp1+=2; |
| 142 wp2-=2; | 144 wp2-=2; |
| 143 } | 145 } |
| 144 wp1 = window; | 146 wp1 = window; |
| 145 wp2 = window+overlap-1; | 147 wp2 = window+overlap-1; |
| 146 for(;i<N4-(overlap>>2);i++) | 148 for(;i<N4-((overlap+3)>>2);i++) |
| 147 { | 149 { |
| 148 /* Real part arranged as a-bR, Imag part arranged as -c-dR */ | 150 /* Real part arranged as a-bR, Imag part arranged as -c-dR */ |
| 149 *yp++ = *xp2; | 151 *yp++ = *xp2; |
| 150 *yp++ = *xp1; | 152 *yp++ = *xp1; |
| 151 xp1+=2; | 153 xp1+=2; |
| 152 xp2-=2; | 154 xp2-=2; |
| 153 } | 155 } |
| 154 for(;i<N4;i++) | 156 for(;i<N4;i++) |
| 155 { | 157 { |
| 156 /* Real part arranged as a-bR, Imag part arranged as -c-dR */ | 158 /* Real part arranged as a-bR, Imag part arranged as -c-dR */ |
| (...skipping 16 matching lines...) |
| 173 im = yp[1]; | 175 im = yp[1]; |
| 174 yr = -S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]); | 176 yr = -S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]); |
| 175 yi = -S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]); | 177 yi = -S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]); |
| 176 /* works because the cos is nearly one */ | 178 /* works because the cos is nearly one */ |
| 177 *yp++ = yr + S_MUL(yi,sine); | 179 *yp++ = yr + S_MUL(yi,sine); |
| 178 *yp++ = yi - S_MUL(yr,sine); | 180 *yp++ = yi - S_MUL(yr,sine); |
| 179 } | 181 } |
| 180 } | 182 } |
| 181 | 183 |
| 182 /* N/4 complex FFT, down-scales by 4/N */ | 184 /* N/4 complex FFT, down-scales by 4/N */ |
| 183 opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)in); | 185 opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)f2); |
| 184 | 186 |
| 185 /* Post-rotate */ | 187 /* Post-rotate */ |
| 186 { | 188 { |
| 187 /* Temp pointers to make it really clear to the compiler what we're doing */ | 189 /* Temp pointers to make it really clear to the compiler what we're doing */ |
| 188 const kiss_fft_scalar * OPUS_RESTRICT fp = in; | 190 const kiss_fft_scalar * OPUS_RESTRICT fp = f2; |
| 189 kiss_fft_scalar * OPUS_RESTRICT yp1 = out; | 191 kiss_fft_scalar * OPUS_RESTRICT yp1 = out; |
| 190 kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); | 192 kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); |
| 191 const kiss_twiddle_scalar *t = &l->trig[0]; | 193 const kiss_twiddle_scalar *t = &l->trig[0]; |
| 192 /* Temp pointers to make it really clear to the compiler what we're doing */ | 194 /* Temp pointers to make it really clear to the compiler what we're doing */ |
| 193 for(i=0;i<N4;i++) | 195 for(i=0;i<N4;i++) |
| 194 { | 196 { |
| 195 kiss_fft_scalar yr, yi; | 197 kiss_fft_scalar yr, yi; |
| 196 yr = S_MUL(fp[1],t[(N4-i)<<shift]) + S_MUL(fp[0],t[i<<shift]); | 198 yr = S_MUL(fp[1],t[(N4-i)<<shift]) + S_MUL(fp[0],t[i<<shift]); |
| 197 yi = S_MUL(fp[0],t[(N4-i)<<shift]) - S_MUL(fp[1],t[i<<shift]); | 199 yi = S_MUL(fp[0],t[(N4-i)<<shift]) - S_MUL(fp[1],t[i<<shift]); |
| 198 /* works because the cos is nearly one */ | 200 /* works because the cos is nearly one */ |
| 199 *yp1 = yr - S_MUL(yi,sine); | 201 *yp1 = yr - S_MUL(yi,sine); |
| 200 *yp2 = yi + S_MUL(yr,sine);; | 202 *yp2 = yi + S_MUL(yr,sine);; |
| 201 fp += 2; | 203 fp += 2; |
| 202 yp1 += 2*stride; | 204 yp1 += 2*stride; |
| 203 yp2 -= 2*stride; | 205 yp2 -= 2*stride; |
| 204 } | 206 } |
| 205 } | 207 } |
| 206 RESTORE_STACK; | 208 RESTORE_STACK; |
| 207 } | 209 } |
| 208 | 210 |
| 209 void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, | 211 void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, |
| 210 const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride) | 212 const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride) |
| 211 { | 213 { |
| 212 int i; | 214 int i; |
| 213 int N, N2, N4; | 215 int N, N2, N4; |
| 214 kiss_twiddle_scalar sine; | 216 kiss_twiddle_scalar sine; |
| 215 VARDECL(kiss_fft_scalar, f); | |
| 216 VARDECL(kiss_fft_scalar, f2); | 217 VARDECL(kiss_fft_scalar, f2); |
| 217 SAVE_STACK; | 218 SAVE_STACK; |
| 218 N = l->n; | 219 N = l->n; |
| 219 N >>= shift; | 220 N >>= shift; |
| 220 N2 = N>>1; | 221 N2 = N>>1; |
| 221 N4 = N>>2; | 222 N4 = N>>2; |
| 222 ALLOC(f, N2, kiss_fft_scalar); | |
| 223 ALLOC(f2, N2, kiss_fft_scalar); | 223 ALLOC(f2, N2, kiss_fft_scalar); |
| 224 /* sin(x) ~= x here */ | 224 /* sin(x) ~= x here */ |
| 225 #ifdef FIXED_POINT | 225 #ifdef FIXED_POINT |
| 226 sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; | 226 sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; |
| 227 #else | 227 #else |
| 228 sine = (kiss_twiddle_scalar)2*PI*(.125f)/N; | 228 sine = (kiss_twiddle_scalar)2*PI*(.125f)/N; |
| 229 #endif | 229 #endif |
| 230 | 230 |
| 231 /* Pre-rotate */ | 231 /* Pre-rotate */ |
| 232 { | 232 { |
| 233 /* Temp pointers to make it really clear to the compiler what we're doing */ | 233 /* Temp pointers to make it really clear to the compiler what we're doing */ |
| 234 const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; | 234 const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; |
| 235 const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); | 235 const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); |
| 236 kiss_fft_scalar * OPUS_RESTRICT yp = f2; | 236 kiss_fft_scalar * OPUS_RESTRICT yp = f2; |
| 237 const kiss_twiddle_scalar *t = &l->trig[0]; | 237 const kiss_twiddle_scalar *t = &l->trig[0]; |
| 238 for(i=0;i<N4;i++) | 238 for(i=0;i<N4;i++) |
| 239 { | 239 { |
| 240 kiss_fft_scalar yr, yi; | 240 kiss_fft_scalar yr, yi; |
| 241 yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]); | 241 yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]); |
| 242 yi = -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]); | 242 yi = -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]); |
| 243 /* works because the cos is nearly one */ | 243 /* works because the cos is nearly one */ |
| 244 *yp++ = yr - S_MUL(yi,sine); | 244 *yp++ = yr - S_MUL(yi,sine); |
| 245 *yp++ = yi + S_MUL(yr,sine); | 245 *yp++ = yi + S_MUL(yr,sine); |
| 246 xp1+=2*stride; | 246 xp1+=2*stride; |
| 247 xp2-=2*stride; | 247 xp2-=2*stride; |
| 248 } | 248 } |
| 249 } | 249 } |
| 250 | 250 |
| 251 /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */ | 251 /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */ |
| 252 opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)f); | 252 opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)(out+(overlap>>1))); |
| 253 | 253 |
| 254 /* Post-rotate */ | 254 /* Post-rotate and de-shuffle from both ends of the buffer at once to make |
| 255 it in-place. */ |
| 255 { | 256 { |
| 256 kiss_fft_scalar * OPUS_RESTRICT fp = f; | 257 kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1); |
| 258 kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2; |
| 257 const kiss_twiddle_scalar *t = &l->trig[0]; | 259 const kiss_twiddle_scalar *t = &l->trig[0]; |
| 258 | 260 /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the |
| 259 for(i=0;i<N4;i++) | 261 middle pair will be computed twice. */ |
| 262 for(i=0;i<(N4+1)>>1;i++) |
| 260 { | 263 { |
| 261 kiss_fft_scalar re, im, yr, yi; | 264 kiss_fft_scalar re, im, yr, yi; |
| 262 re = fp[0]; | 265 kiss_twiddle_scalar t0, t1; |
| 263 im = fp[1]; | 266 re = yp0[0]; |
| 267 im = yp0[1]; |
| 268 t0 = t[i<<shift]; |
| 269 t1 = t[(N4-i)<<shift]; |
| 264 /* We'd scale up by 2 here, but instead it's done when mixing the windows */ | 270 /* We'd scale up by 2 here, but instead it's done when mixing the windows */ |
| 265 yr = S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]); | 271 yr = S_MUL(re,t0) - S_MUL(im,t1); |
| 266 yi = S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]); | 272 yi = S_MUL(im,t0) + S_MUL(re,t1); |
| 273 re = yp1[0]; |
| 274 im = yp1[1]; |
| 267 /* works because the cos is nearly one */ | 275 /* works because the cos is nearly one */ |
| 268 *fp++ = yr - S_MUL(yi,sine); | 276 yp0[0] = -(yr - S_MUL(yi,sine)); |
| 269 *fp++ = yi + S_MUL(yr,sine); | 277 yp1[1] = yi + S_MUL(yr,sine); |
| 278 |
| 279 t0 = t[(N4-i-1)<<shift]; |
| 280 t1 = t[(i+1)<<shift]; |
| 281 /* We'd scale up by 2 here, but instead it's done when mixing the windows */ |
| 282 yr = S_MUL(re,t0) - S_MUL(im,t1); |
| 283 yi = S_MUL(im,t0) + S_MUL(re,t1); |
| 284 /* works because the cos is nearly one */ |
| 285 yp1[0] = -(yr - S_MUL(yi,sine)); |
| 286 yp0[1] = yi + S_MUL(yr,sine); |
| 287 yp0 += 2; |
| 288 yp1 -= 2; |
| 270 } | 289 } |
| 271 } | 290 } |
| 272 /* De-shuffle the components for the middle of the window only */ | 291 |
| 273 { | |
| 274 const kiss_fft_scalar * OPUS_RESTRICT fp1 = f; | |
| 275 const kiss_fft_scalar * OPUS_RESTRICT fp2 = f+N2-1; | |
| 276 kiss_fft_scalar * OPUS_RESTRICT yp = f2; | |
| 277 for(i = 0; i < N4; i++) | |
| 278 { | |
| 279 *yp++ =-*fp1; | |
| 280 *yp++ = *fp2; | |
| 281 fp1 += 2; | |
| 282 fp2 -= 2; | |
| 283 } | |
| 284 } | |
| 285 out -= (N2-overlap)>>1; | |
| 286 /* Mirror on both sides for TDAC */ | 292 /* Mirror on both sides for TDAC */ |
| 287 { | 293 { |
| 288 kiss_fft_scalar * OPUS_RESTRICT fp1 = f2+N4-1; | 294 kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1; |
| 289 kiss_fft_scalar * OPUS_RESTRICT xp1 = out+N2-1; | 295 kiss_fft_scalar * OPUS_RESTRICT yp1 = out; |
| 290 kiss_fft_scalar * OPUS_RESTRICT yp1 = out+N4-overlap/2; | |
| 291 const opus_val16 * OPUS_RESTRICT wp1 = window; | 296 const opus_val16 * OPUS_RESTRICT wp1 = window; |
| 292 const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1; | 297 const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1; |
| 293 for(i = 0; i< N4-overlap/2; i++) | 298 |
| 299 for(i = 0; i < overlap/2; i++) |
| 294 { | 300 { |
| 295 *xp1 = *fp1; | 301 kiss_fft_scalar x1, x2; |
| 296 xp1--; | 302 x1 = *xp1; |
| 297 fp1--; | 303 x2 = *yp1; |
| 298 } | 304 *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1); |
| 299 for(; i < N4; i++) | 305 *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1); |
| 300 { | |
| 301 kiss_fft_scalar x1; | |
| 302 x1 = *fp1--; | |
| 303 *yp1++ +=-MULT16_32_Q15(*wp1, x1); | |
| 304 *xp1-- += MULT16_32_Q15(*wp2, x1); | |
| 305 wp1++; | 306 wp1++; |
| 306 wp2--; | 307 wp2--; |
| 307 } | 308 } |
| 308 } | |
| 309 { | |
| 310 kiss_fft_scalar * OPUS_RESTRICT fp2 = f2+N4; | |
| 311 kiss_fft_scalar * OPUS_RESTRICT xp2 = out+N2; | |
| 312 kiss_fft_scalar * OPUS_RESTRICT yp2 = out+N-1-(N4-overlap/2); | |
| 313 const opus_val16 * OPUS_RESTRICT wp1 = window; | |
| 314 const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1; | |
| 315 for(i = 0; i< N4-overlap/2; i++) | |
| 316 { | |
| 317 *xp2 = *fp2; | |
| 318 xp2++; | |
| 319 fp2++; | |
| 320 } | |
| 321 for(; i < N4; i++) | |
| 322 { | |
| 323 kiss_fft_scalar x2; | |
| 324 x2 = *fp2++; | |
| 325 *yp2-- = MULT16_32_Q15(*wp1, x2); | |
| 326 *xp2++ = MULT16_32_Q15(*wp2, x2); | |
| 327 wp1++; | |
| 328 wp2--; | |
| 329 } | |
| 330 } | 309 } |
| 331 RESTORE_STACK; | 310 RESTORE_STACK; |
| 332 } | 311 } |
| OLD | NEW |