Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(797)

Side by Side Diff: celt/mdct.c

Issue 882843002: Update to opus-HEAD-66611f1. (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/opus.git@master
Patch Set: Add the contents of Makefile.mips back. Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « celt/kiss_fft.c ('k') | celt/mips/celt_mipsr1.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* Copyright (c) 2007-2008 CSIRO 1 /* Copyright (c) 2007-2008 CSIRO
2 Copyright (c) 2007-2008 Xiph.Org Foundation 2 Copyright (c) 2007-2008 Xiph.Org Foundation
3 Written by Jean-Marc Valin */ 3 Written by Jean-Marc Valin */
4 /* 4 /*
5 Redistribution and use in source and binary forms, with or without 5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions 6 modification, are permitted provided that the following conditions
7 are met: 7 are met:
8 8
9 - Redistributions of source code must retain the above copyright 9 - Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer. 10 notice, this list of conditions and the following disclaimer.
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
46 #endif 46 #endif
47 47
48 #include "mdct.h" 48 #include "mdct.h"
49 #include "kiss_fft.h" 49 #include "kiss_fft.h"
50 #include "_kiss_fft_guts.h" 50 #include "_kiss_fft_guts.h"
51 #include <math.h> 51 #include <math.h>
52 #include "os_support.h" 52 #include "os_support.h"
53 #include "mathops.h" 53 #include "mathops.h"
54 #include "stack_alloc.h" 54 #include "stack_alloc.h"
55 55
56 #if defined(MIPSr1_ASM)
57 #include "mips/mdct_mipsr1.h"
58 #endif
59
60
56 #ifdef CUSTOM_MODES 61 #ifdef CUSTOM_MODES
57 62
58 int clt_mdct_init(mdct_lookup *l,int N, int maxshift) 63 int clt_mdct_init(mdct_lookup *l,int N, int maxshift)
59 { 64 {
60 int i; 65 int i;
61 int N4;
62 kiss_twiddle_scalar *trig; 66 kiss_twiddle_scalar *trig;
63 #if defined(FIXED_POINT) 67 int shift;
64 int N2=N>>1; 68 int N2=N>>1;
65 #endif
66 l->n = N; 69 l->n = N;
67 N4 = N>>2;
68 l->maxshift = maxshift; 70 l->maxshift = maxshift;
69 for (i=0;i<=maxshift;i++) 71 for (i=0;i<=maxshift;i++)
70 { 72 {
71 if (i==0) 73 if (i==0)
72 l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0); 74 l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0);
73 else 75 else
74 l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0]); 76 l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0]);
75 #ifndef ENABLE_TI_DSPLIB55 77 #ifndef ENABLE_TI_DSPLIB55
76 if (l->kfft[i]==NULL) 78 if (l->kfft[i]==NULL)
77 return 0; 79 return 0;
78 #endif 80 #endif
79 } 81 }
80 l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N4+1)*sizeof(kiss_twiddle_scalar)); 82 l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N-(N2>>maxshift))*sizeof(kiss_twiddle_scalar));
81 if (l->trig==NULL) 83 if (l->trig==NULL)
82 return 0; 84 return 0;
83 /* We have enough points that sine isn't necessary */ 85 for (shift=0;shift<=maxshift;shift++)
86 {
87 /* We have enough points that sine isn't necessary */
84 #if defined(FIXED_POINT) 88 #if defined(FIXED_POINT)
85 for (i=0;i<=N4;i++) 89 #if 1
86 trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2) ,N)); 90 for (i=0;i<N2;i++)
91 trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17), N2+16384),N));
87 #else 92 #else
88 for (i=0;i<=N4;i++) 93 for (i=0;i<N2;i++)
89 trig[i] = (kiss_twiddle_scalar)cos(2*PI*i/N); 94 trig[i] = (kiss_twiddle_scalar)MAX32(-32767,MIN32(32767,floor(.5+32768* cos(2*M_PI*(i+.125)/N))));
90 #endif 95 #endif
96 #else
97 for (i=0;i<N2;i++)
98 trig[i] = (kiss_twiddle_scalar)cos(2*PI*(i+.125)/N);
99 #endif
100 trig += N2;
101 N2 >>= 1;
102 N >>= 1;
103 }
91 return 1; 104 return 1;
92 } 105 }
93 106
94 void clt_mdct_clear(mdct_lookup *l) 107 void clt_mdct_clear(mdct_lookup *l)
95 { 108 {
96 int i; 109 int i;
97 for (i=0;i<=l->maxshift;i++) 110 for (i=0;i<=l->maxshift;i++)
98 opus_fft_free(l->kfft[i]); 111 opus_fft_free(l->kfft[i]);
99 opus_free((kiss_twiddle_scalar*)l->trig); 112 opus_free((kiss_twiddle_scalar*)l->trig);
100 } 113 }
101 114
102 #endif /* CUSTOM_MODES */ 115 #endif /* CUSTOM_MODES */
103 116
104 /* Forward MDCT trashes the input array */ 117 /* Forward MDCT trashes the input array */
118 #ifndef OVERRIDE_clt_mdct_forward
105 void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, 119 void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
106 const opus_val16 *window, int overlap, int shift, int stride) 120 const opus_val16 *window, int overlap, int shift, int stride)
107 { 121 {
108 int i; 122 int i;
109 int N, N2, N4; 123 int N, N2, N4;
110 kiss_twiddle_scalar sine;
111 VARDECL(kiss_fft_scalar, f); 124 VARDECL(kiss_fft_scalar, f);
112 VARDECL(kiss_fft_scalar, f2); 125 VARDECL(kiss_fft_cpx, f2);
126 const kiss_fft_state *st = l->kfft[shift];
127 const kiss_twiddle_scalar *trig;
128 opus_val16 scale;
129 #ifdef FIXED_POINT
130 /* Allows us to scale with MULT16_32_Q16(), which is faster than
131 MULT16_32_Q15() on ARM. */
132 int scale_shift = st->scale_shift-1;
133 #endif
113 SAVE_STACK; 134 SAVE_STACK;
135 scale = st->scale;
136
114 N = l->n; 137 N = l->n;
115 N >>= shift; 138 trig = l->trig;
139 for (i=0;i<shift;i++)
140 {
141 N >>= 1;
142 trig += N;
143 }
116 N2 = N>>1; 144 N2 = N>>1;
117 N4 = N>>2; 145 N4 = N>>2;
146
118 ALLOC(f, N2, kiss_fft_scalar); 147 ALLOC(f, N2, kiss_fft_scalar);
119 ALLOC(f2, N2, kiss_fft_scalar); 148 ALLOC(f2, N4, kiss_fft_cpx);
120 /* sin(x) ~= x here */
121 #ifdef FIXED_POINT
122 sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
123 #else
124 sine = (kiss_twiddle_scalar)2*PI*(.125f)/N;
125 #endif
126 149
127 /* Consider the input to be composed of four blocks: [a, b, c, d] */ 150 /* Consider the input to be composed of four blocks: [a, b, c, d] */
128 /* Window, shuffle, fold */ 151 /* Window, shuffle, fold */
129 { 152 {
130 /* Temp pointers to make it really clear to the compiler what we're doing */ 153 /* Temp pointers to make it really clear to the compiler what we're doing */
131 const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1); 154 const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);
132 const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1); 155 const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);
133 kiss_fft_scalar * OPUS_RESTRICT yp = f; 156 kiss_fft_scalar * OPUS_RESTRICT yp = f;
134 const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1); 157 const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);
135 const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1; 158 const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;
(...skipping 24 matching lines...) Expand all
160 *yp++ = MULT16_32_Q15(*wp2, *xp1) + MULT16_32_Q15(*wp1, xp2[N2]); 183 *yp++ = MULT16_32_Q15(*wp2, *xp1) + MULT16_32_Q15(*wp1, xp2[N2]);
161 xp1+=2; 184 xp1+=2;
162 xp2-=2; 185 xp2-=2;
163 wp1+=2; 186 wp1+=2;
164 wp2-=2; 187 wp2-=2;
165 } 188 }
166 } 189 }
167 /* Pre-rotation */ 190 /* Pre-rotation */
168 { 191 {
169 kiss_fft_scalar * OPUS_RESTRICT yp = f; 192 kiss_fft_scalar * OPUS_RESTRICT yp = f;
170 const kiss_twiddle_scalar *t = &l->trig[0]; 193 const kiss_twiddle_scalar *t = &trig[0];
171 for(i=0;i<N4;i++) 194 for(i=0;i<N4;i++)
172 { 195 {
196 kiss_fft_cpx yc;
197 kiss_twiddle_scalar t0, t1;
173 kiss_fft_scalar re, im, yr, yi; 198 kiss_fft_scalar re, im, yr, yi;
174 re = yp[0]; 199 t0 = t[i];
175 im = yp[1]; 200 t1 = t[N4+i];
176 yr = -S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]); 201 re = *yp++;
177 yi = -S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]); 202 im = *yp++;
178 /* works because the cos is nearly one */ 203 yr = S_MUL(re,t0) - S_MUL(im,t1);
179 *yp++ = yr + S_MUL(yi,sine); 204 yi = S_MUL(im,t0) + S_MUL(re,t1);
180 *yp++ = yi - S_MUL(yr,sine); 205 yc.r = yr;
206 yc.i = yi;
207 yc.r = PSHR32(MULT16_32_Q16(scale, yc.r), scale_shift);
208 yc.i = PSHR32(MULT16_32_Q16(scale, yc.i), scale_shift);
209 f2[st->bitrev[i]] = yc;
181 } 210 }
182 } 211 }
183 212
184 /* N/4 complex FFT, down-scales by 4/N */ 213 /* N/4 complex FFT, does not downscale anymore */
185 opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)f2); 214 opus_fft_impl(st, f2);
186 215
187 /* Post-rotate */ 216 /* Post-rotate */
188 { 217 {
189 /* Temp pointers to make it really clear to the compiler what we're doing */ 218 /* Temp pointers to make it really clear to the compiler what we're doing */
190 const kiss_fft_scalar * OPUS_RESTRICT fp = f2; 219 const kiss_fft_cpx * OPUS_RESTRICT fp = f2;
191 kiss_fft_scalar * OPUS_RESTRICT yp1 = out; 220 kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
192 kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); 221 kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
193 const kiss_twiddle_scalar *t = &l->trig[0]; 222 const kiss_twiddle_scalar *t = &trig[0];
194 /* Temp pointers to make it really clear to the compiler what we're doing */ 223 /* Temp pointers to make it really clear to the compiler what we're doing */
195 for(i=0;i<N4;i++) 224 for(i=0;i<N4;i++)
196 { 225 {
197 kiss_fft_scalar yr, yi; 226 kiss_fft_scalar yr, yi;
198 yr = S_MUL(fp[1],t[(N4-i)<<shift]) + S_MUL(fp[0],t[i<<shift]); 227 yr = S_MUL(fp->i,t[N4+i]) - S_MUL(fp->r,t[i]);
199 yi = S_MUL(fp[0],t[(N4-i)<<shift]) - S_MUL(fp[1],t[i<<shift]); 228 yi = S_MUL(fp->r,t[N4+i]) + S_MUL(fp->i,t[i]);
200 /* works because the cos is nearly one */ 229 *yp1 = yr;
201 *yp1 = yr - S_MUL(yi,sine); 230 *yp2 = yi;
202 *yp2 = yi + S_MUL(yr,sine);; 231 fp++;
203 fp += 2;
204 yp1 += 2*stride; 232 yp1 += 2*stride;
205 yp2 -= 2*stride; 233 yp2 -= 2*stride;
206 } 234 }
207 } 235 }
208 RESTORE_STACK; 236 RESTORE_STACK;
209 } 237 }
238 #endif /* OVERRIDE_clt_mdct_forward */
210 239
240 #ifndef OVERRIDE_clt_mdct_backward
211 void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out, 241 void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,
212 const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride) 242 const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride)
213 { 243 {
214 int i; 244 int i;
215 int N, N2, N4; 245 int N, N2, N4;
216 kiss_twiddle_scalar sine; 246 const kiss_twiddle_scalar *trig;
217 VARDECL(kiss_fft_scalar, f2); 247
218 SAVE_STACK;
219 N = l->n; 248 N = l->n;
220 N >>= shift; 249 trig = l->trig;
250 for (i=0;i<shift;i++)
251 {
252 N >>= 1;
253 trig += N;
254 }
221 N2 = N>>1; 255 N2 = N>>1;
222 N4 = N>>2; 256 N4 = N>>2;
223 ALLOC(f2, N2, kiss_fft_scalar);
224 /* sin(x) ~= x here */
225 #ifdef FIXED_POINT
226 sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
227 #else
228 sine = (kiss_twiddle_scalar)2*PI*(.125f)/N;
229 #endif
230 257
231 /* Pre-rotate */ 258 /* Pre-rotate */
232 { 259 {
233 /* Temp pointers to make it really clear to the compiler what we're doing */ 260 /* Temp pointers to make it really clear to the compiler what we're doing */
234 const kiss_fft_scalar * OPUS_RESTRICT xp1 = in; 261 const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;
235 const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1); 262 const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);
236 kiss_fft_scalar * OPUS_RESTRICT yp = f2; 263 kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1);
237 const kiss_twiddle_scalar *t = &l->trig[0]; 264 const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0];
265 const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev;
238 for(i=0;i<N4;i++) 266 for(i=0;i<N4;i++)
239 { 267 {
268 int rev;
240 kiss_fft_scalar yr, yi; 269 kiss_fft_scalar yr, yi;
241 yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]); 270 rev = *bitrev++;
242 yi = -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]); 271 yr = S_MUL(*xp2, t[i]) + S_MUL(*xp1, t[N4+i]);
243 /* works because the cos is nearly one */ 272 yi = S_MUL(*xp1, t[i]) - S_MUL(*xp2, t[N4+i]);
244 *yp++ = yr - S_MUL(yi,sine); 273 /* We swap real and imag because we use an FFT instead of an IFFT. */
245 *yp++ = yi + S_MUL(yr,sine); 274 yp[2*rev+1] = yr;
275 yp[2*rev] = yi;
276 /* Storing the pre-rotation directly in the bitrev order. */
246 xp1+=2*stride; 277 xp1+=2*stride;
247 xp2-=2*stride; 278 xp2-=2*stride;
248 } 279 }
249 } 280 }
250 281
251 /* Inverse N/4 complex FFT. This one should *not* downscale even in fixed-point */ 282 opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1)));
252 opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)(out+(overlap>> 1)));
253 283
254 /* Post-rotate and de-shuffle from both ends of the buffer at once to make 284 /* Post-rotate and de-shuffle from both ends of the buffer at once to make
255 it in-place. */ 285 it in-place. */
256 { 286 {
257 kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1); 287 kiss_fft_scalar * yp0 = out+(overlap>>1);
258 kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2; 288 kiss_fft_scalar * yp1 = out+(overlap>>1)+N2-2;
259 const kiss_twiddle_scalar *t = &l->trig[0]; 289 const kiss_twiddle_scalar *t = &trig[0];
260 /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the 290 /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the
261 middle pair will be computed twice. */ 291 middle pair will be computed twice. */
262 for(i=0;i<(N4+1)>>1;i++) 292 for(i=0;i<(N4+1)>>1;i++)
263 { 293 {
264 kiss_fft_scalar re, im, yr, yi; 294 kiss_fft_scalar re, im, yr, yi;
265 kiss_twiddle_scalar t0, t1; 295 kiss_twiddle_scalar t0, t1;
266 re = yp0[0]; 296 /* We swap real and imag because we're using an FFT instead of an IFFT. */
267 im = yp0[1]; 297 re = yp0[1];
268 t0 = t[i<<shift]; 298 im = yp0[0];
269 t1 = t[(N4-i)<<shift]; 299 t0 = t[i];
300 t1 = t[N4+i];
270 /* We'd scale up by 2 here, but instead it's done when mixing the windows */ 301 /* We'd scale up by 2 here, but instead it's done when mixing the windows */
271 yr = S_MUL(re,t0) - S_MUL(im,t1); 302 yr = S_MUL(re,t0) + S_MUL(im,t1);
272 yi = S_MUL(im,t0) + S_MUL(re,t1); 303 yi = S_MUL(re,t1) - S_MUL(im,t0);
273 re = yp1[0]; 304 /* We swap real and imag because we're using an FFT instead of an IFFT. */
274 im = yp1[1]; 305 re = yp1[1];
275 /* works because the cos is nearly one */ 306 im = yp1[0];
276 yp0[0] = -(yr - S_MUL(yi,sine)); 307 yp0[0] = yr;
277 yp1[1] = yi + S_MUL(yr,sine); 308 yp1[1] = yi;
278 309
279 t0 = t[(N4-i-1)<<shift]; 310 t0 = t[(N4-i-1)];
280 t1 = t[(i+1)<<shift]; 311 t1 = t[(N2-i-1)];
281 /* We'd scale up by 2 here, but instead it's done when mixing the windows */ 312 /* We'd scale up by 2 here, but instead it's done when mixing the windows */
282 yr = S_MUL(re,t0) - S_MUL(im,t1); 313 yr = S_MUL(re,t0) + S_MUL(im,t1);
283 yi = S_MUL(im,t0) + S_MUL(re,t1); 314 yi = S_MUL(re,t1) - S_MUL(im,t0);
284 /* works because the cos is nearly one */ 315 yp1[0] = yr;
285 yp1[0] = -(yr - S_MUL(yi,sine)); 316 yp0[1] = yi;
286 yp0[1] = yi + S_MUL(yr,sine);
287 yp0 += 2; 317 yp0 += 2;
288 yp1 -= 2; 318 yp1 -= 2;
289 } 319 }
290 } 320 }
291 321
292 /* Mirror on both sides for TDAC */ 322 /* Mirror on both sides for TDAC */
293 { 323 {
294 kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1; 324 kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;
295 kiss_fft_scalar * OPUS_RESTRICT yp1 = out; 325 kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
296 const opus_val16 * OPUS_RESTRICT wp1 = window; 326 const opus_val16 * OPUS_RESTRICT wp1 = window;
297 const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1; 327 const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;
298 328
299 for(i = 0; i < overlap/2; i++) 329 for(i = 0; i < overlap/2; i++)
300 { 330 {
301 kiss_fft_scalar x1, x2; 331 kiss_fft_scalar x1, x2;
302 x1 = *xp1; 332 x1 = *xp1;
303 x2 = *yp1; 333 x2 = *yp1;
304 *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1); 334 *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);
305 *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1); 335 *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);
306 wp1++; 336 wp1++;
307 wp2--; 337 wp2--;
308 } 338 }
309 } 339 }
310 RESTORE_STACK;
311 } 340 }
341 #endif /* OVERRIDE_clt_mdct_backward */
OLDNEW
« no previous file with comments | « celt/kiss_fft.c ('k') | celt/mips/celt_mipsr1.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698