Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(79)

Side by Side Diff: third_party/opus/src/celt/kiss_fft.c

Issue 2962373002: [Opus] Update to v1.2.1 (Closed)
Patch Set: Pre-increment instead of post-increment Created 3 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/opus/src/celt/float_cast.h ('k') | third_party/opus/src/celt/mathops.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 /*Copyright (c) 2003-2004, Mark Borgerding 1 /*Copyright (c) 2003-2004, Mark Borgerding
2 Lots of modifications by Jean-Marc Valin 2 Lots of modifications by Jean-Marc Valin
3 Copyright (c) 2005-2007, Xiph.Org Foundation 3 Copyright (c) 2005-2007, Xiph.Org Foundation
4 Copyright (c) 2008, Xiph.Org Foundation, CSIRO 4 Copyright (c) 2008, Xiph.Org Foundation, CSIRO
5 5
6 All rights reserved. 6 All rights reserved.
7 7
8 Redistribution and use in source and binary forms, with or without 8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are met: 9 modification, are permitted provided that the following conditions are met:
10 10
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after
75 /* We know that m==4 here because the radix-2 is just after a radix-4 */ 75 /* We know that m==4 here because the radix-2 is just after a radix-4 */
76 celt_assert(m==4); 76 celt_assert(m==4);
77 for (i=0;i<N;i++) 77 for (i=0;i<N;i++)
78 { 78 {
79 kiss_fft_cpx t; 79 kiss_fft_cpx t;
80 Fout2 = Fout + 4; 80 Fout2 = Fout + 4;
81 t = Fout2[0]; 81 t = Fout2[0];
82 C_SUB( Fout2[0] , Fout[0] , t ); 82 C_SUB( Fout2[0] , Fout[0] , t );
83 C_ADDTO( Fout[0] , t ); 83 C_ADDTO( Fout[0] , t );
84 84
85 t.r = S_MUL(Fout2[1].r+Fout2[1].i, tw); 85 t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw);
86 t.i = S_MUL(Fout2[1].i-Fout2[1].r, tw); 86 t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw);
87 C_SUB( Fout2[1] , Fout[1] , t ); 87 C_SUB( Fout2[1] , Fout[1] , t );
88 C_ADDTO( Fout[1] , t ); 88 C_ADDTO( Fout[1] , t );
89 89
90 t.r = Fout2[2].i; 90 t.r = Fout2[2].i;
91 t.i = -Fout2[2].r; 91 t.i = -Fout2[2].r;
92 C_SUB( Fout2[2] , Fout[2] , t ); 92 C_SUB( Fout2[2] , Fout[2] , t );
93 C_ADDTO( Fout[2] , t ); 93 C_ADDTO( Fout[2] , t );
94 94
95 t.r = S_MUL(Fout2[3].i-Fout2[3].r, tw); 95 t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw);
96 t.i = S_MUL(-Fout2[3].i-Fout2[3].r, tw); 96 t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw);
97 C_SUB( Fout2[3] , Fout[3] , t ); 97 C_SUB( Fout2[3] , Fout[3] , t );
98 C_ADDTO( Fout[3] , t ); 98 C_ADDTO( Fout[3] , t );
99 Fout += 8; 99 Fout += 8;
100 } 100 }
101 } 101 }
102 } 102 }
103 103
104 static void kf_bfly4( 104 static void kf_bfly4(
105 kiss_fft_cpx * Fout, 105 kiss_fft_cpx * Fout,
106 const size_t fstride, 106 const size_t fstride,
(...skipping 12 matching lines...) Expand all
119 { 119 {
120 kiss_fft_cpx scratch0, scratch1; 120 kiss_fft_cpx scratch0, scratch1;
121 121
122 C_SUB( scratch0 , *Fout, Fout[2] ); 122 C_SUB( scratch0 , *Fout, Fout[2] );
123 C_ADDTO(*Fout, Fout[2]); 123 C_ADDTO(*Fout, Fout[2]);
124 C_ADD( scratch1 , Fout[1] , Fout[3] ); 124 C_ADD( scratch1 , Fout[1] , Fout[3] );
125 C_SUB( Fout[2], *Fout, scratch1 ); 125 C_SUB( Fout[2], *Fout, scratch1 );
126 C_ADDTO( *Fout , scratch1 ); 126 C_ADDTO( *Fout , scratch1 );
127 C_SUB( scratch1 , Fout[1] , Fout[3] ); 127 C_SUB( scratch1 , Fout[1] , Fout[3] );
128 128
129 Fout[1].r = scratch0.r + scratch1.i; 129 Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i);
130 Fout[1].i = scratch0.i - scratch1.r; 130 Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r);
131 Fout[3].r = scratch0.r - scratch1.i; 131 Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i);
132 Fout[3].i = scratch0.i + scratch1.r; 132 Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r);
133 Fout+=4; 133 Fout+=4;
134 } 134 }
135 } else { 135 } else {
136 int j; 136 int j;
137 kiss_fft_cpx scratch[6]; 137 kiss_fft_cpx scratch[6];
138 const kiss_twiddle_cpx *tw1,*tw2,*tw3; 138 const kiss_twiddle_cpx *tw1,*tw2,*tw3;
139 const int m2=2*m; 139 const int m2=2*m;
140 const int m3=3*m; 140 const int m3=3*m;
141 kiss_fft_cpx * Fout_beg = Fout; 141 kiss_fft_cpx * Fout_beg = Fout;
142 for (i=0;i<N;i++) 142 for (i=0;i<N;i++)
(...skipping 10 matching lines...) Expand all
153 C_SUB( scratch[5] , *Fout, scratch[1] ); 153 C_SUB( scratch[5] , *Fout, scratch[1] );
154 C_ADDTO(*Fout, scratch[1]); 154 C_ADDTO(*Fout, scratch[1]);
155 C_ADD( scratch[3] , scratch[0] , scratch[2] ); 155 C_ADD( scratch[3] , scratch[0] , scratch[2] );
156 C_SUB( scratch[4] , scratch[0] , scratch[2] ); 156 C_SUB( scratch[4] , scratch[0] , scratch[2] );
157 C_SUB( Fout[m2], *Fout, scratch[3] ); 157 C_SUB( Fout[m2], *Fout, scratch[3] );
158 tw1 += fstride; 158 tw1 += fstride;
159 tw2 += fstride*2; 159 tw2 += fstride*2;
160 tw3 += fstride*3; 160 tw3 += fstride*3;
161 C_ADDTO( *Fout , scratch[3] ); 161 C_ADDTO( *Fout , scratch[3] );
162 162
163 Fout[m].r = scratch[5].r + scratch[4].i; 163 Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i);
164 Fout[m].i = scratch[5].i - scratch[4].r; 164 Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r);
165 Fout[m3].r = scratch[5].r - scratch[4].i; 165 Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i);
166 Fout[m3].i = scratch[5].i + scratch[4].r; 166 Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r);
167 ++Fout; 167 ++Fout;
168 } 168 }
169 } 169 }
170 } 170 }
171 } 171 }
172 172
173 173
174 #ifndef RADIX_TWO_ONLY 174 #ifndef RADIX_TWO_ONLY
175 175
176 static void kf_bfly3( 176 static void kf_bfly3(
(...skipping 28 matching lines...) Expand all
205 do { 205 do {
206 206
207 C_MUL(scratch[1],Fout[m] , *tw1); 207 C_MUL(scratch[1],Fout[m] , *tw1);
208 C_MUL(scratch[2],Fout[m2] , *tw2); 208 C_MUL(scratch[2],Fout[m2] , *tw2);
209 209
210 C_ADD(scratch[3],scratch[1],scratch[2]); 210 C_ADD(scratch[3],scratch[1],scratch[2]);
211 C_SUB(scratch[0],scratch[1],scratch[2]); 211 C_SUB(scratch[0],scratch[1],scratch[2]);
212 tw1 += fstride; 212 tw1 += fstride;
213 tw2 += fstride*2; 213 tw2 += fstride*2;
214 214
215 Fout[m].r = Fout->r - HALF_OF(scratch[3].r); 215 Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r));
216 Fout[m].i = Fout->i - HALF_OF(scratch[3].i); 216 Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i));
217 217
218 C_MULBYSCALAR( scratch[0] , epi3.i ); 218 C_MULBYSCALAR( scratch[0] , epi3.i );
219 219
220 C_ADDTO(*Fout,scratch[3]); 220 C_ADDTO(*Fout,scratch[3]);
221 221
222 Fout[m2].r = Fout[m].r + scratch[0].i; 222 Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i);
223 Fout[m2].i = Fout[m].i - scratch[0].r; 223 Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r);
224 224
225 Fout[m].r -= scratch[0].i; 225 Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i);
226 Fout[m].i += scratch[0].r; 226 Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r);
227 227
228 ++Fout; 228 ++Fout;
229 } while(--k); 229 } while(--k);
230 } 230 }
231 } 231 }
232 232
233 233
234 #ifndef OVERRIDE_kf_bfly5 234 #ifndef OVERRIDE_kf_bfly5
235 static void kf_bfly5( 235 static void kf_bfly5(
236 kiss_fft_cpx * Fout, 236 kiss_fft_cpx * Fout,
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
275 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); 275 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
276 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); 276 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
277 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]); 277 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
278 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]); 278 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
279 279
280 C_ADD( scratch[7],scratch[1],scratch[4]); 280 C_ADD( scratch[7],scratch[1],scratch[4]);
281 C_SUB( scratch[10],scratch[1],scratch[4]); 281 C_SUB( scratch[10],scratch[1],scratch[4]);
282 C_ADD( scratch[8],scratch[2],scratch[3]); 282 C_ADD( scratch[8],scratch[2],scratch[3]);
283 C_SUB( scratch[9],scratch[2],scratch[3]); 283 C_SUB( scratch[9],scratch[2],scratch[3]);
284 284
285 Fout0->r += scratch[7].r + scratch[8].r; 285 Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r));
286 Fout0->i += scratch[7].i + scratch[8].i; 286 Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i));
287 287
288 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); 288 scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r)));
289 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); 289 scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r)));
290 290
291 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i); 291 scratch[6].r = ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i));
292 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i); 292 scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i)));
293 293
294 C_SUB(*Fout1,scratch[5],scratch[6]); 294 C_SUB(*Fout1,scratch[5],scratch[6]);
295 C_ADD(*Fout4,scratch[5],scratch[6]); 295 C_ADD(*Fout4,scratch[5],scratch[6]);
296 296
297 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); 297 scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r)));
298 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); 298 scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r)));
299 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i); 299 scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i));
300 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i); 300 scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i));
301 301
302 C_ADD(*Fout2,scratch[11],scratch[12]); 302 C_ADD(*Fout2,scratch[11],scratch[12]);
303 C_SUB(*Fout3,scratch[11],scratch[12]); 303 C_SUB(*Fout3,scratch[11],scratch[12]);
304 304
305 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; 305 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
306 } 306 }
307 } 307 }
308 } 308 }
309 #endif /* OVERRIDE_kf_bfly5 */ 309 #endif /* OVERRIDE_kf_bfly5 */
310 310
(...skipping 284 matching lines...) Expand 10 before | Expand all | Expand 10 after
595 celt_assert2 (fin != fout, "In-place FFT not supported"); 595 celt_assert2 (fin != fout, "In-place FFT not supported");
596 /* Bit-reverse the input */ 596 /* Bit-reverse the input */
597 for (i=0;i<st->nfft;i++) 597 for (i=0;i<st->nfft;i++)
598 fout[st->bitrev[i]] = fin[i]; 598 fout[st->bitrev[i]] = fin[i];
599 for (i=0;i<st->nfft;i++) 599 for (i=0;i<st->nfft;i++)
600 fout[i].i = -fout[i].i; 600 fout[i].i = -fout[i].i;
601 opus_fft_impl(st, fout); 601 opus_fft_impl(st, fout);
602 for (i=0;i<st->nfft;i++) 602 for (i=0;i<st->nfft;i++)
603 fout[i].i = -fout[i].i; 603 fout[i].i = -fout[i].i;
604 } 604 }
OLD | NEW
« no previous file with comments | « third_party/opus/src/celt/float_cast.h ('k') | third_party/opus/src/celt/mathops.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698