third_party/opus/src/celt/kiss_fft.c - Issue 2962373002: [Opus] Update to v1.2.1

Side by Side Diff: third_party/opus/src/celt/kiss_fft.c

Issue 2962373002: [Opus] Update to v1.2.1 (Closed)

Patch Set: Pre-increment instead of post-increment Created 3 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 /*Copyright (c) 2003-2004, Mark Borgerding	1 /*Copyright (c) 2003-2004, Mark Borgerding

2 Lots of modifications by Jean-Marc Valin	2 Lots of modifications by Jean-Marc Valin

3 Copyright (c) 2005-2007, Xiph.Org Foundation	3 Copyright (c) 2005-2007, Xiph.Org Foundation

4 Copyright (c) 2008, Xiph.Org Foundation, CSIRO	4 Copyright (c) 2008, Xiph.Org Foundation, CSIRO

5	5

6 All rights reserved.	6 All rights reserved.

7	7

8 Redistribution and use in source and binary forms, with or without	8 Redistribution and use in source and binary forms, with or without

9 modification, are permitted provided that the following conditions are met:	9 modification, are permitted provided that the following conditions are met:

10	10

(...skipping 64 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
75 /* We know that m==4 here because the radix-2 is just after a radix-4 */	75 /* We know that m==4 here because the radix-2 is just after a radix-4 */

76 celt_assert(m==4);	76 celt_assert(m==4);

77 for (i=0;i<N;i++)	77 for (i=0;i<N;i++)

78 {	78 {

79 kiss_fft_cpx t;	79 kiss_fft_cpx t;

80 Fout2 = Fout + 4;	80 Fout2 = Fout + 4;

81 t = Fout2[0];	81 t = Fout2[0];

82 C_SUB( Fout2[0] , Fout[0] , t );	82 C_SUB( Fout2[0] , Fout[0] , t );

83 C_ADDTO( Fout[0] , t );	83 C_ADDTO( Fout[0] , t );

84	84

85 t.r = S_MUL(Fout2[1].r+Fout2[1].i, tw);	85 t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw);

86 t.i = S_MUL(Fout2[1].i-Fout2[1].r, tw);	86 t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw);

87 C_SUB( Fout2[1] , Fout[1] , t );	87 C_SUB( Fout2[1] , Fout[1] , t );

88 C_ADDTO( Fout[1] , t );	88 C_ADDTO( Fout[1] , t );

89	89

90 t.r = Fout2[2].i;	90 t.r = Fout2[2].i;

91 t.i = -Fout2[2].r;	91 t.i = -Fout2[2].r;

92 C_SUB( Fout2[2] , Fout[2] , t );	92 C_SUB( Fout2[2] , Fout[2] , t );

93 C_ADDTO( Fout[2] , t );	93 C_ADDTO( Fout[2] , t );

94	94

95 t.r = S_MUL(Fout2[3].i-Fout2[3].r, tw);	95 t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw);

96 t.i = S_MUL(-Fout2[3].i-Fout2[3].r, tw);	96 t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw);

97 C_SUB( Fout2[3] , Fout[3] , t );	97 C_SUB( Fout2[3] , Fout[3] , t );

98 C_ADDTO( Fout[3] , t );	98 C_ADDTO( Fout[3] , t );

99 Fout += 8;	99 Fout += 8;

100 }	100 }

101 }	101 }

102 }	102 }

103	103

104 static void kf_bfly4(	104 static void kf_bfly4(

105 kiss_fft_cpx * Fout,	105 kiss_fft_cpx * Fout,

106 const size_t fstride,	106 const size_t fstride,

(...skipping 12 matching lines...) Expand all Loading...
119 {	119 {

120 kiss_fft_cpx scratch0, scratch1;	120 kiss_fft_cpx scratch0, scratch1;

121	121

122 C_SUB( scratch0 , *Fout, Fout[2] );	122 C_SUB( scratch0 , *Fout, Fout[2] );

123 C_ADDTO(*Fout, Fout[2]);	123 C_ADDTO(*Fout, Fout[2]);

124 C_ADD( scratch1 , Fout[1] , Fout[3] );	124 C_ADD( scratch1 , Fout[1] , Fout[3] );

125 C_SUB( Fout[2], *Fout, scratch1 );	125 C_SUB( Fout[2], *Fout, scratch1 );

126 C_ADDTO( *Fout , scratch1 );	126 C_ADDTO( *Fout , scratch1 );

127 C_SUB( scratch1 , Fout[1] , Fout[3] );	127 C_SUB( scratch1 , Fout[1] , Fout[3] );

128	128

129 Fout[1].r = scratch0.r + scratch1.i;	129 Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i);

130 Fout[1].i = scratch0.i - scratch1.r;	130 Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r);

131 Fout[3].r = scratch0.r - scratch1.i;	131 Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i);

132 Fout[3].i = scratch0.i + scratch1.r;	132 Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r);

133 Fout+=4;	133 Fout+=4;

134 }	134 }

135 } else {	135 } else {

136 int j;	136 int j;

137 kiss_fft_cpx scratch[6];	137 kiss_fft_cpx scratch[6];

138 const kiss_twiddle_cpx tw1,tw2,*tw3;	138 const kiss_twiddle_cpx tw1,tw2,*tw3;

139 const int m2=2*m;	139 const int m2=2*m;

140 const int m3=3*m;	140 const int m3=3*m;

141 kiss_fft_cpx * Fout_beg = Fout;	141 kiss_fft_cpx * Fout_beg = Fout;

142 for (i=0;i<N;i++)	142 for (i=0;i<N;i++)

(...skipping 10 matching lines...) Expand all Loading...
153 C_SUB( scratch[5] , *Fout, scratch[1] );	153 C_SUB( scratch[5] , *Fout, scratch[1] );

154 C_ADDTO(*Fout, scratch[1]);	154 C_ADDTO(*Fout, scratch[1]);

155 C_ADD( scratch[3] , scratch[0] , scratch[2] );	155 C_ADD( scratch[3] , scratch[0] , scratch[2] );

156 C_SUB( scratch[4] , scratch[0] , scratch[2] );	156 C_SUB( scratch[4] , scratch[0] , scratch[2] );

157 C_SUB( Fout[m2], *Fout, scratch[3] );	157 C_SUB( Fout[m2], *Fout, scratch[3] );

158 tw1 += fstride;	158 tw1 += fstride;

159 tw2 += fstride*2;	159 tw2 += fstride*2;

160 tw3 += fstride*3;	160 tw3 += fstride*3;

161 C_ADDTO( *Fout , scratch[3] );	161 C_ADDTO( *Fout , scratch[3] );

162	162

163 Fout[m].r = scratch[5].r + scratch[4].i;	163 Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i);

164 Fout[m].i = scratch[5].i - scratch[4].r;	164 Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r);

165 Fout[m3].r = scratch[5].r - scratch[4].i;	165 Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i);

166 Fout[m3].i = scratch[5].i + scratch[4].r;	166 Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r);

167 ++Fout;	167 ++Fout;

168 }	168 }

169 }	169 }

170 }	170 }

171 }	171 }

172	172

173	173

174 #ifndef RADIX_TWO_ONLY	174 #ifndef RADIX_TWO_ONLY

175	175

176 static void kf_bfly3(	176 static void kf_bfly3(

(...skipping 28 matching lines...) Expand all Loading...
205 do {	205 do {

206	206

207 C_MUL(scratch[1],Fout[m] , *tw1);	207 C_MUL(scratch[1],Fout[m] , *tw1);

208 C_MUL(scratch[2],Fout[m2] , *tw2);	208 C_MUL(scratch[2],Fout[m2] , *tw2);

209	209

210 C_ADD(scratch[3],scratch[1],scratch[2]);	210 C_ADD(scratch[3],scratch[1],scratch[2]);

211 C_SUB(scratch[0],scratch[1],scratch[2]);	211 C_SUB(scratch[0],scratch[1],scratch[2]);

212 tw1 += fstride;	212 tw1 += fstride;

213 tw2 += fstride*2;	213 tw2 += fstride*2;

214	214

215 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);	215 Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r));

216 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);	216 Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i));

217	217

218 C_MULBYSCALAR( scratch[0] , epi3.i );	218 C_MULBYSCALAR( scratch[0] , epi3.i );

219	219

220 C_ADDTO(*Fout,scratch[3]);	220 C_ADDTO(*Fout,scratch[3]);

221	221

222 Fout[m2].r = Fout[m].r + scratch[0].i;	222 Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i);

223 Fout[m2].i = Fout[m].i - scratch[0].r;	223 Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r);

224	224

225 Fout[m].r -= scratch[0].i;	225 Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i);

226 Fout[m].i += scratch[0].r;	226 Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r);

227	227

228 ++Fout;	228 ++Fout;

229 } while(--k);	229 } while(--k);

230 }	230 }

231 }	231 }

232	232

233	233

234 #ifndef OVERRIDE_kf_bfly5	234 #ifndef OVERRIDE_kf_bfly5

235 static void kf_bfly5(	235 static void kf_bfly5(

236 kiss_fft_cpx * Fout,	236 kiss_fft_cpx * Fout,

(...skipping 38 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
275 C_MUL(scratch[1] ,Fout1, tw[ufstride]);	275 C_MUL(scratch[1] ,Fout1, tw[ufstride]);

276 C_MUL(scratch[2] ,Fout2, tw[2u*fstride]);	276 C_MUL(scratch[2] ,Fout2, tw[2u*fstride]);

277 C_MUL(scratch[3] ,Fout3, tw[3u*fstride]);	277 C_MUL(scratch[3] ,Fout3, tw[3u*fstride]);

278 C_MUL(scratch[4] ,Fout4, tw[4u*fstride]);	278 C_MUL(scratch[4] ,Fout4, tw[4u*fstride]);

279	279

280 C_ADD( scratch[7],scratch[1],scratch[4]);	280 C_ADD( scratch[7],scratch[1],scratch[4]);

281 C_SUB( scratch[10],scratch[1],scratch[4]);	281 C_SUB( scratch[10],scratch[1],scratch[4]);

282 C_ADD( scratch[8],scratch[2],scratch[3]);	282 C_ADD( scratch[8],scratch[2],scratch[3]);

283 C_SUB( scratch[9],scratch[2],scratch[3]);	283 C_SUB( scratch[9],scratch[2],scratch[3]);

284	284

285 Fout0->r += scratch[7].r + scratch[8].r;	285 Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r));

286 Fout0->i += scratch[7].i + scratch[8].i;	286 Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i));

287	287

288 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);	288 scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r)));

289 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);	289 scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r)));

290	290

291 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);	291 scratch[6].r = ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i));

292 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);	292 scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i)));

293	293

294 C_SUB(*Fout1,scratch[5],scratch[6]);	294 C_SUB(*Fout1,scratch[5],scratch[6]);

295 C_ADD(*Fout4,scratch[5],scratch[6]);	295 C_ADD(*Fout4,scratch[5],scratch[6]);

296	296

297 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);	297 scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r)));

298 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);	298 scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r)));

299 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);	299 scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i));

300 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);	300 scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i));

301	301

302 C_ADD(*Fout2,scratch[11],scratch[12]);	302 C_ADD(*Fout2,scratch[11],scratch[12]);

303 C_SUB(*Fout3,scratch[11],scratch[12]);	303 C_SUB(*Fout3,scratch[11],scratch[12]);

304	304

305 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;	305 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;

306 }	306 }

307 }	307 }

308 }	308 }

309 #endif /* OVERRIDE_kf_bfly5 */	309 #endif /* OVERRIDE_kf_bfly5 */

310	310

(...skipping 284 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
595 celt_assert2 (fin != fout, "In-place FFT not supported");	595 celt_assert2 (fin != fout, "In-place FFT not supported");

596 /* Bit-reverse the input */	596 /* Bit-reverse the input */

597 for (i=0;i<st->nfft;i++)	597 for (i=0;i<st->nfft;i++)

598 fout[st->bitrev[i]] = fin[i];	598 fout[st->bitrev[i]] = fin[i];

599 for (i=0;i<st->nfft;i++)	599 for (i=0;i<st->nfft;i++)

600 fout[i].i = -fout[i].i;	600 fout[i].i = -fout[i].i;

601 opus_fft_impl(st, fout);	601 opus_fft_impl(st, fout);

602 for (i=0;i<st->nfft;i++)	602 for (i=0;i<st->nfft;i++)

603 fout[i].i = -fout[i].i;	603 fout[i].i = -fout[i].i;

604 }	604 }

OLD	NEW

« no previous file with comments | « third_party/opus/src/celt/float_cast.h ('k') | third_party/opus/src/celt/mathops.h » ('j') | no next file with comments »