celt/kiss_fft.c - Issue 882843002: Update to opus-HEAD-66611f1.

Side by Side Diff: celt/kiss_fft.c

Issue 882843002: Update to opus-HEAD-66611f1. (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/opus.git@master

Patch Set: Created 5 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*Copyright (c) 2003-2004, Mark Borgerding	1 /*Copyright (c) 2003-2004, Mark Borgerding

2 Lots of modifications by Jean-Marc Valin	2 Lots of modifications by Jean-Marc Valin

3 Copyright (c) 2005-2007, Xiph.Org Foundation	3 Copyright (c) 2005-2007, Xiph.Org Foundation

4 Copyright (c) 2008, Xiph.Org Foundation, CSIRO	4 Copyright (c) 2008, Xiph.Org Foundation, CSIRO

5	5

6 All rights reserved.	6 All rights reserved.

7	7

8 Redistribution and use in source and binary forms, with or without	8 Redistribution and use in source and binary forms, with or without

9 modification, are permitted provided that the following conditions are met:	9 modification, are permitted provided that the following conditions are met:

10	10

(...skipping 29 matching lines...) Expand all Loading...
40 #include "os_support.h"	40 #include "os_support.h"

41 #include "mathops.h"	41 #include "mathops.h"

42 #include "stack_alloc.h"	42 #include "stack_alloc.h"

43	43

44 /* The guts header contains all the multiplication and addition macros that are defined for	44 /* The guts header contains all the multiplication and addition macros that are defined for

45 complex numbers. It also delares the kf_ internal functions.	45 complex numbers. It also delares the kf_ internal functions.

46 */	46 */

47	47

48 static void kf_bfly2(	48 static void kf_bfly2(

49 kiss_fft_cpx * Fout,	49 kiss_fft_cpx * Fout,

50 const size_t fstride,

51 const kiss_fft_state *st,

52 int m,	50 int m,

53 int N,	51 int N

54 int mm

55 )	52 )

56 {	53 {

57 kiss_fft_cpx * Fout2;	54 kiss_fft_cpx * Fout2;

58 const kiss_twiddle_cpx * tw1;	55 int i;

59 int i,j;	56 (void)m;

60 kiss_fft_cpx * Fout_beg = Fout;	57 #ifdef CUSTOM_MODES

61 for (i=0;i<N;i++)	58 if (m==1)

62 {	59 {

63 Fout = Fout_beg + i*mm;	60 celt_assert(m==1);

64 Fout2 = Fout + m;	61 for (i=0;i<N;i++)

65 tw1 = st->twiddles;

66 for(j=0;j<m;j++)

67 {	62 {

68 kiss_fft_cpx t;	63 kiss_fft_cpx t;

69 Fout->r = SHR32(Fout->r, 1);Fout->i = SHR32(Fout->i, 1);	64 Fout2 = Fout + 1;

70 Fout2->r = SHR32(Fout2->r, 1);Fout2->i = SHR32(Fout2->i, 1);	65 t = *Fout2;

71 C_MUL (t, Fout2 , tw1);

72 tw1 += fstride;

73 C_SUB( Fout2 , Fout , t );	66 C_SUB( Fout2 , Fout , t );

74 C_ADDTO( *Fout , t );	67 C_ADDTO( *Fout , t );

75 ++Fout2;	68 Fout += 2;

76 ++Fout;	69 }

	70 } else

	71 #endif

	72 {

	73 opus_val16 tw;

	74 tw = QCONST16(0.7071067812f, 15);

	75 /* We know that m==4 here because the radix-2 is just after a radix-4 */

	76 celt_assert(m==4);

	77 for (i=0;i<N;i++)

	78 {

	79 kiss_fft_cpx t;

	80 Fout2 = Fout + 4;

	81 t = Fout2[0];

	82 C_SUB( Fout2[0] , Fout[0] , t );

	83 C_ADDTO( Fout[0] , t );

	84

	85 t.r = S_MUL(Fout2[1].r+Fout2[1].i, tw);

	86 t.i = S_MUL(Fout2[1].i-Fout2[1].r, tw);

	87 C_SUB( Fout2[1] , Fout[1] , t );

	88 C_ADDTO( Fout[1] , t );

	89

	90 t.r = Fout2[2].i;

	91 t.i = -Fout2[2].r;

	92 C_SUB( Fout2[2] , Fout[2] , t );

	93 C_ADDTO( Fout[2] , t );

	94

	95 t.r = S_MUL(Fout2[3].i-Fout2[3].r, tw);

	96 t.i = S_MUL(-Fout2[3].i-Fout2[3].r, tw);

	97 C_SUB( Fout2[3] , Fout[3] , t );

	98 C_ADDTO( Fout[3] , t );

	99 Fout += 8;

77 }	100 }

78 }	101 }

79 }	102 }

80

81 static void ki_bfly2(

82 kiss_fft_cpx * Fout,

83 const size_t fstride,

84 const kiss_fft_state *st,

85 int m,

86 int N,

87 int mm

88 )

89 {

90 kiss_fft_cpx * Fout2;

91 const kiss_twiddle_cpx * tw1;

92 kiss_fft_cpx t;

93 int i,j;

94 kiss_fft_cpx * Fout_beg = Fout;

95 for (i=0;i<N;i++)

96 {

97 Fout = Fout_beg + i*mm;

98 Fout2 = Fout + m;

99 tw1 = st->twiddles;

100 for(j=0;j<m;j++)

101 {

102 C_MULC (t, Fout2 , tw1);

103 tw1 += fstride;

104 C_SUB( Fout2 , Fout , t );

105 C_ADDTO( *Fout , t );

106 ++Fout2;

107 ++Fout;

108 }

109 }

110 }

111	103

112 static void kf_bfly4(	104 static void kf_bfly4(

113 kiss_fft_cpx * Fout,	105 kiss_fft_cpx * Fout,

114 const size_t fstride,	106 const size_t fstride,

115 const kiss_fft_state *st,	107 const kiss_fft_state *st,

116 int m,	108 int m,

117 int N,	109 int N,

118 int mm	110 int mm

119 )	111 )

120 {	112 {

121 const kiss_twiddle_cpx tw1,tw2,*tw3;	113 int i;

122 kiss_fft_cpx scratch[6];

123 const size_t m2=2*m;

124 const size_t m3=3*m;

125 int i, j;

126	114

127 kiss_fft_cpx * Fout_beg = Fout;	115 if (m==1)

128 for (i=0;i<N;i++)

129 {	116 {

130 Fout = Fout_beg + i*mm;	117 /* Degenerate case where all the twiddles are 1. */

131 tw3 = tw2 = tw1 = st->twiddles;	118 for (i=0;i<N;i++)

132 for (j=0;j<m;j++)

133 {	119 {

134 C_MUL4(scratch[0],Fout[m] , *tw1 );	120 kiss_fft_cpx scratch0, scratch1;

135 C_MUL4(scratch[1],Fout[m2] , *tw2 );

136 C_MUL4(scratch[2],Fout[m3] , *tw3 );

137	121

138 Fout->r = PSHR32(Fout->r, 2);	122 C_SUB( scratch0 , *Fout, Fout[2] );

139 Fout->i = PSHR32(Fout->i, 2);	123 C_ADDTO(*Fout, Fout[2]);

140 C_SUB( scratch[5] , *Fout, scratch[1] );	124 C_ADD( scratch1 , Fout[1] , Fout[3] );

141 C_ADDTO(*Fout, scratch[1]);	125 C_SUB( Fout[2], *Fout, scratch1 );

142 C_ADD( scratch[3] , scratch[0] , scratch[2] );	126 C_ADDTO( *Fout , scratch1 );

143 C_SUB( scratch[4] , scratch[0] , scratch[2] );	127 C_SUB( scratch1 , Fout[1] , Fout[3] );

144 C_SUB( Fout[m2], *Fout, scratch[3] );

145 tw1 += fstride;

146 tw2 += fstride*2;

147 tw3 += fstride*3;

148 C_ADDTO( *Fout , scratch[3] );

149	128

150 Fout[m].r = scratch[5].r + scratch[4].i;	129 Fout[1].r = scratch0.r + scratch1.i;

151 Fout[m].i = scratch[5].i - scratch[4].r;	130 Fout[1].i = scratch0.i - scratch1.r;

152 Fout[m3].r = scratch[5].r - scratch[4].i;	131 Fout[3].r = scratch0.r - scratch1.i;

153 Fout[m3].i = scratch[5].i + scratch[4].r;	132 Fout[3].i = scratch0.i + scratch1.r;

154 ++Fout;	133 Fout+=4;

	134 }

	135 } else {

	136 int j;

	137 kiss_fft_cpx scratch[6];

	138 const kiss_twiddle_cpx tw1,tw2,*tw3;

	139 const int m2=2*m;

	140 const int m3=3*m;

	141 kiss_fft_cpx * Fout_beg = Fout;

	142 for (i=0;i<N;i++)

	143 {

	144 Fout = Fout_beg + i*mm;

	145 tw3 = tw2 = tw1 = st->twiddles;

	146 /* m is guaranteed to be a multiple of 4. */

	147 for (j=0;j<m;j++)

	148 {

	149 C_MUL(scratch[0],Fout[m] , *tw1 );

	150 C_MUL(scratch[1],Fout[m2] , *tw2 );

	151 C_MUL(scratch[2],Fout[m3] , *tw3 );

	152

	153 C_SUB( scratch[5] , *Fout, scratch[1] );

	154 C_ADDTO(*Fout, scratch[1]);

	155 C_ADD( scratch[3] , scratch[0] , scratch[2] );

	156 C_SUB( scratch[4] , scratch[0] , scratch[2] );

	157 C_SUB( Fout[m2], *Fout, scratch[3] );

	158 tw1 += fstride;

	159 tw2 += fstride*2;

	160 tw3 += fstride*3;

	161 C_ADDTO( *Fout , scratch[3] );

	162

	163 Fout[m].r = scratch[5].r + scratch[4].i;

	164 Fout[m].i = scratch[5].i - scratch[4].r;

	165 Fout[m3].r = scratch[5].r - scratch[4].i;

	166 Fout[m3].i = scratch[5].i + scratch[4].r;

	167 ++Fout;

	168 }

155 }	169 }

156 }	170 }

157 }	171 }

158	172

159 static void ki_bfly4(

160 kiss_fft_cpx * Fout,

161 const size_t fstride,

162 const kiss_fft_state *st,

163 int m,

164 int N,

165 int mm

166 )

167 {

168 const kiss_twiddle_cpx tw1,tw2,*tw3;

169 kiss_fft_cpx scratch[6];

170 const size_t m2=2*m;

171 const size_t m3=3*m;

172 int i, j;

173

174 kiss_fft_cpx * Fout_beg = Fout;

175 for (i=0;i<N;i++)

176 {

177 Fout = Fout_beg + i*mm;

178 tw3 = tw2 = tw1 = st->twiddles;

179 for (j=0;j<m;j++)

180 {

181 C_MULC(scratch[0],Fout[m] , *tw1 );

182 C_MULC(scratch[1],Fout[m2] , *tw2 );

183 C_MULC(scratch[2],Fout[m3] , *tw3 );

184

185 C_SUB( scratch[5] , *Fout, scratch[1] );

186 C_ADDTO(*Fout, scratch[1]);

187 C_ADD( scratch[3] , scratch[0] , scratch[2] );

188 C_SUB( scratch[4] , scratch[0] , scratch[2] );

189 C_SUB( Fout[m2], *Fout, scratch[3] );

190 tw1 += fstride;

191 tw2 += fstride*2;

192 tw3 += fstride*3;

193 C_ADDTO( *Fout , scratch[3] );

194

195 Fout[m].r = scratch[5].r - scratch[4].i;

196 Fout[m].i = scratch[5].i + scratch[4].r;

197 Fout[m3].r = scratch[5].r + scratch[4].i;

198 Fout[m3].i = scratch[5].i - scratch[4].r;

199 ++Fout;

200 }

201 }

202 }

203	173

204 #ifndef RADIX_TWO_ONLY	174 #ifndef RADIX_TWO_ONLY

205	175

206 static void kf_bfly3(	176 static void kf_bfly3(

207 kiss_fft_cpx * Fout,	177 kiss_fft_cpx * Fout,

208 const size_t fstride,	178 const size_t fstride,

209 const kiss_fft_state *st,	179 const kiss_fft_state *st,

210 int m,	180 int m,

211 int N,	181 int N,

212 int mm	182 int mm

213 )	183 )

214 {	184 {

215 int i;	185 int i;

216 size_t k;	186 size_t k;

217 const size_t m2 = 2*m;	187 const size_t m2 = 2*m;

218 const kiss_twiddle_cpx tw1,tw2;	188 const kiss_twiddle_cpx tw1,tw2;

219 kiss_fft_cpx scratch[5];	189 kiss_fft_cpx scratch[5];

220 kiss_twiddle_cpx epi3;	190 kiss_twiddle_cpx epi3;

221	191

222 kiss_fft_cpx * Fout_beg = Fout;	192 kiss_fft_cpx * Fout_beg = Fout;

	193 #ifdef FIXED_POINT

	194 epi3.r = -16384;

	195 epi3.i = -28378;

	196 #else

223 epi3 = st->twiddles[fstride*m];	197 epi3 = st->twiddles[fstride*m];

	198 #endif

224 for (i=0;i<N;i++)	199 for (i=0;i<N;i++)

225 {	200 {

226 Fout = Fout_beg + i*mm;	201 Fout = Fout_beg + i*mm;

227 tw1=tw2=st->twiddles;	202 tw1=tw2=st->twiddles;

	203 /* For non-custom modes, m is guaranteed to be a multiple of 4. */

228 k=m;	204 k=m;

229 do {	205 do {

230 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);

231	206

232 C_MUL(scratch[1],Fout[m] , *tw1);	207 C_MUL(scratch[1],Fout[m] , *tw1);

233 C_MUL(scratch[2],Fout[m2] , *tw2);	208 C_MUL(scratch[2],Fout[m2] , *tw2);

234	209

235 C_ADD(scratch[3],scratch[1],scratch[2]);	210 C_ADD(scratch[3],scratch[1],scratch[2]);

236 C_SUB(scratch[0],scratch[1],scratch[2]);	211 C_SUB(scratch[0],scratch[1],scratch[2]);

237 tw1 += fstride;	212 tw1 += fstride;

238 tw2 += fstride*2;	213 tw2 += fstride*2;

239	214

240 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);	215 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);

241 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);	216 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);

242	217

243 C_MULBYSCALAR( scratch[0] , epi3.i );	218 C_MULBYSCALAR( scratch[0] , epi3.i );

244	219

245 C_ADDTO(*Fout,scratch[3]);	220 C_ADDTO(*Fout,scratch[3]);

246	221

247 Fout[m2].r = Fout[m].r + scratch[0].i;	222 Fout[m2].r = Fout[m].r + scratch[0].i;

248 Fout[m2].i = Fout[m].i - scratch[0].r;	223 Fout[m2].i = Fout[m].i - scratch[0].r;

249	224

250 Fout[m].r -= scratch[0].i;	225 Fout[m].r -= scratch[0].i;

251 Fout[m].i += scratch[0].r;	226 Fout[m].i += scratch[0].r;

252	227

253 ++Fout;	228 ++Fout;

254 } while(--k);	229 } while(--k);

255 }	230 }

256 }	231 }

257	232

258 static void ki_bfly3(

259 kiss_fft_cpx * Fout,

260 const size_t fstride,

261 const kiss_fft_state *st,

262 int m,

263 int N,

264 int mm

265 )

266 {

267 int i, k;

268 const size_t m2 = 2*m;

269 const kiss_twiddle_cpx tw1,tw2;

270 kiss_fft_cpx scratch[5];

271 kiss_twiddle_cpx epi3;

272	233

273 kiss_fft_cpx * Fout_beg = Fout;	234 #ifndef OVERRIDE_kf_bfly5

274 epi3 = st->twiddles[fstride*m];

275 for (i=0;i<N;i++)

276 {

277 Fout = Fout_beg + i*mm;

278 tw1=tw2=st->twiddles;

279 k=m;

280 do{

281

282 C_MULC(scratch[1],Fout[m] , *tw1);

283 C_MULC(scratch[2],Fout[m2] , *tw2);

284

285 C_ADD(scratch[3],scratch[1],scratch[2]);

286 C_SUB(scratch[0],scratch[1],scratch[2]);

287 tw1 += fstride;

288 tw2 += fstride*2;

289

290 Fout[m].r = Fout->r - HALF_OF(scratch[3].r);

291 Fout[m].i = Fout->i - HALF_OF(scratch[3].i);

292

293 C_MULBYSCALAR( scratch[0] , -epi3.i );

294

295 C_ADDTO(*Fout,scratch[3]);

296

297 Fout[m2].r = Fout[m].r + scratch[0].i;

298 Fout[m2].i = Fout[m].i - scratch[0].r;

299

300 Fout[m].r -= scratch[0].i;

301 Fout[m].i += scratch[0].r;

302

303 ++Fout;

304 }while(--k);

305 }

306 }

307

308 static void kf_bfly5(	235 static void kf_bfly5(

309 kiss_fft_cpx * Fout,	236 kiss_fft_cpx * Fout,

310 const size_t fstride,	237 const size_t fstride,

311 const kiss_fft_state *st,	238 const kiss_fft_state *st,

312 int m,	239 int m,

313 int N,	240 int N,

314 int mm	241 int mm

315 )	242 )

316 {	243 {

317 kiss_fft_cpx Fout0,Fout1,Fout2,Fout3,*Fout4;	244 kiss_fft_cpx Fout0,Fout1,Fout2,Fout3,*Fout4;

318 int i, u;	245 int i, u;

319 kiss_fft_cpx scratch[13];	246 kiss_fft_cpx scratch[13];

320 const kiss_twiddle_cpx * twiddles = st->twiddles;

321 const kiss_twiddle_cpx *tw;	247 const kiss_twiddle_cpx *tw;

322 kiss_twiddle_cpx ya,yb;	248 kiss_twiddle_cpx ya,yb;

323 kiss_fft_cpx * Fout_beg = Fout;	249 kiss_fft_cpx * Fout_beg = Fout;

324	250

325 ya = twiddles[fstride*m];	251 #ifdef FIXED_POINT

326 yb = twiddles[fstride2m];	252 ya.r = 10126;

	253 ya.i = -31164;

	254 yb.r = -26510;

	255 yb.i = -19261;

	256 #else

	257 ya = st->twiddles[fstride*m];

	258 yb = st->twiddles[fstride2m];

	259 #endif

327 tw=st->twiddles;	260 tw=st->twiddles;

328	261

329 for (i=0;i<N;i++)	262 for (i=0;i<N;i++)

330 {	263 {

331 Fout = Fout_beg + i*mm;	264 Fout = Fout_beg + i*mm;

332 Fout0=Fout;	265 Fout0=Fout;

333 Fout1=Fout0+m;	266 Fout1=Fout0+m;

334 Fout2=Fout0+2*m;	267 Fout2=Fout0+2*m;

335 Fout3=Fout0+3*m;	268 Fout3=Fout0+3*m;

336 Fout4=Fout0+4*m;	269 Fout4=Fout0+4*m;

337	270

	271 /* For non-custom modes, m is guaranteed to be a multiple of 4. */

338 for ( u=0; u<m; ++u ) {	272 for ( u=0; u<m; ++u ) {

339 C_FIXDIV( Fout0,5); C_FIXDIV( Fout1,5); C_FIXDIV( Fout2,5); C_FIXDIV ( Fout3,5); C_FIXDIV( *Fout4,5);

340 scratch[0] = *Fout0;	273 scratch[0] = *Fout0;

341	274

342 C_MUL(scratch[1] ,Fout1, tw[ufstride]);	275 C_MUL(scratch[1] ,Fout1, tw[ufstride]);

343 C_MUL(scratch[2] ,Fout2, tw[2u*fstride]);	276 C_MUL(scratch[2] ,Fout2, tw[2u*fstride]);

344 C_MUL(scratch[3] ,Fout3, tw[3u*fstride]);	277 C_MUL(scratch[3] ,Fout3, tw[3u*fstride]);

345 C_MUL(scratch[4] ,Fout4, tw[4u*fstride]);	278 C_MUL(scratch[4] ,Fout4, tw[4u*fstride]);

346	279

347 C_ADD( scratch[7],scratch[1],scratch[4]);	280 C_ADD( scratch[7],scratch[1],scratch[4]);

348 C_SUB( scratch[10],scratch[1],scratch[4]);	281 C_SUB( scratch[10],scratch[1],scratch[4]);

349 C_ADD( scratch[8],scratch[2],scratch[3]);	282 C_ADD( scratch[8],scratch[2],scratch[3]);

(...skipping 16 matching lines...) Expand all Loading...
366 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);	299 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);

367 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);	300 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);

368	301

369 C_ADD(*Fout2,scratch[11],scratch[12]);	302 C_ADD(*Fout2,scratch[11],scratch[12]);

370 C_SUB(*Fout3,scratch[11],scratch[12]);	303 C_SUB(*Fout3,scratch[11],scratch[12]);

371	304

372 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;	305 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;

373 }	306 }

374 }	307 }

375 }	308 }

	309 #endif /* OVERRIDE_kf_bfly5 */

376	310

377 static void ki_bfly5(

378 kiss_fft_cpx * Fout,

379 const size_t fstride,

380 const kiss_fft_state *st,

381 int m,

382 int N,

383 int mm

384 )

385 {

386 kiss_fft_cpx Fout0,Fout1,Fout2,Fout3,*Fout4;

387 int i, u;

388 kiss_fft_cpx scratch[13];

389 const kiss_twiddle_cpx * twiddles = st->twiddles;

390 const kiss_twiddle_cpx *tw;

391 kiss_twiddle_cpx ya,yb;

392 kiss_fft_cpx * Fout_beg = Fout;

393

394 ya = twiddles[fstride*m];

395 yb = twiddles[fstride2m];

396 tw=st->twiddles;

397

398 for (i=0;i<N;i++)

399 {

400 Fout = Fout_beg + i*mm;

401 Fout0=Fout;

402 Fout1=Fout0+m;

403 Fout2=Fout0+2*m;

404 Fout3=Fout0+3*m;

405 Fout4=Fout0+4*m;

406

407 for ( u=0; u<m; ++u ) {

408 scratch[0] = *Fout0;

409

410 C_MULC(scratch[1] ,Fout1, tw[ufstride]);

411 C_MULC(scratch[2] ,Fout2, tw[2u*fstride]);

412 C_MULC(scratch[3] ,Fout3, tw[3u*fstride]);

413 C_MULC(scratch[4] ,Fout4, tw[4u*fstride]);

414

415 C_ADD( scratch[7],scratch[1],scratch[4]);

416 C_SUB( scratch[10],scratch[1],scratch[4]);

417 C_ADD( scratch[8],scratch[2],scratch[3]);

418 C_SUB( scratch[9],scratch[2],scratch[3]);

419

420 Fout0->r += scratch[7].r + scratch[8].r;

421 Fout0->i += scratch[7].i + scratch[8].i;

422

423 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[ 8].r,yb.r);

424 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[ 8].i,yb.r);

425

426 scratch[6].r = -S_MUL(scratch[10].i,ya.i) - S_MUL(scratch[9].i,yb.i);

427 scratch[6].i = S_MUL(scratch[10].r,ya.i) + S_MUL(scratch[9].r,yb.i);

428

429 C_SUB(*Fout1,scratch[5],scratch[6]);

430 C_ADD(*Fout4,scratch[5],scratch[6]);

431

432 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch [8].r,ya.r);

433 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch [8].i,ya.r);

434 scratch[12].r = S_MUL(scratch[10].i,yb.i) - S_MUL(scratch[9].i,ya.i);

435 scratch[12].i = -S_MUL(scratch[10].r,yb.i) + S_MUL(scratch[9].r,ya.i);

436

437 C_ADD(*Fout2,scratch[11],scratch[12]);

438 C_SUB(*Fout3,scratch[11],scratch[12]);

439

440 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;

441 }

442 }

443 }

444	311

445 #endif	312 #endif

446	313

447	314

448 #ifdef CUSTOM_MODES	315 #ifdef CUSTOM_MODES

449	316

450 static	317 static

451 void compute_bitrev_table(	318 void compute_bitrev_table(

452 int Fout,	319 int Fout,

453 opus_int16 *f,	320 opus_int16 *f,

(...skipping 27 matching lines...) Expand all Loading...
481 }	348 }

482	349

483 /* facbuf is populated by p1,m1,p2,m2, ...	350 /* facbuf is populated by p1,m1,p2,m2, ...

484 where	351 where

485 p[i] * m[i] = m[i-1]	352 p[i] * m[i] = m[i-1]

486 m0 = n */	353 m0 = n */

487 static	354 static

488 int kf_factor(int n,opus_int16 * facbuf)	355 int kf_factor(int n,opus_int16 * facbuf)

489 {	356 {

490 int p=4;	357 int p=4;

	358 int i;

	359 int stages=0;

	360 int nbak = n;

491	361

492 /factor out powers of 4, powers of 2, then any remaining primes /	362 /factor out powers of 4, powers of 2, then any remaining primes /

493 do {	363 do {

494 while (n % p) {	364 while (n % p) {

495 switch (p) {	365 switch (p) {

496 case 4: p = 2; break;	366 case 4: p = 2; break;

497 case 2: p = 3; break;	367 case 2: p = 3; break;

498 default: p += 2; break;	368 default: p += 2; break;

499 }	369 }

500 if (p>32000 \|\| (opus_int32)p*(opus_int32)p > n)	370 if (p>32000 \|\| (opus_int32)p*(opus_int32)p > n)

501 p = n; /* no more factors, skip to end */	371 p = n; /* no more factors, skip to end */

502 }	372 }

503 n /= p;	373 n /= p;

504 #ifdef RADIX_TWO_ONLY	374 #ifdef RADIX_TWO_ONLY

505 if (p!=2 && p != 4)	375 if (p!=2 && p != 4)

506 #else	376 #else

507 if (p>5)	377 if (p>5)

508 #endif	378 #endif

509 {	379 {

510 return 0;	380 return 0;

511 }	381 }

512 *facbuf++ = p;	382 facbuf[2*stages] = p;

513 *facbuf++ = n;	383 if (p==2 && stages > 1)

	384 {

	385 facbuf[2*stages] = 4;

	386 facbuf[2] = 2;

	387 }

	388 stages++;

514 } while (n > 1);	389 } while (n > 1);

	390 n = nbak;

	391 /* Reverse the order to get the radix 4 at the end, so we can use the

	392 fast degenerate case. It turns out that reversing the order also

	393 improves the noise behaviour. */

	394 for (i=0;i<stages/2;i++)

	395 {

	396 int tmp;

	397 tmp = facbuf[2*i];

	398 facbuf[2i] = facbuf[2(stages-i-1)];

	399 facbuf[2*(stages-i-1)] = tmp;

	400 }

	401 for (i=0;i<stages;i++)

	402 {

	403 n /= facbuf[2*i];

	404 facbuf[2*i+1] = n;

	405 }

515 return 1;	406 return 1;

516 }	407 }

517	408

518 static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)	409 static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)

519 {	410 {

520 int i;	411 int i;

521 #ifdef FIXED_POINT	412 #ifdef FIXED_POINT

522 for (i=0;i<nfft;++i) {	413 for (i=0;i<nfft;++i) {

523 opus_val32 phase = -i;	414 opus_val32 phase = -i;

524 kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft));	415 kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft));

(...skipping 23 matching lines...) Expand all Loading...
548 }else{	439 }else{

549 if (mem != NULL && *lenmem >= memneeded)	440 if (mem != NULL && *lenmem >= memneeded)

550 st = (kiss_fft_state*)mem;	441 st = (kiss_fft_state*)mem;

551 *lenmem = memneeded;	442 *lenmem = memneeded;

552 }	443 }

553 if (st) {	444 if (st) {

554 opus_int16 *bitrev;	445 opus_int16 *bitrev;

555 kiss_twiddle_cpx *twiddles;	446 kiss_twiddle_cpx *twiddles;

556	447

557 st->nfft=nfft;	448 st->nfft=nfft;

558 #ifndef FIXED_POINT	449 #ifdef FIXED_POINT

	450 st->scale_shift = celt_ilog2(st->nfft);

	451 if (st->nfft == 1<<st->scale_shift)

	452 st->scale = Q15ONE;

	453 else

	454 st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift);

	455 #else

559 st->scale = 1.f/nfft;	456 st->scale = 1.f/nfft;

560 #endif	457 #endif

561 if (base != NULL)	458 if (base != NULL)

562 {	459 {

563 st->twiddles = base->twiddles;	460 st->twiddles = base->twiddles;

564 st->shift = 0;	461 st->shift = 0;

565 while (nfft<<st->shift != base->nfft && st->shift < 32)	462 while (st->shift < 32 && nfft<<st->shift != base->nfft)

566 st->shift++;	463 st->shift++;

567 if (st->shift>=32)	464 if (st->shift>=32)

568 goto fail;	465 goto fail;

569 } else {	466 } else {

570 st->twiddles = twiddles = (kiss_twiddle_cpx)KISS_FFT_MALLOC(sizeof(k iss_twiddle_cpx)nfft);	467 st->twiddles = twiddles = (kiss_twiddle_cpx)KISS_FFT_MALLOC(sizeof(k iss_twiddle_cpx)nfft);

571 compute_twiddles(twiddles, nfft);	468 compute_twiddles(twiddles, nfft);

572 st->shift = -1;	469 st->shift = -1;

573 }	470 }

574 if (!kf_factor(nfft,st->factors))	471 if (!kf_factor(nfft,st->factors))

575 {	472 {

(...skipping 23 matching lines...) Expand all Loading...
599 {	496 {

600 opus_free((opus_int16*)cfg->bitrev);	497 opus_free((opus_int16*)cfg->bitrev);

601 if (cfg->shift < 0)	498 if (cfg->shift < 0)

602 opus_free((kiss_twiddle_cpx*)cfg->twiddles);	499 opus_free((kiss_twiddle_cpx*)cfg->twiddles);

603 opus_free((kiss_fft_state*)cfg);	500 opus_free((kiss_fft_state*)cfg);

604 }	501 }

605 }	502 }

606	503

607 #endif /* CUSTOM_MODES */	504 #endif /* CUSTOM_MODES */

608	505

609 void opus_fft(const kiss_fft_state st,const kiss_fft_cpx fin,kiss_fft_cpx *fou t)	506 void opus_fft_impl(const kiss_fft_state st,kiss_fft_cpx fout)

610 {	507 {

611 int m2, m;	508 int m2, m;

612 int p;	509 int p;

613 int L;	510 int L;

614 int fstride[MAXFACTORS];	511 int fstride[MAXFACTORS];

615 int i;	512 int i;

616 int shift;	513 int shift;

617	514

618 /* st->shift can be -1 */	515 /* st->shift can be -1 */

619 shift = st->shift>0 ? st->shift : 0;	516 shift = st->shift>0 ? st->shift : 0;

620	517

621 celt_assert2 (fin != fout, "In-place FFT not supported");

622 /* Bit-reverse the input */

623 for (i=0;i<st->nfft;i++)

624 {

625 fout[st->bitrev[i]] = fin[i];

626 #ifndef FIXED_POINT

627 fout[st->bitrev[i]].r *= st->scale;

628 fout[st->bitrev[i]].i *= st->scale;

629 #endif

630 }

631

632 fstride[0] = 1;	518 fstride[0] = 1;

633 L=0;	519 L=0;

634 do {	520 do {

635 p = st->factors[2*L];	521 p = st->factors[2*L];

636 m = st->factors[2*L+1];	522 m = st->factors[2*L+1];

637 fstride[L+1] = fstride[L]*p;	523 fstride[L+1] = fstride[L]*p;

638 L++;	524 L++;

639 } while(m!=1);	525 } while(m!=1);

640 m = st->factors[2*L-1];	526 m = st->factors[2*L-1];

641 for (i=L-1;i>=0;i--)	527 for (i=L-1;i>=0;i--)

642 {	528 {

643 if (i!=0)	529 if (i!=0)

644 m2 = st->factors[2*i-1];	530 m2 = st->factors[2*i-1];

645 else	531 else

646 m2 = 1;	532 m2 = 1;

647 switch (st->factors[2*i])	533 switch (st->factors[2*i])

648 {	534 {

649 case 2:	535 case 2:

650 kf_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);	536 kf_bfly2(fout, m, fstride[i]);

651 break;	537 break;

652 case 4:	538 case 4:

653 kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);	539 kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);

654 break;	540 break;

655 #ifndef RADIX_TWO_ONLY	541 #ifndef RADIX_TWO_ONLY

656 case 3:	542 case 3:

657 kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);	543 kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);

658 break;	544 break;

659 case 5:	545 case 5:

660 kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);	546 kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);

661 break;	547 break;

662 #endif	548 #endif

663 }	549 }

664 m = m2;	550 m = m2;

665 }	551 }

666 }	552 }

667	553

	554 void opus_fft(const kiss_fft_state st,const kiss_fft_cpx fin,kiss_fft_cpx *fou t)

	555 {

	556 int i;

	557 opus_val16 scale;

	558 #ifdef FIXED_POINT

	559 /* Allows us to scale with MULT16_32_Q16(), which is faster than

	560 MULT16_32_Q15() on ARM. */

	561 int scale_shift = st->scale_shift-1;

	562 #endif

	563 scale = st->scale;

	564

	565 celt_assert2 (fin != fout, "In-place FFT not supported");

	566 /* Bit-reverse the input */

	567 for (i=0;i<st->nfft;i++)

	568 {

	569 kiss_fft_cpx x = fin[i];

	570 fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift);

	571 fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift);

	572 }

	573 opus_fft_impl(st, fout);

	574 }

	575

	576

	577 #ifdef TEST_UNIT_DFT_C

668 void opus_ifft(const kiss_fft_state st,const kiss_fft_cpx fin,kiss_fft_cpx *fo ut)	578 void opus_ifft(const kiss_fft_state st,const kiss_fft_cpx fin,kiss_fft_cpx *fo ut)

669 {	579 {

670 int m2, m;

671 int p;

672 int L;

673 int fstride[MAXFACTORS];

674 int i;	580 int i;

675 int shift;

676

677 /* st->shift can be -1 */

678 shift = st->shift>0 ? st->shift : 0;

679 celt_assert2 (fin != fout, "In-place FFT not supported");	581 celt_assert2 (fin != fout, "In-place FFT not supported");

680 /* Bit-reverse the input */	582 /* Bit-reverse the input */

681 for (i=0;i<st->nfft;i++)	583 for (i=0;i<st->nfft;i++)

682 fout[st->bitrev[i]] = fin[i];	584 fout[st->bitrev[i]] = fin[i];

683	585 for (i=0;i<st->nfft;i++)

684 fstride[0] = 1;	586 fout[i].i = -fout[i].i;

685 L=0;	587 opus_fft_impl(st, fout);

686 do {	588 for (i=0;i<st->nfft;i++)

687 p = st->factors[2*L];	589 fout[i].i = -fout[i].i;

688 m = st->factors[2*L+1];	590 }

689 fstride[L+1] = fstride[L]*p;

690 L++;

691 } while(m!=1);

692 m = st->factors[2*L-1];

693 for (i=L-1;i>=0;i--)

694 {

695 if (i!=0)

696 m2 = st->factors[2*i-1];

697 else

698 m2 = 1;

699 switch (st->factors[2*i])

700 {

701 case 2:

702 ki_bfly2(fout,fstride[i]<<shift,st,m, fstride[i], m2);

703 break;

704 case 4:

705 ki_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);

706 break;

707 #ifndef RADIX_TWO_ONLY

708 case 3:

709 ki_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);

710 break;

711 case 5:

712 ki_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);

713 break;

714 #endif	591 #endif

715 }

716 m = m2;

717 }

718 }

719

OLD	NEW

« autogen.sh ('K') | « celt/kiss_fft.h ('k') | celt/mdct.c » ('j') | doc/release.txt » ('J')