celt/mips/mdct_mipsr1.h - Issue 882843002: Update to opus-HEAD-66611f1.

Side by Side Diff: celt/mips/mdct_mipsr1.h

Issue 882843002: Update to opus-HEAD-66611f1. (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/opus.git@master

Patch Set: Created 5 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /* Copyright (c) 2007-2008 CSIRO	1 /* Copyright (c) 2007-2008 CSIRO

2 Copyright (c) 2007-2008 Xiph.Org Foundation	2 Copyright (c) 2007-2008 Xiph.Org Foundation

3 Written by Jean-Marc Valin */	3 Written by Jean-Marc Valin */

4 /*	4 /*

5 Redistribution and use in source and binary forms, with or without	5 Redistribution and use in source and binary forms, with or without

6 modification, are permitted provided that the following conditions	6 modification, are permitted provided that the following conditions

7 are met:	7 are met:

8	8

9 - Redistributions of source code must retain the above copyright	9 - Redistributions of source code must retain the above copyright

10 notice, this list of conditions and the following disclaimer.	10 notice, this list of conditions and the following disclaimer.

(...skipping 20 matching lines...) Expand all Loading...
31 plug in pretty much and FFT here.	31 plug in pretty much and FFT here.

32	32

33 This replaces the Vorbis FFT (and uses the exact same API), which	33 This replaces the Vorbis FFT (and uses the exact same API), which

34 was a bit too messy and that was ending up duplicating code	34 was a bit too messy and that was ending up duplicating code

35 (might as well use the same FFT everywhere).	35 (might as well use the same FFT everywhere).

36	36

37 The algorithm is similar to (and inspired from) Fabrice Bellard's	37 The algorithm is similar to (and inspired from) Fabrice Bellard's

38 MDCT implementation in FFMPEG, but has differences in signs, ordering	38 MDCT implementation in FFMPEG, but has differences in signs, ordering

39 and scaling in many places.	39 and scaling in many places.

40 */	40 */

	41 #ifndef __MDCT_MIPSR1_H__

	42 #define __MDCT_MIPSR1_H__

41	43

42 #ifndef SKIP_CONFIG_H	44 #ifndef SKIP_CONFIG_H

43 #ifdef HAVE_CONFIG_H	45 #ifdef HAVE_CONFIG_H

44 #include "config.h"	46 #include "config.h"

45 #endif	47 #endif

46 #endif	48 #endif

47	49

48 #include "mdct.h"	50 #include "mdct.h"

49 #include "kiss_fft.h"	51 #include "kiss_fft.h"

50 #include "_kiss_fft_guts.h"	52 #include "_kiss_fft_guts.h"

51 #include <math.h>	53 #include <math.h>

52 #include "os_support.h"	54 #include "os_support.h"

53 #include "mathops.h"	55 #include "mathops.h"

54 #include "stack_alloc.h"	56 #include "stack_alloc.h"

55	57

56 #ifdef CUSTOM_MODES

57

58 int clt_mdct_init(mdct_lookup *l,int N, int maxshift)

59 {

60 int i;

61 int N4;

62 kiss_twiddle_scalar *trig;

63 #if defined(FIXED_POINT)

64 int N2=N>>1;

65 #endif

66 l->n = N;

67 N4 = N>>2;

68 l->maxshift = maxshift;

69 for (i=0;i<=maxshift;i++)

70 {

71 if (i==0)

72 l->kfft[i] = opus_fft_alloc(N>>2>>i, 0, 0);

73 else

74 l->kfft[i] = opus_fft_alloc_twiddles(N>>2>>i, 0, 0, l->kfft[0]);

75 #ifndef ENABLE_TI_DSPLIB55

76 if (l->kfft[i]==NULL)

77 return 0;

78 #endif

79 }

80 l->trig = trig = (kiss_twiddle_scalar*)opus_alloc((N4+1)*sizeof(kiss_twiddle_scalar));

81 if (l->trig==NULL)

82 return 0;

83 /* We have enough points that sine isn't necessary */

84 #if defined(FIXED_POINT)

85 for (i=0;i<=N4;i++)

86 trig[i] = TRIG_UPSCALE*celt_cos_norm(DIV32(ADD32(SHL32(EXTEND32(i),17),N2),N));

87 #else

88 for (i=0;i<=N4;i++)

89 trig[i] = (kiss_twiddle_scalar)cos(2*PI*i/N);

90 #endif

91 return 1;

92 }

93

94 void clt_mdct_clear(mdct_lookup *l)

95 {

96 int i;

97 for (i=0;i<=l->maxshift;i++)

98 opus_fft_free(l->kfft[i]);

99 opus_free((kiss_twiddle_scalar*)l->trig);

100 }

101

102 #endif /* CUSTOM_MODES */

103

104 /* Forward MDCT trashes the input array */	58 /* Forward MDCT trashes the input array */

	59 #define OVERRIDE_clt_mdct_forward

105 void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,	60 void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,

106 const opus_val16 *window, int overlap, int shift, int stride)	61 const opus_val16 *window, int overlap, int shift, int stride)

107 {	62 {

108 int i;	63 int i;

109 int N, N2, N4;	64 int N, N2, N4;

110 kiss_twiddle_scalar sine;

111 VARDECL(kiss_fft_scalar, f);	65 VARDECL(kiss_fft_scalar, f);

112 VARDECL(kiss_fft_scalar, f2);	66 VARDECL(kiss_fft_cpx, f2);

	67 const kiss_fft_state *st = l->kfft[shift];

	68 const kiss_twiddle_scalar *trig;

	69 opus_val16 scale;

	70 #ifdef FIXED_POINT

	71 /* Allows us to scale with MULT16_32_Q16(), which is faster than

	72 MULT16_32_Q15() on ARM. */

	73 int scale_shift = st->scale_shift-1;

	74 #endif

113 SAVE_STACK;	75 SAVE_STACK;

	76 scale = st->scale;

	77

114 N = l->n;	78 N = l->n;

115 N >>= shift;	79 trig = l->trig;

	80 for (i=0;i<shift;i++)

	81 {

	82 N >>= 1;

	83 trig += N;

	84 }

116 N2 = N>>1;	85 N2 = N>>1;

117 N4 = N>>2;	86 N4 = N>>2;

	87

118 ALLOC(f, N2, kiss_fft_scalar);	88 ALLOC(f, N2, kiss_fft_scalar);

119 ALLOC(f2, N2, kiss_fft_scalar);	89 ALLOC(f2, N4, kiss_fft_cpx);

120 /* sin(x) ~= x here */

121 #ifdef FIXED_POINT

122 sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;

123 #else

124 sine = (kiss_twiddle_scalar)2*PI*(.125f)/N;

125 #endif

126	90

127 /* Consider the input to be composed of four blocks: [a, b, c, d] */	91 /* Consider the input to be composed of four blocks: [a, b, c, d] */

128 /* Window, shuffle, fold */	92 /* Window, shuffle, fold */

129 {	93 {

130 /* Temp pointers to make it really clear to the compiler what we're doing */	94 /* Temp pointers to make it really clear to the compiler what we're doing */

131 const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);	95 const kiss_fft_scalar * OPUS_RESTRICT xp1 = in+(overlap>>1);

132 const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);	96 const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+N2-1+(overlap>>1);

133 kiss_fft_scalar * OPUS_RESTRICT yp = f;	97 kiss_fft_scalar * OPUS_RESTRICT yp = f;

134 const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);	98 const opus_val16 * OPUS_RESTRICT wp1 = window+(overlap>>1);

135 const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;	99 const opus_val16 * OPUS_RESTRICT wp2 = window+(overlap>>1)-1;

136 for(i=0;i<((overlap+3)>>2);i++)	100 for(i=0;i<((overlap+3)>>2);i++)

137 {	101 {

138 /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/	102 /* Real part arranged as -d-cR, Imag part arranged as -b+aR*/

139 *yp++ = MULT16_32_Q15(*wp2, xp1[N2]) + MULT16_32_Q15(*wp1,*xp2);	103 *yp++ = S_MUL_ADD(*wp2, xp1[N2],*wp1,*xp2);

140 *yp++ = MULT16_32_Q15(*wp1, *xp1) - MULT16_32_Q15(*wp2, xp2[-N2]);	104 *yp++ = S_MUL_SUB(*wp1, *xp1,*wp2, xp2[-N2]);

141 xp1+=2;	105 xp1+=2;

142 xp2-=2;	106 xp2-=2;

143 wp1+=2;	107 wp1+=2;

144 wp2-=2;	108 wp2-=2;

145 }	109 }

146 wp1 = window;	110 wp1 = window;

147 wp2 = window+overlap-1;	111 wp2 = window+overlap-1;

148 for(;i<N4-((overlap+3)>>2);i++)	112 for(;i<N4-((overlap+3)>>2);i++)

149 {	113 {

150 /* Real part arranged as a-bR, Imag part arranged as -c-dR */	114 /* Real part arranged as a-bR, Imag part arranged as -c-dR */

151 *yp++ = *xp2;	115 *yp++ = *xp2;

152 *yp++ = *xp1;	116 *yp++ = *xp1;

153 xp1+=2;	117 xp1+=2;

154 xp2-=2;	118 xp2-=2;

155 }	119 }

156 for(;i<N4;i++)	120 for(;i<N4;i++)

157 {	121 {

158 /* Real part arranged as a-bR, Imag part arranged as -c-dR */	122 /* Real part arranged as a-bR, Imag part arranged as -c-dR */

159 *yp++ = -MULT16_32_Q15(*wp1, xp1[-N2]) + MULT16_32_Q15(*wp2, *xp2);	123 *yp++ = S_MUL_SUB(*wp2, *xp2, *wp1, xp1[-N2]);

160 *yp++ = MULT16_32_Q15(*wp2, *xp1) + MULT16_32_Q15(*wp1, xp2[N2]);	124 *yp++ = S_MUL_ADD(*wp2, *xp1, *wp1, xp2[N2]);

161 xp1+=2;	125 xp1+=2;

162 xp2-=2;	126 xp2-=2;

163 wp1+=2;	127 wp1+=2;

164 wp2-=2;	128 wp2-=2;

165 }	129 }

166 }	130 }

167 /* Pre-rotation */	131 /* Pre-rotation */

168 {	132 {

169 kiss_fft_scalar * OPUS_RESTRICT yp = f;	133 kiss_fft_scalar * OPUS_RESTRICT yp = f;

170 const kiss_twiddle_scalar *t = &l->trig[0];	134 const kiss_twiddle_scalar *t = &trig[0];

171 for(i=0;i<N4;i++)	135 for(i=0;i<N4;i++)

172 {	136 {

	137 kiss_fft_cpx yc;

	138 kiss_twiddle_scalar t0, t1;

173 kiss_fft_scalar re, im, yr, yi;	139 kiss_fft_scalar re, im, yr, yi;

174 re = yp[0];	140 t0 = t[i];

175 im = yp[1];	141 t1 = t[N4+i];

176 yr = -S_MUL(re,t[i<<shift]) - S_MUL(im,t[(N4-i)<<shift]);	142 re = *yp++;

177 yi = -S_MUL(im,t[i<<shift]) + S_MUL(re,t[(N4-i)<<shift]);	143 im = *yp++;

178 /* works because the cos is nearly one */	144

179 *yp++ = yr + S_MUL(yi,sine);	145 yr = S_MUL_SUB(re,t0,im,t1);

180 *yp++ = yi - S_MUL(yr,sine);	146 yi = S_MUL_ADD(im,t0,re,t1);

	147

	148 yc.r = yr;

	149 yc.i = yi;

	150 yc.r = PSHR32(MULT16_32_Q16(scale, yc.r), scale_shift);

	151 yc.i = PSHR32(MULT16_32_Q16(scale, yc.i), scale_shift);

	152 f2[st->bitrev[i]] = yc;

181 }	153 }

182 }	154 }

183	155

184 /* N/4 complex FFT, down-scales by 4/N */	156 /* N/4 complex FFT, does not downscale anymore */

185 opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)f2);	157 opus_fft_impl(st, f2);

186	158

187 /* Post-rotate */	159 /* Post-rotate */

188 {	160 {

189 /* Temp pointers to make it really clear to the compiler what we're doing */	161 /* Temp pointers to make it really clear to the compiler what we're doing */

190 const kiss_fft_scalar * OPUS_RESTRICT fp = f2;	162 const kiss_fft_cpx * OPUS_RESTRICT fp = f2;

191 kiss_fft_scalar * OPUS_RESTRICT yp1 = out;	163 kiss_fft_scalar * OPUS_RESTRICT yp1 = out;

192 kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);	164 kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);

193 const kiss_twiddle_scalar *t = &l->trig[0];	165 const kiss_twiddle_scalar *t = &trig[0];

194 /* Temp pointers to make it really clear to the compiler what we're doing */	166 /* Temp pointers to make it really clear to the compiler what we're doing */

195 for(i=0;i<N4;i++)	167 for(i=0;i<N4;i++)

196 {	168 {

197 kiss_fft_scalar yr, yi;	169 kiss_fft_scalar yr, yi;

198 yr = S_MUL(fp[1],t[(N4-i)<<shift]) + S_MUL(fp[0],t[i<<shift]);	170 yr = S_MUL_SUB(fp->i,t[N4+i] , fp->r,t[i]);

199 yi = S_MUL(fp[0],t[(N4-i)<<shift]) - S_MUL(fp[1],t[i<<shift]);	171 yi = S_MUL_ADD(fp->r,t[N4+i] ,fp->i,t[i]);

200 /* works because the cos is nearly one */	172 *yp1 = yr;

201 *yp1 = yr - S_MUL(yi,sine);	173 *yp2 = yi;

202 *yp2 = yi + S_MUL(yr,sine);;	174 fp++;

203 fp += 2;

204 yp1 += 2*stride;	175 yp1 += 2*stride;

205 yp2 -= 2*stride;	176 yp2 -= 2*stride;

206 }	177 }

207 }	178 }

208 RESTORE_STACK;	179 RESTORE_STACK;

209 }	180 }

210	181

	182 #define OVERRIDE_clt_mdct_backward

211 void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,	183 void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar * OPUS_RESTRICT out,

212 const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride)	184 const opus_val16 * OPUS_RESTRICT window, int overlap, int shift, int stride)

213 {	185 {

214 int i;	186 int i;

215 int N, N2, N4;	187 int N, N2, N4;

216 kiss_twiddle_scalar sine;	188 const kiss_twiddle_scalar *trig;

217 VARDECL(kiss_fft_scalar, f2);	189

218 SAVE_STACK;

219 N = l->n;	190 N = l->n;

220 N >>= shift;	191 trig = l->trig;

	192 for (i=0;i<shift;i++)

	193 {

	194 N >>= 1;

	195 trig += N;

	196 }

221 N2 = N>>1;	197 N2 = N>>1;

222 N4 = N>>2;	198 N4 = N>>2;

223 ALLOC(f2, N2, kiss_fft_scalar);

224 /* sin(x) ~= x here */

225 #ifdef FIXED_POINT

226 sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;

227 #else

228 sine = (kiss_twiddle_scalar)2*PI*(.125f)/N;

229 #endif

230	199

231 /* Pre-rotate */	200 /* Pre-rotate */

232 {	201 {

233 /* Temp pointers to make it really clear to the compiler what we're doing */	202 /* Temp pointers to make it really clear to the compiler what we're doing */

234 const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;	203 const kiss_fft_scalar * OPUS_RESTRICT xp1 = in;

235 const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);	204 const kiss_fft_scalar * OPUS_RESTRICT xp2 = in+stride*(N2-1);

236 kiss_fft_scalar * OPUS_RESTRICT yp = f2;	205 kiss_fft_scalar * OPUS_RESTRICT yp = out+(overlap>>1);

237 const kiss_twiddle_scalar *t = &l->trig[0];	206 const kiss_twiddle_scalar * OPUS_RESTRICT t = &trig[0];

	207 const opus_int16 * OPUS_RESTRICT bitrev = l->kfft[shift]->bitrev;

238 for(i=0;i<N4;i++)	208 for(i=0;i<N4;i++)

239 {	209 {

	210 int rev;

240 kiss_fft_scalar yr, yi;	211 kiss_fft_scalar yr, yi;

241 yr = -S_MUL(*xp2, t[i<<shift]) + S_MUL(*xp1,t[(N4-i)<<shift]);	212 rev = *bitrev++;

242 yi = -S_MUL(*xp2, t[(N4-i)<<shift]) - S_MUL(*xp1,t[i<<shift]);	213 yr = S_MUL_ADD(*xp2, t[i] , *xp1, t[N4+i]);

243 /* works because the cos is nearly one */	214 yi = S_MUL_SUB(*xp1, t[i] , *xp2, t[N4+i]);

244 *yp++ = yr - S_MUL(yi,sine);	215 /* We swap real and imag because we use an FFT instead of an IFFT. */

245 *yp++ = yi + S_MUL(yr,sine);	216 yp[2*rev+1] = yr;

	217 yp[2*rev] = yi;

	218 /* Storing the pre-rotation directly in the bitrev order. */

246 xp1+=2*stride;	219 xp1+=2*stride;

247 xp2-=2*stride;	220 xp2-=2*stride;

248 }	221 }

249 }	222 }

250	223

251 /* Inverse N/4 complex FFT. This one should not downscale even in fixed-point */	224 opus_fft_impl(l->kfft[shift], (kiss_fft_cpx*)(out+(overlap>>1)));

252 opus_ifft(l->kfft[shift], (kiss_fft_cpx *)f2, (kiss_fft_cpx *)(out+(overlap>>1)));

253	225

254 /* Post-rotate and de-shuffle from both ends of the buffer at once to make	226 /* Post-rotate and de-shuffle from both ends of the buffer at once to make

255 it in-place. */	227 it in-place. */

256 {	228 {

257 kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1);	229 kiss_fft_scalar * OPUS_RESTRICT yp0 = out+(overlap>>1);

258 kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2;	230 kiss_fft_scalar * OPUS_RESTRICT yp1 = out+(overlap>>1)+N2-2;

259 const kiss_twiddle_scalar *t = &l->trig[0];	231 const kiss_twiddle_scalar *t = &trig[0];

260 /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the	232 /* Loop to (N4+1)>>1 to handle odd N4. When N4 is odd, the

261 middle pair will be computed twice. */	233 middle pair will be computed twice. */

262 for(i=0;i<(N4+1)>>1;i++)	234 for(i=0;i<(N4+1)>>1;i++)

263 {	235 {

264 kiss_fft_scalar re, im, yr, yi;	236 kiss_fft_scalar re, im, yr, yi;

265 kiss_twiddle_scalar t0, t1;	237 kiss_twiddle_scalar t0, t1;

266 re = yp0[0];	238 /* We swap real and imag because we're using an FFT instead of an IFFT. */

267 im = yp0[1];	239 re = yp0[1];

268 t0 = t[i<<shift];	240 im = yp0[0];

269 t1 = t[(N4-i)<<shift];	241 t0 = t[i];

	242 t1 = t[N4+i];

270 /* We'd scale up by 2 here, but instead it's done when mixing the windows */	243 /* We'd scale up by 2 here, but instead it's done when mixing the windows */

271 yr = S_MUL(re,t0) - S_MUL(im,t1);	244 yr = S_MUL_ADD(re,t0 , im,t1);

272 yi = S_MUL(im,t0) + S_MUL(re,t1);	245 yi = S_MUL_SUB(re,t1 , im,t0);

273 re = yp1[0];	246 /* We swap real and imag because we're using an FFT instead of an IFFT. */

274 im = yp1[1];	247 re = yp1[1];

275 /* works because the cos is nearly one */	248 im = yp1[0];

276 yp0[0] = -(yr - S_MUL(yi,sine));	249 yp0[0] = yr;

277 yp1[1] = yi + S_MUL(yr,sine);	250 yp1[1] = yi;

278	251

279 t0 = t[(N4-i-1)<<shift];	252 t0 = t[(N4-i-1)];

280 t1 = t[(i+1)<<shift];	253 t1 = t[(N2-i-1)];

281 /* We'd scale up by 2 here, but instead it's done when mixing the windows */	254 /* We'd scale up by 2 here, but instead it's done when mixing the windows */

282 yr = S_MUL(re,t0) - S_MUL(im,t1);	255 yr = S_MUL_ADD(re,t0,im,t1);

283 yi = S_MUL(im,t0) + S_MUL(re,t1);	256 yi = S_MUL_SUB(re,t1,im,t0);

284 /* works because the cos is nearly one */	257 yp1[0] = yr;

285 yp1[0] = -(yr - S_MUL(yi,sine));	258 yp0[1] = yi;

286 yp0[1] = yi + S_MUL(yr,sine);

287 yp0 += 2;	259 yp0 += 2;

288 yp1 -= 2;	260 yp1 -= 2;

289 }	261 }

290 }	262 }

291	263

292 /* Mirror on both sides for TDAC */	264 /* Mirror on both sides for TDAC */

293 {	265 {

294 kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;	266 kiss_fft_scalar * OPUS_RESTRICT xp1 = out+overlap-1;

295 kiss_fft_scalar * OPUS_RESTRICT yp1 = out;	267 kiss_fft_scalar * OPUS_RESTRICT yp1 = out;

296 const opus_val16 * OPUS_RESTRICT wp1 = window;	268 const opus_val16 * OPUS_RESTRICT wp1 = window;

297 const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;	269 const opus_val16 * OPUS_RESTRICT wp2 = window+overlap-1;

298	270

299 for(i = 0; i < overlap/2; i++)	271 for(i = 0; i < overlap/2; i++)

300 {	272 {

301 kiss_fft_scalar x1, x2;	273 kiss_fft_scalar x1, x2;

302 x1 = *xp1;	274 x1 = *xp1;

303 x2 = *yp1;	275 x2 = *yp1;

304 *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);	276 *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);

305 *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);	277 *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);

306 wp1++;	278 wp1++;

307 wp2--;	279 wp2--;

308 }	280 }

309 }	281 }

310 RESTORE_STACK;

311 }	282 }

	283 #endif /* __MDCT_MIPSR1_H__ */

OLD	NEW

« autogen.sh ('K') | « celt/mips/kiss_fft_mipsr1.h ('k') | celt/mips/pitch_mipsr1.h » ('j') | doc/release.txt » ('J')