third_party/opus/src/celt/vq.c - Issue 2962373002: [Opus] Update to v1.2.1

Side by Side Diff: third_party/opus/src/celt/vq.c

Issue 2962373002: [Opus] Update to v1.2.1 (Closed)

Patch Set: Pre-increment instead of post-increment Created 3 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 /* Copyright (c) 2007-2008 CSIRO	1 /* Copyright (c) 2007-2008 CSIRO

2 Copyright (c) 2007-2009 Xiph.Org Foundation	2 Copyright (c) 2007-2009 Xiph.Org Foundation

3 Written by Jean-Marc Valin */	3 Written by Jean-Marc Valin */

4 /*	4 /*

5 Redistribution and use in source and binary forms, with or without	5 Redistribution and use in source and binary forms, with or without

6 modification, are permitted provided that the following conditions	6 modification, are permitted provided that the following conditions

7 are met:	7 are met:

8	8

9 - Redistributions of source code must retain the above copyright	9 - Redistributions of source code must retain the above copyright

10 notice, this list of conditions and the following disclaimer.	10 notice, this list of conditions and the following disclaimer.

(...skipping 49 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
60 {	60 {

61 celt_norm x1, x2;	61 celt_norm x1, x2;

62 x1 = Xptr[0];	62 x1 = Xptr[0];

63 x2 = Xptr[stride];	63 x2 = Xptr[stride];

64 Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15));	64 Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15));

65 *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));	65 *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));

66 }	66 }

67 }	67 }

68 #endif /* OVERRIDE_vq_exp_rotation1 */	68 #endif /* OVERRIDE_vq_exp_rotation1 */

69	69

70 static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)	70 void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)

71 {	71 {

72 static const int SPREAD_FACTOR[3]={15,10,5};	72 static const int SPREAD_FACTOR[3]={15,10,5};

73 int i;	73 int i;

74 opus_val16 c, s;	74 opus_val16 c, s;

75 opus_val16 gain, theta;	75 opus_val16 gain, theta;

76 int stride2=0;	76 int stride2=0;

77 int factor;	77 int factor;

78	78

79 if (2*K>=len \|\| spread==SPREAD_NONE)	79 if (2*K>=len \|\| spread==SPREAD_NONE)

80 return;	80 return;

(...skipping 70 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
151 int j;	151 int j;

152 unsigned tmp=0;	152 unsigned tmp=0;

153 j=0; do {	153 j=0; do {

154 tmp \|= iy[i*N0+j];	154 tmp \|= iy[i*N0+j];

155 } while (++j<N0);	155 } while (++j<N0);

156 collapse_mask \|= (tmp!=0)<<i;	156 collapse_mask \|= (tmp!=0)<<i;

157 } while (++i<B);	157 } while (++i<B);

158 return collapse_mask;	158 return collapse_mask;

159 }	159 }

160	160

161 unsigned alg_quant(celt_norm X, int N, int K, int spread, int B, ec_enc enc	161 opus_val16 op_pvq_search_c(celt_norm X, int iy, int K, int N, int arch)

162 #ifdef RESYNTH

163 , opus_val16 gain

164 #endif

165 )

166 {	162 {

167 VARDECL(celt_norm, y);	163 VARDECL(celt_norm, y);

168 VARDECL(int, iy);	164 VARDECL(int, signx);

169 VARDECL(opus_val16, signx);

170 int i, j;	165 int i, j;

171 opus_val16 s;

172 int pulsesLeft;	166 int pulsesLeft;

173 opus_val32 sum;	167 opus_val32 sum;

174 opus_val32 xy;	168 opus_val32 xy;

175 opus_val16 yy;	169 opus_val16 yy;

176 unsigned collapse_mask;

177 SAVE_STACK;	170 SAVE_STACK;

178	171

179 celt_assert2(K>0, "alg_quant() needs at least one pulse");	172 (void)arch;

180 celt_assert2(N>1, "alg_quant() needs at least two dimensions");

181

182 ALLOC(y, N, celt_norm);	173 ALLOC(y, N, celt_norm);

183 ALLOC(iy, N, int);	174 ALLOC(signx, N, int);

184 ALLOC(signx, N, opus_val16);

185

186 exp_rotation(X, N, 1, B, K, spread);

187	175

188 /* Get rid of the sign */	176 /* Get rid of the sign */

189 sum = 0;	177 sum = 0;

190 j=0; do {	178 j=0; do {

191 if (X[j]>0)	179 signx[j] = X[j]<0;

192 signx[j]=1;	180 /* OPT: Make sure the compiler doesn't use a branch on ABS16(). */

193 else {	181 X[j] = ABS16(X[j]);

194 signx[j]=-1;

195 X[j]=-X[j];

196 }

197 iy[j] = 0;	182 iy[j] = 0;

198 y[j] = 0;	183 y[j] = 0;

199 } while (++j<N);	184 } while (++j<N);

200	185

201 xy = yy = 0;	186 xy = yy = 0;

202	187

203 pulsesLeft = K;	188 pulsesLeft = K;

204	189

205 /* Do a pre-search by projecting on the pyramid */	190 /* Do a pre-search by projecting on the pyramid */

206 if (K > (N>>1))	191 if (K > (N>>1))

(...skipping 11 matching lines...) Expand all Loading...
218 to be allocated. 64 is an approximation of infinity here. */	203 to be allocated. 64 is an approximation of infinity here. */

219 if (!(sum > EPSILON && sum < 64))	204 if (!(sum > EPSILON && sum < 64))

220 #endif	205 #endif

221 {	206 {

222 X[0] = QCONST16(1.f,14);	207 X[0] = QCONST16(1.f,14);

223 j=1; do	208 j=1; do

224 X[j]=0;	209 X[j]=0;

225 while (++j<N);	210 while (++j<N);

226 sum = QCONST16(1.f,14);	211 sum = QCONST16(1.f,14);

227 }	212 }

228 rcp = EXTRACT16(MULT16_32_Q16(K-1, celt_rcp(sum)));	213 #ifdef FIXED_POINT

	214 rcp = EXTRACT16(MULT16_32_Q16(K, celt_rcp(sum)));

	215 #else

	216 /* Using K+e with e < 1 guarantees we cannot get more than K pulses. */

	217 rcp = EXTRACT16(MULT16_32_Q16(K+0.8f, celt_rcp(sum)));

	218 #endif

229 j=0; do {	219 j=0; do {

230 #ifdef FIXED_POINT	220 #ifdef FIXED_POINT

231 /* It's really important to round towards zero here */	221 /* It's really important to round towards zero here */

232 iy[j] = MULT16_16_Q15(X[j],rcp);	222 iy[j] = MULT16_16_Q15(X[j],rcp);

233 #else	223 #else

234 iy[j] = (int)floor(rcp*X[j]);	224 iy[j] = (int)floor(rcp*X[j]);

235 #endif	225 #endif

236 y[j] = (celt_norm)iy[j];	226 y[j] = (celt_norm)iy[j];

237 yy = MAC16_16(yy, y[j],y[j]);	227 yy = MAC16_16(yy, y[j],y[j]);

238 xy = MAC16_16(xy, X[j],y[j]);	228 xy = MAC16_16(xy, X[j],y[j]);

239 y[j] *= 2;	229 y[j] *= 2;

240 pulsesLeft -= iy[j];	230 pulsesLeft -= iy[j];

241 } while (++j<N);	231 } while (++j<N);

242 }	232 }

243 celt_assert2(pulsesLeft>=1, "Allocated too many pulses in the quick pass");	233 celt_assert2(pulsesLeft>=0, "Allocated too many pulses in the quick pass");

244	234

245 /* This should never happen, but just in case it does (e.g. on silence)	235 /* This should never happen, but just in case it does (e.g. on silence)

246 we fill the first bin with pulses. */	236 we fill the first bin with pulses. */

247 #ifdef FIXED_POINT_DEBUG	237 #ifdef FIXED_POINT_DEBUG

248 celt_assert2(pulsesLeft<=N+3, "Not enough pulses in the quick pass");	238 celt_assert2(pulsesLeft<=N+3, "Not enough pulses in the quick pass");

249 #endif	239 #endif

250 if (pulsesLeft > N+3)	240 if (pulsesLeft > N+3)

251 {	241 {

252 opus_val16 tmp = (opus_val16)pulsesLeft;	242 opus_val16 tmp = (opus_val16)pulsesLeft;

253 yy = MAC16_16(yy, tmp, tmp);	243 yy = MAC16_16(yy, tmp, tmp);

254 yy = MAC16_16(yy, tmp, y[0]);	244 yy = MAC16_16(yy, tmp, y[0]);

255 iy[0] += pulsesLeft;	245 iy[0] += pulsesLeft;

256 pulsesLeft=0;	246 pulsesLeft=0;

257 }	247 }

258	248

259 s = 1;

260 for (i=0;i<pulsesLeft;i++)	249 for (i=0;i<pulsesLeft;i++)

261 {	250 {

	251 opus_val16 Rxy, Ryy;

262 int best_id;	252 int best_id;

263 opus_val32 best_num = -VERY_LARGE16;	253 opus_val32 best_num;

264 opus_val16 best_den = 0;	254 opus_val16 best_den;

265 #ifdef FIXED_POINT	255 #ifdef FIXED_POINT

266 int rshift;	256 int rshift;

267 #endif	257 #endif

268 #ifdef FIXED_POINT	258 #ifdef FIXED_POINT

269 rshift = 1+celt_ilog2(K-pulsesLeft+i+1);	259 rshift = 1+celt_ilog2(K-pulsesLeft+i+1);

270 #endif	260 #endif

271 best_id = 0;	261 best_id = 0;

272 /* The squared magnitude term gets added anyway, so we might as well	262 /* The squared magnitude term gets added anyway, so we might as well

273 add it outside the loop */	263 add it outside the loop */

274 yy = ADD16(yy, 1);	264 yy = ADD16(yy, 1);

275 j=0;	265

	266 /* Calculations for position 0 are out of the loop, in part to reduce

	267 mispredicted branches (since the if condition is usually false)

	268 in the loop. */

	269 /* Temporary sums of the new pulse(s) */

	270 Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[0])),rshift));

	271 /* We're multiplying y[j] by two so we don't have to do it here */

	272 Ryy = ADD16(yy, y[0]);

	273

	274 /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that

	275 Rxy is positive because the sign is pre-computed) */

	276 Rxy = MULT16_16_Q15(Rxy,Rxy);

	277 best_den = Ryy;

	278 best_num = Rxy;

	279 j=1;

276 do {	280 do {

277 opus_val16 Rxy, Ryy;

278 /* Temporary sums of the new pulse(s) */	281 /* Temporary sums of the new pulse(s) */

279 Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));	282 Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));

280 /* We're multiplying y[j] by two so we don't have to do it here */	283 /* We're multiplying y[j] by two so we don't have to do it here */

281 Ryy = ADD16(yy, y[j]);	284 Ryy = ADD16(yy, y[j]);

282	285

283 /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that	286 /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that

284 Rxy is positive because the sign is pre-computed) */	287 Rxy is positive because the sign is pre-computed) */

285 Rxy = MULT16_16_Q15(Rxy,Rxy);	288 Rxy = MULT16_16_Q15(Rxy,Rxy);

286 /* The idea is to check for num/den >= best_num/best_den, but that way	289 /* The idea is to check for num/den >= best_num/best_den, but that way

287 we can do it without any division */	290 we can do it without any division */

288 /* OPT: Make sure to use conditional moves here */	291 /* OPT: It's not clear whether a cmov is faster than a branch here

289 if (MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num))	292 since the condition is more often false than true and using

	293 a cmov introduces data dependencies across iterations. The optimal

	294 choice may be architecture-dependent. */

	295 if (opus_unlikely(MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num)))

290 {	296 {

291 best_den = Ryy;	297 best_den = Ryy;

292 best_num = Rxy;	298 best_num = Rxy;

293 best_id = j;	299 best_id = j;

294 }	300 }

295 } while (++j<N);	301 } while (++j<N);

296	302

297 /* Updating the sums of the new pulse(s) */	303 /* Updating the sums of the new pulse(s) */

298 xy = ADD32(xy, EXTEND32(X[best_id]));	304 xy = ADD32(xy, EXTEND32(X[best_id]));

299 /* We're multiplying y[j] by two so we don't have to do it here */	305 /* We're multiplying y[j] by two so we don't have to do it here */

300 yy = ADD16(yy, y[best_id]);	306 yy = ADD16(yy, y[best_id]);

301	307

302 /* Only now that we've made the final choice, update y/iy */	308 /* Only now that we've made the final choice, update y/iy */

303 /* Multiplying y[j] by 2 so we don't have to do it everywhere else */	309 /* Multiplying y[j] by 2 so we don't have to do it everywhere else */

304 y[best_id] += 2*s;	310 y[best_id] += 2;

305 iy[best_id]++;	311 iy[best_id]++;

306 }	312 }

307	313

308 /* Put the original sign back */	314 /* Put the original sign back */

309 j=0;	315 j=0;

310 do {	316 do {

311 X[j] = MULT16_16(signx[j],X[j]);	317 /iy[j] = signx[j] ? -iy[j] : iy[j];/

312 if (signx[j] < 0)	318 /* OPT: The is more likely to be compiled without a branch than the code a bove

313 iy[j] = -iy[j];	319 but has the same performance otherwise. */

	320 iy[j] = (iy[j]^-signx[j]) + signx[j];

314 } while (++j<N);	321 } while (++j<N);

	322 RESTORE_STACK;

	323 return yy;

	324 }

	325

	326 unsigned alg_quant(celt_norm X, int N, int K, int spread, int B, ec_enc enc,

	327 opus_val16 gain, int resynth, int arch)

	328 {

	329 VARDECL(int, iy);

	330 opus_val16 yy;

	331 unsigned collapse_mask;

	332 SAVE_STACK;

	333

	334 celt_assert2(K>0, "alg_quant() needs at least one pulse");

	335 celt_assert2(N>1, "alg_quant() needs at least two dimensions");

	336

	337 /* Covers vectorization by up to 4. */

	338 ALLOC(iy, N+3, int);

	339

	340 exp_rotation(X, N, 1, B, K, spread);

	341

	342 yy = op_pvq_search(X, iy, K, N, arch);

	343

315 encode_pulses(iy, N, K, enc);	344 encode_pulses(iy, N, K, enc);

316	345

317 #ifdef RESYNTH	346 if (resynth)

318 normalise_residual(iy, X, N, yy, gain);	347 {

319 exp_rotation(X, N, -1, B, K, spread);	348 normalise_residual(iy, X, N, yy, gain);

320 #endif	349 exp_rotation(X, N, -1, B, K, spread);

	350 }

321	351

322 collapse_mask = extract_collapse_mask(iy, N, B);	352 collapse_mask = extract_collapse_mask(iy, N, B);

323 RESTORE_STACK;	353 RESTORE_STACK;

324 return collapse_mask;	354 return collapse_mask;

325 }	355 }

326	356

327 /** Decode pulse vector and combine the result with the pitch vector to produce	357 /** Decode pulse vector and combine the result with the pitch vector to produce

328 the final normalised signal in the current band. */	358 the final normalised signal in the current band. */

329 unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,	359 unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,

330 ec_dec *dec, opus_val16 gain)	360 ec_dec *dec, opus_val16 gain)

(...skipping 63 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
394 } else {	424 } else {

395 Emid += celt_inner_prod(X, X, N, arch);	425 Emid += celt_inner_prod(X, X, N, arch);

396 Eside += celt_inner_prod(Y, Y, N, arch);	426 Eside += celt_inner_prod(Y, Y, N, arch);

397 }	427 }

398 mid = celt_sqrt(Emid);	428 mid = celt_sqrt(Emid);

399 side = celt_sqrt(Eside);	429 side = celt_sqrt(Eside);

400 #ifdef FIXED_POINT	430 #ifdef FIXED_POINT

401 /* 0.63662 = 2/pi */	431 /* 0.63662 = 2/pi */

402 itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid));	432 itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid));

403 #else	433 #else

404 itheta = (int)floor(.5f+163840.63662fatan2(side,mid));	434 itheta = (int)floor(.5f+163840.63662ffast_atan2f(side,mid));

405 #endif	435 #endif

406	436

407 return itheta;	437 return itheta;

408 }	438 }

OLD	NEW

« no previous file with comments | « third_party/opus/src/celt/vq.h ('k') | third_party/opus/src/celt/x86/celt_lpc_sse.h » ('j') | no next file with comments »