Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(125)

Side by Side Diff: third_party/opus/src/celt/vq.c

Issue 2962373002: [Opus] Update to v1.2.1 (Closed)
Patch Set: Pre-increment instead of post-increment Created 3 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/opus/src/celt/vq.h ('k') | third_party/opus/src/celt/x86/celt_lpc_sse.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* Copyright (c) 2007-2008 CSIRO 1 /* Copyright (c) 2007-2008 CSIRO
2 Copyright (c) 2007-2009 Xiph.Org Foundation 2 Copyright (c) 2007-2009 Xiph.Org Foundation
3 Written by Jean-Marc Valin */ 3 Written by Jean-Marc Valin */
4 /* 4 /*
5 Redistribution and use in source and binary forms, with or without 5 Redistribution and use in source and binary forms, with or without
6 modification, are permitted provided that the following conditions 6 modification, are permitted provided that the following conditions
7 are met: 7 are met:
8 8
9 - Redistributions of source code must retain the above copyright 9 - Redistributions of source code must retain the above copyright
10 notice, this list of conditions and the following disclaimer. 10 notice, this list of conditions and the following disclaimer.
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
60 { 60 {
61 celt_norm x1, x2; 61 celt_norm x1, x2;
62 x1 = Xptr[0]; 62 x1 = Xptr[0];
63 x2 = Xptr[stride]; 63 x2 = Xptr[stride];
64 Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); 64 Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15));
65 *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); 65 *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
66 } 66 }
67 } 67 }
68 #endif /* OVERRIDE_vq_exp_rotation1 */ 68 #endif /* OVERRIDE_vq_exp_rotation1 */
69 69
70 static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread) 70 void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
71 { 71 {
72 static const int SPREAD_FACTOR[3]={15,10,5}; 72 static const int SPREAD_FACTOR[3]={15,10,5};
73 int i; 73 int i;
74 opus_val16 c, s; 74 opus_val16 c, s;
75 opus_val16 gain, theta; 75 opus_val16 gain, theta;
76 int stride2=0; 76 int stride2=0;
77 int factor; 77 int factor;
78 78
79 if (2*K>=len || spread==SPREAD_NONE) 79 if (2*K>=len || spread==SPREAD_NONE)
80 return; 80 return;
(...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after
151 int j; 151 int j;
152 unsigned tmp=0; 152 unsigned tmp=0;
153 j=0; do { 153 j=0; do {
154 tmp |= iy[i*N0+j]; 154 tmp |= iy[i*N0+j];
155 } while (++j<N0); 155 } while (++j<N0);
156 collapse_mask |= (tmp!=0)<<i; 156 collapse_mask |= (tmp!=0)<<i;
157 } while (++i<B); 157 } while (++i<B);
158 return collapse_mask; 158 return collapse_mask;
159 } 159 }
160 160
161 unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc 161 opus_val16 op_pvq_search_c(celt_norm *X, int *iy, int K, int N, int arch)
162 #ifdef RESYNTH
163 , opus_val16 gain
164 #endif
165 )
166 { 162 {
167 VARDECL(celt_norm, y); 163 VARDECL(celt_norm, y);
168 VARDECL(int, iy); 164 VARDECL(int, signx);
169 VARDECL(opus_val16, signx);
170 int i, j; 165 int i, j;
171 opus_val16 s;
172 int pulsesLeft; 166 int pulsesLeft;
173 opus_val32 sum; 167 opus_val32 sum;
174 opus_val32 xy; 168 opus_val32 xy;
175 opus_val16 yy; 169 opus_val16 yy;
176 unsigned collapse_mask;
177 SAVE_STACK; 170 SAVE_STACK;
178 171
179 celt_assert2(K>0, "alg_quant() needs at least one pulse"); 172 (void)arch;
180 celt_assert2(N>1, "alg_quant() needs at least two dimensions");
181
182 ALLOC(y, N, celt_norm); 173 ALLOC(y, N, celt_norm);
183 ALLOC(iy, N, int); 174 ALLOC(signx, N, int);
184 ALLOC(signx, N, opus_val16);
185
186 exp_rotation(X, N, 1, B, K, spread);
187 175
188 /* Get rid of the sign */ 176 /* Get rid of the sign */
189 sum = 0; 177 sum = 0;
190 j=0; do { 178 j=0; do {
191 if (X[j]>0) 179 signx[j] = X[j]<0;
192 signx[j]=1; 180 /* OPT: Make sure the compiler doesn't use a branch on ABS16(). */
193 else { 181 X[j] = ABS16(X[j]);
194 signx[j]=-1;
195 X[j]=-X[j];
196 }
197 iy[j] = 0; 182 iy[j] = 0;
198 y[j] = 0; 183 y[j] = 0;
199 } while (++j<N); 184 } while (++j<N);
200 185
201 xy = yy = 0; 186 xy = yy = 0;
202 187
203 pulsesLeft = K; 188 pulsesLeft = K;
204 189
205 /* Do a pre-search by projecting on the pyramid */ 190 /* Do a pre-search by projecting on the pyramid */
206 if (K > (N>>1)) 191 if (K > (N>>1))
(...skipping 11 matching lines...) Expand all
218 to be allocated. 64 is an approximation of infinity here. */ 203 to be allocated. 64 is an approximation of infinity here. */
219 if (!(sum > EPSILON && sum < 64)) 204 if (!(sum > EPSILON && sum < 64))
220 #endif 205 #endif
221 { 206 {
222 X[0] = QCONST16(1.f,14); 207 X[0] = QCONST16(1.f,14);
223 j=1; do 208 j=1; do
224 X[j]=0; 209 X[j]=0;
225 while (++j<N); 210 while (++j<N);
226 sum = QCONST16(1.f,14); 211 sum = QCONST16(1.f,14);
227 } 212 }
228 rcp = EXTRACT16(MULT16_32_Q16(K-1, celt_rcp(sum))); 213 #ifdef FIXED_POINT
214 rcp = EXTRACT16(MULT16_32_Q16(K, celt_rcp(sum)));
215 #else
216 /* Using K+e with e < 1 guarantees we cannot get more than K pulses. */
217 rcp = EXTRACT16(MULT16_32_Q16(K+0.8f, celt_rcp(sum)));
218 #endif
229 j=0; do { 219 j=0; do {
230 #ifdef FIXED_POINT 220 #ifdef FIXED_POINT
231 /* It's really important to round *towards zero* here */ 221 /* It's really important to round *towards zero* here */
232 iy[j] = MULT16_16_Q15(X[j],rcp); 222 iy[j] = MULT16_16_Q15(X[j],rcp);
233 #else 223 #else
234 iy[j] = (int)floor(rcp*X[j]); 224 iy[j] = (int)floor(rcp*X[j]);
235 #endif 225 #endif
236 y[j] = (celt_norm)iy[j]; 226 y[j] = (celt_norm)iy[j];
237 yy = MAC16_16(yy, y[j],y[j]); 227 yy = MAC16_16(yy, y[j],y[j]);
238 xy = MAC16_16(xy, X[j],y[j]); 228 xy = MAC16_16(xy, X[j],y[j]);
239 y[j] *= 2; 229 y[j] *= 2;
240 pulsesLeft -= iy[j]; 230 pulsesLeft -= iy[j];
241 } while (++j<N); 231 } while (++j<N);
242 } 232 }
243 celt_assert2(pulsesLeft>=1, "Allocated too many pulses in the quick pass"); 233 celt_assert2(pulsesLeft>=0, "Allocated too many pulses in the quick pass");
244 234
245 /* This should never happen, but just in case it does (e.g. on silence) 235 /* This should never happen, but just in case it does (e.g. on silence)
246 we fill the first bin with pulses. */ 236 we fill the first bin with pulses. */
247 #ifdef FIXED_POINT_DEBUG 237 #ifdef FIXED_POINT_DEBUG
248 celt_assert2(pulsesLeft<=N+3, "Not enough pulses in the quick pass"); 238 celt_assert2(pulsesLeft<=N+3, "Not enough pulses in the quick pass");
249 #endif 239 #endif
250 if (pulsesLeft > N+3) 240 if (pulsesLeft > N+3)
251 { 241 {
252 opus_val16 tmp = (opus_val16)pulsesLeft; 242 opus_val16 tmp = (opus_val16)pulsesLeft;
253 yy = MAC16_16(yy, tmp, tmp); 243 yy = MAC16_16(yy, tmp, tmp);
254 yy = MAC16_16(yy, tmp, y[0]); 244 yy = MAC16_16(yy, tmp, y[0]);
255 iy[0] += pulsesLeft; 245 iy[0] += pulsesLeft;
256 pulsesLeft=0; 246 pulsesLeft=0;
257 } 247 }
258 248
259 s = 1;
260 for (i=0;i<pulsesLeft;i++) 249 for (i=0;i<pulsesLeft;i++)
261 { 250 {
251 opus_val16 Rxy, Ryy;
262 int best_id; 252 int best_id;
263 opus_val32 best_num = -VERY_LARGE16; 253 opus_val32 best_num;
264 opus_val16 best_den = 0; 254 opus_val16 best_den;
265 #ifdef FIXED_POINT 255 #ifdef FIXED_POINT
266 int rshift; 256 int rshift;
267 #endif 257 #endif
268 #ifdef FIXED_POINT 258 #ifdef FIXED_POINT
269 rshift = 1+celt_ilog2(K-pulsesLeft+i+1); 259 rshift = 1+celt_ilog2(K-pulsesLeft+i+1);
270 #endif 260 #endif
271 best_id = 0; 261 best_id = 0;
272 /* The squared magnitude term gets added anyway, so we might as well 262 /* The squared magnitude term gets added anyway, so we might as well
273 add it outside the loop */ 263 add it outside the loop */
274 yy = ADD16(yy, 1); 264 yy = ADD16(yy, 1);
275 j=0; 265
266 /* Calculations for position 0 are out of the loop, in part to reduce
267 mispredicted branches (since the if condition is usually false)
268 in the loop. */
269 /* Temporary sums of the new pulse(s) */
270 Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[0])),rshift));
271 /* We're multiplying y[j] by two so we don't have to do it here */
272 Ryy = ADD16(yy, y[0]);
273
274 /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
275 Rxy is positive because the sign is pre-computed) */
276 Rxy = MULT16_16_Q15(Rxy,Rxy);
277 best_den = Ryy;
278 best_num = Rxy;
279 j=1;
276 do { 280 do {
277 opus_val16 Rxy, Ryy;
278 /* Temporary sums of the new pulse(s) */ 281 /* Temporary sums of the new pulse(s) */
279 Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift)); 282 Rxy = EXTRACT16(SHR32(ADD32(xy, EXTEND32(X[j])),rshift));
280 /* We're multiplying y[j] by two so we don't have to do it here */ 283 /* We're multiplying y[j] by two so we don't have to do it here */
281 Ryy = ADD16(yy, y[j]); 284 Ryy = ADD16(yy, y[j]);
282 285
283 /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that 286 /* Approximate score: we maximise Rxy/sqrt(Ryy) (we're guaranteed that
284 Rxy is positive because the sign is pre-computed) */ 287 Rxy is positive because the sign is pre-computed) */
285 Rxy = MULT16_16_Q15(Rxy,Rxy); 288 Rxy = MULT16_16_Q15(Rxy,Rxy);
286 /* The idea is to check for num/den >= best_num/best_den, but that way 289 /* The idea is to check for num/den >= best_num/best_den, but that way
287 we can do it without any division */ 290 we can do it without any division */
288 /* OPT: Make sure to use conditional moves here */ 291 /* OPT: It's not clear whether a cmov is faster than a branch here
289 if (MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num)) 292 since the condition is more often false than true and using
293 a cmov introduces data dependencies across iterations. The optimal
294 choice may be architecture-dependent. */
295 if (opus_unlikely(MULT16_16(best_den, Rxy) > MULT16_16(Ryy, best_num)))
290 { 296 {
291 best_den = Ryy; 297 best_den = Ryy;
292 best_num = Rxy; 298 best_num = Rxy;
293 best_id = j; 299 best_id = j;
294 } 300 }
295 } while (++j<N); 301 } while (++j<N);
296 302
297 /* Updating the sums of the new pulse(s) */ 303 /* Updating the sums of the new pulse(s) */
298 xy = ADD32(xy, EXTEND32(X[best_id])); 304 xy = ADD32(xy, EXTEND32(X[best_id]));
299 /* We're multiplying y[j] by two so we don't have to do it here */ 305 /* We're multiplying y[j] by two so we don't have to do it here */
300 yy = ADD16(yy, y[best_id]); 306 yy = ADD16(yy, y[best_id]);
301 307
302 /* Only now that we've made the final choice, update y/iy */ 308 /* Only now that we've made the final choice, update y/iy */
303 /* Multiplying y[j] by 2 so we don't have to do it everywhere else */ 309 /* Multiplying y[j] by 2 so we don't have to do it everywhere else */
304 y[best_id] += 2*s; 310 y[best_id] += 2;
305 iy[best_id]++; 311 iy[best_id]++;
306 } 312 }
307 313
308 /* Put the original sign back */ 314 /* Put the original sign back */
309 j=0; 315 j=0;
310 do { 316 do {
311 X[j] = MULT16_16(signx[j],X[j]); 317 /*iy[j] = signx[j] ? -iy[j] : iy[j];*/
312 if (signx[j] < 0) 318 /* OPT: The is more likely to be compiled without a branch than the code a bove
313 iy[j] = -iy[j]; 319 but has the same performance otherwise. */
320 iy[j] = (iy[j]^-signx[j]) + signx[j];
314 } while (++j<N); 321 } while (++j<N);
322 RESTORE_STACK;
323 return yy;
324 }
325
326 unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc,
327 opus_val16 gain, int resynth, int arch)
328 {
329 VARDECL(int, iy);
330 opus_val16 yy;
331 unsigned collapse_mask;
332 SAVE_STACK;
333
334 celt_assert2(K>0, "alg_quant() needs at least one pulse");
335 celt_assert2(N>1, "alg_quant() needs at least two dimensions");
336
337 /* Covers vectorization by up to 4. */
338 ALLOC(iy, N+3, int);
339
340 exp_rotation(X, N, 1, B, K, spread);
341
342 yy = op_pvq_search(X, iy, K, N, arch);
343
315 encode_pulses(iy, N, K, enc); 344 encode_pulses(iy, N, K, enc);
316 345
317 #ifdef RESYNTH 346 if (resynth)
318 normalise_residual(iy, X, N, yy, gain); 347 {
319 exp_rotation(X, N, -1, B, K, spread); 348 normalise_residual(iy, X, N, yy, gain);
320 #endif 349 exp_rotation(X, N, -1, B, K, spread);
350 }
321 351
322 collapse_mask = extract_collapse_mask(iy, N, B); 352 collapse_mask = extract_collapse_mask(iy, N, B);
323 RESTORE_STACK; 353 RESTORE_STACK;
324 return collapse_mask; 354 return collapse_mask;
325 } 355 }
326 356
327 /** Decode pulse vector and combine the result with the pitch vector to produce 357 /** Decode pulse vector and combine the result with the pitch vector to produce
328 the final normalised signal in the current band. */ 358 the final normalised signal in the current band. */
329 unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B, 359 unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
330 ec_dec *dec, opus_val16 gain) 360 ec_dec *dec, opus_val16 gain)
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after
394 } else { 424 } else {
395 Emid += celt_inner_prod(X, X, N, arch); 425 Emid += celt_inner_prod(X, X, N, arch);
396 Eside += celt_inner_prod(Y, Y, N, arch); 426 Eside += celt_inner_prod(Y, Y, N, arch);
397 } 427 }
398 mid = celt_sqrt(Emid); 428 mid = celt_sqrt(Emid);
399 side = celt_sqrt(Eside); 429 side = celt_sqrt(Eside);
400 #ifdef FIXED_POINT 430 #ifdef FIXED_POINT
401 /* 0.63662 = 2/pi */ 431 /* 0.63662 = 2/pi */
402 itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid)); 432 itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid));
403 #else 433 #else
404 itheta = (int)floor(.5f+16384*0.63662f*atan2(side,mid)); 434 itheta = (int)floor(.5f+16384*0.63662f*fast_atan2f(side,mid));
405 #endif 435 #endif
406 436
407 return itheta; 437 return itheta;
408 } 438 }
OLDNEW
« no previous file with comments | « third_party/opus/src/celt/vq.h ('k') | third_party/opus/src/celt/x86/celt_lpc_sse.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698