| OLD | NEW |
| 1 /* Copyright (c) 2007-2008 CSIRO | 1 /* Copyright (c) 2007-2008 CSIRO |
| 2 Copyright (c) 2007-2009 Xiph.Org Foundation | 2 Copyright (c) 2007-2009 Xiph.Org Foundation |
| 3 Written by Jean-Marc Valin */ | 3 Written by Jean-Marc Valin */ |
| 4 /* | 4 /* |
| 5 Redistribution and use in source and binary forms, with or without | 5 Redistribution and use in source and binary forms, with or without |
| 6 modification, are permitted provided that the following conditions | 6 modification, are permitted provided that the following conditions |
| 7 are met: | 7 are met: |
| 8 | 8 |
| 9 - Redistributions of source code must retain the above copyright | 9 - Redistributions of source code must retain the above copyright |
| 10 notice, this list of conditions and the following disclaimer. | 10 notice, this list of conditions and the following disclaimer. |
| (...skipping 19 matching lines...) |
| 30 #include "config.h" | 30 #include "config.h" |
| 31 #endif | 31 #endif |
| 32 | 32 |
| 33 #include "mathops.h" | 33 #include "mathops.h" |
| 34 #include "cwrs.h" | 34 #include "cwrs.h" |
| 35 #include "vq.h" | 35 #include "vq.h" |
| 36 #include "arch.h" | 36 #include "arch.h" |
| 37 #include "os_support.h" | 37 #include "os_support.h" |
| 38 #include "bands.h" | 38 #include "bands.h" |
| 39 #include "rate.h" | 39 #include "rate.h" |
| | 40 #include "pitch.h" |
| 40 | 41 |
| | 42 #if defined(MIPSr1_ASM) |
| | 43 #include "mips/vq_mipsr1.h" |
| | 44 #endif |
| | 45 |
| | 46 #ifndef OVERRIDE_vq_exp_rotation1 |
| 41 static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s) | 47 static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s) |
| 42 { | 48 { |
| 43 int i; | 49 int i; |
| | 50 opus_val16 ms; |
| 44 celt_norm *Xptr; | 51 celt_norm *Xptr; |
| 45 Xptr = X; | 52 Xptr = X; |
| | 53 ms = NEG16(s); |
| 46 for (i=0;i<len-stride;i++) | 54 for (i=0;i<len-stride;i++) |
| 47 { | 55 { |
| 48 celt_norm x1, x2; | 56 celt_norm x1, x2; |
| 49 x1 = Xptr[0]; | 57 x1 = Xptr[0]; |
| 50 x2 = Xptr[stride]; | 58 x2 = Xptr[stride]; |
| 51 Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15)); | 59 Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); |
| 52 *Xptr++ = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15)); | 60 *Xptr++ = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); |
| 53 } | 61 } |
| 54 Xptr = &X[len-2*stride-1]; | 62 Xptr = &X[len-2*stride-1]; |
| 55 for (i=len-2*stride-1;i>=0;i--) | 63 for (i=len-2*stride-1;i>=0;i--) |
| 56 { | 64 { |
| 57 celt_norm x1, x2; | 65 celt_norm x1, x2; |
| 58 x1 = Xptr[0]; | 66 x1 = Xptr[0]; |
| 59 x2 = Xptr[stride]; | 67 x2 = Xptr[stride]; |
| 60 Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15)); | 68 Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); |
| 61 *Xptr-- = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15)); | 69 *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); |
| 62 } | 70 } |
| 63 } | 71 } |
| | 72 #endif /* OVERRIDE_vq_exp_rotation1 */ |
| 64 | 73 |
| 65 static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread) | 74 static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread) |
| 66 { | 75 { |
| 67 static const int SPREAD_FACTOR[3]={15,10,5}; | 76 static const int SPREAD_FACTOR[3]={15,10,5}; |
| 68 int i; | 77 int i; |
| 69 opus_val16 c, s; | 78 opus_val16 c, s; |
| 70 opus_val16 gain, theta; | 79 opus_val16 gain, theta; |
| 71 int stride2=0; | 80 int stride2=0; |
| 72 int factor; | 81 int factor; |
| 73 | 82 |
| (...skipping 10 matching lines...) |
| 84 if (len>=8*stride) | 93 if (len>=8*stride) |
| 85 { | 94 { |
| 86 stride2 = 1; | 95 stride2 = 1; |
| 87 /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding. | 96 /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding. |
| 88 It's basically incrementing as long as (stride2+0.5)^2 < len/stride. */ | 97 It's basically incrementing as long as (stride2+0.5)^2 < len/stride. */ |
| 89 while ((stride2*stride2+stride2)*stride + (stride>>2) < len) | 98 while ((stride2*stride2+stride2)*stride + (stride>>2) < len) |
| 90 stride2++; | 99 stride2++; |
| 91 } | 100 } |
| 92 /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for | 101 /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for |
| 93 extract_collapse_mask().*/ | 102 extract_collapse_mask().*/ |
| 94 len /= stride; | 103 len = celt_udiv(len, stride); |
| 95 for (i=0;i<stride;i++) | 104 for (i=0;i<stride;i++) |
| 96 { | 105 { |
| 97 if (dir < 0) | 106 if (dir < 0) |
| 98 { | 107 { |
| 99 if (stride2) | 108 if (stride2) |
| 100 exp_rotation1(X+i*len, len, stride2, s, c); | 109 exp_rotation1(X+i*len, len, stride2, s, c); |
| 101 exp_rotation1(X+i*len, len, 1, c, s); | 110 exp_rotation1(X+i*len, len, 1, c, s); |
| 102 } else { | 111 } else { |
| 103 exp_rotation1(X+i*len, len, 1, c, -s); | 112 exp_rotation1(X+i*len, len, 1, c, -s); |
| 104 if (stride2) | 113 if (stride2) |
| (...skipping 28 matching lines...) |
| 133 | 142 |
| 134 static unsigned extract_collapse_mask(int *iy, int N, int B) | 143 static unsigned extract_collapse_mask(int *iy, int N, int B) |
| 135 { | 144 { |
| 136 unsigned collapse_mask; | 145 unsigned collapse_mask; |
| 137 int N0; | 146 int N0; |
| 138 int i; | 147 int i; |
| 139 if (B<=1) | 148 if (B<=1) |
| 140 return 1; | 149 return 1; |
| 141 /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for | 150 /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for |
| 142 exp_rotation().*/ | 151 exp_rotation().*/ |
| 143 N0 = N/B; | 152 N0 = celt_udiv(N, B); |
| 144 collapse_mask = 0; | 153 collapse_mask = 0; |
| 145 i=0; do { | 154 i=0; do { |
| 146 int j; | 155 int j; |
| | 156 unsigned tmp=0; |
| 147 j=0; do { | 157 j=0; do { |
| 148 collapse_mask |= (iy[i*N0+j]!=0)<<i; | 158 tmp |= iy[i*N0+j]; |
| 149 } while (++j<N0); | 159 } while (++j<N0); |
| | 160 collapse_mask |= (tmp!=0)<<i; |
| 150 } while (++i<B); | 161 } while (++i<B); |
| 151 return collapse_mask; | 162 return collapse_mask; |
| 152 } | 163 } |
| 153 | 164 |
| 154 unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc | 165 unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc |
| 155 #ifdef RESYNTH | 166 #ifdef RESYNTH |
| 156 , opus_val16 gain | 167 , opus_val16 gain |
| 157 #endif | 168 #endif |
| 158 ) | 169 ) |
| 159 { | 170 { |
| (...skipping 155 matching lines...) |
| 315 collapse_mask = extract_collapse_mask(iy, N, B); | 326 collapse_mask = extract_collapse_mask(iy, N, B); |
| 316 RESTORE_STACK; | 327 RESTORE_STACK; |
| 317 return collapse_mask; | 328 return collapse_mask; |
| 318 } | 329 } |
| 319 | 330 |
| 320 /** Decode pulse vector and combine the result with the pitch vector to produce | 331 /** Decode pulse vector and combine the result with the pitch vector to produce |
| 321 the final normalised signal in the current band. */ | 332 the final normalised signal in the current band. */ |
| 322 unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B, | 333 unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B, |
| 323 ec_dec *dec, opus_val16 gain) | 334 ec_dec *dec, opus_val16 gain) |
| 324 { | 335 { |
| 325 int i; | |
| 326 opus_val32 Ryy; | 336 opus_val32 Ryy; |
| 327 unsigned collapse_mask; | 337 unsigned collapse_mask; |
| 328 VARDECL(int, iy); | 338 VARDECL(int, iy); |
| 329 SAVE_STACK; | 339 SAVE_STACK; |
| 330 | 340 |
| 331 celt_assert2(K>0, "alg_unquant() needs at least one pulse"); | 341 celt_assert2(K>0, "alg_unquant() needs at least one pulse"); |
| 332 celt_assert2(N>1, "alg_unquant() needs at least two dimensions"); | 342 celt_assert2(N>1, "alg_unquant() needs at least two dimensions"); |
| 333 ALLOC(iy, N, int); | 343 ALLOC(iy, N, int); |
| 334 decode_pulses(iy, N, K, dec); | 344 Ryy = decode_pulses(iy, N, K, dec); |
| 335 Ryy = 0; | |
| 336 i=0; | |
| 337 do { | |
| 338 Ryy = MAC16_16(Ryy, iy[i], iy[i]); | |
| 339 } while (++i < N); | |
| 340 normalise_residual(iy, X, N, Ryy, gain); | 345 normalise_residual(iy, X, N, Ryy, gain); |
| 341 exp_rotation(X, N, -1, B, K, spread); | 346 exp_rotation(X, N, -1, B, K, spread); |
| 342 collapse_mask = extract_collapse_mask(iy, N, B); | 347 collapse_mask = extract_collapse_mask(iy, N, B); |
| 343 RESTORE_STACK; | 348 RESTORE_STACK; |
| 344 return collapse_mask; | 349 return collapse_mask; |
| 345 } | 350 } |
| 346 | 351 |
| 347 void renormalise_vector(celt_norm *X, int N, opus_val16 gain) | 352 #ifndef OVERRIDE_renormalise_vector |
| | 353 void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch) |
| 348 { | 354 { |
| 349 int i; | 355 int i; |
| 350 #ifdef FIXED_POINT | 356 #ifdef FIXED_POINT |
| 351 int k; | 357 int k; |
| 352 #endif | 358 #endif |
| 353 opus_val32 E = EPSILON; | 359 opus_val32 E; |
| 354 opus_val16 g; | 360 opus_val16 g; |
| 355 opus_val32 t; | 361 opus_val32 t; |
| 356 celt_norm *xptr = X; | 362 celt_norm *xptr; |
| 357 for (i=0;i<N;i++) | 363 E = EPSILON + celt_inner_prod(X, X, N, arch); |
| 358 { | |
| 359 E = MAC16_16(E, *xptr, *xptr); | |
| 360 xptr++; | |
| 361 } | |
| 362 #ifdef FIXED_POINT | 364 #ifdef FIXED_POINT |
| 363 k = celt_ilog2(E)>>1; | 365 k = celt_ilog2(E)>>1; |
| 364 #endif | 366 #endif |
| 365 t = VSHR32(E, 2*(k-7)); | 367 t = VSHR32(E, 2*(k-7)); |
| 366 g = MULT16_16_P15(celt_rsqrt_norm(t),gain); | 368 g = MULT16_16_P15(celt_rsqrt_norm(t),gain); |
| 367 | 369 |
| 368 xptr = X; | 370 xptr = X; |
| 369 for (i=0;i<N;i++) | 371 for (i=0;i<N;i++) |
| 370 { | 372 { |
| 371 *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+1)); | 373 *xptr = EXTRACT16(PSHR32(MULT16_16(g, *xptr), k+1)); |
| 372 xptr++; | 374 xptr++; |
| 373 } | 375 } |
| 374 /*return celt_sqrt(E);*/ | 376 /*return celt_sqrt(E);*/ |
| 375 } | 377 } |
| | 378 #endif /* OVERRIDE_renormalise_vector */ |
| 376 | 379 |
| 377 int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N) | 380 int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int arch) |
| 378 { | 381 { |
| 379 int i; | 382 int i; |
| 380 int itheta; | 383 int itheta; |
| 381 opus_val16 mid, side; | 384 opus_val16 mid, side; |
| 382 opus_val32 Emid, Eside; | 385 opus_val32 Emid, Eside; |
| 383 | 386 |
| 384 Emid = Eside = EPSILON; | 387 Emid = Eside = EPSILON; |
| 385 if (stereo) | 388 if (stereo) |
| 386 { | 389 { |
| 387 for (i=0;i<N;i++) | 390 for (i=0;i<N;i++) |
| 388 { | 391 { |
| 389 celt_norm m, s; | 392 celt_norm m, s; |
| 390 m = ADD16(SHR16(X[i],1),SHR16(Y[i],1)); | 393 m = ADD16(SHR16(X[i],1),SHR16(Y[i],1)); |
| 391 s = SUB16(SHR16(X[i],1),SHR16(Y[i],1)); | 394 s = SUB16(SHR16(X[i],1),SHR16(Y[i],1)); |
| 392 Emid = MAC16_16(Emid, m, m); | 395 Emid = MAC16_16(Emid, m, m); |
| 393 Eside = MAC16_16(Eside, s, s); | 396 Eside = MAC16_16(Eside, s, s); |
| 394 } | 397 } |
| 395 } else { | 398 } else { |
| 396 for (i=0;i<N;i++) | 399 Emid += celt_inner_prod(X, X, N, arch); |
| 397 { | 400 Eside += celt_inner_prod(Y, Y, N, arch); |
| 398 celt_norm m, s; | |
| 399 m = X[i]; | |
| 400 s = Y[i]; | |
| 401 Emid = MAC16_16(Emid, m, m); | |
| 402 Eside = MAC16_16(Eside, s, s); | |
| 403 } | |
| 404 } | 401 } |
| 405 mid = celt_sqrt(Emid); | 402 mid = celt_sqrt(Emid); |
| 406 side = celt_sqrt(Eside); | 403 side = celt_sqrt(Eside); |
| 407 #ifdef FIXED_POINT | 404 #ifdef FIXED_POINT |
| 408 /* 0.63662 = 2/pi */ | 405 /* 0.63662 = 2/pi */ |
| 409 itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid)); | 406 itheta = MULT16_16_Q15(QCONST16(0.63662f,15),celt_atan2p(side, mid)); |
| 410 #else | 407 #else |
| 411 itheta = (int)floor(.5f+16384*0.63662f*atan2(side,mid)); | 408 itheta = (int)floor(.5f+16384*0.63662f*atan2(side,mid)); |
| 412 #endif | 409 #endif |
| 413 | 410 |
| 414 return itheta; | 411 return itheta; |
| 415 } | 412 } |
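
Note on the exp_rotation1() hunk (old lines 51-52, new lines 59-60): the kernel applies a plane (Givens) rotation to each pair of samples spaced `stride` apart; the new code keeps the same math but expresses it as fused multiply-accumulates (MAC16_16), uses a rounding shift (PSHR32 instead of SHR32), and pre-negates the sine (`ms = NEG16(s)`) so the second output is also a single MAC. A minimal float-domain sketch of the per-pair operation; `rotate_pair_f` is a hypothetical name for illustration only, not part of the patch:

```c
/* Sketch only: float equivalent of the rotation exp_rotation1() applies
 * to each pair (Xptr[0], Xptr[stride]) in the fixed-point build. */
static void rotate_pair_f(float *a, float *b, float c, float s)
{
   float x1 = *a;       /* Xptr[0]      */
   float x2 = *b;       /* Xptr[stride] */
   *b = c*x2 + s*x1;    /* corresponds to MAC16_16(MULT16_16(c, x2), s, x1)            */
   *a = c*x1 - s*x2;    /* corresponds to MAC16_16(MULT16_16(c, x1), ms, x2), ms = -s  */
}
```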
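Note on the stride2 search (old lines 87-90, new lines 96-99): the condition in the comment, (stride2+0.5)^2 < len/stride, expands to stride2^2 + stride2 + 0.25 < len/stride; multiplying both sides by stride gives (stride2*stride2+stride2)*stride + stride/4 < len, which is exactly the integer test in the while loop, with stride>>2 standing in for stride/4.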
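Several hunks (renormalise_vector(), stereo_itheta(), and alg_unquant() via the new return value of decode_pulses()) replace open-coded MAC16_16 energy loops with calls such as `celt_inner_prod(X, X, N, arch)`, letting the new `arch` argument select an architecture-specific implementation; this is also why "pitch.h" is now included at new line 40. A minimal sketch of what the generic C path is assumed to compute, equivalent to the loops being removed (the name `inner_prod_sketch` is hypothetical):

```c
/* Assumed generic behaviour of celt_inner_prod(): the sum of element-wise
 * products, i.e. the same accumulation the old code spelled out by hand. */
static opus_val32 inner_prod_sketch(const opus_val16 *x, const opus_val16 *y, int N)
{
   int i;
   opus_val32 sum = 0;
   for (i = 0; i < N; i++)
      sum = MAC16_16(sum, x[i], y[i]);   /* sum += x[i] * y[i] */
   return sum;
}
```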