| Index: celt/vq.c
|
| diff --git a/celt/vq.c b/celt/vq.c
|
| index 98a0f36c9f0f2d3003c9eda5568a3871bb4918fa..0c58cdd4e6b877b7685acc018d88b54d339d6b36 100644
|
| --- a/celt/vq.c
|
| +++ b/celt/vq.c
|
| @@ -37,19 +37,27 @@
|
| #include "os_support.h"
|
| #include "bands.h"
|
| #include "rate.h"
|
| +#include "pitch.h"
|
|
|
| +#if defined(MIPSr1_ASM)
|
| +#include "mips/vq_mipsr1.h"
|
| +#endif
|
| +
|
| +#ifndef OVERRIDE_vq_exp_rotation1
|
| static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s)
|
| {
|
| int i;
|
| + opus_val16 ms;
|
| celt_norm *Xptr;
|
| Xptr = X;
|
| + ms = NEG16(s);
|
| for (i=0;i<len-stride;i++)
|
| {
|
| celt_norm x1, x2;
|
| x1 = Xptr[0];
|
| x2 = Xptr[stride];
|
| - Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15));
|
| - *Xptr++ = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15));
|
| + Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15));
|
| + *Xptr++ = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
|
| }
|
| Xptr = &X[len-2*stride-1];
|
| for (i=len-2*stride-1;i>=0;i--)
|
| @@ -57,10 +65,11 @@ static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_
|
| celt_norm x1, x2;
|
| x1 = Xptr[0];
|
| x2 = Xptr[stride];
|
| - Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15));
|
| - *Xptr-- = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15));
|
| + Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15));
|
| + *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15));
|
| }
|
| }
|
| +#endif /* OVERRIDE_vq_exp_rotation1 */
|
|
|
| static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread)
|
| {
|
| @@ -91,7 +100,7 @@ static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int
|
| }
|
| /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
|
| extract_collapse_mask().*/
|
| - len /= stride;
|
| + len = celt_udiv(len, stride);
|
| for (i=0;i<stride;i++)
|
| {
|
| if (dir < 0)
|
| @@ -140,13 +149,15 @@ static unsigned extract_collapse_mask(int *iy, int N, int B)
|
| return 1;
|
| /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
|
| exp_rotation().*/
|
| - N0 = N/B;
|
| + N0 = celt_udiv(N, B);
|
| collapse_mask = 0;
|
| i=0; do {
|
| int j;
|
| + unsigned tmp=0;
|
| j=0; do {
|
| - collapse_mask |= (iy[i*N0+j]!=0)<<i;
|
| + tmp |= iy[i*N0+j];
|
| } while (++j<N0);
|
| + collapse_mask |= (tmp!=0)<<i;
|
| } while (++i<B);
|
| return collapse_mask;
|
| }
|
| @@ -322,7 +333,6 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
|
| unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
|
| ec_dec *dec, opus_val16 gain)
|
| {
|
| - int i;
|
| opus_val32 Ryy;
|
| unsigned collapse_mask;
|
| VARDECL(int, iy);
|
| @@ -331,12 +341,7 @@ unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
|
| celt_assert2(K>0, "alg_unquant() needs at least one pulse");
|
| celt_assert2(N>1, "alg_unquant() needs at least two dimensions");
|
| ALLOC(iy, N, int);
|
| - decode_pulses(iy, N, K, dec);
|
| - Ryy = 0;
|
| - i=0;
|
| - do {
|
| - Ryy = MAC16_16(Ryy, iy[i], iy[i]);
|
| - } while (++i < N);
|
| + Ryy = decode_pulses(iy, N, K, dec);
|
| normalise_residual(iy, X, N, Ryy, gain);
|
| exp_rotation(X, N, -1, B, K, spread);
|
| collapse_mask = extract_collapse_mask(iy, N, B);
|
| @@ -344,21 +349,18 @@ unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B,
|
| return collapse_mask;
|
| }
|
|
|
| -void renormalise_vector(celt_norm *X, int N, opus_val16 gain)
|
| +#ifndef OVERRIDE_renormalise_vector
|
| +void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch)
|
| {
|
| int i;
|
| #ifdef FIXED_POINT
|
| int k;
|
| #endif
|
| - opus_val32 E = EPSILON;
|
| + opus_val32 E;
|
| opus_val16 g;
|
| opus_val32 t;
|
| - celt_norm *xptr = X;
|
| - for (i=0;i<N;i++)
|
| - {
|
| - E = MAC16_16(E, *xptr, *xptr);
|
| - xptr++;
|
| - }
|
| + celt_norm *xptr;
|
| + E = EPSILON + celt_inner_prod(X, X, N, arch);
|
| #ifdef FIXED_POINT
|
| k = celt_ilog2(E)>>1;
|
| #endif
|
| @@ -373,8 +375,9 @@ void renormalise_vector(celt_norm *X, int N, opus_val16 gain)
|
| }
|
| /*return celt_sqrt(E);*/
|
| }
|
| +#endif /* OVERRIDE_renormalise_vector */
|
|
|
| -int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N)
|
| +int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int arch)
|
| {
|
| int i;
|
| int itheta;
|
| @@ -393,14 +396,8 @@ int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N)
|
| Eside = MAC16_16(Eside, s, s);
|
| }
|
| } else {
|
| - for (i=0;i<N;i++)
|
| - {
|
| - celt_norm m, s;
|
| - m = X[i];
|
| - s = Y[i];
|
| - Emid = MAC16_16(Emid, m, m);
|
| - Eside = MAC16_16(Eside, s, s);
|
| - }
|
| + Emid += celt_inner_prod(X, X, N, arch);
|
| + Eside += celt_inner_prod(Y, Y, N, arch);
|
| }
|
| mid = celt_sqrt(Emid);
|
| side = celt_sqrt(Eside);
|
|
|