Index: celt/vq.c |
diff --git a/celt/vq.c b/celt/vq.c |
index 98a0f36c9f0f2d3003c9eda5568a3871bb4918fa..0c58cdd4e6b877b7685acc018d88b54d339d6b36 100644 |
--- a/celt/vq.c |
+++ b/celt/vq.c |
@@ -37,19 +37,27 @@ |
#include "os_support.h" |
#include "bands.h" |
#include "rate.h" |
+#include "pitch.h" |
+#if defined(MIPSr1_ASM) |
+#include "mips/vq_mipsr1.h" |
+#endif |
+ |
+#ifndef OVERRIDE_vq_exp_rotation1 |
static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_val16 s) |
{ |
int i; |
+ opus_val16 ms; |
celt_norm *Xptr; |
Xptr = X; |
+ ms = NEG16(s); |
for (i=0;i<len-stride;i++) |
{ |
celt_norm x1, x2; |
x1 = Xptr[0]; |
x2 = Xptr[stride]; |
- Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15)); |
- *Xptr++ = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15)); |
+ Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); |
+ *Xptr++ = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); |
} |
Xptr = &X[len-2*stride-1]; |
for (i=len-2*stride-1;i>=0;i--) |
@@ -57,10 +65,11 @@ static void exp_rotation1(celt_norm *X, int len, int stride, opus_val16 c, opus_ |
celt_norm x1, x2; |
x1 = Xptr[0]; |
x2 = Xptr[stride]; |
- Xptr[stride] = EXTRACT16(SHR32(MULT16_16(c,x2) + MULT16_16(s,x1), 15)); |
- *Xptr-- = EXTRACT16(SHR32(MULT16_16(c,x1) - MULT16_16(s,x2), 15)); |
+ Xptr[stride] = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x2), s, x1), 15)); |
+ *Xptr-- = EXTRACT16(PSHR32(MAC16_16(MULT16_16(c, x1), ms, x2), 15)); |
} |
} |
+#endif /* OVERRIDE_vq_exp_rotation1 */ |
static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int spread) |
{ |
@@ -91,7 +100,7 @@ static void exp_rotation(celt_norm *X, int len, int dir, int stride, int K, int |
} |
/*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for |
extract_collapse_mask().*/ |
- len /= stride; |
+ len = celt_udiv(len, stride); |
for (i=0;i<stride;i++) |
{ |
if (dir < 0) |
@@ -140,13 +149,15 @@ static unsigned extract_collapse_mask(int *iy, int N, int B) |
return 1; |
/*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for |
exp_rotation().*/ |
- N0 = N/B; |
+ N0 = celt_udiv(N, B); |
collapse_mask = 0; |
i=0; do { |
int j; |
+ unsigned tmp=0; |
j=0; do { |
- collapse_mask |= (iy[i*N0+j]!=0)<<i; |
+ tmp |= iy[i*N0+j]; |
} while (++j<N0); |
+ collapse_mask |= (tmp!=0)<<i; |
} while (++i<B); |
return collapse_mask; |
} |
@@ -322,7 +333,6 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc |
unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B, |
ec_dec *dec, opus_val16 gain) |
{ |
- int i; |
opus_val32 Ryy; |
unsigned collapse_mask; |
VARDECL(int, iy); |
@@ -331,12 +341,7 @@ unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B, |
celt_assert2(K>0, "alg_unquant() needs at least one pulse"); |
celt_assert2(N>1, "alg_unquant() needs at least two dimensions"); |
ALLOC(iy, N, int); |
- decode_pulses(iy, N, K, dec); |
- Ryy = 0; |
- i=0; |
- do { |
- Ryy = MAC16_16(Ryy, iy[i], iy[i]); |
- } while (++i < N); |
+ Ryy = decode_pulses(iy, N, K, dec); |
normalise_residual(iy, X, N, Ryy, gain); |
exp_rotation(X, N, -1, B, K, spread); |
collapse_mask = extract_collapse_mask(iy, N, B); |
@@ -344,21 +349,18 @@ unsigned alg_unquant(celt_norm *X, int N, int K, int spread, int B, |
return collapse_mask; |
} |
-void renormalise_vector(celt_norm *X, int N, opus_val16 gain) |
+#ifndef OVERRIDE_renormalise_vector |
+void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch) |
{ |
int i; |
#ifdef FIXED_POINT |
int k; |
#endif |
- opus_val32 E = EPSILON; |
+ opus_val32 E; |
opus_val16 g; |
opus_val32 t; |
- celt_norm *xptr = X; |
- for (i=0;i<N;i++) |
- { |
- E = MAC16_16(E, *xptr, *xptr); |
- xptr++; |
- } |
+ celt_norm *xptr; |
+ E = EPSILON + celt_inner_prod(X, X, N, arch); |
#ifdef FIXED_POINT |
k = celt_ilog2(E)>>1; |
#endif |
@@ -373,8 +375,9 @@ void renormalise_vector(celt_norm *X, int N, opus_val16 gain) |
} |
/*return celt_sqrt(E);*/ |
} |
+#endif /* OVERRIDE_renormalise_vector */ |
-int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N) |
+int stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int N, int arch) |
{ |
int i; |
int itheta; |
@@ -393,14 +396,8 @@ int stereo_itheta(celt_norm *X, celt_norm *Y, int stereo, int N) |
Eside = MAC16_16(Eside, s, s); |
} |
} else { |
- for (i=0;i<N;i++) |
- { |
- celt_norm m, s; |
- m = X[i]; |
- s = Y[i]; |
- Emid = MAC16_16(Emid, m, m); |
- Eside = MAC16_16(Eside, s, s); |
- } |
+ Emid += celt_inner_prod(X, X, N, arch); |
+ Eside += celt_inner_prod(Y, Y, N, arch); |
} |
mid = celt_sqrt(Emid); |
side = celt_sqrt(Eside); |