Index: celt/bands.c |
diff --git a/celt/bands.c b/celt/bands.c |
index cce56e2f6ebe5978fbdae8bff75a74da3f1c7ba8..c643b0937302d7fc3ad9cfe113706e76138ece32 100644 |
--- a/celt/bands.c |
+++ b/celt/bands.c |
@@ -92,11 +92,11 @@ static int bitexact_log2tan(int isin,int icos) |
#ifdef FIXED_POINT |
/* Compute the amplitude (sqrt energy) in each of the bands */ |
-void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M) |
+void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM) |
{ |
int i, c, N; |
const opus_int16 *eBands = m->eBands; |
- N = M*m->shortMdctSize; |
+ N = m->shortMdctSize<<LM; |
c=0; do { |
for (i=0;i<end;i++) |
{ |
@@ -104,18 +104,23 @@ void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *band |
opus_val32 maxval=0; |
opus_val32 sum = 0; |
- j=M*eBands[i]; do { |
- maxval = MAX32(maxval, X[j+c*N]); |
- maxval = MAX32(maxval, -X[j+c*N]); |
- } while (++j<M*eBands[i+1]); |
- |
+ maxval = celt_maxabs32(&X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM); |
if (maxval > 0) |
{ |
- int shift = celt_ilog2(maxval)-10; |
- j=M*eBands[i]; do { |
- sum = MAC16_16(sum, EXTRACT16(VSHR32(X[j+c*N],shift)), |
- EXTRACT16(VSHR32(X[j+c*N],shift))); |
- } while (++j<M*eBands[i+1]); |
+ int shift = celt_ilog2(maxval) - 14 + (((m->logN[i]>>BITRES)+LM+1)>>1); |
+ j=eBands[i]<<LM; |
+ if (shift>0) |
+ { |
+ do { |
+ sum = MAC16_16(sum, EXTRACT16(SHR32(X[j+c*N],shift)), |
+ EXTRACT16(SHR32(X[j+c*N],shift))); |
+ } while (++j<eBands[i+1]<<LM); |
+ } else { |
+ do { |
+ sum = MAC16_16(sum, EXTRACT16(SHL32(X[j+c*N],-shift)), |
+ EXTRACT16(SHL32(X[j+c*N],-shift))); |
+ } while (++j<eBands[i+1]<<LM); |
+ } |
/* We're adding one here to ensure the normalized band isn't larger than unity norm */ |
bandE[i+c*m->nbEBands] = EPSILON+VSHR32(EXTEND32(celt_sqrt(sum)),-shift); |
} else { |
@@ -150,18 +155,16 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel |
#else /* FIXED_POINT */ |
/* Compute the amplitude (sqrt energy) in each of the bands */ |
-void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int M) |
+void compute_band_energies(const CELTMode *m, const celt_sig *X, celt_ener *bandE, int end, int C, int LM) |
{ |
int i, c, N; |
const opus_int16 *eBands = m->eBands; |
- N = M*m->shortMdctSize; |
+ N = m->shortMdctSize<<LM; |
c=0; do { |
for (i=0;i<end;i++) |
{ |
- int j; |
- opus_val32 sum = 1e-27f; |
- for (j=M*eBands[i];j<M*eBands[i+1];j++) |
- sum += X[j+c*N]*X[j+c*N]; |
+ opus_val32 sum; |
+ sum = 1e-27f + celt_inner_prod_c(&X[c*N+(eBands[i]<<LM)], &X[c*N+(eBands[i]<<LM)], (eBands[i+1]-eBands[i])<<LM); |
bandE[i+c*m->nbEBands] = celt_sqrt(sum); |
/*printf ("%f ", bandE[i+c*m->nbEBands]);*/ |
} |
@@ -190,74 +193,80 @@ void normalise_bands(const CELTMode *m, const celt_sig * OPUS_RESTRICT freq, cel |
/* De-normalise the energy to produce the synthesis from the unit-energy bands */ |
void denormalise_bands(const CELTMode *m, const celt_norm * OPUS_RESTRICT X, |
- celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start, int end, int C, int M) |
+ celt_sig * OPUS_RESTRICT freq, const opus_val16 *bandLogE, int start, |
+ int end, int M, int downsample, int silence) |
{ |
- int i, c, N; |
+ int i, N; |
+ int bound; |
+ celt_sig * OPUS_RESTRICT f; |
+ const celt_norm * OPUS_RESTRICT x; |
const opus_int16 *eBands = m->eBands; |
N = M*m->shortMdctSize; |
- celt_assert2(C<=2, "denormalise_bands() not implemented for >2 channels"); |
- c=0; do { |
- celt_sig * OPUS_RESTRICT f; |
- const celt_norm * OPUS_RESTRICT x; |
- f = freq+c*N; |
- x = X+c*N+M*eBands[start]; |
- for (i=0;i<M*eBands[start];i++) |
- *f++ = 0; |
- for (i=start;i<end;i++) |
- { |
- int j, band_end; |
- opus_val16 g; |
- opus_val16 lg; |
+ bound = M*eBands[end]; |
+ if (downsample!=1) |
+ bound = IMIN(bound, N/downsample); |
+ if (silence) |
+ { |
+ bound = 0; |
+ start = end = 0; |
+ } |
+ f = freq; |
+ x = X+M*eBands[start]; |
+ for (i=0;i<M*eBands[start];i++) |
+ *f++ = 0; |
+ for (i=start;i<end;i++) |
+ { |
+ int j, band_end; |
+ opus_val16 g; |
+ opus_val16 lg; |
#ifdef FIXED_POINT |
- int shift; |
+ int shift; |
#endif |
- j=M*eBands[i]; |
- band_end = M*eBands[i+1]; |
- lg = ADD16(bandLogE[i+c*m->nbEBands], SHL16((opus_val16)eMeans[i],6)); |
+ j=M*eBands[i]; |
+ band_end = M*eBands[i+1]; |
+ lg = ADD16(bandLogE[i], SHL16((opus_val16)eMeans[i],6)); |
#ifndef FIXED_POINT |
- g = celt_exp2(lg); |
+ g = celt_exp2(lg); |
#else |
- /* Handle the integer part of the log energy */ |
- shift = 16-(lg>>DB_SHIFT); |
- if (shift>31) |
- { |
- shift=0; |
- g=0; |
- } else { |
- /* Handle the fractional part. */ |
- g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1)); |
- } |
- /* Handle extreme gains with negative shift. */ |
- if (shift<0) |
- { |
- /* For shift < -2 we'd be likely to overflow, so we're capping |
+ /* Handle the integer part of the log energy */ |
+ shift = 16-(lg>>DB_SHIFT); |
+ if (shift>31) |
+ { |
+ shift=0; |
+ g=0; |
+ } else { |
+ /* Handle the fractional part. */ |
+ g = celt_exp2_frac(lg&((1<<DB_SHIFT)-1)); |
+ } |
+ /* Handle extreme gains with negative shift. */ |
+ if (shift<0) |
+ { |
+ /* For shift < -2 we'd be likely to overflow, so we're capping |
the gain here. This shouldn't happen unless the bitstream is |
already corrupted. */ |
- if (shift < -2) |
- { |
- g = 32767; |
- shift = -2; |
- } |
- do { |
- *f++ = SHL32(MULT16_16(*x++, g), -shift); |
- } while (++j<band_end); |
- } else |
+ if (shift < -2) |
+ { |
+ g = 32767; |
+ shift = -2; |
+ } |
+ do { |
+ *f++ = SHL32(MULT16_16(*x++, g), -shift); |
+ } while (++j<band_end); |
+ } else |
#endif |
/* Be careful of the fixed-point "else" just above when changing this code */ |
do { |
*f++ = SHR32(MULT16_16(*x++, g), shift); |
} while (++j<band_end); |
- } |
- celt_assert(start <= end); |
- for (i=M*eBands[end];i<N;i++) |
- *f++ = 0; |
- } while (++c<C); |
+ } |
+ celt_assert(start <= end); |
+ OPUS_CLEAR(&freq[bound], N-bound); |
} |
/* This prevents energy collapse for transients with multiple short MDCTs */ |
void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_masks, int LM, int C, int size, |
- int start, int end, opus_val16 *logE, opus_val16 *prev1logE, |
- opus_val16 *prev2logE, int *pulses, opus_uint32 seed) |
+ int start, int end, const opus_val16 *logE, const opus_val16 *prev1logE, |
+ const opus_val16 *prev2logE, const int *pulses, opus_uint32 seed, int arch) |
{ |
int c, i, j, k; |
for (i=start;i<end;i++) |
@@ -272,7 +281,8 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas |
N0 = m->eBands[i+1]-m->eBands[i]; |
/* depth in 1/8 bits */ |
- depth = (1+pulses[i])/((m->eBands[i+1]-m->eBands[i])<<LM); |
+ celt_assert(pulses[i]>=0); |
+ depth = celt_udiv(1+pulses[i], (m->eBands[i+1]-m->eBands[i]))>>LM; |
#ifdef FIXED_POINT |
thresh32 = SHR32(celt_exp2(-SHL16(depth, 10-BITRES)),1); |
@@ -345,12 +355,12 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas |
} |
/* We just added some energy, so we need to renormalise */ |
if (renormalize) |
- renormalise_vector(X, N0<<LM, Q15ONE); |
+ renormalise_vector(X, N0<<LM, Q15ONE, arch); |
} while (++c<C); |
} |
} |
-static void intensity_stereo(const CELTMode *m, celt_norm *X, celt_norm *Y, const celt_ener *bandE, int bandID, int N) |
+static void intensity_stereo(const CELTMode *m, celt_norm * OPUS_RESTRICT X, const celt_norm * OPUS_RESTRICT Y, const celt_ener *bandE, int bandID, int N) |
{ |
int i = bandID; |
int j; |
@@ -370,25 +380,25 @@ static void intensity_stereo(const CELTMode *m, celt_norm *X, celt_norm *Y, cons |
celt_norm r, l; |
l = X[j]; |
r = Y[j]; |
- X[j] = MULT16_16_Q14(a1,l) + MULT16_16_Q14(a2,r); |
+ X[j] = EXTRACT16(SHR32(MAC16_16(MULT16_16(a1, l), a2, r), 14)); |
/* Side is not encoded, no need to calculate */ |
} |
} |
-static void stereo_split(celt_norm *X, celt_norm *Y, int N) |
+static void stereo_split(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, int N) |
{ |
int j; |
for (j=0;j<N;j++) |
{ |
- celt_norm r, l; |
- l = MULT16_16_Q15(QCONST16(.70710678f,15), X[j]); |
- r = MULT16_16_Q15(QCONST16(.70710678f,15), Y[j]); |
- X[j] = l+r; |
- Y[j] = r-l; |
+ opus_val32 r, l; |
+ l = MULT16_16(QCONST16(.70710678f, 15), X[j]); |
+ r = MULT16_16(QCONST16(.70710678f, 15), Y[j]); |
+ X[j] = EXTRACT16(SHR32(ADD32(l, r), 15)); |
+ Y[j] = EXTRACT16(SHR32(SUB32(r, l), 15)); |
} |
} |
-static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N) |
+static void stereo_merge(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT Y, opus_val16 mid, int N) |
{ |
int j; |
opus_val32 xp=0, side=0; |
@@ -409,8 +419,7 @@ static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N) |
Er = MULT16_16(mid2, mid2) + side + 2*xp; |
if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28)) |
{ |
- for (j=0;j<N;j++) |
- Y[j] = X[j]; |
+ OPUS_COPY(Y, X, N); |
return; |
} |
@@ -434,7 +443,7 @@ static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N) |
{ |
celt_norm r, l; |
/* Apply mid scaling (side is already scaled) */ |
- l = MULT16_16_Q15(mid, X[j]); |
+ l = MULT16_16_P15(mid, X[j]); |
r = Y[j]; |
X[j] = EXTRACT16(PSHR32(MULT16_16(lgain, SUB16(l,r)), kl+1)); |
Y[j] = EXTRACT16(PSHR32(MULT16_16(rgain, ADD16(l,r)), kr+1)); |
@@ -442,7 +451,7 @@ static void stereo_merge(celt_norm *X, celt_norm *Y, opus_val16 mid, int N) |
} |
/* Decide whether we should spread the pulses in the current frame */ |
-int spreading_decision(const CELTMode *m, celt_norm *X, int *average, |
+int spreading_decision(const CELTMode *m, const celt_norm *X, int *average, |
int last_decision, int *hf_average, int *tapset_decision, int update_hf, |
int end, int C, int M) |
{ |
@@ -463,7 +472,7 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average, |
{ |
int j, N, tmp=0; |
int tcount[3] = {0,0,0}; |
- celt_norm * OPUS_RESTRICT x = X+M*eBands[i]+c*N0; |
+ const celt_norm * OPUS_RESTRICT x = X+M*eBands[i]+c*N0; |
N = M*(eBands[i+1]-eBands[i]); |
if (N<=8) |
continue; |
@@ -483,7 +492,7 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average, |
/* Only include four last bands (8 kHz and up) */ |
if (i>m->nbEBands-4) |
- hf_sum += 32*(tcount[1]+tcount[0])/N; |
+ hf_sum += celt_udiv(32*(tcount[1]+tcount[0]), N); |
tmp = (2*tcount[2] >= N) + (2*tcount[1] >= N) + (2*tcount[0] >= N); |
sum += tmp*256; |
nbBands++; |
@@ -493,7 +502,7 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average, |
if (update_hf) |
{ |
if (hf_sum) |
- hf_sum /= C*(4-m->nbEBands+end); |
+ hf_sum = celt_udiv(hf_sum, C*(4-m->nbEBands+end)); |
*hf_average = (*hf_average+hf_sum)>>1; |
hf_sum = *hf_average; |
if (*tapset_decision==2) |
@@ -509,7 +518,8 @@ int spreading_decision(const CELTMode *m, celt_norm *X, int *average, |
} |
/*printf("%d %d %d\n", hf_sum, *hf_average, *tapset_decision);*/ |
celt_assert(nbBands>0); /* end has to be non-zero */ |
- sum /= nbBands; |
+ celt_assert(sum>=0); |
+ sum = celt_udiv(sum, nbBands); |
/* Recursive averaging */ |
sum = (sum+*average)>>1; |
*average = sum; |
@@ -567,8 +577,7 @@ static void deinterleave_hadamard(celt_norm *X, int N0, int stride, int hadamard |
for (j=0;j<N0;j++) |
tmp[i*N0+j] = X[j*stride+i]; |
} |
- for (j=0;j<N;j++) |
- X[j] = tmp[j]; |
+ OPUS_COPY(X, tmp, N); |
RESTORE_STACK; |
} |
@@ -591,8 +600,7 @@ static void interleave_hadamard(celt_norm *X, int N0, int stride, int hadamard) |
for (j=0;j<N0;j++) |
tmp[j*stride+i] = X[i*N0+j]; |
} |
- for (j=0;j<N;j++) |
- X[j] = tmp[j]; |
+ OPUS_COPY(X, tmp, N); |
RESTORE_STACK; |
} |
@@ -603,11 +611,11 @@ void haar1(celt_norm *X, int N0, int stride) |
for (i=0;i<stride;i++) |
for (j=0;j<N0;j++) |
{ |
- celt_norm tmp1, tmp2; |
- tmp1 = MULT16_16_Q15(QCONST16(.70710678f,15), X[stride*2*j+i]); |
- tmp2 = MULT16_16_Q15(QCONST16(.70710678f,15), X[stride*(2*j+1)+i]); |
- X[stride*2*j+i] = tmp1 + tmp2; |
- X[stride*(2*j+1)+i] = tmp1 - tmp2; |
+ opus_val32 tmp1, tmp2; |
+ tmp1 = MULT16_16(QCONST16(.70710678f,15), X[stride*2*j+i]); |
+ tmp2 = MULT16_16(QCONST16(.70710678f,15), X[stride*(2*j+1)+i]); |
+ X[stride*2*j+i] = EXTRACT16(PSHR32(ADD32(tmp1, tmp2), 15)); |
+ X[stride*(2*j+1)+i] = EXTRACT16(PSHR32(SUB32(tmp1, tmp2), 15)); |
} |
} |
@@ -622,7 +630,8 @@ static int compute_qn(int N, int b, int offset, int pulse_cap, int stereo) |
/* The upper limit ensures that in a stereo split with itheta==16384, we'll |
always have enough bits left over to code at least one pulse in the |
side; otherwise it would collapse, since it doesn't get folded. */ |
- qb = IMIN(b-pulse_cap-(4<<BITRES), (b+N2*offset)/N2); |
+ qb = celt_sudiv(b+N2*offset, N2); |
+ qb = IMIN(b-pulse_cap-(4<<BITRES), qb); |
qb = IMIN(8<<BITRES, qb); |
@@ -647,6 +656,7 @@ struct band_ctx { |
opus_int32 remaining_bits; |
const celt_ener *bandE; |
opus_uint32 seed; |
+ int arch; |
}; |
struct split_ctx { |
@@ -698,7 +708,7 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx, |
side and mid. With just that parameter, we can re-scale both |
mid and side because we know that 1) they have unit norm and |
2) they are orthogonal. */ |
- itheta = stereo_itheta(X, Y, stereo, N); |
+ itheta = stereo_itheta(X, Y, stereo, N, ctx->arch); |
} |
tell = ec_tell_frac(ec); |
if (qn!=1) |
@@ -769,7 +779,8 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx, |
ec_dec_update(ec, fl, fl+fs, ft); |
} |
} |
- itheta = (opus_int32)itheta*16384/qn; |
+ celt_assert(itheta>=0); |
+ itheta = celt_udiv((opus_int32)itheta*16384, qn); |
if (encode && stereo) |
{ |
if (itheta==0) |
@@ -1021,8 +1032,7 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X, |
fill &= cm_mask; |
if (!fill) |
{ |
- for (j=0;j<N;j++) |
- X[j] = 0; |
+ OPUS_CLEAR(X, N); |
} else { |
if (lowband == NULL) |
{ |
@@ -1046,7 +1056,7 @@ static unsigned quant_partition(struct band_ctx *ctx, celt_norm *X, |
} |
cm = fill; |
} |
- renormalise_vector(X, N, gain); |
+ renormalise_vector(X, N, gain, ctx->arch); |
} |
} |
} |
@@ -1084,7 +1094,7 @@ static unsigned quant_band(struct band_ctx *ctx, celt_norm *X, |
longBlocks = B0==1; |
- N_B /= B; |
+ N_B = celt_udiv(N_B, B); |
/* Special case for one sample */ |
if (N==1) |
@@ -1098,9 +1108,7 @@ static unsigned quant_band(struct band_ctx *ctx, celt_norm *X, |
if (lowband_scratch && lowband && (recombine || ((N_B&1) == 0 && tf_change<0) || B0>1)) |
{ |
- int j; |
- for (j=0;j<N;j++) |
- lowband_scratch[j] = lowband[j]; |
+ OPUS_COPY(lowband_scratch, lowband, N); |
lowband = lowband_scratch; |
} |
@@ -1353,9 +1361,11 @@ static unsigned quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_norm |
void quant_all_bands(int encode, const CELTMode *m, int start, int end, |
- celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks, const celt_ener *bandE, int *pulses, |
- int shortBlocks, int spread, int dual_stereo, int intensity, int *tf_res, |
- opus_int32 total_bits, opus_int32 balance, ec_ctx *ec, int LM, int codedBands, opus_uint32 *seed) |
+ celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks, |
+ const celt_ener *bandE, int *pulses, int shortBlocks, int spread, |
+ int dual_stereo, int intensity, int *tf_res, opus_int32 total_bits, |
+ opus_int32 balance, ec_ctx *ec, int LM, int codedBands, |
+ opus_uint32 *seed, int arch) |
{ |
int i; |
opus_int32 remaining_bits; |
@@ -1397,6 +1407,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end, |
ctx.m = m; |
ctx.seed = *seed; |
ctx.spread = spread; |
+ ctx.arch = arch; |
for (i=start;i<end;i++) |
{ |
opus_int32 tell; |
@@ -1428,7 +1439,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end, |
ctx.remaining_bits = remaining_bits; |
if (i <= codedBands-1) |
{ |
- curr_balance = balance / IMIN(3, codedBands-i); |
+ curr_balance = celt_sudiv(balance, IMIN(3, codedBands-i)); |
b = IMAX(0, IMIN(16383, IMIN(remaining_bits+1,pulses[i]+curr_balance))); |
} else { |
b = 0; |