Index: third_party/opus/src/celt/celt_encoder.c |
diff --git a/third_party/opus/src/celt/celt_encoder.c b/third_party/opus/src/celt/celt_encoder.c |
index 3ee7a4d3f7a1dceb71e6a1bef14306ac9d217a32..053e5a3b2900c16f2907fb03be537ce3b5b1092b 100644 |
--- a/third_party/opus/src/celt/celt_encoder.c |
+++ b/third_party/opus/src/celt/celt_encoder.c |
@@ -73,8 +73,8 @@ struct OpusCustomEncoder { |
int constrained_vbr; /* If zero, VBR can do whatever it likes with the rate */ |
int loss_rate; |
int lsb_depth; |
- int variable_duration; |
int lfe; |
+ int disable_inv; |
int arch; |
/* Everything beyond this point gets cleared on a reset */ |
@@ -98,6 +98,7 @@ struct OpusCustomEncoder { |
#endif |
int consec_transient; |
AnalysisInfo analysis; |
+ SILKInfo silk_info; |
opus_val32 preemph_memE[2]; |
opus_val32 preemph_memD[2]; |
@@ -123,6 +124,7 @@ struct OpusCustomEncoder { |
/* opus_val16 oldBandE[], Size = channels*mode->nbEBands */ |
/* opus_val16 oldLogE[], Size = channels*mode->nbEBands */ |
/* opus_val16 oldLogE2[], Size = channels*mode->nbEBands */ |
+ /* opus_val16 energyError[], Size = channels*mode->nbEBands */ |
}; |
int celt_encoder_get_size(int channels) |
@@ -136,9 +138,10 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_encoder_get_size(const CELTMode *mode, int |
int size = sizeof(struct CELTEncoder) |
+ (channels*mode->overlap-1)*sizeof(celt_sig) /* celt_sig in_mem[channels*mode->overlap]; */ |
+ channels*COMBFILTER_MAXPERIOD*sizeof(celt_sig) /* celt_sig prefilter_mem[channels*COMBFILTER_MAXPERIOD]; */ |
- + 3*channels*mode->nbEBands*sizeof(opus_val16); /* opus_val16 oldBandE[channels*mode->nbEBands]; */ |
+ + 4*channels*mode->nbEBands*sizeof(opus_val16); /* opus_val16 oldBandE[channels*mode->nbEBands]; */ |
/* opus_val16 oldLogE[channels*mode->nbEBands]; */ |
/* opus_val16 oldLogE2[channels*mode->nbEBands]; */ |
+ /* opus_val16 energyError[channels*mode->nbEBands]; */ |
return size; |
} |
@@ -178,7 +181,6 @@ static int opus_custom_encoder_init_arch(CELTEncoder *st, const CELTMode *mode, |
st->start = 0; |
st->end = st->mode->effEBands; |
st->signalling = 1; |
- |
st->arch = arch; |
st->constrained_vbr = 1; |
@@ -223,7 +225,8 @@ void opus_custom_encoder_destroy(CELTEncoder *st) |
static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int C, |
- opus_val16 *tf_estimate, int *tf_chan) |
+ opus_val16 *tf_estimate, int *tf_chan, int allow_weak_transients, |
+ int *weak_transient) |
{ |
int i; |
VARDECL(opus_val16, tmp); |
@@ -233,6 +236,12 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int |
int c; |
opus_val16 tf_max; |
int len2; |
+ /* Forward masking: 6.7 dB/ms. */ |
+#ifdef FIXED_POINT |
+ int forward_shift = 4; |
+#else |
+ opus_val16 forward_decay = QCONST16(.0625f,15); |
+#endif |
/* Table of 6*64/x, trained on real data to minimize the average error */ |
static const unsigned char inv_table[128] = { |
255,255,156,110, 86, 70, 59, 51, 45, 40, 37, 33, 31, 28, 26, 25, |
@@ -247,6 +256,19 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int |
SAVE_STACK; |
ALLOC(tmp, len, opus_val16); |
+ *weak_transient = 0; |
+ /* For lower bitrates, let's be more conservative and have a forward masking |
+ decay of 3.3 dB/ms. This avoids having to code transients at very low |
+ bitrate (mostly for hybrid), which can result in unstable energy and/or |
+ partial collapse. */ |
+ if (allow_weak_transients) |
+ { |
+#ifdef FIXED_POINT |
+ forward_shift = 5; |
+#else |
+ forward_decay = QCONST16(.03125f,15); |
+#endif |
+ } |
len2=len/2; |
for (c=0;c<C;c++) |
{ |
@@ -269,7 +291,7 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int |
mem0 = mem1 + y - 2*x; |
mem1 = x - .5f*y; |
#endif |
- tmp[i] = EXTRACT16(SHR32(y,2)); |
+ tmp[i] = SROUND16(y, 2); |
/*printf("%f ", tmp[i]);*/ |
} |
/*printf("\n");*/ |
@@ -280,7 +302,7 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int |
/* Normalize tmp to max range */ |
{ |
int shift=0; |
- shift = 14-celt_ilog2(1+celt_maxabs16(tmp, len)); |
+ shift = 14-celt_ilog2(MAX16(1, celt_maxabs16(tmp, len))); |
if (shift!=0) |
{ |
for (i=0;i<len;i++) |
@@ -299,9 +321,9 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int |
mean += x2; |
#ifdef FIXED_POINT |
/* FIXME: Use PSHR16() instead */ |
- tmp[i] = mem0 + PSHR32(x2-mem0,4); |
+ tmp[i] = mem0 + PSHR32(x2-mem0,forward_shift); |
#else |
- tmp[i] = mem0 + MULT16_16_P15(QCONST16(.0625f,15),x2-mem0); |
+ tmp[i] = mem0 + MULT16_16_P15(forward_decay,x2-mem0); |
#endif |
mem0 = tmp[i]; |
} |
@@ -311,6 +333,7 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int |
/* Backward pass to compute the pre-echo threshold */ |
for (i=len2-1;i>=0;i--) |
{ |
+ /* Backward masking: 13.9 dB/ms. */ |
#ifdef FIXED_POINT |
/* FIXME: Use PSHR16() instead */ |
tmp[i] = mem0 + PSHR32(tmp[i]-mem0,3); |
@@ -359,7 +382,12 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int |
} |
} |
is_transient = mask_metric>200; |
- |
+ /* For low bitrates, define "weak transients" that need to be |
+ handled differently to avoid partial collapse. */ |
+ if (allow_weak_transients && is_transient && mask_metric<600) { |
+ is_transient = 0; |
+ *weak_transient = 1; |
+ } |
/* Arbitrary metric for VBR boost */ |
tf_max = MAX16(0,celt_sqrt(27*mask_metric)-42); |
/* *tf_estimate = 1 + MIN16(1, sqrt(MAX16(0, tf_max-30))/20); */ |
@@ -549,7 +577,7 @@ static opus_val32 l1_metric(const celt_norm *tmp, int N, int LM, opus_val16 bias |
static int tf_analysis(const CELTMode *m, int len, int isTransient, |
int *tf_res, int lambda, celt_norm *X, int N0, int LM, |
- int *tf_sum, opus_val16 tf_estimate, int tf_chan) |
+ opus_val16 tf_estimate, int tf_chan) |
{ |
int i; |
VARDECL(int, metric); |
@@ -574,7 +602,6 @@ static int tf_analysis(const CELTMode *m, int len, int isTransient, |
ALLOC(path0, len, int); |
ALLOC(path1, len, int); |
- *tf_sum = 0; |
for (i=0;i<len;i++) |
{ |
int k, N; |
@@ -629,7 +656,6 @@ static int tf_analysis(const CELTMode *m, int len, int isTransient, |
metric[i] = 2*best_level; |
else |
metric[i] = -2*best_level; |
- *tf_sum += (isTransient ? LM : 0) - metric[i]/2; |
/* For bands that can't be split to -1, set the metric to the half-way point to avoid |
biasing the decision */ |
if (narrow && (metric[i]==0 || metric[i]==-2*LM)) |
@@ -754,7 +780,7 @@ static void tf_encode(int start, int end, int isTransient, int *tf_res, int LM, |
static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, |
const opus_val16 *bandLogE, int end, int LM, int C, int N0, |
AnalysisInfo *analysis, opus_val16 *stereo_saving, opus_val16 tf_estimate, |
- int intensity, opus_val16 surround_trim, int arch) |
+ int intensity, opus_val16 surround_trim, opus_int32 equiv_rate, int arch) |
{ |
int i; |
opus_val32 diff=0; |
@@ -762,6 +788,14 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, |
int trim_index; |
opus_val16 trim = QCONST16(5.f, 8); |
opus_val16 logXC, logXC2; |
+ /* At low bitrate, reducing the trim seems to help. At higher bitrates, it's less |
+ clear what's best, so we're keeping it as it was before, at least for now. */ |
+ if (equiv_rate < 64000) { |
+ trim = QCONST16(4.f, 8); |
+ } else if (equiv_rate < 80000) { |
+ opus_int32 frac = (equiv_rate-64000) >> 10; |
+ trim = QCONST16(4.f, 8) + QCONST16(1.f/16.f, 8)*frac; |
+ } |
if (C==2) |
{ |
opus_val16 sum = 0; /* Q10 */ |
@@ -809,7 +843,7 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, |
} while (++c<C); |
diff /= C*(end-1); |
/*printf("%f\n", diff);*/ |
- trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), SHR16(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 )); |
+ trim -= MAX32(-QCONST16(2.f, 8), MIN32(QCONST16(2.f, 8), SHR32(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 )); |
trim -= SHR16(surround_trim, DB_SHIFT-8); |
trim -= 2*SHR16(tf_estimate, 14-8); |
#ifndef DISABLE_FLOAT_API |
@@ -930,7 +964,7 @@ static opus_val16 median_of_3(const opus_val16 *x) |
static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 *bandLogE2, |
int nbEBands, int start, int end, int C, int *offsets, int lsb_depth, const opus_int16 *logN, |
int isTransient, int vbr, int constrained_vbr, const opus_int16 *eBands, int LM, |
- int effectiveBytes, opus_int32 *tot_boost_, int lfe, opus_val16 *surround_dynalloc) |
+ int effectiveBytes, opus_int32 *tot_boost_, int lfe, opus_val16 *surround_dynalloc, AnalysisInfo *analysis) |
{ |
int i, c; |
opus_int32 tot_boost=0; |
@@ -1020,14 +1054,26 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 |
} |
for (i=start;i<end;i++) |
{ |
- int width; |
- int boost; |
- int boost_bits; |
- |
if (i<8) |
follower[i] *= 2; |
if (i>=12) |
follower[i] = HALF16(follower[i]); |
+ } |
+#ifdef DISABLE_FLOAT_API |
+ (void)analysis; |
+#else |
+ if (analysis->valid) |
+ { |
+ for (i=start;i<IMIN(LEAK_BANDS, end);i++) |
+ follower[i] = follower[i] + QCONST16(1.f/64.f, DB_SHIFT)*analysis->leak_boost[i]; |
+ } |
+#endif |
+ for (i=start;i<end;i++) |
+ { |
+ int width; |
+ int boost; |
+ int boost_bits; |
+ |
follower[i] = MIN16(follower[i], QCONST16(4, DB_SHIFT)); |
width = C*(eBands[i+1]-eBands[i])<<LM; |
@@ -1042,11 +1088,11 @@ static opus_val16 dynalloc_analysis(const opus_val16 *bandLogE, const opus_val16 |
boost = (int)SHR32(EXTEND32(follower[i])*width/6,DB_SHIFT); |
boost_bits = boost*6<<BITRES; |
} |
- /* For CBR and non-transient CVBR frames, limit dynalloc to 1/4 of the bits */ |
+ /* For CBR and non-transient CVBR frames, limit dynalloc to 2/3 of the bits */ |
if ((!vbr || (constrained_vbr&&!isTransient)) |
- && (tot_boost+boost_bits)>>BITRES>>3 > effectiveBytes/4) |
+ && (tot_boost+boost_bits)>>BITRES>>3 > 2*effectiveBytes/3) |
{ |
- opus_int32 cap = ((effectiveBytes/4)<<BITRES<<3); |
+ opus_int32 cap = ((2*effectiveBytes/3)<<BITRES<<3); |
offsets[i] = cap-tot_boost; |
tot_boost = cap; |
break; |
@@ -1193,7 +1239,7 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32 |
int LM, opus_int32 bitrate, int lastCodedBands, int C, int intensity, |
int constrained_vbr, opus_val16 stereo_saving, int tot_boost, |
opus_val16 tf_estimate, int pitch_change, opus_val16 maxDepth, |
- int variable_duration, int lfe, int has_surround_mask, opus_val16 surround_masking, |
+ int lfe, int has_surround_mask, opus_val16 surround_masking, |
opus_val16 temporal_vbr) |
{ |
/* The target rate in 8th bits per frame */ |
@@ -1235,10 +1281,9 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32 |
SHR32(MULT16_16(stereo_saving-QCONST16(0.1f,8),(coded_stereo_dof<<BITRES)),8)); |
} |
/* Boost the rate according to dynalloc (minus the dynalloc average for calibration). */ |
- target += tot_boost-(16<<LM); |
+ target += tot_boost-(19<<LM); |
/* Apply transient boost, compensating for average boost. */ |
- tf_calibration = variable_duration==OPUS_FRAMESIZE_VARIABLE ? |
- QCONST16(0.02f,14) : QCONST16(0.04f,14); |
+ tf_calibration = QCONST16(0.044f,14); |
target += (opus_int32)SHL32(MULT16_32_Q15(tf_estimate-tf_calibration, target),1); |
#ifndef DISABLE_FLOAT_API |
@@ -1249,7 +1294,7 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32 |
float tonal; |
/* Tonality boost (compensating for the average). */ |
- tonal = MAX16(0.f,analysis->tonality-.15f)-0.09f; |
+ tonal = MAX16(0.f,analysis->tonality-.15f)-0.12f; |
tonal_target = target + (opus_int32)((coded_bins<<BITRES)*1.2f*tonal); |
if (pitch_change) |
tonal_target += (opus_int32)((coded_bins<<BITRES)*.8f); |
@@ -1279,21 +1324,11 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32 |
/*printf("%f %d\n", maxDepth, floor_depth);*/ |
} |
- if ((!has_surround_mask||lfe) && (constrained_vbr || bitrate<64000)) |
+ /* Make VBR less aggressive for constrained VBR because we can't keep a higher bitrate |
+ for long. Needs tuning. */ |
+ if ((!has_surround_mask||lfe) && constrained_vbr) |
{ |
- opus_val16 rate_factor = Q15ONE; |
- if (bitrate < 64000) |
- { |
-#ifdef FIXED_POINT |
- rate_factor = MAX16(0,(bitrate-32000)); |
-#else |
- rate_factor = MAX16(0,(1.f/32768)*(bitrate-32000)); |
-#endif |
- } |
- if (constrained_vbr) |
- rate_factor = MIN16(rate_factor, QCONST16(0.67f, 15)); |
- target = base_target + (opus_int32)MULT16_32_Q15(rate_factor, target-base_target); |
- |
+ target = base_target + (opus_int32)MULT16_32_Q15(QCONST16(0.67f, 15), target-base_target); |
} |
if (!has_surround_mask && tf_estimate < QCONST16(.2f, 14)) |
@@ -1331,7 +1366,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, |
VARDECL(int, tf_res); |
VARDECL(unsigned char, collapse_masks); |
celt_sig *prefilter_mem; |
- opus_val16 *oldBandE, *oldLogE, *oldLogE2; |
+ opus_val16 *oldBandE, *oldLogE, *oldLogE2, *energyError; |
int shortBlocks=0; |
int isTransient=0; |
const int CC = st->channels; |
@@ -1343,7 +1378,6 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, |
int end; |
int effEnd; |
int codedBands; |
- int tf_sum; |
int alloc_trim; |
int pitch_index=COMBFILTER_MINPERIOD; |
opus_val16 gain1 = 0; |
@@ -1355,6 +1389,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, |
opus_int32 total_boost; |
opus_int32 balance; |
opus_int32 tell; |
+ opus_int32 tell0_frac; |
int prefilter_tapset=0; |
int pf_on; |
int anti_collapse_rsv; |
@@ -1376,7 +1411,9 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, |
opus_val16 surround_masking=0; |
opus_val16 temporal_vbr=0; |
opus_val16 surround_trim = 0; |
- opus_int32 equiv_rate = 510000; |
+ opus_int32 equiv_rate; |
+ int hybrid; |
+ int weak_transient = 0; |
VARDECL(opus_val16, surround_dynalloc); |
ALLOC_STACK; |
@@ -1386,6 +1423,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, |
eBands = mode->eBands; |
start = st->start; |
end = st->end; |
+ hybrid = start != 0; |
tf_estimate = 0; |
if (nbCompressedBytes<2 || pcm==NULL) |
{ |
@@ -1409,12 +1447,14 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, |
oldBandE = (opus_val16*)(st->in_mem+CC*(overlap+COMBFILTER_MAXPERIOD)); |
oldLogE = oldBandE + CC*nbEBands; |
oldLogE2 = oldLogE + CC*nbEBands; |
+ energyError = oldLogE2 + CC*nbEBands; |
if (enc==NULL) |
{ |
- tell=1; |
+ tell0_frac=tell=1; |
nbFilledBytes=0; |
} else { |
+ tell0_frac=tell=ec_tell_frac(enc); |
tell=ec_tell(enc); |
nbFilledBytes=(tell+4)>>3; |
} |
@@ -1467,10 +1507,11 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, |
if (st->bitrate!=OPUS_BITRATE_MAX) |
nbCompressedBytes = IMAX(2, IMIN(nbCompressedBytes, |
(tmp+4*mode->Fs)/(8*mode->Fs)-!!st->signalling)); |
- effectiveBytes = nbCompressedBytes; |
+ effectiveBytes = nbCompressedBytes - nbFilledBytes; |
} |
+ equiv_rate = ((opus_int32)nbCompressedBytes*8*50 >> (3-LM)) - (40*C+20)*((400>>LM) - 50); |
if (st->bitrate != OPUS_BITRATE_MAX) |
- equiv_rate = st->bitrate - (40*C+20)*((400>>LM) - 50); |
+ equiv_rate = IMIN(equiv_rate, st->bitrate - (40*C+20)*((400>>LM) - 50)); |
if (enc==NULL) |
{ |
@@ -1558,8 +1599,8 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, |
{ |
int enabled; |
int qg; |
- enabled = ((st->lfe&&nbAvailableBytes>3) || nbAvailableBytes>12*C) && start==0 && !silence && !st->disable_pf |
- && st->complexity >= 5 && !(st->consec_transient && LM!=3 && st->variable_duration==OPUS_FRAMESIZE_VARIABLE); |
+ enabled = ((st->lfe&&nbAvailableBytes>3) || nbAvailableBytes>12*C) && !hybrid && !silence && !st->disable_pf |
+ && st->complexity >= 5; |
prefilter_tapset = st->tapset_decision; |
pf_on = run_prefilter(st, in, prefilter_mem, CC, N, prefilter_tapset, &pitch_index, &gain1, &qg, enabled, nbAvailableBytes); |
@@ -1568,7 +1609,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, |
pitch_change = 1; |
if (pf_on==0) |
{ |
- if(start==0 && tell+16<=total_bits) |
+ if(!hybrid && tell+16<=total_bits) |
ec_enc_bit_logp(enc, 0, 1); |
} else { |
/*This block is not gated by a total bits check only because |
@@ -1589,8 +1630,12 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, |
shortBlocks = 0; |
if (st->complexity >= 1 && !st->lfe) |
{ |
+ /* Reduces the likelihood of energy instability on fricatives at low bitrate |
+ in hybrid mode. It seems like we still want to have real transients on vowels |
+ though (small SILK quantization offset value). */ |
+ int allow_weak_transients = hybrid && effectiveBytes<15 && st->silk_info.offset >= 100; |
isTransient = transient_analysis(in, N+overlap, CC, |
- &tf_estimate, &tf_chan); |
+ &tf_estimate, &tf_chan, allow_weak_transients, &weak_transient); |
} |
if (LM>0 && ec_tell(enc)+3<=total_bits) |
{ |
@@ -1610,7 +1655,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, |
if (secondMdct) |
{ |
compute_mdcts(mode, 0, in, freq, C, CC, LM, st->upsample, st->arch); |
- compute_band_energies(mode, freq, bandE, effEnd, C, LM); |
+ compute_band_energies(mode, freq, bandE, effEnd, C, LM, st->arch); |
amp2Log2(mode, effEnd, end, bandE, bandLogE2, C); |
for (i=0;i<C*nbEBands;i++) |
bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT)); |
@@ -1619,7 +1664,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, |
compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch); |
if (CC==2&&C==1) |
tf_chan = 0; |
- compute_band_energies(mode, freq, bandE, effEnd, C, LM); |
+ compute_band_energies(mode, freq, bandE, effEnd, C, LM, st->arch); |
if (st->lfe) |
{ |
@@ -1634,7 +1679,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, |
ALLOC(surround_dynalloc, C*nbEBands, opus_val16); |
OPUS_CLEAR(surround_dynalloc, end); |
/* This computes how much masking takes place between surround channels */ |
- if (start==0&&st->energy_mask&&!st->lfe) |
+ if (!hybrid&&st->energy_mask&&!st->lfe) |
{ |
int mask_end; |
int midband; |
@@ -1736,14 +1781,14 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, |
/* Last chance to catch any transient we might have missed in the |
time-domain analysis */ |
- if (LM>0 && ec_tell(enc)+3<=total_bits && !isTransient && st->complexity>=5 && !st->lfe) |
+ if (LM>0 && ec_tell(enc)+3<=total_bits && !isTransient && st->complexity>=5 && !st->lfe && !hybrid) |
{ |
if (patch_transient_decision(bandLogE, oldBandE, nbEBands, start, end, C)) |
{ |
isTransient = 1; |
shortBlocks = M; |
compute_mdcts(mode, shortBlocks, in, freq, C, CC, LM, st->upsample, st->arch); |
- compute_band_energies(mode, freq, bandE, effEnd, C, LM); |
+ compute_band_energies(mode, freq, bandE, effEnd, C, LM, st->arch); |
amp2Log2(mode, effEnd, end, bandE, bandLogE, C); |
/* Compensate for the scaling of short vs long mdcts */ |
for (i=0;i<C*nbEBands;i++) |
@@ -1762,29 +1807,47 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, |
ALLOC(tf_res, nbEBands, int); |
/* Disable variable tf resolution for hybrid and at very low bitrate */ |
- if (effectiveBytes>=15*C && start==0 && st->complexity>=2 && !st->lfe) |
+ if (effectiveBytes>=15*C && !hybrid && st->complexity>=2 && !st->lfe) |
{ |
int lambda; |
- if (effectiveBytes<40) |
- lambda = 12; |
- else if (effectiveBytes<60) |
- lambda = 6; |
- else if (effectiveBytes<100) |
- lambda = 4; |
- else |
- lambda = 3; |
- lambda*=2; |
- tf_select = tf_analysis(mode, effEnd, isTransient, tf_res, lambda, X, N, LM, &tf_sum, tf_estimate, tf_chan); |
+ lambda = IMAX(5, 1280/effectiveBytes + 2); |
+ tf_select = tf_analysis(mode, effEnd, isTransient, tf_res, lambda, X, N, LM, tf_estimate, tf_chan); |
for (i=effEnd;i<end;i++) |
tf_res[i] = tf_res[effEnd-1]; |
+ } else if (hybrid && weak_transient) |
+ { |
+ /* For weak transients, we rely on the fact that improving time resolution using |
+ TF on a long window is imperfect and will not result in an energy collapse at |
+ low bitrate. */ |
+ for (i=0;i<end;i++) |
+ tf_res[i] = 1; |
+ tf_select=0; |
+ } else if (hybrid && effectiveBytes<15) |
+ { |
+ /* For low bitrate hybrid, we force temporal resolution to 5 ms rather than 2.5 ms. */ |
+ for (i=0;i<end;i++) |
+ tf_res[i] = 0; |
+ tf_select=isTransient; |
} else { |
- tf_sum = 0; |
for (i=0;i<end;i++) |
tf_res[i] = isTransient; |
tf_select=0; |
} |
ALLOC(error, C*nbEBands, opus_val16); |
+ c=0; |
+ do { |
+ for (i=start;i<end;i++) |
+ { |
+ /* When the energy is stable, slightly bias energy quantization towards |
+ the previous error to make the gain more stable (a constant offset is |
+ better than fluctuations). */ |
+ if (ABS32(SUB32(bandLogE[i+c*nbEBands], oldBandE[i+c*nbEBands])) < QCONST16(2.f, DB_SHIFT)) |
+ { |
+ bandLogE[i+c*nbEBands] -= MULT16_16_Q15(energyError[i+c*nbEBands], QCONST16(0.25f, 15)); |
+ } |
+ } |
+ } while (++c < C); |
quant_coarse_energy(mode, start, end, effEnd, bandLogE, |
oldBandE, total_bits, error, enc, |
C, LM, nbAvailableBytes, st->force_intra, |
@@ -1798,7 +1861,15 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, |
{ |
st->tapset_decision = 0; |
st->spread_decision = SPREAD_NORMAL; |
- } else if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C || start != 0) |
+ } else if (hybrid) |
+ { |
+ if (st->complexity == 0) |
+ st->spread_decision = SPREAD_NONE; |
+ else if (isTransient) |
+ st->spread_decision = SPREAD_NORMAL; |
+ else |
+ st->spread_decision = SPREAD_AGGRESSIVE; |
+ } else if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C) |
{ |
if (st->complexity == 0) |
st->spread_decision = SPREAD_NONE; |
@@ -1834,7 +1905,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, |
maxDepth = dynalloc_analysis(bandLogE, bandLogE2, nbEBands, start, end, C, offsets, |
st->lsb_depth, mode->logN, isTransient, st->vbr, st->constrained_vbr, |
- eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc); |
+ eBands, LM, effectiveBytes, &tot_boost, st->lfe, surround_dynalloc, &st->analysis); |
/* For LFE, everything interesting is in the first band */ |
if (st->lfe) |
offsets[0] = IMIN(8, effectiveBytes/3); |
@@ -1896,12 +1967,15 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, |
alloc_trim = 5; |
if (tell+(6<<BITRES) <= total_bits - total_boost) |
{ |
- if (st->lfe) |
+ if (start > 0 || st->lfe) |
+ { |
+ st->stereo_saving = 0; |
alloc_trim = 5; |
- else |
+ } else { |
alloc_trim = alloc_trim_analysis(mode, X, bandLogE, |
end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, |
- st->intensity, surround_trim, st->arch); |
+ st->intensity, surround_trim, equiv_rate, st->arch); |
+ } |
ec_enc_icdf(enc, alloc_trim, trim_icdf, 7); |
tell = ec_tell_frac(enc); |
} |
@@ -1919,17 +1993,36 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, |
/* Don't attempt to use more than 510 kb/s, even for frames smaller than 20 ms. |
The CELT allocator will just not be able to use more than that anyway. */ |
nbCompressedBytes = IMIN(nbCompressedBytes,1275>>(3-LM)); |
- base_target = vbr_rate - ((40*C+20)<<BITRES); |
+ if (!hybrid) |
+ { |
+ base_target = vbr_rate - ((40*C+20)<<BITRES); |
+ } else { |
+ base_target = IMAX(0, vbr_rate - ((9*C+4)<<BITRES)); |
+ } |
if (st->constrained_vbr) |
base_target += (st->vbr_offset>>lm_diff); |
- target = compute_vbr(mode, &st->analysis, base_target, LM, equiv_rate, |
+ if (!hybrid) |
+ { |
+ target = compute_vbr(mode, &st->analysis, base_target, LM, equiv_rate, |
st->lastCodedBands, C, st->intensity, st->constrained_vbr, |
st->stereo_saving, tot_boost, tf_estimate, pitch_change, maxDepth, |
- st->variable_duration, st->lfe, st->energy_mask!=NULL, surround_masking, |
+ st->lfe, st->energy_mask!=NULL, surround_masking, |
temporal_vbr); |
- |
+ } else { |
+ target = base_target; |
+ /* Tonal frames (offset<100) need more bits than noisy (offset>100) ones. */ |
+ if (st->silk_info.offset < 100) target += 12 << BITRES >> (3-LM); |
+ if (st->silk_info.offset > 100) target -= 18 << BITRES >> (3-LM); |
+ /* Boosting bitrate on transients and vowels with significant temporal |
+ spikes. */ |
+ target += (opus_int32)MULT16_16_Q14(tf_estimate-QCONST16(.25f,14), (50<<BITRES)); |
+ /* If we have a strong transient, let's make sure it has enough bits to code |
+ the first two bands, so that it can use folding rather than noise. */ |
+ if (tf_estimate > QCONST16(.7f,14)) |
+ target = IMAX(target, 50<<BITRES); |
+ } |
/* The current offset is removed from the target and the space used |
so far is added*/ |
target=target+tell; |
@@ -1937,11 +2030,16 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, |
result in the encoder running out of bits. |
The margin of 2 bytes ensures that none of the bust-prevention logic |
in the decoder will have triggered so far. */ |
- min_allowed = ((tell+total_boost+(1<<(BITRES+3))-1)>>(BITRES+3)) + 2 - nbFilledBytes; |
+ min_allowed = ((tell+total_boost+(1<<(BITRES+3))-1)>>(BITRES+3)) + 2; |
+ /* Take into account the 37 bits we need to have left in the packet to |
+ signal a redundant frame in hybrid mode. Creating a shorter packet would |
+ create an entropy coder desync. */ |
+ if (hybrid) |
+ min_allowed = IMAX(min_allowed, (tell0_frac+(37<<BITRES)+total_boost+(1<<(BITRES+3))-1)>>(BITRES+3)); |
nbAvailableBytes = (target+(1<<(BITRES+2)))>>(BITRES+3); |
nbAvailableBytes = IMAX(min_allowed,nbAvailableBytes); |
- nbAvailableBytes = IMIN(nbCompressedBytes,nbAvailableBytes+nbFilledBytes) - nbFilledBytes; |
+ nbAvailableBytes = IMIN(nbCompressedBytes,nbAvailableBytes); |
/* By how much did we "miss" the target on that frame */ |
delta = target - vbr_rate; |
@@ -1988,7 +2086,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, |
st->vbr_reservoir = 0; |
/*printf ("+%d\n", adjust);*/ |
} |
- nbCompressedBytes = IMIN(nbCompressedBytes,nbAvailableBytes+nbFilledBytes); |
+ nbCompressedBytes = IMIN(nbCompressedBytes,nbAvailableBytes); |
/*printf("%d\n", nbCompressedBytes*50*8);*/ |
/* This moves the raw bits to take into account the new compressed size */ |
ec_enc_shrink(enc, nbCompressedBytes); |
@@ -2038,7 +2136,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, |
quant_all_bands(1, mode, start, end, X, C==2 ? X+N : NULL, collapse_masks, |
bandE, pulses, shortBlocks, st->spread_decision, |
dual_stereo, st->intensity, tf_res, nbCompressedBytes*(8<<BITRES)-anti_collapse_rsv, |
- balance, enc, LM, codedBands, &st->rng, st->arch); |
+ balance, enc, LM, codedBands, &st->rng, st->complexity, st->arch, st->disable_inv); |
if (anti_collapse_rsv > 0) |
{ |
@@ -2049,6 +2147,14 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, |
ec_enc_bits(enc, anti_collapse_on, 1); |
} |
quant_energy_finalise(mode, start, end, oldBandE, error, fine_quant, fine_priority, nbCompressedBytes*8-ec_tell(enc), enc, C); |
+ OPUS_CLEAR(energyError, nbEBands*CC); |
+ c=0; |
+ do { |
+ for (i=start;i<end;i++) |
+ { |
+ energyError[i+c*nbEBands] = MAX16(-QCONST16(0.5f, 15), MIN16(QCONST16(0.5f, 15), error[i+c*nbEBands])); |
+ } |
+ } while (++c < C); |
if (silence) |
{ |
@@ -2321,10 +2427,24 @@ int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...) |
*value=st->lsb_depth; |
} |
break; |
- case OPUS_SET_EXPERT_FRAME_DURATION_REQUEST: |
+ case OPUS_SET_PHASE_INVERSION_DISABLED_REQUEST: |
{ |
opus_int32 value = va_arg(ap, opus_int32); |
- st->variable_duration = value; |
+ if(value<0 || value>1) |
+ { |
+ goto bad_arg; |
+ } |
+ st->disable_inv = value; |
+ } |
+ break; |
+ case OPUS_GET_PHASE_INVERSION_DISABLED_REQUEST: |
+ { |
+ opus_int32 *value = va_arg(ap, opus_int32*); |
+ if (!value) |
+ { |
+ goto bad_arg; |
+ } |
+ *value = st->disable_inv; |
} |
break; |
case OPUS_RESET_STATE: |
@@ -2368,6 +2488,13 @@ int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...) |
OPUS_COPY(&st->analysis, info, 1); |
} |
break; |
+ case CELT_SET_SILK_INFO_REQUEST: |
+ { |
+ SILKInfo *info = va_arg(ap, SILKInfo *); |
+ if (info) |
+ OPUS_COPY(&st->silk_info, info, 1); |
+ } |
+ break; |
case CELT_GET_MODE_REQUEST: |
{ |
const CELTMode ** value = va_arg(ap, const CELTMode**); |