| Index: source/patched-ffmpeg-mt/libavcodec/aacpsy.c
|
| ===================================================================
|
| --- source/patched-ffmpeg-mt/libavcodec/aacpsy.c (revision 59334)
|
| +++ source/patched-ffmpeg-mt/libavcodec/aacpsy.c (working copy)
|
| @@ -44,6 +44,14 @@
|
|
|
| #define PSY_3GPP_RPEMIN 0.01f
|
| #define PSY_3GPP_RPELEV 2.0f
|
| +
|
| +/* LAME psy model constants */
|
| +#define PSY_LAME_FIR_LEN 21 ///< LAME psy model FIR order
|
| +#define AAC_BLOCK_SIZE_LONG 1024 ///< long block size, in samples
|
| +#define AAC_BLOCK_SIZE_SHORT 128 ///< short block size, in samples
|
| +#define AAC_NUM_BLOCKS_SHORT 8 ///< number of blocks in a short sequence
|
| +#define PSY_LAME_NUM_SUBBLOCKS 3 ///< number of sub-blocks each short block is split into for attack detection
|
| +
|
| /**
|
| * @}
|
| */
|
| @@ -51,46 +59,158 @@
|
| /**
|
| * information for single band used by 3GPP TS26.403-inspired psychoacoustic model
|
| */
|
| -typedef struct Psy3gppBand{
|
| +typedef struct AacPsyBand{
|
| float energy; ///< band energy
|
| float ffac; ///< form factor
|
| float thr; ///< energy threshold
|
| float min_snr; ///< minimal SNR
|
| float thr_quiet; ///< threshold in quiet
|
| -}Psy3gppBand;
|
| +}AacPsyBand;
|
|
|
| /**
|
| * single/pair channel context for psychoacoustic model
|
| */
|
| -typedef struct Psy3gppChannel{
|
| - Psy3gppBand band[128]; ///< bands information
|
| - Psy3gppBand prev_band[128]; ///< bands information from the previous frame
|
| +typedef struct AacPsyChannel{
|
| + AacPsyBand band[128]; ///< bands information
|
| + AacPsyBand prev_band[128]; ///< bands information from the previous frame
|
|
|
| float win_energy; ///< sliding average of channel energy
|
| float iir_state[2]; ///< hi-pass IIR filter state
|
| uint8_t next_grouping; ///< stored grouping scheme for the next frame (in case of 8 short window sequence)
|
| enum WindowSequence next_window_seq; ///< window sequence to be used in the next frame
|
| -}Psy3gppChannel;
|
| + /* LAME psy model specific members */
|
| + float attack_threshold; ///< attack threshold for this channel
|
| + float prev_energy_subshort[AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS]; ///< per-sub-block peak levels stored by the previous psy_lame_window() call
|
| + int prev_attack; ///< attack value for the last short block in the previous sequence
|
| +}AacPsyChannel;
|
|
|
| /**
|
| * psychoacoustic model frame type-dependent coefficients
|
| */
|
| -typedef struct Psy3gppCoeffs{
|
| +typedef struct AacPsyCoeffs{
|
| float ath [64]; ///< absolute threshold of hearing per bands
|
| float barks [64]; ///< Bark value for each spectral band in long frame
|
| float spread_low[64]; ///< spreading factor for low-to-high threshold spreading in long frame
|
| float spread_hi [64]; ///< spreading factor for high-to-low threshold spreading in long frame
|
| -}Psy3gppCoeffs;
|
| +}AacPsyCoeffs;
|
|
|
| /**
|
| * 3GPP TS26.403-inspired psychoacoustic model specific data
|
| */
|
| -typedef struct Psy3gppContext{
|
| - Psy3gppCoeffs psy_coef[2];
|
| - Psy3gppChannel *ch;
|
| -}Psy3gppContext;
|
| +typedef struct AacPsyContext{
|
| + AacPsyCoeffs psy_coef[2]; ///< coefficient sets: [0] for long-window frames, [1] for frames of 8 short windows
|
| + AacPsyChannel *ch; ///< per-channel state, allocated with one entry per avctx channel
|
| +}AacPsyContext;
|
|
|
| /**
|
| + * LAME psy model preset struct
|
| + */
|
| +typedef struct {
|
| + int quality; ///< Quality to map the rest of the values to.
|
| + /* This is overloaded to be both kbps per channel in ABR mode, and
|
| + * requested quality in constant quality mode.
|
| + */
|
| + float st_lrm; ///< short threshold for L, R, and M channels
|
| +} PsyLamePreset;
|
| +
|
| +/**
|
| + * LAME psy model preset table for ABR
|
| + */
|
| +static const PsyLamePreset psy_abr_map[] = {
|
| +/* TODO: Tuning. These were taken from LAME. Keep the rows in ascending kbps/ch order: lame_calc_attack_threshold() scans for the first larger entry. */
|
| +/* kbps/ch st_lrm */
|
| + { 8, 6.60},
|
| + { 16, 6.60},
|
| + { 24, 6.60},
|
| + { 32, 6.60},
|
| + { 40, 6.60},
|
| + { 48, 6.60},
|
| + { 56, 6.60},
|
| + { 64, 6.40},
|
| + { 80, 6.00},
|
| + { 96, 5.60},
|
| + {112, 5.20},
|
| + {128, 5.20},
|
| + {160, 5.20}
|
| +};
|
| +
|
| +/**
|
| + * LAME psy model preset table for constant quality; lame_window_init() indexes it with global_quality / FF_QP2LAMBDA
|
| + */
|
| +static const PsyLamePreset psy_vbr_map[] = {
|
| +/* vbr_q st_lrm */
|
| + { 0, 4.20},
|
| + { 1, 4.20},
|
| + { 2, 4.20},
|
| + { 3, 4.20},
|
| + { 4, 4.20},
|
| + { 5, 4.20},
|
| + { 6, 4.20},
|
| + { 7, 4.20},
|
| + { 8, 4.20},
|
| + { 9, 4.20},
|
| + {10, 4.20}
|
| +};
|
| +
|
| +/**
|
| + * LAME psy model FIR coefficient table: ten taps, each already doubled; psy_lame_window() applies every tap to a pair of input samples
|
| + */
|
| +static const float psy_fir_coeffs[] = {
|
| + -8.65163e-18 * 2, -0.00851586 * 2, -6.74764e-18 * 2, 0.0209036 * 2,
|
| + -3.36639e-17 * 2, -0.0438162 * 2, -1.54175e-17 * 2, 0.0931738 * 2,
|
| + -5.52212e-17 * 2, -0.313819 * 2
|
| +};
|
| +
|
| +/**
|
| + * Pick the attack threshold of the psy_abr_map preset nearest to bitrate (kbps per channel) for the LAME psy model
|
| + */
|
| +static float lame_calc_attack_threshold(int bitrate)
|
| +{
|
| +    /* Index of the last preset; it is the result when no entry lies above bitrate */
|
| +    const int last = sizeof(psy_abr_map) / sizeof(psy_abr_map[0]) - 1;
|
| +    int below = last;
|
| +    int above = last;
|
| +    int idx;
|
| +
|
| +    /* Scan from the second preset for the first one whose kbps exceeds bitrate;
|
| +     * bitrate then lies between that preset and the one just before it.
|
| +     */
|
| +    for (idx = 1; idx <= last; idx++) {
|
| +        if (bitrate < psy_abr_map[idx].quality) {
|
| +            above = idx;
|
| +            below = idx - 1;
|
| +            break; /* neighbouring presets found */
|
| +        }
|
| +    }
|
| +
|
| +    /* The lower preset wins only when bitrate is strictly nearer to it; ties go to the upper one */
|
| +    if (psy_abr_map[above].quality - bitrate > bitrate - psy_abr_map[below].quality)
|
| +        return psy_abr_map[below].st_lrm;
|
| +    return psy_abr_map[above].st_lrm;
|
| +}
|
| +
|
| +/**
|
| + * LAME psy model specific initialization: sets every channel's attack threshold and seeds its sub-block energy history
|
| + */
|
| +static void lame_window_init(AacPsyContext *ctx, AVCodecContext *avctx) {
|
| +    const int vbr_last = sizeof(psy_vbr_map) / sizeof(psy_vbr_map[0]) - 1;
|
| +    int i, j;
|
| +    for (i = 0; i < avctx->channels; i++) {
|
| +        AacPsyChannel *pch = &ctx->ch[i];
|
| +        int q = avctx->global_quality / FF_QP2LAMBDA; /* preset index; clamped below so it cannot leave psy_vbr_map */
|
| +        if (avctx->flags & CODEC_FLAG_QSCALE)
|
| +            pch->attack_threshold = psy_vbr_map[q < 0 ? 0 : q > vbr_last ? vbr_last : q].st_lrm;
|
| +        else
|
| +            pch->attack_threshold = lame_calc_attack_threshold(avctx->bit_rate / avctx->channels / 1000);
|
| +        /* separate index j: reusing i here clobbered the channel index and left later channels uninitialised */
|
| +        for (j = 0; j < AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS; j++)
|
| +            pch->prev_energy_subshort[j] = 10.0f;
|
| +    }
|
| +}
|
| +
|
| +/**
|
| * Calculate Bark value for given line.
|
| */
|
| static av_cold float calc_bark(float f)
|
| @@ -113,17 +233,17 @@
|
| }
|
|
|
| static av_cold int psy_3gpp_init(FFPsyContext *ctx) {
|
| - Psy3gppContext *pctx;
|
| + AacPsyContext *pctx;
|
| float bark;
|
| int i, j, g, start;
|
| float prev, minscale, minath;
|
|
|
| - ctx->model_priv_data = av_mallocz(sizeof(Psy3gppContext));
|
| - pctx = (Psy3gppContext*) ctx->model_priv_data;
|
| + ctx->model_priv_data = av_mallocz(sizeof(AacPsyContext));
|
| + pctx = (AacPsyContext*) ctx->model_priv_data;
|
|
|
| minath = ath(3410, ATH_ADD);
|
| for (j = 0; j < 2; j++) {
|
| - Psy3gppCoeffs *coeffs = &pctx->psy_coef[j];
|
| + AacPsyCoeffs *coeffs = &pctx->psy_coef[j];
|
| float line_to_frequency = ctx->avctx->sample_rate / (j ? 256.f : 2048.0f);
|
| i = 0;
|
| prev = 0.0;
|
| @@ -147,7 +267,10 @@
|
| }
|
| }
|
|
|
| - pctx->ch = av_mallocz(sizeof(Psy3gppChannel) * ctx->avctx->channels);
|
| + pctx->ch = av_mallocz(sizeof(AacPsyChannel) * ctx->avctx->channels);
|
| +
|
| + lame_window_init(pctx, ctx->avctx);
|
| +
|
| return 0;
|
| }
|
|
|
| @@ -182,8 +305,8 @@
|
| int i, j;
|
| int br = ctx->avctx->bit_rate / ctx->avctx->channels;
|
| int attack_ratio = br <= 16000 ? 18 : 10;
|
| - Psy3gppContext *pctx = (Psy3gppContext*) ctx->model_priv_data;
|
| - Psy3gppChannel *pch = &pctx->ch[channel];
|
| + AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data;
|
| + AacPsyChannel *pch = &pctx->ch[channel];
|
| uint8_t grouping = 0;
|
| int next_type = pch->next_window_seq;
|
| FFPsyWindowInfo wi;
|
| @@ -264,20 +387,20 @@
|
| * Calculate band thresholds as suggested in 3GPP TS26.403
|
| */
|
| static void psy_3gpp_analyze(FFPsyContext *ctx, int channel,
|
| - const float *coefs, FFPsyWindowInfo *wi)
|
| + const float *coefs, const FFPsyWindowInfo *wi)
|
| {
|
| - Psy3gppContext *pctx = (Psy3gppContext*) ctx->model_priv_data;
|
| - Psy3gppChannel *pch = &pctx->ch[channel];
|
| + AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data;
|
| + AacPsyChannel *pch = &pctx->ch[channel];
|
| int start = 0;
|
| int i, w, g;
|
| const int num_bands = ctx->num_bands[wi->num_windows == 8];
|
| const uint8_t* band_sizes = ctx->bands[wi->num_windows == 8];
|
| - Psy3gppCoeffs *coeffs = &pctx->psy_coef[wi->num_windows == 8];
|
| + AacPsyCoeffs *coeffs = &pctx->psy_coef[wi->num_windows == 8];
|
|
|
| //calculate energies, initial thresholds and related values - 5.4.2 "Threshold Calculation"
|
| for (w = 0; w < wi->num_windows*16; w += 16) {
|
| for (g = 0; g < num_bands; g++) {
|
| - Psy3gppBand *band = &pch->band[w+g];
|
| + AacPsyBand *band = &pch->band[w+g];
|
| band->energy = 0.0f;
|
| for (i = 0; i < band_sizes[g]; i++)
|
| band->energy += coefs[start+i] * coefs[start+i];
|
| @@ -290,7 +413,7 @@
|
| }
|
| //modify thresholds - spread, threshold in quiet - 5.4.3 "Spreaded Energy Calculation"
|
| for (w = 0; w < wi->num_windows*16; w += 16) {
|
| - Psy3gppBand *band = &pch->band[w];
|
| + AacPsyBand *band = &pch->band[w];
|
| for (g = 1; g < num_bands; g++)
|
| band[g].thr = FFMAX(band[g].thr, band[g-1].thr * coeffs->spread_low[g-1]);
|
| for (g = num_bands - 2; g >= 0; g--)
|
| @@ -311,17 +434,196 @@
|
|
|
| static av_cold void psy_3gpp_end(FFPsyContext *apc)
|
| {
|
| - Psy3gppContext *pctx = (Psy3gppContext*) apc->model_priv_data;
|
| + AacPsyContext *pctx = (AacPsyContext*) apc->model_priv_data;
|
| av_freep(&pctx->ch);
|
| av_freep(&apc->model_priv_data);
|
| }
|
|
|
| +static void lame_apply_block_type(AacPsyChannel *ctx, FFPsyWindowInfo *wi, int uselongblock)
|
| +{
|
| +    int following = ONLY_LONG_SEQUENCE; /* sequence to queue for the next call */
|
| +    if (!uselongblock) {
|
| +        /* short blocks requested: queue them and adapt the pending sequence to lead into them */
|
| +        following = EIGHT_SHORT_SEQUENCE;
|
| +        if (ctx->next_window_seq == ONLY_LONG_SEQUENCE)
|
| +            ctx->next_window_seq = LONG_START_SEQUENCE;
|
| +        if (ctx->next_window_seq == LONG_STOP_SEQUENCE)
|
| +            ctx->next_window_seq = EIGHT_SHORT_SEQUENCE;
|
| +    } else if (ctx->next_window_seq == EIGHT_SHORT_SEQUENCE) {
|
| +        following = LONG_STOP_SEQUENCE; /* long block after a pending short sequence */
|
| +    }
|
|
|
| +    wi->window_type[0] = ctx->next_window_seq; /* this frame uses the pending sequence */
|
| +    ctx->next_window_seq = following;
|
| +}
|
| +/** LAME-derived window decision: looks for attacks in the lookahead samples (la), returns this frame's window info and stores the sequence, grouping and attack state for the next call; audio is not read here. */
|
| +static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx,
|
| + const int16_t *audio, const int16_t *la,
|
| + int channel, int prev_type)
|
| +{
|
| + AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data;
|
| + AacPsyChannel *pch = &pctx->ch[channel];
|
| + int grouping = 0;
|
| + int uselongblock = 1;
|
| + int attacks[AAC_NUM_BLOCKS_SHORT + 1] = { 0 }; /* [0] is the short block carried over from the previous call, [1..8] the current ones */
|
| + int i;
|
| + FFPsyWindowInfo wi;
|
| +
|
| + memset(&wi, 0, sizeof(wi));
|
| + if (la) {
|
| + float hpfsmpl[AAC_BLOCK_SIZE_LONG];
|
| + float const *pf = hpfsmpl;
|
| + float attack_intensity[(AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS];
|
| + float energy_subshort[(AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS];
|
| + float energy_short[AAC_NUM_BLOCKS_SHORT + 1] = { 0 };
|
| + int chans = ctx->avctx->channels;
|
| + const int16_t *firbuf = la + (AAC_BLOCK_SIZE_SHORT/4 - PSY_LAME_FIR_LEN) * chans; /* 128/4 - 21 = 11 positions into la; every access below uses a stride of chans */
|
| + int j, att_sum = 0;
|
| +
|
| + /* LAME comment: apply high pass filter of fs/4 */
|
| + for (i = 0; i < AAC_BLOCK_SIZE_LONG; i++) {
|
| + float sum1, sum2;
|
| + sum1 = firbuf[(i + ((PSY_LAME_FIR_LEN - 1) / 2)) * chans];
|
| + sum2 = 0.0;
|
| + for (j = 0; j < ((PSY_LAME_FIR_LEN - 1) / 2) - 1; j += 2) { /* j = 0, 2, 4, 6, 8: consumes all ten psy_fir_coeffs entries */
|
| + sum1 += psy_fir_coeffs[j] * (firbuf[(i + j) * chans] + firbuf[(i + PSY_LAME_FIR_LEN - j) * chans]);
|
| + sum2 += psy_fir_coeffs[j + 1] * (firbuf[(i + j + 1) * chans] + firbuf[(i + PSY_LAME_FIR_LEN - j - 1) * chans]);
|
| + }
|
| + hpfsmpl[i] = sum1 + sum2;
|
| + }
|
| +
|
| + /* Calculate the energies of each sub-shortblock */
|
| + for (i = 0; i < PSY_LAME_NUM_SUBBLOCKS; i++) {
|
| + energy_subshort[i] = pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 1) * PSY_LAME_NUM_SUBBLOCKS)]; /* entries 21..23 stored by the previous call */
|
| + assert(pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 2) * PSY_LAME_NUM_SUBBLOCKS + 1)] > 0);
|
| + attack_intensity[i] = energy_subshort[i] / pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 2) * PSY_LAME_NUM_SUBBLOCKS + 1)];
|
| + energy_short[0] += energy_subshort[i];
|
| + }
|
| +
|
| + for (i = 0; i < AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS; i++) {
|
| + float const *const pfe = pf + AAC_BLOCK_SIZE_LONG / (AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS); /* 1024 / 24 = 42 samples per sub-block; 24 * 42 = 1008, so the last 16 filtered samples are never examined */
|
| + float p = 1.0f; /* becomes the peak magnitude of this sub-block, never below 1 */
|
| + for (; pf < pfe; pf++)
|
| + if (p < fabsf(*pf))
|
| + p = fabsf(*pf);
|
| + pch->prev_energy_subshort[i] = energy_subshort[i + PSY_LAME_NUM_SUBBLOCKS] = p;
|
| + energy_short[1 + i / PSY_LAME_NUM_SUBBLOCKS] += p;
|
| + /* FIXME: The indexes below are [i + 3 - 2] in the LAME source.
|
| + * Obviously the 3 and 2 have some significance, or this would be just [i + 1]
|
| + * (which is what we use here). What the 3 stands for is ambiguous, as it is both
|
| + * number of short blocks, and the number of sub-short blocks.
|
| + * It seems that LAME is comparing each sub-block to sub-block + 1 in the
|
| + * previous block.
|
| + */
|
| + if (p > energy_subshort[i + 1])
|
| + p = p / energy_subshort[i + 1];
|
| + else if (energy_subshort[i + 1] > p * 10.0f)
|
| + p = energy_subshort[i + 1] / (p * 10.0f);
|
| + else
|
| + p = 0.0;
|
| + attack_intensity[i + PSY_LAME_NUM_SUBBLOCKS] = p;
|
| + }
|
| +
|
| + /* compare energy between sub-short blocks */
|
| + for (i = 0; i < (AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS; i++)
|
| + if (!attacks[i / PSY_LAME_NUM_SUBBLOCKS])
|
| + if (attack_intensity[i] > pch->attack_threshold)
|
| + attacks[i / PSY_LAME_NUM_SUBBLOCKS] = (i % PSY_LAME_NUM_SUBBLOCKS) + 1; /* 1-based position of the first attacking sub-block */
|
| +
|
| + /* should have energy change between short blocks, in order to avoid periodic signals */
|
| + /* Good samples to show the effect are Trumpet test songs */
|
| + /* GB: tuned (1) to avoid too many short blocks for test sample TRUMPET */
|
| + /* RH: tuned (2) to let enough short blocks through for test sample FSOL and SNAPS */
|
| + for (i = 1; i < AAC_NUM_BLOCKS_SHORT + 1; i++) {
|
| + float const u = energy_short[i - 1];
|
| + float const v = energy_short[i];
|
| + float const m = FFMAX(u, v);
|
| + if (m < 40000) { /* (2) */
|
| + if (u < 1.7f * v && v < 1.7f * u) { /* (1) */
|
| + if (i == 1 && attacks[0] < attacks[i])
|
| + attacks[0] = 0;
|
| + attacks[i] = 0;
|
| + }
|
| + }
|
| + att_sum += attacks[i];
|
| + }
|
| +
|
| + if (attacks[0] <= pch->prev_attack)
|
| + attacks[0] = 0;
|
| +
|
| + att_sum += attacks[0];
|
| + /* 3 below indicates the previous attack happened in the last sub-block of the previous sequence */
|
| + if (pch->prev_attack == 3 || att_sum) {
|
| + uselongblock = 0;
|
| + /* clear any attack whose preceding short block still has one, working from block 1 up to block 8 */
|
| + if (attacks[1] && attacks[0])
|
| + attacks[1] = 0;
|
| + if (attacks[2] && attacks[1])
|
| + attacks[2] = 0;
|
| + if (attacks[3] && attacks[2])
|
| + attacks[3] = 0;
|
| + if (attacks[4] && attacks[3])
|
| + attacks[4] = 0;
|
| + if (attacks[5] && attacks[4])
|
| + attacks[5] = 0;
|
| + if (attacks[6] && attacks[5])
|
| + attacks[6] = 0;
|
| + if (attacks[7] && attacks[6])
|
| + attacks[7] = 0;
|
| + if (attacks[8] && attacks[7])
|
| + attacks[8] = 0;
|
| + }
|
| + } else {
|
| + /* We have no lookahead info, so just use same type as the previous sequence. */
|
| + uselongblock = !(prev_type == EIGHT_SHORT_SEQUENCE);
|
| + }
|
| +
|
| + lame_apply_block_type(pch, &wi, uselongblock);
|
| +
|
| + wi.window_type[1] = prev_type;
|
| + if (wi.window_type[0] != EIGHT_SHORT_SEQUENCE) {
|
| + wi.num_windows = 1;
|
| + wi.grouping[0] = 1;
|
| + if (wi.window_type[0] == LONG_START_SEQUENCE)
|
| + wi.window_shape = 0;
|
| + else
|
| + wi.window_shape = 1;
|
| + } else {
|
| + int lastgrp = 0;
|
| +
|
| + wi.num_windows = 8;
|
| + wi.window_shape = 0;
|
| + for (i = 0; i < 8; i++) {
|
| + if (!((pch->next_grouping >> i) & 1)) /* a clear bit i starts a new group at window i */
|
| + lastgrp = i;
|
| + wi.grouping[lastgrp]++;
|
| + }
|
| + }
|
| +
|
| + /* Determine grouping, based on the location of the first attack, and save for
|
| + * the next frame.
|
| + * FIXME: Move this to analysis.
|
| + * TODO: Tune groupings depending on attack location
|
| + * TODO: Handle more than one attack in a group
|
| + */
|
| + for (i = 0; i < 9; i++) {
|
| + if (attacks[i]) {
|
| + grouping = i;
|
| + break;
|
| + }
|
| + }
|
| + pch->next_grouping = window_grouping[grouping];
|
| + /* kept so the next call can compare its carried-over attacks[0] against it */
|
| + pch->prev_attack = attacks[8];
|
| +
|
| + return wi;
|
| +}
|
| +
|
| const FFPsyModel ff_aac_psy_model =
|
| {
|
| .name = "3GPP TS 26.403-inspired model",
|
| - .window = psy_3gpp_window,
|
| + .window = psy_lame_window, /* window decision now comes from the LAME-derived detector; init, analyze and end remain the 3GPP ones */
|
| .analyze = psy_3gpp_analyze,
|
| .end = psy_3gpp_end,
|
| };
|
|
|