| OLD | NEW |
| 1 /* | 1 /* |
| 2 * AAC encoder psychoacoustic model | 2 * AAC encoder psychoacoustic model |
| 3 * Copyright (C) 2008 Konstantin Shishkov | 3 * Copyright (C) 2008 Konstantin Shishkov |
| 4 * | 4 * |
| 5 * This file is part of FFmpeg. | 5 * This file is part of FFmpeg. |
| 6 * | 6 * |
| 7 * FFmpeg is free software; you can redistribute it and/or | 7 * FFmpeg is free software; you can redistribute it and/or |
| 8 * modify it under the terms of the GNU Lesser General Public | 8 * modify it under the terms of the GNU Lesser General Public |
| 9 * License as published by the Free Software Foundation; either | 9 * License as published by the Free Software Foundation; either |
| 10 * version 2.1 of the License, or (at your option) any later version. | 10 * version 2.1 of the License, or (at your option) any later version. |
| (...skipping 26 matching lines...) Expand all Loading... |
| 37 | 37 |
| 38 /** | 38 /** |
| 39 * constants for 3GPP AAC psychoacoustic model | 39 * constants for 3GPP AAC psychoacoustic model |
| 40 * @{ | 40 * @{ |
| 41 */ | 41 */ |
#define PSY_3GPP_SPREAD_LOW  1.5f // spreading factor for ascending threshold spreading  (15 dB/Bark)
#define PSY_3GPP_SPREAD_HI   3.0f // spreading factor for descending threshold spreading (30 dB/Bark)

/* Factors used when limiting the threshold in quiet against the previous frame:
 * the limited value never drops below PSY_3GPP_RPEMIN times the current value
 * and never exceeds PSY_3GPP_RPELEV times the previous frame's value. */
#define PSY_3GPP_RPEMIN      0.01f
#define PSY_3GPP_RPELEV      2.0f

/* LAME psy model constants */
#define PSY_LAME_FIR_LEN 21         ///< LAME psy model FIR order
#define AAC_BLOCK_SIZE_LONG 1024    ///< long block size
#define AAC_BLOCK_SIZE_SHORT 128    ///< short block size
#define AAC_NUM_BLOCKS_SHORT 8      ///< number of blocks in a short sequence
#define PSY_LAME_NUM_SUBBLOCKS 3    ///< Number of sub-blocks in each short block
| 54 |
| 47 /** | 55 /** |
| 48 * @} | 56 * @} |
| 49 */ | 57 */ |
| 50 | 58 |
/**
 * information for single band used by 3GPP TS26.403-inspired psychoacoustic model
 *
 * One instance exists per scalefactor band and window; the analysis step
 * fills energy, thr and thr_quiet.
 */
typedef struct AacPsyBand{
    float energy;    ///< band energy
    float ffac;      ///< form factor
    float thr;       ///< energy threshold
    float min_snr;   ///< minimal SNR
    float thr_quiet; ///< threshold in quiet
}AacPsyBand;
| 61 | 69 |
/**
 * single/pair channel context for psychoacoustic model
 *
 * The band arrays are indexed as [window * 16 + band].
 */
typedef struct AacPsyChannel{
    AacPsyBand band[128];               ///< bands information
    AacPsyBand prev_band[128];          ///< bands information from the previous frame

    float       win_energy;              ///< sliding average of channel energy
    float       iir_state[2];            ///< hi-pass IIR filter state
    uint8_t     next_grouping;           ///< stored grouping scheme for the next frame (in case of 8 short window sequence)
    enum WindowSequence next_window_seq; ///< window sequence to be used in the next frame
    /* LAME psy model specific members */
    float attack_threshold;              ///< attack threshold for this channel
    /** per sub-block peak values saved by the LAME window decision for use in the next call */
    float prev_energy_subshort[AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS];
    int   prev_attack;                   ///< attack value for the last short block in the previous sequence
}AacPsyChannel;
| 74 | 86 |
/**
 * psychoacoustic model frame type-dependent coefficients
 *
 * One instance is kept per frame type (long and short); every table is
 * indexed by scalefactor band.
 */
typedef struct AacPsyCoeffs{
    float ath       [64]; ///< absolute threshold of hearing per bands
    float barks     [64]; ///< Bark value for each spectral band of this frame type
    float spread_low[64]; ///< spreading factor for low-to-high threshold spreading between band g and g+1
    float spread_hi [64]; ///< spreading factor for high-to-low threshold spreading between band g+1 and g
}AacPsyCoeffs;
| 84 | 96 |
/**
 * 3GPP TS26.403-inspired psychoacoustic model specific data
 */
typedef struct AacPsyContext{
    AacPsyCoeffs psy_coef[2]; ///< per frame type coefficients: [0] long windows, [1] short windows
    AacPsyChannel *ch;        ///< per-channel state, one entry per channel, allocated at init
}AacPsyContext;
| 104 |
/**
 * LAME psy model preset struct
 *
 * Member order matters: the preset tables may rely on it for initialisation.
 */
typedef struct {
    int   quality;  ///< Quality to map the rest of the values to.
     /* This is overloaded to be both kbps per channel in ABR mode, and
      * requested quality in constant quality mode.
      */
    float st_lrm;   ///< short threshold for L, R, and M channels
} PsyLamePreset;
| 115 |
| 116 /** |
| 117 * LAME psy model preset table for ABR |
| 118 */ |
| 119 static const PsyLamePreset psy_abr_map[] = { |
| 120 /* TODO: Tuning. These were taken from LAME. */ |
| 121 /* kbps/ch st_lrm */ |
| 122 { 8, 6.60}, |
| 123 { 16, 6.60}, |
| 124 { 24, 6.60}, |
| 125 { 32, 6.60}, |
| 126 { 40, 6.60}, |
| 127 { 48, 6.60}, |
| 128 { 56, 6.60}, |
| 129 { 64, 6.40}, |
| 130 { 80, 6.00}, |
| 131 { 96, 5.60}, |
| 132 {112, 5.20}, |
| 133 {128, 5.20}, |
| 134 {160, 5.20} |
| 135 }; |
| 136 |
| 137 /** |
| 138 * LAME psy model preset table for constant quality |
| 139 */ |
| 140 static const PsyLamePreset psy_vbr_map[] = { |
| 141 /* vbr_q st_lrm */ |
| 142 { 0, 4.20}, |
| 143 { 1, 4.20}, |
| 144 { 2, 4.20}, |
| 145 { 3, 4.20}, |
| 146 { 4, 4.20}, |
| 147 { 5, 4.20}, |
| 148 { 6, 4.20}, |
| 149 { 7, 4.20}, |
| 150 { 8, 4.20}, |
| 151 { 9, 4.20}, |
| 152 {10, 4.20} |
| 153 }; |
| 154 |
/**
 * LAME psy model FIR coefficient table
 *
 * Ten coefficients, each stored pre-multiplied by two; the filter loop
 * applies every coefficient to a pair of input samples.
 */
static const float psy_fir_coeffs[] = {
    -8.65163e-18 * 2, /* [0] */
    -0.00851586  * 2, /* [1] */
    -6.74764e-18 * 2, /* [2] */
     0.0209036   * 2, /* [3] */
    -3.36639e-17 * 2, /* [4] */
    -0.0438162   * 2, /* [5] */
    -1.54175e-17 * 2, /* [6] */
     0.0931738   * 2, /* [7] */
    -5.52212e-17 * 2, /* [8] */
    -0.313819    * 2  /* [9] */
};
| 163 |
| 164 /** |
| 165 * calculates the attack threshold for ABR from the above table for the LAME psy
model |
| 166 */ |
| 167 static float lame_calc_attack_threshold(int bitrate) |
| 168 { |
| 169 /* Assume max bitrate to start with */ |
| 170 int lower_range = 12, upper_range = 12; |
| 171 int lower_range_kbps = psy_abr_map[12].quality; |
| 172 int upper_range_kbps = psy_abr_map[12].quality; |
| 173 int i; |
| 174 |
| 175 /* Determine which bitrates the value specified falls between. |
| 176 * If the loop ends without breaking our above assumption of 320kbps was cor
rect. |
| 177 */ |
| 178 for (i = 1; i < 13; i++) { |
| 179 if (FFMAX(bitrate, psy_abr_map[i].quality) != bitrate) { |
| 180 upper_range = i; |
| 181 upper_range_kbps = psy_abr_map[i ].quality; |
| 182 lower_range = i - 1; |
| 183 lower_range_kbps = psy_abr_map[i - 1].quality; |
| 184 break; /* Upper range found */ |
| 185 } |
| 186 } |
| 187 |
| 188 /* Determine which range the value specified is closer to */ |
| 189 if ((upper_range_kbps - bitrate) > (bitrate - lower_range_kbps)) |
| 190 return psy_abr_map[lower_range].st_lrm; |
| 191 return psy_abr_map[upper_range].st_lrm; |
| 192 } |
| 193 |
| 194 /** |
| 195 * LAME psy model specific initialization |
| 196 */ |
| 197 static void lame_window_init(AacPsyContext *ctx, AVCodecContext *avctx) { |
| 198 int i; |
| 199 |
| 200 for (i = 0; i < avctx->channels; i++) { |
| 201 AacPsyChannel *pch = &ctx->ch[i]; |
| 202 |
| 203 if (avctx->flags & CODEC_FLAG_QSCALE) |
| 204 pch->attack_threshold = psy_vbr_map[avctx->global_quality / FF_QP2LA
MBDA].st_lrm; |
| 205 else |
| 206 pch->attack_threshold = lame_calc_attack_threshold(avctx->bit_rate /
avctx->channels / 1000); |
| 207 |
| 208 for (i = 0; i < AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS; i++) |
| 209 pch->prev_energy_subshort[i] = 10.0f; |
| 210 } |
| 211 } |
| 92 | 212 |
| 93 /** | 213 /** |
| 94 * Calculate Bark value for given line. | 214 * Calculate Bark value for given line. |
| 95 */ | 215 */ |
| 96 static av_cold float calc_bark(float f) | 216 static av_cold float calc_bark(float f) |
| 97 { | 217 { |
| 98 return 13.3f * atanf(0.00076f * f) + 3.5f * atanf((f / 7500.0f) * (f / 7500.
0f)); | 218 return 13.3f * atanf(0.00076f * f) + 3.5f * atanf((f / 7500.0f) * (f / 7500.
0f)); |
| 99 } | 219 } |
| 100 | 220 |
| 101 #define ATH_ADD 4 | 221 #define ATH_ADD 4 |
| 102 /** | 222 /** |
| 103 * Calculate ATH value for given frequency. | 223 * Calculate ATH value for given frequency. |
| 104 * Borrowed from Lame. | 224 * Borrowed from Lame. |
| 105 */ | 225 */ |
| 106 static av_cold float ath(float f, float add) | 226 static av_cold float ath(float f, float add) |
| 107 { | 227 { |
| 108 f /= 1000.0f; | 228 f /= 1000.0f; |
| 109 return 3.64 * pow(f, -0.8) | 229 return 3.64 * pow(f, -0.8) |
| 110 - 6.8 * exp(-0.6 * (f - 3.4) * (f - 3.4)) | 230 - 6.8 * exp(-0.6 * (f - 3.4) * (f - 3.4)) |
| 111 + 6.0 * exp(-0.15 * (f - 8.7) * (f - 8.7)) | 231 + 6.0 * exp(-0.15 * (f - 8.7) * (f - 8.7)) |
| 112 + (0.6 + 0.04 * add) * 0.001 * f * f * f * f; | 232 + (0.6 + 0.04 * add) * 0.001 * f * f * f * f; |
| 113 } | 233 } |
| 114 | 234 |
| 115 static av_cold int psy_3gpp_init(FFPsyContext *ctx) { | 235 static av_cold int psy_3gpp_init(FFPsyContext *ctx) { |
| 116 Psy3gppContext *pctx; | 236 AacPsyContext *pctx; |
| 117 float bark; | 237 float bark; |
| 118 int i, j, g, start; | 238 int i, j, g, start; |
| 119 float prev, minscale, minath; | 239 float prev, minscale, minath; |
| 120 | 240 |
| 121 ctx->model_priv_data = av_mallocz(sizeof(Psy3gppContext)); | 241 ctx->model_priv_data = av_mallocz(sizeof(AacPsyContext)); |
| 122 pctx = (Psy3gppContext*) ctx->model_priv_data; | 242 pctx = (AacPsyContext*) ctx->model_priv_data; |
| 123 | 243 |
| 124 minath = ath(3410, ATH_ADD); | 244 minath = ath(3410, ATH_ADD); |
| 125 for (j = 0; j < 2; j++) { | 245 for (j = 0; j < 2; j++) { |
| 126 Psy3gppCoeffs *coeffs = &pctx->psy_coef[j]; | 246 AacPsyCoeffs *coeffs = &pctx->psy_coef[j]; |
| 127 float line_to_frequency = ctx->avctx->sample_rate / (j ? 256.f : 2048.0f
); | 247 float line_to_frequency = ctx->avctx->sample_rate / (j ? 256.f : 2048.0f
); |
| 128 i = 0; | 248 i = 0; |
| 129 prev = 0.0; | 249 prev = 0.0; |
| 130 for (g = 0; g < ctx->num_bands[j]; g++) { | 250 for (g = 0; g < ctx->num_bands[j]; g++) { |
| 131 i += ctx->bands[j][g]; | 251 i += ctx->bands[j][g]; |
| 132 bark = calc_bark((i-1) * line_to_frequency); | 252 bark = calc_bark((i-1) * line_to_frequency); |
| 133 coeffs->barks[g] = (bark + prev) / 2.0; | 253 coeffs->barks[g] = (bark + prev) / 2.0; |
| 134 prev = bark; | 254 prev = bark; |
| 135 } | 255 } |
| 136 for (g = 0; g < ctx->num_bands[j] - 1; g++) { | 256 for (g = 0; g < ctx->num_bands[j] - 1; g++) { |
| 137 coeffs->spread_low[g] = pow(10.0, -(coeffs->barks[g+1] - coeffs->bar
ks[g]) * PSY_3GPP_SPREAD_LOW); | 257 coeffs->spread_low[g] = pow(10.0, -(coeffs->barks[g+1] - coeffs->bar
ks[g]) * PSY_3GPP_SPREAD_LOW); |
| 138 coeffs->spread_hi [g] = pow(10.0, -(coeffs->barks[g+1] - coeffs->bar
ks[g]) * PSY_3GPP_SPREAD_HI); | 258 coeffs->spread_hi [g] = pow(10.0, -(coeffs->barks[g+1] - coeffs->bar
ks[g]) * PSY_3GPP_SPREAD_HI); |
| 139 } | 259 } |
| 140 start = 0; | 260 start = 0; |
| 141 for (g = 0; g < ctx->num_bands[j]; g++) { | 261 for (g = 0; g < ctx->num_bands[j]; g++) { |
| 142 minscale = ath(start * line_to_frequency, ATH_ADD); | 262 minscale = ath(start * line_to_frequency, ATH_ADD); |
| 143 for (i = 1; i < ctx->bands[j][g]; i++) | 263 for (i = 1; i < ctx->bands[j][g]; i++) |
| 144 minscale = FFMIN(minscale, ath((start + i) * line_to_frequency,
ATH_ADD)); | 264 minscale = FFMIN(minscale, ath((start + i) * line_to_frequency,
ATH_ADD)); |
| 145 coeffs->ath[g] = minscale - minath; | 265 coeffs->ath[g] = minscale - minath; |
| 146 start += ctx->bands[j][g]; | 266 start += ctx->bands[j][g]; |
| 147 } | 267 } |
| 148 } | 268 } |
| 149 | 269 |
| 150 pctx->ch = av_mallocz(sizeof(Psy3gppChannel) * ctx->avctx->channels); | 270 pctx->ch = av_mallocz(sizeof(AacPsyChannel) * ctx->avctx->channels); |
| 271 |
| 272 lame_window_init(pctx, ctx->avctx); |
| 273 |
| 151 return 0; | 274 return 0; |
| 152 } | 275 } |
| 153 | 276 |
| 154 /** | 277 /** |
| 155 * IIR filter used in block switching decision | 278 * IIR filter used in block switching decision |
| 156 */ | 279 */ |
| 157 static float iir_filter(int in, float state[2]) | 280 static float iir_filter(int in, float state[2]) |
| 158 { | 281 { |
| 159 float ret; | 282 float ret; |
| 160 | 283 |
| (...skipping 14 matching lines...) Expand all Loading... |
| 175 * Tell encoder which window types to use. | 298 * Tell encoder which window types to use. |
| 176 * @see 3GPP TS26.403 5.4.1 "Blockswitching" | 299 * @see 3GPP TS26.403 5.4.1 "Blockswitching" |
| 177 */ | 300 */ |
| 178 static FFPsyWindowInfo psy_3gpp_window(FFPsyContext *ctx, | 301 static FFPsyWindowInfo psy_3gpp_window(FFPsyContext *ctx, |
| 179 const int16_t *audio, const int16_t *la, | 302 const int16_t *audio, const int16_t *la, |
| 180 int channel, int prev_type) | 303 int channel, int prev_type) |
| 181 { | 304 { |
| 182 int i, j; | 305 int i, j; |
| 183 int br = ctx->avctx->bit_rate / ctx->avctx->channels; | 306 int br = ctx->avctx->bit_rate / ctx->avctx->channels; |
| 184 int attack_ratio = br <= 16000 ? 18 : 10; | 307 int attack_ratio = br <= 16000 ? 18 : 10; |
| 185 Psy3gppContext *pctx = (Psy3gppContext*) ctx->model_priv_data; | 308 AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data; |
| 186 Psy3gppChannel *pch = &pctx->ch[channel]; | 309 AacPsyChannel *pch = &pctx->ch[channel]; |
| 187 uint8_t grouping = 0; | 310 uint8_t grouping = 0; |
| 188 int next_type = pch->next_window_seq; | 311 int next_type = pch->next_window_seq; |
| 189 FFPsyWindowInfo wi; | 312 FFPsyWindowInfo wi; |
| 190 | 313 |
| 191 memset(&wi, 0, sizeof(wi)); | 314 memset(&wi, 0, sizeof(wi)); |
| 192 if (la) { | 315 if (la) { |
| 193 float s[8], v; | 316 float s[8], v; |
| 194 int switch_to_eight = 0; | 317 int switch_to_eight = 0; |
| 195 float sum = 0.0, sum2 = 0.0; | 318 float sum = 0.0, sum2 = 0.0; |
| 196 int attack_n = 0; | 319 int attack_n = 0; |
| (...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 257 } | 380 } |
| 258 } | 381 } |
| 259 | 382 |
| 260 return wi; | 383 return wi; |
| 261 } | 384 } |
| 262 | 385 |
| 263 /** | 386 /** |
| 264 * Calculate band thresholds as suggested in 3GPP TS26.403 | 387 * Calculate band thresholds as suggested in 3GPP TS26.403 |
| 265 */ | 388 */ |
| 266 static void psy_3gpp_analyze(FFPsyContext *ctx, int channel, | 389 static void psy_3gpp_analyze(FFPsyContext *ctx, int channel, |
| 267 const float *coefs, FFPsyWindowInfo *wi) | 390 const float *coefs, const FFPsyWindowInfo *wi) |
| 268 { | 391 { |
| 269 Psy3gppContext *pctx = (Psy3gppContext*) ctx->model_priv_data; | 392 AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data; |
| 270 Psy3gppChannel *pch = &pctx->ch[channel]; | 393 AacPsyChannel *pch = &pctx->ch[channel]; |
| 271 int start = 0; | 394 int start = 0; |
| 272 int i, w, g; | 395 int i, w, g; |
| 273 const int num_bands = ctx->num_bands[wi->num_windows == 8]; | 396 const int num_bands = ctx->num_bands[wi->num_windows == 8]; |
| 274 const uint8_t* band_sizes = ctx->bands[wi->num_windows == 8]; | 397 const uint8_t* band_sizes = ctx->bands[wi->num_windows == 8]; |
| 275 Psy3gppCoeffs *coeffs = &pctx->psy_coef[wi->num_windows == 8]; | 398 AacPsyCoeffs *coeffs = &pctx->psy_coef[wi->num_windows == 8]; |
| 276 | 399 |
| 277 //calculate energies, initial thresholds and related values - 5.4.2 "Thresho
ld Calculation" | 400 //calculate energies, initial thresholds and related values - 5.4.2 "Thresho
ld Calculation" |
| 278 for (w = 0; w < wi->num_windows*16; w += 16) { | 401 for (w = 0; w < wi->num_windows*16; w += 16) { |
| 279 for (g = 0; g < num_bands; g++) { | 402 for (g = 0; g < num_bands; g++) { |
| 280 Psy3gppBand *band = &pch->band[w+g]; | 403 AacPsyBand *band = &pch->band[w+g]; |
| 281 band->energy = 0.0f; | 404 band->energy = 0.0f; |
| 282 for (i = 0; i < band_sizes[g]; i++) | 405 for (i = 0; i < band_sizes[g]; i++) |
| 283 band->energy += coefs[start+i] * coefs[start+i]; | 406 band->energy += coefs[start+i] * coefs[start+i]; |
| 284 band->energy *= 1.0f / (512*512); | 407 band->energy *= 1.0f / (512*512); |
| 285 band->thr = band->energy * 0.001258925f; | 408 band->thr = band->energy * 0.001258925f; |
| 286 start += band_sizes[g]; | 409 start += band_sizes[g]; |
| 287 | 410 |
| 288 ctx->psy_bands[channel*PSY_MAX_BANDS+w+g].energy = band->energy; | 411 ctx->psy_bands[channel*PSY_MAX_BANDS+w+g].energy = band->energy; |
| 289 } | 412 } |
| 290 } | 413 } |
| 291 //modify thresholds - spread, threshold in quiet - 5.4.3 "Spreaded Energy Ca
lculation" | 414 //modify thresholds - spread, threshold in quiet - 5.4.3 "Spreaded Energy Ca
lculation" |
| 292 for (w = 0; w < wi->num_windows*16; w += 16) { | 415 for (w = 0; w < wi->num_windows*16; w += 16) { |
| 293 Psy3gppBand *band = &pch->band[w]; | 416 AacPsyBand *band = &pch->band[w]; |
| 294 for (g = 1; g < num_bands; g++) | 417 for (g = 1; g < num_bands; g++) |
| 295 band[g].thr = FFMAX(band[g].thr, band[g-1].thr * coeffs->spread_low[
g-1]); | 418 band[g].thr = FFMAX(band[g].thr, band[g-1].thr * coeffs->spread_low[
g-1]); |
| 296 for (g = num_bands - 2; g >= 0; g--) | 419 for (g = num_bands - 2; g >= 0; g--) |
| 297 band[g].thr = FFMAX(band[g].thr, band[g+1].thr * coeffs->spread_hi [
g]); | 420 band[g].thr = FFMAX(band[g].thr, band[g+1].thr * coeffs->spread_hi [
g]); |
| 298 for (g = 0; g < num_bands; g++) { | 421 for (g = 0; g < num_bands; g++) { |
| 299 band[g].thr_quiet = FFMAX(band[g].thr, coeffs->ath[g]); | 422 band[g].thr_quiet = FFMAX(band[g].thr, coeffs->ath[g]); |
| 300 if (wi->num_windows != 8 && wi->window_type[1] != EIGHT_SHORT_SEQUEN
CE) | 423 if (wi->num_windows != 8 && wi->window_type[1] != EIGHT_SHORT_SEQUEN
CE) |
| 301 band[g].thr_quiet = FFMAX(PSY_3GPP_RPEMIN*band[g].thr_quiet, | 424 band[g].thr_quiet = FFMAX(PSY_3GPP_RPEMIN*band[g].thr_quiet, |
| 302 FFMIN(band[g].thr_quiet, | 425 FFMIN(band[g].thr_quiet, |
| 303 PSY_3GPP_RPELEV*pch->prev_band[w+g].th
r_quiet)); | 426 PSY_3GPP_RPELEV*pch->prev_band[w+g].th
r_quiet)); |
| 304 band[g].thr = FFMAX(band[g].thr, band[g].thr_quiet * 0.25); | 427 band[g].thr = FFMAX(band[g].thr, band[g].thr_quiet * 0.25); |
| 305 | 428 |
| 306 ctx->psy_bands[channel*PSY_MAX_BANDS+w+g].threshold = band[g].thr; | 429 ctx->psy_bands[channel*PSY_MAX_BANDS+w+g].threshold = band[g].thr; |
| 307 } | 430 } |
| 308 } | 431 } |
| 309 memcpy(pch->prev_band, pch->band, sizeof(pch->band)); | 432 memcpy(pch->prev_band, pch->band, sizeof(pch->band)); |
| 310 } | 433 } |
| 311 | 434 |
| 312 static av_cold void psy_3gpp_end(FFPsyContext *apc) | 435 static av_cold void psy_3gpp_end(FFPsyContext *apc) |
| 313 { | 436 { |
| 314 Psy3gppContext *pctx = (Psy3gppContext*) apc->model_priv_data; | 437 AacPsyContext *pctx = (AacPsyContext*) apc->model_priv_data; |
| 315 av_freep(&pctx->ch); | 438 av_freep(&pctx->ch); |
| 316 av_freep(&apc->model_priv_data); | 439 av_freep(&apc->model_priv_data); |
| 317 } | 440 } |
| 318 | 441 |
| 442 static void lame_apply_block_type(AacPsyChannel *ctx, FFPsyWindowInfo *wi, int u
selongblock) |
| 443 { |
| 444 int blocktype = ONLY_LONG_SEQUENCE; |
| 445 if (uselongblock) { |
| 446 if (ctx->next_window_seq == EIGHT_SHORT_SEQUENCE) |
| 447 blocktype = LONG_STOP_SEQUENCE; |
| 448 } else { |
| 449 blocktype = EIGHT_SHORT_SEQUENCE; |
| 450 if (ctx->next_window_seq == ONLY_LONG_SEQUENCE) |
| 451 ctx->next_window_seq = LONG_START_SEQUENCE; |
| 452 if (ctx->next_window_seq == LONG_STOP_SEQUENCE) |
| 453 ctx->next_window_seq = EIGHT_SHORT_SEQUENCE; |
| 454 } |
| 455 |
| 456 wi->window_type[0] = ctx->next_window_seq; |
| 457 ctx->next_window_seq = blocktype; |
| 458 } |
| 459 |
/**
 * Tell encoder which window types to use, based on LAME-style attack
 * detection on the lookahead samples.
 *
 * @param ctx       psychoacoustic context
 * @param audio     not referenced by this function
 * @param la        lookahead samples, read with a stride of the channel
 *                  count; may be NULL, in which case the previous window
 *                  type decides
 * @param channel   index of the channel state to use and update
 * @param prev_type window sequence of the previous frame
 * @return window information (type, shape, number of windows, grouping)
 */
static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx,
                                       const int16_t *audio, const int16_t *la,
                                       int channel, int prev_type)
{
    AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data;
    AacPsyChannel *pch  = &pctx->ch[channel];
    int grouping     = 0;
    int uselongblock = 1;
    /* attacks[0] belongs to the last short block of the previous frame,
     * attacks[1..8] to the short blocks of the analysed frame; a non-zero
     * value is the 1-based sub-block in which the attack was found */
    int attacks[AAC_NUM_BLOCKS_SHORT + 1] = { 0 };
    int i;
    FFPsyWindowInfo wi;

    memset(&wi, 0, sizeof(wi));
    if (la) {
        float hpfsmpl[AAC_BLOCK_SIZE_LONG];
        float const *pf = hpfsmpl;
        float attack_intensity[(AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS];
        float energy_subshort[(AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS];
        float energy_short[AAC_NUM_BLOCKS_SHORT + 1] = { 0 };
        int chans = ctx->avctx->channels;
        const int16_t *firbuf = la + (AAC_BLOCK_SIZE_SHORT/4 - PSY_LAME_FIR_LEN) * chans;
        int j, att_sum = 0;

        /* LAME comment: apply high pass filter of fs/4 */
        for (i = 0; i < AAC_BLOCK_SIZE_LONG; i++) {
            float sum1, sum2;
            sum1 = firbuf[(i + ((PSY_LAME_FIR_LEN - 1) / 2)) * chans];
            sum2 = 0.0;
            /* two coefficients per iteration, each applied to a pair of samples */
            for (j = 0; j < ((PSY_LAME_FIR_LEN - 1) / 2) - 1; j += 2) {
                sum1 += psy_fir_coeffs[j] * (firbuf[(i + j) * chans] + firbuf[(i + PSY_LAME_FIR_LEN - j) * chans]);
                sum2 += psy_fir_coeffs[j + 1] * (firbuf[(i + j + 1) * chans] + firbuf[(i + PSY_LAME_FIR_LEN - j - 1) * chans]);
            }
            hpfsmpl[i] = sum1 + sum2;
        }

        /* Calculate the energies of each sub-shortblock */
        /* the first short block's worth of entries is taken from the values
         * saved for the last short block of the previous call */
        for (i = 0; i < PSY_LAME_NUM_SUBBLOCKS; i++) {
            energy_subshort[i] = pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 1) * PSY_LAME_NUM_SUBBLOCKS)];
            assert(pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 2) * PSY_LAME_NUM_SUBBLOCKS + 1)] > 0);
            attack_intensity[i] = energy_subshort[i] / pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 2) * PSY_LAME_NUM_SUBBLOCKS + 1)];
            energy_short[0] += energy_subshort[i];
        }

        /* pf keeps advancing across iterations; each sub-block covers
         * AAC_BLOCK_SIZE_LONG / 24 filtered samples (integer division) */
        for (i = 0; i < AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS; i++) {
            float const *const pfe = pf + AAC_BLOCK_SIZE_LONG / (AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS);
            /* the stored value is the peak magnitude of the sub-block,
             * never less than 1.0 */
            float p = 1.0f;
            for (; pf < pfe; pf++)
                if (p < fabsf(*pf))
                    p = fabsf(*pf);
            pch->prev_energy_subshort[i] = energy_subshort[i + PSY_LAME_NUM_SUBBLOCKS] = p;
            energy_short[1 + i / PSY_LAME_NUM_SUBBLOCKS] += p;
            /* FIXME: The indexes below are [i + 3 - 2] in the LAME source.
             *          Obviously the 3 and 2 have some significance, or this would be just [i + 1]
             *          (which is what we use here). What the 3 stands for is ambiguous, as it is both
             *          number of short blocks, and the number of sub-short blocks.
             *          It seems that LAME is comparing each sub-block to sub-block + 1 in the
             *          previous block.
             */
            if (p > energy_subshort[i + 1])
                p = p / energy_subshort[i + 1];
            else if (energy_subshort[i + 1] > p * 10.0f)
                p = energy_subshort[i + 1] / (p * 10.0f);
            else
                p = 0.0;
            attack_intensity[i + PSY_LAME_NUM_SUBBLOCKS] = p;
        }

        /* compare energy between sub-short blocks */
        /* only the first sub-block exceeding the threshold is recorded per short block */
        for (i = 0; i < (AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS; i++)
            if (!attacks[i / PSY_LAME_NUM_SUBBLOCKS])
                if (attack_intensity[i] > pch->attack_threshold)
                    attacks[i / PSY_LAME_NUM_SUBBLOCKS] = (i % PSY_LAME_NUM_SUBBLOCKS) + 1;

        /* should have energy change between short blocks, in order to avoid periodic signals */
        /* Good samples to show the effect are Trumpet test songs */
        /* GB: tuned (1) to avoid too many short blocks for test sample TRUMPET */
        /* RH: tuned (2) to let enough short blocks through for test sample FSOL and SNAPS */
        for (i = 1; i < AAC_NUM_BLOCKS_SHORT + 1; i++) {
            float const u = energy_short[i - 1];
            float const v = energy_short[i];
            float const m = FFMAX(u, v);
            if (m < 40000) {                          /* (2) */
                if (u < 1.7f * v && v < 1.7f * u) {   /* (1) */
                    if (i == 1 && attacks[0] < attacks[i])
                        attacks[0] = 0;
                    attacks[i] = 0;
                }
            }
            att_sum += attacks[i];
        }

        /* drop the carried-over attack unless it is later than the one
         * recorded for the previous call */
        if (attacks[0] <= pch->prev_attack)
            attacks[0] = 0;

        att_sum += attacks[0];
        /* 3 below indicates the previous attack happened in the last sub-block of the previous sequence */
        if (pch->prev_attack == 3 || att_sum) {
            uselongblock = 0;

            /* clear an attack when the preceding short block still has one;
             * evaluated in ascending order, so earlier clears affect later checks */
            if (attacks[1] && attacks[0])
                attacks[1] = 0;
            if (attacks[2] && attacks[1])
                attacks[2] = 0;
            if (attacks[3] && attacks[2])
                attacks[3] = 0;
            if (attacks[4] && attacks[3])
                attacks[4] = 0;
            if (attacks[5] && attacks[4])
                attacks[5] = 0;
            if (attacks[6] && attacks[5])
                attacks[6] = 0;
            if (attacks[7] && attacks[6])
                attacks[7] = 0;
            if (attacks[8] && attacks[7])
                attacks[8] = 0;
        }
    } else {
        /* We have no lookahead info, so just use same type as the previous sequence. */
        uselongblock = !(prev_type == EIGHT_SHORT_SEQUENCE);
    }

    lame_apply_block_type(pch, &wi, uselongblock);

    wi.window_type[1] = prev_type;
    if (wi.window_type[0] != EIGHT_SHORT_SEQUENCE) {
        wi.num_windows  = 1;
        wi.grouping[0]  = 1;
        if (wi.window_type[0] == LONG_START_SEQUENCE)
            wi.window_shape = 0;
        else
            wi.window_shape = 1;
    } else {
        int lastgrp = 0;

        wi.num_windows = 8;
        wi.window_shape = 0;
        /* expand the grouping bitmask stored by the previous call into group
         * lengths: a cleared bit starts a new group at that window */
        for (i = 0; i < 8; i++) {
            if (!((pch->next_grouping >> i) & 1))
                lastgrp = i;
            wi.grouping[lastgrp]++;
        }
    }

    /* Determine grouping, based on the location of the first attack, and save for
     * the next frame.
     * FIXME: Move this to analysis.
     * TODO: Tune groupings depending on attack location
     * TODO: Handle more than one attack in a group
     */
    for (i = 0; i < 9; i++) {
        if (attacks[i]) {
            grouping = i;
            break;
        }
    }
    /* window_grouping is a table defined outside this block */
    pch->next_grouping = window_grouping[grouping];

    pch->prev_attack = attacks[8];

    return wi;
}
| 319 | 621 |
| 320 const FFPsyModel ff_aac_psy_model = | 622 const FFPsyModel ff_aac_psy_model = |
| 321 { | 623 { |
| 322 .name = "3GPP TS 26.403-inspired model", | 624 .name = "3GPP TS 26.403-inspired model", |
| 323 .init = psy_3gpp_init, | 625 .init = psy_3gpp_init, |
| 324 .window = psy_3gpp_window, | 626 .window = psy_lame_window, |
| 325 .analyze = psy_3gpp_analyze, | 627 .analyze = psy_3gpp_analyze, |
| 326 .end = psy_3gpp_end, | 628 .end = psy_3gpp_end, |
| 327 }; | 629 }; |
| OLD | NEW |