Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(274)

Side by Side Diff: source/patched-ffmpeg-mt/libavcodec/aacpsy.c

Issue 3384002: ffmpeg source update for sep 09 (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/ffmpeg/
Patch Set: Created 10 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 /* 1 /*
2 * AAC encoder psychoacoustic model 2 * AAC encoder psychoacoustic model
3 * Copyright (C) 2008 Konstantin Shishkov 3 * Copyright (C) 2008 Konstantin Shishkov
4 * 4 *
5 * This file is part of FFmpeg. 5 * This file is part of FFmpeg.
6 * 6 *
7 * FFmpeg is free software; you can redistribute it and/or 7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public 8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either 9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version. 10 * version 2.1 of the License, or (at your option) any later version.
(...skipping 26 matching lines...) Expand all
37 37
38 /** 38 /**
39 * constants for 3GPP AAC psychoacoustic model 39 * constants for 3GPP AAC psychoacoustic model
40 * @{ 40 * @{
41 */ 41 */
42 #define PSY_3GPP_SPREAD_LOW 1.5f // spreading factor for ascending threshold spreading (15 dB/Bark) 42 #define PSY_3GPP_SPREAD_LOW 1.5f // spreading factor for ascending threshold spreading (15 dB/Bark)
43 #define PSY_3GPP_SPREAD_HI 3.0f // spreading factor for descending threshold spreading (30 dB/Bark) 43 #define PSY_3GPP_SPREAD_HI 3.0f // spreading factor for descending threshold spreading (30 dB/Bark)
44 44
45 #define PSY_3GPP_RPEMIN 0.01f 45 #define PSY_3GPP_RPEMIN 0.01f
46 #define PSY_3GPP_RPELEV 2.0f 46 #define PSY_3GPP_RPELEV 2.0f
47
48 /* LAME psy model constants */
49 #define PSY_LAME_FIR_LEN 21 ///< LAME psy model FIR order
50 #define AAC_BLOCK_SIZE_LONG 1024 ///< long block size
51 #define AAC_BLOCK_SIZE_SHORT 128 ///< short block size
52 #define AAC_NUM_BLOCKS_SHORT 8 ///< number of blocks in a short sequence
53 #define PSY_LAME_NUM_SUBBLOCKS 3 ///< Number of sub-blocks in each short block
54
47 /** 55 /**
48 * @} 56 * @}
49 */ 57 */
50 58
51 /** 59 /**
52 * information for single band used by 3GPP TS26.403-inspired psychoacoustic model 60 * information for single band used by 3GPP TS26.403-inspired psychoacoustic model
53 */ 61 */
54 typedef struct Psy3gppBand{ 62 typedef struct AacPsyBand{
55 float energy; ///< band energy 63 float energy; ///< band energy
56 float ffac; ///< form factor 64 float ffac; ///< form factor
57 float thr; ///< energy threshold 65 float thr; ///< energy threshold
58 float min_snr; ///< minimal SNR 66 float min_snr; ///< minimal SNR
59 float thr_quiet; ///< threshold in quiet 67 float thr_quiet; ///< threshold in quiet
60 }Psy3gppBand; 68 }AacPsyBand;
61 69
62 /** 70 /**
63 * single/pair channel context for psychoacoustic model 71 * single/pair channel context for psychoacoustic model
64 */ 72 */
65 typedef struct Psy3gppChannel{ 73 typedef struct AacPsyChannel{
66 Psy3gppBand band[128]; ///< bands information 74 AacPsyBand band[128]; ///< bands information
67 Psy3gppBand prev_band[128]; ///< bands information from the previous frame 75 AacPsyBand prev_band[128]; ///< bands information from the previous frame
68 76
69 float win_energy; ///< sliding average of channel energy 77 float win_energy; ///< sliding average of channel energy
70 float iir_state[2]; ///< hi-pass IIR filter state 78 float iir_state[2]; ///< hi-pass IIR filter state
71 uint8_t next_grouping; ///< stored grouping scheme for the next frame (in case of 8 short window sequence) 79 uint8_t next_grouping; ///< stored grouping scheme for the next frame (in case of 8 short window sequence)
72 enum WindowSequence next_window_seq; ///< window sequence to be used in the next frame 80 enum WindowSequence next_window_seq; ///< window sequence to be used in the next frame
73 }Psy3gppChannel; 81 /* LAME psy model specific members */
82 float attack_threshold; ///< attack threshold for this channel
83 float prev_energy_subshort[AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS];
84 int prev_attack; ///< attack value for the last short block in the previous sequence
85 }AacPsyChannel;
74 86
75 /** 87 /**
76 * psychoacoustic model frame type-dependent coefficients 88 * psychoacoustic model frame type-dependent coefficients
77 */ 89 */
78 typedef struct Psy3gppCoeffs{ 90 typedef struct AacPsyCoeffs{
79 float ath [64]; ///< absolute threshold of hearing per bands 91 float ath [64]; ///< absolute threshold of hearing per bands
80 float barks [64]; ///< Bark value for each spectral band in long frame 92 float barks [64]; ///< Bark value for each spectral band in long frame
81 float spread_low[64]; ///< spreading factor for low-to-high threshold spreading in long frame 93 float spread_low[64]; ///< spreading factor for low-to-high threshold spreading in long frame
82 float spread_hi [64]; ///< spreading factor for high-to-low threshold spreading in long frame 94 float spread_hi [64]; ///< spreading factor for high-to-low threshold spreading in long frame
83 }Psy3gppCoeffs; 95 }AacPsyCoeffs;
84 96
85 /** 97 /**
86 * 3GPP TS26.403-inspired psychoacoustic model specific data 98 * 3GPP TS26.403-inspired psychoacoustic model specific data
87 */ 99 */
88 typedef struct Psy3gppContext{ 100 typedef struct AacPsyContext{
89 Psy3gppCoeffs psy_coef[2]; 101 AacPsyCoeffs psy_coef[2];
90 Psy3gppChannel *ch; 102 AacPsyChannel *ch;
91 }Psy3gppContext; 103 }AacPsyContext;
104
105 /**
106 * LAME psy model preset struct
107 */
108 typedef struct {
109 int quality; ///< Quality to map the rest of the values to.
110 /* This is overloaded to be both kbps per channel in ABR mode, and
111 * requested quality in constant quality mode.
112 */
113 float st_lrm; ///< short threshold for L, R, and M channels
114 } PsyLamePreset;
115
116 /**
117 * LAME psy model preset table for ABR
118 */
119 static const PsyLamePreset psy_abr_map[] = {
120 /* TODO: Tuning. These were taken from LAME. */
121 /* kbps/ch st_lrm */
122 { 8, 6.60},
123 { 16, 6.60},
124 { 24, 6.60},
125 { 32, 6.60},
126 { 40, 6.60},
127 { 48, 6.60},
128 { 56, 6.60},
129 { 64, 6.40},
130 { 80, 6.00},
131 { 96, 5.60},
132 {112, 5.20},
133 {128, 5.20},
134 {160, 5.20}
135 };
136
137 /**
138 * LAME psy model preset table for constant quality
139 */
140 static const PsyLamePreset psy_vbr_map[] = {
141 /* vbr_q st_lrm */
142 { 0, 4.20},
143 { 1, 4.20},
144 { 2, 4.20},
145 { 3, 4.20},
146 { 4, 4.20},
147 { 5, 4.20},
148 { 6, 4.20},
149 { 7, 4.20},
150 { 8, 4.20},
151 { 9, 4.20},
152 {10, 4.20}
153 };
154
155 /**
156 * LAME psy model FIR coefficient table
157 */
158 static const float psy_fir_coeffs[] = {
159 -8.65163e-18 * 2, -0.00851586 * 2, -6.74764e-18 * 2, 0.0209036 * 2,
160 -3.36639e-17 * 2, -0.0438162 * 2, -1.54175e-17 * 2, 0.0931738 * 2,
161 -5.52212e-17 * 2, -0.313819 * 2
162 };
163
164 /**
165 * calculates the attack threshold for ABR from the above table for the LAME psy model
166 */
167 static float lame_calc_attack_threshold(int bitrate)
168 {
169 /* Assume max bitrate to start with */
170 int lower_range = 12, upper_range = 12;
171 int lower_range_kbps = psy_abr_map[12].quality;
172 int upper_range_kbps = psy_abr_map[12].quality;
173 int i;
174
175 /* Determine which bitrates the value specified falls between.
176 * If the loop ends without breaking, our above assumption of the maximum table bitrate was correct.
177 */
178 for (i = 1; i < 13; i++) {
179 if (FFMAX(bitrate, psy_abr_map[i].quality) != bitrate) {
180 upper_range = i;
181 upper_range_kbps = psy_abr_map[i ].quality;
182 lower_range = i - 1;
183 lower_range_kbps = psy_abr_map[i - 1].quality;
184 break; /* Upper range found */
185 }
186 }
187
188 /* Determine which range the value specified is closer to */
189 if ((upper_range_kbps - bitrate) > (bitrate - lower_range_kbps))
190 return psy_abr_map[lower_range].st_lrm;
191 return psy_abr_map[upper_range].st_lrm;
192 }
193
194 /**
195 * LAME psy model specific initialization
196 */
197 static void lame_window_init(AacPsyContext *ctx, AVCodecContext *avctx) {
198 int i;
199
200 for (i = 0; i < avctx->channels; i++) {
201 AacPsyChannel *pch = &ctx->ch[i];
202
203 if (avctx->flags & CODEC_FLAG_QSCALE)
204 pch->attack_threshold = psy_vbr_map[avctx->global_quality / FF_QP2LA MBDA].st_lrm;
205 else
206 pch->attack_threshold = lame_calc_attack_threshold(avctx->bit_rate / avctx->channels / 1000);
207
208 for (i = 0; i < AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS; i++)
209 pch->prev_energy_subshort[i] = 10.0f;
210 }
211 }
92 212
93 /** 213 /**
94 * Calculate Bark value for given line. 214 * Calculate Bark value for given line.
95 */ 215 */
96 static av_cold float calc_bark(float f) 216 static av_cold float calc_bark(float f)
97 { 217 {
98 return 13.3f * atanf(0.00076f * f) + 3.5f * atanf((f / 7500.0f) * (f / 7500. 0f)); 218 return 13.3f * atanf(0.00076f * f) + 3.5f * atanf((f / 7500.0f) * (f / 7500. 0f));
99 } 219 }
100 220
101 #define ATH_ADD 4 221 #define ATH_ADD 4
102 /** 222 /**
103 * Calculate ATH value for given frequency. 223 * Calculate ATH value for given frequency.
104 * Borrowed from Lame. 224 * Borrowed from Lame.
105 */ 225 */
106 static av_cold float ath(float f, float add) 226 static av_cold float ath(float f, float add)
107 { 227 {
108 f /= 1000.0f; 228 f /= 1000.0f;
109 return 3.64 * pow(f, -0.8) 229 return 3.64 * pow(f, -0.8)
110 - 6.8 * exp(-0.6 * (f - 3.4) * (f - 3.4)) 230 - 6.8 * exp(-0.6 * (f - 3.4) * (f - 3.4))
111 + 6.0 * exp(-0.15 * (f - 8.7) * (f - 8.7)) 231 + 6.0 * exp(-0.15 * (f - 8.7) * (f - 8.7))
112 + (0.6 + 0.04 * add) * 0.001 * f * f * f * f; 232 + (0.6 + 0.04 * add) * 0.001 * f * f * f * f;
113 } 233 }
114 234
115 static av_cold int psy_3gpp_init(FFPsyContext *ctx) { 235 static av_cold int psy_3gpp_init(FFPsyContext *ctx) {
116 Psy3gppContext *pctx; 236 AacPsyContext *pctx;
117 float bark; 237 float bark;
118 int i, j, g, start; 238 int i, j, g, start;
119 float prev, minscale, minath; 239 float prev, minscale, minath;
120 240
121 ctx->model_priv_data = av_mallocz(sizeof(Psy3gppContext)); 241 ctx->model_priv_data = av_mallocz(sizeof(AacPsyContext));
122 pctx = (Psy3gppContext*) ctx->model_priv_data; 242 pctx = (AacPsyContext*) ctx->model_priv_data;
123 243
124 minath = ath(3410, ATH_ADD); 244 minath = ath(3410, ATH_ADD);
125 for (j = 0; j < 2; j++) { 245 for (j = 0; j < 2; j++) {
126 Psy3gppCoeffs *coeffs = &pctx->psy_coef[j]; 246 AacPsyCoeffs *coeffs = &pctx->psy_coef[j];
127 float line_to_frequency = ctx->avctx->sample_rate / (j ? 256.f : 2048.0f ); 247 float line_to_frequency = ctx->avctx->sample_rate / (j ? 256.f : 2048.0f );
128 i = 0; 248 i = 0;
129 prev = 0.0; 249 prev = 0.0;
130 for (g = 0; g < ctx->num_bands[j]; g++) { 250 for (g = 0; g < ctx->num_bands[j]; g++) {
131 i += ctx->bands[j][g]; 251 i += ctx->bands[j][g];
132 bark = calc_bark((i-1) * line_to_frequency); 252 bark = calc_bark((i-1) * line_to_frequency);
133 coeffs->barks[g] = (bark + prev) / 2.0; 253 coeffs->barks[g] = (bark + prev) / 2.0;
134 prev = bark; 254 prev = bark;
135 } 255 }
136 for (g = 0; g < ctx->num_bands[j] - 1; g++) { 256 for (g = 0; g < ctx->num_bands[j] - 1; g++) {
137 coeffs->spread_low[g] = pow(10.0, -(coeffs->barks[g+1] - coeffs->bar ks[g]) * PSY_3GPP_SPREAD_LOW); 257 coeffs->spread_low[g] = pow(10.0, -(coeffs->barks[g+1] - coeffs->bar ks[g]) * PSY_3GPP_SPREAD_LOW);
138 coeffs->spread_hi [g] = pow(10.0, -(coeffs->barks[g+1] - coeffs->bar ks[g]) * PSY_3GPP_SPREAD_HI); 258 coeffs->spread_hi [g] = pow(10.0, -(coeffs->barks[g+1] - coeffs->bar ks[g]) * PSY_3GPP_SPREAD_HI);
139 } 259 }
140 start = 0; 260 start = 0;
141 for (g = 0; g < ctx->num_bands[j]; g++) { 261 for (g = 0; g < ctx->num_bands[j]; g++) {
142 minscale = ath(start * line_to_frequency, ATH_ADD); 262 minscale = ath(start * line_to_frequency, ATH_ADD);
143 for (i = 1; i < ctx->bands[j][g]; i++) 263 for (i = 1; i < ctx->bands[j][g]; i++)
144 minscale = FFMIN(minscale, ath((start + i) * line_to_frequency, ATH_ADD)); 264 minscale = FFMIN(minscale, ath((start + i) * line_to_frequency, ATH_ADD));
145 coeffs->ath[g] = minscale - minath; 265 coeffs->ath[g] = minscale - minath;
146 start += ctx->bands[j][g]; 266 start += ctx->bands[j][g];
147 } 267 }
148 } 268 }
149 269
150 pctx->ch = av_mallocz(sizeof(Psy3gppChannel) * ctx->avctx->channels); 270 pctx->ch = av_mallocz(sizeof(AacPsyChannel) * ctx->avctx->channels);
271
272 lame_window_init(pctx, ctx->avctx);
273
151 return 0; 274 return 0;
152 } 275 }
153 276
154 /** 277 /**
155 * IIR filter used in block switching decision 278 * IIR filter used in block switching decision
156 */ 279 */
157 static float iir_filter(int in, float state[2]) 280 static float iir_filter(int in, float state[2])
158 { 281 {
159 float ret; 282 float ret;
160 283
(...skipping 14 matching lines...) Expand all
175 * Tell encoder which window types to use. 298 * Tell encoder which window types to use.
176 * @see 3GPP TS26.403 5.4.1 "Blockswitching" 299 * @see 3GPP TS26.403 5.4.1 "Blockswitching"
177 */ 300 */
178 static FFPsyWindowInfo psy_3gpp_window(FFPsyContext *ctx, 301 static FFPsyWindowInfo psy_3gpp_window(FFPsyContext *ctx,
179 const int16_t *audio, const int16_t *la, 302 const int16_t *audio, const int16_t *la,
180 int channel, int prev_type) 303 int channel, int prev_type)
181 { 304 {
182 int i, j; 305 int i, j;
183 int br = ctx->avctx->bit_rate / ctx->avctx->channels; 306 int br = ctx->avctx->bit_rate / ctx->avctx->channels;
184 int attack_ratio = br <= 16000 ? 18 : 10; 307 int attack_ratio = br <= 16000 ? 18 : 10;
185 Psy3gppContext *pctx = (Psy3gppContext*) ctx->model_priv_data; 308 AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data;
186 Psy3gppChannel *pch = &pctx->ch[channel]; 309 AacPsyChannel *pch = &pctx->ch[channel];
187 uint8_t grouping = 0; 310 uint8_t grouping = 0;
188 int next_type = pch->next_window_seq; 311 int next_type = pch->next_window_seq;
189 FFPsyWindowInfo wi; 312 FFPsyWindowInfo wi;
190 313
191 memset(&wi, 0, sizeof(wi)); 314 memset(&wi, 0, sizeof(wi));
192 if (la) { 315 if (la) {
193 float s[8], v; 316 float s[8], v;
194 int switch_to_eight = 0; 317 int switch_to_eight = 0;
195 float sum = 0.0, sum2 = 0.0; 318 float sum = 0.0, sum2 = 0.0;
196 int attack_n = 0; 319 int attack_n = 0;
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
257 } 380 }
258 } 381 }
259 382
260 return wi; 383 return wi;
261 } 384 }
262 385
263 /** 386 /**
264 * Calculate band thresholds as suggested in 3GPP TS26.403 387 * Calculate band thresholds as suggested in 3GPP TS26.403
265 */ 388 */
266 static void psy_3gpp_analyze(FFPsyContext *ctx, int channel, 389 static void psy_3gpp_analyze(FFPsyContext *ctx, int channel,
267 const float *coefs, FFPsyWindowInfo *wi) 390 const float *coefs, const FFPsyWindowInfo *wi)
268 { 391 {
269 Psy3gppContext *pctx = (Psy3gppContext*) ctx->model_priv_data; 392 AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data;
270 Psy3gppChannel *pch = &pctx->ch[channel]; 393 AacPsyChannel *pch = &pctx->ch[channel];
271 int start = 0; 394 int start = 0;
272 int i, w, g; 395 int i, w, g;
273 const int num_bands = ctx->num_bands[wi->num_windows == 8]; 396 const int num_bands = ctx->num_bands[wi->num_windows == 8];
274 const uint8_t* band_sizes = ctx->bands[wi->num_windows == 8]; 397 const uint8_t* band_sizes = ctx->bands[wi->num_windows == 8];
275 Psy3gppCoeffs *coeffs = &pctx->psy_coef[wi->num_windows == 8]; 398 AacPsyCoeffs *coeffs = &pctx->psy_coef[wi->num_windows == 8];
276 399
277 //calculate energies, initial thresholds and related values - 5.4.2 "Threshold Calculation" 400 //calculate energies, initial thresholds and related values - 5.4.2 "Threshold Calculation"
278 for (w = 0; w < wi->num_windows*16; w += 16) { 401 for (w = 0; w < wi->num_windows*16; w += 16) {
279 for (g = 0; g < num_bands; g++) { 402 for (g = 0; g < num_bands; g++) {
280 Psy3gppBand *band = &pch->band[w+g]; 403 AacPsyBand *band = &pch->band[w+g];
281 band->energy = 0.0f; 404 band->energy = 0.0f;
282 for (i = 0; i < band_sizes[g]; i++) 405 for (i = 0; i < band_sizes[g]; i++)
283 band->energy += coefs[start+i] * coefs[start+i]; 406 band->energy += coefs[start+i] * coefs[start+i];
284 band->energy *= 1.0f / (512*512); 407 band->energy *= 1.0f / (512*512);
285 band->thr = band->energy * 0.001258925f; 408 band->thr = band->energy * 0.001258925f;
286 start += band_sizes[g]; 409 start += band_sizes[g];
287 410
288 ctx->psy_bands[channel*PSY_MAX_BANDS+w+g].energy = band->energy; 411 ctx->psy_bands[channel*PSY_MAX_BANDS+w+g].energy = band->energy;
289 } 412 }
290 } 413 }
291 //modify thresholds - spread, threshold in quiet - 5.4.3 "Spreaded Energy Calculation" 414 //modify thresholds - spread, threshold in quiet - 5.4.3 "Spreaded Energy Calculation"
292 for (w = 0; w < wi->num_windows*16; w += 16) { 415 for (w = 0; w < wi->num_windows*16; w += 16) {
293 Psy3gppBand *band = &pch->band[w]; 416 AacPsyBand *band = &pch->band[w];
294 for (g = 1; g < num_bands; g++) 417 for (g = 1; g < num_bands; g++)
295 band[g].thr = FFMAX(band[g].thr, band[g-1].thr * coeffs->spread_low[ g-1]); 418 band[g].thr = FFMAX(band[g].thr, band[g-1].thr * coeffs->spread_low[ g-1]);
296 for (g = num_bands - 2; g >= 0; g--) 419 for (g = num_bands - 2; g >= 0; g--)
297 band[g].thr = FFMAX(band[g].thr, band[g+1].thr * coeffs->spread_hi [ g]); 420 band[g].thr = FFMAX(band[g].thr, band[g+1].thr * coeffs->spread_hi [ g]);
298 for (g = 0; g < num_bands; g++) { 421 for (g = 0; g < num_bands; g++) {
299 band[g].thr_quiet = FFMAX(band[g].thr, coeffs->ath[g]); 422 band[g].thr_quiet = FFMAX(band[g].thr, coeffs->ath[g]);
300 if (wi->num_windows != 8 && wi->window_type[1] != EIGHT_SHORT_SEQUEN CE) 423 if (wi->num_windows != 8 && wi->window_type[1] != EIGHT_SHORT_SEQUEN CE)
301 band[g].thr_quiet = FFMAX(PSY_3GPP_RPEMIN*band[g].thr_quiet, 424 band[g].thr_quiet = FFMAX(PSY_3GPP_RPEMIN*band[g].thr_quiet,
302 FFMIN(band[g].thr_quiet, 425 FFMIN(band[g].thr_quiet,
303 PSY_3GPP_RPELEV*pch->prev_band[w+g].th r_quiet)); 426 PSY_3GPP_RPELEV*pch->prev_band[w+g].th r_quiet));
304 band[g].thr = FFMAX(band[g].thr, band[g].thr_quiet * 0.25); 427 band[g].thr = FFMAX(band[g].thr, band[g].thr_quiet * 0.25);
305 428
306 ctx->psy_bands[channel*PSY_MAX_BANDS+w+g].threshold = band[g].thr; 429 ctx->psy_bands[channel*PSY_MAX_BANDS+w+g].threshold = band[g].thr;
307 } 430 }
308 } 431 }
309 memcpy(pch->prev_band, pch->band, sizeof(pch->band)); 432 memcpy(pch->prev_band, pch->band, sizeof(pch->band));
310 } 433 }
311 434
312 static av_cold void psy_3gpp_end(FFPsyContext *apc) 435 static av_cold void psy_3gpp_end(FFPsyContext *apc)
313 { 436 {
314 Psy3gppContext *pctx = (Psy3gppContext*) apc->model_priv_data; 437 AacPsyContext *pctx = (AacPsyContext*) apc->model_priv_data;
315 av_freep(&pctx->ch); 438 av_freep(&pctx->ch);
316 av_freep(&apc->model_priv_data); 439 av_freep(&apc->model_priv_data);
317 } 440 }
318 441
442 static void lame_apply_block_type(AacPsyChannel *ctx, FFPsyWindowInfo *wi, int u selongblock)
443 {
444 int blocktype = ONLY_LONG_SEQUENCE;
445 if (uselongblock) {
446 if (ctx->next_window_seq == EIGHT_SHORT_SEQUENCE)
447 blocktype = LONG_STOP_SEQUENCE;
448 } else {
449 blocktype = EIGHT_SHORT_SEQUENCE;
450 if (ctx->next_window_seq == ONLY_LONG_SEQUENCE)
451 ctx->next_window_seq = LONG_START_SEQUENCE;
452 if (ctx->next_window_seq == LONG_STOP_SEQUENCE)
453 ctx->next_window_seq = EIGHT_SHORT_SEQUENCE;
454 }
455
456 wi->window_type[0] = ctx->next_window_seq;
457 ctx->next_window_seq = blocktype;
458 }
459
460 static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx,
461 const int16_t *audio, const int16_t *la,
462 int channel, int prev_type)
463 {
464 AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data;
465 AacPsyChannel *pch = &pctx->ch[channel];
466 int grouping = 0;
467 int uselongblock = 1;
468 int attacks[AAC_NUM_BLOCKS_SHORT + 1] = { 0 };
469 int i;
470 FFPsyWindowInfo wi;
471
472 memset(&wi, 0, sizeof(wi));
473 if (la) {
474 float hpfsmpl[AAC_BLOCK_SIZE_LONG];
475 float const *pf = hpfsmpl;
476 float attack_intensity[(AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOC KS];
477 float energy_subshort[(AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCK S];
478 float energy_short[AAC_NUM_BLOCKS_SHORT + 1] = { 0 };
479 int chans = ctx->avctx->channels;
480 const int16_t *firbuf = la + (AAC_BLOCK_SIZE_SHORT/4 - PSY_LAME_FIR_LEN) * chans;
481 int j, att_sum = 0;
482
483 /* LAME comment: apply high pass filter of fs/4 */
484 for (i = 0; i < AAC_BLOCK_SIZE_LONG; i++) {
485 float sum1, sum2;
486 sum1 = firbuf[(i + ((PSY_LAME_FIR_LEN - 1) / 2)) * chans];
487 sum2 = 0.0;
488 for (j = 0; j < ((PSY_LAME_FIR_LEN - 1) / 2) - 1; j += 2) {
489 sum1 += psy_fir_coeffs[j] * (firbuf[(i + j) * chans] + firbuf[(i + PSY_LAME_FIR_LEN - j) * chans]);
490 sum2 += psy_fir_coeffs[j + 1] * (firbuf[(i + j + 1) * chans] + f irbuf[(i + PSY_LAME_FIR_LEN - j - 1) * chans]);
491 }
492 hpfsmpl[i] = sum1 + sum2;
493 }
494
495 /* Calculate the energies of each sub-shortblock */
496 for (i = 0; i < PSY_LAME_NUM_SUBBLOCKS; i++) {
497 energy_subshort[i] = pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_ SHORT - 1) * PSY_LAME_NUM_SUBBLOCKS)];
498 assert(pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 2) * P SY_LAME_NUM_SUBBLOCKS + 1)] > 0);
499 attack_intensity[i] = energy_subshort[i] / pch->prev_energy_subshort [i + ((AAC_NUM_BLOCKS_SHORT - 2) * PSY_LAME_NUM_SUBBLOCKS + 1)];
500 energy_short[0] += energy_subshort[i];
501 }
502
503 for (i = 0; i < AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS; i++) {
504 float const *const pfe = pf + AAC_BLOCK_SIZE_LONG / (AAC_NUM_BLOCKS_ SHORT * PSY_LAME_NUM_SUBBLOCKS);
505 float p = 1.0f;
506 for (; pf < pfe; pf++)
507 if (p < fabsf(*pf))
508 p = fabsf(*pf);
509 pch->prev_energy_subshort[i] = energy_subshort[i + PSY_LAME_NUM_SUBB LOCKS] = p;
510 energy_short[1 + i / PSY_LAME_NUM_SUBBLOCKS] += p;
511 /* FIXME: The indexes below are [i + 3 - 2] in the LAME source.
512 * Obviously the 3 and 2 have some significance, or this would be just [i + 1]
513 * (which is what we use here). What the 3 stands for is ambiguous, as it is both
514 * number of short blocks, and the number of sub-short blocks.
515 * It seems that LAME is comparing each sub-block to sub-block + 1 in the
516 * previous block.
517 */
518 if (p > energy_subshort[i + 1])
519 p = p / energy_subshort[i + 1];
520 else if (energy_subshort[i + 1] > p * 10.0f)
521 p = energy_subshort[i + 1] / (p * 10.0f);
522 else
523 p = 0.0;
524 attack_intensity[i + PSY_LAME_NUM_SUBBLOCKS] = p;
525 }
526
527 /* compare energy between sub-short blocks */
528 for (i = 0; i < (AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS; i++ )
529 if (!attacks[i / PSY_LAME_NUM_SUBBLOCKS])
530 if (attack_intensity[i] > pch->attack_threshold)
531 attacks[i / PSY_LAME_NUM_SUBBLOCKS] = (i % PSY_LAME_NUM_SUBB LOCKS) + 1;
532
533 /* should have energy change between short blocks, in order to avoid periodic signals */
534 /* Good samples to show the effect are Trumpet test songs */
535 /* GB: tuned (1) to avoid too many short blocks for test sample TRUMPET */
536 /* RH: tuned (2) to let enough short blocks through for test sample FSOL and SNAPS */
537 for (i = 1; i < AAC_NUM_BLOCKS_SHORT + 1; i++) {
538 float const u = energy_short[i - 1];
539 float const v = energy_short[i];
540 float const m = FFMAX(u, v);
541 if (m < 40000) { /* (2) */
542 if (u < 1.7f * v && v < 1.7f * u) { /* (1) */
543 if (i == 1 && attacks[0] < attacks[i])
544 attacks[0] = 0;
545 attacks[i] = 0;
546 }
547 }
548 att_sum += attacks[i];
549 }
550
551 if (attacks[0] <= pch->prev_attack)
552 attacks[0] = 0;
553
554 att_sum += attacks[0];
555 /* 3 below indicates the previous attack happened in the last sub-block of the previous sequence */
556 if (pch->prev_attack == 3 || att_sum) {
557 uselongblock = 0;
558
559 if (attacks[1] && attacks[0])
560 attacks[1] = 0;
561 if (attacks[2] && attacks[1])
562 attacks[2] = 0;
563 if (attacks[3] && attacks[2])
564 attacks[3] = 0;
565 if (attacks[4] && attacks[3])
566 attacks[4] = 0;
567 if (attacks[5] && attacks[4])
568 attacks[5] = 0;
569 if (attacks[6] && attacks[5])
570 attacks[6] = 0;
571 if (attacks[7] && attacks[6])
572 attacks[7] = 0;
573 if (attacks[8] && attacks[7])
574 attacks[8] = 0;
575 }
576 } else {
577 /* We have no lookahead info, so just use same type as the previous sequence. */
578 uselongblock = !(prev_type == EIGHT_SHORT_SEQUENCE);
579 }
580
581 lame_apply_block_type(pch, &wi, uselongblock);
582
583 wi.window_type[1] = prev_type;
584 if (wi.window_type[0] != EIGHT_SHORT_SEQUENCE) {
585 wi.num_windows = 1;
586 wi.grouping[0] = 1;
587 if (wi.window_type[0] == LONG_START_SEQUENCE)
588 wi.window_shape = 0;
589 else
590 wi.window_shape = 1;
591 } else {
592 int lastgrp = 0;
593
594 wi.num_windows = 8;
595 wi.window_shape = 0;
596 for (i = 0; i < 8; i++) {
597 if (!((pch->next_grouping >> i) & 1))
598 lastgrp = i;
599 wi.grouping[lastgrp]++;
600 }
601 }
602
603 /* Determine grouping, based on the location of the first attack, and save for
604 * the next frame.
605 * FIXME: Move this to analysis.
606 * TODO: Tune groupings depending on attack location
607 * TODO: Handle more than one attack in a group
608 */
609 for (i = 0; i < 9; i++) {
610 if (attacks[i]) {
611 grouping = i;
612 break;
613 }
614 }
615 pch->next_grouping = window_grouping[grouping];
616
617 pch->prev_attack = attacks[8];
618
619 return wi;
620 }
319 621
320 const FFPsyModel ff_aac_psy_model = 622 const FFPsyModel ff_aac_psy_model =
321 { 623 {
322 .name = "3GPP TS 26.403-inspired model", 624 .name = "3GPP TS 26.403-inspired model",
323 .init = psy_3gpp_init, 625 .init = psy_3gpp_init,
324 .window = psy_3gpp_window, 626 .window = psy_lame_window,
325 .analyze = psy_3gpp_analyze, 627 .analyze = psy_3gpp_analyze,
326 .end = psy_3gpp_end, 628 .end = psy_3gpp_end,
327 }; 629 };
OLDNEW
« no previous file with comments | « source/patched-ffmpeg-mt/libavcodec/aacenc.c ('k') | source/patched-ffmpeg-mt/libavcodec/adpcm.c » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698