source/patched-ffmpeg-mt/libavcodec/aacpsy.c - Issue 3384002: ffmpeg source update for sep 09

Side by Side Diff: source/patched-ffmpeg-mt/libavcodec/aacpsy.c

Issue 3384002: ffmpeg source update for sep 09 (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/ffmpeg/

Patch Set: Created 10 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 /*	1 /*

2 * AAC encoder psychoacoustic model	2 * AAC encoder psychoacoustic model

3 * Copyright (C) 2008 Konstantin Shishkov	3 * Copyright (C) 2008 Konstantin Shishkov

4 *	4 *

5 * This file is part of FFmpeg.	5 * This file is part of FFmpeg.

6 *	6 *

7 * FFmpeg is free software; you can redistribute it and/or	7 * FFmpeg is free software; you can redistribute it and/or

8 * modify it under the terms of the GNU Lesser General Public	8 * modify it under the terms of the GNU Lesser General Public

9 * License as published by the Free Software Foundation; either	9 * License as published by the Free Software Foundation; either

10 * version 2.1 of the License, or (at your option) any later version.	10 * version 2.1 of the License, or (at your option) any later version.

(...skipping 26 matching lines...) Expand all Loading...
37	37

38 /**	38 /**

39 * constants for 3GPP AAC psychoacoustic model	39 * constants for 3GPP AAC psychoacoustic model

40 * @{	40 * @{

41 */	41 */

42 #define PSY_3GPP_SPREAD_LOW 1.5f // spreading factor for ascending threshold sp reading (15 dB/Bark)	42 #define PSY_3GPP_SPREAD_LOW 1.5f // spreading factor for ascending threshold sp reading (15 dB/Bark)

43 #define PSY_3GPP_SPREAD_HI 3.0f // spreading factor for descending threshold s preading (30 dB/Bark)	43 #define PSY_3GPP_SPREAD_HI 3.0f // spreading factor for descending threshold s preading (30 dB/Bark)

44	44

45 #define PSY_3GPP_RPEMIN 0.01f	45 #define PSY_3GPP_RPEMIN 0.01f

46 #define PSY_3GPP_RPELEV 2.0f	46 #define PSY_3GPP_RPELEV 2.0f

	47

	48 /* LAME psy model constants */

	49 #define PSY_LAME_FIR_LEN 21 ///< LAME psy model FIR order

	50 #define AAC_BLOCK_SIZE_LONG 1024 ///< long block size

	51 #define AAC_BLOCK_SIZE_SHORT 128 ///< short block size

	52 #define AAC_NUM_BLOCKS_SHORT 8 ///< number of blocks in a short sequence

	53 #define PSY_LAME_NUM_SUBBLOCKS 3 ///< number of sub-blocks in each short block

	54

47 /**	55 /**

48 * @}	56 * @}

49 */	57 */

50	58

51 /**	59 /**

52 * information for single band used by 3GPP TS26.403-inspired psychoacoustic model	60 * information for single band used by 3GPP TS26.403-inspired psychoacoustic model

53 */	61 */

54 typedef struct Psy3gppBand{	62 typedef struct AacPsyBand{

55 float energy; ///< band energy	63 float energy; ///< band energy

56 float ffac; ///< form factor	64 float ffac; ///< form factor

57 float thr; ///< energy threshold	65 float thr; ///< energy threshold

58 float min_snr; ///< minimal SNR	66 float min_snr; ///< minimal SNR

59 float thr_quiet; ///< threshold in quiet	67 float thr_quiet; ///< threshold in quiet

60 }Psy3gppBand;	68 }AacPsyBand;

61	69

62 /**	70 /**

63 * single/pair channel context for psychoacoustic model	71 * single/pair channel context for psychoacoustic model

64 */	72 */

65 typedef struct Psy3gppChannel{	73 typedef struct AacPsyChannel{

66 Psy3gppBand band[128]; ///< bands information	74 AacPsyBand band[128]; ///< bands information

67 Psy3gppBand prev_band[128]; ///< bands information from the previou s frame	75 AacPsyBand prev_band[128]; ///< bands information from the previous frame

68	76

69 float win_energy; ///< sliding average of channel energy	77 float win_energy; ///< sliding average of channel energy

70 float iir_state[2]; ///< hi-pass IIR filter state	78 float iir_state[2]; ///< hi-pass IIR filter state

71 uint8_t next_grouping; ///< stored grouping scheme for the nex t frame (in case of 8 short window sequence)	79 uint8_t next_grouping; ///< stored grouping scheme for the nex t frame (in case of 8 short window sequence)

72 enum WindowSequence next_window_seq; ///< window sequence to be used in the next frame	80 enum WindowSequence next_window_seq; ///< window sequence to be used in the next frame

73 }Psy3gppChannel;	81 /* LAME psy model specific members */

	82 float attack_threshold; ///< attack threshold for this channel

	83 float prev_energy_subshort[AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS];

	84 int prev_attack; ///< attack value for the last short bl ock in the previous sequence

	85 }AacPsyChannel;

74	86

75 /**	87 /**

76 * psychoacoustic model frame type-dependent coefficients	88 * psychoacoustic model frame type-dependent coefficients

77 */	89 */

78 typedef struct Psy3gppCoeffs{	90 typedef struct AacPsyCoeffs{

79 float ath [64]; ///< absolute threshold of hearing per bands	91 float ath [64]; ///< absolute threshold of hearing per bands

80 float barks [64]; ///< Bark value for each spectral band in long frame	92 float barks [64]; ///< Bark value for each spectral band in long frame

81 float spread_low[64]; ///< spreading factor for low-to-high threshold spread ing in long frame	93 float spread_low[64]; ///< spreading factor for low-to-high threshold spread ing in long frame

82 float spread_hi [64]; ///< spreading factor for high-to-low threshold spread ing in long frame	94 float spread_hi [64]; ///< spreading factor for high-to-low threshold spread ing in long frame

83 }Psy3gppCoeffs;	95 }AacPsyCoeffs;

84	96

85 /**	97 /**

86 * 3GPP TS26.403-inspired psychoacoustic model specific data	98 * 3GPP TS26.403-inspired psychoacoustic model specific data

87 */	99 */

88 typedef struct Psy3gppContext{	100 typedef struct AacPsyContext{

89 Psy3gppCoeffs psy_coef[2];	101 AacPsyCoeffs psy_coef[2];

90 Psy3gppChannel *ch;	102 AacPsyChannel *ch;

91 }Psy3gppContext;	103 }AacPsyContext;

	104

	105 /**

	106 * LAME psy model preset struct

	107 */

	108 typedef struct {

	109 int quality; ///< Quality to map the rest of the values to.

	110 /* This is overloaded to be both kbps per channel in ABR mode, and

	111 * requested quality in constant quality mode.

	112 */

	113 float st_lrm; ///< short threshold for L, R, and M channels

	114 } PsyLamePreset;

	115

	116 /**

	117 * LAME psy model preset table for ABR: maps kbps per channel to the short threshold (st_lrm); entries are sorted by ascending bitrate

	118 */

	119 static const PsyLamePreset psy_abr_map[] = {

	120 /* TODO: Tuning. These were taken from LAME. */

	121 /* kbps/ch st_lrm */

	122 { 8, 6.60},

	123 { 16, 6.60},

	124 { 24, 6.60},

	125 { 32, 6.60},

	126 { 40, 6.60},

	127 { 48, 6.60},

	128 { 56, 6.60},

	129 { 64, 6.40},

	130 { 80, 6.00},

	131 { 96, 5.60},

	132 {112, 5.20},

	133 {128, 5.20},

	134 {160, 5.20}

	135 };

	136

	137 /**

	138 * LAME psy model preset table for constant quality: maps the quality index (0 to 10, also the array index) to the short threshold (st_lrm)

	139 */

	140 static const PsyLamePreset psy_vbr_map[] = {

	141 /* vbr_q st_lrm */

	142 { 0, 4.20},

	143 { 1, 4.20},

	144 { 2, 4.20},

	145 { 3, 4.20},

	146 { 4, 4.20},

	147 { 5, 4.20},

	148 { 6, 4.20},

	149 { 7, 4.20},

	150 { 8, 4.20},

	151 { 9, 4.20},

	152 {10, 4.20}

	153 };

	154

	155 /**

	156 * LAME psy model FIR coefficient table; psy_lame_window multiplies each entry by the sum of two samples taken on either side of a center sample, which is added without a coefficient

	157 */

	158 static const float psy_fir_coeffs[] = {

	159 -8.65163e-18 * 2, -0.00851586 * 2, -6.74764e-18 * 2, 0.0209036 * 2,

	160 -3.36639e-17 * 2, -0.0438162 * 2, -1.54175e-17 * 2, 0.0931738 * 2,

	161 -5.52212e-17 * 2, -0.313819 * 2

	162 };

	163

	164 /**

	165 * calculates the attack threshold for ABR from the above table for the LAME psy model

	166 */

	167 static float lame_calc_attack_threshold(int bitrate)

	168 {

	169 /* Assume max bitrate to start with */

	170 int lower_range = 12, upper_range = 12;

	171 int lower_range_kbps = psy_abr_map[12].quality;

	172 int upper_range_kbps = psy_abr_map[12].quality;

	173 int i;

	174

	175 /* Determine which bitrates the value specified falls between.

	176 * If the loop ends without breaking our above assumption of 320kbps was cor rect.

	177 */

	178 for (i = 1; i < 13; i++) {

	179 if (FFMAX(bitrate, psy_abr_map[i].quality) != bitrate) {

	180 upper_range = i;

	181 upper_range_kbps = psy_abr_map[i ].quality;

	182 lower_range = i - 1;

	183 lower_range_kbps = psy_abr_map[i - 1].quality;

	184 break; /* Upper range found */

	185 }

	186 }

	187

	188 /* Determine which range the value specified is closer to */

	189 if ((upper_range_kbps - bitrate) > (bitrate - lower_range_kbps))

	190 return psy_abr_map[lower_range].st_lrm;

	191 return psy_abr_map[upper_range].st_lrm;

	192 }

	193

	194 /**

	195 * LAME psy model specific initialization

	196 */

	197 static void lame_window_init(AacPsyContext ctx, AVCodecContext avctx) {

	198 int i;

	199

	200 for (i = 0; i < avctx->channels; i++) {

	201 AacPsyChannel *pch = &ctx->ch[i];

	202

	203 if (avctx->flags & CODEC_FLAG_QSCALE)

	204 pch->attack_threshold = psy_vbr_map[avctx->global_quality / FF_QP2LA MBDA].st_lrm;

	205 else

	206 pch->attack_threshold = lame_calc_attack_threshold(avctx->bit_rate / avctx->channels / 1000);

	207

	208 for (i = 0; i < AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS; i++)

	209 pch->prev_energy_subshort[i] = 10.0f;

	210 }

	211 }

92	212

93 /**	213 /**

94 * Calculate Bark value for given line.	214 * Calculate Bark value for given line.

95 */	215 */

96 static av_cold float calc_bark(float f)	216 static av_cold float calc_bark(float f)

97 {	217 {

98 return 13.3f * atanf(0.00076f * f) + 3.5f * atanf((f / 7500.0f) * (f / 7500. 0f));	218 return 13.3f * atanf(0.00076f * f) + 3.5f * atanf((f / 7500.0f) * (f / 7500. 0f));

99 }	219 }

100	220

101 #define ATH_ADD 4	221 #define ATH_ADD 4

102 /**	222 /**

103 * Calculate ATH value for given frequency.	223 * Calculate ATH value for given frequency.

104 * Borrowed from Lame.	224 * Borrowed from Lame.

105 */	225 */

106 static av_cold float ath(float f, float add)	226 static av_cold float ath(float f, float add)

107 {	227 {

108 f /= 1000.0f;	228 f /= 1000.0f;

109 return 3.64 * pow(f, -0.8)	229 return 3.64 * pow(f, -0.8)

110 - 6.8 * exp(-0.6 * (f - 3.4) * (f - 3.4))	230 - 6.8 * exp(-0.6 * (f - 3.4) * (f - 3.4))

111 + 6.0 * exp(-0.15 * (f - 8.7) * (f - 8.7))	231 + 6.0 * exp(-0.15 * (f - 8.7) * (f - 8.7))

112 + (0.6 + 0.04 * add) * 0.001 * f * f * f * f;	232 + (0.6 + 0.04 * add) * 0.001 * f * f * f * f;

113 }	233 }

114	234

115 static av_cold int psy_3gpp_init(FFPsyContext *ctx) {	235 static av_cold int psy_3gpp_init(FFPsyContext *ctx) {

116 Psy3gppContext *pctx;	236 AacPsyContext *pctx;

117 float bark;	237 float bark;

118 int i, j, g, start;	238 int i, j, g, start;

119 float prev, minscale, minath;	239 float prev, minscale, minath;

120	240

121 ctx->model_priv_data = av_mallocz(sizeof(Psy3gppContext));	241 ctx->model_priv_data = av_mallocz(sizeof(AacPsyContext));

122 pctx = (Psy3gppContext*) ctx->model_priv_data;	242 pctx = (AacPsyContext*) ctx->model_priv_data;

123	243

124 minath = ath(3410, ATH_ADD);	244 minath = ath(3410, ATH_ADD);

125 for (j = 0; j < 2; j++) {	245 for (j = 0; j < 2; j++) {

126 Psy3gppCoeffs *coeffs = &pctx->psy_coef[j];	246 AacPsyCoeffs *coeffs = &pctx->psy_coef[j];

127 float line_to_frequency = ctx->avctx->sample_rate / (j ? 256.f : 2048.0f );	247 float line_to_frequency = ctx->avctx->sample_rate / (j ? 256.f : 2048.0f );

128 i = 0;	248 i = 0;

129 prev = 0.0;	249 prev = 0.0;

130 for (g = 0; g < ctx->num_bands[j]; g++) {	250 for (g = 0; g < ctx->num_bands[j]; g++) {

131 i += ctx->bands[j][g];	251 i += ctx->bands[j][g];

132 bark = calc_bark((i-1) * line_to_frequency);	252 bark = calc_bark((i-1) * line_to_frequency);

133 coeffs->barks[g] = (bark + prev) / 2.0;	253 coeffs->barks[g] = (bark + prev) / 2.0;

134 prev = bark;	254 prev = bark;

135 }	255 }

136 for (g = 0; g < ctx->num_bands[j] - 1; g++) {	256 for (g = 0; g < ctx->num_bands[j] - 1; g++) {

137 coeffs->spread_low[g] = pow(10.0, -(coeffs->barks[g+1] - coeffs->bar ks[g]) * PSY_3GPP_SPREAD_LOW);	257 coeffs->spread_low[g] = pow(10.0, -(coeffs->barks[g+1] - coeffs->bar ks[g]) * PSY_3GPP_SPREAD_LOW);

138 coeffs->spread_hi [g] = pow(10.0, -(coeffs->barks[g+1] - coeffs->bar ks[g]) * PSY_3GPP_SPREAD_HI);	258 coeffs->spread_hi [g] = pow(10.0, -(coeffs->barks[g+1] - coeffs->bar ks[g]) * PSY_3GPP_SPREAD_HI);

139 }	259 }

140 start = 0;	260 start = 0;

141 for (g = 0; g < ctx->num_bands[j]; g++) {	261 for (g = 0; g < ctx->num_bands[j]; g++) {

142 minscale = ath(start * line_to_frequency, ATH_ADD);	262 minscale = ath(start * line_to_frequency, ATH_ADD);

143 for (i = 1; i < ctx->bands[j][g]; i++)	263 for (i = 1; i < ctx->bands[j][g]; i++)

144 minscale = FFMIN(minscale, ath((start + i) * line_to_frequency, ATH_ADD));	264 minscale = FFMIN(minscale, ath((start + i) * line_to_frequency, ATH_ADD));

145 coeffs->ath[g] = minscale - minath;	265 coeffs->ath[g] = minscale - minath;

146 start += ctx->bands[j][g];	266 start += ctx->bands[j][g];

147 }	267 }

148 }	268 }

149	269

150 pctx->ch = av_mallocz(sizeof(Psy3gppChannel) * ctx->avctx->channels);	270 pctx->ch = av_mallocz(sizeof(AacPsyChannel) * ctx->avctx->channels);

	271

	272 lame_window_init(pctx, ctx->avctx);

	273

151 return 0;	274 return 0;

152 }	275 }

153	276

154 /**	277 /**

155 * IIR filter used in block switching decision	278 * IIR filter used in block switching decision

156 */	279 */

157 static float iir_filter(int in, float state[2])	280 static float iir_filter(int in, float state[2])

158 {	281 {

159 float ret;	282 float ret;

160	283

(...skipping 14 matching lines...) Expand all Loading...
175 * Tell encoder which window types to use.	298 * Tell encoder which window types to use.

176 * @see 3GPP TS26.403 5.4.1 "Blockswitching"	299 * @see 3GPP TS26.403 5.4.1 "Blockswitching"

177 */	300 */

178 static FFPsyWindowInfo psy_3gpp_window(FFPsyContext *ctx,	301 static FFPsyWindowInfo psy_3gpp_window(FFPsyContext *ctx,

179 const int16_t audio, const int16_t la,	302 const int16_t audio, const int16_t la,

180 int channel, int prev_type)	303 int channel, int prev_type)

181 {	304 {

182 int i, j;	305 int i, j;

183 int br = ctx->avctx->bit_rate / ctx->avctx->channels;	306 int br = ctx->avctx->bit_rate / ctx->avctx->channels;

184 int attack_ratio = br <= 16000 ? 18 : 10;	307 int attack_ratio = br <= 16000 ? 18 : 10;

185 Psy3gppContext pctx = (Psy3gppContext) ctx->model_priv_data;	308 AacPsyContext pctx = (AacPsyContext) ctx->model_priv_data;

186 Psy3gppChannel *pch = &pctx->ch[channel];	309 AacPsyChannel *pch = &pctx->ch[channel];

187 uint8_t grouping = 0;	310 uint8_t grouping = 0;

188 int next_type = pch->next_window_seq;	311 int next_type = pch->next_window_seq;

189 FFPsyWindowInfo wi;	312 FFPsyWindowInfo wi;

190	313

191 memset(&wi, 0, sizeof(wi));	314 memset(&wi, 0, sizeof(wi));

192 if (la) {	315 if (la) {

193 float s[8], v;	316 float s[8], v;

194 int switch_to_eight = 0;	317 int switch_to_eight = 0;

195 float sum = 0.0, sum2 = 0.0;	318 float sum = 0.0, sum2 = 0.0;

196 int attack_n = 0;	319 int attack_n = 0;

(...skipping 60 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
257 }	380 }

258 }	381 }

259	382

260 return wi;	383 return wi;

261 }	384 }

262	385

263 /**	386 /**

264 * Calculate band thresholds as suggested in 3GPP TS26.403	387 * Calculate band thresholds as suggested in 3GPP TS26.403

265 */	388 */

266 static void psy_3gpp_analyze(FFPsyContext *ctx, int channel,	389 static void psy_3gpp_analyze(FFPsyContext *ctx, int channel,

267 const float coefs, FFPsyWindowInfo wi)	390 const float coefs, const FFPsyWindowInfo wi)

268 {	391 {

269 Psy3gppContext pctx = (Psy3gppContext) ctx->model_priv_data;	392 AacPsyContext pctx = (AacPsyContext) ctx->model_priv_data;

270 Psy3gppChannel *pch = &pctx->ch[channel];	393 AacPsyChannel *pch = &pctx->ch[channel];

271 int start = 0;	394 int start = 0;

272 int i, w, g;	395 int i, w, g;

273 const int num_bands = ctx->num_bands[wi->num_windows == 8];	396 const int num_bands = ctx->num_bands[wi->num_windows == 8];

274 const uint8_t* band_sizes = ctx->bands[wi->num_windows == 8];	397 const uint8_t* band_sizes = ctx->bands[wi->num_windows == 8];

275 Psy3gppCoeffs *coeffs = &pctx->psy_coef[wi->num_windows == 8];	398 AacPsyCoeffs *coeffs = &pctx->psy_coef[wi->num_windows == 8];

276	399

277 //calculate energies, initial thresholds and related values - 5.4.2 "Threshold Calculation"	400 //calculate energies, initial thresholds and related values - 5.4.2 "Threshold Calculation"

278 for (w = 0; w < wi->num_windows*16; w += 16) {	401 for (w = 0; w < wi->num_windows*16; w += 16) {

279 for (g = 0; g < num_bands; g++) {	402 for (g = 0; g < num_bands; g++) {

280 Psy3gppBand *band = &pch->band[w+g];	403 AacPsyBand *band = &pch->band[w+g];

281 band->energy = 0.0f;	404 band->energy = 0.0f;

282 for (i = 0; i < band_sizes[g]; i++)	405 for (i = 0; i < band_sizes[g]; i++)

283 band->energy += coefs[start+i] * coefs[start+i];	406 band->energy += coefs[start+i] * coefs[start+i];

284 band->energy = 1.0f / (512512);	407 band->energy = 1.0f / (512512);

285 band->thr = band->energy * 0.001258925f;	408 band->thr = band->energy * 0.001258925f;

286 start += band_sizes[g];	409 start += band_sizes[g];

287	410

288 ctx->psy_bands[channel*PSY_MAX_BANDS+w+g].energy = band->energy;	411 ctx->psy_bands[channel*PSY_MAX_BANDS+w+g].energy = band->energy;

289 }	412 }

290 }	413 }

291 //modify thresholds - spread, threshold in quiet - 5.4.3 "Spreaded Energy Calculation"	414 //modify thresholds - spread, threshold in quiet - 5.4.3 "Spreaded Energy Calculation"

292 for (w = 0; w < wi->num_windows*16; w += 16) {	415 for (w = 0; w < wi->num_windows*16; w += 16) {

293 Psy3gppBand *band = &pch->band[w];	416 AacPsyBand *band = &pch->band[w];

294 for (g = 1; g < num_bands; g++)	417 for (g = 1; g < num_bands; g++)

295 band[g].thr = FFMAX(band[g].thr, band[g-1].thr * coeffs->spread_low[ g-1]);	418 band[g].thr = FFMAX(band[g].thr, band[g-1].thr * coeffs->spread_low[ g-1]);

296 for (g = num_bands - 2; g >= 0; g--)	419 for (g = num_bands - 2; g >= 0; g--)

297 band[g].thr = FFMAX(band[g].thr, band[g+1].thr * coeffs->spread_hi [ g]);	420 band[g].thr = FFMAX(band[g].thr, band[g+1].thr * coeffs->spread_hi [ g]);

298 for (g = 0; g < num_bands; g++) {	421 for (g = 0; g < num_bands; g++) {

299 band[g].thr_quiet = FFMAX(band[g].thr, coeffs->ath[g]);	422 band[g].thr_quiet = FFMAX(band[g].thr, coeffs->ath[g]);

300 if (wi->num_windows != 8 && wi->window_type[1] != EIGHT_SHORT_SEQUEN CE)	423 if (wi->num_windows != 8 && wi->window_type[1] != EIGHT_SHORT_SEQUEN CE)

301 band[g].thr_quiet = FFMAX(PSY_3GPP_RPEMIN*band[g].thr_quiet,	424 band[g].thr_quiet = FFMAX(PSY_3GPP_RPEMIN*band[g].thr_quiet,

302 FFMIN(band[g].thr_quiet,	425 FFMIN(band[g].thr_quiet,

303 PSY_3GPP_RPELEV*pch->prev_band[w+g].th r_quiet));	426 PSY_3GPP_RPELEV*pch->prev_band[w+g].th r_quiet));

304 band[g].thr = FFMAX(band[g].thr, band[g].thr_quiet * 0.25);	427 band[g].thr = FFMAX(band[g].thr, band[g].thr_quiet * 0.25);

305	428

306 ctx->psy_bands[channel*PSY_MAX_BANDS+w+g].threshold = band[g].thr;	429 ctx->psy_bands[channel*PSY_MAX_BANDS+w+g].threshold = band[g].thr;

307 }	430 }

308 }	431 }

309 memcpy(pch->prev_band, pch->band, sizeof(pch->band));	432 memcpy(pch->prev_band, pch->band, sizeof(pch->band));

310 }	433 }

311	434

312 static av_cold void psy_3gpp_end(FFPsyContext *apc)	435 static av_cold void psy_3gpp_end(FFPsyContext *apc)

313 {	436 {

314 Psy3gppContext pctx = (Psy3gppContext) apc->model_priv_data;	437 AacPsyContext pctx = (AacPsyContext) apc->model_priv_data;

315 av_freep(&pctx->ch);	438 av_freep(&pctx->ch);

316 av_freep(&apc->model_priv_data);	439 av_freep(&apc->model_priv_data);

317 }	440 }

318	441

	442 static void lame_apply_block_type(AacPsyChannel ctx, FFPsyWindowInfo wi, int u selongblock)

	443 {

	444 int blocktype = ONLY_LONG_SEQUENCE;

	445 if (uselongblock) {

	446 if (ctx->next_window_seq == EIGHT_SHORT_SEQUENCE)

	447 blocktype = LONG_STOP_SEQUENCE;

	448 } else {

	449 blocktype = EIGHT_SHORT_SEQUENCE;

	450 if (ctx->next_window_seq == ONLY_LONG_SEQUENCE)

	451 ctx->next_window_seq = LONG_START_SEQUENCE;

	452 if (ctx->next_window_seq == LONG_STOP_SEQUENCE)

	453 ctx->next_window_seq = EIGHT_SHORT_SEQUENCE;

	454 }

	455

	456 wi->window_type[0] = ctx->next_window_seq;

	457 ctx->next_window_seq = blocktype;

	458 }

	459

	460 static FFPsyWindowInfo psy_lame_window(FFPsyContext *ctx,

	461 const int16_t audio, const int16_t la,

	462 int channel, int prev_type)

	463 {

	464 AacPsyContext pctx = (AacPsyContext) ctx->model_priv_data;

	465 AacPsyChannel *pch = &pctx->ch[channel];

	466 int grouping = 0;

	467 int uselongblock = 1;

	468 int attacks[AAC_NUM_BLOCKS_SHORT + 1] = { 0 };

	469 int i;

	470 FFPsyWindowInfo wi;

	471

	472 memset(&wi, 0, sizeof(wi));

	473 if (la) {

	474 float hpfsmpl[AAC_BLOCK_SIZE_LONG];

	475 float const *pf = hpfsmpl;

	476 float attack_intensity[(AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOC KS];

	477 float energy_subshort[(AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCK S];

	478 float energy_short[AAC_NUM_BLOCKS_SHORT + 1] = { 0 };

	479 int chans = ctx->avctx->channels;

	480 const int16_t firbuf = la + (AAC_BLOCK_SIZE_SHORT/4 - PSY_LAME_FIR_LEN) chans;

	481 int j, att_sum = 0;

	482

	483 /* LAME comment: apply high pass filter of fs/4 */

	484 for (i = 0; i < AAC_BLOCK_SIZE_LONG; i++) {

	485 float sum1, sum2;

	486 sum1 = firbuf[(i + ((PSY_LAME_FIR_LEN - 1) / 2)) * chans];

	487 sum2 = 0.0;

	488 for (j = 0; j < ((PSY_LAME_FIR_LEN - 1) / 2) - 1; j += 2) {

	489 sum1 += psy_fir_coeffs[j] * (firbuf[(i + j) * chans] + firbuf[(i + PSY_LAME_FIR_LEN - j) * chans]);

	490 sum2 += psy_fir_coeffs[j + 1] * (firbuf[(i + j + 1) * chans] + f irbuf[(i + PSY_LAME_FIR_LEN - j - 1) * chans]);

	491 }

	492 hpfsmpl[i] = sum1 + sum2;

	493 }

	494

	495 /* Calculate the energies of each sub-shortblock */

	496 for (i = 0; i < PSY_LAME_NUM_SUBBLOCKS; i++) {

	497 energy_subshort[i] = pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_ SHORT - 1) * PSY_LAME_NUM_SUBBLOCKS)];

	498 assert(pch->prev_energy_subshort[i + ((AAC_NUM_BLOCKS_SHORT - 2) * P SY_LAME_NUM_SUBBLOCKS + 1)] > 0);

	499 attack_intensity[i] = energy_subshort[i] / pch->prev_energy_subshort [i + ((AAC_NUM_BLOCKS_SHORT - 2) * PSY_LAME_NUM_SUBBLOCKS + 1)];

	500 energy_short[0] += energy_subshort[i];

	501 }

	502

	503 for (i = 0; i < AAC_NUM_BLOCKS_SHORT * PSY_LAME_NUM_SUBBLOCKS; i++) {

	504 float const const pfe = pf + AAC_BLOCK_SIZE_LONG / (AAC_NUM_BLOCKS_ SHORT PSY_LAME_NUM_SUBBLOCKS);

	505 float p = 1.0f;

	506 for (; pf < pfe; pf++)

	507 if (p < fabsf(*pf))

	508 p = fabsf(*pf);

	509 pch->prev_energy_subshort[i] = energy_subshort[i + PSY_LAME_NUM_SUBB LOCKS] = p;

	510 energy_short[1 + i / PSY_LAME_NUM_SUBBLOCKS] += p;

	511 /* FIXME: The indexes below are [i + 3 - 2] in the LAME source.

	512 * Obviously the 3 and 2 have some significance, or this wo uld be just [i + 1]

	513 * (which is what we use here). What the 3 stands for is am bigious, as it is both

	514 * number of short blocks, and the number of sub-short bloc ks.

	515 * It seems that LAME is comparing each sub-block to sub-bl ock + 1 in the

	516 * previous block.

	517 */

	518 if (p > energy_subshort[i + 1])

	519 p = p / energy_subshort[i + 1];

	520 else if (energy_subshort[i + 1] > p * 10.0f)

	521 p = energy_subshort[i + 1] / (p * 10.0f);

	522 else

	523 p = 0.0;

	524 attack_intensity[i + PSY_LAME_NUM_SUBBLOCKS] = p;

	525 }

	526

	527 /* compare energy between sub-short blocks */

	528 for (i = 0; i < (AAC_NUM_BLOCKS_SHORT + 1) * PSY_LAME_NUM_SUBBLOCKS; i++ )

	529 if (!attacks[i / PSY_LAME_NUM_SUBBLOCKS])

	530 if (attack_intensity[i] > pch->attack_threshold)

	531 attacks[i / PSY_LAME_NUM_SUBBLOCKS] = (i % PSY_LAME_NUM_SUBB LOCKS) + 1;

	532

	533 /* should have energy change between short blocks, in order to avoid per iodic signals */

	534 /* Good samples to show the effect are Trumpet test songs */

	535 /* GB: tuned (1) to avoid too many short blocks for test sample TRUMPET */

	536 /* RH: tuned (2) to let enough short blocks through for test sample FSOL and SNAPS */

	537 for (i = 1; i < AAC_NUM_BLOCKS_SHORT + 1; i++) {

	538 float const u = energy_short[i - 1];

	539 float const v = energy_short[i];

	540 float const m = FFMAX(u, v);

	541 if (m < 40000) { /* (2) */

	542 if (u < 1.7f * v && v < 1.7f * u) { /* (1) */

	543 if (i == 1 && attacks[0] < attacks[i])

	544 attacks[0] = 0;

	545 attacks[i] = 0;

	546 }

	547 }

	548 att_sum += attacks[i];

	549 }

	550

	551 if (attacks[0] <= pch->prev_attack)

	552 attacks[0] = 0;

	553

	554 att_sum += attacks[0];

	555 /* 3 below indicates the previous attack happened in the last sub-block of the previous sequence */

	556 if (pch->prev_attack == 3 \|\| att_sum) {

	557 uselongblock = 0;

	558

	559 if (attacks[1] && attacks[0])

	560 attacks[1] = 0;

	561 if (attacks[2] && attacks[1])

	562 attacks[2] = 0;

	563 if (attacks[3] && attacks[2])

	564 attacks[3] = 0;

	565 if (attacks[4] && attacks[3])

	566 attacks[4] = 0;

	567 if (attacks[5] && attacks[4])

	568 attacks[5] = 0;

	569 if (attacks[6] && attacks[5])

	570 attacks[6] = 0;

	571 if (attacks[7] && attacks[6])

	572 attacks[7] = 0;

	573 if (attacks[8] && attacks[7])

	574 attacks[8] = 0;

	575 }

	576 } else {

	577 /* We have no lookahead info, so just use same type as the previous sequ ence. */

	578 uselongblock = !(prev_type == EIGHT_SHORT_SEQUENCE);

	579 }

	580

	581 lame_apply_block_type(pch, &wi, uselongblock);

	582

	583 wi.window_type[1] = prev_type;

	584 if (wi.window_type[0] != EIGHT_SHORT_SEQUENCE) {

	585 wi.num_windows = 1;

	586 wi.grouping[0] = 1;

	587 if (wi.window_type[0] == LONG_START_SEQUENCE)

	588 wi.window_shape = 0;

	589 else

	590 wi.window_shape = 1;

	591 } else {

	592 int lastgrp = 0;

	593

	594 wi.num_windows = 8;

	595 wi.window_shape = 0;

	596 for (i = 0; i < 8; i++) {

	597 if (!((pch->next_grouping >> i) & 1))

	598 lastgrp = i;

	599 wi.grouping[lastgrp]++;

	600 }

	601 }

	602

	603 /* Determine grouping, based on the location of the first attack, and save f or

	604 * the next frame.

	605 * FIXME: Move this to analysis.

	606 * TODO: Tune groupings depending on attack location

	607 * TODO: Handle more than one attack in a group

	608 */

	609 for (i = 0; i < 9; i++) {

	610 if (attacks[i]) {

	611 grouping = i;

	612 break;

	613 }

	614 }

	615 pch->next_grouping = window_grouping[grouping];

	616

	617 pch->prev_attack = attacks[8];

	618

	619 return wi;

	620 }

319	621

320 const FFPsyModel ff_aac_psy_model =	622 const FFPsyModel ff_aac_psy_model =

321 {	623 {

322 .name = "3GPP TS 26.403-inspired model",	624 .name = "3GPP TS 26.403-inspired model",

323 .init = psy_3gpp_init,	625 .init = psy_3gpp_init,

324 .window = psy_3gpp_window,	626 .window = psy_lame_window,

325 .analyze = psy_3gpp_analyze,	627 .analyze = psy_3gpp_analyze,

326 .end = psy_3gpp_end,	628 .end = psy_3gpp_end,

327 };	629 };

OLD	NEW

« no previous file with comments | « source/patched-ffmpeg-mt/libavcodec/aacenc.c ('k') | source/patched-ffmpeg-mt/libavcodec/adpcm.c » ('j') | no next file with comments »