media/formats/mp2t/es_parser_mpeg1audio.cc - Issue 506943003: Support MPEG1 audio in the MPEG2-TS stream parser.

Side by Side Diff: media/formats/mp2t/es_parser_mpeg1audio.cc

Issue 506943003: Support MPEG1 audio in the MPEG2-TS stream parser. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Created 6 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 // Copyright 2014 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include "media/formats/mp2t/es_parser_mpeg1audio.h"

	6

	7 #include <list>

	8

	9 #include "base/basictypes.h"

	10 #include "base/logging.h"

	11 #include "base/strings/string_number_conversions.h"

	12 #include "media/base/audio_timestamp_helper.h"

	13 #include "media/base/bit_reader.h"

	14 #include "media/base/buffers.h"

	15 #include "media/base/channel_layout.h"

	16 #include "media/base/stream_parser_buffer.h"

	17 #include "media/formats/common/offset_byte_queue.h"

	18 #include "media/formats/mp2t/mp2t_common.h"

	19

	20 namespace media {

	21 namespace mp2t {

	22

	23 // Map that determines which bitrate_index & channel_mode combinations

	24 // are allowed.
	wolenetz 2014/08/26 20:26:31 nit: describe columns and indices? (in case the re nit: describe columns and indices? (in case the referenced webpage becomes out of sync with this)
	25 // Derived from: http://mpgedit.org/mpgedit/mpeg_format/MP3Format.html

	26 static const bool kIsAllowed[17][4] = {
	wolenetz 2014/08/26 20:26:31 nit: s/17/16/ ? nit: s/17/16/ ?
	27 { true, true, true, true }, // free

	28 { true, false, false, false }, // 32

	29 { true, false, false, false }, // 48

	30 { true, false, false, false }, // 56

	31 { true, true, true, true }, // 64

	32 { true, false, false, false }, // 80

	33 { true, true, true, true }, // 96

	34 { true, true, true, true }, // 112

	35 { true, true, true, true }, // 128

	36 { true, true, true, true }, // 160

	37 { true, true, true, true }, // 192

	38 { false, true, true, true }, // 224

	39 { false, true, true, true }, // 256

	40 { false, true, true, true }, // 320

	41 { false, true, true, true }, // 384

	42 { false, false, false, false } // bad

	43 };

	44

	45 // Maps version and layer information in the frame header

	46 // into an index for the \|kBitrateMap\|.

	47 // Derived from: http://mpgedit.org/mpgedit/mpeg_format/MP3Format.html

	48 static const int kVersionLayerMap[4][4] = {

	49 // { reserved, L3, L2, L1 }

	50 { 5, 4, 4, 3 }, // MPEG 2.5

	51 { 5, 5, 5, 5 }, // reserved

	52 { 5, 4, 4, 3 }, // MPEG 2

	53 { 5, 2, 1, 0 } // MPEG 1

	54 };

	55

	56 // Maps the bitrate index field in the header and an index

	57 // from \|kVersionLayerMap\| to a frame bitrate.

	58 // Derived from: http://mpgedit.org/mpgedit/mpeg_format/MP3Format.html

	59 static const int kBitrateMap[16][6] = {

	60 // { V1L1, V1L2, V1L3, V2L1, V2L2 & V2L3, reserved }

	61 { 0, 0, 0, 0, 0, 0 },

	62 { 32, 32, 32, 32, 8, 0 },

	63 { 64, 48, 40, 48, 16, 0 },

	64 { 96, 56, 48, 56, 24, 0 },

	65 { 128, 64, 56, 64, 32, 0 },

	66 { 160, 80, 64, 80, 40, 0 },

	67 { 192, 96, 80, 96, 48, 0 },

	68 { 224, 112, 96, 112, 56, 0 },

	69 { 256, 128, 112, 128, 64, 0 },

	70 { 288, 160, 128, 144, 80, 0 },

	71 { 320, 192, 160, 160, 96, 0 },

	72 { 352, 224, 192, 176, 112, 0 },

	73 { 384, 256, 224, 192, 128, 0 },

	74 { 416, 320, 256, 224, 144, 0 },

	75 { 448, 384, 320, 256, 160, 0 },

	76 { 0, 0, 0, 0, 0}

	77 };

	78

	79 // Maps the sample rate index and version fields from the frame header

	80 // to a sample rate.

	81 // Derived from: http://mpgedit.org/mpgedit/mpeg_format/MP3Format.html

	82 static const int kSampleRateMap[4][4] = {

	83 // { V2.5, reserved, V2, V1 }

	84 { 11025, 0, 22050, 44100 },

	85 { 12000, 0, 24000, 48000 },

	86 { 8000, 0, 16000, 32000 },

	87 { 0, 0, 0, 0 }

	88 };

	89

	90 #if 0

	91 // Offset in bytes from the end of the MP3 header to "Xing" or "Info" tags which

	92 // indicate a frame is silent metadata frame. Values taken from FFmpeg.

	93 static const int kXingHeaderMap[2][2] = {{32, 17}, {17, 9}};

	94 #endif

	95

	96 // Frame header field constants.

	97 static const int kVersion2 = 2;

	98 static const int kVersionReserved = 1;

	99 static const int kVersion2_5 = 0;

	100 static const int kLayerReserved = 0;

	101 static const int kLayer1 = 3;

	102 static const int kLayer2 = 2;

	103 static const int kLayer3 = 1;

	104 static const int kBitrateFree = 0;

	105 static const int kBitrateBad = 0xf;

	106 static const int kSampleRateReserved = 3;

	107

	108 int ParseMpegAudioFrameHeader(const uint8* data,

	109 int size,

	110 int* frame_size,

	111 int* sample_rate,

	112 ChannelLayout* channel_layout,

	113 int* sample_count,

	114 bool* metadata_frame) {

	115 DCHECK(data);

	116 DCHECK_GE(size, 0);

	117 DCHECK(frame_size);

	118

	119 if (size < 4)

	120 return 0;

	121

	122 BitReader reader(data, size);

	123 int sync;

	124 int version;

	125 int layer;

	126 int is_protected;

	127 int bitrate_index;

	128 int sample_rate_index;

	129 int has_padding;

	130 int is_private;

	131 int channel_mode;

	132 int other_flags;

	133

	134 if (!reader.ReadBits(11, &sync) \|\|

	135 !reader.ReadBits(2, &version) \|\|

	136 !reader.ReadBits(2, &layer) \|\|

	137 !reader.ReadBits(1, &is_protected) \|\|

	138 !reader.ReadBits(4, &bitrate_index) \|\|

	139 !reader.ReadBits(2, &sample_rate_index) \|\|

	140 !reader.ReadBits(1, &has_padding) \|\|

	141 !reader.ReadBits(1, &is_private) \|\|

	142 !reader.ReadBits(2, &channel_mode) \|\|

	143 !reader.ReadBits(6, &other_flags)) {

	144 return -1;

	145 }

	146

	147 DVLOG(2) << "Header data :" << std::hex

	148 << " sync 0x" << sync

	149 << " version 0x" << version

	150 << " layer 0x" << layer

	151 << " bitrate_index 0x" << bitrate_index

	152 << " sample_rate_index 0x" << sample_rate_index

	153 << " channel_mode 0x" << channel_mode;

	154

	155 if (sync != 0x7ff \|\|

	156 version == kVersionReserved \|\|

	157 layer == kLayerReserved \|\|

	158 bitrate_index == kBitrateFree \|\| bitrate_index == kBitrateBad \|\|

	159 sample_rate_index == kSampleRateReserved) {

	160 return -1;

	161 }

	162

	163 if (layer == kLayer2 && kIsAllowed[bitrate_index][channel_mode]) {

	164 return -1;

	165 }

	166

	167 int bitrate = kBitrateMap[bitrate_index][kVersionLayerMap[version][layer]];

	168

	169 if (bitrate == 0) {

	170 return -1;

	171 }

	172

	173 DVLOG(2) << " bitrate " << bitrate;

	174

	175 int frame_sample_rate = kSampleRateMap[sample_rate_index][version];

	176 if (frame_sample_rate == 0) {

	177 return -1;

	178 }

	179

	180 if (sample_rate)

	181 *sample_rate = frame_sample_rate;

	182

	183 // http://teslabs.com/openplayer/docs/docs/specs/mp3_structure2.pdf

	184 // Table 2.1.5

	185 int samples_per_frame;

	186 switch (layer) {

	187 case kLayer1:

	188 samples_per_frame = 384;

	189 break;

	190

	191 case kLayer2:

	192 samples_per_frame = 1152;

	193 break;

	194

	195 case kLayer3:

	196 if (version == kVersion2 \|\| version == kVersion2_5)

	197 samples_per_frame = 576;

	198 else

	199 samples_per_frame = 1152;

	200 break;

	201

	202 default:

	203 return -1;

	204 }

	205

	206 if (sample_count)

	207 *sample_count = samples_per_frame;

	208

	209 // http://teslabs.com/openplayer/docs/docs/specs/mp3_structure2.pdf

	210 // Text just below Table 2.1.5.

	211 if (layer == kLayer1) {

	212 // This formulation is a slight variation on the equation below,

	213 // but has slightly different truncation characteristics to deal

	214 // with the fact that Layer 1 has 4 byte "slots" instead of single

	215 // byte ones.

	216 frame_size = 4 (12 * bitrate * 1000 / frame_sample_rate);

	217 } else {

	218 *frame_size =

	219 ((samples_per_frame / 8) * bitrate * 1000) / frame_sample_rate;

	220 }

	221

	222 if (has_padding)

	223 *frame_size += (layer == kLayer1) ? 4 : 1;

	224

	225 if (channel_layout) {

	226 // Map Stereo(0), Joint Stereo(1), and Dual Channel (2) to

	227 // CHANNEL_LAYOUT_STEREO and Single Channel (3) to CHANNEL_LAYOUT_MONO.

	228 *channel_layout =

	229 (channel_mode == 3) ? CHANNEL_LAYOUT_MONO : CHANNEL_LAYOUT_STEREO;

	230 }

	231

	232 if (metadata_frame)

	233 *metadata_frame = false;

	234

	235 const int header_bytes_read = reader.bits_read() / 8;

	236 #if 1

	237 return header_bytes_read;

	238 #else

	239 if (layer != kLayer3)

	240 return header_bytes_read;

	241

	242 // Check if this is a XING frame and tell the base parser to skip it if so.

	243 const int xing_header_index =

	244 kXingHeaderMap[version == kVersion2 \|\|

	245 version == kVersion2_5][channel_mode == 3];

	246 uint32_t tag = 0;

	247

	248 // It's not a XING frame if the frame isn't big enough to be one.

	249 if (*frame_size <

	250 header_bytes_read + xing_header_index + static_cast<int>(sizeof(tag))) {

	251 return header_bytes_read;

	252 }

	253

	254 // If we don't have enough data available to check, return 0 so frame parsing

	255 // will be retried once more data is available.

	256 if (!reader.SkipBits(xing_header_index * 8) \|\|

	257 !reader.ReadBits(sizeof(tag) * 8, &tag)) {

	258 return 0;

	259 }

	260

	261 // Check to see if the tag contains 'Xing' or 'Info'

	262 if (tag == 0x496e666f \|\| tag == 0x58696e67) {

	263 if (metadata_frame)

	264 *metadata_frame = true;

	265 return reader.bits_read() / 8;

	266 }

	267

	268 // If it wasn't a XING frame, just return the number consumed bytes.

	269 return header_bytes_read;

	270 #endif

	271 }

	272

	273 static const int kMpegAudioHeaderMinSize = 4;

	274

	275

	276 struct EsParserMpeg1Audio::Mpeg1AudioFrame {

	277 // Pointer to the ES data.

	278 const uint8* data;

	279

	280 // Frame size.

	281 int size;

	282

	283 // Number of samples in the frame.

	284 int sample_count;

	285

	286 // Frame offset in the ES queue.

	287 int64 queue_offset;

	288 };

	289

	290 bool EsParserMpeg1Audio::LookForMpeg1AudioFrame(

	291 Mpeg1AudioFrame* mpeg1audio_frame) {

	292 int es_size;

	293 const uint8* es;

	294 es_queue_->Peek(&es, &es_size);

	295

	296 int max_offset = es_size - kMpegAudioHeaderMinSize;

	297 if (max_offset <= 0)

	298 return false;

	299

	300 for (int offset = 0; offset < max_offset; offset++) {

	301 const uint8* cur_buf = &es[offset];

	302 if (cur_buf[0] != 0xff)

	303 continue;

	304

	305 int frame_size;

	306 int sample_rate;

	307 ChannelLayout channel_layout;

	308 int sample_count;

	309 bool metadata_frame;

	310

	311 int remaining_size = es_size - offset;

	312 int header_size =

	313 ParseMpegAudioFrameHeader(cur_buf, remaining_size,

	314 &frame_size, &sample_rate, &channel_layout,

	315 &sample_count, &metadata_frame);

	316

	317 if (header_size < 0)

	318 continue;

	319

	320 if (remaining_size < frame_size) {

	321 // Not a full frame: will resume when we have more data.

	322 es_queue_->Pop(offset);

	323 return false;

	324 }

	325

	326 // Check whether there is another frame

	327 // \|frame_size\| apart from the current one.

	328 if (remaining_size >= frame_size + 2 &&

	329 cur_buf[frame_size] != 0xff) {

	330 continue;

	331 }

	332

	333 es_queue_->Pop(offset);

	334 es_queue_->Peek(&mpeg1audio_frame->data, &es_size);

	335 mpeg1audio_frame->queue_offset = es_queue_->head();

	336 mpeg1audio_frame->size = frame_size;

	337 mpeg1audio_frame->sample_count = sample_count;

	338 DVLOG(LOG_LEVEL_ES)

	339 << "MPEG1 audio syncword @ pos=" << mpeg1audio_frame->queue_offset

	340 << " frame_size=" << mpeg1audio_frame->size;

	341 DVLOG(LOG_LEVEL_ES)

	342 << "MPEG1 audio header: "

	343 << base::HexEncode(mpeg1audio_frame->data, kMpegAudioHeaderMinSize);

	344 return true;

	345 }

	346

	347 es_queue_->Pop(max_offset);

	348 return false;

	349 }

	350

	351 void EsParserMpeg1Audio::SkipMpeg1AudioFrame(

	352 const Mpeg1AudioFrame& mpeg1audio_frame) {

	353 DCHECK_EQ(mpeg1audio_frame.queue_offset, es_queue_->head());

	354 es_queue_->Pop(mpeg1audio_frame.size);

	355 }

	356

	357 EsParserMpeg1Audio::EsParserMpeg1Audio(

	358 const NewAudioConfigCB& new_audio_config_cb,

	359 const EmitBufferCB& emit_buffer_cb)

	360 : new_audio_config_cb_(new_audio_config_cb),

	361 emit_buffer_cb_(emit_buffer_cb),

	362 es_queue_(new media::OffsetByteQueue()) {

	363 }

	364

	365 EsParserMpeg1Audio::~EsParserMpeg1Audio() {

	366 }

	367

	368 bool EsParserMpeg1Audio::Parse(

	369 const uint8* buf, int size,

	370 base::TimeDelta pts,

	371 DecodeTimestamp dts) {

	372 // The incoming PTS applies to the access unit that comes just after

	373 // the beginning of \|buf\|.

	374 if (pts != kNoTimestamp())

	375 pts_list_.push_back(EsPts(es_queue_->tail(), pts));

	376

	377 // Copy the input data to the ES buffer.

	378 es_queue_->Push(buf, size);

	379

	380 // Look for every MPEG1 audio frame in the ES buffer.

	381 Mpeg1AudioFrame mpeg1audio_frame;

	382 while (LookForMpeg1AudioFrame(&mpeg1audio_frame)) {

	383 // Update the audio configuration if needed.

	384 DCHECK_GE(mpeg1audio_frame.size, kMpegAudioHeaderMinSize);

	385 if (!UpdateAudioConfiguration(mpeg1audio_frame.data))

	386 return false;

	387

	388 // Get the PTS & the duration of this access unit.

	389 while (!pts_list_.empty() &&

	390 pts_list_.front().first <= mpeg1audio_frame.queue_offset) {

	391 audio_timestamp_helper_->SetBaseTimestamp(pts_list_.front().second);

	392 pts_list_.pop_front();

	393 }

	394

	395 if (audio_timestamp_helper_->base_timestamp() == kNoTimestamp()) {

	396 DVLOG(1) << "Audio frame with unknown timestamp";

	397 return false;

	398 }

	399 base::TimeDelta current_pts = audio_timestamp_helper_->GetTimestamp();

	400 base::TimeDelta frame_duration =

	401 audio_timestamp_helper_->GetFrameDuration(

	402 mpeg1audio_frame.sample_count);

	403

	404 // Emit an audio frame.

	405 bool is_key_frame = true;

	406

	407 // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId

	408 // type and allow multiple audio tracks. See https://crbug.com/341581.

	409 scoped_refptr<StreamParserBuffer> stream_parser_buffer =

	410 StreamParserBuffer::CopyFrom(

	411 mpeg1audio_frame.data,

	412 mpeg1audio_frame.size,

	413 is_key_frame,

	414 DemuxerStream::AUDIO, 0);

	415 stream_parser_buffer->set_timestamp(current_pts);

	416 stream_parser_buffer->set_duration(frame_duration);

	417 emit_buffer_cb_.Run(stream_parser_buffer);

	418

	419 // Update the PTS of the next frame.

	420 audio_timestamp_helper_->AddFrames(mpeg1audio_frame.sample_count);

	421

	422 // Skip the current frame.

	423 SkipMpeg1AudioFrame(mpeg1audio_frame);

	424 }

	425

	426 return true;

	427 }

	428

	429 void EsParserMpeg1Audio::Flush() {

	430 }

	431

	432 void EsParserMpeg1Audio::Reset() {

	433 es_queue_.reset(new media::OffsetByteQueue());

	434 pts_list_.clear();

	435 last_audio_decoder_config_ = AudioDecoderConfig();

	436 }

	437

	438 bool EsParserMpeg1Audio::UpdateAudioConfiguration(

	439 const uint8* mpeg1audio_header) {

	440 int frame_size;

	441 int sample_rate;

	442 ChannelLayout channel_layout;

	443 int sample_count;

	444 bool metadata_frame;

	445 int header_size =

	446 ParseMpegAudioFrameHeader(mpeg1audio_header, kMpegAudioHeaderMinSize,

	447 &frame_size, &sample_rate, &channel_layout,

	448 &sample_count, &metadata_frame);

	449 if (header_size < 0)

	450 return false;

	451

	452 #if 0

	453 // The following code is written according to ISO 14496 Part 3 Table 1.13 -

	454 // Syntax of AudioSpecificConfig.

	455 uint16 extra_data_int =

	456 // Note: adts_profile is in the range [0,3], since the ADTS header only

	457 // allows two bits for its value.

	458 ((adts_profile + 1) << 11) +

	459 (frequency_index << 7) +

	460 (channel_configuration << 3);

	461 uint8 extra_data[2] = {

	462 static_cast<uint8>(extra_data_int >> 8),

	463 static_cast<uint8>(extra_data_int & 0xff)

	464 };

	465 #endif

	466

	467 AudioDecoderConfig audio_decoder_config(

	468 kCodecMP3,

	469 kSampleFormatS16,

	470 channel_layout,

	471 sample_rate,

	472 NULL, 0,

	473 false);

	474

	475 if (!audio_decoder_config.Matches(last_audio_decoder_config_)) {

	476 DVLOG(1) << "Sampling frequency: " << sample_rate;

	477 // Reset the timestamp helper to use a new time scale.

	478 if (audio_timestamp_helper_ &&

	479 audio_timestamp_helper_->base_timestamp() != kNoTimestamp()) {

	480 base::TimeDelta base_timestamp = audio_timestamp_helper_->GetTimestamp();

	481 audio_timestamp_helper_.reset(

	482 new AudioTimestampHelper(sample_rate));

	483 audio_timestamp_helper_->SetBaseTimestamp(base_timestamp);

	484 } else {

	485 audio_timestamp_helper_.reset(

	486 new AudioTimestampHelper(sample_rate));

	487 }

	488 // Audio config notification.

	489 last_audio_decoder_config_ = audio_decoder_config;

	490 new_audio_config_cb_.Run(audio_decoder_config);

	491 }

	492

	493 return true;

	494 }

	495

	496 } // namespace mp2t

	497 } // namespace media

OLD	NEW

« no previous file with comments | « media/formats/mp2t/es_parser_mpeg1audio.h ('k') | media/formats/mp2t/mp2t_stream_parser.cc » ('j') | no next file with comments »