Index: media/formats/mp2t/es_parser_mpeg1audio.cc |
diff --git a/media/formats/mp2t/es_parser_mpeg1audio.cc b/media/formats/mp2t/es_parser_mpeg1audio.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..f926cd98debca4758d955f509658654799eeee7a |
--- /dev/null |
+++ b/media/formats/mp2t/es_parser_mpeg1audio.cc |
@@ -0,0 +1,497 @@ |
+// Copyright 2014 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include "media/formats/mp2t/es_parser_mpeg1audio.h" |
+ |
+#include <list> |
+ |
+#include "base/basictypes.h" |
+#include "base/logging.h" |
+#include "base/strings/string_number_conversions.h" |
+#include "media/base/audio_timestamp_helper.h" |
+#include "media/base/bit_reader.h" |
+#include "media/base/buffers.h" |
+#include "media/base/channel_layout.h" |
+#include "media/base/stream_parser_buffer.h" |
+#include "media/formats/common/offset_byte_queue.h" |
+#include "media/formats/mp2t/mp2t_common.h" |
+ |
+namespace media { |
+namespace mp2t { |
+ |
+// Map that determines which bitrate_index & channel_mode combinations |
+// are allowed. |
wolenetz (2014/08/26 20:26:31): nit: describe columns and indices? (in case the re
+// Derived from: http://mpgedit.org/mpgedit/mpeg_format/MP3Format.html |
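+// The table is indexed as kIsAllowed[bitrate_index][channel_mode]: rows |
+// correspond to the 4-bit bitrate_index of the frame header (the per-row |
+// comments give the MPEG 1 Layer II bitrate in kbps), and columns correspond |
+// to the 2-bit channel_mode field. |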
+static const bool kIsAllowed[17][4] = { |
wolenetz (2014/08/26 20:26:31): nit: s/17/16/ ?
+ { true, true, true, true }, // free |
+ { true, false, false, false }, // 32 |
+ { true, false, false, false }, // 48 |
+ { true, false, false, false }, // 56 |
+ { true, true, true, true }, // 64 |
+ { true, false, false, false }, // 80 |
+ { true, true, true, true }, // 96 |
+ { true, true, true, true }, // 112 |
+ { true, true, true, true }, // 128 |
+ { true, true, true, true }, // 160 |
+ { true, true, true, true }, // 192 |
+ { false, true, true, true }, // 224 |
+ { false, true, true, true }, // 256 |
+ { false, true, true, true }, // 320 |
+ { false, true, true, true }, // 384 |
+ { false, false, false, false } // bad |
+}; |
+ |
+// Maps version and layer information in the frame header |
+// into an index for the |kBitrateMap|. |
+// Derived from: http://mpgedit.org/mpgedit/mpeg_format/MP3Format.html |
+static const int kVersionLayerMap[4][4] = { |
+ // { reserved, L3, L2, L1 } |
+ { 5, 4, 4, 3 }, // MPEG 2.5 |
+ { 5, 5, 5, 5 }, // reserved |
+ { 5, 4, 4, 3 }, // MPEG 2 |
+ { 5, 2, 1, 0 } // MPEG 1 |
+}; |
+ |
+// Maps the bitrate index field in the header and an index |
+// from |kVersionLayerMap| to a frame bitrate. |
+// Derived from: http://mpgedit.org/mpgedit/mpeg_format/MP3Format.html |
+static const int kBitrateMap[16][6] = { |
+ // { V1L1, V1L2, V1L3, V2L1, V2L2 & V2L3, reserved } |
+ { 0, 0, 0, 0, 0, 0 }, |
+ { 32, 32, 32, 32, 8, 0 }, |
+ { 64, 48, 40, 48, 16, 0 }, |
+ { 96, 56, 48, 56, 24, 0 }, |
+ { 128, 64, 56, 64, 32, 0 }, |
+ { 160, 80, 64, 80, 40, 0 }, |
+ { 192, 96, 80, 96, 48, 0 }, |
+ { 224, 112, 96, 112, 56, 0 }, |
+ { 256, 128, 112, 128, 64, 0 }, |
+ { 288, 160, 128, 144, 80, 0 }, |
+ { 320, 192, 160, 160, 96, 0 }, |
+ { 352, 224, 192, 176, 112, 0 }, |
+ { 384, 256, 224, 192, 128, 0 }, |
+ { 416, 320, 256, 224, 144, 0 }, |
+ { 448, 384, 320, 256, 160, 0 }, |
+ { 0, 0, 0, 0, 0, 0 } |
+}; |
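+// For example, an MPEG 1 (version 3) Layer III (layer 1) frame with |
+// bitrate_index 9 maps through kVersionLayerMap[3][1] == 2 (the V1L3 column) |
+// to kBitrateMap[9][2] == 128 kbps. |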
+ |
+// Maps the sample rate index and version fields from the frame header |
+// to a sample rate. |
+// Derived from: http://mpgedit.org/mpgedit/mpeg_format/MP3Format.html |
+static const int kSampleRateMap[4][4] = { |
+ // { V2.5, reserved, V2, V1 } |
+ { 11025, 0, 22050, 44100 }, |
+ { 12000, 0, 24000, 48000 }, |
+ { 8000, 0, 16000, 32000 }, |
+ { 0, 0, 0, 0 } |
+}; |
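+// For example, sample_rate_index 0 in an MPEG 1 (version 3) frame maps to |
+// kSampleRateMap[0][3] == 44100 Hz. |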
+ |
+#if 0 |
+// Offset in bytes from the end of the MP3 header to "Xing" or "Info" tags which |
+// indicate that a frame is a silent metadata frame. Values taken from FFmpeg. |
+static const int kXingHeaderMap[2][2] = {{32, 17}, {17, 9}}; |
+#endif |
+ |
+// Frame header field constants. |
+static const int kVersion2 = 2; |
+static const int kVersionReserved = 1; |
+static const int kVersion2_5 = 0; |
+static const int kLayerReserved = 0; |
+static const int kLayer1 = 3; |
+static const int kLayer2 = 2; |
+static const int kLayer3 = 1; |
+static const int kBitrateFree = 0; |
+static const int kBitrateBad = 0xf; |
+static const int kSampleRateReserved = 3; |
+ |
+int ParseMpegAudioFrameHeader(const uint8* data, |
+ int size, |
+ int* frame_size, |
+ int* sample_rate, |
+ ChannelLayout* channel_layout, |
+ int* sample_count, |
+ bool* metadata_frame) { |
+ DCHECK(data); |
+ DCHECK_GE(size, 0); |
+ DCHECK(frame_size); |
+ |
+ if (size < 4) |
+ return 0; |
+ |
+ BitReader reader(data, size); |
+ int sync; |
+ int version; |
+ int layer; |
+ int is_protected; |
+ int bitrate_index; |
+ int sample_rate_index; |
+ int has_padding; |
+ int is_private; |
+ int channel_mode; |
+ int other_flags; |
+ |
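+ // The 4-byte frame header, from the most significant bit: an 11-bit |
+ // syncword, 2-bit version, 2-bit layer, 1-bit protection flag, 4-bit |
+ // bitrate index, 2-bit sample rate index, 1-bit padding flag, 1-bit private |
+ // flag, 2-bit channel mode, and 6 trailing bits (mode extension, copyright, |
+ // original and emphasis) read here as |other_flags|. |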
+ if (!reader.ReadBits(11, &sync) || |
+ !reader.ReadBits(2, &version) || |
+ !reader.ReadBits(2, &layer) || |
+ !reader.ReadBits(1, &is_protected) || |
+ !reader.ReadBits(4, &bitrate_index) || |
+ !reader.ReadBits(2, &sample_rate_index) || |
+ !reader.ReadBits(1, &has_padding) || |
+ !reader.ReadBits(1, &is_private) || |
+ !reader.ReadBits(2, &channel_mode) || |
+ !reader.ReadBits(6, &other_flags)) { |
+ return -1; |
+ } |
+ |
+ DVLOG(2) << "Header data :" << std::hex |
+ << " sync 0x" << sync |
+ << " version 0x" << version |
+ << " layer 0x" << layer |
+ << " bitrate_index 0x" << bitrate_index |
+ << " sample_rate_index 0x" << sample_rate_index |
+ << " channel_mode 0x" << channel_mode; |
+ |
+ if (sync != 0x7ff || |
+ version == kVersionReserved || |
+ layer == kLayerReserved || |
+ bitrate_index == kBitrateFree || bitrate_index == kBitrateBad || |
+ sample_rate_index == kSampleRateReserved) { |
+ return -1; |
+ } |
+ |
+ if (layer == kLayer2 && !kIsAllowed[bitrate_index][channel_mode]) { |
+ return -1; |
+ } |
+ |
+ int bitrate = kBitrateMap[bitrate_index][kVersionLayerMap[version][layer]]; |
+ |
+ if (bitrate == 0) { |
+ return -1; |
+ } |
+ |
+ DVLOG(2) << " bitrate " << bitrate; |
+ |
+ int frame_sample_rate = kSampleRateMap[sample_rate_index][version]; |
+ if (frame_sample_rate == 0) { |
+ return -1; |
+ } |
+ |
+ if (sample_rate) |
+ *sample_rate = frame_sample_rate; |
+ |
+ // http://teslabs.com/openplayer/docs/docs/specs/mp3_structure2.pdf |
+ // Table 2.1.5 |
+ int samples_per_frame; |
+ switch (layer) { |
+ case kLayer1: |
+ samples_per_frame = 384; |
+ break; |
+ |
+ case kLayer2: |
+ samples_per_frame = 1152; |
+ break; |
+ |
+ case kLayer3: |
+ if (version == kVersion2 || version == kVersion2_5) |
+ samples_per_frame = 576; |
+ else |
+ samples_per_frame = 1152; |
+ break; |
+ |
+ default: |
+ return -1; |
+ } |
+ |
+ if (sample_count) |
+ *sample_count = samples_per_frame; |
+ |
+ // http://teslabs.com/openplayer/docs/docs/specs/mp3_structure2.pdf |
+ // Text just below Table 2.1.5. |
+ if (layer == kLayer1) { |
+ // This formulation is a slight variation on the equation below, |
+ // but has slightly different truncation characteristics to deal |
+ // with the fact that Layer 1 has 4 byte "slots" instead of single |
+ // byte ones. |
+ *frame_size = 4 * (12 * bitrate * 1000 / frame_sample_rate); |
+ } else { |
+ *frame_size = |
+ ((samples_per_frame / 8) * bitrate * 1000) / frame_sample_rate; |
+ } |
+ |
+ if (has_padding) |
+ *frame_size += (layer == kLayer1) ? 4 : 1; |
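+ // For example, a 128 kbps MPEG 1 Layer III frame at 44100 Hz yields |
+ // (1152 / 8) * 128 * 1000 / 44100 == 417 bytes, or 418 bytes when the |
+ // padding bit is set. |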
+ |
+ if (channel_layout) { |
+ // Map Stereo(0), Joint Stereo(1), and Dual Channel (2) to |
+ // CHANNEL_LAYOUT_STEREO and Single Channel (3) to CHANNEL_LAYOUT_MONO. |
+ *channel_layout = |
+ (channel_mode == 3) ? CHANNEL_LAYOUT_MONO : CHANNEL_LAYOUT_STEREO; |
+ } |
+ |
+ if (metadata_frame) |
+ *metadata_frame = false; |
+ |
+ const int header_bytes_read = reader.bits_read() / 8; |
+#if 1 |
+ return header_bytes_read; |
+#else |
+ if (layer != kLayer3) |
+ return header_bytes_read; |
+ |
+ // Check if this is a XING frame and tell the base parser to skip it if so. |
+ const int xing_header_index = |
+ kXingHeaderMap[version == kVersion2 || |
+ version == kVersion2_5][channel_mode == 3]; |
+ uint32_t tag = 0; |
+ |
+ // It's not a XING frame if the frame isn't big enough to be one. |
+ if (*frame_size < |
+ header_bytes_read + xing_header_index + static_cast<int>(sizeof(tag))) { |
+ return header_bytes_read; |
+ } |
+ |
+ // If we don't have enough data available to check, return 0 so frame parsing |
+ // will be retried once more data is available. |
+ if (!reader.SkipBits(xing_header_index * 8) || |
+ !reader.ReadBits(sizeof(tag) * 8, &tag)) { |
+ return 0; |
+ } |
+ |
+ // Check to see if the tag contains 'Xing' or 'Info' |
+ if (tag == 0x496e666f || tag == 0x58696e67) { |
+ if (metadata_frame) |
+ *metadata_frame = true; |
+ return reader.bits_read() / 8; |
+ } |
+ |
+ // If it wasn't a XING frame, just return the number of consumed bytes. |
+ return header_bytes_read; |
+#endif |
+} |
+ |
+static const int kMpegAudioHeaderMinSize = 4; |
+ |
+struct EsParserMpeg1Audio::Mpeg1AudioFrame { |
+ // Pointer to the ES data. |
+ const uint8* data; |
+ |
+ // Frame size. |
+ int size; |
+ |
+ // Number of samples in the frame. |
+ int sample_count; |
+ |
+ // Frame offset in the ES queue. |
+ int64 queue_offset; |
+}; |
+ |
+bool EsParserMpeg1Audio::LookForMpeg1AudioFrame( |
+ Mpeg1AudioFrame* mpeg1audio_frame) { |
+ int es_size; |
+ const uint8* es; |
+ es_queue_->Peek(&es, &es_size); |
+ |
+ int max_offset = es_size - kMpegAudioHeaderMinSize; |
+ if (max_offset <= 0) |
+ return false; |
+ |
+ for (int offset = 0; offset < max_offset; offset++) { |
+ const uint8* cur_buf = &es[offset]; |
+ if (cur_buf[0] != 0xff) |
+ continue; |
+ |
+ int frame_size; |
+ int sample_rate; |
+ ChannelLayout channel_layout; |
+ int sample_count; |
+ bool metadata_frame; |
+ |
+ int remaining_size = es_size - offset; |
+ int header_size = |
+ ParseMpegAudioFrameHeader(cur_buf, remaining_size, |
+ &frame_size, &sample_rate, &channel_layout, |
+ &sample_count, &metadata_frame); |
+ |
+ if (header_size < 0) |
+ continue; |
+ |
+ if (remaining_size < frame_size) { |
+ // Not a full frame: will resume when we have more data. |
+ es_queue_->Pop(offset); |
+ return false; |
+ } |
+ |
+ // Check whether there is another frame |
+ // |frame_size| apart from the current one. |
+ if (remaining_size >= frame_size + 2 && |
+ cur_buf[frame_size] != 0xff) { |
+ continue; |
+ } |
+ |
+ es_queue_->Pop(offset); |
+ es_queue_->Peek(&mpeg1audio_frame->data, &es_size); |
+ mpeg1audio_frame->queue_offset = es_queue_->head(); |
+ mpeg1audio_frame->size = frame_size; |
+ mpeg1audio_frame->sample_count = sample_count; |
+ DVLOG(LOG_LEVEL_ES) |
+ << "MPEG1 audio syncword @ pos=" << mpeg1audio_frame->queue_offset |
+ << " frame_size=" << mpeg1audio_frame->size; |
+ DVLOG(LOG_LEVEL_ES) |
+ << "MPEG1 audio header: " |
+ << base::HexEncode(mpeg1audio_frame->data, kMpegAudioHeaderMinSize); |
+ return true; |
+ } |
+ |
+ es_queue_->Pop(max_offset); |
+ return false; |
+} |
+ |
+void EsParserMpeg1Audio::SkipMpeg1AudioFrame( |
+ const Mpeg1AudioFrame& mpeg1audio_frame) { |
+ DCHECK_EQ(mpeg1audio_frame.queue_offset, es_queue_->head()); |
+ es_queue_->Pop(mpeg1audio_frame.size); |
+} |
+ |
+EsParserMpeg1Audio::EsParserMpeg1Audio( |
+ const NewAudioConfigCB& new_audio_config_cb, |
+ const EmitBufferCB& emit_buffer_cb) |
+ : new_audio_config_cb_(new_audio_config_cb), |
+ emit_buffer_cb_(emit_buffer_cb), |
+ es_queue_(new media::OffsetByteQueue()) { |
+} |
+ |
+EsParserMpeg1Audio::~EsParserMpeg1Audio() { |
+} |
+ |
+bool EsParserMpeg1Audio::Parse( |
+ const uint8* buf, int size, |
+ base::TimeDelta pts, |
+ DecodeTimestamp dts) { |
+ // The incoming PTS applies to the access unit that comes just after |
+ // the beginning of |buf|. |
+ if (pts != kNoTimestamp()) |
+ pts_list_.push_back(EsPts(es_queue_->tail(), pts)); |
+ |
+ // Copy the input data to the ES buffer. |
+ es_queue_->Push(buf, size); |
+ |
+ // Look for every MPEG1 audio frame in the ES buffer. |
+ Mpeg1AudioFrame mpeg1audio_frame; |
+ while (LookForMpeg1AudioFrame(&mpeg1audio_frame)) { |
+ // Update the audio configuration if needed. |
+ DCHECK_GE(mpeg1audio_frame.size, kMpegAudioHeaderMinSize); |
+ if (!UpdateAudioConfiguration(mpeg1audio_frame.data)) |
+ return false; |
+ |
+ // Get the PTS & the duration of this access unit. |
+ while (!pts_list_.empty() && |
+ pts_list_.front().first <= mpeg1audio_frame.queue_offset) { |
+ audio_timestamp_helper_->SetBaseTimestamp(pts_list_.front().second); |
+ pts_list_.pop_front(); |
+ } |
+ |
+ if (audio_timestamp_helper_->base_timestamp() == kNoTimestamp()) { |
+ DVLOG(1) << "Audio frame with unknown timestamp"; |
+ return false; |
+ } |
+ base::TimeDelta current_pts = audio_timestamp_helper_->GetTimestamp(); |
+ base::TimeDelta frame_duration = |
+ audio_timestamp_helper_->GetFrameDuration( |
+ mpeg1audio_frame.sample_count); |
+ |
+ // Emit an audio frame. |
+ bool is_key_frame = true; |
+ |
+ // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId |
+ // type and allow multiple audio tracks. See https://crbug.com/341581. |
+ scoped_refptr<StreamParserBuffer> stream_parser_buffer = |
+ StreamParserBuffer::CopyFrom( |
+ mpeg1audio_frame.data, |
+ mpeg1audio_frame.size, |
+ is_key_frame, |
+ DemuxerStream::AUDIO, 0); |
+ stream_parser_buffer->set_timestamp(current_pts); |
+ stream_parser_buffer->set_duration(frame_duration); |
+ emit_buffer_cb_.Run(stream_parser_buffer); |
+ |
+ // Update the PTS of the next frame. |
+ audio_timestamp_helper_->AddFrames(mpeg1audio_frame.sample_count); |
+ |
+ // Skip the current frame. |
+ SkipMpeg1AudioFrame(mpeg1audio_frame); |
+ } |
+ |
+ return true; |
+} |
+ |
+void EsParserMpeg1Audio::Flush() { |
+} |
+ |
+void EsParserMpeg1Audio::Reset() { |
+ es_queue_.reset(new media::OffsetByteQueue()); |
+ pts_list_.clear(); |
+ last_audio_decoder_config_ = AudioDecoderConfig(); |
+} |
+ |
+bool EsParserMpeg1Audio::UpdateAudioConfiguration( |
+ const uint8* mpeg1audio_header) { |
+ int frame_size; |
+ int sample_rate; |
+ ChannelLayout channel_layout; |
+ int sample_count; |
+ bool metadata_frame; |
+ int header_size = |
+ ParseMpegAudioFrameHeader(mpeg1audio_header, kMpegAudioHeaderMinSize, |
+ &frame_size, &sample_rate, &channel_layout, |
+ &sample_count, &metadata_frame); |
+ if (header_size < 0) |
+ return false; |
+ |
+ AudioDecoderConfig audio_decoder_config( |
+ kCodecMP3, |
+ kSampleFormatS16, |
+ channel_layout, |
+ sample_rate, |
+ NULL, 0, |
+ false); |
+ |
+ if (!audio_decoder_config.Matches(last_audio_decoder_config_)) { |
+ DVLOG(1) << "Sampling frequency: " << sample_rate; |
+ // Reset the timestamp helper to use a new time scale. |
+ if (audio_timestamp_helper_ && |
+ audio_timestamp_helper_->base_timestamp() != kNoTimestamp()) { |
+ base::TimeDelta base_timestamp = audio_timestamp_helper_->GetTimestamp(); |
+ audio_timestamp_helper_.reset( |
+ new AudioTimestampHelper(sample_rate)); |
+ audio_timestamp_helper_->SetBaseTimestamp(base_timestamp); |
+ } else { |
+ audio_timestamp_helper_.reset( |
+ new AudioTimestampHelper(sample_rate)); |
+ } |
+ // Audio config notification. |
+ last_audio_decoder_config_ = audio_decoder_config; |
+ new_audio_config_cb_.Run(audio_decoder_config); |
+ } |
+ |
+ return true; |
+} |
+ |
+} // namespace mp2t |
+} // namespace media |