Index: media/formats/mp2t/es_parser_mpeg1audio.cc |
diff --git a/media/formats/mp2t/es_parser_mpeg1audio.cc b/media/formats/mp2t/es_parser_mpeg1audio.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..f926cd98debca4758d955f509658654799eeee7a |
--- /dev/null |
+++ b/media/formats/mp2t/es_parser_mpeg1audio.cc |
@@ -0,0 +1,497 @@ |
+// Copyright 2014 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include "media/formats/mp2t/es_parser_mpeg1audio.h" |
+ |
+#include <list> |
+ |
+#include "base/basictypes.h" |
+#include "base/logging.h" |
+#include "base/strings/string_number_conversions.h" |
+#include "media/base/audio_timestamp_helper.h" |
+#include "media/base/bit_reader.h" |
+#include "media/base/buffers.h" |
+#include "media/base/channel_layout.h" |
+#include "media/base/stream_parser_buffer.h" |
+#include "media/formats/common/offset_byte_queue.h" |
+#include "media/formats/mp2t/mp2t_common.h" |
+ |
+namespace media { |
+namespace mp2t { |
+ |
+// Map that determines which bitrate_index & channel_mode combinations |
+// are allowed. |
wolenetz (2014/08/26 20:26:31): nit: describe columns and indices? (in case the re
+// Derived from: http://mpgedit.org/mpgedit/mpeg_format/MP3Format.html |
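+// The table is indexed as kIsAllowed[bitrate_index][channel_mode]: rows |
+// correspond to the 4-bit bitrate_index of the frame header (the per-row |
+// comments give the MPEG 1 Layer II bitrate in kbps), and columns correspond |
+// to the 2-bit channel_mode field. |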
+static const bool kIsAllowed[17][4] = { |
wolenetz (2014/08/26 20:26:31): nit: s/17/16/ ?
+ { true, true, true, true }, // free |
+ { true, false, false, false }, // 32 |
+ { true, false, false, false }, // 48 |
+ { true, false, false, false }, // 56 |
+ { true, true, true, true }, // 64 |
+ { true, false, false, false }, // 80 |
+ { true, true, true, true }, // 96 |
+ { true, true, true, true }, // 112 |
+ { true, true, true, true }, // 128 |
+ { true, true, true, true }, // 160 |
+ { true, true, true, true }, // 192 |
+ { false, true, true, true }, // 224 |
+ { false, true, true, true }, // 256 |
+ { false, true, true, true }, // 320 |
+ { false, true, true, true }, // 384 |
+ { false, false, false, false } // bad |
+}; |
+ |
+// Maps version and layer information in the frame header |
+// into an index for the |kBitrateMap|. |
+// Derived from: http://mpgedit.org/mpgedit/mpeg_format/MP3Format.html |
+static const int kVersionLayerMap[4][4] = { |
+ // { reserved, L3, L2, L1 } |
+ { 5, 4, 4, 3 }, // MPEG 2.5 |
+ { 5, 5, 5, 5 }, // reserved |
+ { 5, 4, 4, 3 }, // MPEG 2 |
+ { 5, 2, 1, 0 } // MPEG 1 |
+}; |
+ |
+// Maps the bitrate index field in the header and an index |
+// from |kVersionLayerMap| to a frame bitrate. |
+// Derived from: http://mpgedit.org/mpgedit/mpeg_format/MP3Format.html |
+static const int kBitrateMap[16][6] = { |
+ // { V1L1, V1L2, V1L3, V2L1, V2L2 & V2L3, reserved } |
+ { 0, 0, 0, 0, 0, 0 }, |
+ { 32, 32, 32, 32, 8, 0 }, |
+ { 64, 48, 40, 48, 16, 0 }, |
+ { 96, 56, 48, 56, 24, 0 }, |
+ { 128, 64, 56, 64, 32, 0 }, |
+ { 160, 80, 64, 80, 40, 0 }, |
+ { 192, 96, 80, 96, 48, 0 }, |
+ { 224, 112, 96, 112, 56, 0 }, |
+ { 256, 128, 112, 128, 64, 0 }, |
+ { 288, 160, 128, 144, 80, 0 }, |
+ { 320, 192, 160, 160, 96, 0 }, |
+ { 352, 224, 192, 176, 112, 0 }, |
+ { 384, 256, 224, 192, 128, 0 }, |
+ { 416, 320, 256, 224, 144, 0 }, |
+ { 448, 384, 320, 256, 160, 0 }, |
+ { 0, 0, 0, 0, 0, 0 } |
+}; |
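+// For example, an MPEG 1 (version 3) Layer III (layer 1) frame with |
+// bitrate_index 9 maps through kVersionLayerMap[3][1] == 2 (the V1L3 column) |
+// to kBitrateMap[9][2] == 128 kbps. |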
+ |
+// Maps the sample rate index and version fields from the frame header |
+// to a sample rate. |
+// Derived from: http://mpgedit.org/mpgedit/mpeg_format/MP3Format.html |
+static const int kSampleRateMap[4][4] = { |
+ // { V2.5, reserved, V2, V1 } |
+ { 11025, 0, 22050, 44100 }, |
+ { 12000, 0, 24000, 48000 }, |
+ { 8000, 0, 16000, 32000 }, |
+ { 0, 0, 0, 0 } |
+}; |
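+// For example, sample_rate_index 0 in an MPEG 1 (version 3) frame maps to |
+// kSampleRateMap[0][3] == 44100 Hz. |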
+ |
+#if 0 |
+// Offset in bytes from the end of the MP3 header to "Xing" or "Info" tags which |
+// indicate that a frame is a silent metadata frame. Values taken from FFmpeg. |
+static const int kXingHeaderMap[2][2] = {{32, 17}, {17, 9}}; |
+#endif |
+ |
+// Frame header field constants. |
+static const int kVersion2 = 2; |
+static const int kVersionReserved = 1; |
+static const int kVersion2_5 = 0; |
+static const int kLayerReserved = 0; |
+static const int kLayer1 = 3; |
+static const int kLayer2 = 2; |
+static const int kLayer3 = 1; |
+static const int kBitrateFree = 0; |
+static const int kBitrateBad = 0xf; |
+static const int kSampleRateReserved = 3; |
+ |
+int ParseMpegAudioFrameHeader(const uint8* data, |
+ int size, |
+ int* frame_size, |
+ int* sample_rate, |
+ ChannelLayout* channel_layout, |
+ int* sample_count, |
+ bool* metadata_frame) { |
+ DCHECK(data); |
+ DCHECK_GE(size, 0); |
+ DCHECK(frame_size); |
+ |
+ if (size < 4) |
+ return 0; |
+ |
+ BitReader reader(data, size); |
+ int sync; |
+ int version; |
+ int layer; |
+ int is_protected; |
+ int bitrate_index; |
+ int sample_rate_index; |
+ int has_padding; |
+ int is_private; |
+ int channel_mode; |
+ int other_flags; |
+ |
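+ // The 4-byte frame header, from the most significant bit: an 11-bit |
+ // syncword, 2-bit version, 2-bit layer, 1-bit protection flag, 4-bit |
+ // bitrate index, 2-bit sample rate index, 1-bit padding flag, 1-bit private |
+ // flag, 2-bit channel mode, and 6 trailing bits (mode extension, copyright, |
+ // original and emphasis) read here as |other_flags|. |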
+ if (!reader.ReadBits(11, &sync) || |
+ !reader.ReadBits(2, &version) || |
+ !reader.ReadBits(2, &layer) || |
+ !reader.ReadBits(1, &is_protected) || |
+ !reader.ReadBits(4, &bitrate_index) || |
+ !reader.ReadBits(2, &sample_rate_index) || |
+ !reader.ReadBits(1, &has_padding) || |
+ !reader.ReadBits(1, &is_private) || |
+ !reader.ReadBits(2, &channel_mode) || |
+ !reader.ReadBits(6, &other_flags)) { |
+ return -1; |
+ } |
+ |
+ DVLOG(2) << "Header data :" << std::hex |
+ << " sync 0x" << sync |
+ << " version 0x" << version |
+ << " layer 0x" << layer |
+ << " bitrate_index 0x" << bitrate_index |
+ << " sample_rate_index 0x" << sample_rate_index |
+ << " channel_mode 0x" << channel_mode; |
+ |
+ if (sync != 0x7ff || |
+ version == kVersionReserved || |
+ layer == kLayerReserved || |
+ bitrate_index == kBitrateFree || bitrate_index == kBitrateBad || |
+ sample_rate_index == kSampleRateReserved) { |
+ return -1; |
+ } |
+ |
+ if (layer == kLayer2 && !kIsAllowed[bitrate_index][channel_mode]) { |
+ return -1; |
+ } |
+ |
+ int bitrate = kBitrateMap[bitrate_index][kVersionLayerMap[version][layer]]; |
+ |
+ if (bitrate == 0) { |
+ return -1; |
+ } |
+ |
+ DVLOG(2) << " bitrate " << bitrate; |
+ |
+ int frame_sample_rate = kSampleRateMap[sample_rate_index][version]; |
+ if (frame_sample_rate == 0) { |
+ return -1; |
+ } |
+ |
+ if (sample_rate) |
+ *sample_rate = frame_sample_rate; |
+ |
+ // http://teslabs.com/openplayer/docs/docs/specs/mp3_structure2.pdf |
+ // Table 2.1.5 |
+ int samples_per_frame; |
+ switch (layer) { |
+ case kLayer1: |
+ samples_per_frame = 384; |
+ break; |
+ |
+ case kLayer2: |
+ samples_per_frame = 1152; |
+ break; |
+ |
+ case kLayer3: |
+ if (version == kVersion2 || version == kVersion2_5) |
+ samples_per_frame = 576; |
+ else |
+ samples_per_frame = 1152; |
+ break; |
+ |
+ default: |
+ return -1; |
+ } |
+ |
+ if (sample_count) |
+ *sample_count = samples_per_frame; |
+ |
+ // http://teslabs.com/openplayer/docs/docs/specs/mp3_structure2.pdf |
+ // Text just below Table 2.1.5. |
+ if (layer == kLayer1) { |
+ // This formulation is a slight variation on the equation below, |
+ // but has slightly different truncation characteristics to deal |
+ // with the fact that Layer 1 has 4 byte "slots" instead of single |
+ // byte ones. |
+ *frame_size = 4 * (12 * bitrate * 1000 / frame_sample_rate); |
+ } else { |
+ *frame_size = |
+ ((samples_per_frame / 8) * bitrate * 1000) / frame_sample_rate; |
+ } |
+ |
+ if (has_padding) |
+ *frame_size += (layer == kLayer1) ? 4 : 1; |
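+ // For example, a 128 kbps MPEG 1 Layer III frame at 44100 Hz yields |
+ // (1152 / 8) * 128 * 1000 / 44100 == 417 bytes, or 418 bytes when the |
+ // padding bit is set. |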
+ |
+ if (channel_layout) { |
+ // Map Stereo(0), Joint Stereo(1), and Dual Channel (2) to |
+ // CHANNEL_LAYOUT_STEREO and Single Channel (3) to CHANNEL_LAYOUT_MONO. |
+ *channel_layout = |
+ (channel_mode == 3) ? CHANNEL_LAYOUT_MONO : CHANNEL_LAYOUT_STEREO; |
+ } |
+ |
+ if (metadata_frame) |
+ *metadata_frame = false; |
+ |
+ const int header_bytes_read = reader.bits_read() / 8; |
+#if 1 |
+ return header_bytes_read; |
+#else |
+ if (layer != kLayer3) |
+ return header_bytes_read; |
+ |
+ // Check if this is a XING frame and tell the base parser to skip it if so. |
+ const int xing_header_index = |
+ kXingHeaderMap[version == kVersion2 || |
+ version == kVersion2_5][channel_mode == 3]; |
+ uint32_t tag = 0; |
+ |
+ // It's not a XING frame if the frame isn't big enough to be one. |
+ if (*frame_size < |
+ header_bytes_read + xing_header_index + static_cast<int>(sizeof(tag))) { |
+ return header_bytes_read; |
+ } |
+ |
+ // If we don't have enough data available to check, return 0 so frame parsing |
+ // will be retried once more data is available. |
+ if (!reader.SkipBits(xing_header_index * 8) || |
+ !reader.ReadBits(sizeof(tag) * 8, &tag)) { |
+ return 0; |
+ } |
+ |
+ // Check to see if the tag contains 'Xing' or 'Info' |
+ if (tag == 0x496e666f || tag == 0x58696e67) { |
+ if (metadata_frame) |
+ *metadata_frame = true; |
+ return reader.bits_read() / 8; |
+ } |
+ |
+ // If it wasn't a XING frame, just return the number of consumed bytes. |
+ return header_bytes_read; |
+#endif |
+} |
+ |
+static const int kMpegAudioHeaderMinSize = 4; |
+ |
+struct EsParserMpeg1Audio::Mpeg1AudioFrame { |
+ // Pointer to the ES data. |
+ const uint8* data; |
+ |
+ // Frame size. |
+ int size; |
+ |
+ // Number of samples in the frame. |
+ int sample_count; |
+ |
+ // Frame offset in the ES queue. |
+ int64 queue_offset; |
+}; |
+ |
+bool EsParserMpeg1Audio::LookForMpeg1AudioFrame( |
+ Mpeg1AudioFrame* mpeg1audio_frame) { |
+ int es_size; |
+ const uint8* es; |
+ es_queue_->Peek(&es, &es_size); |
+ |
+ int max_offset = es_size - kMpegAudioHeaderMinSize; |
+ if (max_offset <= 0) |
+ return false; |
+ |
+ for (int offset = 0; offset < max_offset; offset++) { |
+ const uint8* cur_buf = &es[offset]; |
+ if (cur_buf[0] != 0xff) |
+ continue; |
+ |
+ int frame_size; |
+ int sample_rate; |
+ ChannelLayout channel_layout; |
+ int sample_count; |
+ bool metadata_frame; |
+ |
+ int remaining_size = es_size - offset; |
+ int header_size = |
+ ParseMpegAudioFrameHeader(cur_buf, remaining_size, |
+ &frame_size, &sample_rate, &channel_layout, |
+ &sample_count, &metadata_frame); |
+ |
+ if (header_size < 0) |
+ continue; |
+ |
+ if (remaining_size < frame_size) { |
+ // Not a full frame: will resume when we have more data. |
+ es_queue_->Pop(offset); |
+ return false; |
+ } |
+ |
+ // Check whether there is another frame |
+ // |frame_size| apart from the current one. |
+ if (remaining_size >= frame_size + 2 && |
+ cur_buf[frame_size] != 0xff) { |
+ continue; |
+ } |
+ |
+ es_queue_->Pop(offset); |
+ es_queue_->Peek(&mpeg1audio_frame->data, &es_size); |
+ mpeg1audio_frame->queue_offset = es_queue_->head(); |
+ mpeg1audio_frame->size = frame_size; |
+ mpeg1audio_frame->sample_count = sample_count; |
+ DVLOG(LOG_LEVEL_ES) |
+ << "MPEG1 audio syncword @ pos=" << mpeg1audio_frame->queue_offset |
+ << " frame_size=" << mpeg1audio_frame->size; |
+ DVLOG(LOG_LEVEL_ES) |
+ << "MPEG1 audio header: " |
+ << base::HexEncode(mpeg1audio_frame->data, kMpegAudioHeaderMinSize); |
+ return true; |
+ } |
+ |
+ es_queue_->Pop(max_offset); |
+ return false; |
+} |
+ |
+void EsParserMpeg1Audio::SkipMpeg1AudioFrame( |
+ const Mpeg1AudioFrame& mpeg1audio_frame) { |
+ DCHECK_EQ(mpeg1audio_frame.queue_offset, es_queue_->head()); |
+ es_queue_->Pop(mpeg1audio_frame.size); |
+} |
+ |
+EsParserMpeg1Audio::EsParserMpeg1Audio( |
+ const NewAudioConfigCB& new_audio_config_cb, |
+ const EmitBufferCB& emit_buffer_cb) |
+ : new_audio_config_cb_(new_audio_config_cb), |
+ emit_buffer_cb_(emit_buffer_cb), |
+ es_queue_(new media::OffsetByteQueue()) { |
+} |
+ |
+EsParserMpeg1Audio::~EsParserMpeg1Audio() { |
+} |
+ |
+bool EsParserMpeg1Audio::Parse( |
+ const uint8* buf, int size, |
+ base::TimeDelta pts, |
+ DecodeTimestamp dts) { |
+ // The incoming PTS applies to the access unit that comes just after |
+ // the beginning of |buf|. |
+ if (pts != kNoTimestamp()) |
+ pts_list_.push_back(EsPts(es_queue_->tail(), pts)); |
+ |
+ // Copy the input data to the ES buffer. |
+ es_queue_->Push(buf, size); |
+ |
+ // Look for every MPEG1 audio frame in the ES buffer. |
+ Mpeg1AudioFrame mpeg1audio_frame; |
+ while (LookForMpeg1AudioFrame(&mpeg1audio_frame)) { |
+ // Update the audio configuration if needed. |
+ DCHECK_GE(mpeg1audio_frame.size, kMpegAudioHeaderMinSize); |
+ if (!UpdateAudioConfiguration(mpeg1audio_frame.data)) |
+ return false; |
+ |
+ // Get the PTS & the duration of this access unit. |
+ while (!pts_list_.empty() && |
+ pts_list_.front().first <= mpeg1audio_frame.queue_offset) { |
+ audio_timestamp_helper_->SetBaseTimestamp(pts_list_.front().second); |
+ pts_list_.pop_front(); |
+ } |
+ |
+ if (audio_timestamp_helper_->base_timestamp() == kNoTimestamp()) { |
+ DVLOG(1) << "Audio frame with unknown timestamp"; |
+ return false; |
+ } |
+ base::TimeDelta current_pts = audio_timestamp_helper_->GetTimestamp(); |
+ base::TimeDelta frame_duration = |
+ audio_timestamp_helper_->GetFrameDuration( |
+ mpeg1audio_frame.sample_count); |
+ |
+ // Emit an audio frame. |
+ bool is_key_frame = true; |
+ |
+ // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId |
+ // type and allow multiple audio tracks. See https://crbug.com/341581. |
+ scoped_refptr<StreamParserBuffer> stream_parser_buffer = |
+ StreamParserBuffer::CopyFrom( |
+ mpeg1audio_frame.data, |
+ mpeg1audio_frame.size, |
+ is_key_frame, |
+ DemuxerStream::AUDIO, 0); |
+ stream_parser_buffer->set_timestamp(current_pts); |
+ stream_parser_buffer->set_duration(frame_duration); |
+ emit_buffer_cb_.Run(stream_parser_buffer); |
+ |
+ // Update the PTS of the next frame. |
+ audio_timestamp_helper_->AddFrames(mpeg1audio_frame.sample_count); |
+ |
+ // Skip the current frame. |
+ SkipMpeg1AudioFrame(mpeg1audio_frame); |
+ } |
+ |
+ return true; |
+} |
+ |
+void EsParserMpeg1Audio::Flush() { |
+} |
+ |
+void EsParserMpeg1Audio::Reset() { |
+ es_queue_.reset(new media::OffsetByteQueue()); |
+ pts_list_.clear(); |
+ last_audio_decoder_config_ = AudioDecoderConfig(); |
+} |
+ |
+bool EsParserMpeg1Audio::UpdateAudioConfiguration( |
+ const uint8* mpeg1audio_header) { |
+ int frame_size; |
+ int sample_rate; |
+ ChannelLayout channel_layout; |
+ int sample_count; |
+ bool metadata_frame; |
+ int header_size = |
+ ParseMpegAudioFrameHeader(mpeg1audio_header, kMpegAudioHeaderMinSize, |
+ &frame_size, &sample_rate, &channel_layout, |
+ &sample_count, &metadata_frame); |
+ if (header_size < 0) |
+ return false; |
+ |
+ AudioDecoderConfig audio_decoder_config( |
+ kCodecMP3, |
+ kSampleFormatS16, |
+ channel_layout, |
+ sample_rate, |
+ NULL, 0, |
+ false); |
+ |
+ if (!audio_decoder_config.Matches(last_audio_decoder_config_)) { |
+ DVLOG(1) << "Sampling frequency: " << sample_rate; |
+ // Reset the timestamp helper to use a new time scale. |
+ if (audio_timestamp_helper_ && |
+ audio_timestamp_helper_->base_timestamp() != kNoTimestamp()) { |
+ base::TimeDelta base_timestamp = audio_timestamp_helper_->GetTimestamp(); |
+ audio_timestamp_helper_.reset( |
+ new AudioTimestampHelper(sample_rate)); |
+ audio_timestamp_helper_->SetBaseTimestamp(base_timestamp); |
+ } else { |
+ audio_timestamp_helper_.reset( |
+ new AudioTimestampHelper(sample_rate)); |
+ } |
+ // Audio config notification. |
+ last_audio_decoder_config_ = audio_decoder_config; |
+ new_audio_config_cb_.Run(audio_decoder_config); |
+ } |
+ |
+ return true; |
+} |
+ |
+} // namespace mp2t |
+} // namespace media |