Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(585)

Unified Diff: media/mp2t/es_parser_h264.cc

Issue 23566013: Mpeg2 TS stream parser for media source. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Better naming: mp2t namespace & class names Created 7 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: media/mp2t/es_parser_h264.cc
diff --git a/media/mp2t/es_parser_h264.cc b/media/mp2t/es_parser_h264.cc
new file mode 100644
index 0000000000000000000000000000000000000000..59d1a64932c795ddcce2055ebcd8bf6ed0ad3997
--- /dev/null
+++ b/media/mp2t/es_parser_h264.cc
@@ -0,0 +1,524 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "media/mp2t/es_parser_h264.h"
+
+#include "base/basictypes.h"
+#include "base/logging.h"
+#include "media/base/bit_reader.h"
+#include "media/base/buffers.h"
+#include "media/base/stream_parser_buffer.h"
+#include "media/base/video_decoder_config.h"
+#include "media/base/video_frame.h"
+#include "media/mp2t/mp2t_common.h"
+#include "ui/gfx/rect.h"
+#include "ui/gfx/size.h"
+
+// Value of aspect_ratio_idc for which the sample aspect ratio is coded
+// explicitly in the VUI as two 16 bit fields (see ProcessSPS).
+static const int kExtendedSar = 255;
+
+// Sample aspect ratio width, indexed by aspect_ratio_idc (0 to 13).
+static const int kTableSarWidth[14] = {
+  1, 1, 12, 10, 16, 40, 24,
+  20, 32, 80, 18, 15, 64, 160
+};
+
+// Sample aspect ratio height, indexed by aspect_ratio_idc (0 to 13).
+static const int kTableSarHeight[14] = {
+  1, 1, 11, 11, 11, 33, 11,
+  11, 11, 33, 11, 11, 33, 99
+};
+
+namespace media {
+namespace mp2t {
+
+// BitReader extended with the decoding of unsigned Exp-Golomb codes,
+// which is how most of the SPS fields read by ProcessSPS are coded.
+class BitReaderH264 : public BitReader {
+ public:
+  // |data| and |size| are forwarded unchanged to the BitReader constructor.
+  BitReaderH264(const uint8* data, off_t size) : BitReader(data, size) {}
+
+  // Decodes one unsigned Exp-Golomb value into |exp_golomb_value|.
+  // Returns false if the value could not be read.
+  bool ReadBitsExpGolomb(uint32* exp_golomb_value);
+};
+
+// Reads one unsigned Exp-Golomb coded value: |zero_count| leading zero bits,
+// one marker bit set to 1, then |zero_count| suffix bits. The decoded value
+// is (2^zero_count - 1) + suffix.
+// Returns true and sets |exp_golomb_value| on success. Returns false, leaving
+// |exp_golomb_value| untouched, when a read fails or when the value does not
+// fit in 32 bits.
+bool BitReaderH264::ReadBitsExpGolomb(uint32* exp_golomb_value) {
+  // Get the number of leading zeros.
+  int zero_count = 0;
+  while (true) {
+    int one_bit;
+    RCHECK(ReadBits(1, &one_bit));
+    if (one_bit != 0)
+      break;
+    zero_count++;
+  }
+
+  // If zero_count is greater than 31, the calculated value will overflow.
+  if (zero_count > 31) {
+    SkipBits(zero_count);
+    return false;
+  }
damienv1 2013/09/12 20:43:02 TODO: special case if |zero_count| = 0 ?
+
+  // A lone marker bit has no suffix: the value is 0. Handling it here avoids
+  // issuing a zero-width read on the underlying bit reader.
+  if (zero_count == 0) {
+    *exp_golomb_value = 0;
+    return true;
+  }
+
+  // Read the actual value. The shift operates on an unsigned operand because
+  // shifting a signed 1 by 31 bits overflows.
+  uint32 base = (1u << zero_count) - 1;
+  uint32 offset;
+  RCHECK(ReadBits(zero_count, &offset));
+  *exp_golomb_value = base + offset;
+
+  return true;
+}
+
+// Builds a parser with no known video configuration.
+// |new_video_config_cb| is run by ProcessSPS for each new video
+// configuration and |emit_buffer_cb| is run by EmitFrame for each frame.
+EsParserH264::EsParserH264(NewVideoConfigCB new_video_config_cb,
+                           EmitBufferCB emit_buffer_cb)
+    : nal_es_pos_(0),
+      new_video_config_cb_(new_video_config_cb),
+      emit_buffer_cb_(emit_buffer_cb),
+      is_video_config_known_(false),
+      profile_idc_(0),
+      level_idc_(0),
+      pic_width_in_mbs_minus1_(0),
+      pic_height_in_map_units_minus1_(0) {}
+
+// Nothing to release explicitly.
+EsParserH264::~EsParserH264() {}
+
+// Appends the |size| bytes at |buf| (one reassembled PES payload) to the ES
+// queue, then emits every access unit that is followed by another access
+// unit delimiter.
+// Returns false, after resetting the parser, when neither |pts| nor |dts| is
+// provided or when a frame cannot be emitted. Returns true otherwise.
+bool EsParserH264::Parse(const uint8* buf, int size,
+                         base::TimeDelta pts,
+                         base::TimeDelta dts) {
+  // Note: a PES packet does not necessarily map to one h264 access unit.
+  // The HLS recommendation is to use one PES for each access unit, but this
+  // is only a recommendation and some streams do not comply with it.
+
+  // HLS recommendation: "In AVC video, you should have both a DTS and a
+  // PTS in each PES header". At least one of them is required here.
+  const bool has_dts = (dts != kNoTimestamp());
+  if (!has_dts && pts == kNoTimestamp()) {
+    DVLOG(1) << "A timestamp must be provided for each reassembled PES";
+    Reset();
+    return false;
+  }
+
+  // The DTS falls back to the PTS when it is missing.
+  TimingDesc timing_desc;
+  timing_desc.pts = pts;
+  timing_desc.dts = has_dts ? dts : pts;
+
+  // Link the timing descriptor with the position in the ES queue where the
+  // incoming bytes are going to start.
+  int raw_es_size;
+  const uint8* raw_es;
+  es_byte_queue_.Peek(&raw_es, &raw_es_size);
+  timing_desc_list_.push_back(
+      std::pair<int, TimingDesc>(raw_es_size, timing_desc));
+
+  // Queue the incoming bytes and extend the NAL segmentation to them.
+  es_byte_queue_.Push(buf, size);
+  es_byte_queue_.Peek(&raw_es, &raw_es_size);
+  FindNals();
+
+  // Access units are delimited by AUD NALs. Without any AUD, everything
+  // scanned so far can be dropped.
+  typedef std::list<NalDescList::const_iterator> AccessUnitList;
+  AccessUnitList access_unit_list;
+  FindAccessUnits(&access_unit_list);
+  if (access_unit_list.empty()) {
+    DiscardEs(nal_es_pos_);
+    return true;
+  }
+
+  AccessUnitList::iterator cur_au = access_unit_list.begin();
+  LOG_IF(WARNING, (*cur_au)->position != 0)
+      << "Needs to discard some ES data before getting the 1st access unit: "
+      << (*cur_au)->position;
+
+  // Emit each access unit which is terminated by the next AUD. The last
+  // access unit of the list stays in the queue until more data is received.
+  AccessUnitList::iterator nxt_au = cur_au;
+  ++nxt_au;
+  while (nxt_au != access_unit_list.end()) {
+    if (!EmitFrame(*cur_au, *nxt_au, (*nxt_au)->position)) {
+      Reset();
+      return false;
+    }
+    ++cur_au;
+    ++nxt_au;
+  }
+
+  // Discard everything located before the last access unit.
+  DiscardEs(access_unit_list.back()->position);
+  return true;
+}
+
+// Emits the access unit still pending in the ES queue, if any, even though
+// it might be incomplete. The result of EmitFrame is not reported.
+void EsParserH264::Flush() {
+  // Find access units based on AUD.
+  std::list<NalDescList::const_iterator> access_unit_list;
+  FindAccessUnits(&access_unit_list);
+
+  // At this point, there can be at most one access unit in the buffer.
+  // An empty list is legitimate (nothing left to flush), so only the upper
+  // bound is asserted.
+  DCHECK_LE(access_unit_list.size(), 1u);
+  if (access_unit_list.empty())
+    return;
+
+  // Force emitting the last access unit (even if it might be incomplete):
+  // the frame extends up to the end of the ES queue and covers every NAL
+  // remaining in the NAL list.
+  int nxt_frame_position = 0;
+  const uint8* raw_es = NULL;
+  es_byte_queue_.Peek(&raw_es, &nxt_frame_position);
+  NalDescList::const_iterator cur_frame = *(access_unit_list.begin());
+  NalDescList::const_iterator nxt_frame = nal_desc_list_.end();
+  EmitFrame(cur_frame, nxt_frame, nxt_frame_position);
+}
+
+// Drops all the buffered ES data together with the NAL and timing
+// descriptors, and forgets the current video configuration.
+void EsParserH264::Reset() {
+  DVLOG(1) << "EsParserH264::Reset";
+
+  // Parsing state.
+  nal_es_pos_ = 0;
+  is_video_config_known_ = false;
+
+  // Buffered data and its descriptors.
+  nal_desc_list_.clear();
+  timing_desc_list_.clear();
+  es_byte_queue_.Reset();
+}
+
+// Scans the ES queue for annexB start codes, resuming at |nal_es_pos_|, and
+// appends one NAL descriptor (position of the start code + NAL type) to
+// |nal_desc_list_| for each start code found.
+void EsParserH264::FindNals() {
+  int raw_es_size;
+  const uint8* raw_es;
+  es_byte_queue_.Peek(&raw_es, &raw_es_size);
+
+  // |nal_es_pos_| can be equal to |raw_es_size|: the previous scan may have
+  // ended on a start code whose NAL header was the last byte of the queue,
+  // and the queue can be empty. The loop bound below prevents any read in
+  // these cases, so only positions past the end are considered as errors.
+  DCHECK_GE(nal_es_pos_, 0);
+  DCHECK_LE(nal_es_pos_, raw_es_size);
+
+  // Resume NAL segmentation where it was left. The bound guarantees that the
+  // 4 possible start code bytes and the NAL header byte are all available.
+  for ( ; nal_es_pos_ < raw_es_size - 4; nal_es_pos_++) {
+    // Make sure the syncword is either 00 00 00 01 or 00 00 01
+    if (raw_es[nal_es_pos_ + 0] != 0 ||
+        raw_es[nal_es_pos_ + 1] != 0) {
+      continue;
+    }
+    int syncword_length = 0;
+    if (raw_es[nal_es_pos_ + 2] == 0 &&
+        raw_es[nal_es_pos_ + 3] == 1) {
+      syncword_length = 4;
+    } else if (raw_es[nal_es_pos_ + 2] == 1) {
+      syncword_length = 3;
+    } else {
+      continue;
+    }
+
+    // Retrieve the NAL type. A NAL whose forbidden_zero_bit is set is
+    // recorded with an invalid type.
+    int nal_header = raw_es[nal_es_pos_ + syncword_length];
+    int forbidden_zero_bit = (nal_header >> 7) & 0x1;
+    NalDesc nal_desc;
+    nal_desc.position = nal_es_pos_;
+    nal_desc.nal_unit_type = static_cast<NalUnitType>(nal_header & 0x1f);
+    if (forbidden_zero_bit != 0)
+      nal_desc.nal_unit_type = kNalUnitTypeInvalid;
+    DVLOG(LOG_LEVEL_ES) << "nal: offset=" << nal_desc.position
+                        << " type=" << nal_desc.nal_unit_type;
+    nal_desc_list_.push_back(nal_desc);
+
+    // Skip the start code; the loop increment then skips the NAL header.
+    nal_es_pos_ += syncword_length;
+  }
+}
+
+// Appends to |access_unit_list| an iterator on each access unit delimiter
+// (AUD) of |nal_desc_list_|, in stream order.
+// Mpeg2TS spec: "2.14 Carriage of Rec. ITU-T H.264 | ISO/IEC 14496-10 video"
+// "Each AVC access unit shall contain an access unit delimiter NAL Unit;"
+void EsParserH264::FindAccessUnits(
+    std::list<NalDescList::const_iterator>* access_unit_list) {
+  NalDescList::const_iterator nal = nal_desc_list_.begin();
+  const NalDescList::const_iterator nal_end = nal_desc_list_.end();
+  while (nal != nal_end) {
+    if (nal->nal_unit_type == kNalUnitTypeAUD) {
+      DVLOG(LOG_LEVEL_ES) << "aud found @ pos=" << nal->position;
+      access_unit_list->push_back(nal);
+    }
+    ++nal;
+  }
+}
+
+// Emits the access unit made of the NALs in [|cur_frame|, |nxt_frame|).
+// The access unit data starts at the position of |cur_frame| in the ES queue
+// and ends right before |nxt_frame_position|.
+// Returns false, without emitting anything, as soon as NalParser rejects one
+// of the NALs. Returns true once the frame has been given to
+// |emit_buffer_cb_|.
+bool EsParserH264::EmitFrame(
+    NalDescList::const_iterator& cur_frame,
+    NalDescList::const_iterator& nxt_frame,
+    int nxt_frame_position) {
+  const uint8* es_data;
+  int es_size;
+  es_byte_queue_.Peek(&es_data, &es_size);
+
+  // The frame begins with its 1st NAL.
+  const int frame_begin = cur_frame->position;
+  const int frame_size = nxt_frame_position - frame_begin;
+
+  // Consume every timing descriptor located at or before the beginning of
+  // the frame: the last one consumed gives the frame timing.
+  TimingDesc frame_timing;
+  while (!timing_desc_list_.empty()) {
+    if (timing_desc_list_.front().first > frame_begin)
+      break;
+    frame_timing = timing_desc_list_.front().second;
+    timing_desc_list_.pop_front();
+  }
+
+  // Light parsing of each NAL of the frame (e.g. to get the SPS), which also
+  // gives the key frame decision: a frame is a key frame unless it holds a
+  // non IDR slice.
+  // Note: the Mpeg2TS random_access_indicator would have been a nice way to
+  // get this decision but encoders sometimes do not set this flag.
+  bool is_key_frame = true;
+  NalDescList::const_iterator nal = cur_frame;
+  while (nal != nxt_frame) {
+    if (nal->nal_unit_type == kNalUnitTypeNonIdrSlice)
+      is_key_frame = false;
+
+    // A NAL extends up to the next NAL, or up to the end of the frame for
+    // the last one.
+    const int nal_begin = nal->position;
+    ++nal;
+    const int nal_end =
+        (nal == nxt_frame) ? nxt_frame_position : nal->position;
+    const int nal_size = nal_end - nal_begin;
+    DCHECK_LE(nal_begin + nal_size, es_size);
+    if (!NalParser(&es_data[nal_begin], nal_size))
+      return false;
+  }
+
+  // Copy the access unit into a buffer and hand it over with its timing.
+  DVLOG(LOG_LEVEL_ES) << "is_key_frame = " << is_key_frame;
+  scoped_refptr<StreamParserBuffer> frame_buffer =
+      StreamParserBuffer::CopyFrom(
+          &es_data[frame_begin], frame_size, is_key_frame);
+  frame_buffer->SetDecodeTimestamp(frame_timing.dts);
+  frame_buffer->set_timestamp(frame_timing.pts);
+  emit_buffer_cb_.Run(frame_buffer);
+
+  return true;
+}
+
+// Removes the first |nbytes| bytes of the ES queue and shifts every recorded
+// position (NAL descriptors, NAL scan position, timing descriptors) by the
+// same amount. Does nothing when |nbytes| is not strictly positive.
+void EsParserH264::DiscardEs(int nbytes) {
+  if (nbytes <= 0)
+    return;
+
+  // Forget the NALs starting inside the discarded area.
+  while (!nal_desc_list_.empty()) {
+    if (nal_desc_list_.front().position >= nbytes)
+      break;
+    nal_desc_list_.pop_front();
+  }
+
+  // Rebase the remaining NALs.
+  for (NalDescList::iterator nal = nal_desc_list_.begin();
+       nal != nal_desc_list_.end(); ++nal) {
+    DCHECK(nal->position >= nbytes);
+    nal->position -= nbytes;
+  }
+
+  // Rebase the NAL scan position, which never goes below 0.
+  nal_es_pos_ = (nal_es_pos_ > nbytes) ? (nal_es_pos_ - nbytes) : 0;
+
+  // Rebase the timing descriptors. No lower bound is applied to them.
+  typedef std::list<std::pair<int, TimingDesc> > TimingDescList;
+  for (TimingDescList::iterator timing = timing_desc_list_.begin();
+       timing != timing_desc_list_.end(); ++timing) {
+    timing->first -= nbytes;
+  }
+
+  // Finally, drop the bytes themselves.
+  es_byte_queue_.Pop(nbytes);
+}
+
+// Performs a light parsing of one NAL: |buf| points to the annexB start code
+// of the NAL and |size| covers the start code, the NAL header and the NAL
+// payload. Only the SPS payload is actually processed.
+// Returns false when the start code or the NAL header is invalid, or when
+// the SPS is rejected. Returns true otherwise.
+bool EsParserH264::NalParser(const uint8* buf, int size) {
+  // Identify the annexB syncword: either 00 00 01 or 00 00 00 01.
+  int syncword_length = 0;
+  if (size >= 3 && buf[0] == 0 && buf[1] == 0) {
+    if (buf[2] == 1)
+      syncword_length = 3;
+    else if (size >= 4 && buf[2] == 0 && buf[3] == 1)
+      syncword_length = 4;
+  }
+  if (syncword_length == 0) {
+    DVLOG(1) << "NalParser: bad annexB start code";
+    return false;
+  }
+  buf += syncword_length;
+  size -= syncword_length;
+
+  // The NAL header is the byte following the syncword.
+  if (size < 1) {
+    DVLOG(1) << "NalParser: incomplete NAL";
+    return false;
+  }
+  const int nal_header = *buf;
+  ++buf;
+  --size;
+
+  // NAL header layout: forbidden_zero_bit (1 bit), nal_ref_idc (2 bits),
+  // nal_unit_type (5 bits).
+  if (((nal_header >> 7) & 0x1) != 0)
+    return false;
+  const int nal_ref_idc = (nal_header >> 5) & 0x3;
+  const int nal_unit_type = nal_header & 0x1f;
+
+  // TODO(damienv):
+  // The nal start code emulation prevention should be un-done,
+  // before parsing the NAL content.
+
+  switch (nal_unit_type) {
+    case kNalUnitTypeSPS:
+      DVLOG(LOG_LEVEL_ES) << "NAL: SPS";
+      // |nal_ref_idc| should not be 0 for a SPS.
+      if (nal_ref_idc == 0)
+        return false;
+      return ProcessSPS(buf, size);
+
+    case kNalUnitTypeIdrSlice:
+      DVLOG(LOG_LEVEL_ES) << "NAL: IDR slice";
+      return true;
+
+    case kNalUnitTypeNonIdrSlice:
+      DVLOG(LOG_LEVEL_ES) << "NAL: Non IDR slice";
+      return true;
+
+    case kNalUnitTypePPS:
+      DVLOG(LOG_LEVEL_ES) << "NAL: PPS";
+      return true;
+
+    case kNalUnitTypeAUD:
+      DVLOG(LOG_LEVEL_ES) << "NAL: AUD";
+      return true;
+
+    default:
+      break;
+  }
+
+  // Any other NAL type is only logged.
+  DVLOG(LOG_LEVEL_ES) << "NAL: " << nal_unit_type;
+  return true;
+}
+
+// Parses the payload of a sequence parameter set (|buf| points right after
+// the NAL header, |size| is the number of remaining bytes) and runs
+// |new_video_config_cb_| when the profile, the level or the picture size in
+// macroblocks differs from the previous SPS.
+// Returns false when the SPS is truncated, invalid or not supported
+// (pic_order_cnt_type 1, non square pixels). Returns true otherwise.
+bool EsParserH264::ProcessSPS(const uint8* buf, int size) {
+  if (size <= 0)
+    return false;
+  BitReaderH264 bit_reader(buf, size);
+
+  // NOTE(review): the fields specific to the high profiles
+  // (chroma_format_idc, bit depths, scaling lists), which follow
+  // seq_parameter_set_id for those profiles, are not parsed here - confirm
+  // that such streams are not expected by this parser.
+  int profile_idc;
+  int constraint_setX_flag;
+  int level_idc;
+  uint32 seq_parameter_set_id;
+  uint32 log2_max_frame_num_minus4;
+  uint32 pic_order_cnt_type;
+  RCHECK(bit_reader.ReadBits(8, &profile_idc));
+  RCHECK(bit_reader.ReadBits(8, &constraint_setX_flag));
+  RCHECK(bit_reader.ReadBits(8, &level_idc));
+  RCHECK(bit_reader.ReadBitsExpGolomb(&seq_parameter_set_id));
+  RCHECK(bit_reader.ReadBitsExpGolomb(&log2_max_frame_num_minus4));
+  RCHECK(bit_reader.ReadBitsExpGolomb(&pic_order_cnt_type));
+
+  // |pic_order_cnt_type| shall be in the range of 0 to 2.
+  if (pic_order_cnt_type > 2)
+    return false;
+  if (pic_order_cnt_type == 0) {
+    uint32 log2_max_pic_order_cnt_lsb_minus4;
+    RCHECK(bit_reader.ReadBitsExpGolomb(&log2_max_pic_order_cnt_lsb_minus4));
+  } else if (pic_order_cnt_type == 1) {
+    NOTIMPLEMENTED();
+    return false;
+  }
+
+  uint32 num_ref_frames;
+  int gaps_in_frame_num_value_allowed_flag;
+  uint32 pic_width_in_mbs_minus1;
+  uint32 pic_height_in_map_units_minus1;
+  RCHECK(bit_reader.ReadBitsExpGolomb(&num_ref_frames));
+  RCHECK(bit_reader.ReadBits(1, &gaps_in_frame_num_value_allowed_flag));
+  RCHECK(bit_reader.ReadBitsExpGolomb(&pic_width_in_mbs_minus1));
+  RCHECK(bit_reader.ReadBitsExpGolomb(&pic_height_in_map_units_minus1));
+
+  int frame_mbs_only_flag;
+  RCHECK(bit_reader.ReadBits(1, &frame_mbs_only_flag));
+  if (!frame_mbs_only_flag) {
+    int mb_adaptive_frame_field_flag;
+    RCHECK(bit_reader.ReadBits(1, &mb_adaptive_frame_field_flag));
+  }
+
+  int direct_8x8_inference_flag;
+  RCHECK(bit_reader.ReadBits(1, &direct_8x8_inference_flag));
+
+  bool frame_cropping_flag;
+  uint32 frame_crop_left_offset = 0;
+  uint32 frame_crop_right_offset = 0;
+  uint32 frame_crop_top_offset = 0;
+  uint32 frame_crop_bottom_offset = 0;
+  RCHECK(bit_reader.ReadBits(1, &frame_cropping_flag));
+  if (frame_cropping_flag) {
+    RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_left_offset));
+    RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_right_offset));
+    RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_top_offset));
+    RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_bottom_offset));
+  }
+
+  bool vui_parameters_present_flag;
+  RCHECK(bit_reader.ReadBits(1, &vui_parameters_present_flag));
+  int sar_width = 1;
+  int sar_height = 1;
+  if (vui_parameters_present_flag) {
+    // Read only the aspect ratio information from the VUI section.
+    // TODO(damienv): check whether other VUI info are useful.
+    bool aspect_ratio_info_present_flag = false;
+    RCHECK(bit_reader.ReadBits(1, &aspect_ratio_info_present_flag));
+    if (aspect_ratio_info_present_flag) {
+      int aspect_ratio_idc;
+      RCHECK(bit_reader.ReadBits(8, &aspect_ratio_idc));
+      if (aspect_ratio_idc == kExtendedSar) {
+        RCHECK(bit_reader.ReadBits(16, &sar_width));
+        RCHECK(bit_reader.ReadBits(16, &sar_height));
+      } else if (aspect_ratio_idc < 14) {
+        sar_width = kTableSarWidth[aspect_ratio_idc];
+        sar_height = kTableSarHeight[aspect_ratio_idc];
+      }
+      // Any other |aspect_ratio_idc| keeps the default 1:1 ratio.
+    }
+  }
+
+  if (sar_width != sar_height) {
+    // TODO(damienv): Support non square pixels.
+    DVLOG(1)
+        << "Non square pixel not supported yet:"
+        << " sar_width=" << sar_width
+        << " sar_height=" << sar_height;
+    return false;
+  }
+
+  // Coded size in luma samples. A map unit is one macroblock high for frame
+  // only sequences and two macroblocks high otherwise, i.e.
+  // FrameHeightInMbs = (2 - frame_mbs_only_flag) * PicHeightInMapUnits.
+  // The computation is done on 64 bits since the Exp-Golomb values come from
+  // the stream and can be as large as 2^32 - 2.
+  const uint64 map_unit_height_in_mbs = frame_mbs_only_flag ? 1 : 2;
+  const uint64 coded_width =
+      (static_cast<uint64>(pic_width_in_mbs_minus1) + 1) * 16;
+  const uint64 coded_height =
+      (static_cast<uint64>(pic_height_in_map_units_minus1) + 1) *
+      map_unit_height_in_mbs * 16;
+  if (coded_width > static_cast<uint64>(kint32max) ||
+      coded_height > static_cast<uint64>(kint32max)) {
+    DVLOG(1) << "Invalid coded size: " << coded_width << "x" << coded_height;
+    return false;
+  }
+
+  // The crop offsets are expressed in crop units, not in luma samples.
+  // Since chroma_format_idc is not parsed, the content is assumed to be
+  // 4:2:0, for which a crop unit is 2 luma samples horizontally and
+  // 2 * (2 - frame_mbs_only_flag) luma samples vertically.
+  const uint64 crop_unit_x = 2;
+  const uint64 crop_unit_y = 2 * map_unit_height_in_mbs;
+  const uint64 crop_left = crop_unit_x * frame_crop_left_offset;
+  const uint64 crop_right = crop_unit_x * frame_crop_right_offset;
+  const uint64 crop_top = crop_unit_y * frame_crop_top_offset;
+  const uint64 crop_bottom = crop_unit_y * frame_crop_bottom_offset;
+  // The cropping must leave at least one sample in each direction.
+  if (crop_left + crop_right >= coded_width ||
+      crop_top + crop_bottom >= coded_height) {
+    DVLOG(1) << "Invalid frame cropping";
+    return false;
+  }
+
+  // Note: the cropping and |frame_mbs_only_flag| are not part of this
+  // comparison.
+  if (is_video_config_known_ &&
+      profile_idc == profile_idc_ &&
+      level_idc == level_idc_ &&
+      pic_width_in_mbs_minus1 == pic_width_in_mbs_minus1_ &&
+      pic_height_in_map_units_minus1 == pic_height_in_map_units_minus1_) {
+    // This is the same SPS as the previous one.
+    return true;
+  }
+  is_video_config_known_ = true;
+  profile_idc_ = profile_idc;
+  level_idc_ = level_idc;
+  pic_width_in_mbs_minus1_ = pic_width_in_mbs_minus1;
+  pic_height_in_map_units_minus1_ = pic_height_in_map_units_minus1;
+
+  // TODO(damienv):
+  // Assuming the SPS is used right away by the PPS
+  // and the slice headers is a strong assumption.
+  // In theory, we should process the SPS and PPS
+  // and only when one of the slice header is switching
+  // the PPS id, the video decoder config should be changed.
+  DVLOG(1) << "Profile IDC: " << profile_idc;
+  DVLOG(1) << "Level IDC: " << level_idc;
+  DVLOG(1) << "Pic width: " << coded_width;
+  DVLOG(1) << "Pic height: " << coded_height;
+  DVLOG(1) << "log2_max_frame_num_minus4: " << log2_max_frame_num_minus4;
+
+  // The range checks above make these conversions to int safe.
+  gfx::Size coded_size(static_cast<int>(coded_width),
+                       static_cast<int>(coded_height));
+  gfx::Rect visible_rect(
+      static_cast<int>(crop_left),
+      static_cast<int>(crop_top),
+      static_cast<int>(coded_width - crop_left - crop_right),
+      static_cast<int>(coded_height - crop_top - crop_bottom));
+
+  // TODO(damienv): calculate the natural size based
+  // on the possible aspect ratio coded in the VUI parameters.
+  gfx::Size natural_size(visible_rect.width(),
+                         visible_rect.height());
+
+  VideoDecoderConfig video_decoder_config(
+      kCodecH264,
+      VIDEO_CODEC_PROFILE_UNKNOWN,  // TODO(damienv)
+      VideoFrame::YV12,
+      coded_size,
+      visible_rect,
+      natural_size,
+      NULL, 0,
+      false);
+  new_video_config_cb_.Run(video_decoder_config);
+
+  return true;
+}
+
+} // namespace mp2t
+} // namespace media
+

Powered by Google App Engine
This is Rietveld 408576698