media/mp2t/es_parser_h264.cc - Issue 23566013: Mpeg2 TS stream parser for media source.

Side by Side Diff: media/mp2t/es_parser_h264.cc

Issue 23566013: Mpeg2 TS stream parser for media source. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Improve buffer emission + Cleanup Created 7 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include "media/mp2t/es_parser_h264.h"

	6

	7 #include "base/basictypes.h"

	8 #include "base/logging.h"

	9 #include "media/base/bit_reader.h"

	10 #include "media/base/buffers.h"

	11 #include "media/base/stream_parser_buffer.h"

	12 #include "media/base/video_frame.h"

	13 #include "media/mp2t/mp2t_common.h"

	14 #include "ui/gfx/rect.h"

	15 #include "ui/gfx/size.h"

	16

	// Value of aspect_ratio_idc signalling that the sample aspect ratio is coded
	// explicitly in the VUI as a 16-bit width followed by a 16-bit height.
	static const int kExtendedSar = 255;

	// ISO 14496 part 10
	// VUI parameters: Table E-1 "Meaning of sample aspect ratio indicator"
	// Both tables are indexed by aspect_ratio_idc: entry i of the width table
	// pairs with entry i of the height table.
	static const int kTableSarWidth[14] = {
	  1,   // 0
	  1,   // 1
	  12,  // 2
	  10,  // 3
	  16,  // 4
	  40,  // 5
	  24,  // 6
	  20,  // 7
	  32,  // 8
	  80,  // 9
	  18,  // 10
	  15,  // 11
	  64,  // 12
	  160  // 13
	};

	static const int kTableSarHeight[14] = {
	  1,   // 0
	  1,   // 1
	  11,  // 2
	  11,  // 3
	  11,  // 4
	  33,  // 5
	  11,  // 6
	  11,  // 7
	  11,  // 8
	  33,  // 9
	  11,  // 10
	  11,  // 11
	  33,  // 12
	  99   // 13
	};

	28

	// Strips the start code emulation prevention bytes from |buf|: a 0x03 byte
	// that follows two or more consecutive zero bytes is dropped, every other
	// byte is copied to |buf_rbsp| unchanged.
	// Returns the number of bytes written to |buf_rbsp|.
	// Note: |buf_rbsp| must be able to hold at least |size| bytes to accommodate
	// the worst case (no byte removed).
	static int ConvertToRbsp(const uint8* buf, int size, uint8* buf_rbsp) {
	  int written = 0;
	  int zero_run = 0;
	  int pos = 0;
	  while (pos < size) {
	    const uint8 byte = buf[pos++];
	    const bool is_emulation_prevention = (zero_run >= 2 && byte == 0x3);
	    if (!is_emulation_prevention)
	      buf_rbsp[written++] = byte;
	    // An emulation prevention byte is non-zero, so it resets the run too.
	    zero_run = (byte == 0) ? zero_run + 1 : 0;
	  }
	  return written;
	}

	49

	50 namespace media {

	51 namespace mp2t {

	52

	// NAL unit types this parser distinguishes; every other type is only logged.
	// Values come from ISO 14496 - Part 10: Table 7-1 "NAL unit type codes".
	enum NalUnitType {
	  // Coded slice of a non-IDR picture.
	  kNalUnitTypeNonIdrSlice = 1,
	  // Coded slice of an IDR picture.
	  kNalUnitTypeIdrSlice = 5,
	  // Sequence parameter set.
	  kNalUnitTypeSPS = 7,
	  // Picture parameter set.
	  kNalUnitTypePPS = 8,
	  // Access unit delimiter.
	  kNalUnitTypeAUD = 9,
	};

	61

	62 class BitReaderH264 : public BitReader {

	63 public:

	64 BitReaderH264(const uint8* data, off_t size)

	65 : BitReader(data, size) { }

	66

	67 // Read an unsigned exp-golomb value.

	68 // Return true if successful.

	69 bool ReadBitsExpGolomb(uint32* exp_golomb_value);

	70 };

	71

	72 bool BitReaderH264::ReadBitsExpGolomb(uint32* exp_golomb_value) {

	73 // Get the number of leading zeros.

	74 int zero_count = 0;

	75 while (true) {

	76 int one_bit;

	77 RCHECK(ReadBits(1, &one_bit));

	78 if (one_bit != 0)

	79 break;

	80 zero_count++;

	81 }

	82

	83 // If zero_count is greater than 31, the calculated value will overflow.

	84 if (zero_count > 31) {

	85 SkipBits(zero_count);

	86 return false;

	87 }

	88

	89 // Read the actual value.

	90 uint32 base = (1 << zero_count) - 1;

	91 uint32 offset;

	92 RCHECK(ReadBits(zero_count, &offset));

	93 *exp_golomb_value = base + offset;

	94

	95 return true;

	96 }

	97

	98 EsParserH264::EsParserH264(

	99 const NewVideoConfigCB& new_video_config_cb,

	100 const EmitBufferCB& emit_buffer_cb)

	101 : new_video_config_cb_(new_video_config_cb),

	102 emit_buffer_cb_(emit_buffer_cb),

	103 es_pos_(0),

	104 current_nal_pos_(-1),

	105 current_access_unit_pos_(-1),

	106 is_key_frame_(false) {

	107 }

	108

	109 EsParserH264::~EsParserH264() {

	110 }

	111

	112 bool EsParserH264::Parse(const uint8* buf, int size,

	113 base::TimeDelta pts,

	114 base::TimeDelta dts) {

	115 // Note: Parse is invoked each time a PES packet has been reassembled.

	116 // Unfortunately, a PES packet does not necessarily map

	117 // to an h264 access unit, although the HLS recommendation is to use one PES

	118 // for each access unit (but this is just a recommendation and some streams

	119 // do not comply with this recommendation).

	120

	121 // Link position \|raw_es_size\| in the ES stream with a timing descriptor.

	122 // HLS recommendation: "In AVC video, you should have both a DTS and a

	123 // PTS in each PES header".

	124 if (dts == kNoTimestamp() && pts == kNoTimestamp()) {

	125 DVLOG(1) << "A timestamp must be provided for each reassembled PES";

	126 return false;

	127 }

	128 TimingDesc timing_desc;

	129 timing_desc.pts = pts;

	130 timing_desc.dts = (dts != kNoTimestamp()) ? dts : pts;

	131

	132 int raw_es_size;

	133 const uint8* raw_es;

	134 es_byte_queue_.Peek(&raw_es, &raw_es_size);

	135 timing_desc_list_.push_back(

	136 std::pair<int, TimingDesc>(raw_es_size, timing_desc));

	137

	138 // Add the incoming bytes to the ES queue.

	139 es_byte_queue_.Push(buf, size);

	140

	141 // Add NALs from the incoming buffer.

	142 if (!ParseInternal())

	143 return false;

	144

	145 // Discard emitted frames

	146 // or every byte that was parsed so far if there is no current frame.

	147 int skip_count =

	148 (current_access_unit_pos_ >= 0) ? current_access_unit_pos_ : es_pos_;

	149 DiscardEs(skip_count);

	150

	151 return true;

	152 }

	153

	154 void EsParserH264::Flush() {

	155 if (current_access_unit_pos_ < 0)

	156 return;

	157

	158 // Force emitting the last access unit.

	159 int next_aud_pos;

	160 const uint8* raw_es;

	161 es_byte_queue_.Peek(&raw_es, &next_aud_pos);

	162 EmitFrameIfNeeded(next_aud_pos);

	163 current_nal_pos_ = -1;

	164 current_access_unit_pos_ = -1;

	165

	166 // Discard the emitted frame.

	167 DiscardEs(next_aud_pos);

	168 }

	169

	170 void EsParserH264::Reset() {

	171 DVLOG(1) << "EsParserH264::Reset";

	172 es_byte_queue_.Reset();

	173 timing_desc_list_.clear();

	174 es_pos_ = 0;

	175 current_nal_pos_ = -1;

	176 current_access_unit_pos_ = -1;

	177 is_key_frame_ = false;

	178 last_video_decoder_config_ = VideoDecoderConfig();

	179 }

	180

	181 bool EsParserH264::ParseInternal() {

	182 int raw_es_size;

	183 const uint8* raw_es;

	184 es_byte_queue_.Peek(&raw_es, &raw_es_size);

	185

	186 DCHECK_GE(es_pos_, 0);

	187 DCHECK_LT(es_pos_, raw_es_size);

	188

	189 // Resume h264 es parsing where it was left.

	190 for ( ; es_pos_ < raw_es_size - 4; es_pos_++) {

	191 // Make sure the syncword is either 00 00 00 01 or 00 00 01

	192 if (raw_es[es_pos_ + 0] != 0 \|\| raw_es[es_pos_ + 1] != 0)

	193 continue;

	194 int syncword_length = 0;

	195 if (raw_es[es_pos_ + 2] == 0 && raw_es[es_pos_ + 3] == 1)

	196 syncword_length = 4;

	197 else if (raw_es[es_pos_ + 2] == 1)

	198 syncword_length = 3;

	199 else

	200 continue;

	201

	202 // Parse the current NAL (and the new NAL then becomes the current one).

	203 if (current_nal_pos_ >= 0) {

	204 int nal_size = es_pos_ - current_nal_pos_;

	205 DCHECK_GT(nal_size, 0);

	206 RCHECK(NalParser(&raw_es[current_nal_pos_], nal_size));

	207 }

	208 current_nal_pos_ = es_pos_ + syncword_length;

	209

	210 // Retrieve the NAL type.

	211 int nal_header = raw_es[es_pos_ + syncword_length];
	acolwell GONE FROM CHROMIUM 2013/09/18 01:46:05 nit: use current_nal_pos_ here instead just to mak nit: use current_nal_pos_ here instead just to make it immediately obvious we are reading the first byte of a NALU? damienv1 2013/09/18 21:40:17 Done. Show quoted text On 2013/09/18 01:46:05, acolwell wrote: > nit: use current_nal_pos_ here instead just to make it immediately obvious we > are reading the first byte of a NALU? Done.
	212 int forbidden_zero_bit = (nal_header >> 7) & 0x1;

	213 RCHECK(forbidden_zero_bit == 0);

	214 NalUnitType nal_unit_type = static_cast<NalUnitType>(nal_header & 0x1f);

	215 DVLOG(LOG_LEVEL_ES) << "nal: offset=" << es_pos_

	216 << " type=" << nal_unit_type;

	217

	218 // Emit a frame if needed.

	219 if (nal_unit_type == kNalUnitTypeAUD)

	220 EmitFrameIfNeeded(es_pos_);

	221

	222 // Skip the syncword.

	223 es_pos_ += syncword_length;

	224 }

	225

	226 return true;

	227 }

	228

	229 void EsParserH264::EmitFrameIfNeeded(int next_aud_pos) {

	230 // There is no current frame: start a new frame.

	231 if (current_access_unit_pos_ < 0) {

	232 current_access_unit_pos_ = next_aud_pos;
	acolwell GONE FROM CHROMIUM 2013/09/18 01:46:05 nit: Since current_access_unit_pos_ and is_key_fra nit: Since current_access_unit_pos_ and is_key_frame appear to always get set together WDYT about using the following helper function to ensure these 2 are always set consistently. void SetAccessUnitPos(int pos) { current_access_unit_pos_ = pos; is_key_frame_ = pos >= 0; } If not, then I think you need to add a is_key_frame_ = false; to Flush() so that it is always false when current_Access_unit_pos_ = -1. damienv1 2013/09/18 21:40:17 Done. Show quoted text On 2013/09/18 01:46:05, acolwell wrote: > nit: Since current_access_unit_pos_ and is_key_frame appear to always get set > together WDYT about using the following helper function to ensure these 2 are > always set consistently. > > void SetAccessUnitPos(int pos) { > current_access_unit_pos_ = pos; > is_key_frame_ = pos >= 0; > } > > If not, then I think you need to add a is_key_frame_ = false; to Flush() so that > it is always false when current_Access_unit_pos_ = -1. Done.
	233 is_key_frame_ = true;

	234 return;

	235 }

	236

	237 // Get the access unit timing info.

	238 TimingDesc current_timing_desc;

	239 while (!timing_desc_list_.empty() &&

	240 timing_desc_list_.front().first <= current_access_unit_pos_) {

	241 current_timing_desc = timing_desc_list_.front().second;

	242 timing_desc_list_.pop_front();

	243 }

	244

	245 // Emit a frame.

	246 int raw_es_size;

	247 const uint8* raw_es;

	248 es_byte_queue_.Peek(&raw_es, &raw_es_size);

	249 int access_unit_size = next_aud_pos - current_access_unit_pos_;

	250 scoped_refptr<StreamParserBuffer> stream_parser_buffer =

	251 StreamParserBuffer::CopyFrom(

	252 &raw_es[current_access_unit_pos_],

	253 access_unit_size,

	254 is_key_frame_);

	255 stream_parser_buffer->SetDecodeTimestamp(current_timing_desc.dts);

	256 stream_parser_buffer->set_timestamp(current_timing_desc.pts);

	257 emit_buffer_cb_.Run(stream_parser_buffer);

	258

	259 // Start a new frame.

	260 // \|is_key_frame_\| will be updated while parsing the NALs of that frame.

	261 current_access_unit_pos_ = es_pos_;

	262 is_key_frame_ = true;

	263 }

	264

	265 void EsParserH264::DiscardEs(int nbytes) {

	266 DCHECK_GE(nbytes, 0);

	267 if (nbytes == 0)

	268 return;

	269

	270 // Update the position of

	271 // - the parser,

	272 // - the current NAL,

	273 // - the current access unit.

	274 es_pos_ -= nbytes;

	275 if (es_pos_ < 0)

	276 es_pos_ = 0;

	277

	278 if (current_nal_pos_ >= 0) {

	279 DCHECK_GE(current_nal_pos_, nbytes);

	280 current_nal_pos_ -= nbytes;

	281 }

	282 if (current_access_unit_pos_ >= 0) {

	283 DCHECK_GE(current_access_unit_pos_, nbytes);

	284 current_access_unit_pos_ -= nbytes;

	285 }

	286

	287 // Update the timing information accordingly.

	288 std::list<std::pair<int, TimingDesc> >::iterator timing_it

	289 = timing_desc_list_.begin();

	290 for (; timing_it != timing_desc_list_.end(); ++timing_it)

	291 timing_it->first -= nbytes;

	292

	293 // Discard \|nbytes\| of ES.

	294 es_byte_queue_.Pop(nbytes);

	295 }

	296

	297 bool EsParserH264::NalParser(const uint8* buf, int size) {

	298 // Get the NAL header.

	299 if (size < 1) {

	300 DVLOG(1) << "NalParser: incomplete NAL";

	301 return false;

	302 }

	303 int nal_header = buf[0];

	304 buf += 1;

	305 size -= 1;

	306

	307 int forbidden_zero_bit = (nal_header >> 7) & 0x1;

	308 if (forbidden_zero_bit != 0)

	309 return false;

	310 int nal_ref_idc = (nal_header >> 5) & 0x3;

	311 int nal_unit_type = nal_header & 0x1f;

	312

	313 // Process the NAL content.

	314 switch (nal_unit_type) {

	315 case kNalUnitTypeSPS:

	316 DVLOG(LOG_LEVEL_ES) << "NAL: SPS";

	317 // \|nal_ref_idc\| should not be 0 for a SPS.

	318 if (nal_ref_idc == 0)

	319 return false;

	320 return ProcessSPS(buf, size);

	321 case kNalUnitTypeIdrSlice:

	322 DVLOG(LOG_LEVEL_ES) << "NAL: IDR slice";

	323 return true;

	324 case kNalUnitTypeNonIdrSlice:

	325 DVLOG(LOG_LEVEL_ES) << "NAL: Non IDR slice";

	326 is_key_frame_ = false;

	327 return true;

	328 case kNalUnitTypePPS:

	329 DVLOG(LOG_LEVEL_ES) << "NAL: PPS";

	330 return true;

	331 case kNalUnitTypeAUD:

	332 DVLOG(LOG_LEVEL_ES) << "NAL: AUD";

	333 return true;

	334 default:

	335 DVLOG(LOG_LEVEL_ES) << "NAL: " << nal_unit_type;

	336 return true;

	337 }

	338

	339 NOTREACHED();

	340 return false;

	341 }

	342

	343 bool EsParserH264::ProcessSPS(const uint8* buf, int size) {

	344 if (size <= 0)

	345 return false;

	346

	347 // Removes start code emulation prevention.

	348 // TODO(damienv): refactoring in media/base

	349 // so as to have a unique H264 bit reader in Chrome.

	350 scoped_ptr<uint8[]> buf_rbsp(new uint8[size]);

	351 int rbsp_size = ConvertToRbsp(buf, size, buf_rbsp.get());

	352

	353 BitReaderH264 bit_reader(buf_rbsp.get(), rbsp_size);

	354

	355 int profile_idc;

	356 int constraint_setX_flag;

	357 int level_idc;

	358 uint32 seq_parameter_set_id;

	359 uint32 log2_max_frame_num_minus4;

	360 uint32 pic_order_cnt_type;

	361 RCHECK(bit_reader.ReadBits(8, &profile_idc));

	362 RCHECK(bit_reader.ReadBits(8, &constraint_setX_flag));

	363 RCHECK(bit_reader.ReadBits(8, &level_idc));

	364 RCHECK(bit_reader.ReadBitsExpGolomb(&seq_parameter_set_id));

	365 RCHECK(bit_reader.ReadBitsExpGolomb(&log2_max_frame_num_minus4));

	366 RCHECK(bit_reader.ReadBitsExpGolomb(&pic_order_cnt_type));

	367

	368 // \|pic_order_cnt_type\| shall be in the range of 0 to 2.

	369 RCHECK(pic_order_cnt_type <= 2);

	370 if (pic_order_cnt_type == 0) {

	371 uint32 log2_max_pic_order_cnt_lsb_minus4;

	372 RCHECK(bit_reader.ReadBitsExpGolomb(&log2_max_pic_order_cnt_lsb_minus4));

	373 } else if (pic_order_cnt_type == 1) {

	374 // Note: \|offset_for_non_ref_pic\| and \|offset_for_top_to_bottom_field\|

	375 // corresponds to their codenum not to their actual value.

	376 bool delta_pic_order_always_zero_flag;

	377 uint32 offset_for_non_ref_pic;

	378 uint32 offset_for_top_to_bottom_field;

	379 uint32 num_ref_frames_in_pic_order_cnt_cycle;

	380 RCHECK(bit_reader.ReadBits(1, &delta_pic_order_always_zero_flag));

	381 RCHECK(bit_reader.ReadBitsExpGolomb(&offset_for_non_ref_pic));

	382 RCHECK(bit_reader.ReadBitsExpGolomb(&offset_for_top_to_bottom_field));

	383 RCHECK(

	384 bit_reader.ReadBitsExpGolomb(&num_ref_frames_in_pic_order_cnt_cycle));

	385 for (uint32 i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; i++) {

	386 uint32 offset_for_ref_frame_codenum;

	387 RCHECK(bit_reader.ReadBitsExpGolomb(&offset_for_ref_frame_codenum));

	388 }

	389 }

	390

	391 uint32 num_ref_frames;

	392 int gaps_in_frame_num_value_allowed_flag;

	393 uint32 pic_width_in_mbs_minus1;

	394 uint32 pic_height_in_map_units_minus1;

	395 RCHECK(bit_reader.ReadBitsExpGolomb(&num_ref_frames));

	396 RCHECK(bit_reader.ReadBits(1, &gaps_in_frame_num_value_allowed_flag));

	397 RCHECK(bit_reader.ReadBitsExpGolomb(&pic_width_in_mbs_minus1));

	398 RCHECK(bit_reader.ReadBitsExpGolomb(&pic_height_in_map_units_minus1));

	399

	400 int frame_mbs_only_flag;

	401 RCHECK(bit_reader.ReadBits(1, &frame_mbs_only_flag));

	402 if (!frame_mbs_only_flag) {

	403 int mb_adaptive_frame_field_flag;

	404 RCHECK(bit_reader.ReadBits(1, &mb_adaptive_frame_field_flag));

	405 }

	406

	407 int direct_8x8_inference_flag;

	408 RCHECK(bit_reader.ReadBits(1, &direct_8x8_inference_flag));

	409

	410 bool frame_cropping_flag;

	411 uint32 frame_crop_left_offset = 0;

	412 uint32 frame_crop_right_offset = 0;

	413 uint32 frame_crop_top_offset = 0;

	414 uint32 frame_crop_bottom_offset = 0;

	415 RCHECK(bit_reader.ReadBits(1, &frame_cropping_flag));

	416 if (frame_cropping_flag) {

	417 RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_left_offset));

	418 RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_right_offset));

	419 RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_top_offset));

	420 RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_bottom_offset));

	421 }

	422

	423 bool vui_parameters_present_flag;

	424 RCHECK(bit_reader.ReadBits(1, &vui_parameters_present_flag));

	425 int sar_width = 1;

	426 int sar_height = 1;

	427 if (vui_parameters_present_flag) {

	428 // Read only the aspect ratio information from the VUI section.

	429 // TODO(damienv): check whether other VUI info are useful.

	430 bool aspect_ratio_info_present_flag = false;

	431 RCHECK(bit_reader.ReadBits(1, &aspect_ratio_info_present_flag));

	432 if (aspect_ratio_info_present_flag) {

	433 int aspect_ratio_idc;

	434 RCHECK(bit_reader.ReadBits(8, &aspect_ratio_idc));

	435 if (aspect_ratio_idc == kExtendedSar) {

	436 RCHECK(bit_reader.ReadBits(16, &sar_width));

	437 RCHECK(bit_reader.ReadBits(16, &sar_height));

	438 } else if (aspect_ratio_idc < 14) {

	439 sar_width = kTableSarWidth[aspect_ratio_idc];

	440 sar_height = kTableSarHeight[aspect_ratio_idc];

	441 }

	442 }

	443 }

	444

	445 if (sar_width != sar_height) {

	446 // TODO(damienv): Support non square pixels.

	447 DVLOG(1)

	448 << "Non square pixel not supported yet:"

	449 << " sar_width=" << sar_width

	450 << " sar_height=" << sar_height;

	451 return false;

	452 }

	453

	454 // TODO(damienv): a MAP unit can be either 16 or 32 pixels.

	455 // although it's 16 pixels for progressive non MBAFF frames.

	456 gfx::Size coded_size((pic_width_in_mbs_minus1 + 1) * 16,

	457 (pic_height_in_map_units_minus1 + 1) * 16);

	458 gfx::Rect visible_rect(

	459 frame_crop_left_offset,

	460 frame_crop_top_offset,

	461 (coded_size.width() - frame_crop_right_offset) - frame_crop_left_offset,

	462 (coded_size.height() - frame_crop_bottom_offset) - frame_crop_top_offset);

	463

	464 // TODO(damienv): calculate the natural size based

	465 // on the possible aspect ratio coded in the VUI parameters.

	466 gfx::Size natural_size(visible_rect.width(),

	467 visible_rect.height());

	468

	469 // TODO(damienv):

	470 // Assuming the SPS is used right away by the PPS

	471 // and the slice headers is a strong assumption.

	472 // In theory, we should process the SPS and PPS

	473 // and only when one of the slice header is switching

	474 // the PPS id, the video decoder config should be changed.

	475 VideoDecoderConfig video_decoder_config(

	476 kCodecH264,

	477 VIDEO_CODEC_PROFILE_UNKNOWN, // TODO(damienv)

	478 VideoFrame::YV12,

	479 coded_size,

	480 visible_rect,

	481 natural_size,

	482 NULL, 0,

	483 false);

	484

	485 if (!video_decoder_config.Matches(last_video_decoder_config_)) {

	486 DVLOG(1) << "Profile IDC: " << profile_idc;

	487 DVLOG(1) << "Level IDC: " << level_idc;

	488 DVLOG(1) << "Pic width: " << (pic_width_in_mbs_minus1 + 1) * 16;

	489 DVLOG(1) << "Pic height: " << (pic_height_in_map_units_minus1 + 1) * 16;

	490 DVLOG(1) << "log2_max_frame_num_minus4: " << log2_max_frame_num_minus4;

	491 last_video_decoder_config_ = video_decoder_config;

	492 new_video_config_cb_.Run(video_decoder_config);

	493 }

	494

	495 return true;

	496 }

	497

	498 } // namespace mp2t

	499 } // namespace media

	500

OLD	NEW

« media/mp2t/es_parser_adts.cc ('K') | « media/mp2t/es_parser_h264.h ('k') | media/mp2t/mp2t_common.h » ('j') | media/mp2t/mp2t_stream_parser.cc » ('J')