media/mp2t/es_parser_h264.cc - Issue 23566013: Mpeg2 TS stream parser for media source.

Side by Side Diff: media/mp2t/es_parser_h264.cc

Issue 23566013: Mpeg2 TS stream parser for media source. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Better naming: mp2t namespace & class names Created 7 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.

	2 // Use of this source code is governed by a BSD-style license that can be

	3 // found in the LICENSE file.

	4

	5 #include "media/mp2t/es_parser_h264.h"

	6

	7 #include "base/basictypes.h"

	8 #include "base/logging.h"

	9 #include "media/base/bit_reader.h"

	10 #include "media/base/buffers.h"

	11 #include "media/base/stream_parser_buffer.h"

	12 #include "media/base/video_decoder_config.h"

	13 #include "media/base/video_frame.h"

	14 #include "media/mp2t/mp2t_common.h"

	15 #include "ui/gfx/rect.h"

	16 #include "ui/gfx/size.h"

	17

	18 static const int kExtendedSar = 255;

	19

	20 static const int kTableSarWidth[14] = {

	21 1, 1, 12, 10, 16, 40, 24, 20, 32, 80, 18, 15, 64, 160

	22 };

	23

	24 static const int kTableSarHeight[14] = {

	25 1, 1, 11, 11, 11, 33, 11, 11, 11, 33, 11, 11, 33, 99

	26 };

	27

	28 namespace media {

	29 namespace mp2t {

	30

	31 class BitReaderH264 : public BitReader {

	32 public:

	33 BitReaderH264(const uint8* data, off_t size)

	34 : BitReader(data, size) { }

	35

	36 // Read an unsigned exp-golomb value.

	37 // Return true if successful.

	38 bool ReadBitsExpGolomb(uint32* exp_golomb_value);

	39 };

	40

	41 bool BitReaderH264::ReadBitsExpGolomb(uint32* exp_golomb_value) {

	42 // Get the number of leading zeros.

	43 int zero_count = 0;

	44 while (true) {

	45 int one_bit;

	46 RCHECK(ReadBits(1, &one_bit));

	47 if (one_bit != 0)

	48 break;

	49 zero_count++;

	50 }

	51

	52 // If zero_count is greater than 31, the calculated value will overflow.

	53 if (zero_count > 31) {

	54 SkipBits(zero_count);

	55 return false;

	56 }
	damienv1 2013/09/12 20:43:02 TODO: special case if \|zero_count\| = 0 ? TODO: special case if \|zero_count\| = 0 ?
	57

	58 // Read the actual value.

	59 uint32 base = (1 << zero_count) - 1;

	60 uint32 offset;

	61 RCHECK(ReadBits(zero_count, &offset));

	62 *exp_golomb_value = base + offset;

	63

	64 return true;

	65 }

	66

	67 EsParserH264::EsParserH264(

	68 NewVideoConfigCB new_video_config_cb,

	69 EmitBufferCB emit_buffer_cb)

	70 : nal_es_pos_(0),

	71 new_video_config_cb_(new_video_config_cb),

	72 emit_buffer_cb_(emit_buffer_cb),

	73 is_video_config_known_(false),

	74 profile_idc_(0),

	75 level_idc_(0),

	76 pic_width_in_mbs_minus1_(0),

	77 pic_height_in_map_units_minus1_(0) {

	78 }

	79

	80 EsParserH264::~EsParserH264() {

	81 }

	82

	83 bool EsParserH264::Parse(const uint8* buf, int size,

	84 base::TimeDelta pts,

	85 base::TimeDelta dts) {

	86 // Note: Parse is invoked each time a PES packet has been reassembled.

	87 // Unfortunately, a PES packet does not necessarily map

	88 // to an h264 access unit, although the HLS recommandation is to use one PES

	89 // for each access unit (but this is just a recommandation and some streams

	90 // do not comply with this recommandation).

	91

	92 // Link position \|raw_es_size\| in the ES stream with a timing descriptor.

	93 // HLS recommandation: "In AVC video, you should have both a DTS and a

	94 // PTS in each PES header".

	95 if (dts == kNoTimestamp() && pts == kNoTimestamp()) {

	96 DVLOG(1) << "A timestamp must be provided for each reassembled PES";

	97 Reset();

	98 return false;

	99 }

	100 TimingDesc timing_desc;

	101 timing_desc.pts = pts;

	102 timing_desc.dts = (dts != kNoTimestamp()) ? dts : pts;

	103

	104 int raw_es_size;

	105 const uint8* raw_es;

	106 es_byte_queue_.Peek(&raw_es, &raw_es_size);

	107 timing_desc_list_.push_back(

	108 std::pair<int, TimingDesc>(raw_es_size, timing_desc));

	109

	110 // Add the incoming bytes to the ES queue.

	111 es_byte_queue_.Push(buf, size);

	112 es_byte_queue_.Peek(&raw_es, &raw_es_size);

	113

	114 // Add NALs from the incoming buffer.

	115 FindNals();

	116

	117 // Find access units based on AUD.

	118 std::list<NalDescList::const_iterator> access_unit_list;

	119 FindAccessUnits(&access_unit_list);

	120 if (access_unit_list.empty()) {

	121 DiscardEs(nal_es_pos_);

	122 return true;

	123 }

	124

	125 // Emit all frames.

	126 std::list<NalDescList::const_iterator>::iterator it0 =

	127 access_unit_list.begin();

	128 std::list<NalDescList::const_iterator>::iterator it1 = it0;

	129 ++it1;

	130 LOG_IF(WARNING, (*it0)->position != 0)

	131 << "Needs to discard some ES data before getting the 1st access unit: "

	132 << (*it0)->position;

	133 for (; it1 != access_unit_list.end(); ++it0, ++it1) {

	134 int nxt_frame_position = (*it1)->position;

	135 bool status = EmitFrame(it0, it1, nxt_frame_position);

	136 if (!status) {

	137 Reset();

	138 return false;

	139 }

	140 }

	141

	142 // Discard emitted frames.

	143 int last_position = access_unit_list.back()->position;

	144 DiscardEs(last_position);

	145

	146 return true;

	147 }

	148

	149 void EsParserH264::Flush() {

	150 // Find access units based on AUD.

	151 std::list<NalDescList::const_iterator> access_unit_list;

	152 FindAccessUnits(&access_unit_list);

	153

	154 // At this point, there can be at most one access unit in the buffer.

	155 DCHECK_GE(access_unit_list.size(), 1u);

	156 if (!access_unit_list.empty()) {

	157 // Force emitting the last access unit (even if it might be incomplete).

	158 int nxt_frame_position = 0;

	159 const uint8* raw_es = NULL;

	160 es_byte_queue_.Peek(&raw_es, &nxt_frame_position);

	161 NalDescList::const_iterator cur_frame = *(access_unit_list.begin());

	162 NalDescList::const_iterator nxt_frame = nal_desc_list_.end();

	163 EmitFrame(cur_frame, nxt_frame, nxt_frame_position);

	164 }

	165 }

	166

	167 void EsParserH264::Reset() {

	168 DVLOG(1) << "EsParserH264::Reset";

	169 es_byte_queue_.Reset();

	170 timing_desc_list_.clear();

	171 nal_desc_list_.clear();

	172 nal_es_pos_ = 0;

	173 is_video_config_known_ = false;

	174 }

	175

	176 void EsParserH264::FindNals() {

	177 int raw_es_size;

	178 const uint8* raw_es;

	179 es_byte_queue_.Peek(&raw_es, &raw_es_size);

	180

	181 DCHECK_GE(nal_es_pos_, 0);

	182 DCHECK_LT(nal_es_pos_, raw_es_size);

	183

	184 // Resume NAL segmentation where it was left.

	185 for ( ; nal_es_pos_ < raw_es_size - 4; nal_es_pos_++) {

	186 // Make sure the syncword is either 00 00 00 01 or 00 00 01

	187 if (raw_es[nal_es_pos_ + 0] != 0 \|\|

	188 raw_es[nal_es_pos_ + 1] != 0) {

	189 continue;

	190 }

	191 int syncword_length = 0;

	192 if (raw_es[nal_es_pos_ + 2] == 0 &&

	193 raw_es[nal_es_pos_ + 3] == 1) {

	194 syncword_length = 4;

	195 } else if (raw_es[nal_es_pos_ + 2] == 1) {

	196 syncword_length = 3;

	197 } else {

	198 continue;

	199 }

	200

	201 // Retrieve the NAL type.

	202 int nal_header = raw_es[nal_es_pos_ + syncword_length];

	203 int forbidden_zero_bit = (nal_header >> 7) & 0x1;

	204 NalDesc nal_desc;

	205 nal_desc.position = nal_es_pos_;

	206 nal_desc.nal_unit_type = static_cast<NalUnitType>(nal_header & 0x1f);

	207 if (forbidden_zero_bit != 0)

	208 nal_desc.nal_unit_type = kNalUnitTypeInvalid;

	209 DVLOG(LOG_LEVEL_ES) << "nal: offset=" << nal_desc.position

	210 << " type=" << nal_desc.nal_unit_type;

	211 nal_desc_list_.push_back(nal_desc);

	212 nal_es_pos_ += syncword_length;

	213 }

	214 }

	215

	216 void EsParserH264::FindAccessUnits(

	217 std::list<NalDescList::const_iterator>* access_unit_list) {

	218 // Get the H264 access units based on AUD.

	219 // Mpeg2TS spec: "2.14 Carriage of Rec. ITU-T H.264 \| ISO/IEC 14496-10 video"

	220 // "Each AVC access unit shall contain an access unit delimiter NAL Unit;"

	221 for (NalDescList::const_iterator it = nal_desc_list_.begin();

	222 it != nal_desc_list_.end(); ++it) {

	223 if (it->nal_unit_type == kNalUnitTypeAUD) {

	224 DVLOG(LOG_LEVEL_ES) << "aud found @ pos=" << it->position;

	225 access_unit_list->push_back(it);

	226 }

	227 }

	228 }

	229

	230 bool EsParserH264::EmitFrame(

	231 NalDescList::const_iterator& cur_frame,

	232 NalDescList::const_iterator& nxt_frame,

	233 int nxt_frame_position) {

	234 int raw_es_size;

	235 const uint8* raw_es;

	236 es_byte_queue_.Peek(&raw_es, &raw_es_size);

	237

	238 // Current frame position = position of the 1st NAL of the frame.

	239 int cur_frame_position = cur_frame->position;

	240 int access_unit_size = nxt_frame_position - cur_frame_position;

	241

	242 // Get the access unit timing info.

	243 TimingDesc current_timing_desc;

	244 while (!timing_desc_list_.empty() &&

	245 timing_desc_list_.front().first <= cur_frame_position) {

	246 current_timing_desc = timing_desc_list_.front().second;

	247 timing_desc_list_.pop_front();

	248 }

	249

	250 // Check whether this is a key frame + light NAL parsing to get some

	251 // relevant information (e.g. SPS/PPS).

	252 // Note: it would have been nice to get the keyframe decision based

	253 // on the Mpeg2TS random_access_indicator but encoders sometimes just don't

	254 // bother setting this flag in the MPEG2 TS stream.

	255 bool is_key_frame = true;

	256 for (NalDescList::const_iterator it = cur_frame; it != nxt_frame; ++it) {

	257 if (it->nal_unit_type == kNalUnitTypeNonIdrSlice)

	258 is_key_frame = false;

	259 NalDescList::const_iterator next_nal_it = it;

	260 ++next_nal_it;

	261 int cur_nal_position = it->position;

	262 int nxt_nal_position = (next_nal_it == nxt_frame)

	263 ? nxt_frame_position : next_nal_it->position;

	264 int nal_size = nxt_nal_position - cur_nal_position;

	265 DCHECK_LE(cur_nal_position + nal_size, raw_es_size);

	266 bool nal_status = NalParser(&raw_es[cur_nal_position], nal_size);

	267 if (!nal_status)

	268 return false;

	269 }

	270

	271 // Emit the current frame.

	272 DVLOG(LOG_LEVEL_ES) << "is_key_frame = " << is_key_frame;

	273 scoped_refptr<StreamParserBuffer> stream_parser_buffer =

	274 StreamParserBuffer::CopyFrom(

	275 &raw_es[cur_frame_position],

	276 access_unit_size,

	277 is_key_frame);

	278 stream_parser_buffer->SetDecodeTimestamp(current_timing_desc.dts);

	279 stream_parser_buffer->set_timestamp(current_timing_desc.pts);

	280 emit_buffer_cb_.Run(stream_parser_buffer);

	281

	282 return true;

	283 }

	284

	285 void EsParserH264::DiscardEs(int nbytes) {

	286 if (nbytes <= 0)

	287 return;

	288

	289 // Update the NAL list accordingly.

	290 while (!nal_desc_list_.empty() &&

	291 nal_desc_list_.front().position < nbytes)

	292 nal_desc_list_.pop_front();

	293 for (NalDescList::iterator it = nal_desc_list_.begin();

	294 it != nal_desc_list_.end(); ++it) {

	295 DCHECK(it->position >= nbytes);

	296 it->position -= nbytes;

	297 }

	298 nal_es_pos_ -= nbytes;

	299 if (nal_es_pos_ < 0)

	300 nal_es_pos_ = 0;

	301

	302 // Update the timing information accordingly.

	303 std::list<std::pair<int, TimingDesc> >::iterator timing_it

	304 = timing_desc_list_.begin();

	305 for (; timing_it != timing_desc_list_.end(); ++timing_it)

	306 timing_it->first -= nbytes;

	307

	308 // Discard \|nbytes\| of ES.

	309 es_byte_queue_.Pop(nbytes);

	310 }

	311

	312 bool EsParserH264::NalParser(const uint8* buf, int size) {

	313 // Discard the annexB syncword.

	314 if (size < 3 \|\| buf[0] != 0 \|\| buf[1] != 0 \|\|

	315 !(buf[2] == 1 \|\| (size >= 4 && buf[2] == 0 && buf[3] == 1))) {

	316 DVLOG(1) << "NalParser: bad annexB start code";

	317 return false;

	318 }

	319 if (buf[2] == 1) {

	320 buf += 3;

	321 size -= 3;

	322 } else {

	323 buf += 4;

	324 size -= 4;

	325 }

	326

	327 // Get the NAL header.

	328 if (size < 1) {

	329 DVLOG(1) << "NalParser: incomplete NAL";

	330 return false;

	331 }

	332 int nal_header = buf[0];

	333 buf += 1;

	334 size -= 1;

	335

	336 int forbidden_zero_bit = (nal_header >> 7) & 0x1;

	337 if (forbidden_zero_bit != 0)

	338 return false;

	339 int nal_ref_idc = (nal_header >> 5) & 0x3;

	340 int nal_unit_type = nal_header & 0x1f;

	341

	342 // TODO(damienv):

	343 // The nal start code emulation prevention should be un-done,

	344 // before parsing the NAL content.

	345

	346 // Process the NAL content.

	347 if (nal_unit_type == kNalUnitTypeSPS) {

	348 DVLOG(LOG_LEVEL_ES) << "NAL: SPS";

	349 // \|nal_ref_idc\| should not be 0 for a SPS.

	350 if (nal_ref_idc == 0)

	351 return false;

	352 return ProcessSPS(buf, size);

	353 }

	354 if (nal_unit_type == kNalUnitTypeIdrSlice) {

	355 DVLOG(LOG_LEVEL_ES) << "NAL: IDR slice";

	356 return true;

	357 }

	358 if (nal_unit_type == kNalUnitTypeNonIdrSlice) {

	359 DVLOG(LOG_LEVEL_ES) << "NAL: Non IDR slice";

	360 return true;

	361 }

	362 if (nal_unit_type == kNalUnitTypePPS) {

	363 DVLOG(LOG_LEVEL_ES) << "NAL: PPS";

	364 return true;

	365 }

	366 if (nal_unit_type == kNalUnitTypeAUD) {

	367 DVLOG(LOG_LEVEL_ES) << "NAL: AUD";

	368 return true;

	369 }

	370

	371 DVLOG(LOG_LEVEL_ES) << "NAL: " << nal_unit_type;

	372 return true;

	373 }

	374

	375 bool EsParserH264::ProcessSPS(const uint8* buf, int size) {

	376 if (size <= 0)

	377 return false;

	378 BitReaderH264 bit_reader(buf, size);

	379

	380 int profile_idc;

	381 int constraint_setX_flag;

	382 int level_idc;

	383 uint32 seq_parameter_set_id;

	384 uint32 log2_max_frame_num_minus4;

	385 uint32 pic_order_cnt_type;

	386 RCHECK(bit_reader.ReadBits(8, &profile_idc));

	387 RCHECK(bit_reader.ReadBits(8, &constraint_setX_flag));

	388 RCHECK(bit_reader.ReadBits(8, &level_idc));

	389 RCHECK(bit_reader.ReadBitsExpGolomb(&seq_parameter_set_id));

	390 RCHECK(bit_reader.ReadBitsExpGolomb(&log2_max_frame_num_minus4));

	391 RCHECK(bit_reader.ReadBitsExpGolomb(&pic_order_cnt_type));

	392

	393 // \|pic_order_cnt_type\| shall be in the range of 0 to 2.

	394 if (pic_order_cnt_type > 2)

	395 return false;

	396 if (pic_order_cnt_type == 0) {

	397 uint32 log2_max_pic_order_cnt_lsb_minus4;

	398 RCHECK(bit_reader.ReadBitsExpGolomb(&log2_max_pic_order_cnt_lsb_minus4));

	399 } else if (pic_order_cnt_type == 1) {

	400 NOTIMPLEMENTED();

	401 return false;

	402 }

	403

	404 uint32 num_ref_frames;

	405 int gaps_in_frame_num_value_allowed_flag;

	406 uint32 pic_width_in_mbs_minus1;

	407 uint32 pic_height_in_map_units_minus1;

	408 RCHECK(bit_reader.ReadBitsExpGolomb(&num_ref_frames));

	409 RCHECK(bit_reader.ReadBits(1, &gaps_in_frame_num_value_allowed_flag));

	410 RCHECK(bit_reader.ReadBitsExpGolomb(&pic_width_in_mbs_minus1));

	411 RCHECK(bit_reader.ReadBitsExpGolomb(&pic_height_in_map_units_minus1));

	412

	413 int frame_mbs_only_flag;

	414 RCHECK(bit_reader.ReadBits(1, &frame_mbs_only_flag));

	415 if (!frame_mbs_only_flag) {

	416 int mb_adaptive_frame_field_flag;

	417 RCHECK(bit_reader.ReadBits(1, &mb_adaptive_frame_field_flag));

	418 }

	419

	420 int direct_8x8_inference_flag;

	421 RCHECK(bit_reader.ReadBits(1, &direct_8x8_inference_flag));

	422

	423 bool frame_cropping_flag;

	424 uint32 frame_crop_left_offset = 0;

	425 uint32 frame_crop_right_offset = 0;

	426 uint32 frame_crop_top_offset = 0;

	427 uint32 frame_crop_bottom_offset = 0;

	428 RCHECK(bit_reader.ReadBits(1, &frame_cropping_flag));

	429 if (frame_cropping_flag) {

	430 RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_left_offset));

	431 RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_right_offset));

	432 RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_top_offset));

	433 RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_bottom_offset));

	434 }

	435

	436 bool vui_parameters_present_flag;

	437 RCHECK(bit_reader.ReadBits(1, &vui_parameters_present_flag));

	438 int sar_width = 1;

	439 int sar_height = 1;

	440 if (vui_parameters_present_flag) {

	441 // Read only the aspect ratio information from the VUI section.

	442 // TODO(damienv): check whether other VUI info are useful.

	443 bool aspect_ratio_info_present_flag = false;

	444 RCHECK(bit_reader.ReadBits(1, &aspect_ratio_info_present_flag));

	445 if (aspect_ratio_info_present_flag) {

	446 int aspect_ratio_idc;

	447 RCHECK(bit_reader.ReadBits(8, &aspect_ratio_idc));

	448 if (aspect_ratio_idc == kExtendedSar) {

	449 RCHECK(bit_reader.ReadBits(16, &sar_width));

	450 RCHECK(bit_reader.ReadBits(16, &sar_height));

	451 } else if (aspect_ratio_idc < 14) {

	452 sar_width = kTableSarWidth[aspect_ratio_idc];

	453 sar_height = kTableSarHeight[aspect_ratio_idc];

	454 }

	455 }

	456 }

	457

	458 if (sar_width != sar_height) {

	459 // TODO(damienv): Support non square pixels.

	460 DVLOG(1)

	461 << "Non square pixel not supported yet:"

	462 << " sar_width=" << sar_width

	463 << " sar_height=" << sar_height;

	464 return false;

	465 }

	466

	467 if (is_video_config_known_ &&

	468 profile_idc == profile_idc_ &&

	469 level_idc == level_idc_ &&

	470 pic_width_in_mbs_minus1 == pic_width_in_mbs_minus1_ &&

	471 pic_height_in_map_units_minus1 == pic_height_in_map_units_minus1_) {

	472 // This is the same SPS as the previous one.

	473 return true;

	474 }

	475 is_video_config_known_ = true;

	476 profile_idc_ = profile_idc;

	477 level_idc_ = level_idc;

	478 pic_width_in_mbs_minus1_ = pic_width_in_mbs_minus1;

	479 pic_height_in_map_units_minus1_ = pic_height_in_map_units_minus1;

	480

	481 // TODO(damienv):

	482 // Assuming the SPS is used right away by the PPS

	483 // and the slice headers is a strong assumption.

	484 // In theory, we should process the SPS and PPS

	485 // and only when one of the slice header is switching

	486 // the PPS id, the video decoder config should be changed.

	487 DVLOG(1) << "Profile IDC: " << profile_idc;

	488 DVLOG(1) << "Level IDC: " << level_idc;

	489 DVLOG(1) << "Pic width: " << (pic_width_in_mbs_minus1 + 1) * 16;

	490 DVLOG(1) << "Pic height: " << (pic_height_in_map_units_minus1 + 1) * 16;

	491 DVLOG(1) << "log2_max_frame_num_minus4: " << log2_max_frame_num_minus4;

	492

	493 // TODO(damienv): a MAP unit can be either 16 or 32 pixels.

	494 // although it's 16 pixels for progressive non MBAFF frames.

	495 gfx::Size coded_size((pic_width_in_mbs_minus1 + 1) * 16,

	496 (pic_height_in_map_units_minus1 + 1) * 16);

	497 gfx::Rect visible_rect(

	498 frame_crop_left_offset,

	499 frame_crop_top_offset,

	500 (coded_size.width() - frame_crop_right_offset) - frame_crop_left_offset,

	501 (coded_size.height() - frame_crop_bottom_offset) - frame_crop_top_offset);

	502

	503 // TODO(damienv): calculate the natural size based

	504 // on the possible aspect ratio coded in the VUI parameters.

	505 gfx::Size natural_size(visible_rect.width(),

	506 visible_rect.height());

	507

	508 VideoDecoderConfig video_decoder_config(

	509 kCodecH264,

	510 VIDEO_CODEC_PROFILE_UNKNOWN, // TODO(damienv)

	511 VideoFrame::YV12,

	512 coded_size,

	513 visible_rect,

	514 natural_size,

	515 NULL, 0,

	516 false);

	517 new_video_config_cb_.Run(video_decoder_config);

	518

	519 return true;

	520 }

	521

	522 } // namespace mp2t

	523 } // namespace media

	524

OLD	NEW

« media/mp2t/es_parser_h264.h ('K') | « media/mp2t/es_parser_h264.h ('k') | media/mp2t/mp2t_common.h » ('j') | no next file with comments »