Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(183)

Side by Side Diff: media/mpeg2/es_parser_h264.cc

Issue 23566013: Mpeg2 TS stream parser for media source. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 7 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "media/mpeg2/es_parser_h264.h"
6
7 #include "base/basictypes.h"
8 #include "base/logging.h"
9 #include "media/base/bit_reader.h"
10 #include "media/base/stream_parser_buffer.h"
11 #include "media/base/video_decoder_config.h"
12 #include "media/base/video_frame.h"
13 #include "media/mpeg2/mpeg2ts_common.h"
14 #include "ui/gfx/rect.h"
15 #include "ui/gfx/size.h"
16
// Parsing guard: when |x| evaluates to false, the failed expression is
// reported through DLOG(ERROR) and the enclosing function returns false.
// Must only be used inside functions returning bool.
#define RCHECK(x)                                          \
  do {                                                     \
    if (x)                                                 \
      break;                                               \
    DLOG(ERROR) << "Failure while parsing H264: " << #x;   \
    return false;                                          \
  } while (0)
24
25 namespace {
26
// Value of aspect_ratio_idc for which the sample aspect ratio is not taken
// from the tables below but read explicitly (sar_width / sar_height) from
// the bitstream.
const int kExtendedSar = 255;

// Sample aspect ratio numerators, indexed by aspect_ratio_idc (0 to 13).
const int kTableSarWidth[14] = {
  1, 1,
  12, 10, 16, 40,
  24, 20, 32, 80,
  18, 15, 64, 160
};

// Sample aspect ratio denominators, indexed by aspect_ratio_idc (0 to 13).
const int kTableSarHeight[14] = {
  1, 1,
  11, 11, 11, 33,
  11, 11, 11, 33,
  11, 11, 33, 99
};
36
37 class ByteReaderChainedBuffer {
38 public:
39 ByteReaderChainedBuffer(const uint8* buf0, int size0,
40 const uint8* buf1, int size1)
41 : buf0_(buf0),
42 size0_(size0),
43 buf1_(buf1),
44 size1_(size1) { }
45
46 uint8 Get(int offset) const {
47 DCHECK_GE(offset, 0);
48 DCHECK_LT(offset, size0_ + size1_);
49 if (offset < size0_) {
50 return buf0_[offset];
51 }
52 return buf1_[offset - size0_];
53 }
54
55 int GetSize() const {
56 return (size0_ + size1_);
57 }
58
59 private:
60 const uint8* const buf0_;
61 const int size0_;
62 const uint8* const buf1_;
63 const int size1_;
64 };
65
66 } // namespace
67
68 namespace media {
69 namespace mpeg2ts {
70
71 EsParserH264::EsParserH264(
72 NewVideoConfigCB new_video_config_cb,
73 EmitBufferCB emit_buffer_cb)
74 : nal_es_pos_(0),
75 new_video_config_cb_(new_video_config_cb),
76 emit_buffer_cb_(emit_buffer_cb),
77 is_video_config_known_(false),
78 profile_idc_(0),
79 level_idc_(0),
80 pic_width_in_mbs_minus1_(0),
81 pic_height_in_map_units_minus1_(0) {
82 }
83
84 EsParserH264::~EsParserH264() {
85 }
86
87 void EsParserH264::Parse(const uint8* buf, int size,
88 bool is_pts_valid, base::TimeDelta pts,
89 bool is_dts_valid, base::TimeDelta dts) {
90 // Note: Parse is invoked each time a PES packet has been reassembled.
91 // Unfortunately, a PES packet does not necessarily map
92 // to an h264 access unit, although the HLS recommandation is to use one PES
93 // for each access unit (but this is just a recommandation and some streams
94 // do not comply with this recommandation).
95
96 // Link position |raw_es_.size()| in the ES stream with a timing descriptor.
97 // HLS recommandation: "In AVC video, you should have both a DTS and a
98 // PTS in each PES header".
99 // TODO(damienv): What if the stream is not compliant and both the PTS and the
100 // DTS are not valid ?
101 TimingDesc timing_desc;
102 timing_desc.pts = pts;
103 if (is_dts_valid) {
104 timing_desc.dts = dts;
105 } else {
106 timing_desc.dts = pts;
107 }
108 timing_desc_list_.push_back(
109 std::pair<int, TimingDesc>(raw_es_.size(), timing_desc));
110
111 // Add NALs from the incoming buffer.
112 FindNals(buf, size);
113
114 // Find access units based on AUD.
115 std::list<NalDescList::iterator> access_unit_list;
116 FindAccessUnits(&access_unit_list);
117 if (access_unit_list.empty()) {
118 int old_size = raw_es_.size();
119 raw_es_.resize(old_size + size);
120 memcpy(&raw_es_[old_size], buf, size);
121 DiscardEs(raw_es_.size() - 4);
122 }
123
124 // Make sure that all the frames to be emitted are in the ES buffer.
125 int last_position = (access_unit_list.back())->position;
126 int copy_size = last_position - raw_es_.size();
127 if (copy_size > 0) {
128 int copy_size = last_position - raw_es_.size();
129 int old_size = raw_es_.size();
130 raw_es_.resize(old_size + copy_size);
131 memcpy(&raw_es_[old_size], buf, copy_size);
132 buf += copy_size;
133 size -= copy_size;
134 }
135
136 // Emit all frames.
137 std::list<NalDescList::iterator>::iterator it0 = access_unit_list.begin();
138 std::list<NalDescList::iterator>::iterator it1 = it0;
139 ++it1;
140 LOG_IF(WARNING, (*it0)->position != 0)
141 << "Needs to discard some ES data before getting the 1st access unit: "
142 << (*it0)->position;
143 for (; it1 != access_unit_list.end(); ++it0, ++it1) {
144 int nxt_frame_position = (*it1)->position;
145 EmitFrame(*it0, *it1, nxt_frame_position);
146 }
147
148 // Discard emitted frames.
149 DiscardEs(last_position);
150
151 // Finally copy the incomplete access unit to the ES buffer.
152 int old_size = raw_es_.size();
153 raw_es_.resize(old_size + size);
154 memcpy(&raw_es_[old_size], buf, size);
155 }
156
157 void EsParserH264::Flush() {
158 // Find access units based on AUD.
159 std::list<NalDescList::iterator> access_unit_list;
160 FindAccessUnits(&access_unit_list);
161
162 // At this point, there can be at most one access unit in the buffer.
163 DCHECK_GE(access_unit_list.size(), 1u);
164 if (!access_unit_list.empty()) {
165 // Force emitting the last access unit (even it might be incomplete).
166 int nxt_frame_position = raw_es_.size();
167 NalDescList::iterator cur_frame = *(access_unit_list.begin());
168 NalDescList::iterator nxt_frame = nal_desc_list_.end();
169 EmitFrame(cur_frame, nxt_frame, nxt_frame_position);
170 }
171 }
172
173 void EsParserH264::FindNals(const uint8* buf, int size) {
174 ByteReaderChainedBuffer byte_reader(
175 &raw_es_[0], raw_es_.size(),
176 buf, size);
177
178 DCHECK_GE(nal_es_pos_, 0);
179 DCHECK_LT(nal_es_pos_, byte_reader.GetSize());
180
181 // Resume NAL segmentation where it was left.
182 for ( ; nal_es_pos_ < byte_reader.GetSize() - 4; nal_es_pos_++) {
183 // Make sure the syncword is either 00 00 00 01 or 00 00 01
184 if (byte_reader.Get(nal_es_pos_ + 0) != 0 ||
185 byte_reader.Get(nal_es_pos_ + 1) != 0) {
186 continue;
187 }
188 int syncword_length = 0;
189 if (byte_reader.Get(nal_es_pos_ + 2) == 0 &&
190 byte_reader.Get(nal_es_pos_ + 3) == 1) {
191 syncword_length = 4;
192 } else if (byte_reader.Get(nal_es_pos_ + 2) == 1) {
193 syncword_length = 3;
194 } else {
195 continue;
196 }
197
198 // Retrieve the NAL type.
199 int nal_header = byte_reader.Get(nal_es_pos_ + syncword_length);
200 int forbidden_zero_bit = (nal_header >> 7) & 0x1;
201 NalDesc nal_desc;
202 nal_desc.position = nal_es_pos_;
203 nal_desc.nal_unit_type = static_cast<NalUnitType>(nal_header & 0x1f);
204 if (forbidden_zero_bit != 0) {
205 nal_desc.nal_unit_type = kNalUnitTypeInvalid;
206 }
207 VLOG(LOG_LEVEL_ES) << "nal: offset=" << nal_desc.position
208 << " type=" << nal_desc.nal_unit_type;
209 nal_desc_list_.push_back(nal_desc);
210 nal_es_pos_ += syncword_length;
211 }
212 }
213
214 void EsParserH264::FindAccessUnits(
215 std::list<NalDescList::iterator>* access_unit_list) {
216 // Get the H264 access units based on AUD.
217 // Mpeg2TS spec: "2.14 Carriage of Rec. ITU-T H.264 | ISO/IEC 14496-10 video"
218 // "Each AVC access unit shall contain an access unit delimiter NAL Unit;"
219 for (NalDescList::iterator it = nal_desc_list_.begin();
220 it != nal_desc_list_.end(); ++it) {
221 if (it->nal_unit_type == kNalUnitTypeAUD) {
222 VLOG(LOG_LEVEL_ES) << "aud found @ pos=" << it->position;
223 access_unit_list->push_back(it);
224 }
225 }
226 }
227
228 void EsParserH264::EmitFrame(
229 NalDescList::iterator cur_frame,
230 NalDescList::iterator nxt_frame,
231 int nxt_frame_position) {
232 // Current frame position = position of the 1st NAL of the frame.
233 int cur_frame_position = cur_frame->position;
234 int access_unit_size = nxt_frame_position - cur_frame_position;
235
236 // Get the access unit timing info.
237 TimingDesc current_timing_desc;
238 while (!timing_desc_list_.empty() &&
239 timing_desc_list_.front().first <= cur_frame_position) {
240 current_timing_desc = timing_desc_list_.front().second;
241 timing_desc_list_.pop_front();
242 }
243
244 // Check whether this is a key frame + light NAL parsing to get some
245 // relevant information (e.g. SPS/PPS).
246 // Note: it would have been nice to get the keyframe decision based
247 // on the Mpeg2TS random_access_indicator but encoders sometimes just don't
248 // bother setting this flag in the MPEG2 TS stream.
249 bool is_key_frame = true;
250 for (NalDescList::iterator it = cur_frame; it != nxt_frame; ++it) {
251 if (it->nal_unit_type == kNalUnitTypeNonIdrSlice) {
252 is_key_frame = false;
253 }
254 NalDescList::iterator next_nal_it = it;
255 ++next_nal_it;
256 int cur_nal_position = it->position;
257 int nxt_nal_position = (next_nal_it == nxt_frame)
258 ? nxt_frame_position : next_nal_it->position;
259 int nal_size = nxt_nal_position - cur_nal_position;
260 DCHECK_LE(cur_nal_position + nal_size, static_cast<int>(raw_es_.size()));
261 NalParser(&raw_es_[cur_nal_position], nal_size);
262 }
263
264 // Emit the current frame.
265 VLOG(LOG_LEVEL_ES) << "is_key_frame = " << is_key_frame;
266 scoped_refptr<StreamParserBuffer> stream_parser_buffer =
267 StreamParserBuffer::CopyFrom(
268 &raw_es_[cur_frame_position],
269 access_unit_size,
270 is_key_frame);
271 stream_parser_buffer->SetDecodeTimestamp(current_timing_desc.dts);
272 stream_parser_buffer->set_timestamp(current_timing_desc.pts);
273 emit_buffer_cb_.Run(stream_parser_buffer);
274 }
275
276 void EsParserH264::DiscardEs(int nbytes) {
277 if (nbytes <= 0) {
278 return;
279 }
280
281 // Update the NAL list accordingly.
282 while (!nal_desc_list_.empty() &&
283 nal_desc_list_.front().position < nbytes) {
284 nal_desc_list_.pop_front();
285 }
286 for (NalDescList::iterator it = nal_desc_list_.begin();
287 it != nal_desc_list_.end(); ++it) {
288 DCHECK(it->position >= nbytes);
289 it->position -= nbytes;
290 }
291 nal_es_pos_ -= nbytes;
292 if (nal_es_pos_ < 0) {
293 nal_es_pos_ = 0;
294 }
295
296 // Update the timing information accordingly.
297 std::list<std::pair<int, TimingDesc> >::iterator timing_it
298 = timing_desc_list_.begin();
299 for (; timing_it != timing_desc_list_.end(); ++timing_it) {
300 timing_it->first -= nbytes;
301 }
302
303 // Discard |nbytes| of ES.
304 int old_size = raw_es_.size();
305 int new_size = old_size - nbytes;
306 CHECK_LE(nbytes, old_size);
307 if (new_size > 0) {
308 memmove(&raw_es_[0], &raw_es_[nbytes], new_size);
309 }
310 raw_es_.resize(new_size);
311 }
312
313 void EsParserH264::NalParser(const uint8* buf, int size) {
314 // Discard the annexB syncword.
315 if (size < 3) {
316 LOG(WARNING) << "NalParser: incomplete NAL";
317 return;
318 }
319 DCHECK_EQ(buf[0], 0);
320 DCHECK_EQ(buf[1], 0);
321 if (buf[2] == 1) {
322 buf += 3;
323 size -= 3;
324 } else {
325 buf += 4;
326 size -= 4;
327 }
328
329 // Get the NAL header.
330 if (size < 1) {
331 LOG(WARNING) << "NalParser: incomplete NAL";
332 return;
333 }
334 int nal_header = buf[0];
335 buf += 1;
336 size -= 1;
337
338 int forbidden_zero_bit = (nal_header >> 7) & 0x1;
339 if (forbidden_zero_bit != 0) {
340 return;
341 }
342 int nal_ref_idc = (nal_header >> 5) & 0x3;
343 int nal_unit_type = nal_header & 0x1f;
344
345 // TODO(damienv):
346 // The nal start code emulation prevention should be un-done,
347 // before parsing the NAL content.
348
349 // Process the NAL content.
350 if (nal_unit_type == kNalUnitTypeSPS) {
351 VLOG(LOG_LEVEL_ES) << "NAL: SPS";
352 if (nal_ref_idc == 0) {
353 // Should not be 0 for a SPS.
354 return;
355 }
356 ProcessSPS(buf, size);
357 } else if (nal_unit_type == kNalUnitTypeIdrSlice) {
358 VLOG(LOG_LEVEL_ES) << "NAL: IDR slice";
359 ProcessSliceLayer(buf, size);
360 } else if (nal_unit_type == kNalUnitTypeNonIdrSlice) {
361 VLOG(LOG_LEVEL_ES) << "NAL: Non IDR slice";
362 ProcessSliceLayer(buf, size);
363 } else if (nal_unit_type == kNalUnitTypePPS) {
364 VLOG(LOG_LEVEL_ES) << "NAL: PPS";
365 } else if (nal_unit_type == kNalUnitTypeAUD) {
366 VLOG(LOG_LEVEL_ES) << "NAL: AUD";
367 } else {
368 VLOG(LOG_LEVEL_ES) << "NAL: " << nal_unit_type;
369 }
370 }
371
372 bool EsParserH264::ProcessSPS(const uint8* buf, int size) {
373 if (size <= 0) {
374 return false;
375 }
376 BitReader bit_reader(buf, size);
377
378 int profile_idc;
379 RCHECK(bit_reader.ReadBits(8, &profile_idc));
380 int constraint_setX_flag;
381 RCHECK(bit_reader.ReadBits(8, &constraint_setX_flag));
382 int level_idc;
383 RCHECK(bit_reader.ReadBits(8, &level_idc));
384 uint32 seq_parameter_set_id;
385 RCHECK(ReadBitsExpGolomb(&bit_reader, &seq_parameter_set_id));
386 uint32 log2_max_frame_num_minus4;
387 RCHECK(ReadBitsExpGolomb(&bit_reader, &log2_max_frame_num_minus4));
388 uint32 pic_order_cnt_type;
389 RCHECK(ReadBitsExpGolomb(&bit_reader, &pic_order_cnt_type));
390
391 if (pic_order_cnt_type > 2) {
392 // Bitstream error: pic_order_cnt_type shall be in the range of 0 to 2.
393 return false;
394 }
395 if (pic_order_cnt_type == 0) {
396 uint32 log2_max_pic_order_cnt_lsb_minus4;
397 RCHECK(ReadBitsExpGolomb(&bit_reader, &log2_max_pic_order_cnt_lsb_minus4));
398 } else if (pic_order_cnt_type == 1) {
399 NOTIMPLEMENTED();
400 LOG(FATAL) << "pic_order_cnt_type = 1 not supported yet";
401 }
402
403 uint32 num_ref_frames;
404 RCHECK(ReadBitsExpGolomb(&bit_reader, &num_ref_frames));
405 int gaps_in_frame_num_value_allowed_flag;
406 RCHECK(bit_reader.ReadBits(1, &gaps_in_frame_num_value_allowed_flag));
407 uint32 pic_width_in_mbs_minus1;
408 RCHECK(ReadBitsExpGolomb(&bit_reader, &pic_width_in_mbs_minus1));
409 uint32 pic_height_in_map_units_minus1;
410 RCHECK(ReadBitsExpGolomb(&bit_reader, &pic_height_in_map_units_minus1));
411
412 int frame_mbs_only_flag;
413 RCHECK(bit_reader.ReadBits(1, &frame_mbs_only_flag));
414 if (!frame_mbs_only_flag) {
415 int mb_adaptive_frame_field_flag;
416 RCHECK(bit_reader.ReadBits(1, &mb_adaptive_frame_field_flag));
417 }
418
419 int direct_8x8_inference_flag;
420 RCHECK(bit_reader.ReadBits(1, &direct_8x8_inference_flag));
421
422 bool frame_cropping_flag;
423 uint32 frame_crop_left_offset = 0;
424 uint32 frame_crop_right_offset = 0;
425 uint32 frame_crop_top_offset = 0;
426 uint32 frame_crop_bottom_offset = 0;
427 RCHECK(bit_reader.ReadBits(1, &frame_cropping_flag));
428 if (frame_cropping_flag) {
429 RCHECK(ReadBitsExpGolomb(&bit_reader, &frame_crop_left_offset));
430 RCHECK(ReadBitsExpGolomb(&bit_reader, &frame_crop_right_offset));
431 RCHECK(ReadBitsExpGolomb(&bit_reader, &frame_crop_top_offset));
432 RCHECK(ReadBitsExpGolomb(&bit_reader, &frame_crop_bottom_offset));
433 }
434
435 bool vui_parameters_present_flag;
436 RCHECK(bit_reader.ReadBits(1, &vui_parameters_present_flag));
437 int sar_width = 1;
438 int sar_height = 1;
439 if (vui_parameters_present_flag) {
440 // Read only the aspect ratio information from the VUI section.
441 // TODO(damienv): check whether other VUI info are useful.
442 bool aspect_ratio_info_present_flag = false;
443 RCHECK(bit_reader.ReadBits(1, &aspect_ratio_info_present_flag));
444 if (aspect_ratio_info_present_flag) {
445 int aspect_ratio_idc;
446 RCHECK(bit_reader.ReadBits(8, &aspect_ratio_idc));
447 if (aspect_ratio_idc == kExtendedSar) {
448 RCHECK(bit_reader.ReadBits(16, &sar_width));
449 RCHECK(bit_reader.ReadBits(16, &sar_height));
450 } else if (aspect_ratio_idc < 14) {
451 sar_width = kTableSarWidth[aspect_ratio_idc];
452 sar_height = kTableSarHeight[aspect_ratio_idc];
453 }
454 }
455 }
456
457 LOG_IF(WARNING, sar_width != sar_height)
458 << "Non square pixel not supported yet:"
459 << " sar_width=" << sar_width
460 << " sar_height=" << sar_height;
461
462 if (is_video_config_known_ &&
463 profile_idc == profile_idc_ &&
464 level_idc == level_idc_ &&
465 pic_width_in_mbs_minus1 == pic_width_in_mbs_minus1_ &&
466 pic_height_in_map_units_minus1 == pic_height_in_map_units_minus1_) {
467 // This is the same SPS as the previous one.
468 return true;
469 }
470 is_video_config_known_ = true;
471 profile_idc_ = profile_idc;
472 level_idc_ = level_idc;
473 pic_width_in_mbs_minus1_ = pic_width_in_mbs_minus1;
474 pic_height_in_map_units_minus1_ = pic_height_in_map_units_minus1;
475
476 // TODO(damienv):
477 // Assuming the SPS is used right away by the PPS
478 // and the slice headers is a strong assumption.
479 // In theory, we should process the SPS and PPS
480 // and only when one of the slice header is switching
481 // the PPS id, the video decoder config should be changed.
482 LOG(INFO) << "Profile IDC: " << profile_idc;
483 LOG(INFO) << "Level IDC: " << level_idc;
484 LOG(INFO) << "Pic width: " << (pic_width_in_mbs_minus1 + 1) * 16;
485 LOG(INFO) << "Pic height: " << (pic_height_in_map_units_minus1 + 1) * 16;
486 LOG(INFO) << "log2_max_frame_num_minus4: " << log2_max_frame_num_minus4;
487
488 // TODO(damienv): a MAP unit can be either 16 or 32 pixels.
489 // although it's 16 pixels for progressive non MBAFF frames.
490 gfx::Size coded_size((pic_width_in_mbs_minus1 + 1) * 16,
491 (pic_height_in_map_units_minus1 + 1) * 16);
492 gfx::Rect visible_rect(
493 frame_crop_left_offset,
494 frame_crop_top_offset,
495 (coded_size.width() - frame_crop_right_offset) - frame_crop_left_offset,
496 (coded_size.height() - frame_crop_bottom_offset) - frame_crop_top_offset);
497
498 // TODO(damienv): calculate the natural size based
499 // on the possible aspect ratio coded in the VUI parameters.
500 gfx::Size natural_size(visible_rect.width(),
501 visible_rect.height());
502
503 VideoDecoderConfig video_decoder_config(
504 kCodecH264,
505 VIDEO_CODEC_PROFILE_UNKNOWN, // TODO(damienv)
506 VideoFrame::YV12,
507 coded_size,
508 visible_rect,
509 natural_size,
510 NULL, 0,
511 false);
512 new_video_config_cb_.Run(video_decoder_config);
513
514 return true;
515 }
516
517 bool EsParserH264::ProcessSliceLayer(const uint8* buf, int size) {
518 if (size <= 0) {
519 return false;
520 }
521 BitReader bit_reader(buf, size);
522
523 // Read only the slice header.
524 // TODO(damienv): frame_num
525 uint32 first_mb_in_slice;
526 RCHECK(ReadBitsExpGolomb(&bit_reader, &first_mb_in_slice));
527 uint32 slice_type;
528 RCHECK(ReadBitsExpGolomb(&bit_reader, &slice_type));
529 uint32 pic_parameter_set_id;
530 RCHECK(ReadBitsExpGolomb(&bit_reader, &pic_parameter_set_id));
531
532 VLOG(LOG_LEVEL_ES) << "first_mb_in_slice: " << first_mb_in_slice;
533 VLOG(LOG_LEVEL_ES) << "slice_type: " << slice_type;
534 return true;
535 }
536
537 bool EsParserH264::ReadBitsExpGolomb(
538 BitReader* bit_reader, uint32* exp_golomb_value) {
539 // TODO(damienv): this should be a member function of BitReader.
540
541 // Get the number of leading zeros.
542 int zero_count = 0;
543 for (zero_count = 0; ; zero_count++) {
544 int one_bit;
545 if (!bit_reader->ReadBits(1, &one_bit)) {
546 return false;
547 }
548 if (one_bit != 0) {
549 break;
550 }
551 }
552
553 // Read the actual value.
554 uint32 base_value = (1 << zero_count) - 1;
555 uint32 value = 0;
556 for (int bit_count = 0; bit_count < zero_count; bit_count++) {
557 int one_bit;
558 if (!bit_reader->ReadBits(1, &one_bit)) {
559 return false;
560 }
561 if (one_bit != 0) {
562 value += (1 << (zero_count-1 - bit_count));
563 }
564 }
565
566 *exp_golomb_value = base_value + value;
567 return true;
568 }
569
570 } // namespace mpeg2ts
571 } // namespace media
572
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698