OLD | NEW |
---|---|
(Empty) | |
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "media/mp2t/es_parser_h264.h" | |
6 | |
7 #include "base/basictypes.h" | |
8 #include "base/logging.h" | |
9 #include "media/base/bit_reader.h" | |
10 #include "media/base/buffers.h" | |
11 #include "media/base/stream_parser_buffer.h" | |
12 #include "media/base/video_frame.h" | |
13 #include "media/mp2t/mp2t_common.h" | |
14 #include "ui/gfx/rect.h" | |
15 #include "ui/gfx/size.h" | |
16 | |
// |aspect_ratio_idc| value signalling that the sample aspect ratio is coded
// explicitly (as a 16 bit width followed by a 16 bit height).
static const int kExtendedSar = 255;

// ISO 14496 part 10
// VUI parameters: Table E-1 "Meaning of sample aspect ratio indicator"
// Both tables are indexed by |aspect_ratio_idc| (0 to 13).
// Index 0 ("unspecified" in Table E-1) is mapped to a 1:1 ratio.
static const int kTableSarWidth[14] = {
  1,    // 0
  1,    // 1
  12,   // 2
  10,   // 3
  16,   // 4
  40,   // 5
  24,   // 6
  20,   // 7
  32,   // 8
  80,   // 9
  18,   // 10
  15,   // 11
  64,   // 12
  160   // 13
};

static const int kTableSarHeight[14] = {
  1,    // 0
  1,    // 1
  11,   // 2
  11,   // 3
  11,   // 4
  33,   // 5
  11,   // 6
  11,   // 7
  11,   // 8
  33,   // 9
  11,   // 10
  11,   // 11
  33,   // 12
  99    // 13
};
28 | |
// Strips the start code emulation prevention bytes from a NAL payload:
// every 0x03 byte that directly follows at least two 0x00 bytes is dropped,
// all the other bytes are copied unchanged.
// |buf| / |size|: input bytes.
// |buf_rbsp|: output buffer. Its size should be at least |size| bytes to
// accommodate the worst case (nothing removed).
// Returns the number of bytes written to |buf_rbsp|.
static int ConvertToRbsp(const uint8* buf, int size, uint8* buf_rbsp) {
  uint8* out = buf_rbsp;
  int consecutive_zeros = 0;
  int i = 0;
  while (i < size) {
    const uint8 current = buf[i++];
    if (consecutive_zeros >= 2 && current == 0x3) {
      // Emulation prevention byte: drop it and restart the zero run.
      consecutive_zeros = 0;
      continue;
    }
    consecutive_zeros = (current == 0) ? consecutive_zeros + 1 : 0;
    *out++ = current;
  }
  return static_cast<int>(out - buf_rbsp);
}
49 | |
50 namespace media { | |
51 namespace mp2t { | |
52 | |
// Subset of the NAL unit types this parser distinguishes.
// Values come from ISO 14496 - Part 10: Table 7-1 "NAL unit type codes".
enum NalUnitType {
  kNalUnitTypeNonIdrSlice = 1,  // Coded slice of a non-IDR picture.
  kNalUnitTypeIdrSlice = 5,     // Coded slice of an IDR picture.
  kNalUnitTypeSPS = 7,          // Sequence parameter set.
  kNalUnitTypePPS = 8,          // Picture parameter set.
  kNalUnitTypeAUD = 9,          // Access unit delimiter.
};
61 | |
62 class BitReaderH264 : public BitReader { | |
63 public: | |
64 BitReaderH264(const uint8* data, off_t size) | |
65 : BitReader(data, size) { } | |
66 | |
67 // Read an unsigned exp-golomb value. | |
68 // Return true if successful. | |
69 bool ReadBitsExpGolomb(uint32* exp_golomb_value); | |
70 }; | |
71 | |
72 bool BitReaderH264::ReadBitsExpGolomb(uint32* exp_golomb_value) { | |
73 // Get the number of leading zeros. | |
74 int zero_count = 0; | |
75 while (true) { | |
76 int one_bit; | |
77 RCHECK(ReadBits(1, &one_bit)); | |
78 if (one_bit != 0) | |
79 break; | |
80 zero_count++; | |
81 } | |
82 | |
83 // If zero_count is greater than 31, the calculated value will overflow. | |
84 if (zero_count > 31) { | |
85 SkipBits(zero_count); | |
86 return false; | |
87 } | |
88 | |
89 // Read the actual value. | |
90 uint32 base = (1 << zero_count) - 1; | |
91 uint32 offset; | |
92 RCHECK(ReadBits(zero_count, &offset)); | |
93 *exp_golomb_value = base + offset; | |
94 | |
95 return true; | |
96 } | |
97 | |
98 EsParserH264::EsParserH264( | |
99 const NewVideoConfigCB& new_video_config_cb, | |
100 const EmitBufferCB& emit_buffer_cb) | |
101 : new_video_config_cb_(new_video_config_cb), | |
102 emit_buffer_cb_(emit_buffer_cb), | |
103 es_pos_(0), | |
104 current_nal_pos_(-1), | |
105 current_access_unit_pos_(-1), | |
106 is_key_frame_(false) { | |
107 } | |
108 | |
109 EsParserH264::~EsParserH264() { | |
110 } | |
111 | |
112 bool EsParserH264::Parse(const uint8* buf, int size, | |
113 base::TimeDelta pts, | |
114 base::TimeDelta dts) { | |
115 // Note: Parse is invoked each time a PES packet has been reassembled. | |
116 // Unfortunately, a PES packet does not necessarily map | |
117 // to an h264 access unit, although the HLS recommendation is to use one PES | |
118 // for each access unit (but this is just a recommendation and some streams | |
119 // do not comply with this recommendation). | |
120 | |
121 // Link position |raw_es_size| in the ES stream with a timing descriptor. | |
122 // HLS recommendation: "In AVC video, you should have both a DTS and a | |
123 // PTS in each PES header". | |
124 if (dts == kNoTimestamp() && pts == kNoTimestamp()) { | |
125 DVLOG(1) << "A timestamp must be provided for each reassembled PES"; | |
126 return false; | |
127 } | |
128 TimingDesc timing_desc; | |
129 timing_desc.pts = pts; | |
130 timing_desc.dts = (dts != kNoTimestamp()) ? dts : pts; | |
131 | |
132 int raw_es_size; | |
133 const uint8* raw_es; | |
134 es_byte_queue_.Peek(&raw_es, &raw_es_size); | |
135 timing_desc_list_.push_back( | |
136 std::pair<int, TimingDesc>(raw_es_size, timing_desc)); | |
137 | |
138 // Add the incoming bytes to the ES queue. | |
139 es_byte_queue_.Push(buf, size); | |
140 | |
141 // Add NALs from the incoming buffer. | |
142 if (!ParseInternal()) | |
143 return false; | |
144 | |
145 // Discard emitted frames | |
146 // or every byte that was parsed so far if there is no current frame. | |
147 int skip_count = | |
148 (current_access_unit_pos_ >= 0) ? current_access_unit_pos_ : es_pos_; | |
149 DiscardEs(skip_count); | |
150 | |
151 return true; | |
152 } | |
153 | |
154 void EsParserH264::Flush() { | |
155 if (current_access_unit_pos_ < 0) | |
156 return; | |
157 | |
158 // Force emitting the last access unit. | |
159 int next_aud_pos; | |
160 const uint8* raw_es; | |
161 es_byte_queue_.Peek(&raw_es, &next_aud_pos); | |
162 EmitFrameIfNeeded(next_aud_pos); | |
163 current_nal_pos_ = -1; | |
164 current_access_unit_pos_ = -1; | |
165 | |
166 // Discard the emitted frame. | |
167 DiscardEs(next_aud_pos); | |
168 } | |
169 | |
170 void EsParserH264::Reset() { | |
171 DVLOG(1) << "EsParserH264::Reset"; | |
172 es_byte_queue_.Reset(); | |
173 timing_desc_list_.clear(); | |
174 es_pos_ = 0; | |
175 current_nal_pos_ = -1; | |
176 current_access_unit_pos_ = -1; | |
177 is_key_frame_ = false; | |
178 last_video_decoder_config_ = VideoDecoderConfig(); | |
179 } | |
180 | |
181 bool EsParserH264::ParseInternal() { | |
182 int raw_es_size; | |
183 const uint8* raw_es; | |
184 es_byte_queue_.Peek(&raw_es, &raw_es_size); | |
185 | |
186 DCHECK_GE(es_pos_, 0); | |
187 DCHECK_LT(es_pos_, raw_es_size); | |
188 | |
189 // Resume h264 es parsing where it was left. | |
190 for ( ; es_pos_ < raw_es_size - 4; es_pos_++) { | |
191 // Make sure the syncword is either 00 00 00 01 or 00 00 01 | |
192 if (raw_es[es_pos_ + 0] != 0 || raw_es[es_pos_ + 1] != 0) | |
193 continue; | |
194 int syncword_length = 0; | |
195 if (raw_es[es_pos_ + 2] == 0 && raw_es[es_pos_ + 3] == 1) | |
196 syncword_length = 4; | |
197 else if (raw_es[es_pos_ + 2] == 1) | |
198 syncword_length = 3; | |
199 else | |
200 continue; | |
201 | |
202 // Parse the current NAL (and the new NAL then becomes the current one). | |
203 if (current_nal_pos_ >= 0) { | |
204 int nal_size = es_pos_ - current_nal_pos_; | |
205 DCHECK_GT(nal_size, 0); | |
206 RCHECK(NalParser(&raw_es[current_nal_pos_], nal_size)); | |
207 } | |
208 current_nal_pos_ = es_pos_ + syncword_length; | |
209 | |
210 // Retrieve the NAL type. | |
211 int nal_header = raw_es[es_pos_ + syncword_length]; | |
acolwell GONE FROM CHROMIUM
2013/09/18 01:46:05
nit: use current_nal_pos_ here instead just to mak
damienv1
2013/09/18 21:40:17
Done.
| |
212 int forbidden_zero_bit = (nal_header >> 7) & 0x1; | |
213 RCHECK(forbidden_zero_bit == 0); | |
214 NalUnitType nal_unit_type = static_cast<NalUnitType>(nal_header & 0x1f); | |
215 DVLOG(LOG_LEVEL_ES) << "nal: offset=" << es_pos_ | |
216 << " type=" << nal_unit_type; | |
217 | |
218 // Emit a frame if needed. | |
219 if (nal_unit_type == kNalUnitTypeAUD) | |
220 EmitFrameIfNeeded(es_pos_); | |
221 | |
222 // Skip the syncword. | |
223 es_pos_ += syncword_length; | |
224 } | |
225 | |
226 return true; | |
227 } | |
228 | |
229 void EsParserH264::EmitFrameIfNeeded(int next_aud_pos) { | |
230 // There is no current frame: start a new frame. | |
231 if (current_access_unit_pos_ < 0) { | |
232 current_access_unit_pos_ = next_aud_pos; | |
acolwell GONE FROM CHROMIUM
2013/09/18 01:46:05
nit: Since current_access_unit_pos_ and is_key_fra
damienv1
2013/09/18 21:40:17
Done.
| |
233 is_key_frame_ = true; | |
234 return; | |
235 } | |
236 | |
237 // Get the access unit timing info. | |
238 TimingDesc current_timing_desc; | |
239 while (!timing_desc_list_.empty() && | |
240 timing_desc_list_.front().first <= current_access_unit_pos_) { | |
241 current_timing_desc = timing_desc_list_.front().second; | |
242 timing_desc_list_.pop_front(); | |
243 } | |
244 | |
245 // Emit a frame. | |
246 int raw_es_size; | |
247 const uint8* raw_es; | |
248 es_byte_queue_.Peek(&raw_es, &raw_es_size); | |
249 int access_unit_size = next_aud_pos - current_access_unit_pos_; | |
250 scoped_refptr<StreamParserBuffer> stream_parser_buffer = | |
251 StreamParserBuffer::CopyFrom( | |
252 &raw_es[current_access_unit_pos_], | |
253 access_unit_size, | |
254 is_key_frame_); | |
255 stream_parser_buffer->SetDecodeTimestamp(current_timing_desc.dts); | |
256 stream_parser_buffer->set_timestamp(current_timing_desc.pts); | |
257 emit_buffer_cb_.Run(stream_parser_buffer); | |
258 | |
259 // Start a new frame. | |
260 // |is_key_frame_| will be updated while parsing the NALs of that frame. | |
261 current_access_unit_pos_ = es_pos_; | |
262 is_key_frame_ = true; | |
263 } | |
264 | |
265 void EsParserH264::DiscardEs(int nbytes) { | |
266 DCHECK_GE(nbytes, 0); | |
267 if (nbytes == 0) | |
268 return; | |
269 | |
270 // Update the position of | |
271 // - the parser, | |
272 // - the current NAL, | |
273 // - the current access unit. | |
274 es_pos_ -= nbytes; | |
275 if (es_pos_ < 0) | |
276 es_pos_ = 0; | |
277 | |
278 if (current_nal_pos_ >= 0) { | |
279 DCHECK_GE(current_nal_pos_, nbytes); | |
280 current_nal_pos_ -= nbytes; | |
281 } | |
282 if (current_access_unit_pos_ >= 0) { | |
283 DCHECK_GE(current_access_unit_pos_, nbytes); | |
284 current_access_unit_pos_ -= nbytes; | |
285 } | |
286 | |
287 // Update the timing information accordingly. | |
288 std::list<std::pair<int, TimingDesc> >::iterator timing_it | |
289 = timing_desc_list_.begin(); | |
290 for (; timing_it != timing_desc_list_.end(); ++timing_it) | |
291 timing_it->first -= nbytes; | |
292 | |
293 // Discard |nbytes| of ES. | |
294 es_byte_queue_.Pop(nbytes); | |
295 } | |
296 | |
297 bool EsParserH264::NalParser(const uint8* buf, int size) { | |
298 // Get the NAL header. | |
299 if (size < 1) { | |
300 DVLOG(1) << "NalParser: incomplete NAL"; | |
301 return false; | |
302 } | |
303 int nal_header = buf[0]; | |
304 buf += 1; | |
305 size -= 1; | |
306 | |
307 int forbidden_zero_bit = (nal_header >> 7) & 0x1; | |
308 if (forbidden_zero_bit != 0) | |
309 return false; | |
310 int nal_ref_idc = (nal_header >> 5) & 0x3; | |
311 int nal_unit_type = nal_header & 0x1f; | |
312 | |
313 // Process the NAL content. | |
314 switch (nal_unit_type) { | |
315 case kNalUnitTypeSPS: | |
316 DVLOG(LOG_LEVEL_ES) << "NAL: SPS"; | |
317 // |nal_ref_idc| should not be 0 for a SPS. | |
318 if (nal_ref_idc == 0) | |
319 return false; | |
320 return ProcessSPS(buf, size); | |
321 case kNalUnitTypeIdrSlice: | |
322 DVLOG(LOG_LEVEL_ES) << "NAL: IDR slice"; | |
323 return true; | |
324 case kNalUnitTypeNonIdrSlice: | |
325 DVLOG(LOG_LEVEL_ES) << "NAL: Non IDR slice"; | |
326 is_key_frame_ = false; | |
327 return true; | |
328 case kNalUnitTypePPS: | |
329 DVLOG(LOG_LEVEL_ES) << "NAL: PPS"; | |
330 return true; | |
331 case kNalUnitTypeAUD: | |
332 DVLOG(LOG_LEVEL_ES) << "NAL: AUD"; | |
333 return true; | |
334 default: | |
335 DVLOG(LOG_LEVEL_ES) << "NAL: " << nal_unit_type; | |
336 return true; | |
337 } | |
338 | |
339 NOTREACHED(); | |
340 return false; | |
341 } | |
342 | |
343 bool EsParserH264::ProcessSPS(const uint8* buf, int size) { | |
344 if (size <= 0) | |
345 return false; | |
346 | |
347 // Removes start code emulation prevention. | |
348 // TODO(damienv): refactoring in media/base | |
349 // so as to have a unique H264 bit reader in Chrome. | |
350 scoped_ptr<uint8[]> buf_rbsp(new uint8[size]); | |
351 int rbsp_size = ConvertToRbsp(buf, size, buf_rbsp.get()); | |
352 | |
353 BitReaderH264 bit_reader(buf_rbsp.get(), rbsp_size); | |
354 | |
355 int profile_idc; | |
356 int constraint_setX_flag; | |
357 int level_idc; | |
358 uint32 seq_parameter_set_id; | |
359 uint32 log2_max_frame_num_minus4; | |
360 uint32 pic_order_cnt_type; | |
361 RCHECK(bit_reader.ReadBits(8, &profile_idc)); | |
362 RCHECK(bit_reader.ReadBits(8, &constraint_setX_flag)); | |
363 RCHECK(bit_reader.ReadBits(8, &level_idc)); | |
364 RCHECK(bit_reader.ReadBitsExpGolomb(&seq_parameter_set_id)); | |
365 RCHECK(bit_reader.ReadBitsExpGolomb(&log2_max_frame_num_minus4)); | |
366 RCHECK(bit_reader.ReadBitsExpGolomb(&pic_order_cnt_type)); | |
367 | |
368 // |pic_order_cnt_type| shall be in the range of 0 to 2. | |
369 RCHECK(pic_order_cnt_type <= 2); | |
370 if (pic_order_cnt_type == 0) { | |
371 uint32 log2_max_pic_order_cnt_lsb_minus4; | |
372 RCHECK(bit_reader.ReadBitsExpGolomb(&log2_max_pic_order_cnt_lsb_minus4)); | |
373 } else if (pic_order_cnt_type == 1) { | |
374 // Note: |offset_for_non_ref_pic| and |offset_for_top_to_bottom_field| | |
375 // corresponds to their codenum not to their actual value. | |
376 bool delta_pic_order_always_zero_flag; | |
377 uint32 offset_for_non_ref_pic; | |
378 uint32 offset_for_top_to_bottom_field; | |
379 uint32 num_ref_frames_in_pic_order_cnt_cycle; | |
380 RCHECK(bit_reader.ReadBits(1, &delta_pic_order_always_zero_flag)); | |
381 RCHECK(bit_reader.ReadBitsExpGolomb(&offset_for_non_ref_pic)); | |
382 RCHECK(bit_reader.ReadBitsExpGolomb(&offset_for_top_to_bottom_field)); | |
383 RCHECK( | |
384 bit_reader.ReadBitsExpGolomb(&num_ref_frames_in_pic_order_cnt_cycle)); | |
385 for (uint32 i = 0; i < num_ref_frames_in_pic_order_cnt_cycle; i++) { | |
386 uint32 offset_for_ref_frame_codenum; | |
387 RCHECK(bit_reader.ReadBitsExpGolomb(&offset_for_ref_frame_codenum)); | |
388 } | |
389 } | |
390 | |
391 uint32 num_ref_frames; | |
392 int gaps_in_frame_num_value_allowed_flag; | |
393 uint32 pic_width_in_mbs_minus1; | |
394 uint32 pic_height_in_map_units_minus1; | |
395 RCHECK(bit_reader.ReadBitsExpGolomb(&num_ref_frames)); | |
396 RCHECK(bit_reader.ReadBits(1, &gaps_in_frame_num_value_allowed_flag)); | |
397 RCHECK(bit_reader.ReadBitsExpGolomb(&pic_width_in_mbs_minus1)); | |
398 RCHECK(bit_reader.ReadBitsExpGolomb(&pic_height_in_map_units_minus1)); | |
399 | |
400 int frame_mbs_only_flag; | |
401 RCHECK(bit_reader.ReadBits(1, &frame_mbs_only_flag)); | |
402 if (!frame_mbs_only_flag) { | |
403 int mb_adaptive_frame_field_flag; | |
404 RCHECK(bit_reader.ReadBits(1, &mb_adaptive_frame_field_flag)); | |
405 } | |
406 | |
407 int direct_8x8_inference_flag; | |
408 RCHECK(bit_reader.ReadBits(1, &direct_8x8_inference_flag)); | |
409 | |
410 bool frame_cropping_flag; | |
411 uint32 frame_crop_left_offset = 0; | |
412 uint32 frame_crop_right_offset = 0; | |
413 uint32 frame_crop_top_offset = 0; | |
414 uint32 frame_crop_bottom_offset = 0; | |
415 RCHECK(bit_reader.ReadBits(1, &frame_cropping_flag)); | |
416 if (frame_cropping_flag) { | |
417 RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_left_offset)); | |
418 RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_right_offset)); | |
419 RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_top_offset)); | |
420 RCHECK(bit_reader.ReadBitsExpGolomb(&frame_crop_bottom_offset)); | |
421 } | |
422 | |
423 bool vui_parameters_present_flag; | |
424 RCHECK(bit_reader.ReadBits(1, &vui_parameters_present_flag)); | |
425 int sar_width = 1; | |
426 int sar_height = 1; | |
427 if (vui_parameters_present_flag) { | |
428 // Read only the aspect ratio information from the VUI section. | |
429 // TODO(damienv): check whether other VUI info are useful. | |
430 bool aspect_ratio_info_present_flag = false; | |
431 RCHECK(bit_reader.ReadBits(1, &aspect_ratio_info_present_flag)); | |
432 if (aspect_ratio_info_present_flag) { | |
433 int aspect_ratio_idc; | |
434 RCHECK(bit_reader.ReadBits(8, &aspect_ratio_idc)); | |
435 if (aspect_ratio_idc == kExtendedSar) { | |
436 RCHECK(bit_reader.ReadBits(16, &sar_width)); | |
437 RCHECK(bit_reader.ReadBits(16, &sar_height)); | |
438 } else if (aspect_ratio_idc < 14) { | |
439 sar_width = kTableSarWidth[aspect_ratio_idc]; | |
440 sar_height = kTableSarHeight[aspect_ratio_idc]; | |
441 } | |
442 } | |
443 } | |
444 | |
445 if (sar_width != sar_height) { | |
446 // TODO(damienv): Support non square pixels. | |
447 DVLOG(1) | |
448 << "Non square pixel not supported yet:" | |
449 << " sar_width=" << sar_width | |
450 << " sar_height=" << sar_height; | |
451 return false; | |
452 } | |
453 | |
454 // TODO(damienv): a MAP unit can be either 16 or 32 pixels. | |
455 // although it's 16 pixels for progressive non MBAFF frames. | |
456 gfx::Size coded_size((pic_width_in_mbs_minus1 + 1) * 16, | |
457 (pic_height_in_map_units_minus1 + 1) * 16); | |
458 gfx::Rect visible_rect( | |
459 frame_crop_left_offset, | |
460 frame_crop_top_offset, | |
461 (coded_size.width() - frame_crop_right_offset) - frame_crop_left_offset, | |
462 (coded_size.height() - frame_crop_bottom_offset) - frame_crop_top_offset); | |
463 | |
464 // TODO(damienv): calculate the natural size based | |
465 // on the possible aspect ratio coded in the VUI parameters. | |
466 gfx::Size natural_size(visible_rect.width(), | |
467 visible_rect.height()); | |
468 | |
469 // TODO(damienv): | |
470 // Assuming the SPS is used right away by the PPS | |
471 // and the slice headers is a strong assumption. | |
472 // In theory, we should process the SPS and PPS | |
473 // and only when one of the slice header is switching | |
474 // the PPS id, the video decoder config should be changed. | |
475 VideoDecoderConfig video_decoder_config( | |
476 kCodecH264, | |
477 VIDEO_CODEC_PROFILE_UNKNOWN, // TODO(damienv) | |
478 VideoFrame::YV12, | |
479 coded_size, | |
480 visible_rect, | |
481 natural_size, | |
482 NULL, 0, | |
483 false); | |
484 | |
485 if (!video_decoder_config.Matches(last_video_decoder_config_)) { | |
486 DVLOG(1) << "Profile IDC: " << profile_idc; | |
487 DVLOG(1) << "Level IDC: " << level_idc; | |
488 DVLOG(1) << "Pic width: " << (pic_width_in_mbs_minus1 + 1) * 16; | |
489 DVLOG(1) << "Pic height: " << (pic_height_in_map_units_minus1 + 1) * 16; | |
490 DVLOG(1) << "log2_max_frame_num_minus4: " << log2_max_frame_num_minus4; | |
491 last_video_decoder_config_ = video_decoder_config; | |
492 new_video_config_cb_.Run(video_decoder_config); | |
493 } | |
494 | |
495 return true; | |
496 } | |
497 | |
498 } // namespace mp2t | |
499 } // namespace media | |
500 | |
OLD | NEW |