Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(273)

Side by Side Diff: media/formats/mp4/mp4_stream_parser.cc

Issue 2254733006: Allow MP4 parser to handle multiple audio and video tracks (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@merged-buffers-map
Patch Set: CR feedback Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « media/formats/mp4/mp4_stream_parser.h ('k') | media/formats/mp4/mp4_stream_parser_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "media/formats/mp4/mp4_stream_parser.h" 5 #include "media/formats/mp4/mp4_stream_parser.h"
6 6
7 #include <stddef.h> 7 #include <stddef.h>
8 8
9 #include <limits> 9 #include <limits>
10 #include <memory> 10 #include <memory>
(...skipping 23 matching lines...) Expand all
34 namespace mp4 { 34 namespace mp4 {
35 35
36 MP4StreamParser::MP4StreamParser(const std::set<int>& audio_object_types, 36 MP4StreamParser::MP4StreamParser(const std::set<int>& audio_object_types,
37 bool has_sbr) 37 bool has_sbr)
38 : state_(kWaitingForInit), 38 : state_(kWaitingForInit),
39 moof_head_(0), 39 moof_head_(0),
40 mdat_tail_(0), 40 mdat_tail_(0),
41 highest_end_offset_(0), 41 highest_end_offset_(0),
42 has_audio_(false), 42 has_audio_(false),
43 has_video_(false), 43 has_video_(false),
44 audio_track_id_(0),
45 video_track_id_(0),
46 audio_object_types_(audio_object_types), 44 audio_object_types_(audio_object_types),
47 has_sbr_(has_sbr), 45 has_sbr_(has_sbr),
48 is_audio_track_encrypted_(false),
49 is_video_track_encrypted_(false),
50 num_top_level_box_skipped_(0) { 46 num_top_level_box_skipped_(0) {
51 } 47 }
52 48
53 MP4StreamParser::~MP4StreamParser() {} 49 MP4StreamParser::~MP4StreamParser() {}
54 50
55 void MP4StreamParser::Init( 51 void MP4StreamParser::Init(
56 const InitCB& init_cb, 52 const InitCB& init_cb,
57 const NewConfigCB& config_cb, 53 const NewConfigCB& config_cb,
58 const NewBuffersCB& new_buffers_cb, 54 const NewBuffersCB& new_buffers_cb,
59 bool /* ignore_text_tracks */, 55 bool /* ignore_text_tracks */,
(...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after
179 } 175 }
180 176
181 queue_.Pop(reader->size()); 177 queue_.Pop(reader->size());
182 return !(*err); 178 return !(*err);
183 } 179 }
184 180
185 bool MP4StreamParser::ParseMoov(BoxReader* reader) { 181 bool MP4StreamParser::ParseMoov(BoxReader* reader) {
186 moov_.reset(new Movie); 182 moov_.reset(new Movie);
187 RCHECK(moov_->Parse(reader)); 183 RCHECK(moov_->Parse(reader));
188 runs_.reset(); 184 runs_.reset();
185 audio_track_ids_.clear();
186 video_track_ids_.clear();
187 is_track_encrypted_.clear();
189 188
190 has_audio_ = false; 189 has_audio_ = false;
191 has_video_ = false; 190 has_video_ = false;
192 191
193 std::unique_ptr<MediaTracks> media_tracks(new MediaTracks()); 192 std::unique_ptr<MediaTracks> media_tracks(new MediaTracks());
194 AudioDecoderConfig audio_config; 193 AudioDecoderConfig audio_config;
195 VideoDecoderConfig video_config; 194 VideoDecoderConfig video_config;
196 int detected_audio_track_count = 0; 195 int detected_audio_track_count = 0;
197 int detected_video_track_count = 0; 196 int detected_video_track_count = 0;
198 int detected_text_track_count = 0; 197 int detected_text_track_count = 0;
(...skipping 12 matching lines...) Expand all
211 if (trex.track_id == track->header.track_id) { 210 if (trex.track_id == track->header.track_id) {
212 desc_idx = trex.default_sample_description_index; 211 desc_idx = trex.default_sample_description_index;
213 break; 212 break;
214 } 213 }
215 } 214 }
216 RCHECK(desc_idx > 0); 215 RCHECK(desc_idx > 0);
217 desc_idx -= 1; // BMFF descriptor index is one-based 216 desc_idx -= 1; // BMFF descriptor index is one-based
218 217
219 if (track->media.handler.type == kAudio) { 218 if (track->media.handler.type == kAudio) {
220 detected_audio_track_count++; 219 detected_audio_track_count++;
221 if (audio_config.IsValidConfig())
222 continue; // Skip other audio tracks once we found a supported one.
223 220
224 RCHECK(!samp_descr.audio_entries.empty()); 221 RCHECK(!samp_descr.audio_entries.empty());
225 222
226 // It is not uncommon to find otherwise-valid files with incorrect sample 223 // It is not uncommon to find otherwise-valid files with incorrect sample
227 // description indices, so we fail gracefully in that case. 224 // description indices, so we fail gracefully in that case.
228 if (desc_idx >= samp_descr.audio_entries.size()) 225 if (desc_idx >= samp_descr.audio_entries.size())
229 desc_idx = 0; 226 desc_idx = 0;
230 const AudioSampleEntry& entry = samp_descr.audio_entries[desc_idx]; 227 const AudioSampleEntry& entry = samp_descr.audio_entries[desc_idx];
231 const AAC& aac = entry.esds.aac; 228 const AAC& aac = entry.esds.aac;
232 229
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after
301 sample_format = kSampleFormatU8; 298 sample_format = kSampleFormatU8;
302 } else if (entry.samplesize == 16) { 299 } else if (entry.samplesize == 16) {
303 sample_format = kSampleFormatS16; 300 sample_format = kSampleFormatS16;
304 } else if (entry.samplesize == 32) { 301 } else if (entry.samplesize == 32) {
305 sample_format = kSampleFormatS32; 302 sample_format = kSampleFormatS32;
306 } else { 303 } else {
307 LOG(ERROR) << "Unsupported sample size."; 304 LOG(ERROR) << "Unsupported sample size.";
308 return false; 305 return false;
309 } 306 }
310 307
311 is_audio_track_encrypted_ = entry.sinf.info.track_encryption.is_encrypted; 308 uint32_t audio_track_id = track->header.track_id;
312 DVLOG(1) << "is_audio_track_encrypted_: " << is_audio_track_encrypted_; 309 if (audio_track_ids_.find(audio_track_id) != audio_track_ids_.end()) {
310 MEDIA_LOG(ERROR, media_log_)
311 << "Audio track with track_id=" << audio_track_id
312 << " already present.";
313 return false;
314 }
315 bool is_track_encrypted = entry.sinf.info.track_encryption.is_encrypted;
316 is_track_encrypted_[audio_track_id] = is_track_encrypted;
313 audio_config.Initialize( 317 audio_config.Initialize(
314 codec, sample_format, channel_layout, sample_per_second, extra_data, 318 codec, sample_format, channel_layout, sample_per_second, extra_data,
315 is_audio_track_encrypted_ ? AesCtrEncryptionScheme() : Unencrypted(), 319 is_track_encrypted ? AesCtrEncryptionScheme() : Unencrypted(),
316 base::TimeDelta(), 0); 320 base::TimeDelta(), 0);
321 DVLOG(1) << "audio_track_id=" << audio_track_id
322 << " config=" << audio_config.AsHumanReadableString();
317 if (!audio_config.IsValidConfig()) { 323 if (!audio_config.IsValidConfig()) {
318 MEDIA_LOG(ERROR, media_log_) << "Invalid audio decoder config: " 324 MEDIA_LOG(ERROR, media_log_) << "Invalid audio decoder config: "
319 << audio_config.AsHumanReadableString(); 325 << audio_config.AsHumanReadableString();
320 return false; 326 return false;
321 } 327 }
322 has_audio_ = true; 328 has_audio_ = true;
323 audio_track_id_ = track->header.track_id; 329 audio_track_ids_.insert(audio_track_id);
324 media_tracks->AddAudioTrack(audio_config, audio_track_id_, "main", 330 const char* track_kind = (audio_track_ids_.size() == 1 ? "main" : "");
331 media_tracks->AddAudioTrack(audio_config, audio_track_id, track_kind,
325 track->media.handler.name, 332 track->media.handler.name,
326 track->media.header.language()); 333 track->media.header.language());
327 continue; 334 continue;
328 } 335 }
329 336
330 if (track->media.handler.type == kVideo) { 337 if (track->media.handler.type == kVideo) {
331 detected_video_track_count++; 338 detected_video_track_count++;
332 if (video_config.IsValidConfig())
333 continue; // Skip other video tracks once we found a supported one.
334 339
335 RCHECK(!samp_descr.video_entries.empty()); 340 RCHECK(!samp_descr.video_entries.empty());
336 if (desc_idx >= samp_descr.video_entries.size()) 341 if (desc_idx >= samp_descr.video_entries.size())
337 desc_idx = 0; 342 desc_idx = 0;
338 const VideoSampleEntry& entry = samp_descr.video_entries[desc_idx]; 343 const VideoSampleEntry& entry = samp_descr.video_entries[desc_idx];
339 344
340 if (!entry.IsFormatValid()) { 345 if (!entry.IsFormatValid()) {
341 MEDIA_LOG(ERROR, media_log_) << "Unsupported video format 0x" 346 MEDIA_LOG(ERROR, media_log_) << "Unsupported video format 0x"
342 << std::hex << entry.format 347 << std::hex << entry.format
343 << " in stsd box."; 348 << " in stsd box.";
(...skipping 10 matching lines...) Expand all
354 if (entry.pixel_aspect.h_spacing != 1 || 359 if (entry.pixel_aspect.h_spacing != 1 ||
355 entry.pixel_aspect.v_spacing != 1) { 360 entry.pixel_aspect.v_spacing != 1) {
356 natural_size = 361 natural_size =
357 GetNaturalSize(visible_rect.size(), entry.pixel_aspect.h_spacing, 362 GetNaturalSize(visible_rect.size(), entry.pixel_aspect.h_spacing,
358 entry.pixel_aspect.v_spacing); 363 entry.pixel_aspect.v_spacing);
359 } else if (track->header.width && track->header.height) { 364 } else if (track->header.width && track->header.height) {
360 natural_size = 365 natural_size =
361 gfx::Size(track->header.width, track->header.height); 366 gfx::Size(track->header.width, track->header.height);
362 } 367 }
363 368
364 is_video_track_encrypted_ = entry.sinf.info.track_encryption.is_encrypted; 369 uint32_t video_track_id = track->header.track_id;
365 DVLOG(1) << "is_video_track_encrypted_: " << is_video_track_encrypted_; 370 if (video_track_ids_.find(video_track_id) != video_track_ids_.end()) {
371 MEDIA_LOG(ERROR, media_log_)
372 << "Video track with track_id=" << video_track_id
373 << " already present.";
374 return false;
375 }
376 bool is_track_encrypted = entry.sinf.info.track_encryption.is_encrypted;
377 is_track_encrypted_[video_track_id] = is_track_encrypted;
366 video_config.Initialize( 378 video_config.Initialize(
367 entry.video_codec, entry.video_codec_profile, PIXEL_FORMAT_YV12, 379 entry.video_codec, entry.video_codec_profile, PIXEL_FORMAT_YV12,
368 COLOR_SPACE_HD_REC709, coded_size, visible_rect, natural_size, 380 COLOR_SPACE_HD_REC709, coded_size, visible_rect, natural_size,
369 // No decoder-specific buffer needed for AVC; 381 // No decoder-specific buffer needed for AVC;
370 // SPS/PPS are embedded in the video stream 382 // SPS/PPS are embedded in the video stream
371 EmptyExtraData(), 383 EmptyExtraData(),
372 is_video_track_encrypted_ ? AesCtrEncryptionScheme() : Unencrypted()); 384 is_track_encrypted ? AesCtrEncryptionScheme() : Unencrypted());
385 DVLOG(1) << "video_track_id=" << video_track_id
386 << " config=" << video_config.AsHumanReadableString();
373 if (!video_config.IsValidConfig()) { 387 if (!video_config.IsValidConfig()) {
374 MEDIA_LOG(ERROR, media_log_) << "Invalid video decoder config: " 388 MEDIA_LOG(ERROR, media_log_) << "Invalid video decoder config: "
375 << video_config.AsHumanReadableString(); 389 << video_config.AsHumanReadableString();
376 return false; 390 return false;
377 } 391 }
378 has_video_ = true; 392 has_video_ = true;
379 video_track_id_ = track->header.track_id; 393 video_track_ids_.insert(video_track_id);
380 media_tracks->AddVideoTrack(video_config, video_track_id_, "main", 394 const char* track_kind = (video_track_ids_.size() == 1 ? "main" : "");
395 media_tracks->AddVideoTrack(video_config, video_track_id, track_kind,
381 track->media.handler.name, 396 track->media.handler.name,
382 track->media.header.language()); 397 track->media.header.language());
383 continue; 398 continue;
384 } 399 }
385 400
386 // TODO(wolenetz): Investigate support in MSE and Chrome MSE for CEA 608/708 401 // TODO(wolenetz): Investigate support in MSE and Chrome MSE for CEA 608/708
387 // embedded caption data in video track. At time of init segment parsing, we 402 // embedded caption data in video track. At time of init segment parsing, we
388 // don't have this data (unless maybe by SourceBuffer's mimetype). 403 // don't have this data (unless maybe by SourceBuffer's mimetype).
389 // See https://crbug.com/597073 404 // See https://crbug.com/597073
390 if (track->media.handler.type == kText) 405 if (track->media.handler.type == kText)
(...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after
507 return true; 522 return true;
508 } 523 }
509 524
510 DCHECK(!(*err)); 525 DCHECK(!(*err));
511 526
512 const uint8_t* buf; 527 const uint8_t* buf;
513 int buf_size; 528 int buf_size;
514 queue_.Peek(&buf, &buf_size); 529 queue_.Peek(&buf, &buf_size);
515 if (!buf_size) return false; 530 if (!buf_size) return false;
516 531
517 bool audio = has_audio_ && audio_track_id_ == runs_->track_id(); 532 bool audio =
518 bool video = has_video_ && video_track_id_ == runs_->track_id(); 533 audio_track_ids_.find(runs_->track_id()) != audio_track_ids_.end();
534 bool video =
535 video_track_ids_.find(runs_->track_id()) != video_track_ids_.end();
519 536
520 // Skip this entire track if it's not one we're interested in 537 // Skip this entire track if it's not one we're interested in
521 if (!audio && !video) { 538 if (!audio && !video) {
522 runs_->AdvanceRun(); 539 runs_->AdvanceRun();
523 return true; 540 return true;
524 } 541 }
525 542
526 // Attempt to cache the auxiliary information first. Aux info is usually 543 // Attempt to cache the auxiliary information first. Aux info is usually
527 // placed in a contiguous block before the sample data, rather than being 544 // placed in a contiguous block before the sample data, rather than being
528 // interleaved. If we didn't cache it, this would require that we retain the 545 // interleaved. If we didn't cache it, this would require that we retain the
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
578 595
579 if (decrypt_config) { 596 if (decrypt_config) {
580 if (!subsamples.empty()) { 597 if (!subsamples.empty()) {
581 // Create a new config with the updated subsamples. 598 // Create a new config with the updated subsamples.
582 decrypt_config.reset(new DecryptConfig( 599 decrypt_config.reset(new DecryptConfig(
583 decrypt_config->key_id(), 600 decrypt_config->key_id(),
584 decrypt_config->iv(), 601 decrypt_config->iv(),
585 subsamples)); 602 subsamples));
586 } 603 }
587 // else, use the existing config. 604 // else, use the existing config.
588 } else if ((audio && is_audio_track_encrypted_) || 605 } else if (is_track_encrypted_[runs_->track_id()]) {
589 (video && is_video_track_encrypted_)) {
590 // The media pipeline requires a DecryptConfig with an empty |iv|. 606 // The media pipeline requires a DecryptConfig with an empty |iv|.
591 // TODO(ddorwin): Refactor so we do not need a fake key ID ("1"); 607 // TODO(ddorwin): Refactor so we do not need a fake key ID ("1");
592 decrypt_config.reset( 608 decrypt_config.reset(
593 new DecryptConfig("1", "", std::vector<SubsampleEntry>())); 609 new DecryptConfig("1", "", std::vector<SubsampleEntry>()));
594 } 610 }
595 611
596 StreamParserBuffer::Type buffer_type = audio ? DemuxerStream::AUDIO : 612 StreamParserBuffer::Type buffer_type = audio ? DemuxerStream::AUDIO :
597 DemuxerStream::VIDEO; 613 DemuxerStream::VIDEO;
598 614
599 // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId
600 // type and allow multiple tracks for same media type, if applicable. See
601 // https://crbug.com/341581.
602 scoped_refptr<StreamParserBuffer> stream_buf = StreamParserBuffer::CopyFrom( 615 scoped_refptr<StreamParserBuffer> stream_buf = StreamParserBuffer::CopyFrom(
603 &frame_buf[0], frame_buf.size(), runs_->is_keyframe(), buffer_type, 616 &frame_buf[0], frame_buf.size(), runs_->is_keyframe(), buffer_type,
604 runs_->track_id()); 617 runs_->track_id());
605 618
606 if (decrypt_config) 619 if (decrypt_config)
607 stream_buf->set_decrypt_config(std::move(decrypt_config)); 620 stream_buf->set_decrypt_config(std::move(decrypt_config));
608 621
609 stream_buf->set_duration(runs_->duration()); 622 stream_buf->set_duration(runs_->duration());
610 stream_buf->set_timestamp(runs_->cts()); 623 stream_buf->set_timestamp(runs_->cts());
611 stream_buf->SetDecodeTimestamp(runs_->dts()); 624 stream_buf->SetDecodeTimestamp(runs_->dts());
612 625
613 DVLOG(3) << "Pushing frame: aud=" << audio 626 DVLOG(3) << "Emit " << (audio ? "audio" : "video") << " frame: "
627 << " track_id=" << runs_->track_id()
614 << ", key=" << runs_->is_keyframe() 628 << ", key=" << runs_->is_keyframe()
615 << ", dur=" << runs_->duration().InMilliseconds() 629 << ", dur=" << runs_->duration().InMilliseconds()
616 << ", dts=" << runs_->dts().InMilliseconds() 630 << ", dts=" << runs_->dts().InMilliseconds()
617 << ", cts=" << runs_->cts().InMilliseconds() 631 << ", cts=" << runs_->cts().InMilliseconds()
618 << ", size=" << runs_->sample_size(); 632 << ", size=" << runs_->sample_size();
619 633
620 (*buffers)[runs_->track_id()].push_back(stream_buf); 634 (*buffers)[runs_->track_id()].push_back(stream_buf);
621 runs_->AdvanceSample(); 635 runs_->AdvanceSample();
622 return true; 636 return true;
623 } 637 }
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
690 runs.AdvanceSample(); 704 runs.AdvanceSample();
691 } 705 }
692 runs.AdvanceRun(); 706 runs.AdvanceRun();
693 } 707 }
694 708
695 return true; 709 return true;
696 } 710 }
697 711
698 } // namespace mp4 712 } // namespace mp4
699 } // namespace media 713 } // namespace media
OLD | NEW
« no previous file with comments | « media/formats/mp4/mp4_stream_parser.h ('k') | media/formats/mp4/mp4_stream_parser_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698