Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(122)

Side by Side Diff: media/formats/mp4/mp4_stream_parser.cc

Issue 2254733006: Allow MP4 parser to handle multiple audio and video tracks (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@merged-buffers-map
Patch Set: Clear track ids at the beginning of ParseMoov Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "media/formats/mp4/mp4_stream_parser.h" 5 #include "media/formats/mp4/mp4_stream_parser.h"
6 6
7 #include <stddef.h> 7 #include <stddef.h>
8 8
9 #include <limits> 9 #include <limits>
10 #include <memory> 10 #include <memory>
(...skipping 23 matching lines...) Expand all
34 namespace mp4 { 34 namespace mp4 {
35 35
36 MP4StreamParser::MP4StreamParser(const std::set<int>& audio_object_types, 36 MP4StreamParser::MP4StreamParser(const std::set<int>& audio_object_types,
37 bool has_sbr) 37 bool has_sbr)
38 : state_(kWaitingForInit), 38 : state_(kWaitingForInit),
39 moof_head_(0), 39 moof_head_(0),
40 mdat_tail_(0), 40 mdat_tail_(0),
41 highest_end_offset_(0), 41 highest_end_offset_(0),
42 has_audio_(false), 42 has_audio_(false),
43 has_video_(false), 43 has_video_(false),
44 audio_track_id_(0),
45 video_track_id_(0),
46 audio_object_types_(audio_object_types), 44 audio_object_types_(audio_object_types),
47 has_sbr_(has_sbr), 45 has_sbr_(has_sbr),
48 is_audio_track_encrypted_(false),
49 is_video_track_encrypted_(false),
50 num_top_level_box_skipped_(0) { 46 num_top_level_box_skipped_(0) {
51 } 47 }
52 48
53 MP4StreamParser::~MP4StreamParser() {} 49 MP4StreamParser::~MP4StreamParser() {}
54 50
55 void MP4StreamParser::Init( 51 void MP4StreamParser::Init(
56 const InitCB& init_cb, 52 const InitCB& init_cb,
57 const NewConfigCB& config_cb, 53 const NewConfigCB& config_cb,
58 const NewBuffersCB& new_buffers_cb, 54 const NewBuffersCB& new_buffers_cb,
59 bool /* ignore_text_tracks */, 55 bool /* ignore_text_tracks */,
(...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after
179 } 175 }
180 176
181 queue_.Pop(reader->size()); 177 queue_.Pop(reader->size());
182 return !(*err); 178 return !(*err);
183 } 179 }
184 180
185 bool MP4StreamParser::ParseMoov(BoxReader* reader) { 181 bool MP4StreamParser::ParseMoov(BoxReader* reader) {
186 moov_.reset(new Movie); 182 moov_.reset(new Movie);
187 RCHECK(moov_->Parse(reader)); 183 RCHECK(moov_->Parse(reader));
188 runs_.reset(); 184 runs_.reset();
185 audio_track_ids_.clear();
186 video_track_ids_.clear();
187 is_track_encrypted_.clear();
189 188
190 has_audio_ = false; 189 has_audio_ = false;
191 has_video_ = false; 190 has_video_ = false;
192 191
193 std::unique_ptr<MediaTracks> media_tracks(new MediaTracks()); 192 std::unique_ptr<MediaTracks> media_tracks(new MediaTracks());
194 AudioDecoderConfig audio_config; 193 AudioDecoderConfig audio_config;
195 VideoDecoderConfig video_config; 194 VideoDecoderConfig video_config;
196 int detected_audio_track_count = 0; 195 int detected_audio_track_count = 0;
197 int detected_video_track_count = 0; 196 int detected_video_track_count = 0;
198 int detected_text_track_count = 0; 197 int detected_text_track_count = 0;
199 198
200 for (std::vector<Track>::const_iterator track = moov_->tracks.begin(); 199 for (std::vector<Track>::const_iterator track = moov_->tracks.begin();
201 track != moov_->tracks.end(); ++track) { 200 track != moov_->tracks.end(); ++track) {
202 // TODO(strobe): Only the first audio and video track present in a file are 201 // TODO(strobe): Only the first audio and video track present in a file are
wolenetz 2016/08/23 22:51:42 Comment is obsolete.
servolk 2016/08/24 00:53:49 Done.
203 // used. (Track selection is better accomplished via Source IDs, though, so 202 // used. (Track selection is better accomplished via Source IDs, though, so
204 // adding support for track selection within a stream is low-priority.) 203 // adding support for track selection within a stream is low-priority.)
205 const SampleDescription& samp_descr = 204 const SampleDescription& samp_descr =
206 track->media.information.sample_table.description; 205 track->media.information.sample_table.description;
207 206
208 // TODO(strobe): When codec reconfigurations are supported, detect and send 207 // TODO(strobe): When codec reconfigurations are supported, detect and send
209 // a codec reconfiguration for fragments using a sample description index 208 // a codec reconfiguration for fragments using a sample description index
210 // different from the previous one 209 // different from the previous one
211 size_t desc_idx = 0; 210 size_t desc_idx = 0;
212 for (size_t t = 0; t < moov_->extends.tracks.size(); t++) { 211 for (size_t t = 0; t < moov_->extends.tracks.size(); t++) {
213 const TrackExtends& trex = moov_->extends.tracks[t]; 212 const TrackExtends& trex = moov_->extends.tracks[t];
214 if (trex.track_id == track->header.track_id) { 213 if (trex.track_id == track->header.track_id) {
215 desc_idx = trex.default_sample_description_index; 214 desc_idx = trex.default_sample_description_index;
216 break; 215 break;
217 } 216 }
218 } 217 }
219 RCHECK(desc_idx > 0); 218 RCHECK(desc_idx > 0);
220 desc_idx -= 1; // BMFF descriptor index is one-based 219 desc_idx -= 1; // BMFF descriptor index is one-based
221 220
222 if (track->media.handler.type == kAudio) { 221 if (track->media.handler.type == kAudio) {
223 detected_audio_track_count++; 222 detected_audio_track_count++;
224 if (audio_config.IsValidConfig())
225 continue; // Skip other audio tracks once we found a supported one.
226 223
227 RCHECK(!samp_descr.audio_entries.empty()); 224 RCHECK(!samp_descr.audio_entries.empty());
228 225
229 // It is not uncommon to find otherwise-valid files with incorrect sample 226 // It is not uncommon to find otherwise-valid files with incorrect sample
230 // description indices, so we fail gracefully in that case. 227 // description indices, so we fail gracefully in that case.
231 if (desc_idx >= samp_descr.audio_entries.size()) 228 if (desc_idx >= samp_descr.audio_entries.size())
232 desc_idx = 0; 229 desc_idx = 0;
233 const AudioSampleEntry& entry = samp_descr.audio_entries[desc_idx]; 230 const AudioSampleEntry& entry = samp_descr.audio_entries[desc_idx];
234 const AAC& aac = entry.esds.aac; 231 const AAC& aac = entry.esds.aac;
235 232
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after
304 sample_format = kSampleFormatU8; 301 sample_format = kSampleFormatU8;
305 } else if (entry.samplesize == 16) { 302 } else if (entry.samplesize == 16) {
306 sample_format = kSampleFormatS16; 303 sample_format = kSampleFormatS16;
307 } else if (entry.samplesize == 32) { 304 } else if (entry.samplesize == 32) {
308 sample_format = kSampleFormatS32; 305 sample_format = kSampleFormatS32;
309 } else { 306 } else {
310 LOG(ERROR) << "Unsupported sample size."; 307 LOG(ERROR) << "Unsupported sample size.";
311 return false; 308 return false;
312 } 309 }
313 310
314 is_audio_track_encrypted_ = entry.sinf.info.track_encryption.is_encrypted; 311 uint32_t audio_track_id = track->header.track_id;
315 DVLOG(1) << "is_audio_track_encrypted_: " << is_audio_track_encrypted_; 312 bool is_track_encrypted = entry.sinf.info.track_encryption.is_encrypted;
313 is_track_encrypted_[audio_track_id] = is_track_encrypted;
316 audio_config.Initialize( 314 audio_config.Initialize(
317 codec, sample_format, channel_layout, sample_per_second, extra_data, 315 codec, sample_format, channel_layout, sample_per_second, extra_data,
318 is_audio_track_encrypted_ ? AesCtrEncryptionScheme() : Unencrypted(), 316 is_track_encrypted ? AesCtrEncryptionScheme() : Unencrypted(),
319 base::TimeDelta(), 0); 317 base::TimeDelta(), 0);
318 DVLOG(1) << "audio_track_id=" << audio_track_id
319 << " config=" << audio_config.AsHumanReadableString();
320 if (!audio_config.IsValidConfig()) { 320 if (!audio_config.IsValidConfig()) {
321 MEDIA_LOG(ERROR, media_log_) << "Invalid audio decoder config: " 321 MEDIA_LOG(ERROR, media_log_) << "Invalid audio decoder config: "
322 << audio_config.AsHumanReadableString(); 322 << audio_config.AsHumanReadableString();
323 return false; 323 return false;
324 } 324 }
325 has_audio_ = true; 325 has_audio_ = true;
326 audio_track_id_ = track->header.track_id; 326 audio_track_ids_.insert(audio_track_id);
327 media_tracks->AddAudioTrack(audio_config, audio_track_id_, "main", 327 media_tracks->AddAudioTrack(audio_config, audio_track_id, "main",
wolenetz 2016/08/23 22:51:42 Are all audio tracks "main" in a multi-track audio […]
servolk 2016/08/24 00:53:49 Done.
328 track->media.handler.name, 328 track->media.handler.name,
329 track->media.header.language()); 329 track->media.header.language());
330 continue; 330 continue;
331 } 331 }
332 332
333 if (track->media.handler.type == kVideo) { 333 if (track->media.handler.type == kVideo) {
334 detected_video_track_count++; 334 detected_video_track_count++;
335 if (video_config.IsValidConfig())
336 continue; // Skip other video tracks once we found a supported one.
337 335
338 RCHECK(!samp_descr.video_entries.empty()); 336 RCHECK(!samp_descr.video_entries.empty());
339 if (desc_idx >= samp_descr.video_entries.size()) 337 if (desc_idx >= samp_descr.video_entries.size())
340 desc_idx = 0; 338 desc_idx = 0;
341 const VideoSampleEntry& entry = samp_descr.video_entries[desc_idx]; 339 const VideoSampleEntry& entry = samp_descr.video_entries[desc_idx];
342 340
343 if (!entry.IsFormatValid()) { 341 if (!entry.IsFormatValid()) {
344 MEDIA_LOG(ERROR, media_log_) << "Unsupported video format 0x" 342 MEDIA_LOG(ERROR, media_log_) << "Unsupported video format 0x"
345 << std::hex << entry.format 343 << std::hex << entry.format
346 << " in stsd box."; 344 << " in stsd box.";
(...skipping 10 matching lines...) Expand all
357 if (entry.pixel_aspect.h_spacing != 1 || 355 if (entry.pixel_aspect.h_spacing != 1 ||
358 entry.pixel_aspect.v_spacing != 1) { 356 entry.pixel_aspect.v_spacing != 1) {
359 natural_size = 357 natural_size =
360 GetNaturalSize(visible_rect.size(), entry.pixel_aspect.h_spacing, 358 GetNaturalSize(visible_rect.size(), entry.pixel_aspect.h_spacing,
361 entry.pixel_aspect.v_spacing); 359 entry.pixel_aspect.v_spacing);
362 } else if (track->header.width && track->header.height) { 360 } else if (track->header.width && track->header.height) {
363 natural_size = 361 natural_size =
364 gfx::Size(track->header.width, track->header.height); 362 gfx::Size(track->header.width, track->header.height);
365 } 363 }
366 364
367 is_video_track_encrypted_ = entry.sinf.info.track_encryption.is_encrypted; 365 uint32_t video_track_id = track->header.track_id;
368 DVLOG(1) << "is_video_track_encrypted_: " << is_video_track_encrypted_; 366 bool is_track_encrypted = entry.sinf.info.track_encryption.is_encrypted;
367 is_track_encrypted_[video_track_id] = is_track_encrypted;
chcunningham1 2016/08/23 01:54:32 Maybe capture the return value and check that its […]
wolenetz 2016/08/23 22:51:42 +1 to unique track id checking (here and in the au[…]
servolk 2016/08/24 00:53:49 Done.
369 video_config.Initialize( 368 video_config.Initialize(
370 entry.video_codec, entry.video_codec_profile, PIXEL_FORMAT_YV12, 369 entry.video_codec, entry.video_codec_profile, PIXEL_FORMAT_YV12,
371 COLOR_SPACE_HD_REC709, coded_size, visible_rect, natural_size, 370 COLOR_SPACE_HD_REC709, coded_size, visible_rect, natural_size,
372 // No decoder-specific buffer needed for AVC; 371 // No decoder-specific buffer needed for AVC;
373 // SPS/PPS are embedded in the video stream 372 // SPS/PPS are embedded in the video stream
374 EmptyExtraData(), 373 EmptyExtraData(),
375 is_video_track_encrypted_ ? AesCtrEncryptionScheme() : Unencrypted()); 374 is_track_encrypted ? AesCtrEncryptionScheme() : Unencrypted());
375 DVLOG(1) << "video_track_id=" << video_track_id
376 << " config=" << video_config.AsHumanReadableString();
376 if (!video_config.IsValidConfig()) { 377 if (!video_config.IsValidConfig()) {
377 MEDIA_LOG(ERROR, media_log_) << "Invalid video decoder config: " 378 MEDIA_LOG(ERROR, media_log_) << "Invalid video decoder config: "
378 << video_config.AsHumanReadableString(); 379 << video_config.AsHumanReadableString();
379 return false; 380 return false;
380 } 381 }
381 has_video_ = true; 382 has_video_ = true;
382 video_track_id_ = track->header.track_id; 383 video_track_ids_.insert(video_track_id);
383 media_tracks->AddVideoTrack(video_config, video_track_id_, "main", 384 media_tracks->AddVideoTrack(video_config, video_track_id, "main",
wolenetz 2016/08/23 22:51:42 ditto: "main" is only for the first video track.
servolk 2016/08/24 00:53:49 Done.
384 track->media.handler.name, 385 track->media.handler.name,
385 track->media.header.language()); 386 track->media.header.language());
386 continue; 387 continue;
387 } 388 }
388 389
389 // TODO(wolenetz): Investigate support in MSE and Chrome MSE for CEA 608/708 390 // TODO(wolenetz): Investigate support in MSE and Chrome MSE for CEA 608/708
390 // embedded caption data in video track. At time of init segment parsing, we 391 // embedded caption data in video track. At time of init segment parsing, we
391 // don't have this data (unless maybe by SourceBuffer's mimetype). 392 // don't have this data (unless maybe by SourceBuffer's mimetype).
392 // See https://crbug.com/597073 393 // See https://crbug.com/597073
393 if (track->media.handler.type == kText) 394 if (track->media.handler.type == kText)
(...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after
510 return true; 511 return true;
511 } 512 }
512 513
513 DCHECK(!(*err)); 514 DCHECK(!(*err));
514 515
515 const uint8_t* buf; 516 const uint8_t* buf;
516 int buf_size; 517 int buf_size;
517 queue_.Peek(&buf, &buf_size); 518 queue_.Peek(&buf, &buf_size);
518 if (!buf_size) return false; 519 if (!buf_size) return false;
519 520
520 bool audio = has_audio_ && audio_track_id_ == runs_->track_id(); 521 bool audio =
521 bool video = has_video_ && video_track_id_ == runs_->track_id(); 522 audio_track_ids_.find(runs_->track_id()) != audio_track_ids_.end();
523 bool video =
524 video_track_ids_.find(runs_->track_id()) != video_track_ids_.end();
522 525
523 // Skip this entire track if it's not one we're interested in 526 // Skip this entire track if it's not one we're interested in
524 if (!audio && !video) { 527 if (!audio && !video) {
525 runs_->AdvanceRun(); 528 runs_->AdvanceRun();
526 return true; 529 return true;
527 } 530 }
528 531
529 // Attempt to cache the auxiliary information first. Aux info is usually 532 // Attempt to cache the auxiliary information first. Aux info is usually
530 // placed in a contiguous block before the sample data, rather than being 533 // placed in a contiguous block before the sample data, rather than being
531 // interleaved. If we didn't cache it, this would require that we retain the 534 // interleaved. If we didn't cache it, this would require that we retain the
(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after
581 584
582 if (decrypt_config) { 585 if (decrypt_config) {
583 if (!subsamples.empty()) { 586 if (!subsamples.empty()) {
584 // Create a new config with the updated subsamples. 587 // Create a new config with the updated subsamples.
585 decrypt_config.reset(new DecryptConfig( 588 decrypt_config.reset(new DecryptConfig(
586 decrypt_config->key_id(), 589 decrypt_config->key_id(),
587 decrypt_config->iv(), 590 decrypt_config->iv(),
588 subsamples)); 591 subsamples));
589 } 592 }
590 // else, use the existing config. 593 // else, use the existing config.
591 } else if ((audio && is_audio_track_encrypted_) || 594 } else if (is_track_encrypted_[runs_->track_id()]) {
592 (video && is_video_track_encrypted_)) {
593 // The media pipeline requires a DecryptConfig with an empty |iv|. 595 // The media pipeline requires a DecryptConfig with an empty |iv|.
594 // TODO(ddorwin): Refactor so we do not need a fake key ID ("1"); 596 // TODO(ddorwin): Refactor so we do not need a fake key ID ("1");
595 decrypt_config.reset( 597 decrypt_config.reset(
596 new DecryptConfig("1", "", std::vector<SubsampleEntry>())); 598 new DecryptConfig("1", "", std::vector<SubsampleEntry>()));
597 } 599 }
598 600
599 StreamParserBuffer::Type buffer_type = audio ? DemuxerStream::AUDIO : 601 StreamParserBuffer::Type buffer_type = audio ? DemuxerStream::AUDIO :
600 DemuxerStream::VIDEO; 602 DemuxerStream::VIDEO;
601 603
602 // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId
603 // type and allow multiple tracks for same media type, if applicable. See
604 // https://crbug.com/341581.
605 scoped_refptr<StreamParserBuffer> stream_buf = StreamParserBuffer::CopyFrom( 604 scoped_refptr<StreamParserBuffer> stream_buf = StreamParserBuffer::CopyFrom(
606 &frame_buf[0], frame_buf.size(), runs_->is_keyframe(), buffer_type, 605 &frame_buf[0], frame_buf.size(), runs_->is_keyframe(), buffer_type,
607 runs_->track_id()); 606 runs_->track_id());
608 607
609 if (decrypt_config) 608 if (decrypt_config)
610 stream_buf->set_decrypt_config(std::move(decrypt_config)); 609 stream_buf->set_decrypt_config(std::move(decrypt_config));
611 610
612 stream_buf->set_duration(runs_->duration()); 611 stream_buf->set_duration(runs_->duration());
613 stream_buf->set_timestamp(runs_->cts()); 612 stream_buf->set_timestamp(runs_->cts());
614 stream_buf->SetDecodeTimestamp(runs_->dts()); 613 stream_buf->SetDecodeTimestamp(runs_->dts());
615 614
616 DVLOG(3) << "Pushing frame: aud=" << audio 615 DVLOG(3) << "Emit " << (audio ? "audio" : "video") << " frame: "
chcunningham1 2016/08/23 01:54:32 what if the track is text? will you say "video" he[…]
wolenetz 2016/08/23 22:51:42 At the moment, Chrome MSE mp4 parser doesn't parse […]
servolk 2016/08/24 00:53:49 Yup, text tracks in .mp4 are not supported for now […]
616 << " track_id=" << runs_->track_id()
617 << ", key=" << runs_->is_keyframe() 617 << ", key=" << runs_->is_keyframe()
618 << ", dur=" << runs_->duration().InMilliseconds() 618 << ", dur=" << runs_->duration().InMilliseconds()
619 << ", dts=" << runs_->dts().InMilliseconds() 619 << ", dts=" << runs_->dts().InMilliseconds()
620 << ", cts=" << runs_->cts().InMilliseconds() 620 << ", cts=" << runs_->cts().InMilliseconds()
621 << ", size=" << runs_->sample_size(); 621 << ", size=" << runs_->sample_size();
622 622
623 (*buffers)[runs_->track_id()].push_back(stream_buf); 623 (*buffers)[runs_->track_id()].push_back(stream_buf);
624 runs_->AdvanceSample(); 624 runs_->AdvanceSample();
625 return true; 625 return true;
626 } 626 }
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
693 runs.AdvanceSample(); 693 runs.AdvanceSample();
694 } 694 }
695 runs.AdvanceRun(); 695 runs.AdvanceRun();
696 } 696 }
697 697
698 return true; 698 return true;
699 } 699 }
700 700
701 } // namespace mp4 701 } // namespace mp4
702 } // namespace media 702 } // namespace media
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698