media/formats/mp4/mp4_stream_parser.cc - Issue 2254733006: Allow MP4 parser to handle multiple audio and video tracks

Side by Side Diff: media/formats/mp4/mp4_stream_parser.cc

Issue 2254733006: Allow MP4 parser to handle multiple audio and video tracks (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@merged-buffers-map

Patch Set: CR feedback Created 4 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2014 The Chromium Authors. All rights reserved.	1 // Copyright 2014 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "media/formats/mp4/mp4_stream_parser.h"	5 #include "media/formats/mp4/mp4_stream_parser.h"

6	6

7 #include <stddef.h>	7 #include <stddef.h>

8	8

9 #include <limits>	9 #include <limits>

10 #include <memory>	10 #include <memory>

(...skipping 23 matching lines...) Expand all Loading...
34 namespace mp4 {	34 namespace mp4 {

35	35

36 MP4StreamParser::MP4StreamParser(const std::set<int>& audio_object_types,	36 MP4StreamParser::MP4StreamParser(const std::set<int>& audio_object_types,

37 bool has_sbr)	37 bool has_sbr)

38 : state_(kWaitingForInit),	38 : state_(kWaitingForInit),

39 moof_head_(0),	39 moof_head_(0),

40 mdat_tail_(0),	40 mdat_tail_(0),

41 highest_end_offset_(0),	41 highest_end_offset_(0),

42 has_audio_(false),	42 has_audio_(false),

43 has_video_(false),	43 has_video_(false),

44 audio_track_id_(0),

45 video_track_id_(0),

46 audio_object_types_(audio_object_types),	44 audio_object_types_(audio_object_types),

47 has_sbr_(has_sbr),	45 has_sbr_(has_sbr),

48 is_audio_track_encrypted_(false),

49 is_video_track_encrypted_(false),

50 num_top_level_box_skipped_(0) {	46 num_top_level_box_skipped_(0) {

51 }	47 }

52	48

53 MP4StreamParser::~MP4StreamParser() {}	49 MP4StreamParser::~MP4StreamParser() {}

54	50

55 void MP4StreamParser::Init(	51 void MP4StreamParser::Init(

56 const InitCB& init_cb,	52 const InitCB& init_cb,

57 const NewConfigCB& config_cb,	53 const NewConfigCB& config_cb,

58 const NewBuffersCB& new_buffers_cb,	54 const NewBuffersCB& new_buffers_cb,

59 bool /* ignore_text_tracks */,	55 bool /* ignore_text_tracks */,

(...skipping 119 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
179 }	175 }

180	176

181 queue_.Pop(reader->size());	177 queue_.Pop(reader->size());

182 return !(*err);	178 return !(*err);

183 }	179 }

184	180

185 bool MP4StreamParser::ParseMoov(BoxReader* reader) {	181 bool MP4StreamParser::ParseMoov(BoxReader* reader) {

186 moov_.reset(new Movie);	182 moov_.reset(new Movie);

187 RCHECK(moov_->Parse(reader));	183 RCHECK(moov_->Parse(reader));

188 runs_.reset();	184 runs_.reset();

	185 audio_track_ids_.clear();

	186 video_track_ids_.clear();

	187 is_track_encrypted_.clear();

189	188

190 has_audio_ = false;	189 has_audio_ = false;

191 has_video_ = false;	190 has_video_ = false;

192	191

193 std::unique_ptr<MediaTracks> media_tracks(new MediaTracks());	192 std::unique_ptr<MediaTracks> media_tracks(new MediaTracks());

194 AudioDecoderConfig audio_config;	193 AudioDecoderConfig audio_config;

195 VideoDecoderConfig video_config;	194 VideoDecoderConfig video_config;

196 int detected_audio_track_count = 0;	195 int detected_audio_track_count = 0;

197 int detected_video_track_count = 0;	196 int detected_video_track_count = 0;

198 int detected_text_track_count = 0;	197 int detected_text_track_count = 0;

(...skipping 12 matching lines...) Expand all Loading...
211 if (trex.track_id == track->header.track_id) {	210 if (trex.track_id == track->header.track_id) {

212 desc_idx = trex.default_sample_description_index;	211 desc_idx = trex.default_sample_description_index;

213 break;	212 break;

214 }	213 }

215 }	214 }

216 RCHECK(desc_idx > 0);	215 RCHECK(desc_idx > 0);

217 desc_idx -= 1; // BMFF descriptor index is one-based	216 desc_idx -= 1; // BMFF descriptor index is one-based

218	217

219 if (track->media.handler.type == kAudio) {	218 if (track->media.handler.type == kAudio) {

220 detected_audio_track_count++;	219 detected_audio_track_count++;

221 if (audio_config.IsValidConfig())

222 continue; // Skip other audio tracks once we found a supported one.

223	220

224 RCHECK(!samp_descr.audio_entries.empty());	221 RCHECK(!samp_descr.audio_entries.empty());

225	222

226 // It is not uncommon to find otherwise-valid files with incorrect sample	223 // It is not uncommon to find otherwise-valid files with incorrect sample

227 // description indices, so we fail gracefully in that case.	224 // description indices, so we fail gracefully in that case.

228 if (desc_idx >= samp_descr.audio_entries.size())	225 if (desc_idx >= samp_descr.audio_entries.size())

229 desc_idx = 0;	226 desc_idx = 0;

230 const AudioSampleEntry& entry = samp_descr.audio_entries[desc_idx];	227 const AudioSampleEntry& entry = samp_descr.audio_entries[desc_idx];

231 const AAC& aac = entry.esds.aac;	228 const AAC& aac = entry.esds.aac;

232	229

(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
301 sample_format = kSampleFormatU8;	298 sample_format = kSampleFormatU8;

302 } else if (entry.samplesize == 16) {	299 } else if (entry.samplesize == 16) {

303 sample_format = kSampleFormatS16;	300 sample_format = kSampleFormatS16;

304 } else if (entry.samplesize == 32) {	301 } else if (entry.samplesize == 32) {

305 sample_format = kSampleFormatS32;	302 sample_format = kSampleFormatS32;

306 } else {	303 } else {

307 LOG(ERROR) << "Unsupported sample size.";	304 LOG(ERROR) << "Unsupported sample size.";

308 return false;	305 return false;

309 }	306 }

310	307

311 is_audio_track_encrypted_ = entry.sinf.info.track_encryption.is_encrypted;	308 uint32_t audio_track_id = track->header.track_id;

312 DVLOG(1) << "is_audio_track_encrypted_: " << is_audio_track_encrypted_;	309 if (audio_track_ids_.find(audio_track_id) != audio_track_ids_.end()) {

	310 MEDIA_LOG(ERROR, media_log_)

	311 << "Audio track with track_id=" << audio_track_id

	312 << " already present.";

	313 return false;

	314 }

	315 bool is_track_encrypted = entry.sinf.info.track_encryption.is_encrypted;

	316 is_track_encrypted_[audio_track_id] = is_track_encrypted;

313 audio_config.Initialize(	317 audio_config.Initialize(

314 codec, sample_format, channel_layout, sample_per_second, extra_data,	318 codec, sample_format, channel_layout, sample_per_second, extra_data,

315 is_audio_track_encrypted_ ? AesCtrEncryptionScheme() : Unencrypted(),	319 is_track_encrypted ? AesCtrEncryptionScheme() : Unencrypted(),

316 base::TimeDelta(), 0);	320 base::TimeDelta(), 0);

	321 DVLOG(1) << "audio_track_id=" << audio_track_id

	322 << " config=" << audio_config.AsHumanReadableString();

317 if (!audio_config.IsValidConfig()) {	323 if (!audio_config.IsValidConfig()) {

318 MEDIA_LOG(ERROR, media_log_) << "Invalid audio decoder config: "	324 MEDIA_LOG(ERROR, media_log_) << "Invalid audio decoder config: "

319 << audio_config.AsHumanReadableString();	325 << audio_config.AsHumanReadableString();

320 return false;	326 return false;

321 }	327 }

322 has_audio_ = true;	328 has_audio_ = true;

323 audio_track_id_ = track->header.track_id;	329 audio_track_ids_.insert(audio_track_id);

324 media_tracks->AddAudioTrack(audio_config, audio_track_id_, "main",	330 const char* track_kind = (audio_track_ids_.size() == 1 ? "main" : "");

	331 media_tracks->AddAudioTrack(audio_config, audio_track_id, track_kind,

325 track->media.handler.name,	332 track->media.handler.name,

326 track->media.header.language());	333 track->media.header.language());

327 continue;	334 continue;

328 }	335 }

329	336

330 if (track->media.handler.type == kVideo) {	337 if (track->media.handler.type == kVideo) {

331 detected_video_track_count++;	338 detected_video_track_count++;

332 if (video_config.IsValidConfig())

333 continue; // Skip other video tracks once we found a supported one.

334	339

335 RCHECK(!samp_descr.video_entries.empty());	340 RCHECK(!samp_descr.video_entries.empty());

336 if (desc_idx >= samp_descr.video_entries.size())	341 if (desc_idx >= samp_descr.video_entries.size())

337 desc_idx = 0;	342 desc_idx = 0;

338 const VideoSampleEntry& entry = samp_descr.video_entries[desc_idx];	343 const VideoSampleEntry& entry = samp_descr.video_entries[desc_idx];

339	344

340 if (!entry.IsFormatValid()) {	345 if (!entry.IsFormatValid()) {

341 MEDIA_LOG(ERROR, media_log_) << "Unsupported video format 0x"	346 MEDIA_LOG(ERROR, media_log_) << "Unsupported video format 0x"

342 << std::hex << entry.format	347 << std::hex << entry.format

343 << " in stsd box.";	348 << " in stsd box.";

(...skipping 10 matching lines...) Expand all Loading...
354 if (entry.pixel_aspect.h_spacing != 1 ||	359 if (entry.pixel_aspect.h_spacing != 1 ||

355 entry.pixel_aspect.v_spacing != 1) {	360 entry.pixel_aspect.v_spacing != 1) {

356 natural_size =	361 natural_size =

357 GetNaturalSize(visible_rect.size(), entry.pixel_aspect.h_spacing,	362 GetNaturalSize(visible_rect.size(), entry.pixel_aspect.h_spacing,

358 entry.pixel_aspect.v_spacing);	363 entry.pixel_aspect.v_spacing);

359 } else if (track->header.width && track->header.height) {	364 } else if (track->header.width && track->header.height) {

360 natural_size =	365 natural_size =

361 gfx::Size(track->header.width, track->header.height);	366 gfx::Size(track->header.width, track->header.height);

362 }	367 }

363	368

364 is_video_track_encrypted_ = entry.sinf.info.track_encryption.is_encrypted;	369 uint32_t video_track_id = track->header.track_id;

365 DVLOG(1) << "is_video_track_encrypted_: " << is_video_track_encrypted_;	370 if (video_track_ids_.find(video_track_id) != video_track_ids_.end()) {

	371 MEDIA_LOG(ERROR, media_log_)

	372 << "Video track with track_id=" << video_track_id

	373 << " already present.";

	374 return false;

	375 }

	376 bool is_track_encrypted = entry.sinf.info.track_encryption.is_encrypted;

	377 is_track_encrypted_[video_track_id] = is_track_encrypted;

366 video_config.Initialize(	378 video_config.Initialize(

367 entry.video_codec, entry.video_codec_profile, PIXEL_FORMAT_YV12,	379 entry.video_codec, entry.video_codec_profile, PIXEL_FORMAT_YV12,

368 COLOR_SPACE_HD_REC709, coded_size, visible_rect, natural_size,	380 COLOR_SPACE_HD_REC709, coded_size, visible_rect, natural_size,

369 // No decoder-specific buffer needed for AVC;	381 // No decoder-specific buffer needed for AVC;

370 // SPS/PPS are embedded in the video stream	382 // SPS/PPS are embedded in the video stream

371 EmptyExtraData(),	383 EmptyExtraData(),

372 is_video_track_encrypted_ ? AesCtrEncryptionScheme() : Unencrypted());	384 is_track_encrypted ? AesCtrEncryptionScheme() : Unencrypted());

	385 DVLOG(1) << "video_track_id=" << video_track_id

	386 << " config=" << video_config.AsHumanReadableString();

373 if (!video_config.IsValidConfig()) {	387 if (!video_config.IsValidConfig()) {

374 MEDIA_LOG(ERROR, media_log_) << "Invalid video decoder config: "	388 MEDIA_LOG(ERROR, media_log_) << "Invalid video decoder config: "

375 << video_config.AsHumanReadableString();	389 << video_config.AsHumanReadableString();

376 return false;	390 return false;

377 }	391 }

378 has_video_ = true;	392 has_video_ = true;

379 video_track_id_ = track->header.track_id;	393 video_track_ids_.insert(video_track_id);

380 media_tracks->AddVideoTrack(video_config, video_track_id_, "main",	394 const char* track_kind = (video_track_ids_.size() == 1 ? "main" : "");

	395 media_tracks->AddVideoTrack(video_config, video_track_id, track_kind,

381 track->media.handler.name,	396 track->media.handler.name,

382 track->media.header.language());	397 track->media.header.language());

383 continue;	398 continue;

384 }	399 }

385	400

386 // TODO(wolenetz): Investigate support in MSE and Chrome MSE for CEA 608/708	401 // TODO(wolenetz): Investigate support in MSE and Chrome MSE for CEA 608/708

387 // embedded caption data in video track. At time of init segment parsing, we	402 // embedded caption data in video track. At time of init segment parsing, we

388 // don't have this data (unless maybe by SourceBuffer's mimetype).	403 // don't have this data (unless maybe by SourceBuffer's mimetype).

389 // See https://crbug.com/597073	404 // See https://crbug.com/597073

390 if (track->media.handler.type == kText)	405 if (track->media.handler.type == kText)

(...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
507 return true;	522 return true;

508 }	523 }

509	524

510 DCHECK(!(*err));	525 DCHECK(!(*err));

511	526

512 const uint8_t* buf;	527 const uint8_t* buf;

513 int buf_size;	528 int buf_size;

514 queue_.Peek(&buf, &buf_size);	529 queue_.Peek(&buf, &buf_size);

515 if (!buf_size) return false;	530 if (!buf_size) return false;

516	531

517 bool audio = has_audio_ && audio_track_id_ == runs_->track_id();	532 bool audio =

518 bool video = has_video_ && video_track_id_ == runs_->track_id();	533 audio_track_ids_.find(runs_->track_id()) != audio_track_ids_.end();

	534 bool video =

	535 video_track_ids_.find(runs_->track_id()) != video_track_ids_.end();

519	536

520 // Skip this entire track if it's not one we're interested in	537 // Skip this entire track if it's not one we're interested in

521 if (!audio && !video) {	538 if (!audio && !video) {

522 runs_->AdvanceRun();	539 runs_->AdvanceRun();

523 return true;	540 return true;

524 }	541 }

525	542

526 // Attempt to cache the auxiliary information first. Aux info is usually	543 // Attempt to cache the auxiliary information first. Aux info is usually

527 // placed in a contiguous block before the sample data, rather than being	544 // placed in a contiguous block before the sample data, rather than being

528 // interleaved. If we didn't cache it, this would require that we retain the	545 // interleaved. If we didn't cache it, this would require that we retain the

(...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
578	595

579 if (decrypt_config) {	596 if (decrypt_config) {

580 if (!subsamples.empty()) {	597 if (!subsamples.empty()) {

581 // Create a new config with the updated subsamples.	598 // Create a new config with the updated subsamples.

582 decrypt_config.reset(new DecryptConfig(	599 decrypt_config.reset(new DecryptConfig(

583 decrypt_config->key_id(),	600 decrypt_config->key_id(),

584 decrypt_config->iv(),	601 decrypt_config->iv(),

585 subsamples));	602 subsamples));

586 }	603 }

587 // else, use the existing config.	604 // else, use the existing config.

588 } else if ((audio && is_audio_track_encrypted_) ||	605 } else if (is_track_encrypted_[runs_->track_id()]) {

589 (video && is_video_track_encrypted_)) {

590 // The media pipeline requires a DecryptConfig with an empty |iv|.	606 // The media pipeline requires a DecryptConfig with an empty |iv|.

591 // TODO(ddorwin): Refactor so we do not need a fake key ID ("1");	607 // TODO(ddorwin): Refactor so we do not need a fake key ID ("1");

592 decrypt_config.reset(	608 decrypt_config.reset(

593 new DecryptConfig("1", "", std::vector<SubsampleEntry>()));	609 new DecryptConfig("1", "", std::vector<SubsampleEntry>()));

594 }	610 }

595	611

596 StreamParserBuffer::Type buffer_type = audio ? DemuxerStream::AUDIO :	612 StreamParserBuffer::Type buffer_type = audio ? DemuxerStream::AUDIO :

597 DemuxerStream::VIDEO;	613 DemuxerStream::VIDEO;

598	614

599 // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId

600 // type and allow multiple tracks for same media type, if applicable. See

601 // https://crbug.com/341581.

602 scoped_refptr<StreamParserBuffer> stream_buf = StreamParserBuffer::CopyFrom(	615 scoped_refptr<StreamParserBuffer> stream_buf = StreamParserBuffer::CopyFrom(

603 &frame_buf[0], frame_buf.size(), runs_->is_keyframe(), buffer_type,	616 &frame_buf[0], frame_buf.size(), runs_->is_keyframe(), buffer_type,

604 runs_->track_id());	617 runs_->track_id());

605	618

606 if (decrypt_config)	619 if (decrypt_config)

607 stream_buf->set_decrypt_config(std::move(decrypt_config));	620 stream_buf->set_decrypt_config(std::move(decrypt_config));

608	621

609 stream_buf->set_duration(runs_->duration());	622 stream_buf->set_duration(runs_->duration());

610 stream_buf->set_timestamp(runs_->cts());	623 stream_buf->set_timestamp(runs_->cts());

611 stream_buf->SetDecodeTimestamp(runs_->dts());	624 stream_buf->SetDecodeTimestamp(runs_->dts());

612	625

613 DVLOG(3) << "Pushing frame: aud=" << audio	626 DVLOG(3) << "Emit " << (audio ? "audio" : "video") << " frame: "

	627 << " track_id=" << runs_->track_id()

614 << ", key=" << runs_->is_keyframe()	628 << ", key=" << runs_->is_keyframe()

615 << ", dur=" << runs_->duration().InMilliseconds()	629 << ", dur=" << runs_->duration().InMilliseconds()

616 << ", dts=" << runs_->dts().InMilliseconds()	630 << ", dts=" << runs_->dts().InMilliseconds()

617 << ", cts=" << runs_->cts().InMilliseconds()	631 << ", cts=" << runs_->cts().InMilliseconds()

618 << ", size=" << runs_->sample_size();	632 << ", size=" << runs_->sample_size();

619	633

620 (*buffers)[runs_->track_id()].push_back(stream_buf);	634 (*buffers)[runs_->track_id()].push_back(stream_buf);

621 runs_->AdvanceSample();	635 runs_->AdvanceSample();

622 return true;	636 return true;

623 }	637 }

(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
690 runs.AdvanceSample();	704 runs.AdvanceSample();

691 }	705 }

692 runs.AdvanceRun();	706 runs.AdvanceRun();

693 }	707 }

694	708

695 return true;	709 return true;

696 }	710 }

697	711

698 } // namespace mp4	712 } // namespace mp4

699 } // namespace media	713 } // namespace media

OLD	NEW

« no previous file with comments | « media/formats/mp4/mp4_stream_parser.h ('k') | media/formats/mp4/mp4_stream_parser_unittest.cc » ('j') | no next file with comments »