media/formats/mp4/mp4_stream_parser.cc - Issue 2254733006: Allow MP4 parser to handle multiple audio and video tracks

Side by Side Diff: media/formats/mp4/mp4_stream_parser.cc

Issue 2254733006: Allow MP4 parser to handle multiple audio and video tracks (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@merged-buffers-map

Patch Set: Clear track ids at the beginning of ParseMoov Created 4 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2014 The Chromium Authors. All rights reserved.	1 // Copyright 2014 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "media/formats/mp4/mp4_stream_parser.h"	5 #include "media/formats/mp4/mp4_stream_parser.h"

6	6

7 #include <stddef.h>	7 #include <stddef.h>

8	8

9 #include <limits>	9 #include <limits>

10 #include <memory>	10 #include <memory>

(...skipping 23 matching lines...) Expand all Loading...
34 namespace mp4 {	34 namespace mp4 {

35	35

36 MP4StreamParser::MP4StreamParser(const std::set<int>& audio_object_types,	36 MP4StreamParser::MP4StreamParser(const std::set<int>& audio_object_types,

37 bool has_sbr)	37 bool has_sbr)

38 : state_(kWaitingForInit),	38 : state_(kWaitingForInit),

39 moof_head_(0),	39 moof_head_(0),

40 mdat_tail_(0),	40 mdat_tail_(0),

41 highest_end_offset_(0),	41 highest_end_offset_(0),

42 has_audio_(false),	42 has_audio_(false),

43 has_video_(false),	43 has_video_(false),

44 audio_track_id_(0),

45 video_track_id_(0),

46 audio_object_types_(audio_object_types),	44 audio_object_types_(audio_object_types),

47 has_sbr_(has_sbr),	45 has_sbr_(has_sbr),

48 is_audio_track_encrypted_(false),

49 is_video_track_encrypted_(false),

50 num_top_level_box_skipped_(0) {	46 num_top_level_box_skipped_(0) {

51 }	47 }

52	48

53 MP4StreamParser::~MP4StreamParser() {}	49 MP4StreamParser::~MP4StreamParser() {}

54	50

55 void MP4StreamParser::Init(	51 void MP4StreamParser::Init(

56 const InitCB& init_cb,	52 const InitCB& init_cb,

57 const NewConfigCB& config_cb,	53 const NewConfigCB& config_cb,

58 const NewBuffersCB& new_buffers_cb,	54 const NewBuffersCB& new_buffers_cb,

59 bool /* ignore_text_tracks */,	55 bool /* ignore_text_tracks */,

(...skipping 119 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
179 }	175 }

180	176

181 queue_.Pop(reader->size());	177 queue_.Pop(reader->size());

182 return !(*err);	178 return !(*err);

183 }	179 }

184	180

185 bool MP4StreamParser::ParseMoov(BoxReader* reader) {	181 bool MP4StreamParser::ParseMoov(BoxReader* reader) {

186 moov_.reset(new Movie);	182 moov_.reset(new Movie);

187 RCHECK(moov_->Parse(reader));	183 RCHECK(moov_->Parse(reader));

188 runs_.reset();	184 runs_.reset();

	185 audio_track_ids_.clear();

	186 video_track_ids_.clear();

	187 is_track_encrypted_.clear();

189	188

190 has_audio_ = false;	189 has_audio_ = false;

191 has_video_ = false;	190 has_video_ = false;

192	191

193 std::unique_ptr<MediaTracks> media_tracks(new MediaTracks());	192 std::unique_ptr<MediaTracks> media_tracks(new MediaTracks());

194 AudioDecoderConfig audio_config;	193 AudioDecoderConfig audio_config;

195 VideoDecoderConfig video_config;	194 VideoDecoderConfig video_config;

196 int detected_audio_track_count = 0;	195 int detected_audio_track_count = 0;

197 int detected_video_track_count = 0;	196 int detected_video_track_count = 0;

198 int detected_text_track_count = 0;	197 int detected_text_track_count = 0;

199	198

200 for (std::vector<Track>::const_iterator track = moov_->tracks.begin();	199 for (std::vector<Track>::const_iterator track = moov_->tracks.begin();

201 track != moov_->tracks.end(); ++track) {	200 track != moov_->tracks.end(); ++track) {

202 // TODO(strobe): Only the first audio and video track present in a file are	201 // TODO(strobe): Only the first audio and video track present in a file are
	wolenetz (2016/08/23 22:51:42): Comment is obsolete. servolk (2016/08/24 00:53:49), replying to wolenetz's comment of 2016/08/23 22:51:42: Done.
203 // used. (Track selection is better accomplished via Source IDs, though, so	202 // used. (Track selection is better accomplished via Source IDs, though, so

204 // adding support for track selection within a stream is low-priority.)	203 // adding support for track selection within a stream is low-priority.)

205 const SampleDescription& samp_descr =	204 const SampleDescription& samp_descr =

206 track->media.information.sample_table.description;	205 track->media.information.sample_table.description;

207	206

208 // TODO(strobe): When codec reconfigurations are supported, detect and send	207 // TODO(strobe): When codec reconfigurations are supported, detect and send

209 // a codec reconfiguration for fragments using a sample description index	208 // a codec reconfiguration for fragments using a sample description index

210 // different from the previous one	209 // different from the previous one

211 size_t desc_idx = 0;	210 size_t desc_idx = 0;

212 for (size_t t = 0; t < moov_->extends.tracks.size(); t++) {	211 for (size_t t = 0; t < moov_->extends.tracks.size(); t++) {

213 const TrackExtends& trex = moov_->extends.tracks[t];	212 const TrackExtends& trex = moov_->extends.tracks[t];

214 if (trex.track_id == track->header.track_id) {	213 if (trex.track_id == track->header.track_id) {

215 desc_idx = trex.default_sample_description_index;	214 desc_idx = trex.default_sample_description_index;

216 break;	215 break;

217 }	216 }

218 }	217 }

219 RCHECK(desc_idx > 0);	218 RCHECK(desc_idx > 0);

220 desc_idx -= 1; // BMFF descriptor index is one-based	219 desc_idx -= 1; // BMFF descriptor index is one-based

221	220

222 if (track->media.handler.type == kAudio) {	221 if (track->media.handler.type == kAudio) {

223 detected_audio_track_count++;	222 detected_audio_track_count++;

224 if (audio_config.IsValidConfig())

225 continue; // Skip other audio tracks once we found a supported one.

226	223

227 RCHECK(!samp_descr.audio_entries.empty());	224 RCHECK(!samp_descr.audio_entries.empty());

228	225

229 // It is not uncommon to find otherwise-valid files with incorrect sample	226 // It is not uncommon to find otherwise-valid files with incorrect sample

230 // description indices, so we fail gracefully in that case.	227 // description indices, so we fail gracefully in that case.

231 if (desc_idx >= samp_descr.audio_entries.size())	228 if (desc_idx >= samp_descr.audio_entries.size())

232 desc_idx = 0;	229 desc_idx = 0;

233 const AudioSampleEntry& entry = samp_descr.audio_entries[desc_idx];	230 const AudioSampleEntry& entry = samp_descr.audio_entries[desc_idx];

234 const AAC& aac = entry.esds.aac;	231 const AAC& aac = entry.esds.aac;

235	232

(...skipping 68 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
304 sample_format = kSampleFormatU8;	301 sample_format = kSampleFormatU8;

305 } else if (entry.samplesize == 16) {	302 } else if (entry.samplesize == 16) {

306 sample_format = kSampleFormatS16;	303 sample_format = kSampleFormatS16;

307 } else if (entry.samplesize == 32) {	304 } else if (entry.samplesize == 32) {

308 sample_format = kSampleFormatS32;	305 sample_format = kSampleFormatS32;

309 } else {	306 } else {

310 LOG(ERROR) << "Unsupported sample size.";	307 LOG(ERROR) << "Unsupported sample size.";

311 return false;	308 return false;

312 }	309 }

313	310

314 is_audio_track_encrypted_ = entry.sinf.info.track_encryption.is_encrypted;	311 uint32_t audio_track_id = track->header.track_id;

315 DVLOG(1) << "is_audio_track_encrypted_: " << is_audio_track_encrypted_;	312 bool is_track_encrypted = entry.sinf.info.track_encryption.is_encrypted;

	313 is_track_encrypted_[audio_track_id] = is_track_encrypted;

316 audio_config.Initialize(	314 audio_config.Initialize(

317 codec, sample_format, channel_layout, sample_per_second, extra_data,	315 codec, sample_format, channel_layout, sample_per_second, extra_data,

318 is_audio_track_encrypted_ ? AesCtrEncryptionScheme() : Unencrypted(),	316 is_track_encrypted ? AesCtrEncryptionScheme() : Unencrypted(),

319 base::TimeDelta(), 0);	317 base::TimeDelta(), 0);

	318 DVLOG(1) << "audio_track_id=" << audio_track_id

	319 << " config=" << audio_config.AsHumanReadableString();

320 if (!audio_config.IsValidConfig()) {	320 if (!audio_config.IsValidConfig()) {

321 MEDIA_LOG(ERROR, media_log_) << "Invalid audio decoder config: "	321 MEDIA_LOG(ERROR, media_log_) << "Invalid audio decoder config: "

322 << audio_config.AsHumanReadableString();	322 << audio_config.AsHumanReadableString();

323 return false;	323 return false;

324 }	324 }

325 has_audio_ = true;	325 has_audio_ = true;

326 audio_track_id_ = track->header.track_id;	326 audio_track_ids_.insert(audio_track_id);

327 media_tracks->AddAudioTrack(audio_config, audio_track_id_, "main",	327 media_tracks->AddAudioTrack(audio_config, audio_track_id, "main",
	wolenetz (2016/08/23 22:51:42): Are all audio tracks "main" in a multi-track audio mp4? IIRC, the inband sourcing spec may help answer this. Consult: https://dev.w3.org/html5/html-sourcing-inband-tracks/#mpeg4 "Track Attributes for sourced Audio and Video Tracks" section. servolk (2016/08/24 00:53:49), replying to wolenetz's comment of 2016/08/23 22:51:42: Done.
328 track->media.handler.name,	328 track->media.handler.name,

329 track->media.header.language());	329 track->media.header.language());

330 continue;	330 continue;

331 }	331 }

332	332

333 if (track->media.handler.type == kVideo) {	333 if (track->media.handler.type == kVideo) {

334 detected_video_track_count++;	334 detected_video_track_count++;

335 if (video_config.IsValidConfig())

336 continue; // Skip other video tracks once we found a supported one.

337	335

338 RCHECK(!samp_descr.video_entries.empty());	336 RCHECK(!samp_descr.video_entries.empty());

339 if (desc_idx >= samp_descr.video_entries.size())	337 if (desc_idx >= samp_descr.video_entries.size())

340 desc_idx = 0;	338 desc_idx = 0;

341 const VideoSampleEntry& entry = samp_descr.video_entries[desc_idx];	339 const VideoSampleEntry& entry = samp_descr.video_entries[desc_idx];

342	340

343 if (!entry.IsFormatValid()) {	341 if (!entry.IsFormatValid()) {

344 MEDIA_LOG(ERROR, media_log_) << "Unsupported video format 0x"	342 MEDIA_LOG(ERROR, media_log_) << "Unsupported video format 0x"

345 << std::hex << entry.format	343 << std::hex << entry.format

346 << " in stsd box.";	344 << " in stsd box.";

(...skipping 10 matching lines...) Expand all Loading...
357 if (entry.pixel_aspect.h_spacing != 1 \|\|	355 if (entry.pixel_aspect.h_spacing != 1 \|\|

358 entry.pixel_aspect.v_spacing != 1) {	356 entry.pixel_aspect.v_spacing != 1) {

359 natural_size =	357 natural_size =

360 GetNaturalSize(visible_rect.size(), entry.pixel_aspect.h_spacing,	358 GetNaturalSize(visible_rect.size(), entry.pixel_aspect.h_spacing,

361 entry.pixel_aspect.v_spacing);	359 entry.pixel_aspect.v_spacing);

362 } else if (track->header.width && track->header.height) {	360 } else if (track->header.width && track->header.height) {

363 natural_size =	361 natural_size =

364 gfx::Size(track->header.width, track->header.height);	362 gfx::Size(track->header.width, track->header.height);

365 }	363 }

366	364

367 is_video_track_encrypted_ = entry.sinf.info.track_encryption.is_encrypted;	365 uint32_t video_track_id = track->header.track_id;

368 DVLOG(1) << "is_video_track_encrypted_: " << is_video_track_encrypted_;	366 bool is_track_encrypted = entry.sinf.info.track_encryption.is_encrypted;

	367 is_track_encrypted_[video_track_id] = is_track_encrypted;
	chcunningham1 (2016/08/23 01:54:32): Maybe capture the return value and check that it's not already present in the set (i.e. confirm media does not re-use track ids)? wolenetz (2016/08/23 22:51:42), replying to chcunningham1's comment of 2016/08/23 01:54:32: +1 to unique track id checking (here and in the audio case). servolk (2016/08/24 00:53:49), replying to wolenetz's comment of 2016/08/23 22:51:42: Done.
369 video_config.Initialize(	368 video_config.Initialize(

370 entry.video_codec, entry.video_codec_profile, PIXEL_FORMAT_YV12,	369 entry.video_codec, entry.video_codec_profile, PIXEL_FORMAT_YV12,

371 COLOR_SPACE_HD_REC709, coded_size, visible_rect, natural_size,	370 COLOR_SPACE_HD_REC709, coded_size, visible_rect, natural_size,

372 // No decoder-specific buffer needed for AVC;	371 // No decoder-specific buffer needed for AVC;

373 // SPS/PPS are embedded in the video stream	372 // SPS/PPS are embedded in the video stream

374 EmptyExtraData(),	373 EmptyExtraData(),

375 is_video_track_encrypted_ ? AesCtrEncryptionScheme() : Unencrypted());	374 is_track_encrypted ? AesCtrEncryptionScheme() : Unencrypted());

	375 DVLOG(1) << "video_track_id=" << video_track_id

	376 << " config=" << video_config.AsHumanReadableString();

376 if (!video_config.IsValidConfig()) {	377 if (!video_config.IsValidConfig()) {

377 MEDIA_LOG(ERROR, media_log_) << "Invalid video decoder config: "	378 MEDIA_LOG(ERROR, media_log_) << "Invalid video decoder config: "

378 << video_config.AsHumanReadableString();	379 << video_config.AsHumanReadableString();

379 return false;	380 return false;

380 }	381 }

381 has_video_ = true;	382 has_video_ = true;

382 video_track_id_ = track->header.track_id;	383 video_track_ids_.insert(video_track_id);

383 media_tracks->AddVideoTrack(video_config, video_track_id_, "main",	384 media_tracks->AddVideoTrack(video_config, video_track_id, "main",
	wolenetz (2016/08/23 22:51:42): ditto: "main" is only for the first video track. servolk (2016/08/24 00:53:49), replying to wolenetz's comment of 2016/08/23 22:51:42: Done.
384 track->media.handler.name,	385 track->media.handler.name,

385 track->media.header.language());	386 track->media.header.language());

386 continue;	387 continue;

387 }	388 }

388	389

389 // TODO(wolenetz): Investigate support in MSE and Chrome MSE for CEA 608/708	390 // TODO(wolenetz): Investigate support in MSE and Chrome MSE for CEA 608/708

390 // embedded caption data in video track. At time of init segment parsing, we	391 // embedded caption data in video track. At time of init segment parsing, we

391 // don't have this data (unless maybe by SourceBuffer's mimetype).	392 // don't have this data (unless maybe by SourceBuffer's mimetype).

392 // See https://crbug.com/597073	393 // See https://crbug.com/597073

393 if (track->media.handler.type == kText)	394 if (track->media.handler.type == kText)

(...skipping 116 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
510 return true;	511 return true;

511 }	512 }

512	513

513 DCHECK(!(*err));	514 DCHECK(!(*err));

514	515

515 const uint8_t* buf;	516 const uint8_t* buf;

516 int buf_size;	517 int buf_size;

517 queue_.Peek(&buf, &buf_size);	518 queue_.Peek(&buf, &buf_size);

518 if (!buf_size) return false;	519 if (!buf_size) return false;

519	520

520 bool audio = has_audio_ && audio_track_id_ == runs_->track_id();	521 bool audio =

521 bool video = has_video_ && video_track_id_ == runs_->track_id();	522 audio_track_ids_.find(runs_->track_id()) != audio_track_ids_.end();

	523 bool video =

	524 video_track_ids_.find(runs_->track_id()) != video_track_ids_.end();

522	525

523 // Skip this entire track if it's not one we're interested in	526 // Skip this entire track if it's not one we're interested in

524 if (!audio && !video) {	527 if (!audio && !video) {

525 runs_->AdvanceRun();	528 runs_->AdvanceRun();

526 return true;	529 return true;

527 }	530 }

528	531

529 // Attempt to cache the auxiliary information first. Aux info is usually	532 // Attempt to cache the auxiliary information first. Aux info is usually

530 // placed in a contiguous block before the sample data, rather than being	533 // placed in a contiguous block before the sample data, rather than being

531 // interleaved. If we didn't cache it, this would require that we retain the	534 // interleaved. If we didn't cache it, this would require that we retain the

(...skipping 49 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
581	584

582 if (decrypt_config) {	585 if (decrypt_config) {

583 if (!subsamples.empty()) {	586 if (!subsamples.empty()) {

584 // Create a new config with the updated subsamples.	587 // Create a new config with the updated subsamples.

585 decrypt_config.reset(new DecryptConfig(	588 decrypt_config.reset(new DecryptConfig(

586 decrypt_config->key_id(),	589 decrypt_config->key_id(),

587 decrypt_config->iv(),	590 decrypt_config->iv(),

588 subsamples));	591 subsamples));

589 }	592 }

590 // else, use the existing config.	593 // else, use the existing config.

591 } else if ((audio && is_audio_track_encrypted_) \|\|	594 } else if (is_track_encrypted_[runs_->track_id()]) {

592 (video && is_video_track_encrypted_)) {

593 // The media pipeline requires a DecryptConfig with an empty \|iv\|.	595 // The media pipeline requires a DecryptConfig with an empty \|iv\|.

594 // TODO(ddorwin): Refactor so we do not need a fake key ID ("1");	596 // TODO(ddorwin): Refactor so we do not need a fake key ID ("1");

595 decrypt_config.reset(	597 decrypt_config.reset(

596 new DecryptConfig("1", "", std::vector<SubsampleEntry>()));	598 new DecryptConfig("1", "", std::vector<SubsampleEntry>()));

597 }	599 }

598	600

599 StreamParserBuffer::Type buffer_type = audio ? DemuxerStream::AUDIO :	601 StreamParserBuffer::Type buffer_type = audio ? DemuxerStream::AUDIO :

600 DemuxerStream::VIDEO;	602 DemuxerStream::VIDEO;

601	603

602 // TODO(wolenetz/acolwell): Validate and use a common cross-parser TrackId

603 // type and allow multiple tracks for same media type, if applicable. See

604 // https://crbug.com/341581.

605 scoped_refptr<StreamParserBuffer> stream_buf = StreamParserBuffer::CopyFrom(	604 scoped_refptr<StreamParserBuffer> stream_buf = StreamParserBuffer::CopyFrom(

606 &frame_buf[0], frame_buf.size(), runs_->is_keyframe(), buffer_type,	605 &frame_buf[0], frame_buf.size(), runs_->is_keyframe(), buffer_type,

607 runs_->track_id());	606 runs_->track_id());

608	607

609 if (decrypt_config)	608 if (decrypt_config)

610 stream_buf->set_decrypt_config(std::move(decrypt_config));	609 stream_buf->set_decrypt_config(std::move(decrypt_config));

611	610

612 stream_buf->set_duration(runs_->duration());	611 stream_buf->set_duration(runs_->duration());

613 stream_buf->set_timestamp(runs_->cts());	612 stream_buf->set_timestamp(runs_->cts());

614 stream_buf->SetDecodeTimestamp(runs_->dts());	613 stream_buf->SetDecodeTimestamp(runs_->dts());

615	614

616 DVLOG(3) << "Pushing frame: aud=" << audio	615 DVLOG(3) << "Emit " << (audio ? "audio" : "video") << " frame: "
	chcunningham1 (2016/08/23 01:54:32): What if the track is text? Will you say "video" here? wolenetz (2016/08/23 22:51:42), replying to chcunningham1's comment of 2016/08/23 01:54:32: At the moment, Chrome MSE mp4 parser doesn't parse text tracks. servolk (2016/08/24 00:53:49), replying to wolenetz's comment of 2016/08/23 22:51:42: Yup, text tracks in .mp4 are not supported for now.
	616 << " track_id=" << runs_->track_id()

617 << ", key=" << runs_->is_keyframe()	617 << ", key=" << runs_->is_keyframe()

618 << ", dur=" << runs_->duration().InMilliseconds()	618 << ", dur=" << runs_->duration().InMilliseconds()

619 << ", dts=" << runs_->dts().InMilliseconds()	619 << ", dts=" << runs_->dts().InMilliseconds()

620 << ", cts=" << runs_->cts().InMilliseconds()	620 << ", cts=" << runs_->cts().InMilliseconds()

621 << ", size=" << runs_->sample_size();	621 << ", size=" << runs_->sample_size();

622	622

623 (*buffers)[runs_->track_id()].push_back(stream_buf);	623 (*buffers)[runs_->track_id()].push_back(stream_buf);

624 runs_->AdvanceSample();	624 runs_->AdvanceSample();

625 return true;	625 return true;

626 }	626 }

(...skipping 66 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
693 runs.AdvanceSample();	693 runs.AdvanceSample();

694 }	694 }

695 runs.AdvanceRun();	695 runs.AdvanceRun();

696 }	696 }

697	697

698 return true;	698 return true;

699 }	699 }

700	700

701 } // namespace mp4	701 } // namespace mp4

702 } // namespace media	702 } // namespace media

OLD	NEW