media/filters/media_source_state.cc - Issue 2226443002: Support multiple media tracks in MSE / ChunkDemuxer

Unified Diff: media/filters/media_source_state.cc

Issue 2226443002: Support multiple media tracks in MSE / ChunkDemuxer (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: mp4 format is not supported on some trybots, so use webm Created 4 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« media/filters/frame_processor.cc ('K') | « media/filters/media_source_state.h ('k') | media/filters/stream_parser_factory.h » ('j') | third_party/WebKit/LayoutTests/http/tests/media/media-source/mediasource-addsourcebuffer.html » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: media/filters/media_source_state.cc

diff --git a/media/filters/media_source_state.cc b/media/filters/media_source_state.cc

index c3152a2d51bbd7d2c238c28ebc4cd7f2b21ea96d..4fdcc6af93752e5a360d629fe56f5a49de781d72 100644

--- a/media/filters/media_source_state.cc

+++ b/media/filters/media_source_state.cc

@@ -4,6 +4,8 @@

#include "media/filters/media_source_state.h"

+#include <set>

#include "base/callback_helpers.h"

#include "base/command_line.h"

#include "base/stl_util.h"

@@ -11,6 +13,7 @@

#include "media/base/media_switches.h"

#include "media/base/media_track.h"

#include "media/base/media_tracks.h"

+#include "media/base/mime_util.h"

#include "media/filters/chunk_demuxer.h"

#include "media/filters/frame_processor.h"

#include "media/filters/source_buffer_stream.h"

@@ -24,10 +27,27 @@ enum {

kMaxMissingTrackInSegmentLogs = 10,

};

-static TimeDelta EndTimestamp(const StreamParser::BufferQueue& queue) {

+namespace {

+TimeDelta EndTimestamp(const StreamParser::BufferQueue& queue) {

return queue.back()->timestamp() + queue.back()->duration();

}

+const char* ToStr(MediaTrack::Type type) {

wolenetz 2016/09/13 21:03:14 nit: useful elsewhere? If so, move to some method

servolk 2016/09/14 18:15:25 Done.

+ switch (type) {

+ case MediaTrack::Audio:

+ return "audio";

+ case MediaTrack::Text:

+ return "text";

+ case MediaTrack::Video:

+ return "video";

+ }

+ NOTREACHED();

+ return "INVALID";

+} // namespace

// List of time ranges for each SourceBuffer.

// static

Ranges<TimeDelta> MediaSourceState::ComputeRangesIntersection(

@@ -94,11 +114,7 @@ MediaSourceState::MediaSourceState(

: create_demuxer_stream_cb_(create_demuxer_stream_cb),

timestamp_offset_during_append_(NULL),

parsing_media_segment_(false),

- media_segment_contained_audio_frame_(false),

- media_segment_contained_video_frame_(false),

stream_parser_(stream_parser.release()),

- audio_(NULL),

- video_(NULL),

frame_processor_(frame_processor.release()),

media_log_(media_log),

state_(UNINITIALIZED),

@@ -115,8 +131,7 @@ MediaSourceState::~MediaSourceState() {

void MediaSourceState::Init(

const StreamParser::InitCB& init_cb,

- bool allow_audio,

- bool allow_video,

+ const std::string& expected_codecs,

const StreamParser::EncryptedMediaInitDataCB& encrypted_media_init_data_cb,

const NewTextTrackCB& new_text_track_cb) {

DCHECK_EQ(state_, UNINITIALIZED);

@@ -127,7 +142,7 @@ void MediaSourceState::Init(

stream_parser_->Init(

base::Bind(&MediaSourceState::OnSourceInitDone, base::Unretained(this)),

base::Bind(&MediaSourceState::OnNewConfigs, base::Unretained(this),

- allow_audio, allow_video),

+ expected_codecs),

base::Bind(&MediaSourceState::OnNewBuffers, base::Unretained(this)),

new_text_track_cb_.is_null(), encrypted_media_init_data_cb,

base::Bind(&MediaSourceState::OnNewMediaSegment, base::Unretained(this)),

@@ -196,18 +211,19 @@ void MediaSourceState::ResetParserState(TimeDelta append_window_start,

frame_processor_->Reset();

parsing_media_segment_ = false;

- media_segment_contained_audio_frame_ = false;

- media_segment_contained_video_frame_ = false;

+ media_segment_has_data_for_track_.clear();

}

void MediaSourceState::Remove(TimeDelta start,

TimeDelta end,

TimeDelta duration) {

- if (audio_)

- audio_->Remove(start, end, duration);

+ for (const auto& it : audio_streams_) {

+ it.second->Remove(start, end, duration);

+ }

- if (video_)

- video_->Remove(start, end, duration);

+ for (const auto& it : video_streams_) {

+ it.second->Remove(start, end, duration);

+ }

for (TextStreamMap::iterator itr = text_stream_map_.begin();

itr != text_stream_map_.end(); ++itr) {

@@ -215,93 +231,71 @@ void MediaSourceState::Remove(TimeDelta start,

}

-size_t MediaSourceState::EstimateVideoDataSize(

- size_t muxed_data_chunk_size) const {

- DCHECK(audio_);

- DCHECK(video_);

- size_t videoBufferedSize = video_->GetBufferedSize();

- size_t audioBufferedSize = audio_->GetBufferedSize();

- if (videoBufferedSize == 0 || audioBufferedSize == 0) {

- // At this point either audio or video buffer is empty, which means buffer

- // levels are probably low anyway and we should have enough space in the

- // buffers for appending new data, so just take a very rough guess.

- return muxed_data_chunk_size * 7 / 8;

- }

- // We need to estimate how much audio and video data is going to be in the

- // newly appended data chunk to make space for the new data. And we need to do

- // that without parsing the data (which will happen later, in the Append

- // phase). So for now we can only rely on some heuristic here. Let's assume

- // that the proportion of the audio/video in the new data chunk is the same as

- // the current ratio of buffered audio/video.

- // Longer term this should go away once we further change the MSE GC algorithm

- // to work across all streams of a SourceBuffer (see crbug.com/520704).

- double videoBufferedSizeF = static_cast<double>(videoBufferedSize);

- double audioBufferedSizeF = static_cast<double>(audioBufferedSize);

- double totalBufferedSizeF = videoBufferedSizeF + audioBufferedSizeF;

- CHECK_GT(totalBufferedSizeF, 0.0);

- double videoRatio = videoBufferedSizeF / totalBufferedSizeF;

- CHECK_GE(videoRatio, 0.0);

- CHECK_LE(videoRatio, 1.0);

- double estimatedVideoSize = muxed_data_chunk_size * videoRatio;

- return static_cast<size_t>(estimatedVideoSize);

bool MediaSourceState::EvictCodedFrames(DecodeTimestamp media_time,

size_t newDataSize) {

bool success = true;

DVLOG(3) << __func__ << " media_time=" << media_time.InSecondsF()

- << " newDataSize=" << newDataSize

- << " videoBufferedSize=" << (video_ ? video_->GetBufferedSize() : 0)

- << " audioBufferedSize=" << (audio_ ? audio_->GetBufferedSize() : 0);

+ << " newDataSize=" << newDataSize;

- size_t newAudioSize = 0;

- size_t newVideoSize = 0;

- if (audio_ && video_) {

- newVideoSize = EstimateVideoDataSize(newDataSize);

- newAudioSize = newDataSize - newVideoSize;

- } else if (video_) {

- newVideoSize = newDataSize;

- } else if (audio_) {

- newAudioSize = newDataSize;

+ DVLOG(4) << "Before EvictCodedFrames:";

+ for (const auto& it : audio_streams_) {

+ DVLOG(4) << "Audio track_id=" << it.second->media_track_id()

+ << " buffered_size=" << it.second->GetBufferedSize();

+ }

+ for (const auto& it : video_streams_) {

+ DVLOG(4) << "Video track_id=" << it.second->media_track_id()

+ << " buffered_size=" << it.second->GetBufferedSize();

}

- DVLOG(3) << __func__

- << " estimated audio/video sizes: newVideoSize=" << newVideoSize

- << " newAudioSize=" << newAudioSize;

- if (audio_)

- success = audio_->EvictCodedFrames(media_time, newAudioSize) && success;

+ size_t estimatedAudioSize = newDataSize;

+ size_t estimatedVideoSize = newDataSize;

+ if (!audio_streams_.empty() && !video_streams_.empty()) {

+ estimatedAudioSize = newDataSize / 16;

wolenetz 2016/09/13 21:03:14 nit: Assumption is overall, across all A+V tracks

servolk 2016/09/14 18:15:26 Yeah, after pondering this a bit more, I think we

wolenetz 2016/09/14 23:31:21 Acknowledged.

+ estimatedVideoSize = newDataSize - estimatedAudioSize;

+ }

+ if (audio_streams_.size() > 0)

+ estimatedAudioSize /= audio_streams_.size();

+ if (video_streams_.size() > 0)

+ estimatedVideoSize /= video_streams_.size();

- if (video_)

- success = video_->EvictCodedFrames(media_time, newVideoSize) && success;

+ for (const auto& it : audio_streams_) {

+ success &= it.second->EvictCodedFrames(media_time, estimatedAudioSize);

+ }

+ for (const auto& it : video_streams_) {

+ success &= it.second->EvictCodedFrames(media_time, estimatedVideoSize);

+ }

for (TextStreamMap::iterator itr = text_stream_map_.begin();

itr != text_stream_map_.end(); ++itr) {

- success = itr->second->EvictCodedFrames(media_time, 0) && success;

+ success &= itr->second->EvictCodedFrames(media_time, 0);

}

- DVLOG(3) << __func__ << " result=" << success

- << " videoBufferedSize=" << (video_ ? video_->GetBufferedSize() : 0)

- << " audioBufferedSize=" << (audio_ ? audio_->GetBufferedSize() : 0);

+ DVLOG(4) << "After EvictCodedFrames (success=" << success << "):";

+ for (const auto& it : audio_streams_) {

+ DVLOG(4) << "Audio track_id=" << it.second->media_track_id()

+ << " buffered_size=" << it.second->GetBufferedSize();

+ }

+ for (const auto& it : video_streams_) {

+ DVLOG(4) << "Video track_id=" << it.second->media_track_id()

+ << " buffered_size=" << it.second->GetBufferedSize();

+ }

return success;

}

Ranges<TimeDelta> MediaSourceState::GetBufferedRanges(TimeDelta duration,

wolenetz 2016/09/13 21:03:14 hmm. I think that TODO was wrong. SourceBuffer.buf

servolk 2016/09/14 18:15:26 Ok, in that case we can just drop the check for en

bool ended) const {

- // TODO(acolwell): When we start allowing disabled tracks we'll need to update

- // this code to only add ranges from active tracks.

RangesList ranges_list;

- if (audio_)

- ranges_list.push_back(audio_->GetBufferedRanges(duration));

+ for (const auto& it : audio_streams_) {

+ if (it.second->enabled())

+ ranges_list.push_back(it.second->GetBufferedRanges(duration));

+ }

- if (video_)

- ranges_list.push_back(video_->GetBufferedRanges(duration));

+ for (const auto& it : video_streams_) {

+ if (it.second->enabled())

+ ranges_list.push_back(it.second->GetBufferedRanges(duration));

+ }

for (TextStreamMap::const_iterator itr = text_stream_map_.begin();

itr != text_stream_map_.end(); ++itr) {

@@ -314,11 +308,13 @@ Ranges<TimeDelta> MediaSourceState::GetBufferedRanges(TimeDelta duration,

TimeDelta MediaSourceState::GetHighestPresentationTimestamp() const {

TimeDelta max_pts;

- if (audio_)

- max_pts = std::max(max_pts, audio_->GetHighestPresentationTimestamp());

+ for (const auto& it : audio_streams_) {

+ max_pts = std::max(max_pts, it.second->GetHighestPresentationTimestamp());

+ }

- if (video_)

- max_pts = std::max(max_pts, video_->GetHighestPresentationTimestamp());

+ for (const auto& it : video_streams_) {

+ max_pts = std::max(max_pts, it.second->GetHighestPresentationTimestamp());

+ }

for (TextStreamMap::const_iterator itr = text_stream_map_.begin();

itr != text_stream_map_.end(); ++itr) {

@@ -331,11 +327,13 @@ TimeDelta MediaSourceState::GetHighestPresentationTimestamp() const {

TimeDelta MediaSourceState::GetMaxBufferedDuration() const {

TimeDelta max_duration;

- if (audio_)

- max_duration = std::max(max_duration, audio_->GetBufferedDuration());

+ for (const auto& it : audio_streams_) {

+ max_duration = std::max(max_duration, it.second->GetBufferedDuration());

+ }

- if (video_)

- max_duration = std::max(max_duration, video_->GetBufferedDuration());

+ for (const auto& it : video_streams_) {

+ max_duration = std::max(max_duration, it.second->GetBufferedDuration());

+ }

for (TextStreamMap::const_iterator itr = text_stream_map_.begin();

itr != text_stream_map_.end(); ++itr) {

@@ -346,11 +344,13 @@ TimeDelta MediaSourceState::GetMaxBufferedDuration() const {

}

void MediaSourceState::StartReturningData() {

- if (audio_)

- audio_->StartReturningData();

+ for (const auto& it : audio_streams_) {

+ it.second->StartReturningData();

+ }

- if (video_)

- video_->StartReturningData();

+ for (const auto& it : video_streams_) {

+ it.second->StartReturningData();

+ }

for (TextStreamMap::iterator itr = text_stream_map_.begin();

itr != text_stream_map_.end(); ++itr) {

@@ -359,11 +359,13 @@ void MediaSourceState::StartReturningData() {

}

void MediaSourceState::AbortReads() {

- if (audio_)

- audio_->AbortReads();

+ for (const auto& it : audio_streams_) {

+ it.second->AbortReads();

+ }

- if (video_)

- video_->AbortReads();

+ for (const auto& it : video_streams_) {

+ it.second->AbortReads();

+ }

for (TextStreamMap::iterator itr = text_stream_map_.begin();

itr != text_stream_map_.end(); ++itr) {

@@ -372,11 +374,13 @@ void MediaSourceState::AbortReads() {

}

void MediaSourceState::Seek(TimeDelta seek_time) {

- if (audio_)

- audio_->Seek(seek_time);

+ for (const auto& it : audio_streams_) {

+ it.second->Seek(seek_time);

+ }

- if (video_)

- video_->Seek(seek_time);

+ for (const auto& it : video_streams_) {

+ it.second->Seek(seek_time);

+ }

for (TextStreamMap::iterator itr = text_stream_map_.begin();

itr != text_stream_map_.end(); ++itr) {

@@ -385,11 +389,13 @@ void MediaSourceState::Seek(TimeDelta seek_time) {

}

void MediaSourceState::CompletePendingReadIfPossible() {

- if (audio_)

- audio_->CompletePendingReadIfPossible();

+ for (const auto& it : audio_streams_) {

+ it.second->CompletePendingReadIfPossible();

+ }

- if (video_)

- video_->CompletePendingReadIfPossible();

+ for (const auto& it : video_streams_) {

+ it.second->CompletePendingReadIfPossible();

+ }

for (TextStreamMap::iterator itr = text_stream_map_.begin();

itr != text_stream_map_.end(); ++itr) {

@@ -398,11 +404,13 @@ void MediaSourceState::CompletePendingReadIfPossible() {

}

void MediaSourceState::OnSetDuration(TimeDelta duration) {

- if (audio_)

- audio_->OnSetDuration(duration);

+ for (const auto& it : audio_streams_) {

+ it.second->OnSetDuration(duration);

+ }

- if (video_)

- video_->OnSetDuration(duration);

+ for (const auto& it : video_streams_) {

+ it.second->OnSetDuration(duration);

+ }

for (TextStreamMap::iterator itr = text_stream_map_.begin();

itr != text_stream_map_.end(); ++itr) {

@@ -411,11 +419,13 @@ void MediaSourceState::OnSetDuration(TimeDelta duration) {

}

void MediaSourceState::MarkEndOfStream() {

- if (audio_)

- audio_->MarkEndOfStream();

+ for (const auto& it : audio_streams_) {

+ it.second->MarkEndOfStream();

+ }

- if (video_)

- video_->MarkEndOfStream();

+ for (const auto& it : video_streams_) {

+ it.second->MarkEndOfStream();

+ }

for (TextStreamMap::iterator itr = text_stream_map_.begin();

itr != text_stream_map_.end(); ++itr) {

@@ -424,11 +434,13 @@ void MediaSourceState::MarkEndOfStream() {

}

void MediaSourceState::UnmarkEndOfStream() {

- if (audio_)

- audio_->UnmarkEndOfStream();

+ for (const auto& it : audio_streams_) {

+ it.second->UnmarkEndOfStream();

+ }

- if (video_)

- video_->UnmarkEndOfStream();

+ for (const auto& it : video_streams_) {

+ it.second->UnmarkEndOfStream();

+ }

for (TextStreamMap::iterator itr = text_stream_map_.begin();

itr != text_stream_map_.end(); ++itr) {

@@ -437,11 +449,13 @@ void MediaSourceState::UnmarkEndOfStream() {

}

void MediaSourceState::Shutdown() {

- if (audio_)

- audio_->Shutdown();

+ for (const auto& it : audio_streams_) {

+ it.second->Shutdown();

+ }

- if (video_)

- video_->Shutdown();

+ for (const auto& it : video_streams_) {

+ it.second->Shutdown();

+ }

for (TextStreamMap::iterator itr = text_stream_map_.begin();

itr != text_stream_map_.end(); ++itr) {

@@ -453,12 +467,14 @@ void MediaSourceState::SetMemoryLimits(DemuxerStream::Type type,

size_t memory_limit) {

switch (type) {

case DemuxerStream::AUDIO:

- if (audio_)

- audio_->SetStreamMemoryLimit(memory_limit);

+ for (const auto& it : audio_streams_) {

+ it.second->SetStreamMemoryLimit(memory_limit);

+ }

break;

case DemuxerStream::VIDEO:

- if (video_)

- video_->SetStreamMemoryLimit(memory_limit);

+ for (const auto& it : video_streams_) {

+ it.second->SetStreamMemoryLimit(memory_limit);

+ }

break;

case DemuxerStream::TEXT:

for (TextStreamMap::iterator itr = text_stream_map_.begin();

@@ -474,11 +490,15 @@ void MediaSourceState::SetMemoryLimits(DemuxerStream::Type type,

}

bool MediaSourceState::IsSeekWaitingForData() const {

- if (audio_ && audio_->IsSeekWaitingForData())

- return true;

+ for (const auto& it : audio_streams_) {

+ if (it.second->IsSeekWaitingForData())

+ return true;

+ }

- if (video_ && video_->IsSeekWaitingForData())

- return true;

+ for (const auto& it : video_streams_) {

+ if (it.second->IsSeekWaitingForData())

+ return true;

+ }

// NOTE: We are intentionally not checking the text tracks

// because text tracks are discontinuous and may not have data

@@ -490,164 +510,200 @@ bool MediaSourceState::IsSeekWaitingForData() const {

return false;

}

+bool CheckBytestreamTrackIds(

wolenetz 2016/09/13 21:03:14 move to anon namespace at top of file

servolk 2016/09/14 18:15:25 Done.

+ const MediaTracks& tracks,

+ const StreamParser::TextTrackConfigMap& text_configs) {

+ std::set<StreamParser::TrackId> bytestream_ids;

+ for (const auto& track : tracks.tracks()) {

+ const StreamParser::TrackId& track_id = track->bytestream_track_id();

+ if (bytestream_ids.find(track_id) != bytestream_ids.end()) {

+ return false;

+ }

+ bytestream_ids.insert(track_id);

+ }

+ for (const auto& text_track : text_configs) {

+ const StreamParser::TrackId& track_id = text_track.first;

+ if (bytestream_ids.find(track_id) != bytestream_ids.end()) {

+ return false;

+ }

+ bytestream_ids.insert(track_id);

+ }

+ return true;

bool MediaSourceState::OnNewConfigs(

- bool allow_audio,

- bool allow_video,

+ std::string expected_codecs,

std::unique_ptr<MediaTracks> tracks,

const StreamParser::TextTrackConfigMap& text_configs) {

- DCHECK_GE(state_, PENDING_PARSER_CONFIG);

DCHECK(tracks.get());

+ DVLOG(1) << __func__ << " expected_codecs=" << expected_codecs

+ << " tracks=" << tracks->tracks().size();

+ DCHECK_GE(state_, PENDING_PARSER_CONFIG);

- MediaTrack* audio_track = nullptr;

- MediaTrack* video_track = nullptr;

- AudioDecoderConfig audio_config;

- VideoDecoderConfig video_config;

- for (const auto& track : tracks->tracks()) {

- const auto& track_id = track->bytestream_track_id();

- if (track->type() == MediaTrack::Audio) {

- if (audio_track) {

- MEDIA_LOG(ERROR, media_log_)

- << "Error: more than one audio track is currently not supported.";

- return false;

- }

- audio_track = track.get();

- audio_config = tracks->getAudioConfig(track_id);

- DCHECK(audio_config.IsValidConfig());

- } else if (track->type() == MediaTrack::Video) {

- if (video_track) {

- MEDIA_LOG(ERROR, media_log_)

- << "Error: more than one video track is currently not supported.";

- return false;

- }

- video_track = track.get();

- video_config = tracks->getVideoConfig(track_id);

- DCHECK(video_config.IsValidConfig());

- } else {

- MEDIA_LOG(ERROR, media_log_) << "Error: unsupported media track type "

- << track->type();

- return false;

+ // Check that there is no clashing bytestream track ids.

+ if (!CheckBytestreamTrackIds(*tracks, text_configs)) {

+ MEDIA_LOG(ERROR, media_log_)

+ << "Error: duplicate bytestream track ids detected";

wolenetz 2016/09/13 21:03:13 nit: s/Error: duplicate/Duplicate/ (ERROR already

servolk 2016/09/14 18:15:26 Done.

+ for (const auto& track : tracks->tracks()) {

+ const StreamParser::TrackId& track_id = track->bytestream_track_id();

+ MEDIA_LOG(ERROR, media_log_) << ToStr(track->type()) << " track "

wolenetz 2016/09/13 21:03:13 nit:s/ERROR/DEBUG/ so we can expose the last cache

servolk 2016/09/14 18:15:25 Done.

+ << " bytestream track id=" << track_id;

}

+ return false;

}

- DVLOG(1) << "OnNewConfigs(" << allow_audio << ", " << allow_video << ", "

- << audio_config.IsValidConfig() << ", "

- << video_config.IsValidConfig() << ")";

// MSE spec allows new configs to be emitted only during Append, but not

// during Flush or parser reset operations.

CHECK(append_in_progress_);

- if (!audio_config.IsValidConfig() && !video_config.IsValidConfig()) {

- DVLOG(1) << "OnNewConfigs() : Audio & video config are not valid!";

- return false;

- }

- // Signal an error if we get configuration info for stream types that weren't

- // specified in AddId() or more configs after a stream is initialized.

- if (allow_audio != audio_config.IsValidConfig()) {

- MEDIA_LOG(ERROR, media_log_)

- << "Initialization segment"

- << (audio_config.IsValidConfig() ? " has" : " does not have")

- << " an audio track, but the mimetype"

- << (allow_audio ? " specifies" : " does not specify")

- << " an audio codec.";

- return false;

- }

+ bool success = true;

- if (allow_video != video_config.IsValidConfig()) {

- MEDIA_LOG(ERROR, media_log_)

- << "Initialization segment"

- << (video_config.IsValidConfig() ? " has" : " does not have")

- << " a video track, but the mimetype"

- << (allow_video ? " specifies" : " does not specify")

- << " a video codec.";

- return false;

- }

+ std::vector<std::string> expected_codecs_parsed;

+ ParseCodecString(expected_codecs, &expected_codecs_parsed, false);

wolenetz 2016/09/13 21:03:14 nit: Every init segment, we parse these? Can we in

servolk 2016/09/14 18:15:25 Yeah, I don't think it's going to make a big diffe

wolenetz 2016/09/14 23:31:21 Acknowledged.

- bool success = true;

- if (audio_config.IsValidConfig()) {

- if (!audio_) {

- media_log_->SetBooleanProperty("found_audio_stream", true);

+ std::vector<AudioCodec> expected_acodecs;

+ std::vector<VideoCodec> expected_vcodecs;

+ for (const auto& codec_id : expected_codecs_parsed) {

+ AudioCodec acodec = StringToAudioCodec(codec_id);

+ if (acodec != kUnknownAudioCodec) {

+ expected_acodecs.push_back(acodec);

+ continue;

}

- if (!audio_ ||

- audio_->audio_decoder_config().codec() != audio_config.codec()) {

- media_log_->SetStringProperty("audio_codec_name",

- GetCodecName(audio_config.codec()));

+ VideoCodec vcodec = StringToVideoCodec(codec_id);

+ if (vcodec != kUnknownVideoCodec) {

+ expected_vcodecs.push_back(vcodec);

+ continue;

}

+ MEDIA_LOG(INFO, media_log_) << "Unrecognized media codec: " << codec_id;

+ }

- bool audio_stream_just_created = false;

- if (!audio_) {

- audio_ = create_demuxer_stream_cb_.Run(DemuxerStream::AUDIO);

+ for (const auto& track : tracks->tracks()) {

+ const auto& track_id = track->bytestream_track_id();

- if (!audio_) {

- DVLOG(1) << "Failed to create an audio stream.";

- return false;

- }

- audio_stream_just_created = true;

+ if (track->type() == MediaTrack::Audio) {

+ AudioDecoderConfig audio_config = tracks->getAudioConfig(track_id);

+ DVLOG(1) << "Audio track_id=" << track_id

+ << " config: " << audio_config.AsHumanReadableString();

+ DCHECK(audio_config.IsValidConfig());

- if (!frame_processor_->AddTrack(FrameProcessor::kAudioTrackId, audio_)) {

- DVLOG(1) << "Failed to add audio track to frame processor.";

+ const auto& it = std::find(expected_acodecs.begin(),

+ expected_acodecs.end(), audio_config.codec());

+ if (it == expected_acodecs.end()) {

+ MEDIA_LOG(ERROR, media_log_) << "Audio stream codec "

+ << GetCodecName(audio_config.codec())

+ << " doesn't match SourceBuffer codecs.";

return false;

}

- }

- frame_processor_->OnPossibleAudioConfigUpdate(audio_config);

- success &= audio_->UpdateAudioConfig(audio_config, media_log_);

- if (audio_stream_just_created) {

- std::string audio_buf_limit_switch =

- base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII(

- switches::kMSEAudioBufferSizeLimit);

- unsigned audio_buf_size_limit = 0;

- if (base::StringToUint(audio_buf_limit_switch, &audio_buf_size_limit) &&

- audio_buf_size_limit > 0) {

- MEDIA_LOG(INFO, media_log_) << "Custom audio SourceBuffer size limit="

- << audio_buf_size_limit;

- audio_->SetStreamMemoryLimit(audio_buf_size_limit);

+ expected_acodecs.erase(it);

wolenetz 2016/09/13 21:03:13 This is a problem for the following test I suggest

servolk 2016/09/14 18:15:26 Wait, shouldn't the mime type be 'audio/webm; code

wolenetz 2016/09/14 23:31:21 I've filed spec bug (MSE vNext) https://github.com

I've filed a spec bug (MSE vNext), https://github.com/w3c/media-source/issues/161, to get clarity. Unfortunately, other UAs don't always require codec strings and may instead sniff them out during initialization segment received processing; isTypeSupported('video/mp4') on Edge, for instance, is true, and addSourceBuffer() of the same might work (I'm unsure there). Following EME's route of strict matching, Chrome MSE is more strict, and the MSE vNext bug (https://github.com/w3c/media-source/issues/137) tracks making the spec more strict too. However, such strictness (#137) does not necessarily imply that multiple tracks of the same codec type each have distinct entries in the codec string. I propose we start strict here (like you have currently), and only relax as necessary if web authors discover an issue here only in Chrome while #161 remains open in the spec. Please add a "TODO(wolenetz): Update codec string strictness, if necessary, once spec issue https://github.com/w3c/media-source/issues/161 is resolved."

servolk 2016/09/15 00:18:32 Done.

On 2016/09/14 23:31:21, wolenetz wrote:
> On 2016/09/14 18:15:26, servolk wrote:
> > On 2016/09/13 21:03:13, wolenetz wrote:
> > > This is a problem for the following test I suggest adding:
> > >
> > > addId(x, "audio/webm" "vorbis")
> > > appendInitSegment(x, two vorbis tracks)
> > >
> > > That's a valid init segment, but the second audio track would cause parse/decode
> > > error in l.591-596, above, with this current patch set.
> >
> > Wait, shouldn't the mime type be 'audio/webm; codecs=vorbis,vorbis' if there are
> > actually two vorbis tracks? I tried creating DASH manifests from multi-track mp4
> > files using GPAC MP4Box and I got manifests that specified the same codec
> > multiple times i.e. it looked like codecs="avc1.4D401F,avc1.4D401F". So I
> > assumed the codecs parameter must have one value for each stream/track, and in
> > fact I was wondering if we need to enforce also the order of those
> > tracks/streams to match the codecs parameter (since that could simplify the
> > logic here).
> > I don't see this covered explicitly in the MSE spec. It simply says that the
> > parameter of the addSourceBuffer is a mime type string, and doesn't go into
> > details about how the codecs parameter should be interpreted. Can you please
> > clarify this in the MSE spec?
> >
> > Also, unfortunately we can't create a test like you described yet, because our
> > WebM parser currently doesn't support multiple audio/video tracks, it still has
> > that 1 a + 1 v track limitation. And MP4 parser, which supports multiple tracks,
> > cannot be used in tests due to MP4 using proprietary codecs. I'll leave a TODO
> > to add such a test in the future, when WebM parser support multiple tracks.
>
> I've filed spec bug (MSE vNext) https://github.com/w3c/media-source/issues/161 to get clarity.
> Unfortunately other UAs don't always require codec strings and
> may instead sniff them out during initialization segment received processing;
> isTypeSupported('video/mp4') on Edge, for instance, is true and
> addSourceBuffer() of same might work (I'm unsure there). Following EME's route
> of strict matching, Chrome MSE is more strict, and MSE vNext bug
> (https://github.com/w3c/media-source/issues/137) tracks making the spec more
> strict too. However, such strictness (#137) does not necessarily imply that
> multiple tracks of same codec type each have distinct entries in the codec
> string. I propose we start strict here (like you have currently), and only relax
> as necessary if web authors discover issue here only in Chrome while #161
> remains open in the spec.
>
> Please add a "TODO(wolenetz): Update codec string strictness, if necessary, once
> spec issue https://github.com/w3c/media-source/issues/161 is resolved."

Done.

+ ChunkDemuxerStream* stream = nullptr;

+ if (!first_init_segment_received_) {

+ DCHECK(audio_streams_.find(track_id) == audio_streams_.end());

+ stream = create_demuxer_stream_cb_.Run(DemuxerStream::AUDIO);

+ if (!stream || !frame_processor_->AddTrack(track_id, stream)) {

+ MEDIA_LOG(ERROR, media_log_) << "Failed to create audio stream.";

+ return false;

+ }

+ audio_streams_[track_id] = stream;

+ media_log_->SetBooleanProperty("found_audio_stream", true);

+ media_log_->SetStringProperty("audio_codec_name",

+ GetCodecName(audio_config.codec()));

+ } else {

+ if (audio_streams_.size() > 1) {

+ stream = audio_streams_[track_id];

+ } else {

+ // If there is only one video track then bytestream id might change in

wolenetz 2016/09/13 21:03:14 nit:s/video/audio/

servolk 2016/09/14 18:15:26 Done.

+ // a new init segment. So update our state and nofity frame processor.

+ const auto& it = audio_streams_.begin();

+ if (it != audio_streams_.end()) {

+ stream = it->second;

+ if (it->first != track_id) {

+ frame_processor_->UpdateTrack(it->first, track_id);

+ audio_streams_[track_id] = stream;

+ audio_streams_.erase(it->first);

+ }

+ if (!stream) {

+ MEDIA_LOG(ERROR, media_log_) << "Got unexpected audio track"

+ << " track_id=" << track_id;

+ return false;

+ }

}

- }

- if (video_config.IsValidConfig()) {

- if (!video_) {

- media_log_->SetBooleanProperty("found_video_stream", true);

- }

- if (!video_ ||

- video_->video_decoder_config().codec() != video_config.codec()) {

- media_log_->SetStringProperty("video_codec_name",

- GetCodecName(video_config.codec()));

- }

- bool video_stream_just_created = false;

- if (!video_) {

- video_ = create_demuxer_stream_cb_.Run(DemuxerStream::VIDEO);

+ track->set_id(stream->media_track_id());

+ frame_processor_->OnPossibleAudioConfigUpdate(audio_config);

+ success &= stream->UpdateAudioConfig(audio_config, media_log_);

+ } else if (track->type() == MediaTrack::Video) {

+ VideoDecoderConfig video_config = tracks->getVideoConfig(track_id);

+ DVLOG(1) << "Video track_id=" << track_id

+ << " config: " << video_config.AsHumanReadableString();

+ DCHECK(video_config.IsValidConfig());

- if (!video_) {

- DVLOG(1) << "Failed to create a video stream.";

+ const auto& it = std::find(expected_vcodecs.begin(),

+ expected_vcodecs.end(), video_config.codec());

+ if (it == expected_vcodecs.end()) {

+ MEDIA_LOG(ERROR, media_log_) << "Video stream codec "

+ << GetCodecName(video_config.codec())

+ << " doesn't match SourceBuffer codecs.";

return false;

}

- video_stream_just_created = true;

- if (!frame_processor_->AddTrack(FrameProcessor::kVideoTrackId, video_)) {

- DVLOG(1) << "Failed to add video track to frame processor.";

- return false;

+ expected_vcodecs.erase(it);

wolenetz 2016/09/13 21:03:13 ditto missing test and problem for multiple video

servolk 2016/09/14 18:15:26 Acknowledged.

+ ChunkDemuxerStream* stream = nullptr;

+ if (!first_init_segment_received_) {

+ DCHECK(video_streams_.find(track_id) == video_streams_.end());

+ stream = create_demuxer_stream_cb_.Run(DemuxerStream::VIDEO);

+ if (!stream || !frame_processor_->AddTrack(track_id, stream)) {

+ MEDIA_LOG(ERROR, media_log_) << "Failed to create video stream.";

+ return false;

+ }

+ video_streams_[track_id] = stream;

+ media_log_->SetBooleanProperty("found_video_stream", true);

+ media_log_->SetStringProperty("video_codec_name",

+ GetCodecName(video_config.codec()));

+ } else {

+ if (video_streams_.size() > 1) {

+ stream = video_streams_[track_id];

+ } else {

+ // If there is only one video track then bytestream id might change in

+ // a new init segment. So update our state and nofity frame processor.

+ const auto& it = video_streams_.begin();

+ if (it != video_streams_.end()) {

+ stream = it->second;

+ if (it->first != track_id) {

+ frame_processor_->UpdateTrack(it->first, track_id);

+ video_streams_[track_id] = stream;

+ video_streams_.erase(it->first);

+ }

+ if (!stream) {

+ MEDIA_LOG(ERROR, media_log_) << "Got unexpected video track"

+ << " track_id=" << track_id;

+ return false;

+ }

}

+ track->set_id(stream->media_track_id());

+ success &= stream->UpdateVideoConfig(video_config, media_log_);

+ } else {

+ MEDIA_LOG(ERROR, media_log_) << "Error: unsupported media track type "

+ << track->type();

+ return false;

}

+ }

- success &= video_->UpdateVideoConfig(video_config, media_log_);

- if (video_stream_just_created) {

- std::string video_buf_limit_switch =

- base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII(

- switches::kMSEVideoBufferSizeLimit);

- unsigned video_buf_size_limit = 0;

- if (base::StringToUint(video_buf_limit_switch, &video_buf_size_limit) &&

- video_buf_size_limit > 0) {

- MEDIA_LOG(INFO, media_log_) << "Custom video SourceBuffer size limit="

- << video_buf_size_limit;

- video_->SetStreamMemoryLimit(video_buf_size_limit);

- }

+ if (!expected_acodecs.empty() || !expected_vcodecs.empty()) {

+ for (const auto& acodec : expected_acodecs) {

+ MEDIA_LOG(ERROR, media_log_) << "Initialization segment misses expected "

+ << GetCodecName(acodec) << " track.";

}

+ for (const auto& vcodec : expected_vcodecs) {

+ MEDIA_LOG(ERROR, media_log_) << "Initialization segment misses expected "

+ << GetCodecName(vcodec) << " track.";

+ }

+ return false;

}

typedef StreamParser::TextTrackConfigMap::const_iterator TextConfigItr;

@@ -725,15 +781,16 @@ bool MediaSourceState::OnNewConfigs(

}

+ if (audio_streams_.empty() && video_streams_.empty()) {

+ DVLOG(1) << __func__ << ": couldn't find a valid audio or video stream";

+ return false;

+ }

frame_processor_->SetAllTrackBuffersNeedRandomAccessPoint();

- if (audio_track) {

- DCHECK(audio_);

- audio_track->set_id(audio_->media_track_id());

- }

- if (video_track) {

- DCHECK(video_);

- video_track->set_id(video_->media_track_id());

+ if (!first_init_segment_received_) {

+ first_init_segment_received_ = true;

+ SetStreamMemoryLimits();

}

DVLOG(1) << "OnNewConfigs() : " << (success ? "success" : "failed");

@@ -747,12 +804,39 @@ bool MediaSourceState::OnNewConfigs(

return success;

}

+void MediaSourceState::SetStreamMemoryLimits() {

+ auto cmd_line = base::CommandLine::ForCurrentProcess();

+ std::string audio_buf_limit_switch =

+ cmd_line->GetSwitchValueASCII(switches::kMSEAudioBufferSizeLimit);

+ unsigned audio_buf_size_limit = 0;

+ if (base::StringToUint(audio_buf_limit_switch, &audio_buf_size_limit) &&

+ audio_buf_size_limit > 0) {

+ MEDIA_LOG(INFO, media_log_) << "Custom audio SourceBuffer size limit="

wolenetz 2016/09/13 21:03:14 nit: audio *per-track* SourceBuffer size limit=...

servolk 2016/09/14 18:15:26 Done.

+ << audio_buf_size_limit;

+ for (const auto& it : audio_streams_) {

+ it.second->SetStreamMemoryLimit(audio_buf_size_limit);

+ }

+ std::string video_buf_limit_switch =

+ cmd_line->GetSwitchValueASCII(switches::kMSEVideoBufferSizeLimit);

+ unsigned video_buf_size_limit = 0;

+ if (base::StringToUint(video_buf_limit_switch, &video_buf_size_limit) &&

+ video_buf_size_limit > 0) {

+ MEDIA_LOG(INFO, media_log_) << "Custom video SourceBuffer size limit="

wolenetz 2016/09/13 21:03:14 nit ditto

servolk 2016/09/14 18:15:26 Done.

+ << video_buf_size_limit;

+ for (const auto& it : video_streams_) {

+ it.second->SetStreamMemoryLimit(video_buf_size_limit);

+ }

void MediaSourceState::OnNewMediaSegment() {

DVLOG(2) << "OnNewMediaSegment()";

DCHECK_EQ(state_, PARSER_INITIALIZED);

parsing_media_segment_ = true;

- media_segment_contained_audio_frame_ = false;

- media_segment_contained_video_frame_ = false;

+ media_segment_has_data_for_track_.clear();

}

void MediaSourceState::OnEndOfMediaSegment() {

@@ -760,24 +844,31 @@ void MediaSourceState::OnEndOfMediaSegment() {

DCHECK_EQ(state_, PARSER_INITIALIZED);

parsing_media_segment_ = false;

- const bool missing_audio = audio_ && !media_segment_contained_audio_frame_;

- const bool missing_video = video_ && !media_segment_contained_video_frame_;

- if (!missing_audio && !missing_video)

- return;

- LIMITED_MEDIA_LOG(DEBUG, media_log_, num_missing_track_logs_,

- kMaxMissingTrackInSegmentLogs)

- << "Media segment did not contain any "

- << (missing_audio && missing_video ? "audio or video"

- : missing_audio ? "audio" : "video")

- << " coded frames, mismatching initialization segment. Therefore, MSE "

- "coded frame processing may not interoperably detect discontinuities "

- "in appended media.";

+ for (const auto& it : audio_streams_) {

+ if (!media_segment_has_data_for_track_[it.first]) {

+ LIMITED_MEDIA_LOG(DEBUG, media_log_, num_missing_track_logs_,

+ kMaxMissingTrackInSegmentLogs)

+ << "Media segment did not contain any coded frames for track "

+ << it.first << ", mismatching initialization segment. Therefore, MSE"

+ " coded frame processing may not interoperably detect"

+ " discontinuities in appended media.";

+ }

+ for (const auto& it : video_streams_) {

+ if (!media_segment_has_data_for_track_[it.first]) {

+ LIMITED_MEDIA_LOG(DEBUG, media_log_, num_missing_track_logs_,

+ kMaxMissingTrackInSegmentLogs)

+ << "Media segment did not contain any coded frames for track "

+ << it.first << ", mismatching initialization segment. Therefore, MSE"

+ " coded frame processing may not interoperably detect"

+ " discontinuities in appended media.";

+ }

}

bool MediaSourceState::OnNewBuffers(

const StreamParser::BufferQueueMap& buffer_queue_map) {

- DVLOG(2) << "OnNewBuffers()";

+ DVLOG(2) << __func__ << " buffer_queues=" << buffer_queue_map.size();

DCHECK_EQ(state_, PARSER_INITIALIZED);

DCHECK(timestamp_offset_during_append_);

DCHECK(parsing_media_segment_);

@@ -785,11 +876,7 @@ bool MediaSourceState::OnNewBuffers(

for (const auto& it : buffer_queue_map) {

const StreamParser::BufferQueue& bufq = it.second;

DCHECK(!bufq.empty());

- if (bufq[0]->type() == DemuxerStream::AUDIO) {

- media_segment_contained_audio_frame_ = true;

- } else if (bufq[0]->type() == DemuxerStream::VIDEO) {

- media_segment_contained_video_frame_ = true;

- }

+ media_segment_has_data_for_track_[it.first] = true;

}

const TimeDelta timestamp_offset_before_processing =

@@ -827,7 +914,6 @@ bool MediaSourceState::OnNewBuffers(

return true;

}

void MediaSourceState::OnSourceInitDone(

const StreamParser::InitParameters& params) {

DCHECK_EQ(state_, PENDING_PARSER_INIT);