| Index: media/formats/webm/webm_cluster_parser.cc
|
| diff --git a/media/formats/webm/webm_cluster_parser.cc b/media/formats/webm/webm_cluster_parser.cc
|
| index 6784776a0e029e0c21d168391532087b8a43fa6b..7dacd7102c7b7f24d3fc6ba27de9df9a4c484504 100644
|
| --- a/media/formats/webm/webm_cluster_parser.cc
|
| +++ b/media/formats/webm/webm_cluster_parser.cc
|
| @@ -17,6 +17,17 @@
|
|
|
| namespace media {
|
|
|
| +const uint16_t WebMClusterParser::kOpusFrameDurationsMu[] = {
|
| + 10000, 20000, 40000, 60000, 10000, 20000, 40000, 60000, 10000, 20000, 40000,
|
| + 60000, 10000, 20000, 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000,
|
| + 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000, 10000, 20000};
|
| +
|
| +enum {
|
| + // Limits the number of MEDIA_LOG() calls in the path of reading encoded
|
| + // duration to avoid spamming for corrupted data.
|
| + kMaxDurationLogs = 10,
|
| +};
|
| +
|
| WebMClusterParser::WebMClusterParser(
|
| int64 timecode_scale,
|
| int audio_track_num,
|
| @@ -27,11 +38,14 @@ WebMClusterParser::WebMClusterParser(
|
| const std::set<int64>& ignored_tracks,
|
| const std::string& audio_encryption_key_id,
|
| const std::string& video_encryption_key_id,
|
| + const AudioCodec audio_codec,
|
| const LogCB& log_cb)
|
| - : timecode_multiplier_(timecode_scale / 1000.0),
|
| + : num_duration_errors_(0),
|
| + timecode_multiplier_(timecode_scale / 1000.0),
|
| ignored_tracks_(ignored_tracks),
|
| audio_encryption_key_id_(audio_encryption_key_id),
|
| video_encryption_key_id_(video_encryption_key_id),
|
| + audio_codec_(audio_codec),
|
| parser_(kWebMIdCluster, this),
|
| last_block_timecode_(-1),
|
| block_data_size_(-1),
|
| @@ -68,7 +82,7 @@ void WebMClusterParser::Reset() {
|
| ready_buffer_upper_bound_ = kNoDecodeTimestamp();
|
| }
|
|
|
| -int WebMClusterParser::Parse(const uint8* buf, int size) {
|
| +int WebMClusterParser::Parse(const uint8_t* buf, int size) {
|
| audio_.ClearReadyBuffers();
|
| video_.ClearReadyBuffers();
|
| ClearTextTrackReadyBuffers();
|
| @@ -140,6 +154,101 @@ WebMClusterParser::GetTextBuffers() {
|
| return text_buffers_map_;
|
| }
|
|
|
| +base::TimeDelta WebMClusterParser::TryGetEncodedAudioDuration(
|
| + const uint8_t* data,
|
| + int size) {
|
| +
|
| + // Duration is currently read assuming the *entire* stream is unencrypted.
|
| + // The special "Signal Byte" prepended to Blocks in encrypted streams is
|
| + // assumed to not be present.
|
| + // TODO(chcunningham): Consider parsing "Signal Byte" for encrypted streams
|
| + // to return duration for any unencrypted blocks.
|
| +
|
| + if (audio_codec_ == kCodecOpus) {
|
| + return ReadOpusDuration(data, size);
|
| + }
|
| +
|
| + // TODO(wolenetz/chcunningham): Implement duration reading for Vorbis. See
|
| + // motivations in http://crbug.com/396634.
|
| +
|
| + return kNoTimestamp();
|
| +}
|
| +
|
| +base::TimeDelta WebMClusterParser::ReadOpusDuration(const uint8_t* data,
|
| + int size) {
|
| + // Masks and constants for Opus packets. See
|
| + // https://tools.ietf.org/html/rfc6716#page-14
|
| + static const uint8_t kTocConfigMask = 0xf8;
|
| + static const uint8_t kTocFrameCountCodeMask = 0x03;
|
| + static const uint8_t kFrameCountMask = 0x3f;
|
| + static const base::TimeDelta kPacketDurationMax =
|
| + base::TimeDelta::FromMilliseconds(120);
|
| +
|
| + if (size < 1) {
|
| + LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs)
|
| + << "Invalid zero-byte Opus packet; demuxed block duration may be "
|
| + "imprecise.";
|
| + return kNoTimestamp();
|
| + }
|
| +
|
| + // Frame count type described by last 2 bits of Opus TOC byte.
|
| + int frame_count_type = data[0] & kTocFrameCountCodeMask;
|
| +
|
| + int frame_count = 0;
|
| + switch (frame_count_type) {
|
| + case 0:
|
| + frame_count = 1;
|
| + break;
|
| + case 1:
|
| + case 2:
|
| + frame_count = 2;
|
| + break;
|
| + case 3:
|
| + // Type 3 indicates an arbitrary frame count described in the next byte.
|
| + if (size < 2) {
|
| + LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs)
|
| + << "Second byte missing from 'Code 3' Opus packet; demuxed block "
|
| + "duration may be imprecise.";
|
| + return kNoTimestamp();
|
| + }
|
| +
|
| + frame_count = data[1] & kFrameCountMask;
|
| +
|
| + if (frame_count == 0) {
|
| + LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs)
|
| + << "Illegal 'Code 3' Opus packet with frame count zero; demuxed "
|
| + "block duration may be imprecise.";
|
| + return kNoTimestamp();
|
| + }
|
| +
|
| + break;
|
| + default:
|
| + LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs)
|
| + << "Unexpected Opus frame count type: " << frame_count_type << "; "
|
| + << "demuxed block duration may be imprecise.";
|
| + return kNoTimestamp();
|
| + }
|
| +
|
| + int opusConfig = (data[0] & kTocConfigMask) >> 3;
|
| + CHECK_GE(opusConfig, 0);
|
| + CHECK_LT(opusConfig, static_cast<int>(arraysize(kOpusFrameDurationsMu)));
|
| +
|
| + DCHECK_GT(frame_count, 0);
|
| + base::TimeDelta duration = base::TimeDelta::FromMicroseconds(
|
| + kOpusFrameDurationsMu[opusConfig] * frame_count);
|
| +
|
| + if (duration > kPacketDurationMax) {
|
| + // Intentionally allowing packet to pass through for now. Decoder should
|
| + // either handle or fail gracefully. MEDIA_LOG as breadcrumbs in case
|
| + // things go sideways.
|
| + LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs)
|
| + << "Warning, demuxed Opus packet with encoded duration: " << duration
|
| + << ". Should be no greater than " << kPacketDurationMax;
|
| + }
|
| +
|
| + return duration;
|
| +}
|
| +
|
| WebMParserClient* WebMClusterParser::OnListStart(int id) {
|
| if (id == kWebMIdCluster) {
|
| cluster_timecode_ = -1;
|
| @@ -205,9 +314,12 @@ bool WebMClusterParser::OnUInt(int id, int64 val) {
|
| return true;
|
| }
|
|
|
| -bool WebMClusterParser::ParseBlock(bool is_simple_block, const uint8* buf,
|
| - int size, const uint8* additional,
|
| - int additional_size, int duration,
|
| +bool WebMClusterParser::ParseBlock(bool is_simple_block,
|
| + const uint8_t* buf,
|
| + int size,
|
| + const uint8_t* additional,
|
| + int additional_size,
|
| + int duration,
|
| int64 discard_padding) {
|
| if (size < 4)
|
| return false;
|
| @@ -233,14 +345,14 @@ bool WebMClusterParser::ParseBlock(bool is_simple_block, const uint8* buf,
|
| if (timecode & 0x8000)
|
| timecode |= ~0xffff;
|
|
|
| - const uint8* frame_data = buf + 4;
|
| + const uint8_t* frame_data = buf + 4;
|
| int frame_size = size - (frame_data - buf);
|
| return OnBlock(is_simple_block, track_num, timecode, duration, flags,
|
| frame_data, frame_size, additional, additional_size,
|
| discard_padding);
|
| }
|
|
|
| -bool WebMClusterParser::OnBinary(int id, const uint8* data, int size) {
|
| +bool WebMClusterParser::OnBinary(int id, const uint8_t* data, int size) {
|
| switch (id) {
|
| case kWebMIdSimpleBlock:
|
| return ParseBlock(true, data, size, NULL, 0, -1, 0);
|
| @@ -251,7 +363,7 @@ bool WebMClusterParser::OnBinary(int id, const uint8* data, int size) {
|
| "supported.";
|
| return false;
|
| }
|
| - block_data_.reset(new uint8[size]);
|
| + block_data_.reset(new uint8_t[size]);
|
| memcpy(block_data_.get(), data, size);
|
| block_data_size_ = size;
|
| return true;
|
| @@ -271,7 +383,7 @@ bool WebMClusterParser::OnBinary(int id, const uint8* data, int size) {
|
| // element's value in Big Endian format. This is done to mimic ffmpeg
|
| // demuxer's behavior.
|
| block_additional_data_size_ = size + sizeof(block_add_id);
|
| - block_additional_data_.reset(new uint8[block_additional_data_size_]);
|
| + block_additional_data_.reset(new uint8_t[block_additional_data_size_]);
|
| memcpy(block_additional_data_.get(), &block_add_id,
|
| sizeof(block_add_id));
|
| memcpy(block_additional_data_.get() + 8, data, size);
|
| @@ -294,12 +406,15 @@ bool WebMClusterParser::OnBinary(int id, const uint8* data, int size) {
|
| }
|
| }
|
|
|
| -bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num,
|
| +bool WebMClusterParser::OnBlock(bool is_simple_block,
|
| + int track_num,
|
| int timecode,
|
| - int block_duration,
|
| + int block_duration,
|
| int flags,
|
| - const uint8* data, int size,
|
| - const uint8* additional, int additional_size,
|
| + const uint8_t* data,
|
| + int size,
|
| + const uint8_t* additional,
|
| + int additional_size,
|
| int64 discard_padding) {
|
| DCHECK_GE(size, 0);
|
| if (cluster_timecode_ == -1) {
|
| @@ -324,9 +439,13 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num,
|
| Track* track = NULL;
|
| StreamParserBuffer::Type buffer_type = DemuxerStream::AUDIO;
|
| std::string encryption_key_id;
|
| + base::TimeDelta encoded_duration = kNoTimestamp();
|
| if (track_num == audio_.track_num()) {
|
| track = &audio_;
|
| encryption_key_id = audio_encryption_key_id_;
|
| + if (encryption_key_id.empty()) {
|
| + encoded_duration = TryGetEncodedAudioDuration(data, size);
|
| + }
|
| } else if (track_num == video_.track_num()) {
|
| track = &video_;
|
| encryption_key_id = video_encryption_key_id_;
|
| @@ -367,7 +486,7 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num,
|
| if (!encryption_key_id.empty() &&
|
| !WebMCreateDecryptConfig(
|
| data, size,
|
| - reinterpret_cast<const uint8*>(encryption_key_id.data()),
|
| + reinterpret_cast<const uint8_t*>(encryption_key_id.data()),
|
| encryption_key_id.size(),
|
| &decrypt_config, &data_offset)) {
|
| return false;
|
| @@ -387,7 +506,7 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num,
|
| std::string id, settings, content;
|
| WebMWebVTTParser::Parse(data, size, &id, &settings, &content);
|
|
|
| - std::vector<uint8> side_data;
|
| + std::vector<uint8_t> side_data;
|
| MakeSideData(id.begin(), id.end(),
|
| settings.begin(), settings.end(),
|
| &side_data);
|
| @@ -396,7 +515,7 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num,
|
| // type with remapped bytestream track numbers and allow multiple tracks as
|
| // applicable. See https://crbug.com/341581.
|
| buffer = StreamParserBuffer::CopyFrom(
|
| - reinterpret_cast<const uint8*>(content.data()),
|
| + reinterpret_cast<const uint8_t*>(content.data()),
|
| content.length(),
|
| &side_data[0],
|
| side_data.size(),
|
| @@ -407,9 +526,47 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num,
|
| if (cluster_start_time_ == kNoTimestamp())
|
| cluster_start_time_ = timestamp;
|
|
|
| + base::TimeDelta block_duration_time_delta = kNoTimestamp();
|
| if (block_duration >= 0) {
|
| - buffer->set_duration(base::TimeDelta::FromMicroseconds(
|
| - block_duration * timecode_multiplier_));
|
| + block_duration_time_delta = base::TimeDelta::FromMicroseconds(
|
| + block_duration * timecode_multiplier_);
|
| + }
|
| +
|
| + // Prefer encoded duration over BlockGroup->BlockDuration or
|
| + // TrackEntry->DefaultDuration when available. This layering violation is a
|
| + // workaround for http://crbug.com/396634, decreasing the likelihood of
|
| + // fall-back to rough estimation techniques for Blocks that lack a
|
| + // BlockDuration at the end of a cluster. Cross cluster durations are not
|
| + // feasible given flexibility of cluster ordering and MSE APIs. Duration
|
| + // estimation may still apply in cases of encryption and codecs for which
|
| + // we do not extract encoded duration. Within a cluster, estimates are applied
|
| + // as Block Timecode deltas, or once the whole cluster is parsed in the case
|
| + // of the last Block in the cluster. See Track::AddBuffer and
|
| + // ApplyDurationEstimateIfNeeded().
|
| + if (encoded_duration != kNoTimestamp()) {
|
| + DCHECK(encoded_duration != kInfiniteDuration());
|
| + DCHECK(encoded_duration > base::TimeDelta());
|
| + buffer->set_duration(encoded_duration);
|
| +
|
| + DVLOG(3) << __FUNCTION__ << " : "
|
| + << "Using encoded duration " << encoded_duration.InSecondsF();
|
| +
|
| + if (block_duration_time_delta != kNoTimestamp()) {
|
| + base::TimeDelta duration_difference =
|
| + block_duration_time_delta - encoded_duration;
|
| +
|
| + const auto kWarnDurationDiff =
|
| + base::TimeDelta::FromMicroseconds(timecode_multiplier_ * 2);
|
| + if (duration_difference.magnitude() > kWarnDurationDiff) {
|
| + LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs)
|
| + << "BlockDuration "
|
| + << "(" << block_duration_time_delta << ") "
|
| + << "differs significantly from encoded duration "
|
| + << "(" << encoded_duration << ").";
|
| + }
|
| + }
|
| + } else if (block_duration_time_delta != kNoTimestamp()) {
|
| + buffer->set_duration(block_duration_time_delta);
|
| } else {
|
| DCHECK_NE(buffer_type, DemuxerStream::TEXT);
|
| buffer->set_duration(track->default_duration());
|
| @@ -549,7 +706,7 @@ void WebMClusterParser::Track::Reset() {
|
| last_added_buffer_missing_duration_ = NULL;
|
| }
|
|
|
| -bool WebMClusterParser::Track::IsKeyframe(const uint8* data, int size) const {
|
| +bool WebMClusterParser::Track::IsKeyframe(const uint8_t* data, int size) const {
|
| // For now, assume that all blocks are keyframes for datatypes other than
|
| // video. This is a valid assumption for Vorbis, WebVTT, & Opus.
|
| if (!is_video_)
|
|
|