Chromium Code Reviews| Index: media/formats/webm/webm_cluster_parser.cc |
| diff --git a/media/formats/webm/webm_cluster_parser.cc b/media/formats/webm/webm_cluster_parser.cc |
| index 6784776a0e029e0c21d168391532087b8a43fa6b..f6d436cb79b924e2a6deb0c34b01101b934ffdbb 100644 |
| --- a/media/formats/webm/webm_cluster_parser.cc |
| +++ b/media/formats/webm/webm_cluster_parser.cc |
| @@ -17,6 +17,17 @@ |
| namespace media { |
| +const uint16_t WebMClusterParser::kOpusFrameDurationsMu[] = { |
| + 10000, 20000, 40000, 60000, 10000, 20000, 40000, 60000, 10000, 20000, 40000, |
| + 60000, 10000, 20000, 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000, |
| + 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000, 10000, 20000}; |
| + |
| +enum { |
| + // Limits the number of MEDIA_LOG() calls in the path of reading encoded |
| + // duration to avoid spamming for corrupted data. |
| + kMaxDurationLogs = 10, |
| +}; |
| + |
| WebMClusterParser::WebMClusterParser( |
| int64 timecode_scale, |
| int audio_track_num, |
| @@ -27,11 +38,14 @@ WebMClusterParser::WebMClusterParser( |
| const std::set<int64>& ignored_tracks, |
| const std::string& audio_encryption_key_id, |
| const std::string& video_encryption_key_id, |
| + const AudioCodec audio_codec, |
| const LogCB& log_cb) |
| - : timecode_multiplier_(timecode_scale / 1000.0), |
| + : num_duration_errors_(0), |
| + timecode_multiplier_(timecode_scale / 1000.0), |
| ignored_tracks_(ignored_tracks), |
| audio_encryption_key_id_(audio_encryption_key_id), |
| video_encryption_key_id_(video_encryption_key_id), |
| + audio_codec_(audio_codec), |
| parser_(kWebMIdCluster, this), |
| last_block_timecode_(-1), |
| block_data_size_(-1), |
| @@ -68,7 +82,7 @@ void WebMClusterParser::Reset() { |
| ready_buffer_upper_bound_ = kNoDecodeTimestamp(); |
| } |
| -int WebMClusterParser::Parse(const uint8* buf, int size) { |
| +int WebMClusterParser::Parse(const uint8_t* buf, int size) { |
| audio_.ClearReadyBuffers(); |
| video_.ClearReadyBuffers(); |
| ClearTextTrackReadyBuffers(); |
| @@ -140,6 +154,86 @@ WebMClusterParser::GetTextBuffers() { |
| return text_buffers_map_; |
| } |
| +base::TimeDelta WebMClusterParser::TryGetEncodedAudioDuration( |
| + const uint8_t* data, |
| + int size) { |
| + if (audio_codec_ == kCodecOpus) { |
| + return ReadOpusDuration(data, size); |
| + } |
| + |
| + // TODO(wolenetz/chcunningham): Implement duration reading for Vorbis. See |
| + // motivations in http://crbug.com/396634. |
| + |
| + return kNoTimestamp(); |
| +} |
| + |
| +base::TimeDelta WebMClusterParser::ReadOpusDuration(const uint8_t* data, |
| + int size) { |
| + // Masks and constants for Opus packets. See |
| + // https://tools.ietf.org/html/rfc6716#page-14 |
| + static const uint8_t kTocConfigMask = 0xf8; |
| + static const uint8_t kTocFrameCountCodeMask = 0x03; |
| + static const uint8_t kFrameCountMask = 0x3f; |
| + static const base::TimeDelta kPacketDurationMax = |
| + base::TimeDelta::FromMilliseconds(120); |
| + |
| + if (size < 1) { |
| + LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs) |
| + << "Invalid zero-byte Opus packet."; |
|
wolenetz
2015/02/05 23:05:00
nit: add ", so demuxed block duration may be impre
chcunningham
2015/02/06 03:20:09
Done.
|
| + return kNoTimestamp(); |
| + } |
| + |
| + // Frame count type described by last 2 bits of Opus TOC byte. |
| + int frame_count_type = data[0] & kTocFrameCountCodeMask; |
| + |
| + int frame_count = 0; |
| + switch (frame_count_type) { |
| + case 0: |
| + frame_count = 1; |
| + break; |
| + case 1: |
| + case 2: |
| + frame_count = 2; |
| + break; |
| + case 3: |
| + // Type 3 indicates an arbitrary frame count described in the next byte. |
| + if (size < 2) { |
| + LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs) |
| + << "Second byte missing from 'Code 3' Opus packet."; |
|
wolenetz
2015/02/05 23:05:00
nit: ditto ", so demuxed"...
chcunningham
2015/02/06 03:20:09
Done.
|
| + return kNoTimestamp(); |
| + } |
| + |
| + frame_count = data[1] & kFrameCountMask; |
| + |
| + if (frame_count == 0) { |
| + LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs) |
| + << "Illegal opus packet with frame count zero."; |
|
wolenetz
2015/02/05 23:05:00
nit: ditto ", so demuxed"... Also mention "'Code 3
chcunningham
2015/02/06 03:20:09
Done.
|
| + return kNoTimestamp(); |
| + } |
| + |
| + break; |
| + default: |
| + LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs) |
| + << "Unexpected Opus frame count type: " << frame_count_type; |
|
wolenetz
2015/02/05 23:05:00
nit: ditto ", so demuxed"...
chcunningham
2015/02/06 03:20:09
Done.
|
| + return kNoTimestamp(); |
| + } |
| + |
| + int opusConfig = (data[0] & kTocConfigMask) >> 3; |
|
wolenetz
2015/02/05 23:05:00
nit: harden the code against regressions by DCHECK
chcunningham
2015/02/06 03:20:09
Done.
|
| + base::TimeDelta duration = base::TimeDelta::FromMicroseconds( |
|
wolenetz
2015/02/05 23:05:00
nit: harden the code against maintenance regressio
chcunningham
2015/02/06 03:20:09
Done.
|
| + kOpusFrameDurationsMu[opusConfig] * frame_count); |
| + |
| + if (duration > kPacketDurationMax) { |
| + // Intentionally allowing packet to pass through for now. Decoder should |
| + // either handle or fail gracefully. MEDIA_LOG as breadcrumbs in case |
| + // things go sideways. |
| + LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs) |
| + << "Illegal Opus packet duration: " << duration << ". " |
| + << "Should be no greater than " << kPacketDurationMax; |
|
wolenetz
2015/02/05 23:05:00
nit: clarify log msg slightly to indicate there's
chcunningham
2015/02/06 03:20:09
Done.
|
| + } |
| + |
| + return duration; |
| +} |
| + |
| WebMParserClient* WebMClusterParser::OnListStart(int id) { |
| if (id == kWebMIdCluster) { |
| cluster_timecode_ = -1; |
| @@ -205,9 +299,12 @@ bool WebMClusterParser::OnUInt(int id, int64 val) { |
| return true; |
| } |
| -bool WebMClusterParser::ParseBlock(bool is_simple_block, const uint8* buf, |
| - int size, const uint8* additional, |
| - int additional_size, int duration, |
| +bool WebMClusterParser::ParseBlock(bool is_simple_block, |
| + const uint8_t* buf, |
| + int size, |
| + const uint8_t* additional, |
| + int additional_size, |
| + int duration, |
| int64 discard_padding) { |
| if (size < 4) |
| return false; |
| @@ -233,14 +330,14 @@ bool WebMClusterParser::ParseBlock(bool is_simple_block, const uint8* buf, |
| if (timecode & 0x8000) |
| timecode |= ~0xffff; |
| - const uint8* frame_data = buf + 4; |
| + const uint8_t* frame_data = buf + 4; |
| int frame_size = size - (frame_data - buf); |
| return OnBlock(is_simple_block, track_num, timecode, duration, flags, |
| frame_data, frame_size, additional, additional_size, |
| discard_padding); |
| } |
| -bool WebMClusterParser::OnBinary(int id, const uint8* data, int size) { |
| +bool WebMClusterParser::OnBinary(int id, const uint8_t* data, int size) { |
| switch (id) { |
| case kWebMIdSimpleBlock: |
| return ParseBlock(true, data, size, NULL, 0, -1, 0); |
| @@ -251,7 +348,7 @@ bool WebMClusterParser::OnBinary(int id, const uint8* data, int size) { |
| "supported."; |
| return false; |
| } |
| - block_data_.reset(new uint8[size]); |
| + block_data_.reset(new uint8_t[size]); |
| memcpy(block_data_.get(), data, size); |
| block_data_size_ = size; |
| return true; |
| @@ -271,7 +368,7 @@ bool WebMClusterParser::OnBinary(int id, const uint8* data, int size) { |
| // element's value in Big Endian format. This is done to mimic ffmpeg |
| // demuxer's behavior. |
| block_additional_data_size_ = size + sizeof(block_add_id); |
| - block_additional_data_.reset(new uint8[block_additional_data_size_]); |
| + block_additional_data_.reset(new uint8_t[block_additional_data_size_]); |
| memcpy(block_additional_data_.get(), &block_add_id, |
| sizeof(block_add_id)); |
| memcpy(block_additional_data_.get() + 8, data, size); |
| @@ -294,12 +391,15 @@ bool WebMClusterParser::OnBinary(int id, const uint8* data, int size) { |
| } |
| } |
| -bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num, |
| +bool WebMClusterParser::OnBlock(bool is_simple_block, |
| + int track_num, |
| int timecode, |
| - int block_duration, |
| + int block_duration, |
| int flags, |
| - const uint8* data, int size, |
| - const uint8* additional, int additional_size, |
| + const uint8_t* data, |
| + int size, |
| + const uint8_t* additional, |
| + int additional_size, |
| int64 discard_padding) { |
| DCHECK_GE(size, 0); |
| if (cluster_timecode_ == -1) { |
| @@ -324,9 +424,13 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num, |
| Track* track = NULL; |
| StreamParserBuffer::Type buffer_type = DemuxerStream::AUDIO; |
| std::string encryption_key_id; |
| + base::TimeDelta encoded_duration = kNoTimestamp(); |
| if (track_num == audio_.track_num()) { |
| track = &audio_; |
| encryption_key_id = audio_encryption_key_id_; |
| + if (encryption_key_id.empty()) { |
| + encoded_duration = TryGetEncodedAudioDuration(data, size); |
| + } |
| } else if (track_num == video_.track_num()) { |
| track = &video_; |
| encryption_key_id = video_encryption_key_id_; |
| @@ -367,7 +471,7 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num, |
| if (!encryption_key_id.empty() && |
| !WebMCreateDecryptConfig( |
| data, size, |
| - reinterpret_cast<const uint8*>(encryption_key_id.data()), |
| + reinterpret_cast<const uint8_t*>(encryption_key_id.data()), |
| encryption_key_id.size(), |
| &decrypt_config, &data_offset)) { |
| return false; |
| @@ -387,7 +491,7 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num, |
| std::string id, settings, content; |
| WebMWebVTTParser::Parse(data, size, &id, &settings, &content); |
| - std::vector<uint8> side_data; |
| + std::vector<uint8_t> side_data; |
| MakeSideData(id.begin(), id.end(), |
| settings.begin(), settings.end(), |
| &side_data); |
| @@ -396,7 +500,7 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num, |
| // type with remapped bytestream track numbers and allow multiple tracks as |
| // applicable. See https://crbug.com/341581. |
| buffer = StreamParserBuffer::CopyFrom( |
| - reinterpret_cast<const uint8*>(content.data()), |
| + reinterpret_cast<const uint8_t*>(content.data()), |
| content.length(), |
| &side_data[0], |
| side_data.size(), |
| @@ -407,9 +511,46 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num, |
| if (cluster_start_time_ == kNoTimestamp()) |
| cluster_start_time_ = timestamp; |
| + base::TimeDelta block_duration_time_delta = kNoTimestamp(); |
| if (block_duration >= 0) { |
| - buffer->set_duration(base::TimeDelta::FromMicroseconds( |
| - block_duration * timecode_multiplier_)); |
| + block_duration_time_delta = base::TimeDelta::FromMicroseconds( |
| + block_duration * timecode_multiplier_); |
| + } |
| + |
| + // Prefer encoded duration over BlockGroup->BlockDuration or |
| + // TrackEntry->DefaultDuration when available. This layering violation is a |
| + // workaround for http://crbug.com/396634, decreasing the likelihood of |
| + // fall-back to rough estimation techniques for Blocks that lack a |
| + // BlockDuration at the end of a cluster. Cross cluster durations are not |
| + // feasible given flexibility of cluster ordering and MSE APIs. Duration |
| + // estimation may still apply in cases of encryption and codecs for which |
| + // we do not extract encoded duration. Estimates are applied at the end of |
|
wolenetz
2015/02/05 23:05:00
not all estimates. some are inter-block, intra-clu
chcunningham
2015/02/06 03:20:09
Done.
|
| + // parsing once the whole cluster is parsed. See |
| + // ApplyDurationEstimateIfNeeded() for more on estimation. |
| + if (encoded_duration != kNoTimestamp()) { |
| + DCHECK(encoded_duration != kInfiniteDuration()); |
| + DCHECK(encoded_duration > base::TimeDelta()); |
| + buffer->set_duration(encoded_duration); |
| + |
| + DVLOG(3) << __FUNCTION__ << " : " |
| + << "Using encoded duration " << encoded_duration.InSecondsF(); |
| + |
| + if (block_duration_time_delta != kNoTimestamp()) { |
| + base::TimeDelta duration_difference = |
| + block_duration_time_delta - encoded_duration; |
| + |
| + const auto kWarnDurationDiff = |
| + base::TimeDelta::FromMicroseconds(timecode_multiplier_ * 2); |
| + if (duration_difference.magnitude() > kWarnDurationDiff) { |
| + LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs) |
| + << "BlockDuration " |
| + << "(" << block_duration_time_delta << ") " |
| + << "differs significantly from encoded duration " |
| + << "(" << encoded_duration << ")."; |
| + } |
| + } |
| + } else if (block_duration_time_delta != kNoTimestamp()) { |
| + buffer->set_duration(block_duration_time_delta); |
| } else { |
| DCHECK_NE(buffer_type, DemuxerStream::TEXT); |
| buffer->set_duration(track->default_duration()); |
| @@ -549,7 +690,7 @@ void WebMClusterParser::Track::Reset() { |
| last_added_buffer_missing_duration_ = NULL; |
| } |
| -bool WebMClusterParser::Track::IsKeyframe(const uint8* data, int size) const { |
| +bool WebMClusterParser::Track::IsKeyframe(const uint8_t* data, int size) const { |
| // For now, assume that all blocks are keyframes for datatypes other than |
| // video. This is a valid assumption for Vorbis, WebVTT, & Opus. |
| if (!is_video_) |