Index: media/formats/webm/webm_cluster_parser.cc |
diff --git a/media/formats/webm/webm_cluster_parser.cc b/media/formats/webm/webm_cluster_parser.cc |
index 6784776a0e029e0c21d168391532087b8a43fa6b..6f72f154b14bcf985e8eb95dc3ed1351fe1b9da2 100644 |
--- a/media/formats/webm/webm_cluster_parser.cc |
+++ b/media/formats/webm/webm_cluster_parser.cc |
@@ -17,6 +17,17 @@ |
namespace media { |
+const uint16_t WebMClusterParser::kOpusFrameDurationsMu[] = { |
+ 10000, 20000, 40000, 60000, 10000, 20000, 40000, 60000, 10000, 20000, 40000, |
+ 60000, 10000, 20000, 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000, |
+ 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000, 10000, 20000}; |
+ |
+enum { |
+ // Limits the number of MEDIA_LOG() calls in the path of reading encoded |
+ // duration to avoid spamming for corrupted data. |
+ kMaxDurationLogs = 10, |
+}; |
+ |
WebMClusterParser::WebMClusterParser( |
int64 timecode_scale, |
int audio_track_num, |
@@ -27,11 +38,14 @@ WebMClusterParser::WebMClusterParser( |
const std::set<int64>& ignored_tracks, |
const std::string& audio_encryption_key_id, |
const std::string& video_encryption_key_id, |
+ const AudioCodec audio_codec, |
const LogCB& log_cb) |
- : timecode_multiplier_(timecode_scale / 1000.0), |
+ : num_duration_errors_(0), |
+ timecode_multiplier_(timecode_scale / 1000.0), |
ignored_tracks_(ignored_tracks), |
audio_encryption_key_id_(audio_encryption_key_id), |
video_encryption_key_id_(video_encryption_key_id), |
+ audio_codec_(audio_codec), |
parser_(kWebMIdCluster, this), |
last_block_timecode_(-1), |
block_data_size_(-1), |
@@ -68,7 +82,7 @@ void WebMClusterParser::Reset() { |
ready_buffer_upper_bound_ = kNoDecodeTimestamp(); |
} |
-int WebMClusterParser::Parse(const uint8* buf, int size) { |
+int WebMClusterParser::Parse(const uint8_t* buf, int size) { |
audio_.ClearReadyBuffers(); |
video_.ClearReadyBuffers(); |
ClearTextTrackReadyBuffers(); |
@@ -140,6 +154,94 @@ WebMClusterParser::GetTextBuffers() { |
return text_buffers_map_; |
} |
+base::TimeDelta WebMClusterParser::TryGetEncodedAudioDuration( |
+ const uint8_t* data, |
+ int size) { |
+ if (audio_codec_ == kCodecOpus) { |
+ return ReadOpusDuration(data, size); |
+ } |
+ |
+ // TODO(wolenetz/chcunningham): Implement duration reading for Vorbis. See |
+ // motivations in http://crbug.com/396634. |
+ |
+ return kNoTimestamp(); |
+} |
+ |
+base::TimeDelta WebMClusterParser::ReadOpusDuration(const uint8_t* data, |
+ int size) { |
+ // Masks and constants for Opus packets. See |
+ // https://tools.ietf.org/html/rfc6716#page-14 |
+ static const uint8_t kTocConfigMask = 0xf8; |
+ static const uint8_t kTocFrameCountCodeMask = 0x03; |
+ static const uint8_t kFrameCountMask = 0x3f; |
+ static const base::TimeDelta kPacketDurationMax = |
+ base::TimeDelta::FromMilliseconds(120); |
+ |
+ if (size < 1) { |
+ LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs) |
+ << "Invalid zero-byte Opus packet; demuxed block duration may be " |
+ "imprecise."; |
+ return kNoTimestamp(); |
+ } |
+ |
+ // Frame count type described by last 2 bits of Opus TOC byte. |
+ int frame_count_type = data[0] & kTocFrameCountCodeMask; |
+ |
+ int frame_count = 0; |
+ switch (frame_count_type) { |
+ case 0: |
+ frame_count = 1; |
+ break; |
+ case 1: |
+ case 2: |
+ frame_count = 2; |
+ break; |
+ case 3: |
+ // Type 3 indicates an arbitrary frame count described in the next byte. |
+ if (size < 2) { |
+ LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs) |
+ << "Second byte missing from 'Code 3' Opus packet; demuxed block " |
+ "duration may be imprecise."; |
+ return kNoTimestamp(); |
+ } |
+ |
+ frame_count = data[1] & kFrameCountMask; |
+ |
+ if (frame_count == 0) { |
+ LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs) |
+ << "Illegal 'Code 3' opus packet with frame count zero; demuxed " |
wolenetz
2015/02/06 19:48:01
nit: s/opus/Opus/
chcunningham
2015/02/06 22:46:00
Done.
|
+ "block duration may be imprecise."; |
+ return kNoTimestamp(); |
+ } |
+ |
+ break; |
+ default: |
+ LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs) |
+ << "Unexpected Opus frame count type: " << frame_count_type << "; " |
+ << "demuxed block duration may be imprecise."; |
+ return kNoTimestamp(); |
+ } |
+ |
+ int opusConfig = (data[0] & kTocConfigMask) >> 3; |
+ CHECK_GE(opusConfig, 0); |
+ CHECK_LT(opusConfig, static_cast<int>(arraysize(kOpusFrameDurationsMu))); |
+ |
+ DCHECK_GT(frame_count, 0); |
+ base::TimeDelta duration = base::TimeDelta::FromMicroseconds( |
+ kOpusFrameDurationsMu[opusConfig] * frame_count); |
+ |
+ if (duration > kPacketDurationMax) { |
+ // Intentionally allowing packet to pass through for now. Decoder should |
+ // either handle or fail gracefully. MEDIA_LOG as breadcrumbs in case |
+ // things go sideways. |
+ LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs) |
+ << "Warning, demuxed Opus packet with encoded duration: " << duration |
+ << ". Should be no greater than " << kPacketDurationMax; |
+ } |
+ |
+ return duration; |
+} |
+ |
WebMParserClient* WebMClusterParser::OnListStart(int id) { |
if (id == kWebMIdCluster) { |
cluster_timecode_ = -1; |
@@ -205,9 +307,12 @@ bool WebMClusterParser::OnUInt(int id, int64 val) { |
return true; |
} |
-bool WebMClusterParser::ParseBlock(bool is_simple_block, const uint8* buf, |
- int size, const uint8* additional, |
- int additional_size, int duration, |
+bool WebMClusterParser::ParseBlock(bool is_simple_block, |
+ const uint8_t* buf, |
+ int size, |
+ const uint8_t* additional, |
+ int additional_size, |
+ int duration, |
int64 discard_padding) { |
if (size < 4) |
return false; |
@@ -233,14 +338,14 @@ bool WebMClusterParser::ParseBlock(bool is_simple_block, const uint8* buf, |
if (timecode & 0x8000) |
timecode |= ~0xffff; |
- const uint8* frame_data = buf + 4; |
+ const uint8_t* frame_data = buf + 4; |
int frame_size = size - (frame_data - buf); |
return OnBlock(is_simple_block, track_num, timecode, duration, flags, |
frame_data, frame_size, additional, additional_size, |
discard_padding); |
} |
-bool WebMClusterParser::OnBinary(int id, const uint8* data, int size) { |
+bool WebMClusterParser::OnBinary(int id, const uint8_t* data, int size) { |
switch (id) { |
case kWebMIdSimpleBlock: |
return ParseBlock(true, data, size, NULL, 0, -1, 0); |
@@ -251,7 +356,7 @@ bool WebMClusterParser::OnBinary(int id, const uint8* data, int size) { |
"supported."; |
return false; |
} |
- block_data_.reset(new uint8[size]); |
+ block_data_.reset(new uint8_t[size]); |
memcpy(block_data_.get(), data, size); |
block_data_size_ = size; |
return true; |
@@ -271,7 +376,7 @@ bool WebMClusterParser::OnBinary(int id, const uint8* data, int size) { |
// element's value in Big Endian format. This is done to mimic ffmpeg |
// demuxer's behavior. |
block_additional_data_size_ = size + sizeof(block_add_id); |
- block_additional_data_.reset(new uint8[block_additional_data_size_]); |
+ block_additional_data_.reset(new uint8_t[block_additional_data_size_]); |
memcpy(block_additional_data_.get(), &block_add_id, |
sizeof(block_add_id)); |
memcpy(block_additional_data_.get() + 8, data, size); |
@@ -294,12 +399,15 @@ bool WebMClusterParser::OnBinary(int id, const uint8* data, int size) { |
} |
} |
-bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num, |
+bool WebMClusterParser::OnBlock(bool is_simple_block, |
+ int track_num, |
int timecode, |
- int block_duration, |
+ int block_duration, |
int flags, |
- const uint8* data, int size, |
- const uint8* additional, int additional_size, |
+ const uint8_t* data, |
+ int size, |
+ const uint8_t* additional, |
+ int additional_size, |
int64 discard_padding) { |
DCHECK_GE(size, 0); |
if (cluster_timecode_ == -1) { |
@@ -324,9 +432,13 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num, |
Track* track = NULL; |
StreamParserBuffer::Type buffer_type = DemuxerStream::AUDIO; |
std::string encryption_key_id; |
+ base::TimeDelta encoded_duration = kNoTimestamp(); |
if (track_num == audio_.track_num()) { |
track = &audio_; |
encryption_key_id = audio_encryption_key_id_; |
+ if (encryption_key_id.empty()) { |
+ encoded_duration = TryGetEncodedAudioDuration(data, size); |
+ } |
} else if (track_num == video_.track_num()) { |
track = &video_; |
encryption_key_id = video_encryption_key_id_; |
@@ -367,7 +479,7 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num, |
if (!encryption_key_id.empty() && |
!WebMCreateDecryptConfig( |
data, size, |
- reinterpret_cast<const uint8*>(encryption_key_id.data()), |
+ reinterpret_cast<const uint8_t*>(encryption_key_id.data()), |
encryption_key_id.size(), |
&decrypt_config, &data_offset)) { |
return false; |
@@ -387,7 +499,7 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num, |
std::string id, settings, content; |
WebMWebVTTParser::Parse(data, size, &id, &settings, &content); |
- std::vector<uint8> side_data; |
+ std::vector<uint8_t> side_data; |
MakeSideData(id.begin(), id.end(), |
settings.begin(), settings.end(), |
&side_data); |
@@ -396,7 +508,7 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num, |
// type with remapped bytestream track numbers and allow multiple tracks as |
// applicable. See https://crbug.com/341581. |
buffer = StreamParserBuffer::CopyFrom( |
- reinterpret_cast<const uint8*>(content.data()), |
+ reinterpret_cast<const uint8_t*>(content.data()), |
content.length(), |
&side_data[0], |
side_data.size(), |
@@ -407,9 +519,47 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num, |
if (cluster_start_time_ == kNoTimestamp()) |
cluster_start_time_ = timestamp; |
+ base::TimeDelta block_duration_time_delta = kNoTimestamp(); |
if (block_duration >= 0) { |
- buffer->set_duration(base::TimeDelta::FromMicroseconds( |
- block_duration * timecode_multiplier_)); |
+ block_duration_time_delta = base::TimeDelta::FromMicroseconds( |
+ block_duration * timecode_multiplier_); |
+ } |
+ |
+ // Prefer encoded duration over BlockGroup->BlockDuration or |
+ // TrackEntry->DefaultDuration when available. This layering violation is a |
+ // workaround for http://crbug.com/396634, decreasing the likelihood of |
+ // fall-back to rough estimation techniques for Blocks that lack a |
+ // BlockDuration at the end of a cluster. Cross cluster durations are not |
+ // feasible given flexibility of cluster ordering and MSE APIs. Duration |
+ // estimation may still apply in cases of encryption and codecs for which |
+ // we do not extract encoded duration. Within a cluster, estimates are applied |
+ // as Block Timecode deltas, or once the whole cluster is parsed in the case |
+ // of the last Block in the cluster. See Track::AddBuffer and |
+ // ApplyDurationEstimateIfNeeded(). |
+ if (encoded_duration != kNoTimestamp()) { |
+ DCHECK(encoded_duration != kInfiniteDuration()); |
+ DCHECK(encoded_duration > base::TimeDelta()); |
+ buffer->set_duration(encoded_duration); |
+ |
+ DVLOG(3) << __FUNCTION__ << " : " |
+ << "Using encoded duration " << encoded_duration.InSecondsF(); |
+ |
+ if (block_duration_time_delta != kNoTimestamp()) { |
+ base::TimeDelta duration_difference = |
+ block_duration_time_delta - encoded_duration; |
+ |
+ const auto kWarnDurationDiff = |
+ base::TimeDelta::FromMicroseconds(timecode_multiplier_ * 2); |
+ if (duration_difference.magnitude() > kWarnDurationDiff) { |
+ LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs) |
+ << "BlockDuration " |
+ << "(" << block_duration_time_delta << ") " |
+ << "differs significantly from encoded duration " |
+ << "(" << encoded_duration << ")."; |
+ } |
+ } |
+ } else if (block_duration_time_delta != kNoTimestamp()) { |
+ buffer->set_duration(block_duration_time_delta); |
} else { |
DCHECK_NE(buffer_type, DemuxerStream::TEXT); |
buffer->set_duration(track->default_duration()); |
@@ -549,7 +699,7 @@ void WebMClusterParser::Track::Reset() { |
last_added_buffer_missing_duration_ = NULL; |
} |
-bool WebMClusterParser::Track::IsKeyframe(const uint8* data, int size) const { |
+bool WebMClusterParser::Track::IsKeyframe(const uint8_t* data, int size) const { |
// For now, assume that all blocks are keyframes for datatypes other than |
// video. This is a valid assumption for Vorbis, WebVTT, & Opus. |
if (!is_video_) |