Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(597)

Unified Diff: media/formats/webm/webm_cluster_parser.cc

Issue 883403002: Parsing of encoded duration for unencrypted opus streams. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Responding to review feedback, round 2. Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: media/formats/webm/webm_cluster_parser.cc
diff --git a/media/formats/webm/webm_cluster_parser.cc b/media/formats/webm/webm_cluster_parser.cc
index 6784776a0e029e0c21d168391532087b8a43fa6b..f6d436cb79b924e2a6deb0c34b01101b934ffdbb 100644
--- a/media/formats/webm/webm_cluster_parser.cc
+++ b/media/formats/webm/webm_cluster_parser.cc
@@ -17,6 +17,17 @@
namespace media {
+const uint16_t WebMClusterParser::kOpusFrameDurationsMu[] = {
+ 10000, 20000, 40000, 60000, 10000, 20000, 40000, 60000, 10000, 20000, 40000,
+ 60000, 10000, 20000, 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000,
+ 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000, 10000, 20000};
+
+enum {
+ // Limits the number of MEDIA_LOG() calls in the path of reading encoded
+ // duration to avoid spamming for corrupted data.
+ kMaxDurationLogs = 10,
+};
+
WebMClusterParser::WebMClusterParser(
int64 timecode_scale,
int audio_track_num,
@@ -27,11 +38,14 @@ WebMClusterParser::WebMClusterParser(
const std::set<int64>& ignored_tracks,
const std::string& audio_encryption_key_id,
const std::string& video_encryption_key_id,
+ const AudioCodec audio_codec,
const LogCB& log_cb)
- : timecode_multiplier_(timecode_scale / 1000.0),
+ : num_duration_errors_(0),
+ timecode_multiplier_(timecode_scale / 1000.0),
ignored_tracks_(ignored_tracks),
audio_encryption_key_id_(audio_encryption_key_id),
video_encryption_key_id_(video_encryption_key_id),
+ audio_codec_(audio_codec),
parser_(kWebMIdCluster, this),
last_block_timecode_(-1),
block_data_size_(-1),
@@ -68,7 +82,7 @@ void WebMClusterParser::Reset() {
ready_buffer_upper_bound_ = kNoDecodeTimestamp();
}
-int WebMClusterParser::Parse(const uint8* buf, int size) {
+int WebMClusterParser::Parse(const uint8_t* buf, int size) {
audio_.ClearReadyBuffers();
video_.ClearReadyBuffers();
ClearTextTrackReadyBuffers();
@@ -140,6 +154,86 @@ WebMClusterParser::GetTextBuffers() {
return text_buffers_map_;
}
+base::TimeDelta WebMClusterParser::TryGetEncodedAudioDuration(
+ const uint8_t* data,
+ int size) {
+ if (audio_codec_ == kCodecOpus) {
+ return ReadOpusDuration(data, size);
+ }
+
+ // TODO(wolenetz/chcunningham): Implement duration reading for Vorbis. See
+ // motivations in http://crbug.com/396634.
+
+ return kNoTimestamp();
+}
+
+base::TimeDelta WebMClusterParser::ReadOpusDuration(const uint8_t* data,
+ int size) {
+ // Masks and constants for Opus packets. See
+ // https://tools.ietf.org/html/rfc6716#page-14
+ static const uint8_t kTocConfigMask = 0xf8;
+ static const uint8_t kTocFrameCountCodeMask = 0x03;
+ static const uint8_t kFrameCountMask = 0x3f;
+ static const base::TimeDelta kPacketDurationMax =
+ base::TimeDelta::FromMilliseconds(120);
+
+ if (size < 1) {
+ LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs)
+ << "Invalid zero-byte Opus packet.";
wolenetz 2015/02/05 23:05:00 nit: add ", so demuxed block duration may be impre
chcunningham 2015/02/06 03:20:09 Done.
+ return kNoTimestamp();
+ }
+
+ // Frame count type described by last 2 bits of Opus TOC byte.
+ int frame_count_type = data[0] & kTocFrameCountCodeMask;
+
+ int frame_count = 0;
+ switch (frame_count_type) {
+ case 0:
+ frame_count = 1;
+ break;
+ case 1:
+ case 2:
+ frame_count = 2;
+ break;
+ case 3:
+ // Type 3 indicates an arbitrary frame count described in the next byte.
+ if (size < 2) {
+ LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs)
+ << "Second byte missing from 'Code 3' Opus packet.";
wolenetz 2015/02/05 23:05:00 nit: ditto ", so demuxed"...
chcunningham 2015/02/06 03:20:09 Done.
+ return kNoTimestamp();
+ }
+
+ frame_count = data[1] & kFrameCountMask;
+
+ if (frame_count == 0) {
+ LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs)
+ << "Illegal opus packet with frame count zero.";
wolenetz 2015/02/05 23:05:00 nit: ditto ", so demuxed"... Also mention "'Code 3
chcunningham 2015/02/06 03:20:09 Done.
+ return kNoTimestamp();
+ }
+
+ break;
+ default:
+ LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs)
+ << "Unexpected Opus frame count type: " << frame_count_type;
wolenetz 2015/02/05 23:05:00 nit: ditto ", so demuxed"...
chcunningham 2015/02/06 03:20:09 Done.
+ return kNoTimestamp();
+ }
+
+ int opusConfig = (data[0] & kTocConfigMask) >> 3;
wolenetz 2015/02/05 23:05:00 nit: harden the code against regressions by DCHECK
chcunningham 2015/02/06 03:20:09 Done.
+ base::TimeDelta duration = base::TimeDelta::FromMicroseconds(
wolenetz 2015/02/05 23:05:00 nit: harden the code against maintenance regressio
chcunningham 2015/02/06 03:20:09 Done.
+ kOpusFrameDurationsMu[opusConfig] * frame_count);
+
+ if (duration > kPacketDurationMax) {
+ // Intentionally allowing packet to pass through for now. Decoder should
+ // either handle or fail gracefully. MEDIA_LOG as breadcrumbs in case
+ // things go sideways.
+ LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs)
+ << "Illegal Opus packet duration: " << duration << ". "
+ << "Should be no greater than " << kPacketDurationMax;
wolenetz 2015/02/05 23:05:00 nit: clarify log msg slightly to indicate there's
chcunningham 2015/02/06 03:20:09 Done.
+ }
+
+ return duration;
+}
+
WebMParserClient* WebMClusterParser::OnListStart(int id) {
if (id == kWebMIdCluster) {
cluster_timecode_ = -1;
@@ -205,9 +299,12 @@ bool WebMClusterParser::OnUInt(int id, int64 val) {
return true;
}
-bool WebMClusterParser::ParseBlock(bool is_simple_block, const uint8* buf,
- int size, const uint8* additional,
- int additional_size, int duration,
+bool WebMClusterParser::ParseBlock(bool is_simple_block,
+ const uint8_t* buf,
+ int size,
+ const uint8_t* additional,
+ int additional_size,
+ int duration,
int64 discard_padding) {
if (size < 4)
return false;
@@ -233,14 +330,14 @@ bool WebMClusterParser::ParseBlock(bool is_simple_block, const uint8* buf,
if (timecode & 0x8000)
timecode |= ~0xffff;
- const uint8* frame_data = buf + 4;
+ const uint8_t* frame_data = buf + 4;
int frame_size = size - (frame_data - buf);
return OnBlock(is_simple_block, track_num, timecode, duration, flags,
frame_data, frame_size, additional, additional_size,
discard_padding);
}
-bool WebMClusterParser::OnBinary(int id, const uint8* data, int size) {
+bool WebMClusterParser::OnBinary(int id, const uint8_t* data, int size) {
switch (id) {
case kWebMIdSimpleBlock:
return ParseBlock(true, data, size, NULL, 0, -1, 0);
@@ -251,7 +348,7 @@ bool WebMClusterParser::OnBinary(int id, const uint8* data, int size) {
"supported.";
return false;
}
- block_data_.reset(new uint8[size]);
+ block_data_.reset(new uint8_t[size]);
memcpy(block_data_.get(), data, size);
block_data_size_ = size;
return true;
@@ -271,7 +368,7 @@ bool WebMClusterParser::OnBinary(int id, const uint8* data, int size) {
// element's value in Big Endian format. This is done to mimic ffmpeg
// demuxer's behavior.
block_additional_data_size_ = size + sizeof(block_add_id);
- block_additional_data_.reset(new uint8[block_additional_data_size_]);
+ block_additional_data_.reset(new uint8_t[block_additional_data_size_]);
memcpy(block_additional_data_.get(), &block_add_id,
sizeof(block_add_id));
memcpy(block_additional_data_.get() + 8, data, size);
@@ -294,12 +391,15 @@ bool WebMClusterParser::OnBinary(int id, const uint8* data, int size) {
}
}
-bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num,
+bool WebMClusterParser::OnBlock(bool is_simple_block,
+ int track_num,
int timecode,
- int block_duration,
+ int block_duration,
int flags,
- const uint8* data, int size,
- const uint8* additional, int additional_size,
+ const uint8_t* data,
+ int size,
+ const uint8_t* additional,
+ int additional_size,
int64 discard_padding) {
DCHECK_GE(size, 0);
if (cluster_timecode_ == -1) {
@@ -324,9 +424,13 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num,
Track* track = NULL;
StreamParserBuffer::Type buffer_type = DemuxerStream::AUDIO;
std::string encryption_key_id;
+ base::TimeDelta encoded_duration = kNoTimestamp();
if (track_num == audio_.track_num()) {
track = &audio_;
encryption_key_id = audio_encryption_key_id_;
+ if (encryption_key_id.empty()) {
+ encoded_duration = TryGetEncodedAudioDuration(data, size);
+ }
} else if (track_num == video_.track_num()) {
track = &video_;
encryption_key_id = video_encryption_key_id_;
@@ -367,7 +471,7 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num,
if (!encryption_key_id.empty() &&
!WebMCreateDecryptConfig(
data, size,
- reinterpret_cast<const uint8*>(encryption_key_id.data()),
+ reinterpret_cast<const uint8_t*>(encryption_key_id.data()),
encryption_key_id.size(),
&decrypt_config, &data_offset)) {
return false;
@@ -387,7 +491,7 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num,
std::string id, settings, content;
WebMWebVTTParser::Parse(data, size, &id, &settings, &content);
- std::vector<uint8> side_data;
+ std::vector<uint8_t> side_data;
MakeSideData(id.begin(), id.end(),
settings.begin(), settings.end(),
&side_data);
@@ -396,7 +500,7 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num,
// type with remapped bytestream track numbers and allow multiple tracks as
// applicable. See https://crbug.com/341581.
buffer = StreamParserBuffer::CopyFrom(
- reinterpret_cast<const uint8*>(content.data()),
+ reinterpret_cast<const uint8_t*>(content.data()),
content.length(),
&side_data[0],
side_data.size(),
@@ -407,9 +511,46 @@ bool WebMClusterParser::OnBlock(bool is_simple_block, int track_num,
if (cluster_start_time_ == kNoTimestamp())
cluster_start_time_ = timestamp;
+ base::TimeDelta block_duration_time_delta = kNoTimestamp();
if (block_duration >= 0) {
- buffer->set_duration(base::TimeDelta::FromMicroseconds(
- block_duration * timecode_multiplier_));
+ block_duration_time_delta = base::TimeDelta::FromMicroseconds(
+ block_duration * timecode_multiplier_);
+ }
+
+ // Prefer encoded duration over BlockGroup->BlockDuration or
+ // TrackEntry->DefaultDuration when available. This layering violation is a
+ // workaround for http://crbug.com/396634, decreasing the likelihood of
+ // fall-back to rough estimation techniques for Blocks that lack a
+ // BlockDuration at the end of a cluster. Cross cluster durations are not
+ // feasible given flexibility of cluster ordering and MSE APIs. Duration
+ // estimation may still apply in cases of encryption and codecs for which
+ // we do not extract encoded duration. Estimates are applied at the end of
wolenetz 2015/02/05 23:05:00 not all estimates. some are inter-block, intra-clu
chcunningham 2015/02/06 03:20:09 Done.
+ // parsing once the whole cluster is parsed. See
+ // ApplyDurationEstimateIfNeeded() for more on estimation.
+ if (encoded_duration != kNoTimestamp()) {
+ DCHECK(encoded_duration != kInfiniteDuration());
+ DCHECK(encoded_duration > base::TimeDelta());
+ buffer->set_duration(encoded_duration);
+
+ DVLOG(3) << __FUNCTION__ << " : "
+ << "Using encoded duration " << encoded_duration.InSecondsF();
+
+ if (block_duration_time_delta != kNoTimestamp()) {
+ base::TimeDelta duration_difference =
+ block_duration_time_delta - encoded_duration;
+
+ const auto kWarnDurationDiff =
+ base::TimeDelta::FromMicroseconds(timecode_multiplier_ * 2);
+ if (duration_difference.magnitude() > kWarnDurationDiff) {
+ LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs)
+ << "BlockDuration "
+ << "(" << block_duration_time_delta << ") "
+ << "differs significantly from encoded duration "
+ << "(" << encoded_duration << ").";
+ }
+ }
+ } else if (block_duration_time_delta != kNoTimestamp()) {
+ buffer->set_duration(block_duration_time_delta);
} else {
DCHECK_NE(buffer_type, DemuxerStream::TEXT);
buffer->set_duration(track->default_duration());
@@ -549,7 +690,7 @@ void WebMClusterParser::Track::Reset() {
last_added_buffer_missing_duration_ = NULL;
}
-bool WebMClusterParser::Track::IsKeyframe(const uint8* data, int size) const {
+bool WebMClusterParser::Track::IsKeyframe(const uint8_t* data, int size) const {
// For now, assume that all blocks are keyframes for datatypes other than
// video. This is a valid assumption for Vorbis, WebVTT, & Opus.
if (!is_video_)

Powered by Google App Engine
This is Rietveld 408576698