Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(746)

Side by Side Diff: media/formats/webm/webm_cluster_parser.cc

Issue 883403002: Parsing of encoded duration for unencrypted opus streams. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Addressing review feedback Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "media/formats/webm/webm_cluster_parser.h" 5 #include "media/formats/webm/webm_cluster_parser.h"
6 6
7 #include <vector> 7 #include <vector>
8 8
9 #include "base/logging.h" 9 #include "base/logging.h"
10 #include "base/sys_byteorder.h" 10 #include "base/sys_byteorder.h"
11 #include "media/base/buffers.h" 11 #include "media/base/buffers.h"
12 #include "media/base/decrypt_config.h" 12 #include "media/base/decrypt_config.h"
13 #include "media/filters/webvtt_util.h" 13 #include "media/filters/webvtt_util.h"
14 #include "media/formats/webm/webm_constants.h" 14 #include "media/formats/webm/webm_constants.h"
15 #include "media/formats/webm/webm_crypto_helpers.h" 15 #include "media/formats/webm/webm_crypto_helpers.h"
16 #include "media/formats/webm/webm_webvtt_parser.h" 16 #include "media/formats/webm/webm_webvtt_parser.h"
17 17
18 namespace media { 18 namespace media {
19 19
// Per-frame durations for Opus packets, one entry per value of the 5-bit
// config field taken from the top bits of the TOC byte. The table has 32
// entries, so every index ReadOpusDuration() can produce (0..31) is in range.
// ReadOpusDuration() multiplies an entry by the packet frame count and passes
// the product to base::TimeDelta::FromMicroseconds(), so the values are
// microseconds.
20 const uint16 WebMClusterParser::kOpusFrameDurationsMu[] = {
21 10000, 20000, 40000, 60000, 10000, 20000, 40000, 60000, 10000, 20000, 40000,
22 60000, 10000, 20000, 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000,
23 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000, 10000, 20000};
24
20 WebMClusterParser::WebMClusterParser( 25 WebMClusterParser::WebMClusterParser(
21 int64 timecode_scale, 26 int64 timecode_scale,
22 int audio_track_num, 27 int audio_track_num,
23 base::TimeDelta audio_default_duration, 28 base::TimeDelta audio_default_duration,
24 int video_track_num, 29 int video_track_num,
25 base::TimeDelta video_default_duration, 30 base::TimeDelta video_default_duration,
26 const WebMTracksParser::TextTracks& text_tracks, 31 const WebMTracksParser::TextTracks& text_tracks,
27 const std::set<int64>& ignored_tracks, 32 const std::set<int64>& ignored_tracks,
28 const std::string& audio_encryption_key_id, 33 const std::string& audio_encryption_key_id,
29 const std::string& video_encryption_key_id, 34 const std::string& video_encryption_key_id,
35 const AudioCodec audio_codec,
30 const LogCB& log_cb) 36 const LogCB& log_cb)
31 : timecode_multiplier_(timecode_scale / 1000.0), 37 : timecode_multiplier_(timecode_scale / 1000.0),
32 ignored_tracks_(ignored_tracks), 38 ignored_tracks_(ignored_tracks),
33 audio_encryption_key_id_(audio_encryption_key_id), 39 audio_encryption_key_id_(audio_encryption_key_id),
34 video_encryption_key_id_(video_encryption_key_id), 40 video_encryption_key_id_(video_encryption_key_id),
41 audio_codec_(audio_codec),
35 parser_(kWebMIdCluster, this), 42 parser_(kWebMIdCluster, this),
36 last_block_timecode_(-1), 43 last_block_timecode_(-1),
37 block_data_size_(-1), 44 block_data_size_(-1),
38 block_duration_(-1), 45 block_duration_(-1),
39 block_add_id_(-1), 46 block_add_id_(-1),
40 block_additional_data_size_(0), 47 block_additional_data_size_(0),
41 discard_padding_(-1), 48 discard_padding_(-1),
42 cluster_timecode_(-1), 49 cluster_timecode_(-1),
43 cluster_start_time_(kNoTimestamp()), 50 cluster_start_time_(kNoTimestamp()),
44 cluster_ended_(false), 51 cluster_ended_(false),
(...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after
133 itr != text_track_map_.end(); 140 itr != text_track_map_.end();
134 ++itr) { 141 ++itr) {
135 const BufferQueue& text_buffers = itr->second.ready_buffers(); 142 const BufferQueue& text_buffers = itr->second.ready_buffers();
136 if (!text_buffers.empty()) 143 if (!text_buffers.empty())
137 text_buffers_map_.insert(std::make_pair(itr->first, text_buffers)); 144 text_buffers_map_.insert(std::make_pair(itr->first, text_buffers));
138 } 145 }
139 146
140 return text_buffers_map_; 147 return text_buffers_map_;
141 } 148 }
142 149
// Returns the duration encoded in the audio packet itself when the codec is
// one this parser can read it from, or kNoTimestamp() otherwise.
// Only Opus is handled: |data| and |size| are forwarded unchanged to
// ReadOpusDuration(). Every other value of audio_codec_ yields
// kNoTimestamp().
// The caller visible in this file invokes this only for audio-track blocks
// whose encryption key id is empty.
150 base::TimeDelta WebMClusterParser::TryGetEncodedAudioDuration(const uint8* data,
151 int size) {
152 if (audio_codec_ == kCodecOpus) {
153 return ReadOpusDuration(data, size);
154 }
155 // TODO(wolenetz/chcunningham): Implement duration reading for Vorbis.
wolenetz 2015/02/03 22:47:02 nits: let's start a new or reference an existing c
chcunningham 2015/02/05 02:48:21 Done.
156
157 return kNoTimestamp();
158 }
159
// Computes the duration of an Opus packet from its header bytes alone.
// Reads the TOC byte (data[0]) and, for code 3 packets, the frame count byte
// (data[1]); no frame payload is inspected.
//   data: start of the Opus packet; only data[0] and data[1] are read.
//   size: number of bytes available at |data|.
// Returns kOpusFrameDurationsMu[config] * frame_count converted with
// base::TimeDelta::FromMicroseconds(), or kNoTimestamp() when the packet is
// too short to hold the bytes that must be read.
160 base::TimeDelta WebMClusterParser::ReadOpusDuration(const uint8* data,
161 int size) {
162 // Masks for Opus TOC and Frame Count bytes. See http://goo.gl/2RmoxA
wolenetz 2015/02/03 22:47:01 ditto full URL... sorry
chcunningham 2015/02/05 02:48:21 Done.
163 static const uint8 kTocConfigMask = 0xf8;
wolenetz 2015/02/03 22:47:01 ditto: _t
chcunningham 2015/02/05 02:48:21 Done.
164 static const uint8 kTocFrameCountCodeMask = 0x03;
165 static const uint8 kFrameCountMask = 0x3f;
166 static const int kFrameCountMin = 1;
167 static const int kFrameCountMax = 48;
168
169 if (size < 1) {
170 MEDIA_LOG(log_cb_) << "Invalid zero-byte Opus packet.";
171 return kNoTimestamp();
172 }
173
174 // Frame count type described by last 2 bits of Opus TOC byte.
175 int frame_count_type = data[0] & kTocFrameCountCodeMask;
176
177 int frame_count = 0;
178 switch (frame_count_type) {
179 case 0:
180 frame_count = 1;
181 break;
182 case 1:
183 case 2:
184 frame_count = 2;
185 break;
186 case 3:
187 // Type 3 indicates an arbitrary frame count described in the next byte.
188 if (size < 2) {
189 MEDIA_LOG(log_cb_) << "Second byte missing from 'Code 3' Opus packet.";
190 return kNoTimestamp();
191 }
192 frame_count = data[1] & kFrameCountMask;
// NOTE(review): frame_count is read straight from the packet and can be 0
// after masking with 0x3f, so this DCHECK can fire on malformed input in
// DCHECK-enabled builds before the range check below gets to log it.
193 DCHECK_GE(frame_count, 1);
194 if (frame_count < kFrameCountMin || frame_count > kFrameCountMax) {
195 MEDIA_LOG(log_cb_) << "Illegal Opus packet frame_count: " << frame_count
wolenetz 2015/02/03 22:47:01 nit: comment that we are explicitly allowing these
wolenetz 2015/02/03 22:47:01 nit: s/frame_count:/frame count:/
chcunningham 2015/02/05 02:48:21 Done.
chcunningham 2015/02/05 02:48:21 Done.
196 << " Should be in range [" << kFrameCountMin << ", "
197 << kFrameCountMax << "]";
198 }
// NOTE(review): an out-of-range count is only logged. Execution continues
// and the count is still used in the returned duration, so a count of 0
// produces a zero duration rather than kNoTimestamp(). The caller visible
// later in this file DCHECKs that the encoded duration is greater than zero.
199 break;
// Not reachable: frame_count_type is masked with 0x03, so it is always 0..3.
200 default:
201 MEDIA_LOG(log_cb_) << "Unexpected Opus frame count type: "
202 << frame_count_type;
203 return kNoTimestamp();
204 }
205
// The top five bits of the TOC byte select the duration table entry (0..31).
206 int opusConfig = (data[0] & kTocConfigMask) >> 3;
207 return base::TimeDelta::FromMicroseconds(kOpusFrameDurationsMu[opusConfig] *
wolenetz 2015/02/03 22:47:01 nit: kFrameCountMax is lower for longer frame size
chcunningham 2015/02/05 02:48:21 Done.
208 frame_count);
209 }
210
143 WebMParserClient* WebMClusterParser::OnListStart(int id) { 211 WebMParserClient* WebMClusterParser::OnListStart(int id) {
144 if (id == kWebMIdCluster) { 212 if (id == kWebMIdCluster) {
145 cluster_timecode_ = -1; 213 cluster_timecode_ = -1;
146 cluster_start_time_ = kNoTimestamp(); 214 cluster_start_time_ = kNoTimestamp();
147 } else if (id == kWebMIdBlockGroup) { 215 } else if (id == kWebMIdBlockGroup) {
148 block_data_.reset(); 216 block_data_.reset();
149 block_data_size_ = -1; 217 block_data_size_ = -1;
150 block_duration_ = -1; 218 block_duration_ = -1;
151 discard_padding_ = -1; 219 discard_padding_ = -1;
152 discard_padding_set_ = false; 220 discard_padding_set_ = false;
(...skipping 164 matching lines...) Expand 10 before | Expand all | Expand 10 after
317 385
318 if (last_block_timecode_ != -1 && timecode < last_block_timecode_) { 386 if (last_block_timecode_ != -1 && timecode < last_block_timecode_) {
319 MEDIA_LOG(log_cb_) 387 MEDIA_LOG(log_cb_)
320 << "Got a block with a timecode before the previous block."; 388 << "Got a block with a timecode before the previous block.";
321 return false; 389 return false;
322 } 390 }
323 391
324 Track* track = NULL; 392 Track* track = NULL;
325 StreamParserBuffer::Type buffer_type = DemuxerStream::AUDIO; 393 StreamParserBuffer::Type buffer_type = DemuxerStream::AUDIO;
326 std::string encryption_key_id; 394 std::string encryption_key_id;
395 base::TimeDelta encoded_duration = kNoTimestamp();
327 if (track_num == audio_.track_num()) { 396 if (track_num == audio_.track_num()) {
328 track = &audio_; 397 track = &audio_;
329 encryption_key_id = audio_encryption_key_id_; 398 encryption_key_id = audio_encryption_key_id_;
399 if (encryption_key_id.empty()) {
400 encoded_duration = TryGetEncodedAudioDuration(data, size);
401 }
330 } else if (track_num == video_.track_num()) { 402 } else if (track_num == video_.track_num()) {
331 track = &video_; 403 track = &video_;
332 encryption_key_id = video_encryption_key_id_; 404 encryption_key_id = video_encryption_key_id_;
333 buffer_type = DemuxerStream::VIDEO; 405 buffer_type = DemuxerStream::VIDEO;
334 } else if (ignored_tracks_.find(track_num) != ignored_tracks_.end()) { 406 } else if (ignored_tracks_.find(track_num) != ignored_tracks_.end()) {
335 return true; 407 return true;
336 } else if (Track* const text_track = FindTextTrack(track_num)) { 408 } else if (Track* const text_track = FindTextTrack(track_num)) {
337 if (is_simple_block) // BlockGroup is required for WebVTT cues 409 if (is_simple_block) // BlockGroup is required for WebVTT cues
338 return false; 410 return false;
339 if (block_duration < 0) // not specified 411 if (block_duration < 0) // not specified
(...skipping 60 matching lines...) Expand 10 before | Expand all | Expand 10 after
400 content.length(), 472 content.length(),
401 &side_data[0], 473 &side_data[0],
402 side_data.size(), 474 side_data.size(),
403 true, buffer_type, track_num); 475 true, buffer_type, track_num);
404 } 476 }
405 477
406 buffer->set_timestamp(timestamp); 478 buffer->set_timestamp(timestamp);
407 if (cluster_start_time_ == kNoTimestamp()) 479 if (cluster_start_time_ == kNoTimestamp())
408 cluster_start_time_ = timestamp; 480 cluster_start_time_ = timestamp;
409 481
482 base::TimeDelta block_duration_time_delta = kNoTimestamp();
410 if (block_duration >= 0) { 483 if (block_duration >= 0) {
411 buffer->set_duration(base::TimeDelta::FromMicroseconds( 484 block_duration_time_delta = base::TimeDelta::FromMicroseconds(
412 block_duration * timecode_multiplier_)); 485 block_duration * timecode_multiplier_);
486 }
487
488 // Prefer encoded duration when available. This layering violatiaon is a
wolenetz 2015/02/03 22:47:01 nit: s/when available/over BlockGroup->BlockDurati
wolenetz 2015/02/03 22:47:02 nit: s/violatiaon/violation/
chcunningham 2015/02/05 02:48:21 Done. Oh, and I found a sublime text spell checker
chcunningham 2015/02/05 02:48:22 Done.
wolenetz 2015/02/05 23:04:59 Please share link offline with me :)
489 // workaround for http://crbug.com/396634, decreasing the likelihood of
490 // fallback to rough estimation techniques for Blocks that lack a
491 // BlockDuration at the end of a cluster. Cross cluster durations are not
492 // feasabile given flexibility of cluster ordering and MSE APIs. Duration
wolenetz 2015/02/03 22:47:01 nit: s/feasabile/feasible/
chcunningham 2015/02/05 02:48:21 Done.
493 // estimation may still apply in cases of encryption and unsupported codecs.
wolenetz 2015/02/03 22:47:01 nit: s/unsupported codecs/codecs for which we do n
chcunningham 2015/02/05 02:48:21 Done.
494 // Estimates are applied at the end of parsing once the whole cluster is
495 // parsed. See ApplyDurationEstimateIfNeeded for more on estimation.
wolenetz 2015/02/03 22:47:01 nit: add ()
chcunningham 2015/02/05 02:48:21 Done.
496 if (encoded_duration != kNoTimestamp()) {
497 DCHECK(encoded_duration != kInfiniteDuration());
498 DCHECK(encoded_duration > base::TimeDelta());
499 buffer->set_duration(encoded_duration);
500
501 DVLOG(3) << __FUNCTION__ << " : "
502 << "Using encoded duartion " << encoded_duration.InSecondsF();
wolenetz 2015/02/03 22:47:01 nit: s/duartion/duration/
chcunningham 2015/02/05 02:48:21 Done.
503
504 if (block_duration_time_delta != kNoTimestamp()) {
505 base::TimeDelta duration_difference =
506 block_duration_time_delta - encoded_duration;
507
508 const auto kWarnDurationDiff = base::TimeDelta::FromMilliseconds(10);
wolenetz 2015/02/03 22:47:01 Hmmm. 10ms seems a little arbitrary (and big). Con
chcunningham 2015/02/05 02:48:21 Done.
509 if (duration_difference.magnitude() > kWarnDurationDiff) {
510 MEDIA_LOG(log_cb_) << "BlockDuration "
511 << "(" << block_duration_time_delta << ") "
wolenetz 2015/02/03 22:47:01 nit: Units. Is this in secondsF? (use .InSecondsF(
chcunningham 2015/02/05 02:48:21 It is in secondsF. I can be explicit, but I think
wolenetz 2015/02/05 23:04:59 Oh. I didn't realize that. And it comes with nice
512 << "differs signifcantly from encoded duration "
wolenetz 2015/02/03 22:47:01 nit:s/signifcantly/significantly/
chcunningham 2015/02/05 02:48:21 Done.
513 << "(" << encoded_duration << ").";
wolenetz 2015/02/03 22:47:01 nit ditto: InSecondsF()
chcunningham 2015/02/05 02:48:21 See other reply
wolenetz 2015/02/05 23:04:59 Acknowledged.
514 }
515 }
516 } else if (block_duration_time_delta != kNoTimestamp()) {
517 buffer->set_duration(block_duration_time_delta);
413 } else { 518 } else {
414 DCHECK_NE(buffer_type, DemuxerStream::TEXT); 519 DCHECK_NE(buffer_type, DemuxerStream::TEXT);
415 buffer->set_duration(track->default_duration()); 520 buffer->set_duration(track->default_duration());
416 } 521 }
417 522
418 if (discard_padding != 0) { 523 if (discard_padding != 0) {
419 buffer->set_discard_padding(std::make_pair( 524 buffer->set_discard_padding(std::make_pair(
420 base::TimeDelta(), 525 base::TimeDelta(),
421 base::TimeDelta::FromMicroseconds(discard_padding / 1000))); 526 base::TimeDelta::FromMicroseconds(discard_padding / 1000)));
422 } 527 }
(...skipping 255 matching lines...) Expand 10 before | Expand all | Expand 10 after
678 WebMClusterParser::FindTextTrack(int track_num) { 783 WebMClusterParser::FindTextTrack(int track_num) {
679 const TextTrackMap::iterator it = text_track_map_.find(track_num); 784 const TextTrackMap::iterator it = text_track_map_.find(track_num);
680 785
681 if (it == text_track_map_.end()) 786 if (it == text_track_map_.end())
682 return NULL; 787 return NULL;
683 788
684 return &it->second; 789 return &it->second;
685 } 790 }
686 791
687 } // namespace media 792 } // namespace media
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698