Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(33)

Side by Side Diff: media/formats/webm/webm_cluster_parser.cc

Issue 1018373003: Improving WebM video duration estimation. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Adding limited media log (10 times max) for WebM duration estimates. Created 5 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "media/formats/webm/webm_cluster_parser.h" 5 #include "media/formats/webm/webm_cluster_parser.h"
6 6
7 #include <vector> 7 #include <vector>
8 8
9 #include "base/logging.h" 9 #include "base/logging.h"
10 #include "base/sys_byteorder.h" 10 #include "base/sys_byteorder.h"
11 #include "media/base/buffers.h" 11 #include "media/base/buffers.h"
12 #include "media/base/decrypt_config.h" 12 #include "media/base/decrypt_config.h"
13 #include "media/filters/webvtt_util.h" 13 #include "media/filters/webvtt_util.h"
14 #include "media/formats/webm/webm_constants.h" 14 #include "media/formats/webm/webm_constants.h"
15 #include "media/formats/webm/webm_crypto_helpers.h" 15 #include "media/formats/webm/webm_crypto_helpers.h"
16 #include "media/formats/webm/webm_webvtt_parser.h" 16 #include "media/formats/webm/webm_webvtt_parser.h"
17 17
18 namespace media { 18 namespace media {
19 19
20 const uint16_t WebMClusterParser::kOpusFrameDurationsMu[] = { 20 const uint16_t WebMClusterParser::kOpusFrameDurationsMu[] = {
21 10000, 20000, 40000, 60000, 10000, 20000, 40000, 60000, 10000, 20000, 40000, 21 10000, 20000, 40000, 60000, 10000, 20000, 40000, 60000, 10000, 20000, 40000,
22 60000, 10000, 20000, 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000, 22 60000, 10000, 20000, 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000,
23 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000, 10000, 20000}; 23 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000, 10000, 20000};
24 24
25 enum { 25 enum {
26 // Limits the number of MEDIA_LOG() calls in the path of reading encoded 26 // Limits the number of MEDIA_LOG() calls in the path of reading encoded
27 // duration to avoid spamming for corrupted data. 27 // duration to avoid spamming for corrupted data.
28 kMaxDurationLogs = 10, 28 kMaxDurationErrorLogs = 10,
29 // Limits the number of MEDIA_LOG() calls warning the user that buffer
30 // durations have been estimated.
wolenetz 2015/03/28 00:26:06 nit: regrammarize "a buffer durations have" ;)
chcunningham 2015/04/13 23:25:18 Done.
31 kMaxDurationEstimateLogs = 10,
29 }; 32 };
30 33
31 WebMClusterParser::WebMClusterParser( 34 WebMClusterParser::WebMClusterParser(
32 int64 timecode_scale, 35 int64 timecode_scale,
33 int audio_track_num, 36 int audio_track_num,
34 base::TimeDelta audio_default_duration, 37 base::TimeDelta audio_default_duration,
35 int video_track_num, 38 int video_track_num,
36 base::TimeDelta video_default_duration, 39 base::TimeDelta video_default_duration,
37 const WebMTracksParser::TextTracks& text_tracks, 40 const WebMTracksParser::TextTracks& text_tracks,
38 const std::set<int64>& ignored_tracks, 41 const std::set<int64>& ignored_tracks,
(...skipping 139 matching lines...) Expand 10 before | Expand all | Expand 10 after
178 int size) { 181 int size) {
179 // Masks and constants for Opus packets. See 182 // Masks and constants for Opus packets. See
180 // https://tools.ietf.org/html/rfc6716#page-14 183 // https://tools.ietf.org/html/rfc6716#page-14
181 static const uint8_t kTocConfigMask = 0xf8; 184 static const uint8_t kTocConfigMask = 0xf8;
182 static const uint8_t kTocFrameCountCodeMask = 0x03; 185 static const uint8_t kTocFrameCountCodeMask = 0x03;
183 static const uint8_t kFrameCountMask = 0x3f; 186 static const uint8_t kFrameCountMask = 0x3f;
184 static const base::TimeDelta kPacketDurationMax = 187 static const base::TimeDelta kPacketDurationMax =
185 base::TimeDelta::FromMilliseconds(120); 188 base::TimeDelta::FromMilliseconds(120);
186 189
187 if (size < 1) { 190 if (size < 1) {
188 LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs) 191 LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationErrorLogs)
189 << "Invalid zero-byte Opus packet; demuxed block duration may be " 192 << "Invalid zero-byte Opus packet; demuxed block duration may be "
190 "imprecise."; 193 "imprecise.";
191 return kNoTimestamp(); 194 return kNoTimestamp();
192 } 195 }
193 196
194 // Frame count type described by last 2 bits of Opus TOC byte. 197 // Frame count type described by last 2 bits of Opus TOC byte.
195 int frame_count_type = data[0] & kTocFrameCountCodeMask; 198 int frame_count_type = data[0] & kTocFrameCountCodeMask;
196 199
197 int frame_count = 0; 200 int frame_count = 0;
198 switch (frame_count_type) { 201 switch (frame_count_type) {
199 case 0: 202 case 0:
200 frame_count = 1; 203 frame_count = 1;
201 break; 204 break;
202 case 1: 205 case 1:
203 case 2: 206 case 2:
204 frame_count = 2; 207 frame_count = 2;
205 break; 208 break;
206 case 3: 209 case 3:
207 // Type 3 indicates an arbitrary frame count described in the next byte. 210 // Type 3 indicates an arbitrary frame count described in the next byte.
208 if (size < 2) { 211 if (size < 2) {
209 LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs) 212 LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationErrorLogs)
210 << "Second byte missing from 'Code 3' Opus packet; demuxed block " 213 << "Second byte missing from 'Code 3' Opus packet; demuxed block "
211 "duration may be imprecise."; 214 "duration may be imprecise.";
212 return kNoTimestamp(); 215 return kNoTimestamp();
213 } 216 }
214 217
215 frame_count = data[1] & kFrameCountMask; 218 frame_count = data[1] & kFrameCountMask;
216 219
217 if (frame_count == 0) { 220 if (frame_count == 0) {
218 LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs) 221 LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationErrorLogs)
219 << "Illegal 'Code 3' Opus packet with frame count zero; demuxed " 222 << "Illegal 'Code 3' Opus packet with frame count zero; demuxed "
220 "block duration may be imprecise."; 223 "block duration may be imprecise.";
221 return kNoTimestamp(); 224 return kNoTimestamp();
222 } 225 }
223 226
224 break; 227 break;
225 default: 228 default:
226 LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs) 229 LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationErrorLogs)
227 << "Unexpected Opus frame count type: " << frame_count_type << "; " 230 << "Unexpected Opus frame count type: " << frame_count_type << "; "
228 << "demuxed block duration may be imprecise."; 231 << "demuxed block duration may be imprecise.";
229 return kNoTimestamp(); 232 return kNoTimestamp();
230 } 233 }
231 234
232 int opusConfig = (data[0] & kTocConfigMask) >> 3; 235 int opusConfig = (data[0] & kTocConfigMask) >> 3;
233 CHECK_GE(opusConfig, 0); 236 CHECK_GE(opusConfig, 0);
234 CHECK_LT(opusConfig, static_cast<int>(arraysize(kOpusFrameDurationsMu))); 237 CHECK_LT(opusConfig, static_cast<int>(arraysize(kOpusFrameDurationsMu)));
235 238
236 DCHECK_GT(frame_count, 0); 239 DCHECK_GT(frame_count, 0);
237 base::TimeDelta duration = base::TimeDelta::FromMicroseconds( 240 base::TimeDelta duration = base::TimeDelta::FromMicroseconds(
238 kOpusFrameDurationsMu[opusConfig] * frame_count); 241 kOpusFrameDurationsMu[opusConfig] * frame_count);
239 242
240 if (duration > kPacketDurationMax) { 243 if (duration > kPacketDurationMax) {
241 // Intentionally allowing packet to pass through for now. Decoder should 244 // Intentionally allowing packet to pass through for now. Decoder should
242 // either handle or fail gracefully. MEDIA_LOG as breadcrumbs in case 245 // either handle or fail gracefully. MEDIA_LOG as breadcrumbs in case
243 // things go sideways. 246 // things go sideways.
244 LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs) 247 LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationErrorLogs)
245 << "Warning, demuxed Opus packet with encoded duration: " << duration 248 << "Warning, demuxed Opus packet with encoded duration: " << duration
246 << ". Should be no greater than " << kPacketDurationMax; 249 << ". Should be no greater than " << kPacketDurationMax;
247 } 250 }
248 251
249 return duration; 252 return duration;
250 } 253 }
251 254
252 WebMParserClient* WebMClusterParser::OnListStart(int id) { 255 WebMParserClient* WebMClusterParser::OnListStart(int id) {
253 if (id == kWebMIdCluster) { 256 if (id == kWebMIdCluster) {
254 cluster_timecode_ = -1; 257 cluster_timecode_ = -1;
(...skipping 296 matching lines...) Expand 10 before | Expand all | Expand 10 after
551 DVLOG(3) << __FUNCTION__ << " : " 554 DVLOG(3) << __FUNCTION__ << " : "
552 << "Using encoded duration " << encoded_duration.InSecondsF(); 555 << "Using encoded duration " << encoded_duration.InSecondsF();
553 556
554 if (block_duration_time_delta != kNoTimestamp()) { 557 if (block_duration_time_delta != kNoTimestamp()) {
555 base::TimeDelta duration_difference = 558 base::TimeDelta duration_difference =
556 block_duration_time_delta - encoded_duration; 559 block_duration_time_delta - encoded_duration;
557 560
558 const auto kWarnDurationDiff = 561 const auto kWarnDurationDiff =
559 base::TimeDelta::FromMicroseconds(timecode_multiplier_ * 2); 562 base::TimeDelta::FromMicroseconds(timecode_multiplier_ * 2);
560 if (duration_difference.magnitude() > kWarnDurationDiff) { 563 if (duration_difference.magnitude() > kWarnDurationDiff) {
561 LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationLogs) 564 LIMITED_MEDIA_LOG(log_cb_, num_duration_errors_, kMaxDurationErrorLogs)
562 << "BlockDuration " 565 << "BlockDuration "
563 << "(" << block_duration_time_delta << ") " 566 << "(" << block_duration_time_delta << ") "
564 << "differs significantly from encoded duration " 567 << "differs significantly from encoded duration "
565 << "(" << encoded_duration << ")."; 568 << "(" << encoded_duration << ").";
566 } 569 }
567 } 570 }
568 } else if (block_duration_time_delta != kNoTimestamp()) { 571 } else if (block_duration_time_delta != kNoTimestamp()) {
569 buffer->set_duration(block_duration_time_delta); 572 buffer->set_duration(block_duration_time_delta);
570 } else { 573 } else {
571 DCHECK_NE(buffer_type, DemuxerStream::TEXT); 574 DCHECK_NE(buffer_type, DemuxerStream::TEXT);
572 buffer->set_duration(track->default_duration()); 575 buffer->set_duration(track->default_duration());
573 } 576 }
574 577
575 if (discard_padding != 0) { 578 if (discard_padding != 0) {
576 buffer->set_discard_padding(std::make_pair( 579 buffer->set_discard_padding(std::make_pair(
577 base::TimeDelta(), 580 base::TimeDelta(),
578 base::TimeDelta::FromMicroseconds(discard_padding / 1000))); 581 base::TimeDelta::FromMicroseconds(discard_padding / 1000)));
579 } 582 }
580 583
581 return track->AddBuffer(buffer); 584 return track->AddBuffer(buffer);
582 } 585 }
583 586
584 WebMClusterParser::Track::Track(int track_num, 587 WebMClusterParser::Track::Track(int track_num,
585 bool is_video, 588 bool is_video,
586 base::TimeDelta default_duration, 589 base::TimeDelta default_duration,
587 const LogCB& log_cb) 590 const LogCB& log_cb)
588 : track_num_(track_num), 591 : num_duration_estimates_(0),
592 track_num_(track_num),
589 is_video_(is_video), 593 is_video_(is_video),
590 default_duration_(default_duration), 594 default_duration_(default_duration),
591 estimated_next_frame_duration_(kNoTimestamp()), 595 estimated_next_frame_duration_(kNoTimestamp()),
592 log_cb_(log_cb) { 596 log_cb_(log_cb) {
593 DCHECK(default_duration_ == kNoTimestamp() || 597 DCHECK(default_duration_ == kNoTimestamp() ||
594 default_duration_ > base::TimeDelta()); 598 default_duration_ > base::TimeDelta());
595 } 599 }
596 600
597 WebMClusterParser::Track::~Track() {} 601 WebMClusterParser::Track::~Track() {}
598 602
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
671 return true; 675 return true;
672 } 676 }
673 677
674 return QueueBuffer(buffer); 678 return QueueBuffer(buffer);
675 } 679 }
676 680
677 void WebMClusterParser::Track::ApplyDurationEstimateIfNeeded() { 681 void WebMClusterParser::Track::ApplyDurationEstimateIfNeeded() {
678 if (!last_added_buffer_missing_duration_.get()) 682 if (!last_added_buffer_missing_duration_.get())
679 return; 683 return;
680 684
681 last_added_buffer_missing_duration_->set_duration(GetDurationEstimate()); 685 base::TimeDelta estimated_duration = GetDurationEstimate();
686 last_added_buffer_missing_duration_->set_duration(estimated_duration);
687
688 // Exposing estimation so splicing/overlap frame processing can make informed
689 // decisions downstream.
690 last_added_buffer_missing_duration_->set_is_duration_estimated(true);
691
692 LIMITED_MEDIA_LOG(log_cb_, num_duration_estimates_, kMaxDurationEstimateLogs)
693 << "Estimating duration to be " << estimated_duration << " for the "
694 << "last (Simple)Block in the Cluster. Use BlockDuration at the end of "
wolenetz 2015/03/28 00:26:06 nit: "in the Cluster for this Track". And "Instead
chcunningham 2015/04/13 23:25:18 Done.
695 << "Clusters to avoid estimation.";
682 696
683 DVLOG(2) << "ApplyDurationEstimateIfNeeded() : new dur : " 697 DVLOG(2) << "ApplyDurationEstimateIfNeeded() : new dur : "
684 << " ts " 698 << " ts "
685 << last_added_buffer_missing_duration_->timestamp().InSecondsF() 699 << last_added_buffer_missing_duration_->timestamp().InSecondsF()
686 << " dur " 700 << " dur "
687 << last_added_buffer_missing_duration_->duration().InSecondsF() 701 << last_added_buffer_missing_duration_->duration().InSecondsF()
688 << " kf " << last_added_buffer_missing_duration_->is_key_frame() 702 << " kf " << last_added_buffer_missing_duration_->is_key_frame()
689 << " size " << last_added_buffer_missing_duration_->data_size(); 703 << " size " << last_added_buffer_missing_duration_->data_size();
690 704
691 // Don't use the applied duration as a future estimation (don't use 705 // Don't use the applied duration as a future estimation (don't use
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after
739 DecodeTimestamp previous_buffers_timestamp = buffers_.empty() ? 753 DecodeTimestamp previous_buffers_timestamp = buffers_.empty() ?
740 DecodeTimestamp() : buffers_.back()->GetDecodeTimestamp(); 754 DecodeTimestamp() : buffers_.back()->GetDecodeTimestamp();
741 CHECK(previous_buffers_timestamp <= buffer->GetDecodeTimestamp()); 755 CHECK(previous_buffers_timestamp <= buffer->GetDecodeTimestamp());
742 756
743 base::TimeDelta duration = buffer->duration(); 757 base::TimeDelta duration = buffer->duration();
744 if (duration < base::TimeDelta() || duration == kNoTimestamp()) { 758 if (duration < base::TimeDelta() || duration == kNoTimestamp()) {
745 MEDIA_LOG(log_cb_) << "Invalid buffer duration: " << duration.InSecondsF(); 759 MEDIA_LOG(log_cb_) << "Invalid buffer duration: " << duration.InSecondsF();
746 return false; 760 return false;
747 } 761 }
748 762
749 // The estimated frame duration is the minimum non-zero duration since the 763 // The estimated frame duration is the minimum (for audio) or the maximum
750 // last initialization segment. The minimum is used to ensure frame durations 764 // (for video) non-zero duration since the last initialization segment. The
751 // aren't overestimated. 765 // minimum is used for audio to ensure frame durations aren't overestimated,
766 // triggering unnecessary frame splicing. For video, splicing does not apply,
767 // so maximum is used and overlap is simply resolved by showing the
wolenetz 2015/03/28 00:26:06 nit: is is
chcunningham 2015/04/13 23:25:18 Done.
768 // later of the overlapping frames at its given PTS, effectively trimming down
769 // the over-estimated duration of the previous frame.
770 // TODO(chcunningham): Use max for audio and disable splicing whenever
771 // estimated buffers are encountered.
752 if (duration > base::TimeDelta()) { 772 if (duration > base::TimeDelta()) {
773 base::TimeDelta orig_duration_estimate = estimated_next_frame_duration_;
753 if (estimated_next_frame_duration_ == kNoTimestamp()) { 774 if (estimated_next_frame_duration_ == kNoTimestamp()) {
754 estimated_next_frame_duration_ = duration; 775 estimated_next_frame_duration_ = duration;
776 } else if (is_video_) {
777 estimated_next_frame_duration_ =
778 std::max(duration, estimated_next_frame_duration_);
755 } else { 779 } else {
756 estimated_next_frame_duration_ = 780 estimated_next_frame_duration_ =
757 std::min(duration, estimated_next_frame_duration_); 781 std::min(duration, estimated_next_frame_duration_);
758 } 782 }
783
784 if (orig_duration_estimate != estimated_next_frame_duration_) {
785 DVLOG(3) << "Updated duration estimate:"
786 << orig_duration_estimate
787 << " -> "
788 << estimated_next_frame_duration_
789 << " at dts: "
wolenetz 2015/03/28 00:26:06 nit: webm doesn't differentiate dts/pts. s/dts/tim
chcunningham 2015/04/13 23:25:18 Done. Re-cleanup: so you'd like a bug to change al
wolenetz 2015/04/15 02:55:23 Thanks. IIRC, WebM container doesn't mention PTS o
790 << buffer->GetDecodeTimestamp().InSecondsF();
791 }
759 } 792 }
760 793
761 buffers_.push_back(buffer); 794 buffers_.push_back(buffer);
762 return true; 795 return true;
763 } 796 }
764 797
765 base::TimeDelta WebMClusterParser::Track::GetDurationEstimate() { 798 base::TimeDelta WebMClusterParser::Track::GetDurationEstimate() {
766 base::TimeDelta duration = estimated_next_frame_duration_; 799 base::TimeDelta duration = estimated_next_frame_duration_;
767 if (duration != kNoTimestamp()) { 800 if (duration != kNoTimestamp()) {
768 DVLOG(3) << __FUNCTION__ << " : using estimated duration"; 801 DVLOG(3) << __FUNCTION__ << " : using estimated duration";
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
835 WebMClusterParser::FindTextTrack(int track_num) { 868 WebMClusterParser::FindTextTrack(int track_num) {
836 const TextTrackMap::iterator it = text_track_map_.find(track_num); 869 const TextTrackMap::iterator it = text_track_map_.find(track_num);
837 870
838 if (it == text_track_map_.end()) 871 if (it == text_track_map_.end())
839 return NULL; 872 return NULL;
840 873
841 return &it->second; 874 return &it->second;
842 } 875 }
843 876
844 } // namespace media 877 } // namespace media
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698