Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(878)

Side by Side Diff: media/formats/webm/webm_cluster_parser.cc

Issue 1018373003: Improving WebM video duration estimation. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Fixing try failure, remove unused variable for some builds. Created 5 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "media/formats/webm/webm_cluster_parser.h" 5 #include "media/formats/webm/webm_cluster_parser.h"
6 6
7 #include <vector> 7 #include <vector>
8 8
9 #include "base/logging.h" 9 #include "base/logging.h"
10 #include "base/sys_byteorder.h" 10 #include "base/sys_byteorder.h"
11 #include "media/base/buffers.h" 11 #include "media/base/buffers.h"
12 #include "media/base/decrypt_config.h" 12 #include "media/base/decrypt_config.h"
13 #include "media/filters/webvtt_util.h" 13 #include "media/filters/webvtt_util.h"
14 #include "media/formats/webm/webm_constants.h" 14 #include "media/formats/webm/webm_constants.h"
15 #include "media/formats/webm/webm_crypto_helpers.h" 15 #include "media/formats/webm/webm_crypto_helpers.h"
16 #include "media/formats/webm/webm_webvtt_parser.h" 16 #include "media/formats/webm/webm_webvtt_parser.h"
17 17
18 namespace media { 18 namespace media {
19 19
20 const uint16_t WebMClusterParser::kOpusFrameDurationsMu[] = { 20 const uint16_t WebMClusterParser::kOpusFrameDurationsMu[] = {
21 10000, 20000, 40000, 60000, 10000, 20000, 40000, 60000, 10000, 20000, 40000, 21 10000, 20000, 40000, 60000, 10000, 20000, 40000, 60000, 10000, 20000, 40000,
22 60000, 10000, 20000, 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000, 22 60000, 10000, 20000, 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000,
23 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000, 10000, 20000}; 23 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000, 10000, 20000};
24 24
25 enum { 25 enum {
26 // Limits the number of MEDIA_LOG() calls in the path of reading encoded 26 // Limits the number of MEDIA_LOG() calls in the path of reading encoded
27 // duration to avoid spamming for corrupted data. 27 // duration to avoid spamming for corrupted data.
28 kMaxDurationLogs = 10, 28 kMaxDurationErrorLogs = 10,
29 // Limits the number of MEDIA_LOG() calls warning the user that buffer
30 // durations have been estimated.
31 kMaxDurationEstimateLogs = 10,
29 }; 32 };
30 33
31 WebMClusterParser::WebMClusterParser( 34 WebMClusterParser::WebMClusterParser(
32 int64 timecode_scale, 35 int64 timecode_scale,
33 int audio_track_num, 36 int audio_track_num,
34 base::TimeDelta audio_default_duration, 37 base::TimeDelta audio_default_duration,
35 int video_track_num, 38 int video_track_num,
36 base::TimeDelta video_default_duration, 39 base::TimeDelta video_default_duration,
37 const WebMTracksParser::TextTracks& text_tracks, 40 const WebMTracksParser::TextTracks& text_tracks,
38 const std::set<int64>& ignored_tracks, 41 const std::set<int64>& ignored_tracks,
(...skipping 139 matching lines...) Expand 10 before | Expand all | Expand 10 after
178 int size) { 181 int size) {
179 // Masks and constants for Opus packets. See 182 // Masks and constants for Opus packets. See
180 // https://tools.ietf.org/html/rfc6716#page-14 183 // https://tools.ietf.org/html/rfc6716#page-14
181 static const uint8_t kTocConfigMask = 0xf8; 184 static const uint8_t kTocConfigMask = 0xf8;
182 static const uint8_t kTocFrameCountCodeMask = 0x03; 185 static const uint8_t kTocFrameCountCodeMask = 0x03;
183 static const uint8_t kFrameCountMask = 0x3f; 186 static const uint8_t kFrameCountMask = 0x3f;
184 static const base::TimeDelta kPacketDurationMax = 187 static const base::TimeDelta kPacketDurationMax =
185 base::TimeDelta::FromMilliseconds(120); 188 base::TimeDelta::FromMilliseconds(120);
186 189
187 if (size < 1) { 190 if (size < 1) {
188 LIMITED_MEDIA_LOG(DEBUG, log_cb_, num_duration_errors_, kMaxDurationLogs) 191 LIMITED_MEDIA_LOG(DEBUG, log_cb_, num_duration_errors_,
192 kMaxDurationErrorLogs)
189 << "Invalid zero-byte Opus packet; demuxed block duration may be " 193 << "Invalid zero-byte Opus packet; demuxed block duration may be "
190 "imprecise."; 194 "imprecise.";
191 return kNoTimestamp(); 195 return kNoTimestamp();
192 } 196 }
193 197
194 // Frame count type described by last 2 bits of Opus TOC byte. 198 // Frame count type described by last 2 bits of Opus TOC byte.
195 int frame_count_type = data[0] & kTocFrameCountCodeMask; 199 int frame_count_type = data[0] & kTocFrameCountCodeMask;
196 200
197 int frame_count = 0; 201 int frame_count = 0;
198 switch (frame_count_type) { 202 switch (frame_count_type) {
199 case 0: 203 case 0:
200 frame_count = 1; 204 frame_count = 1;
201 break; 205 break;
202 case 1: 206 case 1:
203 case 2: 207 case 2:
204 frame_count = 2; 208 frame_count = 2;
205 break; 209 break;
206 case 3: 210 case 3:
207 // Type 3 indicates an arbitrary frame count described in the next byte. 211 // Type 3 indicates an arbitrary frame count described in the next byte.
208 if (size < 2) { 212 if (size < 2) {
209 LIMITED_MEDIA_LOG(DEBUG, log_cb_, num_duration_errors_, 213 LIMITED_MEDIA_LOG(DEBUG, log_cb_, num_duration_errors_,
210 kMaxDurationLogs) 214 kMaxDurationErrorLogs)
211 << "Second byte missing from 'Code 3' Opus packet; demuxed block " 215 << "Second byte missing from 'Code 3' Opus packet; demuxed block "
212 "duration may be imprecise."; 216 "duration may be imprecise.";
213 return kNoTimestamp(); 217 return kNoTimestamp();
214 } 218 }
215 219
216 frame_count = data[1] & kFrameCountMask; 220 frame_count = data[1] & kFrameCountMask;
217 221
218 if (frame_count == 0) { 222 if (frame_count == 0) {
219 LIMITED_MEDIA_LOG(DEBUG, log_cb_, num_duration_errors_, 223 LIMITED_MEDIA_LOG(DEBUG, log_cb_, num_duration_errors_,
220 kMaxDurationLogs) 224 kMaxDurationErrorLogs)
221 << "Illegal 'Code 3' Opus packet with frame count zero; demuxed " 225 << "Illegal 'Code 3' Opus packet with frame count zero; demuxed "
222 "block duration may be imprecise."; 226 "block duration may be imprecise.";
223 return kNoTimestamp(); 227 return kNoTimestamp();
224 } 228 }
225 229
226 break; 230 break;
227 default: 231 default:
228 LIMITED_MEDIA_LOG(DEBUG, log_cb_, num_duration_errors_, kMaxDurationLogs) 232 LIMITED_MEDIA_LOG(DEBUG, log_cb_, num_duration_errors_,
233 kMaxDurationErrorLogs)
229 << "Unexpected Opus frame count type: " << frame_count_type << "; " 234 << "Unexpected Opus frame count type: " << frame_count_type << "; "
230 << "demuxed block duration may be imprecise."; 235 << "demuxed block duration may be imprecise.";
231 return kNoTimestamp(); 236 return kNoTimestamp();
232 } 237 }
233 238
234 int opusConfig = (data[0] & kTocConfigMask) >> 3; 239 int opusConfig = (data[0] & kTocConfigMask) >> 3;
235 CHECK_GE(opusConfig, 0); 240 CHECK_GE(opusConfig, 0);
236 CHECK_LT(opusConfig, static_cast<int>(arraysize(kOpusFrameDurationsMu))); 241 CHECK_LT(opusConfig, static_cast<int>(arraysize(kOpusFrameDurationsMu)));
237 242
238 DCHECK_GT(frame_count, 0); 243 DCHECK_GT(frame_count, 0);
239 base::TimeDelta duration = base::TimeDelta::FromMicroseconds( 244 base::TimeDelta duration = base::TimeDelta::FromMicroseconds(
240 kOpusFrameDurationsMu[opusConfig] * frame_count); 245 kOpusFrameDurationsMu[opusConfig] * frame_count);
241 246
242 if (duration > kPacketDurationMax) { 247 if (duration > kPacketDurationMax) {
243 // Intentionally allowing packet to pass through for now. Decoder should 248 // Intentionally allowing packet to pass through for now. Decoder should
244 // either handle or fail gracefully. MEDIA_LOG as breadcrumbs in case 249 // either handle or fail gracefully. MEDIA_LOG as breadcrumbs in case
245 // things go sideways. 250 // things go sideways.
246 LIMITED_MEDIA_LOG(DEBUG, log_cb_, num_duration_errors_, kMaxDurationLogs) 251 LIMITED_MEDIA_LOG(DEBUG, log_cb_, num_duration_errors_,
252 kMaxDurationErrorLogs)
247 << "Warning, demuxed Opus packet with encoded duration: " << duration 253 << "Warning, demuxed Opus packet with encoded duration: " << duration
248 << ". Should be no greater than " << kPacketDurationMax; 254 << ". Should be no greater than " << kPacketDurationMax;
249 } 255 }
250 256
251 return duration; 257 return duration;
252 } 258 }
253 259
254 WebMParserClient* WebMClusterParser::OnListStart(int id) { 260 WebMParserClient* WebMClusterParser::OnListStart(int id) {
255 if (id == kWebMIdCluster) { 261 if (id == kWebMIdCluster) {
256 cluster_timecode_ = -1; 262 cluster_timecode_ = -1;
(...skipping 298 matching lines...) Expand 10 before | Expand all | Expand 10 after
555 << "Using encoded duration " << encoded_duration.InSecondsF(); 561 << "Using encoded duration " << encoded_duration.InSecondsF();
556 562
557 if (block_duration_time_delta != kNoTimestamp()) { 563 if (block_duration_time_delta != kNoTimestamp()) {
558 base::TimeDelta duration_difference = 564 base::TimeDelta duration_difference =
559 block_duration_time_delta - encoded_duration; 565 block_duration_time_delta - encoded_duration;
560 566
561 const auto kWarnDurationDiff = 567 const auto kWarnDurationDiff =
562 base::TimeDelta::FromMicroseconds(timecode_multiplier_ * 2); 568 base::TimeDelta::FromMicroseconds(timecode_multiplier_ * 2);
563 if (duration_difference.magnitude() > kWarnDurationDiff) { 569 if (duration_difference.magnitude() > kWarnDurationDiff) {
564 LIMITED_MEDIA_LOG(DEBUG, log_cb_, num_duration_errors_, 570 LIMITED_MEDIA_LOG(DEBUG, log_cb_, num_duration_errors_,
565 kMaxDurationLogs) 571 kMaxDurationErrorLogs)
566 << "BlockDuration " 572 << "BlockDuration "
567 << "(" << block_duration_time_delta << ") " 573 << "(" << block_duration_time_delta << ") "
568 << "differs significantly from encoded duration " 574 << "differs significantly from encoded duration "
569 << "(" << encoded_duration << ")."; 575 << "(" << encoded_duration << ").";
570 } 576 }
571 } 577 }
572 } else if (block_duration_time_delta != kNoTimestamp()) { 578 } else if (block_duration_time_delta != kNoTimestamp()) {
573 buffer->set_duration(block_duration_time_delta); 579 buffer->set_duration(block_duration_time_delta);
574 } else { 580 } else {
575 DCHECK_NE(buffer_type, DemuxerStream::TEXT); 581 DCHECK_NE(buffer_type, DemuxerStream::TEXT);
576 buffer->set_duration(track->default_duration()); 582 buffer->set_duration(track->default_duration());
577 } 583 }
578 584
579 if (discard_padding != 0) { 585 if (discard_padding != 0) {
580 buffer->set_discard_padding(std::make_pair( 586 buffer->set_discard_padding(std::make_pair(
581 base::TimeDelta(), 587 base::TimeDelta(),
582 base::TimeDelta::FromMicroseconds(discard_padding / 1000))); 588 base::TimeDelta::FromMicroseconds(discard_padding / 1000)));
583 } 589 }
584 590
585 return track->AddBuffer(buffer); 591 return track->AddBuffer(buffer);
586 } 592 }
587 593
588 WebMClusterParser::Track::Track(int track_num, 594 WebMClusterParser::Track::Track(int track_num,
589 bool is_video, 595 bool is_video,
590 base::TimeDelta default_duration, 596 base::TimeDelta default_duration,
591 const LogCB& log_cb) 597 const LogCB& log_cb)
592 : track_num_(track_num), 598 : num_duration_estimates_(0),
599 track_num_(track_num),
593 is_video_(is_video), 600 is_video_(is_video),
594 default_duration_(default_duration), 601 default_duration_(default_duration),
595 estimated_next_frame_duration_(kNoTimestamp()), 602 estimated_next_frame_duration_(kNoTimestamp()),
596 log_cb_(log_cb) { 603 log_cb_(log_cb) {
597 DCHECK(default_duration_ == kNoTimestamp() || 604 DCHECK(default_duration_ == kNoTimestamp() ||
598 default_duration_ > base::TimeDelta()); 605 default_duration_ > base::TimeDelta());
599 } 606 }
600 607
601 WebMClusterParser::Track::~Track() {} 608 WebMClusterParser::Track::~Track() {}
602 609
(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
675 return true; 682 return true;
676 } 683 }
677 684
678 return QueueBuffer(buffer); 685 return QueueBuffer(buffer);
679 } 686 }
680 687
681 void WebMClusterParser::Track::ApplyDurationEstimateIfNeeded() { 688 void WebMClusterParser::Track::ApplyDurationEstimateIfNeeded() {
682 if (!last_added_buffer_missing_duration_.get()) 689 if (!last_added_buffer_missing_duration_.get())
683 return; 690 return;
684 691
685 last_added_buffer_missing_duration_->set_duration(GetDurationEstimate()); 692 base::TimeDelta estimated_duration = GetDurationEstimate();
693 last_added_buffer_missing_duration_->set_duration(estimated_duration);
686 694
687 DVLOG(2) << "ApplyDurationEstimateIfNeeded() : new dur : " 695 if (is_video_) {
688 << " ts " 696 // Exposing estimation so splicing/overlap frame processing can make
697 // informed decisions downstream.
698 // TODO(chcunningham): Set this for audio as well in later change where
699 // audio is switched to max estimation and splicing is disabled.
700 last_added_buffer_missing_duration_->set_is_duration_estimated(true);
701 }
702
703 LIMITED_MEDIA_LOG(INFO, log_cb_, num_duration_estimates_,
704 kMaxDurationEstimateLogs)
705 << "Estimating WebM block duration to be " << estimated_duration << " "
706 << "for the last (Simple)Block in the Cluster for this Track. Use "
707 << "BlockGroups with BlockDurations at the end of each Track in a "
708 << "Cluster to avoid estimation.";
709
710 DVLOG(2) << __FUNCTION__ << " new dur : ts "
689 << last_added_buffer_missing_duration_->timestamp().InSecondsF() 711 << last_added_buffer_missing_duration_->timestamp().InSecondsF()
690 << " dur " 712 << " dur "
691 << last_added_buffer_missing_duration_->duration().InSecondsF() 713 << last_added_buffer_missing_duration_->duration().InSecondsF()
692 << " kf " << last_added_buffer_missing_duration_->is_key_frame() 714 << " kf " << last_added_buffer_missing_duration_->is_key_frame()
693 << " size " << last_added_buffer_missing_duration_->data_size(); 715 << " size " << last_added_buffer_missing_duration_->data_size();
694 716
695 // Don't use the applied duration as a future estimation (don't use 717 // Don't use the applied duration as a future estimation (don't use
696 // QueueBuffer() here.) 718 // QueueBuffer() here.)
697 buffers_.push_back(last_added_buffer_missing_duration_); 719 buffers_.push_back(last_added_buffer_missing_duration_);
698 last_added_buffer_missing_duration_ = NULL; 720 last_added_buffer_missing_duration_ = NULL;
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
744 DecodeTimestamp() : buffers_.back()->GetDecodeTimestamp(); 766 DecodeTimestamp() : buffers_.back()->GetDecodeTimestamp();
745 CHECK(previous_buffers_timestamp <= buffer->GetDecodeTimestamp()); 767 CHECK(previous_buffers_timestamp <= buffer->GetDecodeTimestamp());
746 768
747 base::TimeDelta duration = buffer->duration(); 769 base::TimeDelta duration = buffer->duration();
748 if (duration < base::TimeDelta() || duration == kNoTimestamp()) { 770 if (duration < base::TimeDelta() || duration == kNoTimestamp()) {
749 MEDIA_LOG(ERROR, log_cb_) 771 MEDIA_LOG(ERROR, log_cb_)
750 << "Invalid buffer duration: " << duration.InSecondsF(); 772 << "Invalid buffer duration: " << duration.InSecondsF();
751 return false; 773 return false;
752 } 774 }
753 775
754 // The estimated frame duration is the minimum non-zero duration since the 776 // The estimated frame duration is the minimum (for audio) or the maximum
755 // last initialization segment. The minimum is used to ensure frame durations 777 // (for video) non-zero duration since the last initialization segment. The
756 // aren't overestimated. 778 // minimum is used for audio to ensure frame durations aren't overestimated,
779 // triggering unnecessary frame splicing. For video, splicing does not apply,
780 // so maximum is used and overlap is simply resolved by showing the
781 // later of the overlapping frames at its given PTS, effectively trimming down
782 // the over-estimated duration of the previous frame.
783 // TODO(chcunningham): Use max for audio and disable splicing whenever
784 // estimated buffers are encountered.
757 if (duration > base::TimeDelta()) { 785 if (duration > base::TimeDelta()) {
786 base::TimeDelta orig_duration_estimate = estimated_next_frame_duration_;
758 if (estimated_next_frame_duration_ == kNoTimestamp()) { 787 if (estimated_next_frame_duration_ == kNoTimestamp()) {
759 estimated_next_frame_duration_ = duration; 788 estimated_next_frame_duration_ = duration;
789 } else if (is_video_) {
790 estimated_next_frame_duration_ =
791 std::max(duration, estimated_next_frame_duration_);
760 } else { 792 } else {
761 estimated_next_frame_duration_ = 793 estimated_next_frame_duration_ =
762 std::min(duration, estimated_next_frame_duration_); 794 std::min(duration, estimated_next_frame_duration_);
763 } 795 }
796
797 if (orig_duration_estimate != estimated_next_frame_duration_) {
798 DVLOG(3) << "Updated duration estimate:"
799 << orig_duration_estimate
800 << " -> "
801 << estimated_next_frame_duration_
802 << " at timestamp: "
803 << buffer->GetDecodeTimestamp().InSecondsF();
804 }
764 } 805 }
765 806
766 buffers_.push_back(buffer); 807 buffers_.push_back(buffer);
767 return true; 808 return true;
768 } 809 }
769 810
770 base::TimeDelta WebMClusterParser::Track::GetDurationEstimate() { 811 base::TimeDelta WebMClusterParser::Track::GetDurationEstimate() {
771 base::TimeDelta duration = estimated_next_frame_duration_; 812 base::TimeDelta duration = estimated_next_frame_duration_;
772 if (duration != kNoTimestamp()) { 813 if (duration != kNoTimestamp()) {
773 DVLOG(3) << __FUNCTION__ << " : using estimated duration"; 814 DVLOG(3) << __FUNCTION__ << " : using estimated duration";
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
840 WebMClusterParser::FindTextTrack(int track_num) { 881 WebMClusterParser::FindTextTrack(int track_num) {
841 const TextTrackMap::iterator it = text_track_map_.find(track_num); 882 const TextTrackMap::iterator it = text_track_map_.find(track_num);
842 883
843 if (it == text_track_map_.end()) 884 if (it == text_track_map_.end())
844 return NULL; 885 return NULL;
845 886
846 return &it->second; 887 return &it->second;
847 } 888 }
848 889
849 } // namespace media 890 } // namespace media
OLDNEW
« no previous file with comments | « media/formats/webm/webm_cluster_parser.h ('k') | media/formats/webm/webm_cluster_parser_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698