media/formats/webm/webm_cluster_parser.cc - Issue 1018373003: Improving WebM video duration estimation.

Side by Side Diff: media/formats/webm/webm_cluster_parser.cc

Issue 1018373003: Improving WebM video duration estimation. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Fixing try failure, remove unused variable for some builds. Created 5 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2014 The Chromium Authors. All rights reserved.	1 // Copyright 2014 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "media/formats/webm/webm_cluster_parser.h"	5 #include "media/formats/webm/webm_cluster_parser.h"

6	6

7 #include <vector>	7 #include <vector>

8	8

9 #include "base/logging.h"	9 #include "base/logging.h"

10 #include "base/sys_byteorder.h"	10 #include "base/sys_byteorder.h"

11 #include "media/base/buffers.h"	11 #include "media/base/buffers.h"

12 #include "media/base/decrypt_config.h"	12 #include "media/base/decrypt_config.h"

13 #include "media/filters/webvtt_util.h"	13 #include "media/filters/webvtt_util.h"

14 #include "media/formats/webm/webm_constants.h"	14 #include "media/formats/webm/webm_constants.h"

15 #include "media/formats/webm/webm_crypto_helpers.h"	15 #include "media/formats/webm/webm_crypto_helpers.h"

16 #include "media/formats/webm/webm_webvtt_parser.h"	16 #include "media/formats/webm/webm_webvtt_parser.h"

17	17

18 namespace media {	18 namespace media {

19	19

20 const uint16_t WebMClusterParser::kOpusFrameDurationsMu[] = {	20 const uint16_t WebMClusterParser::kOpusFrameDurationsMu[] = {

21 10000, 20000, 40000, 60000, 10000, 20000, 40000, 60000, 10000, 20000, 40000,	21 10000, 20000, 40000, 60000, 10000, 20000, 40000, 60000, 10000, 20000, 40000,

22 60000, 10000, 20000, 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000,	22 60000, 10000, 20000, 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000,

23 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000, 10000, 20000};	23 10000, 20000, 2500, 5000, 10000, 20000, 2500, 5000, 10000, 20000};

24	24

25 enum {	25 enum {

26 // Limits the number of MEDIA_LOG() calls in the path of reading encoded	26 // Limits the number of MEDIA_LOG() calls in the path of reading encoded

27 // duration to avoid spamming for corrupted data.	27 // duration to avoid spamming for corrupted data.

28 kMaxDurationLogs = 10,	28 kMaxDurationErrorLogs = 10,

	29 // Limits the number of MEDIA_LOG() calls warning the user that buffer

	30 // durations have been estimated.

	31 kMaxDurationEstimateLogs = 10,

29 };	32 };

30	33

31 WebMClusterParser::WebMClusterParser(	34 WebMClusterParser::WebMClusterParser(

32 int64 timecode_scale,	35 int64 timecode_scale,

33 int audio_track_num,	36 int audio_track_num,

34 base::TimeDelta audio_default_duration,	37 base::TimeDelta audio_default_duration,

35 int video_track_num,	38 int video_track_num,

36 base::TimeDelta video_default_duration,	39 base::TimeDelta video_default_duration,

37 const WebMTracksParser::TextTracks& text_tracks,	40 const WebMTracksParser::TextTracks& text_tracks,

38 const std::set<int64>& ignored_tracks,	41 const std::set<int64>& ignored_tracks,

(...skipping 139 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
178 int size) {	181 int size) {

179 // Masks and constants for Opus packets. See	182 // Masks and constants for Opus packets. See

180 // https://tools.ietf.org/html/rfc6716#page-14	183 // https://tools.ietf.org/html/rfc6716#page-14

181 static const uint8_t kTocConfigMask = 0xf8;	184 static const uint8_t kTocConfigMask = 0xf8;

182 static const uint8_t kTocFrameCountCodeMask = 0x03;	185 static const uint8_t kTocFrameCountCodeMask = 0x03;

183 static const uint8_t kFrameCountMask = 0x3f;	186 static const uint8_t kFrameCountMask = 0x3f;

184 static const base::TimeDelta kPacketDurationMax =	187 static const base::TimeDelta kPacketDurationMax =

185 base::TimeDelta::FromMilliseconds(120);	188 base::TimeDelta::FromMilliseconds(120);

186	189

187 if (size < 1) {	190 if (size < 1) {

188 LIMITED_MEDIA_LOG(DEBUG, log_cb_, num_duration_errors_, kMaxDurationLogs)	191 LIMITED_MEDIA_LOG(DEBUG, log_cb_, num_duration_errors_,

	192 kMaxDurationErrorLogs)

189 << "Invalid zero-byte Opus packet; demuxed block duration may be "	193 << "Invalid zero-byte Opus packet; demuxed block duration may be "

190 "imprecise.";	194 "imprecise.";

191 return kNoTimestamp();	195 return kNoTimestamp();

192 }	196 }

193	197

194 // Frame count type described by last 2 bits of Opus TOC byte.	198 // Frame count type described by last 2 bits of Opus TOC byte.

195 int frame_count_type = data[0] & kTocFrameCountCodeMask;	199 int frame_count_type = data[0] & kTocFrameCountCodeMask;

196	200

197 int frame_count = 0;	201 int frame_count = 0;

198 switch (frame_count_type) {	202 switch (frame_count_type) {

199 case 0:	203 case 0:

200 frame_count = 1;	204 frame_count = 1;

201 break;	205 break;

202 case 1:	206 case 1:

203 case 2:	207 case 2:

204 frame_count = 2;	208 frame_count = 2;

205 break;	209 break;

206 case 3:	210 case 3:

207 // Type 3 indicates an arbitrary frame count described in the next byte.	211 // Type 3 indicates an arbitrary frame count described in the next byte.

208 if (size < 2) {	212 if (size < 2) {

209 LIMITED_MEDIA_LOG(DEBUG, log_cb_, num_duration_errors_,	213 LIMITED_MEDIA_LOG(DEBUG, log_cb_, num_duration_errors_,

210 kMaxDurationLogs)	214 kMaxDurationErrorLogs)

211 << "Second byte missing from 'Code 3' Opus packet; demuxed block "	215 << "Second byte missing from 'Code 3' Opus packet; demuxed block "

212 "duration may be imprecise.";	216 "duration may be imprecise.";

213 return kNoTimestamp();	217 return kNoTimestamp();

214 }	218 }

215	219

216 frame_count = data[1] & kFrameCountMask;	220 frame_count = data[1] & kFrameCountMask;

217	221

218 if (frame_count == 0) {	222 if (frame_count == 0) {

219 LIMITED_MEDIA_LOG(DEBUG, log_cb_, num_duration_errors_,	223 LIMITED_MEDIA_LOG(DEBUG, log_cb_, num_duration_errors_,

220 kMaxDurationLogs)	224 kMaxDurationErrorLogs)

221 << "Illegal 'Code 3' Opus packet with frame count zero; demuxed "	225 << "Illegal 'Code 3' Opus packet with frame count zero; demuxed "

222 "block duration may be imprecise.";	226 "block duration may be imprecise.";

223 return kNoTimestamp();	227 return kNoTimestamp();

224 }	228 }

225	229

226 break;	230 break;

227 default:	231 default:

228 LIMITED_MEDIA_LOG(DEBUG, log_cb_, num_duration_errors_, kMaxDurationLogs)	232 LIMITED_MEDIA_LOG(DEBUG, log_cb_, num_duration_errors_,

	233 kMaxDurationErrorLogs)

229 << "Unexpected Opus frame count type: " << frame_count_type << "; "	234 << "Unexpected Opus frame count type: " << frame_count_type << "; "

230 << "demuxed block duration may be imprecise.";	235 << "demuxed block duration may be imprecise.";

231 return kNoTimestamp();	236 return kNoTimestamp();

232 }	237 }

233	238

234 int opusConfig = (data[0] & kTocConfigMask) >> 3;	239 int opusConfig = (data[0] & kTocConfigMask) >> 3;

235 CHECK_GE(opusConfig, 0);	240 CHECK_GE(opusConfig, 0);

236 CHECK_LT(opusConfig, static_cast<int>(arraysize(kOpusFrameDurationsMu)));	241 CHECK_LT(opusConfig, static_cast<int>(arraysize(kOpusFrameDurationsMu)));

237	242

238 DCHECK_GT(frame_count, 0);	243 DCHECK_GT(frame_count, 0);

239 base::TimeDelta duration = base::TimeDelta::FromMicroseconds(	244 base::TimeDelta duration = base::TimeDelta::FromMicroseconds(

240 kOpusFrameDurationsMu[opusConfig] * frame_count);	245 kOpusFrameDurationsMu[opusConfig] * frame_count);

241	246

242 if (duration > kPacketDurationMax) {	247 if (duration > kPacketDurationMax) {

243 // Intentionally allowing packet to pass through for now. Decoder should	248 // Intentionally allowing packet to pass through for now. Decoder should

244 // either handle or fail gracefully. MEDIA_LOG as breadcrumbs in case	249 // either handle or fail gracefully. MEDIA_LOG as breadcrumbs in case

245 // things go sideways.	250 // things go sideways.

246 LIMITED_MEDIA_LOG(DEBUG, log_cb_, num_duration_errors_, kMaxDurationLogs)	251 LIMITED_MEDIA_LOG(DEBUG, log_cb_, num_duration_errors_,

	252 kMaxDurationErrorLogs)

247 << "Warning, demuxed Opus packet with encoded duration: " << duration	253 << "Warning, demuxed Opus packet with encoded duration: " << duration

248 << ". Should be no greater than " << kPacketDurationMax;	254 << ". Should be no greater than " << kPacketDurationMax;

249 }	255 }

250	256

251 return duration;	257 return duration;

252 }	258 }

253	259

254 WebMParserClient* WebMClusterParser::OnListStart(int id) {	260 WebMParserClient* WebMClusterParser::OnListStart(int id) {

255 if (id == kWebMIdCluster) {	261 if (id == kWebMIdCluster) {

256 cluster_timecode_ = -1;	262 cluster_timecode_ = -1;

(...skipping 298 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
555 << "Using encoded duration " << encoded_duration.InSecondsF();	561 << "Using encoded duration " << encoded_duration.InSecondsF();

556	562

557 if (block_duration_time_delta != kNoTimestamp()) {	563 if (block_duration_time_delta != kNoTimestamp()) {

558 base::TimeDelta duration_difference =	564 base::TimeDelta duration_difference =

559 block_duration_time_delta - encoded_duration;	565 block_duration_time_delta - encoded_duration;

560	566

561 const auto kWarnDurationDiff =	567 const auto kWarnDurationDiff =

562 base::TimeDelta::FromMicroseconds(timecode_multiplier_ * 2);	568 base::TimeDelta::FromMicroseconds(timecode_multiplier_ * 2);

563 if (duration_difference.magnitude() > kWarnDurationDiff) {	569 if (duration_difference.magnitude() > kWarnDurationDiff) {

564 LIMITED_MEDIA_LOG(DEBUG, log_cb_, num_duration_errors_,	570 LIMITED_MEDIA_LOG(DEBUG, log_cb_, num_duration_errors_,

565 kMaxDurationLogs)	571 kMaxDurationErrorLogs)

566 << "BlockDuration "	572 << "BlockDuration "

567 << "(" << block_duration_time_delta << ") "	573 << "(" << block_duration_time_delta << ") "

568 << "differs significantly from encoded duration "	574 << "differs significantly from encoded duration "

569 << "(" << encoded_duration << ").";	575 << "(" << encoded_duration << ").";

570 }	576 }

571 }	577 }

572 } else if (block_duration_time_delta != kNoTimestamp()) {	578 } else if (block_duration_time_delta != kNoTimestamp()) {

573 buffer->set_duration(block_duration_time_delta);	579 buffer->set_duration(block_duration_time_delta);

574 } else {	580 } else {

575 DCHECK_NE(buffer_type, DemuxerStream::TEXT);	581 DCHECK_NE(buffer_type, DemuxerStream::TEXT);

576 buffer->set_duration(track->default_duration());	582 buffer->set_duration(track->default_duration());

577 }	583 }

578	584

579 if (discard_padding != 0) {	585 if (discard_padding != 0) {

580 buffer->set_discard_padding(std::make_pair(	586 buffer->set_discard_padding(std::make_pair(

581 base::TimeDelta(),	587 base::TimeDelta(),

582 base::TimeDelta::FromMicroseconds(discard_padding / 1000)));	588 base::TimeDelta::FromMicroseconds(discard_padding / 1000)));

583 }	589 }

584	590

585 return track->AddBuffer(buffer);	591 return track->AddBuffer(buffer);

586 }	592 }

587	593

588 WebMClusterParser::Track::Track(int track_num,	594 WebMClusterParser::Track::Track(int track_num,

589 bool is_video,	595 bool is_video,

590 base::TimeDelta default_duration,	596 base::TimeDelta default_duration,

591 const LogCB& log_cb)	597 const LogCB& log_cb)

592 : track_num_(track_num),	598 : num_duration_estimates_(0),

	599 track_num_(track_num),

593 is_video_(is_video),	600 is_video_(is_video),

594 default_duration_(default_duration),	601 default_duration_(default_duration),

595 estimated_next_frame_duration_(kNoTimestamp()),	602 estimated_next_frame_duration_(kNoTimestamp()),

596 log_cb_(log_cb) {	603 log_cb_(log_cb) {

597 DCHECK(default_duration_ == kNoTimestamp() ||	604 DCHECK(default_duration_ == kNoTimestamp() ||

598 default_duration_ > base::TimeDelta());	605 default_duration_ > base::TimeDelta());

599 }	606 }

600	607

601 WebMClusterParser::Track::~Track() {}	608 WebMClusterParser::Track::~Track() {}

602	609

(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
675 return true;	682 return true;

676 }	683 }

677	684

678 return QueueBuffer(buffer);	685 return QueueBuffer(buffer);

679 }	686 }

680	687

681 void WebMClusterParser::Track::ApplyDurationEstimateIfNeeded() {	688 void WebMClusterParser::Track::ApplyDurationEstimateIfNeeded() {

682 if (!last_added_buffer_missing_duration_.get())	689 if (!last_added_buffer_missing_duration_.get())

683 return;	690 return;

684	691

685 last_added_buffer_missing_duration_->set_duration(GetDurationEstimate());	692 base::TimeDelta estimated_duration = GetDurationEstimate();

	693 last_added_buffer_missing_duration_->set_duration(estimated_duration);

686	694

687 DVLOG(2) << "ApplyDurationEstimateIfNeeded() : new dur : "	695 if (is_video_) {

688 << " ts "	696 // Exposing estimation so splicing/overlap frame processing can make

	697 // informed decisions downstream.

	698 // TODO(chcunningham): Set this for audio as well in later change where

	699 // audio is switched to max estimation and splicing is disabled.

	700 last_added_buffer_missing_duration_->set_is_duration_estimated(true);

	701 }

	702

	703 LIMITED_MEDIA_LOG(INFO, log_cb_, num_duration_estimates_,

	704 kMaxDurationEstimateLogs)

	705 << "Estimating WebM block duration to be " << estimated_duration << " "

	706 << "for the last (Simple)Block in the Cluster for this Track. Use "

	707 << "BlockGroups with BlockDurations at the end of each Track in a "

	708 << "Cluster to avoid estimation.";

	709

	710 DVLOG(2) << __FUNCTION__ << " new dur : ts "

689 << last_added_buffer_missing_duration_->timestamp().InSecondsF()	711 << last_added_buffer_missing_duration_->timestamp().InSecondsF()

690 << " dur "	712 << " dur "

691 << last_added_buffer_missing_duration_->duration().InSecondsF()	713 << last_added_buffer_missing_duration_->duration().InSecondsF()

692 << " kf " << last_added_buffer_missing_duration_->is_key_frame()	714 << " kf " << last_added_buffer_missing_duration_->is_key_frame()

693 << " size " << last_added_buffer_missing_duration_->data_size();	715 << " size " << last_added_buffer_missing_duration_->data_size();

694	716

695 // Don't use the applied duration as a future estimation (don't use	717 // Don't use the applied duration as a future estimation (don't use

696 // QueueBuffer() here.)	718 // QueueBuffer() here.)

697 buffers_.push_back(last_added_buffer_missing_duration_);	719 buffers_.push_back(last_added_buffer_missing_duration_);

698 last_added_buffer_missing_duration_ = NULL;	720 last_added_buffer_missing_duration_ = NULL;

(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
744 DecodeTimestamp() : buffers_.back()->GetDecodeTimestamp();	766 DecodeTimestamp() : buffers_.back()->GetDecodeTimestamp();

745 CHECK(previous_buffers_timestamp <= buffer->GetDecodeTimestamp());	767 CHECK(previous_buffers_timestamp <= buffer->GetDecodeTimestamp());

746	768

747 base::TimeDelta duration = buffer->duration();	769 base::TimeDelta duration = buffer->duration();

748 if (duration < base::TimeDelta() || duration == kNoTimestamp()) {	770 if (duration < base::TimeDelta() || duration == kNoTimestamp()) {

749 MEDIA_LOG(ERROR, log_cb_)	771 MEDIA_LOG(ERROR, log_cb_)

750 << "Invalid buffer duration: " << duration.InSecondsF();	772 << "Invalid buffer duration: " << duration.InSecondsF();

751 return false;	773 return false;

752 }	774 }

753	775

754 // The estimated frame duration is the minimum non-zero duration since the	776 // The estimated frame duration is the minimum (for audio) or the maximum

755 // last initialization segment. The minimum is used to ensure frame durations	777 // (for video) non-zero duration since the last initialization segment. The

756 // aren't overestimated.	778 // minimum is used for audio to ensure frame durations aren't overestimated,

	779 // triggering unnecessary frame splicing. For video, splicing does not apply,

	780 // so maximum is used and overlap is simply resolved by showing the

	781 // later of the overlapping frames at its given PTS, effectively trimming down

	782 // the over-estimated duration of the previous frame.

	783 // TODO(chcunningham): Use max for audio and disable splicing whenever

	784 // estimated buffers are encountered.

757 if (duration > base::TimeDelta()) {	785 if (duration > base::TimeDelta()) {

	786 base::TimeDelta orig_duration_estimate = estimated_next_frame_duration_;

758 if (estimated_next_frame_duration_ == kNoTimestamp()) {	787 if (estimated_next_frame_duration_ == kNoTimestamp()) {

759 estimated_next_frame_duration_ = duration;	788 estimated_next_frame_duration_ = duration;

	789 } else if (is_video_) {

	790 estimated_next_frame_duration_ =

	791 std::max(duration, estimated_next_frame_duration_);

760 } else {	792 } else {

761 estimated_next_frame_duration_ =	793 estimated_next_frame_duration_ =

762 std::min(duration, estimated_next_frame_duration_);	794 std::min(duration, estimated_next_frame_duration_);

763 }	795 }

	796

	797 if (orig_duration_estimate != estimated_next_frame_duration_) {

	798 DVLOG(3) << "Updated duration estimate:"

	799 << orig_duration_estimate

	800 << " -> "

	801 << estimated_next_frame_duration_

	802 << " at timestamp: "

	803 << buffer->GetDecodeTimestamp().InSecondsF();

	804 }

764 }	805 }

765	806

766 buffers_.push_back(buffer);	807 buffers_.push_back(buffer);

767 return true;	808 return true;

768 }	809 }

769	810

770 base::TimeDelta WebMClusterParser::Track::GetDurationEstimate() {	811 base::TimeDelta WebMClusterParser::Track::GetDurationEstimate() {

771 base::TimeDelta duration = estimated_next_frame_duration_;	812 base::TimeDelta duration = estimated_next_frame_duration_;

772 if (duration != kNoTimestamp()) {	813 if (duration != kNoTimestamp()) {

773 DVLOG(3) << __FUNCTION__ << " : using estimated duration";	814 DVLOG(3) << __FUNCTION__ << " : using estimated duration";

(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
840 WebMClusterParser::FindTextTrack(int track_num) {	881 WebMClusterParser::FindTextTrack(int track_num) {

841 const TextTrackMap::iterator it = text_track_map_.find(track_num);	882 const TextTrackMap::iterator it = text_track_map_.find(track_num);

842	883

843 if (it == text_track_map_.end())	884 if (it == text_track_map_.end())

844 return NULL;	885 return NULL;

845	886

846 return &it->second;	887 return &it->second;

847 }	888 }

848	889

849 } // namespace media	890 } // namespace media

OLD	NEW

« no previous file with comments | « media/formats/webm/webm_cluster_parser.h ('k') | media/formats/webm/webm_cluster_parser_unittest.cc » ('j') | no next file with comments »