media/filters/ffmpeg_demuxer.cc - Issue 23702007: Render inband text tracks in the media pipeline

Side by Side Diff: media/filters/ffmpeg_demuxer.cc

Issue 23702007: Render inband text tracks in the media pipeline (Closed) Base URL: http://git.chromium.org/chromium/src.git@master

Patch Set: incorporate aaron's comments (11/04) Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "media/filters/ffmpeg_demuxer.h"	5 #include "media/filters/ffmpeg_demuxer.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8 #include <string>	8 #include <string>

9	9

10 #include "base/base64.h"	10 #include "base/base64.h"

11 #include "base/bind.h"	11 #include "base/bind.h"

12 #include "base/callback.h"	12 #include "base/callback.h"

13 #include "base/callback_helpers.h"	13 #include "base/callback_helpers.h"

14 #include "base/command_line.h"

15 #include "base/memory/scoped_ptr.h"	14 #include "base/memory/scoped_ptr.h"

16 #include "base/message_loop/message_loop.h"	15 #include "base/message_loop/message_loop.h"

17 #include "base/metrics/sparse_histogram.h"	16 #include "base/metrics/sparse_histogram.h"

18 #include "base/stl_util.h"

19 #include "base/strings/string_util.h"	17 #include "base/strings/string_util.h"

20 #include "base/strings/stringprintf.h"	18 #include "base/strings/stringprintf.h"

21 #include "base/sys_byteorder.h"	19 #include "base/sys_byteorder.h"

22 #include "base/task_runner_util.h"	20 #include "base/task_runner_util.h"

23 #include "base/time/time.h"	21 #include "base/time/time.h"

24 #include "media/base/audio_decoder_config.h"	22 #include "media/base/audio_decoder_config.h"

25 #include "media/base/bind_to_loop.h"	23 #include "media/base/bind_to_loop.h"

26 #include "media/base/decoder_buffer.h"	24 #include "media/base/decoder_buffer.h"

27 #include "media/base/decrypt_config.h"	25 #include "media/base/decrypt_config.h"

28 #include "media/base/limits.h"	26 #include "media/base/limits.h"

29 #include "media/base/media_log.h"	27 #include "media/base/media_log.h"

30 #include "media/base/media_switches.h"

31 #include "media/base/video_decoder_config.h"	28 #include "media/base/video_decoder_config.h"

32 #include "media/ffmpeg/ffmpeg_common.h"	29 #include "media/ffmpeg/ffmpeg_common.h"

33 #include "media/filters/ffmpeg_glue.h"	30 #include "media/filters/ffmpeg_glue.h"

34 #include "media/filters/ffmpeg_h264_to_annex_b_bitstream_converter.h"	31 #include "media/filters/ffmpeg_h264_to_annex_b_bitstream_converter.h"

	32 #include "media/filters/webvtt_util.h"

35 #include "media/webm/webm_crypto_helpers.h"	33 #include "media/webm/webm_crypto_helpers.h"

36	34

37 namespace media {	35 namespace media {

38	36

39 //	37 //

40 // FFmpegDemuxerStream	38 // FFmpegDemuxerStream

41 //	39 //

42 FFmpegDemuxerStream::FFmpegDemuxerStream(	40 FFmpegDemuxerStream::FFmpegDemuxerStream(

43 FFmpegDemuxer* demuxer,	41 FFmpegDemuxer* demuxer,

44 AVStream* stream)	42 AVStream* stream)

(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
108 // Convert the packet if there is a bitstream filter.	106 // Convert the packet if there is a bitstream filter.

109 if (packet->data && bitstream_converter_enabled_ &&	107 if (packet->data && bitstream_converter_enabled_ &&

110 !bitstream_converter_->ConvertPacket(packet.get())) {	108 !bitstream_converter_->ConvertPacket(packet.get())) {

111 LOG(ERROR) << "Format conversion failed.";	109 LOG(ERROR) << "Format conversion failed.";

112 }	110 }

113	111

114 // Get side data if any. For now, the only type of side_data is VP8 Alpha. We	112 // Get side data if any. For now, the only type of side_data is VP8 Alpha. We

115 // keep this generic so that other side_data types in the future can be	113 // keep this generic so that other side_data types in the future can be

116 // handled the same way as well.	114 // handled the same way as well.

117 av_packet_split_side_data(packet.get());	115 av_packet_split_side_data(packet.get());

118 int side_data_size = 0;	116 scoped_refptr<DecoderBuffer> buffer;

119 uint8* side_data = av_packet_get_side_data(

120 packet.get(),

121 AV_PKT_DATA_MATROSKA_BLOCKADDITIONAL,

122 &side_data_size);

123	117

124 // If a packet is returned by FFmpeg's av_parser_parse2() the packet will	118 if (type() == DemuxerStream::TEXT) {

125 // reference inner memory of FFmpeg. As such we should transfer the packet	119 int id_size = 0;

126 // into memory we control.	120 uint8* id_data = av_packet_get_side_data(

127 scoped_refptr<DecoderBuffer> buffer;	121 packet.get(),

128 if (side_data_size > 0) {	122 AV_PKT_DATA_WEBVTT_IDENTIFIER,

	123 &id_size);

	124

	125 int settings_size = 0;

	126 uint8* settings_data = av_packet_get_side_data(

	127 packet.get(),

	128 AV_PKT_DATA_WEBVTT_SETTINGS,

	129 &settings_size);

	130

	131 std::vector<uint8> side_data;

	132 MakeSideData(id_data, id_data + id_size,

	133 settings_data, settings_data + settings_size,

	134 &side_data);

	135

129 buffer = DecoderBuffer::CopyFrom(packet.get()->data, packet.get()->size,	136 buffer = DecoderBuffer::CopyFrom(packet.get()->data, packet.get()->size,

130 side_data, side_data_size);	137 side_data.data(), side_data.size());

131 } else {	138 } else {

132 buffer = DecoderBuffer::CopyFrom(packet.get()->data, packet.get()->size);	139 int side_data_size = 0;

133 }	140 uint8* side_data = av_packet_get_side_data(

	141 packet.get(),

	142 AV_PKT_DATA_MATROSKA_BLOCKADDITIONAL,

	143 &side_data_size);

134	144

135 int skip_samples_size = 0;	145 // If a packet is returned by FFmpeg's av_parser_parse2() the packet will

136 uint8* skip_samples = av_packet_get_side_data(packet.get(),	146 // reference inner memory of FFmpeg. As such we should transfer the packet

137 AV_PKT_DATA_SKIP_SAMPLES,	147 // into memory we control.

138 &skip_samples_size);	148 if (side_data_size > 0) {

139 const int kSkipSamplesValidSize = 10;	149 buffer = DecoderBuffer::CopyFrom(packet.get()->data, packet.get()->size,

140 const int kSkipSamplesOffset = 4;	150 side_data, side_data_size);

141 if (skip_samples_size >= kSkipSamplesValidSize) {	151 } else {

142 int discard_padding_samples = base::ByteSwapToLE32(	152 buffer = DecoderBuffer::CopyFrom(packet.get()->data, packet.get()->size);

143 *(reinterpret_cast<const uint32*>(skip_samples +	153 }

144 kSkipSamplesOffset)));	154

145 // TODO(vigneshv): Change decoder buffer to use number of samples so that	155 int skip_samples_size = 0;

146 // this conversion can be avoided.	156 uint8* skip_samples = av_packet_get_side_data(packet.get(),

147 buffer->set_discard_padding(base::TimeDelta::FromMicroseconds(	157 AV_PKT_DATA_SKIP_SAMPLES,

148 discard_padding_samples * 1000000.0 /	158 &skip_samples_size);

149 audio_decoder_config().samples_per_second()));	159 const int kSkipSamplesValidSize = 10;

	160 const int kSkipSamplesOffset = 4;

	161 if (skip_samples_size >= kSkipSamplesValidSize) {

	162 int discard_padding_samples = base::ByteSwapToLE32(

	163 *(reinterpret_cast<const uint32*>(skip_samples +

	164 kSkipSamplesOffset)));

	165 // TODO(vigneshv): Change decoder buffer to use number of samples so that

	166 // this conversion can be avoided.

	167 buffer->set_discard_padding(base::TimeDelta::FromMicroseconds(

	168 discard_padding_samples * 1000000.0 /

	169 audio_decoder_config().samples_per_second()));

	170 }

150 }	171 }

151	172

152 if ((type() == DemuxerStream::AUDIO && audio_config_.is_encrypted()) ||	173 if ((type() == DemuxerStream::AUDIO && audio_config_.is_encrypted()) ||

153 (type() == DemuxerStream::VIDEO && video_config_.is_encrypted())) {	174 (type() == DemuxerStream::VIDEO && video_config_.is_encrypted())) {

154 scoped_ptr<DecryptConfig> config(WebMCreateDecryptConfig(	175 scoped_ptr<DecryptConfig> config(WebMCreateDecryptConfig(

155 packet->data, packet->size,	176 packet->data, packet->size,

156 reinterpret_cast<const uint8*>(encryption_key_id_.data()),	177 reinterpret_cast<const uint8*>(encryption_key_id_.data()),

157 encryption_key_id_.size()));	178 encryption_key_id_.size()));

158 if (!config)	179 if (!config)

159 LOG(ERROR) << "Creation of DecryptConfig failed.";	180 LOG(ERROR) << "Creation of DecryptConfig failed.";

160 buffer->set_decrypt_config(config.Pass());	181 buffer->set_decrypt_config(config.Pass());

161 }	182 }

162	183

163 buffer->set_timestamp(ConvertStreamTimestamp(	184 buffer->set_timestamp(ConvertStreamTimestamp(

164 stream_->time_base, packet->pts));	185 stream_->time_base, packet->pts));

165 buffer->set_duration(ConvertStreamTimestamp(	186 buffer->set_duration(ConvertStreamTimestamp(

(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
283 // TODO(scherkus): Remove early return and reenable time-based capacity	304 // TODO(scherkus): Remove early return and reenable time-based capacity

284 // after our data sources support canceling/concurrent reads, see	305 // after our data sources support canceling/concurrent reads, see

285 // http://crbug.com/165762 for details.	306 // http://crbug.com/165762 for details.

286 return !read_cb_.is_null();	307 return !read_cb_.is_null();

287	308

288 // Try to have one second's worth of encoded data per stream.	309 // Try to have one second's worth of encoded data per stream.

289 const base::TimeDelta kCapacity = base::TimeDelta::FromSeconds(1);	310 const base::TimeDelta kCapacity = base::TimeDelta::FromSeconds(1);

290 return buffer_queue_.IsEmpty() || buffer_queue_.Duration() < kCapacity;	311 return buffer_queue_.IsEmpty() || buffer_queue_.Duration() < kCapacity;

291 }	312 }

292	313

	314 TextKind FFmpegDemuxerStream::GetTextKind() const {

	315 DCHECK_EQ(type_, DemuxerStream::TEXT);

	316

	317 if (stream_->disposition & AV_DISPOSITION_CAPTIONS)

	318 return kTextCaptions;

	319

	320 if (stream_->disposition & AV_DISPOSITION_DESCRIPTIONS)

	321 return kTextDescriptions;

	322

	323 if (stream_->disposition & AV_DISPOSITION_METADATA)

	324 return kTextMetadata;

	325

	326 return kTextSubtitles;

	327 }

	328

	329 std::string FFmpegDemuxerStream::GetMetadata(const char* key) const {

	330 const AVDictionaryEntry* entry =

	331 av_dict_get(stream_->metadata, key, NULL, 0);

	332 return (entry == NULL || entry->value == NULL) ? "" : entry->value;

	333 }

	334

293 // static	335 // static

294 base::TimeDelta FFmpegDemuxerStream::ConvertStreamTimestamp(	336 base::TimeDelta FFmpegDemuxerStream::ConvertStreamTimestamp(

295 const AVRational& time_base, int64 timestamp) {	337 const AVRational& time_base, int64 timestamp) {

296 if (timestamp == static_cast<int64>(AV_NOPTS_VALUE))	338 if (timestamp == static_cast<int64>(AV_NOPTS_VALUE))

297 return kNoTimestamp();	339 return kNoTimestamp();

298	340

299 return ConvertFromTimeBase(time_base, timestamp);	341 return ConvertFromTimeBase(time_base, timestamp);

300 }	342 }

301	343

302 //	344 //

303 // FFmpegDemuxer	345 // FFmpegDemuxer

304 //	346 //

305 FFmpegDemuxer::FFmpegDemuxer(	347 FFmpegDemuxer::FFmpegDemuxer(

306 const scoped_refptr<base::MessageLoopProxy>& message_loop,	348 const scoped_refptr<base::MessageLoopProxy>& message_loop,

307 DataSource* data_source,	349 DataSource* data_source,

308 const NeedKeyCB& need_key_cb,	350 const NeedKeyCB& need_key_cb,

309 const scoped_refptr<MediaLog>& media_log)	351 const scoped_refptr<MediaLog>& media_log)

310 : host_(NULL),	352 : host_(NULL),

311 message_loop_(message_loop),	353 message_loop_(message_loop),

312 weak_factory_(this),	354 weak_factory_(this),

313 blocking_thread_("FFmpegDemuxer"),	355 blocking_thread_("FFmpegDemuxer"),

314 pending_read_(false),	356 pending_read_(false),

315 pending_seek_(false),	357 pending_seek_(false),

316 data_source_(data_source),	358 data_source_(data_source),

317 media_log_(media_log),	359 media_log_(media_log),

318 bitrate_(0),	360 bitrate_(0),

319 start_time_(kNoTimestamp()),	361 start_time_(kNoTimestamp()),

320 audio_disabled_(false),	362 audio_disabled_(false),

	363 text_enabled_(false),

321 duration_known_(false),	364 duration_known_(false),

322 url_protocol_(data_source, BindToLoop(message_loop_, base::Bind(	365 url_protocol_(data_source, BindToLoop(message_loop_, base::Bind(

323 &FFmpegDemuxer::OnDataSourceError, base::Unretained(this)))),	366 &FFmpegDemuxer::OnDataSourceError, base::Unretained(this)))),

324 need_key_cb_(need_key_cb) {	367 need_key_cb_(need_key_cb) {

325 DCHECK(message_loop_.get());	368 DCHECK(message_loop_.get());

326 DCHECK(data_source_);	369 DCHECK(data_source_);

327 }	370 }

328	371

329 FFmpegDemuxer::~FFmpegDemuxer() {}	372 FFmpegDemuxer::~FFmpegDemuxer() {}

330	373

(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
368 audio_disabled_ = true;	411 audio_disabled_ = true;

369 StreamVector::iterator iter;	412 StreamVector::iterator iter;

370 for (iter = streams_.begin(); iter != streams_.end(); ++iter) {	413 for (iter = streams_.begin(); iter != streams_.end(); ++iter) {

371 if (*iter && (*iter)->type() == DemuxerStream::AUDIO) {	414 if (*iter && (*iter)->type() == DemuxerStream::AUDIO) {

372 (*iter)->Stop();	415 (*iter)->Stop();

373 }	416 }

374 }	417 }

375 }	418 }

376	419

377 void FFmpegDemuxer::Initialize(DemuxerHost* host,	420 void FFmpegDemuxer::Initialize(DemuxerHost* host,

378 const PipelineStatusCB& status_cb) {	421 const PipelineStatusCB& status_cb,

	422 bool enable_text_tracks) {

379 DCHECK(message_loop_->BelongsToCurrentThread());	423 DCHECK(message_loop_->BelongsToCurrentThread());

380 host_ = host;	424 host_ = host;

381 weak_this_ = weak_factory_.GetWeakPtr();	425 weak_this_ = weak_factory_.GetWeakPtr();

	426 text_enabled_ = enable_text_tracks;

382	427

383 // TODO(scherkus): DataSource should have a host by this point,	428 // TODO(scherkus): DataSource should have a host by this point,

384 // see http://crbug.com/122071	429 // see http://crbug.com/122071

385 data_source_->set_host(host);	430 data_source_->set_host(host);

386	431

387 glue_.reset(new FFmpegGlue(&url_protocol_));	432 glue_.reset(new FFmpegGlue(&url_protocol_));

388 AVFormatContext* format_context = glue_->format_context();	433 AVFormatContext* format_context = glue_->format_context();

389	434

390 // Disable ID3v1 tag reading to avoid costly seeks to end of file for data we	435 // Disable ID3v1 tag reading to avoid costly seeks to end of file for data we

391 // don't use. FFmpeg will only read ID3v1 tags if no other metadata is	436 // don't use. FFmpeg will only read ID3v1 tags if no other metadata is

(...skipping 23 matching lines...) Expand all Loading...
415 }	460 }

416 }	461 }

417 return NULL;	462 return NULL;

418 }	463 }

419	464

420 base::TimeDelta FFmpegDemuxer::GetStartTime() const {	465 base::TimeDelta FFmpegDemuxer::GetStartTime() const {

421 DCHECK(message_loop_->BelongsToCurrentThread());	466 DCHECK(message_loop_->BelongsToCurrentThread());

422 return start_time_;	467 return start_time_;

423 }	468 }

424	469

	470 void FFmpegDemuxer::AddTextStreams() {

	471 DCHECK(message_loop_->BelongsToCurrentThread());

	472

	473 for (StreamVector::size_type idx = 0; idx < streams_.size(); ++idx) {

	474 FFmpegDemuxerStream* stream = streams_[idx];

	475 if (stream == NULL || stream->type() != DemuxerStream::TEXT)

	476 continue;

	477

	478 TextKind kind = stream->GetTextKind();

	479 std::string title = stream->GetMetadata("title");

	480 std::string language = stream->GetMetadata("language");

	481

	482 host_->AddTextStream(stream, TextTrackConfig(kind, title, language));

	483 }

	484 }

	485

425 // Helper for calculating the bitrate of the media based on information stored	486 // Helper for calculating the bitrate of the media based on information stored

426 // in |format_context| or failing that the size and duration of the media.	487 // in |format_context| or failing that the size and duration of the media.

427 //	488 //

428 // Returns 0 if a bitrate could not be determined.	489 // Returns 0 if a bitrate could not be determined.

429 static int CalculateBitrate(	490 static int CalculateBitrate(

430 AVFormatContext* format_context,	491 AVFormatContext* format_context,

431 const base::TimeDelta& duration,	492 const base::TimeDelta& duration,

432 int64 filesize_in_bytes) {	493 int64 filesize_in_bytes) {

433 // If there is a bitrate set on the container, use it.	494 // If there is a bitrate set on the container, use it.

434 if (format_context->bit_rate > 0)	495 if (format_context->bit_rate > 0)

(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
533 // Log the codec detected, whether it is supported or not.	594 // Log the codec detected, whether it is supported or not.

534 UMA_HISTOGRAM_SPARSE_SLOWLY("Media.DetectedVideoCodec",	595 UMA_HISTOGRAM_SPARSE_SLOWLY("Media.DetectedVideoCodec",

535 codec_context->codec_id);	596 codec_context->codec_id);

536 // Ensure the codec is supported. IsValidConfig() also checks that the	597 // Ensure the codec is supported. IsValidConfig() also checks that the

537 // frame size and visible size are valid.	598 // frame size and visible size are valid.

538 AVStreamToVideoDecoderConfig(stream, &video_config, false);	599 AVStreamToVideoDecoderConfig(stream, &video_config, false);

539	600

540 if (!video_config.IsValidConfig())	601 if (!video_config.IsValidConfig())

541 continue;	602 continue;

542 video_stream = stream;	603 video_stream = stream;

	604 } else if (codec_type == AVMEDIA_TYPE_SUBTITLE) {

	605 if (codec_context->codec_id != AV_CODEC_ID_WEBVTT || !text_enabled_) {

	606 continue;

	607 }

543 } else {	608 } else {

544 continue;	609 continue;

545 }	610 }

546	611

547 streams_[i] = new FFmpegDemuxerStream(this, stream);	612 streams_[i] = new FFmpegDemuxerStream(this, stream);

548 max_duration = std::max(max_duration, streams_[i]->duration());	613 max_duration = std::max(max_duration, streams_[i]->duration());

549	614

550 if (stream->first_dts != static_cast<int64_t>(AV_NOPTS_VALUE)) {	615 if (stream->first_dts != static_cast<int64_t>(AV_NOPTS_VALUE)) {

551 const base::TimeDelta first_dts = ConvertFromTimeBase(	616 const base::TimeDelta first_dts = ConvertFromTimeBase(

552 stream->time_base, stream->first_dts);	617 stream->time_base, stream->first_dts);

553 if (start_time_ == kNoTimestamp() || first_dts < start_time_)	618 if (start_time_ == kNoTimestamp() || first_dts < start_time_)

554 start_time_ = first_dts;	619 start_time_ = first_dts;

555 }	620 }

556 }	621 }

557	622

558 if (!audio_stream && !video_stream) {	623 if (!audio_stream && !video_stream) {

559 status_cb.Run(DEMUXER_ERROR_NO_SUPPORTED_STREAMS);	624 status_cb.Run(DEMUXER_ERROR_NO_SUPPORTED_STREAMS);

560 return;	625 return;

561 }	626 }

562	627

	628 if (text_enabled_)

	629 AddTextStreams();

	630

563 if (format_context->duration != static_cast<int64_t>(AV_NOPTS_VALUE)) {	631 if (format_context->duration != static_cast<int64_t>(AV_NOPTS_VALUE)) {

564 // If there is a duration value in the container use that to find the	632 // If there is a duration value in the container use that to find the

565 // maximum between it and the duration from A/V streams.	633 // maximum between it and the duration from A/V streams.

566 const AVRational av_time_base = {1, AV_TIME_BASE};	634 const AVRational av_time_base = {1, AV_TIME_BASE};

567 max_duration =	635 max_duration =

568 std::max(max_duration,	636 std::max(max_duration,

569 ConvertFromTimeBase(av_time_base, format_context->duration));	637 ConvertFromTimeBase(av_time_base, format_context->duration));

570 } else {	638 } else {

571 // The duration is unknown, in which case this is likely a live stream.	639 // The duration is unknown, in which case this is likely a live stream.

572 max_duration = kInfiniteDuration();	640 max_duration = kInfiniteDuration();

(...skipping 274 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
847 }	915 }

848 for (size_t i = 0; i < buffered.size(); ++i)	916 for (size_t i = 0; i < buffered.size(); ++i)

849 host_->AddBufferedTimeRange(buffered.start(i), buffered.end(i));	917 host_->AddBufferedTimeRange(buffered.start(i), buffered.end(i));

850 }	918 }

851	919

852 void FFmpegDemuxer::OnDataSourceError() {	920 void FFmpegDemuxer::OnDataSourceError() {

853 host_->OnDemuxerError(PIPELINE_ERROR_READ);	921 host_->OnDemuxerError(PIPELINE_ERROR_READ);

854 }	922 }

855	923

856 } // namespace media	924 } // namespace media

OLD	NEW

« media/base/text_renderer_unittest.cc ('K') | « media/filters/ffmpeg_demuxer.h ('k') | media/filters/ffmpeg_demuxer_unittest.cc » ('j') | no next file with comments »