media/filters/ffmpeg_demuxer.cc - Issue 23702007: Render inband text tracks in the media pipeline

Unified Diff: media/filters/ffmpeg_demuxer.cc

Issue 23702007: Render inband text tracks in the media pipeline (Closed) Base URL: http://git.chromium.org/chromium/src.git@master

Patch Set: incorporate aaron's comments (11/12) Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: media/filters/ffmpeg_demuxer.cc

diff --git a/media/filters/ffmpeg_demuxer.cc b/media/filters/ffmpeg_demuxer.cc

index 2ba0c54074524bc84f8ebe6cfbbf83742d439a70..c3d4490621a2c83cdbded7a4090cd95edaf15454 100644

--- a/media/filters/ffmpeg_demuxer.cc

+++ b/media/filters/ffmpeg_demuxer.cc

@@ -11,11 +11,9 @@

#include "base/bind.h"

#include "base/callback.h"

#include "base/callback_helpers.h"

-#include "base/command_line.h"

#include "base/memory/scoped_ptr.h"

#include "base/message_loop/message_loop.h"

#include "base/metrics/sparse_histogram.h"

-#include "base/stl_util.h"

#include "base/strings/string_util.h"

#include "base/strings/stringprintf.h"

#include "base/sys_byteorder.h"

@@ -27,11 +25,11 @@

#include "media/base/decrypt_config.h"

#include "media/base/limits.h"

#include "media/base/media_log.h"

-#include "media/base/media_switches.h"

#include "media/base/video_decoder_config.h"

#include "media/ffmpeg/ffmpeg_common.h"

#include "media/filters/ffmpeg_glue.h"

#include "media/filters/ffmpeg_h264_to_annex_b_bitstream_converter.h"

+#include "media/filters/webvtt_util.h"

#include "media/webm/webm_crypto_helpers.h"

namespace media {

@@ -65,6 +63,9 @@ FFmpegDemuxerStream::FFmpegDemuxerStream(

AVStreamToVideoDecoderConfig(stream, &video_config_, true);

is_encrypted = video_config_.is_encrypted();

break;

+ case AVMEDIA_TYPE_SUBTITLE:

+ type_ = TEXT;

+ break;

default:

NOTREACHED();

break;

@@ -115,44 +116,67 @@ void FFmpegDemuxerStream::EnqueuePacket(ScopedAVPacket packet) {

// keep this generic so that other side_data types in the future can be

// handled the same way as well.

av_packet_split_side_data(packet.get());

- int side_data_size = 0;

- uint8* side_data = av_packet_get_side_data(

- packet.get(),

- AV_PKT_DATA_MATROSKA_BLOCKADDITIONAL,

- &side_data_size);

- // If a packet is returned by FFmpeg's av_parser_parse2() the packet will

- // reference inner memory of FFmpeg. As such we should transfer the packet

- // into memory we control.

scoped_refptr<DecoderBuffer> buffer;

- if (side_data_size > 0) {

+ if (type() == DemuxerStream::TEXT) {

+ int id_size = 0;

+ uint8* id_data = av_packet_get_side_data(

+ packet.get(),

+ AV_PKT_DATA_WEBVTT_IDENTIFIER,

+ &id_size);

+ int settings_size = 0;

+ uint8* settings_data = av_packet_get_side_data(

+ packet.get(),

+ AV_PKT_DATA_WEBVTT_SETTINGS,

+ &settings_size);

+ std::vector<uint8> side_data;

+ MakeSideData(id_data, id_data + id_size,

+ settings_data, settings_data + settings_size,

+ &side_data);

buffer = DecoderBuffer::CopyFrom(packet.get()->data, packet.get()->size,

- side_data, side_data_size);

+ side_data.data(), side_data.size());

} else {

- buffer = DecoderBuffer::CopyFrom(packet.get()->data, packet.get()->size);

- }

- int skip_samples_size = 0;

- uint8* skip_samples = av_packet_get_side_data(packet.get(),

- AV_PKT_DATA_SKIP_SAMPLES,

- &skip_samples_size);

- const int kSkipSamplesValidSize = 10;

- const int kSkipSamplesOffset = 4;

- if (skip_samples_size >= kSkipSamplesValidSize) {

- int discard_padding_samples = base::ByteSwapToLE32(

- *(reinterpret_cast<const uint32*>(skip_samples +

- kSkipSamplesOffset)));

- // TODO(vigneshv): Change decoder buffer to use number of samples so that

- // this conversion can be avoided.

- buffer->set_discard_padding(base::TimeDelta::FromMicroseconds(

- discard_padding_samples * 1000000.0 /

- audio_decoder_config().samples_per_second()));

+ int side_data_size = 0;

+ uint8* side_data = av_packet_get_side_data(

+ packet.get(),

+ AV_PKT_DATA_MATROSKA_BLOCKADDITIONAL,

+ &side_data_size);

+ // If a packet is returned by FFmpeg's av_parser_parse2() the packet will

+ // reference inner memory of FFmpeg. As such we should transfer the packet

+ // into memory we control.

+ if (side_data_size > 0) {

+ buffer = DecoderBuffer::CopyFrom(packet.get()->data, packet.get()->size,

+ side_data, side_data_size);

+ } else {

+ buffer = DecoderBuffer::CopyFrom(packet.get()->data, packet.get()->size);

+ }

+ int skip_samples_size = 0;

+ uint8* skip_samples = av_packet_get_side_data(packet.get(),

+ AV_PKT_DATA_SKIP_SAMPLES,

+ &skip_samples_size);

+ const int kSkipSamplesValidSize = 10;

+ const int kSkipSamplesOffset = 4;

+ if (skip_samples_size >= kSkipSamplesValidSize) {

+ int discard_padding_samples = base::ByteSwapToLE32(

+ *(reinterpret_cast<const uint32*>(skip_samples +

+ kSkipSamplesOffset)));

+ // TODO(vigneshv): Change decoder buffer to use number of samples so that

+ // this conversion can be avoided.

+ buffer->set_discard_padding(base::TimeDelta::FromMicroseconds(

+ discard_padding_samples * 1000000.0 /

+ audio_decoder_config().samples_per_second()));

+ }

}

if ((type() == DemuxerStream::AUDIO && audio_config_.is_encrypted()) ||

(type() == DemuxerStream::VIDEO && video_config_.is_encrypted())) {

scoped_ptr<DecryptConfig> config(WebMCreateDecryptConfig(

- packet->data, packet->size,

+ packet->data, packet->size,

reinterpret_cast<const uint8*>(encryption_key_id_.data()),

encryption_key_id_.size()));

if (!config)

@@ -290,6 +314,27 @@ bool FFmpegDemuxerStream::HasAvailableCapacity() {

return buffer_queue_.IsEmpty() || buffer_queue_.Duration() < kCapacity;

}

+TextKind FFmpegDemuxerStream::GetTextKind() const {

+ DCHECK_EQ(type_, DemuxerStream::TEXT);

+ if (stream_->disposition & AV_DISPOSITION_CAPTIONS)

+ return kTextCaptions;

+ if (stream_->disposition & AV_DISPOSITION_DESCRIPTIONS)

+ return kTextDescriptions;

+ if (stream_->disposition & AV_DISPOSITION_METADATA)

+ return kTextMetadata;

+ return kTextSubtitles;

+}

+std::string FFmpegDemuxerStream::GetMetadata(const char* key) const {

+ const AVDictionaryEntry* entry =

+ av_dict_get(stream_->metadata, key, NULL, 0);

+ return (entry == NULL || entry->value == NULL) ? "" : entry->value;

+}

// static

base::TimeDelta FFmpegDemuxerStream::ConvertStreamTimestamp(

const AVRational& time_base, int64 timestamp) {

@@ -318,6 +363,7 @@ FFmpegDemuxer::FFmpegDemuxer(

bitrate_(0),

start_time_(kNoTimestamp()),

audio_disabled_(false),

+ text_enabled_(false),

duration_known_(false),

url_protocol_(data_source, BindToLoop(message_loop_, base::Bind(

&FFmpegDemuxer::OnDataSourceError, base::Unretained(this)))),

@@ -375,10 +421,12 @@ void FFmpegDemuxer::OnAudioRendererDisabled() {

}

void FFmpegDemuxer::Initialize(DemuxerHost* host,

- const PipelineStatusCB& status_cb) {

+ const PipelineStatusCB& status_cb,

+ bool enable_text_tracks) {

DCHECK(message_loop_->BelongsToCurrentThread());

host_ = host;

weak_this_ = weak_factory_.GetWeakPtr();

+ text_enabled_ = enable_text_tracks;

// TODO(scherkus): DataSource should have a host by this point,

// see http://crbug.com/122071

@@ -422,6 +470,22 @@ base::TimeDelta FFmpegDemuxer::GetStartTime() const {

return start_time_;

}

+void FFmpegDemuxer::AddTextStreams() {

+ DCHECK(message_loop_->BelongsToCurrentThread());

+ for (StreamVector::size_type idx = 0; idx < streams_.size(); ++idx) {

+ FFmpegDemuxerStream* stream = streams_[idx];

+ if (stream == NULL || stream->type() != DemuxerStream::TEXT)

+ continue;

+ TextKind kind = stream->GetTextKind();

+ std::string title = stream->GetMetadata("title");

+ std::string language = stream->GetMetadata("language");

+ host_->AddTextStream(stream, TextTrackConfig(kind, title, language));

+ }

+}

// Helper for calculating the bitrate of the media based on information stored

// in |format_context| or failing that the size and duration of the media.

@@ -540,6 +604,10 @@ void FFmpegDemuxer::OnFindStreamInfoDone(const PipelineStatusCB& status_cb,

if (!video_config.IsValidConfig())

continue;

video_stream = stream;

+ } else if (codec_type == AVMEDIA_TYPE_SUBTITLE) {

+ if (codec_context->codec_id != AV_CODEC_ID_WEBVTT || !text_enabled_) {

+ continue;

+ }

} else {

continue;

}

@@ -560,6 +628,9 @@ void FFmpegDemuxer::OnFindStreamInfoDone(const PipelineStatusCB& status_cb,

return;

}

+ if (text_enabled_)

+ AddTextStreams();

if (format_context->duration != static_cast<int64_t>(AV_NOPTS_VALUE)) {

// If there is a duration value in the container use that to find the

// maximum between it and the duration from A/V streams.

« media/filters/chunk_demuxer_unittest.cc ('K') | « media/filters/ffmpeg_demuxer.h ('k') | media/filters/ffmpeg_demuxer_unittest.cc » ('j') | media/filters/ffmpeg_demuxer_unittest.cc » ('J')