Index: media/filters/ffmpeg_demuxer.cc |
diff --git a/media/filters/ffmpeg_demuxer.cc b/media/filters/ffmpeg_demuxer.cc |
index 2ba0c54074524bc84f8ebe6cfbbf83742d439a70..c3d4490621a2c83cdbded7a4090cd95edaf15454 100644 |
--- a/media/filters/ffmpeg_demuxer.cc |
+++ b/media/filters/ffmpeg_demuxer.cc |
@@ -11,11 +11,9 @@ |
#include "base/bind.h" |
#include "base/callback.h" |
#include "base/callback_helpers.h" |
-#include "base/command_line.h" |
#include "base/memory/scoped_ptr.h" |
#include "base/message_loop/message_loop.h" |
#include "base/metrics/sparse_histogram.h" |
-#include "base/stl_util.h" |
#include "base/strings/string_util.h" |
#include "base/strings/stringprintf.h" |
#include "base/sys_byteorder.h" |
@@ -27,11 +25,11 @@ |
#include "media/base/decrypt_config.h" |
#include "media/base/limits.h" |
#include "media/base/media_log.h" |
-#include "media/base/media_switches.h" |
#include "media/base/video_decoder_config.h" |
#include "media/ffmpeg/ffmpeg_common.h" |
#include "media/filters/ffmpeg_glue.h" |
#include "media/filters/ffmpeg_h264_to_annex_b_bitstream_converter.h" |
+#include "media/filters/webvtt_util.h" |
#include "media/webm/webm_crypto_helpers.h" |
namespace media { |
@@ -65,6 +63,9 @@ FFmpegDemuxerStream::FFmpegDemuxerStream( |
AVStreamToVideoDecoderConfig(stream, &video_config_, true); |
is_encrypted = video_config_.is_encrypted(); |
break; |
+ case AVMEDIA_TYPE_SUBTITLE: |
+ type_ = TEXT; |
+ break; |
default: |
NOTREACHED(); |
break; |
@@ -115,44 +116,67 @@ void FFmpegDemuxerStream::EnqueuePacket(ScopedAVPacket packet) { |
// keep this generic so that other side_data types in the future can be |
// handled the same way as well. |
av_packet_split_side_data(packet.get()); |
- int side_data_size = 0; |
- uint8* side_data = av_packet_get_side_data( |
- packet.get(), |
- AV_PKT_DATA_MATROSKA_BLOCKADDITIONAL, |
- &side_data_size); |
- |
- // If a packet is returned by FFmpeg's av_parser_parse2() the packet will |
- // reference inner memory of FFmpeg. As such we should transfer the packet |
- // into memory we control. |
scoped_refptr<DecoderBuffer> buffer; |
- if (side_data_size > 0) { |
+ |
+ if (type() == DemuxerStream::TEXT) { |
+ int id_size = 0; |
+ uint8* id_data = av_packet_get_side_data( |
+ packet.get(), |
+ AV_PKT_DATA_WEBVTT_IDENTIFIER, |
+ &id_size); |
+ |
+ int settings_size = 0; |
+ uint8* settings_data = av_packet_get_side_data( |
+ packet.get(), |
+ AV_PKT_DATA_WEBVTT_SETTINGS, |
+ &settings_size); |
+ |
+ std::vector<uint8> side_data; |
+ MakeSideData(id_data, id_data + id_size, |
+ settings_data, settings_data + settings_size, |
+ &side_data); |
+ |
buffer = DecoderBuffer::CopyFrom(packet.get()->data, packet.get()->size, |
- side_data, side_data_size); |
+ side_data.data(), side_data.size()); |
} else { |
- buffer = DecoderBuffer::CopyFrom(packet.get()->data, packet.get()->size); |
- } |
- |
- int skip_samples_size = 0; |
- uint8* skip_samples = av_packet_get_side_data(packet.get(), |
- AV_PKT_DATA_SKIP_SAMPLES, |
- &skip_samples_size); |
- const int kSkipSamplesValidSize = 10; |
- const int kSkipSamplesOffset = 4; |
- if (skip_samples_size >= kSkipSamplesValidSize) { |
- int discard_padding_samples = base::ByteSwapToLE32( |
- *(reinterpret_cast<const uint32*>(skip_samples + |
- kSkipSamplesOffset))); |
- // TODO(vigneshv): Change decoder buffer to use number of samples so that |
- // this conversion can be avoided. |
- buffer->set_discard_padding(base::TimeDelta::FromMicroseconds( |
- discard_padding_samples * 1000000.0 / |
- audio_decoder_config().samples_per_second())); |
+ int side_data_size = 0; |
+ uint8* side_data = av_packet_get_side_data( |
+ packet.get(), |
+ AV_PKT_DATA_MATROSKA_BLOCKADDITIONAL, |
+ &side_data_size); |
+ |
+ // If a packet is returned by FFmpeg's av_parser_parse2() the packet will |
+ // reference inner memory of FFmpeg. As such we should transfer the packet |
+ // into memory we control. |
+ if (side_data_size > 0) { |
+ buffer = DecoderBuffer::CopyFrom(packet.get()->data, packet.get()->size, |
+ side_data, side_data_size); |
+ } else { |
+ buffer = DecoderBuffer::CopyFrom(packet.get()->data, packet.get()->size); |
+ } |
+ |
+ int skip_samples_size = 0; |
+ uint8* skip_samples = av_packet_get_side_data(packet.get(), |
+ AV_PKT_DATA_SKIP_SAMPLES, |
+ &skip_samples_size); |
+ const int kSkipSamplesValidSize = 10; |
+ const int kSkipSamplesOffset = 4; |
+ if (skip_samples_size >= kSkipSamplesValidSize) { |
+ int discard_padding_samples = base::ByteSwapToLE32( |
+ *(reinterpret_cast<const uint32*>(skip_samples + |
+ kSkipSamplesOffset))); |
+ // TODO(vigneshv): Change decoder buffer to use number of samples so that |
+ // this conversion can be avoided. |
+ buffer->set_discard_padding(base::TimeDelta::FromMicroseconds( |
+ discard_padding_samples * 1000000.0 / |
+ audio_decoder_config().samples_per_second())); |
+ } |
} |
if ((type() == DemuxerStream::AUDIO && audio_config_.is_encrypted()) || |
(type() == DemuxerStream::VIDEO && video_config_.is_encrypted())) { |
scoped_ptr<DecryptConfig> config(WebMCreateDecryptConfig( |
- packet->data, packet->size, |
+ packet->data, packet->size, |
reinterpret_cast<const uint8*>(encryption_key_id_.data()), |
encryption_key_id_.size())); |
if (!config) |
@@ -290,6 +314,27 @@ bool FFmpegDemuxerStream::HasAvailableCapacity() { |
return buffer_queue_.IsEmpty() || buffer_queue_.Duration() < kCapacity; |
} |
+TextKind FFmpegDemuxerStream::GetTextKind() const { |
+ DCHECK_EQ(type_, DemuxerStream::TEXT); |
+ |
+ if (stream_->disposition & AV_DISPOSITION_CAPTIONS) |
+ return kTextCaptions; |
+ |
+ if (stream_->disposition & AV_DISPOSITION_DESCRIPTIONS) |
+ return kTextDescriptions; |
+ |
+ if (stream_->disposition & AV_DISPOSITION_METADATA) |
+ return kTextMetadata; |
+ |
+ return kTextSubtitles; |
+} |
+ |
+std::string FFmpegDemuxerStream::GetMetadata(const char* key) const { |
+ const AVDictionaryEntry* entry = |
+ av_dict_get(stream_->metadata, key, NULL, 0); |
+ return (entry == NULL || entry->value == NULL) ? "" : entry->value; |
+} |
+ |
// static |
base::TimeDelta FFmpegDemuxerStream::ConvertStreamTimestamp( |
const AVRational& time_base, int64 timestamp) { |
@@ -318,6 +363,7 @@ FFmpegDemuxer::FFmpegDemuxer( |
bitrate_(0), |
start_time_(kNoTimestamp()), |
audio_disabled_(false), |
+ text_enabled_(false), |
duration_known_(false), |
url_protocol_(data_source, BindToLoop(message_loop_, base::Bind( |
&FFmpegDemuxer::OnDataSourceError, base::Unretained(this)))), |
@@ -375,10 +421,12 @@ void FFmpegDemuxer::OnAudioRendererDisabled() { |
} |
void FFmpegDemuxer::Initialize(DemuxerHost* host, |
- const PipelineStatusCB& status_cb) { |
+ const PipelineStatusCB& status_cb, |
+ bool enable_text_tracks) { |
DCHECK(message_loop_->BelongsToCurrentThread()); |
host_ = host; |
weak_this_ = weak_factory_.GetWeakPtr(); |
+ text_enabled_ = enable_text_tracks; |
// TODO(scherkus): DataSource should have a host by this point, |
// see http://crbug.com/122071 |
@@ -422,6 +470,22 @@ base::TimeDelta FFmpegDemuxer::GetStartTime() const { |
return start_time_; |
} |
+void FFmpegDemuxer::AddTextStreams() { |
+ DCHECK(message_loop_->BelongsToCurrentThread()); |
+ |
+ for (StreamVector::size_type idx = 0; idx < streams_.size(); ++idx) { |
+ FFmpegDemuxerStream* stream = streams_[idx]; |
+ if (stream == NULL || stream->type() != DemuxerStream::TEXT) |
+ continue; |
+ |
+ TextKind kind = stream->GetTextKind(); |
+ std::string title = stream->GetMetadata("title"); |
+ std::string language = stream->GetMetadata("language"); |
+ |
+ host_->AddTextStream(stream, TextTrackConfig(kind, title, language)); |
+ } |
+} |
+ |
// Helper for calculating the bitrate of the media based on information stored |
// in |format_context| or failing that the size and duration of the media. |
// |
@@ -540,6 +604,10 @@ void FFmpegDemuxer::OnFindStreamInfoDone(const PipelineStatusCB& status_cb, |
if (!video_config.IsValidConfig()) |
continue; |
video_stream = stream; |
+ } else if (codec_type == AVMEDIA_TYPE_SUBTITLE) { |
+ if (codec_context->codec_id != AV_CODEC_ID_WEBVTT || !text_enabled_) { |
+ continue; |
+ } |
} else { |
continue; |
} |
@@ -560,6 +628,9 @@ void FFmpegDemuxer::OnFindStreamInfoDone(const PipelineStatusCB& status_cb, |
return; |
} |
+ if (text_enabled_) |
+ AddTextStreams(); |
+ |
if (format_context->duration != static_cast<int64_t>(AV_NOPTS_VALUE)) { |
// If there is a duration value in the container use that to find the |
// maximum between it and the duration from A/V streams. |