Chromium Code Reviews| Index: media/base/audio_splicer.cc |
| diff --git a/media/base/audio_splicer.cc b/media/base/audio_splicer.cc |
| index 14b4199e0e3389d8d32478fae511333915cfcf12..aa5bad9b16605032f0530e05880c205588b6889b 100644 |
| --- a/media/base/audio_splicer.cc |
| +++ b/media/base/audio_splicer.cc |
| @@ -5,12 +5,15 @@ |
| #include "media/base/audio_splicer.h" |
| #include <cstdlib> |
| +#include <deque> |
| #include "base/logging.h" |
| #include "media/base/audio_buffer.h" |
| +#include "media/base/audio_bus.h" |
| #include "media/base/audio_decoder_config.h" |
| #include "media/base/audio_timestamp_helper.h" |
| #include "media/base/buffers.h" |
| +#include "media/base/vector_math.h" |
| namespace media { |
| @@ -20,22 +23,104 @@ namespace media { |
| // roughly represents the duration of 2 compressed AAC or MP3 frames. |
| static const int kMaxTimeDeltaInMilliseconds = 50; |
| -AudioSplicer::AudioSplicer(int samples_per_second) |
| - : output_timestamp_helper_(samples_per_second), |
| - min_gap_size_(2), |
| - received_end_of_stream_(false) { |
| +// Minimum gap size needed before the splicer will take action to |
| +// fill a gap. This avoids periodically inserting and then dropping samples |
| +// when the buffer timestamps are slightly off because of timestamp rounding |
| +// in the source content. Unit is frames. |
| +static const int kMinGapSize = 2; |
| + |
| +// The number of milliseconds to crossfade before trimming when buffers overlap. |
| +static const int kCrossfadeDurationInMilliseconds = 5; |
| + |
| +// AudioBuffer::TrimStart() is not as accurate as the timestamp helper, so |
| +// manually adjust the duration and timestamp after trimming. |
| +static void AccurateTrimStart(int frames_to_trim, |
| + const scoped_refptr<AudioBuffer> buffer, |
| + const AudioTimestampHelper& timestamp_helper) { |
| + buffer->TrimStart(frames_to_trim); |
| + buffer->set_timestamp(timestamp_helper.GetTimestamp()); |
| + buffer->set_duration( |
| + timestamp_helper.GetFrameDuration(buffer->frame_count())); |
| } |
| -AudioSplicer::~AudioSplicer() { |
| +// AudioBuffer::TrimEnd() is not as accurate as the timestamp helper, so |
| +// manually adjust the duration after trimming. |
| +static void AccurateTrimEnd(int frames_to_trim, |
| + const scoped_refptr<AudioBuffer> buffer, |
| + const AudioTimestampHelper& timestamp_helper) { |
| + DCHECK(buffer->timestamp() == timestamp_helper.GetTimestamp()); |
| + buffer->TrimEnd(frames_to_trim); |
| + buffer->set_duration( |
| + timestamp_helper.GetFrameDuration(buffer->frame_count())); |
| } |
| -void AudioSplicer::Reset() { |
| - output_timestamp_helper_.SetBaseTimestamp(kNoTimestamp()); |
| +class AudioStreamSanitizer { |
| + public: |
| + explicit AudioStreamSanitizer(int samples_per_second); |
| + ~AudioStreamSanitizer(); |
| + |
| + // Resets the sanitizer state by clearing the output buffers queue, and |
| + // resetting the timestamp helper. |
| + void Reset(); |
| + |
| + // Similar to Reset(), but initializes the timestamp helper with the given |
| + // parameters. |
| + void ResetTimestampState(int64 frame_count, base::TimeDelta base_timestamp); |
| + |
| + // Adds a new buffer full of samples or end of stream buffer to the sanitizer. |
| + // Returns true if the buffer was accepted. False is returned if an error |
| + // occurred. |
| + bool AddInput(const scoped_refptr<AudioBuffer>& input); |
| + |
| + // Returns true if the sanitizer has a buffer to return. |
| + bool HasNextBuffer() const; |
| + |
| + // Removes the next buffer from the output buffer queue and returns it; should |
| + // only be called if HasNextBuffer() returns true. |
| + scoped_refptr<AudioBuffer> GetNextBuffer(); |
| + |
| + // Returns the total frame count of all buffers available for output. |
| + int GetFrameCount() const; |
| + |
| + // Returns the duration of all buffers added to the output queue thus far. |
| + base::TimeDelta GetDuration() const; |
| + |
| + const AudioTimestampHelper& timestamp_helper() { |
| + return output_timestamp_helper_; |
| + } |
| + |
| + private: |
| + void AddOutputBuffer(const scoped_refptr<AudioBuffer>& buffer); |
| + |
| + AudioTimestampHelper output_timestamp_helper_; |
| + bool received_end_of_stream_; |
| + |
| + typedef std::deque<scoped_refptr<AudioBuffer> > BufferQueue; |
| + BufferQueue output_buffers_; |
| + |
| + DISALLOW_ASSIGN(AudioStreamSanitizer); |
| +}; |
| + |
| +AudioStreamSanitizer::AudioStreamSanitizer(int samples_per_second) |
| + : output_timestamp_helper_(samples_per_second), |
| + received_end_of_stream_(false) {} |
| + |
| +AudioStreamSanitizer::~AudioStreamSanitizer() {} |
| + |
| +void AudioStreamSanitizer::Reset() { |
| + ResetTimestampState(0, kNoTimestamp()); |
| +} |
| + |
| +void AudioStreamSanitizer::ResetTimestampState(int64 frame_count, |
| + base::TimeDelta base_timestamp) { |
| output_buffers_.clear(); |
| received_end_of_stream_ = false; |
| + output_timestamp_helper_.SetBaseTimestamp(base_timestamp); |
| + if (frame_count > 0) |
| + output_timestamp_helper_.AddFrames(frame_count); |
| } |
| -bool AudioSplicer::AddInput(const scoped_refptr<AudioBuffer>& input) { |
| +bool AudioStreamSanitizer::AddInput(const scoped_refptr<AudioBuffer>& input) { |
| DCHECK(!received_end_of_stream_ || input->end_of_stream()); |
| if (input->end_of_stream()) { |
| @@ -56,9 +141,10 @@ bool AudioSplicer::AddInput(const scoped_refptr<AudioBuffer>& input) { |
| return false; |
| } |
| - base::TimeDelta timestamp = input->timestamp(); |
| - base::TimeDelta expected_timestamp = output_timestamp_helper_.GetTimestamp(); |
| - base::TimeDelta delta = timestamp - expected_timestamp; |
| + const base::TimeDelta timestamp = input->timestamp(); |
| + const base::TimeDelta expected_timestamp = |
| + output_timestamp_helper_.GetTimestamp(); |
| + const base::TimeDelta delta = timestamp - expected_timestamp; |
| if (std::abs(delta.InMilliseconds()) > kMaxTimeDeltaInMilliseconds) { |
| DVLOG(1) << "Timestamp delta too large: " << delta.InMicroseconds() << "us"; |
| @@ -69,7 +155,7 @@ bool AudioSplicer::AddInput(const scoped_refptr<AudioBuffer>& input) { |
| if (delta != base::TimeDelta()) |
| frames_to_fill = output_timestamp_helper_.GetFramesToTarget(timestamp); |
| - if (frames_to_fill == 0 || std::abs(frames_to_fill) < min_gap_size_) { |
| + if (frames_to_fill == 0 || std::abs(frames_to_fill) < kMinGapSize) { |
| AddOutputBuffer(input); |
| return true; |
| } |
| @@ -92,11 +178,16 @@ bool AudioSplicer::AddInput(const scoped_refptr<AudioBuffer>& input) { |
| return true; |
| } |
| - int frames_to_skip = -frames_to_fill; |
| - |
| + // Overlapping buffers marked as splice frames are handled by AudioSplicer, |
| + // but decoder and demuxer quirks may sometimes produce overlapping samples |
| + // which need to be sanitized. |
| + // |
| + // A crossfade can't be done here because only the current buffer is available |
| + // at this point, not previous buffers. |
| DVLOG(1) << "Overlap detected @ " << expected_timestamp.InMicroseconds() |
| - << " us: " << -delta.InMicroseconds() << " us"; |
| + << " us: " << -delta.InMicroseconds() << " us"; |
| + const int frames_to_skip = -frames_to_fill; |
| if (input->frame_count() <= frames_to_skip) { |
| DVLOG(1) << "Dropping whole buffer"; |
| return true; |
| @@ -104,27 +195,277 @@ bool AudioSplicer::AddInput(const scoped_refptr<AudioBuffer>& input) { |
| // Copy the trailing samples that do not overlap samples already output |
| // into a new buffer. Add this new buffer to the output queue. |
| - // |
| - // TODO(acolwell): Implement a cross-fade here so the transition is less |
|
[Review comment] acolwell GONE FROM CHROMIUM (2014/02/28 18:50:27): nit: I think this comment should stay. For the "ba… [truncated]
[Reply] DaleCurtis (2014/02/28 21:14:26): I'll leave it, but it's kind of impossible to impl… [truncated]
|
| - // jarring. |
| - input->TrimStart(frames_to_skip); |
| + AccurateTrimStart(frames_to_skip, input, output_timestamp_helper_); |
| AddOutputBuffer(input); |
| return true; |
| } |
| -bool AudioSplicer::HasNextBuffer() const { |
| +bool AudioStreamSanitizer::HasNextBuffer() const { |
| return !output_buffers_.empty(); |
| } |
| -scoped_refptr<AudioBuffer> AudioSplicer::GetNextBuffer() { |
| +scoped_refptr<AudioBuffer> AudioStreamSanitizer::GetNextBuffer() { |
| scoped_refptr<AudioBuffer> ret = output_buffers_.front(); |
| output_buffers_.pop_front(); |
| return ret; |
| } |
| -void AudioSplicer::AddOutputBuffer(const scoped_refptr<AudioBuffer>& buffer) { |
| +void AudioStreamSanitizer::AddOutputBuffer( |
| + const scoped_refptr<AudioBuffer>& buffer) { |
| output_timestamp_helper_.AddFrames(buffer->frame_count()); |
| output_buffers_.push_back(buffer); |
| } |
| +int AudioStreamSanitizer::GetFrameCount() const { |
| + int frame_count = 0; |
| + for (BufferQueue::const_iterator it = output_buffers_.begin(); |
| + it != output_buffers_.end(); ++it) { |
| + frame_count += (*it)->frame_count(); |
| + } |
| + return frame_count; |
| +} |
| + |
| +base::TimeDelta AudioStreamSanitizer::GetDuration() const { |
| + DCHECK(output_timestamp_helper_.base_timestamp() != kNoTimestamp()); |
| + return output_timestamp_helper_.GetTimestamp() - |
| + output_timestamp_helper_.base_timestamp(); |
| +} |
| + |
| +AudioSplicer::AudioSplicer(int samples_per_second) |
| + : max_crossfade_duration_( |
| + base::TimeDelta::FromMilliseconds(kCrossfadeDurationInMilliseconds)), |
| + splice_timestamp_(kNoTimestamp()), |
| + output_sanitizer_(new AudioStreamSanitizer(samples_per_second)), |
| + pre_splice_sanitizer_(new AudioStreamSanitizer(samples_per_second)), |
| + post_splice_sanitizer_(new AudioStreamSanitizer(samples_per_second)) {} |
| + |
| +AudioSplicer::~AudioSplicer() {} |
| + |
| +void AudioSplicer::Reset() { |
| + output_sanitizer_->Reset(); |
| + pre_splice_sanitizer_->Reset(); |
| + post_splice_sanitizer_->Reset(); |
| + splice_timestamp_ = kNoTimestamp(); |
| +} |
| + |
| +bool AudioSplicer::AddInput(const scoped_refptr<AudioBuffer>& input) { |
| + // If we're not processing a splice, add the input to the output queue. |
| + if (splice_timestamp_ == kNoTimestamp()) { |
| + DCHECK(!pre_splice_sanitizer_->HasNextBuffer()); |
| + DCHECK(!post_splice_sanitizer_->HasNextBuffer()); |
| + return output_sanitizer_->AddInput(input); |
| + } |
| + |
| + // If we're still receiving buffers before the splice point figure out which |
| + // sanitizer (if any) to put them in. |
| + if (!post_splice_sanitizer_->HasNextBuffer()) { |
| + DCHECK(!input->end_of_stream()); |
| + |
| + // If the provided buffer is entirely before the splice point it can also be |
| + // added to the output queue. |
| + if (input->timestamp() + input->duration() < splice_timestamp_) { |
| + DCHECK(!pre_splice_sanitizer_->HasNextBuffer()); |
| + return output_sanitizer_->AddInput(input); |
| + } |
| + |
| + // If we've encountered the first pre splice buffer, reset the pre splice |
| + // sanitizer based on |output_sanitizer_|. This is done so that gaps and |
| + // overlaps between buffers across the sanitizers are accounted for prior |
| + // to calculating crossfade. |
| + if (!pre_splice_sanitizer_->HasNextBuffer()) { |
| + pre_splice_sanitizer_->ResetTimestampState( |
| + output_sanitizer_->timestamp_helper().frame_count(), |
| + output_sanitizer_->timestamp_helper().base_timestamp()); |
| + } |
| + |
| + // If we're processing a splice and the input buffer does not overlap any of |
| + // the existing buffers, append it to the splice queue for processing. |
| + if (!pre_splice_sanitizer_->HasNextBuffer() || |
| + input->timestamp() != splice_timestamp_) { |
| + return pre_splice_sanitizer_->AddInput(input); |
| + } |
| + |
| + // We've received the first overlapping buffer. |
| + } |
| + |
| + // At this point we have all the fade out preroll buffers from the decoder. |
| + // We now need to wait until we have enough data to perform the crossfade (or |
| + // we receive an end of stream). |
| + if (!post_splice_sanitizer_->AddInput(input)) |
| + return false; |
| + |
| + if (!input->end_of_stream() && |
| + post_splice_sanitizer_->GetDuration() < max_crossfade_duration_) { |
| + return true; |
| + } |
| + |
| + // Transfer out the preroll buffers involved in the splice and drop those that |
| + // are not. Since we don't want to care what format the AudioBuffers are in, |
| + // we need to use an intermediary AudioBus to convert the data to float. |
| + scoped_ptr<AudioBus> pre_splice_bus = ExtractCrossfadeFromPreSplice(); |
| + |
| + // Allocate output buffer for crossfade. |
| + scoped_refptr<AudioBuffer> crossfade_buffer = |
| + AudioBuffer::CreateBuffer(kSampleFormatPlanarF32, |
| + pre_splice_bus->channels(), |
| + pre_splice_bus->frames()); |
| + |
| + // Use the calculated timestamp and duration to ensure there's no extra gaps |
| + // or overlaps to process when adding the buffer to |output_sanitizer_|. |
| + const AudioTimestampHelper& output_ts_helper = |
| + output_sanitizer_->timestamp_helper(); |
| + crossfade_buffer->set_timestamp(output_ts_helper.GetTimestamp()); |
| + crossfade_buffer->set_duration( |
| + output_ts_helper.GetFrameDuration(pre_splice_bus->frames())); |
| + |
| + // AudioBuffer::ReadFrames() only allows output into an AudioBus, so wrap |
| + // our AudioBuffer in one so we can avoid extra data copies. |
| + scoped_ptr<AudioBus> crossfade_bus_wrapper = |
| + AudioBus::CreateWrapper(crossfade_buffer->channel_count()); |
|
[Review comment] acolwell GONE FROM CHROMIUM (2014/02/28 18:50:27): nit: Please move this and the following 5 lines in… [truncated]
[Reply] DaleCurtis (2014/02/28 21:14:26): Done.
|
| + crossfade_bus_wrapper->set_frames(crossfade_buffer->frame_count()); |
| + for (int ch = 0; ch < crossfade_buffer->channel_count(); ++ch) { |
| + crossfade_bus_wrapper->SetChannelData( |
| + ch, reinterpret_cast<float*>(crossfade_buffer->channel_data()[ch])); |
| + } |
| + |
| + // Insert the crossfade buffer into the output queue now so post splice |
| + // buffers can be added in processing order. We will still modify the buffer |
| + // during the crossfade step. |
| + CHECK(output_sanitizer_->AddInput(crossfade_buffer)); |
| + DCHECK_EQ(crossfade_buffer->frame_count(), crossfade_bus_wrapper->frames()); |
| + |
| + ExtractCrossfadeFromPostSplice(crossfade_bus_wrapper.get()); |
| + |
| + // Crossfade the audio into |crossfade_buffer|. |
| + for (int ch = 0; ch < crossfade_bus_wrapper->channels(); ++ch) { |
| + vector_math::Crossfade(pre_splice_bus->channel(ch), |
| + pre_splice_bus->frames(), |
| + crossfade_bus_wrapper->channel(ch)); |
| + } |
| + |
| + // Clear the splice timestamp so new splices can be accepted. |
| + splice_timestamp_ = kNoTimestamp(); |
| + return true; |
| +} |
| + |
| +bool AudioSplicer::HasNextBuffer() const { |
| + return output_sanitizer_->HasNextBuffer(); |
| +} |
| + |
| +scoped_refptr<AudioBuffer> AudioSplicer::GetNextBuffer() { |
| + return output_sanitizer_->GetNextBuffer(); |
| +} |
| + |
| +void AudioSplicer::SetSpliceTimestamp(base::TimeDelta splice_timestamp) { |
| + DCHECK(splice_timestamp != kNoTimestamp()); |
| + if (splice_timestamp_ == splice_timestamp) |
| + return; |
| + |
| + // TODO(dalecurtis): We may need the concept of a future_splice_timestamp_ to |
| + // handle cases where another splice comes in before we've received 5ms of |
| + // data from the last one. Leave this as a CHECK for now to figure out if |
| + // this case is possible. |
| + CHECK(splice_timestamp_ == kNoTimestamp()); |
| + splice_timestamp_ = splice_timestamp; |
| +} |
| + |
| +scoped_ptr<AudioBus> AudioSplicer::ExtractCrossfadeFromPreSplice() { |
| + const AudioTimestampHelper& output_ts_helper = |
| + output_sanitizer_->timestamp_helper(); |
| + |
| + // Ensure |output_sanitizer_| has a valid base timestamp so we can use it for |
| + // timestamp calculations. |
| + if (output_ts_helper.base_timestamp() == kNoTimestamp()) { |
| + output_sanitizer_->ResetTimestampState( |
| + 0, pre_splice_sanitizer_->timestamp_helper().base_timestamp()); |
| + } |
| + |
| + int frames_before_splice = |
| + output_ts_helper.GetFramesToTarget(splice_timestamp_); |
| + |
| + // Determine the crossfade frame count based on the frames available in each |
| + // sanitizer, capped to the maximum crossfade duration. |
| + const int max_crossfade_frame_count = |
| + output_ts_helper.GetFramesToTarget(splice_timestamp_ + |
| + max_crossfade_duration_) - |
| + frames_before_splice; |
| + const int frames_to_crossfade = std::min( |
| + max_crossfade_frame_count, |
| + std::min(pre_splice_sanitizer_->GetFrameCount() - frames_before_splice, |
| + post_splice_sanitizer_->GetFrameCount())); |
| + |
| + int frames_read = 0; |
| + scoped_ptr<AudioBus> output_bus; |
| + while (pre_splice_sanitizer_->HasNextBuffer() && |
| + frames_read < frames_to_crossfade) { |
| + scoped_refptr<AudioBuffer> preroll = pre_splice_sanitizer_->GetNextBuffer(); |
| + |
| + // We don't know the channel count until we see the first buffer, so wait |
| + // until the first buffer to allocate the output AudioBus. |
| + if (!output_bus) { |
| + output_bus = |
| + AudioBus::Create(preroll->channel_count(), frames_to_crossfade); |
| + } |
| + |
| + // There may be enough of a gap introduced during decoding such that an |
| + // entire buffer exists before the splice point. |
| + if (frames_before_splice >= preroll->frame_count()) { |
| + frames_before_splice -= preroll->frame_count(); |
| + CHECK(output_sanitizer_->AddInput(preroll)); |
| + continue; |
| + } |
| + |
| + const int frames_to_read = |
| + std::min(preroll->frame_count() - frames_before_splice, |
| + output_bus->frames() - frames_read); |
| + preroll->ReadFrames( |
| + frames_to_read, frames_before_splice, frames_read, output_bus.get()); |
| + frames_read += frames_to_read; |
| + |
| + // If only part of the buffer was consumed, trim it appropriately and stick |
| + // it into the output queue. |
| + if (frames_before_splice) { |
| + AccurateTrimEnd(preroll->frame_count() - frames_before_splice, |
| + preroll, |
| + output_ts_helper); |
| + CHECK(output_sanitizer_->AddInput(preroll)); |
| + frames_before_splice = 0; |
| + } |
| + } |
| + |
| + // All necessary buffers have been processed, it's safe to reset. |
| + pre_splice_sanitizer_->Reset(); |
| + DCHECK_EQ(output_bus->frames(), frames_read); |
| + DCHECK_EQ(output_ts_helper.GetFramesToTarget(splice_timestamp_), 0); |
| + return output_bus.Pass(); |
| +} |
| + |
| +void AudioSplicer::ExtractCrossfadeFromPostSplice(AudioBus* output_bus) { |
| + int frames_read = 0; |
| + while (post_splice_sanitizer_->HasNextBuffer() && |
| + frames_read < output_bus->frames()) { |
| + scoped_refptr<AudioBuffer> postroll = |
| + post_splice_sanitizer_->GetNextBuffer(); |
| + const int frames_to_read = |
| + std::min(postroll->frame_count(), output_bus->frames() - frames_read); |
| + postroll->ReadFrames(frames_to_read, 0, frames_read, output_bus); |
| + frames_read += frames_to_read; |
| + |
| + // If only part of the buffer was consumed, trim it appropriately and stick |
| + // it into the output queue. |
| + if (frames_to_read < postroll->frame_count()) { |
| + AccurateTrimStart( |
| + frames_to_read, postroll, output_sanitizer_->timestamp_helper()); |
| + CHECK(output_sanitizer_->AddInput(postroll)); |
| + } |
| + } |
| + |
| + DCHECK_EQ(output_bus->frames(), frames_read); |
| + |
| + // Transfer all remaining buffers out and reset once empty. |
| + while (post_splice_sanitizer_->HasNextBuffer()) |
| + CHECK(output_sanitizer_->AddInput(post_splice_sanitizer_->GetNextBuffer())); |
| + post_splice_sanitizer_->Reset(); |
| +} |
| + |
| } // namespace media |