media/base/audio_splicer.cc - Issue 156783003: Enhance AudioSplicer to crossfade marked splice frames.

Unified Diff: media/base/audio_splicer.cc

Issue 156783003: Enhance AudioSplicer to crossfade marked splice frames. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Resolve comments. Created 6 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: media/base/audio_splicer.cc

diff --git a/media/base/audio_splicer.cc b/media/base/audio_splicer.cc

index 14b4199e0e3389d8d32478fae511333915cfcf12..6e39c93c57d385af8384410a30eb82f1512e910e 100644

--- a/media/base/audio_splicer.cc

+++ b/media/base/audio_splicer.cc

@@ -5,12 +5,15 @@

#include "media/base/audio_splicer.h"

#include <cstdlib>

+#include <deque>

#include "base/logging.h"

#include "media/base/audio_buffer.h"

+#include "media/base/audio_bus.h"

#include "media/base/audio_decoder_config.h"

#include "media/base/audio_timestamp_helper.h"

#include "media/base/buffers.h"

+#include "media/base/vector_math.h"

namespace media {

@@ -20,22 +23,69 @@ namespace media {

// roughly represents the duration of 2 compressed AAC or MP3 frames.

static const int kMaxTimeDeltaInMilliseconds = 50;

-AudioSplicer::AudioSplicer(int samples_per_second)

+// Minimum gap size needed before the splicer will take action to

+// fill a gap. This avoids periodically inserting and then dropping samples

+// when the buffer timestamps are slightly off because of timestamp rounding

+// in the source content. Unit is frames.

+static const int kMinGapSize = 2;

+// The number of milliseconds to crossfade before trimming when buffers overlap.

+static const int kCrossfadeDurationInMilliseconds = 5;

+typedef std::deque<scoped_refptr<AudioBuffer> > BufferQueue;

+class AudioStreamSanitizer {

+ public:

+ AudioStreamSanitizer(int samples_per_second);

+ ~AudioStreamSanitizer();

+ // Resets the sanitizer state by clearing the output buffers queue,

+ // and resetting the timestamp helper.

+ void Reset();

+ // Adds a new buffer full of samples or end of stream buffer to the sanitizer.

+ // Returns true if the buffer was accepted. False is returned if an error

+ // occurred.

+ bool AddInput(const scoped_refptr<AudioBuffer>& input);

+ // Returns true if the sanitizer has a buffer to return.

+ bool HasNextBuffer() const;

+ // Removes the next buffer from the output buffer queue and returns it.

+ // This should only be called if HasNextBuffer() returns true.

+ scoped_refptr<AudioBuffer> GetNextBuffer();

+ const scoped_refptr<AudioBuffer>& PeekNextBuffer() const;

+ // Get the current timestamp. This value is computed based on the first

acolwell GONE FROM CHROMIUM 2014/02/18 23:22:59 nit: word missing?

DaleCurtis 2014/02/19 03:05:14 Yeah, the methods below need comments too; I just

DaleCurtis 2014/02/22 00:59:04 Done.

+ // buffer's timestamp and the number of frames that have been added so far.

+ base::TimeDelta GetTimestamp() const;

+ // Get the total duration of all buffers added to the output queue so far.

+ base::TimeDelta GetDuration() const;

+ int64 frame_count() const { return output_timestamp_helper_.frame_count(); }

+ private:

+ void AddOutputBuffer(const scoped_refptr<AudioBuffer>& buffer);

+ AudioTimestampHelper output_timestamp_helper_;

+ BufferQueue output_buffers_;

+ bool received_end_of_stream_;

+};

+AudioStreamSanitizer::AudioStreamSanitizer(int samples_per_second)

: output_timestamp_helper_(samples_per_second),

- min_gap_size_(2),

- received_end_of_stream_(false) {

+ received_end_of_stream_(false) {}

-AudioSplicer::~AudioSplicer() {

+AudioStreamSanitizer::~AudioStreamSanitizer() {}

-void AudioSplicer::Reset() {

+void AudioStreamSanitizer::Reset() {

output_timestamp_helper_.SetBaseTimestamp(kNoTimestamp());

output_buffers_.clear();

received_end_of_stream_ = false;

}

-bool AudioSplicer::AddInput(const scoped_refptr<AudioBuffer>& input) {

+bool AudioStreamSanitizer::AddInput(const scoped_refptr<AudioBuffer>& input) {

DCHECK(!received_end_of_stream_ || input->end_of_stream());

if (input->end_of_stream()) {

@@ -69,7 +119,7 @@ bool AudioSplicer::AddInput(const scoped_refptr<AudioBuffer>& input) {

if (delta != base::TimeDelta())

frames_to_fill = output_timestamp_helper_.GetFramesToTarget(timestamp);

- if (frames_to_fill == 0 || std::abs(frames_to_fill) < min_gap_size_) {

+ if (frames_to_fill == 0 || std::abs(frames_to_fill) < kMinGapSize) {

AddOutputBuffer(input);

return true;

}

@@ -92,11 +142,16 @@ bool AudioSplicer::AddInput(const scoped_refptr<AudioBuffer>& input) {

return true;

}

- int frames_to_skip = -frames_to_fill;

+ // Overlapping buffers marked as splice frames are handled by AudioSplicer,

+ // but decoder and demuxer quirks may sometimes produce overlapping samples

+ // which need to be sanitized.

+ //

+ // A crossfade can't be done here because only the current buffer is available

+ // at this point, not previous buffers.

DVLOG(1) << "Overlap detected @ " << expected_timestamp.InMicroseconds()

- << " us: " << -delta.InMicroseconds() << " us";

+ << " us: " << -delta.InMicroseconds() << " us";

+ int frames_to_skip = -frames_to_fill;

if (input->frame_count() <= frames_to_skip) {

DVLOG(1) << "Dropping whole buffer";

return true;

@@ -104,27 +159,232 @@ bool AudioSplicer::AddInput(const scoped_refptr<AudioBuffer>& input) {

// Copy the trailing samples that do not overlap samples already output

// into a new buffer. Add this new buffer to the output queue.

- //

- // TODO(acolwell): Implement a cross-fade here so the transition is less

- // jarring.

input->TrimStart(frames_to_skip);

AddOutputBuffer(input);

return true;

}

-bool AudioSplicer::HasNextBuffer() const {

+bool AudioStreamSanitizer::HasNextBuffer() const {

return !output_buffers_.empty();

}

-scoped_refptr<AudioBuffer> AudioSplicer::GetNextBuffer() {

+scoped_refptr<AudioBuffer> AudioStreamSanitizer::GetNextBuffer() {

scoped_refptr<AudioBuffer> ret = output_buffers_.front();

output_buffers_.pop_front();

return ret;

}

-void AudioSplicer::AddOutputBuffer(const scoped_refptr<AudioBuffer>& buffer) {

+const scoped_refptr<AudioBuffer>& AudioStreamSanitizer::PeekNextBuffer() const {

+ return output_buffers_.front();

+void AudioStreamSanitizer::AddOutputBuffer(

+ const scoped_refptr<AudioBuffer>& buffer) {

output_timestamp_helper_.AddFrames(buffer->frame_count());

output_buffers_.push_back(buffer);

}

+base::TimeDelta AudioStreamSanitizer::GetTimestamp() const {

+ return output_timestamp_helper_.GetTimestamp();

+base::TimeDelta AudioStreamSanitizer::GetDuration() const {

+ DCHECK(output_timestamp_helper_.base_timestamp() != kNoTimestamp());

+ return output_timestamp_helper_.GetTimestamp() -

+ output_timestamp_helper_.base_timestamp();

+AudioSplicer::AudioSplicer(int samples_per_second)

+ : sanitizer_(new AudioStreamSanitizer(samples_per_second)),

acolwell GONE FROM CHROMIUM 2014/02/18 23:22:59 Are these pointers just so that you can hide the d

DaleCurtis 2014/02/19 03:05:14 Correct. I could move the decl to the header file

+ pre_splice_sanitizer_(new AudioStreamSanitizer(samples_per_second)),

+ post_splice_sanitizer_(new AudioStreamSanitizer(samples_per_second)),

+ splice_timestamp_(kNoTimestamp()),

+ crossfade_frame_count_(

+ (samples_per_second *

+ static_cast<double>(kCrossfadeDurationInMilliseconds)) /

+ base::Time::kMillisecondsPerSecond) {}

+AudioSplicer::~AudioSplicer() {}

+void AudioSplicer::Reset() {

+ sanitizer_->Reset();

+ pre_splice_sanitizer_->Reset();

+ post_splice_sanitizer_->Reset();

+ splice_timestamp_ = kNoTimestamp();

+bool AudioSplicer::AddInput(const scoped_refptr<AudioBuffer>& input) {

+ // If we're not processing a splice, add the input to the output queue.

+ if (splice_timestamp_ == kNoTimestamp())

+ return sanitizer_->AddInput(input);

acolwell GONE FROM CHROMIUM 2014/02/18 23:22:59 nit: s/sanitizer_/output_sanitizer_/?

DaleCurtis 2014/02/22 00:59:04 Done.

+ // If we're still receiving buffers before the splice point figure out which

+ // sanitizer (if any) to put them in.

+ if (!post_splice_sanitizer_->HasNextBuffer()) {

+ DCHECK(!input->end_of_stream());

+ // If the provided buffer is entirely before the splice point it can also be

+ // added to the output queue.

+ if (input->timestamp() + input->duration() < splice_timestamp_)

+ return sanitizer_->AddInput(input);

+ // If we're processing a splice and the input buffer does not overlap any of

+ // the existing buffers, append it to the splice queue for processing.

+ if (input->timestamp() >= pre_splice_sanitizer_->GetTimestamp())

+ return pre_splice_sanitizer_->AddInput(input);

+ // We've received the first overlapping buffer.

+ }

+ // At this point we have all the fade out preroll buffers from the decoder.

+ // We now need to wait until we have enough data to perform the crossfade (or

+ // we receive an end of stream).

+ if (!post_splice_sanitizer_->AddInput(input))

+ return false;

+ if (!input->end_of_stream() &&

+ post_splice_sanitizer_->frame_count() < crossfade_frame_count_) {

+ // TODO(dalecurtis): What if the next buffer we receive is the start of

+ // another splice frame? See comment in SetSpliceTimestamp below.

+ return true;

+ }

+ const int frames_to_crossfade =

+ std::min(crossfade_frame_count_,

+ static_cast<int>(post_splice_sanitizer_->frame_count()));

+ const base::TimeDelta splice_end_timestamp = std::min(

+ post_splice_sanitizer_->GetDuration(),

+ splice_timestamp_ +

+ base::TimeDelta::FromMilliseconds(kCrossfadeDurationInMilliseconds));

+ const int channel_count =

+ pre_splice_sanitizer_->PeekNextBuffer()->channel_count();

+ DCHECK_EQ(channel_count,

+ post_splice_sanitizer_->PeekNextBuffer()->channel_count());

+ // Allocate output buffer for crossfade.

+ scoped_refptr<AudioBuffer> crossfade_buffer = AudioBuffer::CreateBuffer(

+ kSampleFormatPlanarF32, channel_count, frames_to_crossfade);

+ crossfade_buffer->set_timestamp(splice_timestamp_);

+ crossfade_buffer->set_duration(splice_end_timestamp - splice_timestamp_);

+ // AudioBuffer::ReadFrames() only allows output into an AudioBus, so wrap

+ // our AudioBuffer in one so we can avoid extra data copies.

+ scoped_ptr<AudioBus> crossfade_bus_wrapper =

+ AudioBus::CreateWrapper(crossfade_buffer->channel_count());

+ for (int ch = 0; ch < crossfade_buffer->channel_count(); ++ch) {

+ crossfade_bus_wrapper->SetChannelData(

+ ch, reinterpret_cast<float*>(crossfade_buffer->channel_data()[ch]));

+ }

+ // Transfer out preroll buffers involved in the splice, drop those not.

+ ExtractCrossfadeFromPreroll(crossfade_bus_wrapper.get());

acolwell GONE FROM CHROMIUM 2014/02/18 23:22:59 nit: s/Preroll/PreSplice/ ?. It seems like you are

DaleCurtis 2014/02/19 03:05:14 I'm not partial to any names, I used preroll here

+ DCHECK(!pre_splice_sanitizer_->HasNextBuffer());

+ // Insert the crossfade buffer into the output queue now so post splice

+ // buffers can be added in processing order. We will still modify the buffer

+ // during the crossfade step.

+ sanitizer_->AddInput(crossfade_buffer);

+ // Since we don't want to care what format the AudioBuffers are in, we need to

+ // use an intermediary AudioBus to convert the data to float.

+ scoped_ptr<AudioBus> post_splice_bus = AudioBus::Create(

+ crossfade_bus_wrapper->channels(), crossfade_bus_wrapper->frames());

+ ExtractCrossfadeFromPostroll(post_splice_bus.get());

+ // Crossfade the audio into |crossfade_buffer|.

+ for (int ch = 0; ch < crossfade_bus_wrapper->channels(); ++ch) {

+ vector_math::Crossfade(post_splice_bus->channel(ch),

+ frames_to_crossfade,

+ crossfade_bus_wrapper->channel(ch));

+ }

+ // Clear the splice timestamp so new splices can be accepted.

+ splice_timestamp_ = kNoTimestamp();

+ return true;

+bool AudioSplicer::HasNextBuffer() const {

+ return sanitizer_->HasNextBuffer();

+scoped_refptr<AudioBuffer> AudioSplicer::GetNextBuffer() {

+ return sanitizer_->GetNextBuffer();

+void AudioSplicer::SetSpliceTimestamp(base::TimeDelta splice_timestamp) {

+ DCHECK(splice_timestamp != kNoTimestamp());

+ if (splice_timestamp_ == splice_timestamp)

acolwell GONE FROM CHROMIUM 2014/02/18 23:22:59 Why are we allowing this?

DaleCurtis 2014/02/19 03:05:14 Essentially to allow callers to not have to worry

+ return;

+ DCHECK(splice_timestamp_ == kNoTimestamp());

+ splice_timestamp_ = splice_timestamp;

+ pre_splice_sanitizer_->Reset();

+ post_splice_sanitizer_->Reset();

acolwell GONE FROM CHROMIUM 2014/02/18 23:22:59 nit: I wonder if these should be at the bottom of

DaleCurtis 2014/02/19 03:05:14 I wondered that as well, I think it's fine.

DaleCurtis 2014/02/22 00:59:04 Done.

+ // TODO(dalecurtis): We may need the concept of a future_splice_timestamp_ to

+ // handle cases where another splice comes in before we've received 5ms of data

+ // from the last one.

+void AudioSplicer::ExtractCrossfadeFromPreroll(AudioBus* output_bus) {

+ int frames_read = 0;

+ while (pre_splice_sanitizer_->HasNextBuffer() &&

+ frames_read < output_bus->frames()) {

+ scoped_refptr<AudioBuffer> preroll = pre_splice_sanitizer_->GetNextBuffer();

+ int read_offset = 0;

+ if (splice_timestamp_ > preroll->timestamp()) {

+ // This should only happen if the splice point is within the preroll

+ // buffer somewhere. Earlier code should have put it in |sanitizer_|

+ // otherwise.

+ DCHECK(preroll->timestamp() + preroll->duration() >= splice_timestamp_);

+ read_offset =

+ preroll->frame_count() * preroll->duration().InMillisecondsF() /

acolwell GONE FROM CHROMIUM 2014/02/18 23:22:59 nit: Any reason to not use SecondsF? It's 5 chars

DaleCurtis 2014/02/22 00:59:04 Done.

+ (splice_timestamp_ - preroll->timestamp()).InMillisecondsF();

+ }

+ const int frames_to_read = std::min(preroll->frame_count() - read_offset,

+ output_bus->frames() - frames_read);

+ preroll->ReadFrames(frames_to_read, read_offset, frames_read, output_bus);

+ frames_read += frames_to_read;

+ // If only part of the buffer was consumed, trim it appropriately and stick

+ // it into the output queue.

+ if (read_offset) {

+ preroll->TrimEnd(preroll->frame_count() - read_offset);

+ sanitizer_->AddInput(preroll);

+ }

+ // All necessary buffers have been processed, it's safe to reset.

+ DCHECK_EQ(output_bus->frames(), frames_read);

+ pre_splice_sanitizer_->Reset();

+void AudioSplicer::ExtractCrossfadeFromPostroll(AudioBus* output_bus) {

+ int frames_read = 0;

+ while (post_splice_sanitizer_->HasNextBuffer() &&

+ frames_read < output_bus->frames()) {

+ scoped_refptr<AudioBuffer> postroll =

+ post_splice_sanitizer_->GetNextBuffer();

+ const int frames_to_read = std::min(

+ postroll->frame_count(), output_bus->frames() - frames_read);

+ postroll->ReadFrames(frames_to_read, 0, frames_read, output_bus);

+ frames_read += frames_to_read;

+ // If only part of the buffer was consumed, trim it appropriately and stick

+ // it into the output queue.

+ if (frames_to_read < postroll->frame_count()) {

+ postroll->TrimStart(frames_to_read);

+ sanitizer_->AddInput(postroll);

+ }

+ DCHECK_EQ(output_bus->frames(), frames_read);

+ // Transfer all remaining buffers out.

+ while (post_splice_sanitizer_->HasNextBuffer())

+ sanitizer_->AddInput(post_splice_sanitizer_->GetNextBuffer());

} // namespace media

« no previous file with comments | « media/base/audio_splicer.h ('k') | media/base/audio_timestamp_helper.h » ('j') | no next file with comments »