content/renderer/media/audio_track_recorder.cc - Issue 1579693006: MediaRecorder: support sampling rate adaption in AudioTrackRecorder

Unified Diff: content/renderer/media/audio_track_recorder.cc

Issue 1579693006: MediaRecorder: support sampling rate adaption in AudioTrackRecorder (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « content/renderer/media/audio_track_recorder.h ('k') | content/renderer/media/audio_track_recorder_unittest.cc » ('j') | content/renderer/media/audio_track_recorder_unittest.cc » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: content/renderer/media/audio_track_recorder.cc

diff --git a/content/renderer/media/audio_track_recorder.cc b/content/renderer/media/audio_track_recorder.cc

index 71f35cb888d521bd7a6b7209399ac7c0c3ac9c36..4244590b43cb5ba95a96046c03eb31a3698283e6 100644

--- a/content/renderer/media/audio_track_recorder.cc

+++ b/content/renderer/media/audio_track_recorder.cc

@@ -12,38 +12,94 @@

#include "base/stl_util.h"

#include "media/audio/audio_parameters.h"

#include "media/base/audio_bus.h"

+#include "media/base/audio_converter.h"

+#include "media/base/audio_fifo.h"

#include "media/base/bind_to_current_loop.h"

#include "third_party/opus/src/include/opus.h"

// Note that this code follows the Chrome media convention of defining a "frame"

-// as "one multi-channel sample" as opposed to another common definition

-// meaning "a chunk of samples". Here this second definition of "frame" is

-// called a "buffer"; so what might be called "frame duration" is instead

-// "buffer duration", and so on.

+// as "one multi-channel sample" as opposed to another common definition meaning

+// "a chunk of samples". Here this second definition of "frame" is called a

+// "buffer"; so what might be called "frame duration" is instead "buffer

+// duration", and so on.

namespace content {

namespace {

-enum {

- // This is the recommended value, according to documentation in

- // third_party/opus/src/include/opus.h, so that the Opus encoder does not

- // degrade the audio due to memory constraints.

- OPUS_MAX_PAYLOAD_SIZE = 4000,

+// Maximum number of buffers that can be held in the AudioFifo of AudioEncoder.

+static const size_t kMaxNumberOfFifoBuffers = 2;

- // Support for max sampling rate of 48KHz, 2 channels, 60 ms duration.

- MAX_SAMPLES_PER_BUFFER = 48 * 2 * 60,

-};

+// Recommended value for opus_encode_float(), according to documentation in

+// third_party/opus/src/include/opus.h, so that the Opus encoder does not

+// degrade the audio due to memory constraints.

+static const int kOpusMaxDataBytes = 4000;

+// Opus preferred sampling rate for encoding. This is also the one webm likes to

+// have: https://wiki.xiph.org/MatroskaOpus.

+static const int kOpusPreferredSamplingRate = 48000;

+// Media Stream Audio Tracks always send 10ms worth of Audio, which happens to

+// be Opus-friendly (https://tools.ietf.org/html/rfc6716#section-2.1.4).

+static const int kMediaStreamTrackBufferDurationMs = 10;

+// The number of frames in a 10 ms buffer @ 48000 samples/second.

+static const int kOpusPreferredFramesPerBuffer = 480;

miu 2016/01/22 00:14:53 Opus will produce higher quality audio if encoding

mcasas 2016/01/22 22:03:53 Done. Note that the input will still be chunks of

+static_assert(kOpusPreferredFramesPerBuffer ==

+ kOpusPreferredSamplingRate *

+ kMediaStreamTrackBufferDurationMs /

+ 1000,

+ "kOpusPreferredFramesPerBuffer should be =="

+ "kOpusPreferredSamplingRate * kMediaStreamTrackBufferDurationMs");

+bool DoEncode(OpusEncoder* opus_encoder,

+ float* data_in,

+ int num_samples,

+ std::string* data_out) {

+ DCHECK_EQ(kOpusPreferredFramesPerBuffer, num_samples);

+ data_out->resize(kOpusMaxDataBytes);

+ const opus_int32 result = opus_encode_float(

+ opus_encoder, data_in, num_samples,

+ reinterpret_cast<uint8_t*>(string_as_array(data_out)), kOpusMaxDataBytes);

+ if (result > 1) {

+ // TODO(ajose): Investigate improving this. http://crbug.com/547918

+ data_out->resize(result);

+ return true;

+ }

+ // If |result| in {0,1}, do nothing; the documentation says that a return

+ // value of zero or one means the packet does not need to be transmitted.

+ // Otherwise, we have an error.

+ DLOG_IF(ERROR, result < 0) << " encode failed: " << opus_strerror(result);

+ return false;

+// Interleaves |audio_bus| channels() of floats into a single output linear

+// |buffer|. (AudioBus::ToInterleaved() does not support float).

miu 2016/01/22 00:14:53 It probably should! ;) This code was originally

mcasas 2016/01/22 22:03:53 Bug it is. Happy to do it or it can be a GoodFirstBug

+void ToInterleaved(media::AudioBus* audio_bus, float* buffer) {

+ for (int ch = 0; ch < audio_bus->channels(); ++ch) {

+ const float* src = audio_bus->channel(ch);

+ const float* const src_end = src + audio_bus->frames();

+ float* dest = buffer + ch;

+ for (; src < src_end; ++src, dest += audio_bus->channels())

+ *dest = *src;

+ }

} // anonymous namespace

-// Nested class encapsulating opus-related encoding details.

-// AudioEncoder is created and destroyed on ATR's main thread (usually the

-// main render thread) but otherwise should operate entirely on

-// |encoder_thread_|, which is owned by AudioTrackRecorder. Be sure to delete

-// |encoder_thread_| before deleting the AudioEncoder using it.

+// Nested class encapsulating opus-related encoding details. It contains an

+// AudioConverter to adapt incoming data to the format Opus likes to have.

+// AudioEncoder is created and destroyed on ATR's main thread (usually the main

+// render thread) but otherwise should operate entirely on |encoder_thread_|,

+// which is owned by AudioTrackRecorder. Be sure to delete |encoder_thread_|

+// before deleting the AudioEncoder using it.

class AudioTrackRecorder::AudioEncoder

- : public base::RefCountedThreadSafe<AudioEncoder> {

+ : public base::RefCountedThreadSafe<AudioEncoder>,

+ public media::AudioConverter::InputCallback {

public:

explicit AudioEncoder(const OnEncodedAudioCB& on_encoded_audio_cb)

: on_encoded_audio_cb_(on_encoded_audio_cb), opus_encoder_(nullptr) {

@@ -54,6 +110,10 @@ class AudioTrackRecorder::AudioEncoder

encoder_thread_checker_.DetachFromThread();

}

+ // media::AudioConverter::InputCallback implementation.

+ double ProvideInput(media::AudioBus* audio_bus,

miu 2016/01/22 00:14:53 This should be private, since it's only meant to b

mcasas 2016/01/22 22:03:53 Done.

+ base::TimeDelta buffer_delay) override;

void OnSetFormat(const media::AudioParameters& params);

void EncodeAudio(scoped_ptr<media::AudioBus> audio_bus,

@@ -62,33 +122,25 @@ class AudioTrackRecorder::AudioEncoder

private:

friend class base::RefCountedThreadSafe<AudioEncoder>;

- ~AudioEncoder();

+ ~AudioEncoder() override;

bool is_initialized() const { return !!opus_encoder_; }

void DestroyExistingOpusEncoder();

- void TransferSamplesIntoBuffer(const media::AudioBus* audio_bus,

- int source_offset,

- int buffer_fill_offset,

- int num_samples);

- bool EncodeFromFilledBuffer(std::string* out);

const OnEncodedAudioCB on_encoded_audio_cb_;

base::ThreadChecker encoder_thread_checker_;

- // In the case where a call to EncodeAudio() cannot completely fill the

- // buffer, this points to the position at which to populate data in a later

- // call.

- int buffer_fill_end_;

- int frames_per_buffer_;

+ // Track Audio (ingress) and Opus encoder input parameters, respectively. They

+ // only differ in their sample_rate() and frames_per_buffer(): output is

+ // 48ksamples/s and 480, respectively.

+ media::AudioParameters input_params_;

+ media::AudioParameters output_params_;

- // The duration of one set of frames of encoded audio samples.

- base::TimeDelta buffer_duration_;

- media::AudioParameters audio_params_;

+ // Adapts the provided sampling rate to one that OpusEncoder supports.

+ scoped_ptr<media::AudioConverter> converter_;

+ scoped_ptr<media::AudioFifo> fifo_;

// Buffer for passing AudioBus data to OpusEncoder.

scoped_ptr<float[]> buffer_;

@@ -104,46 +156,66 @@ AudioTrackRecorder::AudioEncoder::~AudioEncoder() {

DestroyExistingOpusEncoder();

}

+double AudioTrackRecorder::AudioEncoder::ProvideInput(

+ media::AudioBus* audio_bus,

+ base::TimeDelta buffer_delay) {

+ if (fifo_->frames() >= audio_bus->frames())

+ fifo_->Consume(audio_bus, 0, audio_bus->frames());

+ else

+ audio_bus->Zero();

+ // Return volume greater than zero to indicate we have more data.

+ return 1.0;

void AudioTrackRecorder::AudioEncoder::OnSetFormat(

- const media::AudioParameters& params) {

+ const media::AudioParameters& input_params) {

+ DVLOG(1) << __FUNCTION__;

DCHECK(encoder_thread_checker_.CalledOnValidThread());

- if (audio_params_.Equals(params))

+ if (input_params_.Equals(input_params))

return;

DestroyExistingOpusEncoder();

- if (!params.IsValid()) {

- DLOG(ERROR) << "Invalid audio params: " << params.AsHumanReadableString();

- return;

- }

- buffer_duration_ = base::TimeDelta::FromMilliseconds(

- AudioTrackRecorder::GetOpusBufferDuration(params.sample_rate()));

- if (buffer_duration_ == base::TimeDelta()) {

- DLOG(ERROR) << "Could not find a valid |buffer_duration| for the given "

- << "sample rate: " << params.sample_rate();

- return;

- }

- frames_per_buffer_ =

- params.sample_rate() * buffer_duration_.InMilliseconds() / 1000;

- if (frames_per_buffer_ * params.channels() > MAX_SAMPLES_PER_BUFFER) {

- DLOG(ERROR) << "Invalid |frames_per_buffer_|: " << frames_per_buffer_;

+ if (!input_params.IsValid() || input_params.channels() > 2) {

+ DLOG(ERROR) << "Invalid params: " << input_params.AsHumanReadableString();

return;

}

- // Initialize AudioBus buffer for OpusEncoder.

- buffer_fill_end_ = 0;

- buffer_.reset(new float[params.channels() * frames_per_buffer_]);

+ input_params_ = input_params;

+ input_params_.set_frames_per_buffer(input_params_.sample_rate() *

miu 2016/01/22 00:14:53 Shouldn't this be: input_params_.set_frames_per

mcasas 2016/01/22 22:03:53 No, input is always 10ms, the input sampling rate

+ kMediaStreamTrackBufferDurationMs /

+ 1000);

+ output_params_ = media::AudioParameters(

+ media::AudioParameters::AUDIO_PCM_LOW_LATENCY,

+ media::GuessChannelLayout(input_params_.channels()),

+ kOpusPreferredSamplingRate,

+ input_params_.bits_per_sample(),

+ kOpusPreferredFramesPerBuffer);

+ DVLOG(1) << "|input_params_|:" << input_params_.AsHumanReadableString()

+ << " -->|output_params_|:" << output_params_.AsHumanReadableString();

+ converter_.reset(new media::AudioConverter(input_params_, output_params_,

+ false /* disable_fifo */));

+ converter_->AddInput(this);

mcasas 2016/01/22 22:03:53 add here |converter_->PrimeWithSilence()|

+ fifo_.reset(new media::AudioFifo(

+ input_params_.channels(),

+ kMaxNumberOfFifoBuffers * input_params_.frames_per_buffer()));

+ buffer_.reset(

+ new float[output_params_.channels() * kOpusPreferredFramesPerBuffer *

+ output_params_.bits_per_sample() / 8]);

miu 2016/01/22 00:14:53 bits_per_sample() is erroneous (it really should b

mcasas 2016/01/22 22:03:53 Done.

// Initialize OpusEncoder.

int opus_result;

- opus_encoder_ = opus_encoder_create(params.sample_rate(), params.channels(),

- OPUS_APPLICATION_AUDIO, &opus_result);

+ opus_encoder_ = opus_encoder_create(output_params_.sample_rate(),

+ output_params_.channels(),

+ OPUS_APPLICATION_AUDIO,

+ &opus_result);

if (opus_result < 0) {

DLOG(ERROR) << "Couldn't init opus encoder: " << opus_strerror(opus_result)

- << ", sample rate: " << params.sample_rate()

- << ", channels: " << params.channels();

+ << ", sample rate: " << output_params_.sample_rate()

+ << ", channels: " << output_params_.channels();

return;

}

@@ -155,45 +227,32 @@ void AudioTrackRecorder::AudioEncoder::OnSetFormat(

DLOG(ERROR) << "Failed to set opus bitrate.";

return;

}

- audio_params_ = params;

}

void AudioTrackRecorder::AudioEncoder::EncodeAudio(

- scoped_ptr<media::AudioBus> audio_bus,

+ scoped_ptr<media::AudioBus> input_bus,

const base::TimeTicks& capture_time) {

+ DVLOG(1) << __FUNCTION__ << ", #frames " << input_bus->frames();

DCHECK(encoder_thread_checker_.CalledOnValidThread());

- DCHECK_EQ(audio_bus->channels(), audio_params_.channels());

+ DCHECK_EQ(input_bus->channels(), input_params_.channels());

+ DCHECK_EQ(input_bus->frames(), input_params_.frames_per_buffer());

+ DCHECK(!capture_time.is_null());

+ DCHECK(converter_);

if (!is_initialized())

return;

- base::TimeDelta buffer_fill_duration =

- buffer_fill_end_ * buffer_duration_ / frames_per_buffer_;

- base::TimeTicks buffer_capture_time = capture_time - buffer_fill_duration;

- // Encode all audio in |audio_bus| into zero or more packets.

- int src_pos = 0;

- while (src_pos < audio_bus->frames()) {

- const int num_samples_to_xfer = std::min(

- frames_per_buffer_ - buffer_fill_end_, audio_bus->frames() - src_pos);

- TransferSamplesIntoBuffer(audio_bus.get(), src_pos, buffer_fill_end_,

- num_samples_to_xfer);

- src_pos += num_samples_to_xfer;

- buffer_fill_end_ += num_samples_to_xfer;

- if (buffer_fill_end_ < frames_per_buffer_)

- break;

- scoped_ptr<std::string> encoded_data(new std::string());

- if (EncodeFromFilledBuffer(encoded_data.get())) {

- on_encoded_audio_cb_.Run(audio_params_, std::move(encoded_data),

- buffer_capture_time);

- }

- // Reset the capture timestamp and internal buffer for next set of frames.

- buffer_capture_time += buffer_duration_;

- buffer_fill_end_ = 0;

+ fifo_->Push(input_bus.release()); // Push received data into |fifo_|.

+ scoped_ptr<media::AudioBus> audio_bus = media::AudioBus::Create(

+ output_params_.channels(), kOpusPreferredFramesPerBuffer);

+ converter_->Convert(audio_bus.get());

miu 2016/01/22 00:14:53 You can't call convert until you know there are en

mcasas 2016/01/22 22:03:53 Noted. This is experimented in another unrelated C

+ ToInterleaved(audio_bus.release(), buffer_.get());

+ scoped_ptr<std::string> encoded_data(new std::string());

+ if (DoEncode(opus_encoder_, buffer_.get(), kOpusPreferredFramesPerBuffer,

+ encoded_data.get())) {

+ on_encoded_audio_cb_.Run(output_params_, std::move(encoded_data),

+ capture_time);

}

@@ -206,48 +265,6 @@ void AudioTrackRecorder::AudioEncoder::DestroyExistingOpusEncoder() {

}

-void AudioTrackRecorder::AudioEncoder::TransferSamplesIntoBuffer(

- const media::AudioBus* audio_bus,

- int source_offset,

- int buffer_fill_offset,

- int num_samples) {

- // TODO(ajose): Consider replacing with AudioBus::ToInterleaved().

- // http://crbug.com/547918

- DCHECK(encoder_thread_checker_.CalledOnValidThread());

- DCHECK(is_initialized());

- // Opus requires channel-interleaved samples in a single array.

- for (int ch = 0; ch < audio_bus->channels(); ++ch) {

- const float* src = audio_bus->channel(ch) + source_offset;

- const float* const src_end = src + num_samples;

- float* dest =

- buffer_.get() + buffer_fill_offset * audio_params_.channels() + ch;

- for (; src < src_end; ++src, dest += audio_params_.channels())

- *dest = *src;

- }

-bool AudioTrackRecorder::AudioEncoder::EncodeFromFilledBuffer(

- std::string* out) {

- DCHECK(encoder_thread_checker_.CalledOnValidThread());

- DCHECK(is_initialized());

- out->resize(OPUS_MAX_PAYLOAD_SIZE);

- const opus_int32 result = opus_encode_float(

- opus_encoder_, buffer_.get(), frames_per_buffer_,

- reinterpret_cast<uint8_t*>(string_as_array(out)), OPUS_MAX_PAYLOAD_SIZE);

- if (result > 1) {

- // TODO(ajose): Investigate improving this. http://crbug.com/547918

- out->resize(result);

- return true;

- }

- // If |result| in {0,1}, do nothing; the documentation says that a return

- // value of zero or one means the packet does not need to be transmitted.

- // Otherwise, we have an error.

- DLOG_IF(ERROR, result < 0) << __FUNCTION__

- << " failed: " << opus_strerror(result);

- return false;

AudioTrackRecorder::AudioTrackRecorder(

const blink::WebMediaStreamTrack& track,

const OnEncodedAudioCB& on_encoded_audio_cb)

@@ -297,22 +314,4 @@ void AudioTrackRecorder::OnData(const media::AudioBus& audio_bus,

base::Passed(&audio_data), capture_time));

}

-int AudioTrackRecorder::GetOpusBufferDuration(int sample_rate) {

- // Valid buffer durations in milliseconds. Note there are other valid

- // durations for Opus, see https://tools.ietf.org/html/rfc6716#section-2.1.4

- // Descending order as longer durations can increase compression performance.

- const std::vector<int> opus_valid_buffer_durations_ms = {60, 40, 20, 10};

- // Search for a duration such that |sample_rate| % |buffers_per_second| == 0,

- // where |buffers_per_second| = 1000ms / |possible_duration|.

- for (auto possible_duration : opus_valid_buffer_durations_ms) {

- if (sample_rate * possible_duration % 1000 == 0) {

- return possible_duration;

- }

- // Otherwise, couldn't find a good duration.

- return 0;

} // namespace content