Chromium Code Reviews| Index: content/renderer/media/audio_track_recorder.cc |
| diff --git a/content/renderer/media/audio_track_recorder.cc b/content/renderer/media/audio_track_recorder.cc |
| index 71f35cb888d521bd7a6b7209399ac7c0c3ac9c36..4244590b43cb5ba95a96046c03eb31a3698283e6 100644 |
| --- a/content/renderer/media/audio_track_recorder.cc |
| +++ b/content/renderer/media/audio_track_recorder.cc |
| @@ -12,38 +12,94 @@ |
| #include "base/stl_util.h" |
| #include "media/audio/audio_parameters.h" |
| #include "media/base/audio_bus.h" |
| +#include "media/base/audio_converter.h" |
| +#include "media/base/audio_fifo.h" |
| #include "media/base/bind_to_current_loop.h" |
| #include "third_party/opus/src/include/opus.h" |
| // Note that this code follows the Chrome media convention of defining a "frame" |
| -// as "one multi-channel sample" as opposed to another common definition |
| -// meaning "a chunk of samples". Here this second definition of "frame" is |
| -// called a "buffer"; so what might be called "frame duration" is instead |
| -// "buffer duration", and so on. |
| +// as "one multi-channel sample" as opposed to another common definition meaning |
| +// "a chunk of samples". Here this second definition of "frame" is called a |
| +// "buffer"; so what might be called "frame duration" is instead "buffer |
| +// duration", and so on. |
| namespace content { |
| namespace { |
| -enum { |
| - // This is the recommended value, according to documentation in |
| - // third_party/opus/src/include/opus.h, so that the Opus encoder does not |
| - // degrade the audio due to memory constraints. |
| - OPUS_MAX_PAYLOAD_SIZE = 4000, |
| +// Maximum number of buffers that can be held in the AudioFifo of AudioEncoder. |
| +static const size_t kMaxNumberOfFifoBuffers = 2; |
| - // Support for max sampling rate of 48KHz, 2 channels, 60 ms duration. |
| - MAX_SAMPLES_PER_BUFFER = 48 * 2 * 60, |
| -}; |
| +// Recommended value for opus_encode_float(), according to documentation in |
| +// third_party/opus/src/include/opus.h, so that the Opus encoder does not |
| +// degrade the audio due to memory constraints. |
| +static const int kOpusMaxDataBytes = 4000; |
| + |
| +// Opus preferred sampling rate for encoding. This is also the one webm likes to |
| +// have: https://wiki.xiph.org/MatroskaOpus. |
| +static const int kOpusPreferredSamplingRate = 48000; |
| + |
| +// Media Stream Audio Tracks always send 10ms worth of Audio, which happens to |
| +// be Opus-friendly (https://tools.ietf.org/html/rfc6716#section-2.1.4). |
| +static const int kMediaStreamTrackBufferDurationMs = 10; |
| + |
| +// The number of frames in a 10 ms buffer @ 48000 samples/second. |
| +static const int kOpusPreferredFramesPerBuffer = 480; |
|
miu
2016/01/22 00:14:53
Opus will produce higher quality audio if encoding
mcasas
2016/01/22 22:03:53
Done.
Note that the input will still be chunks of
|
| + |
| +static_assert(kOpusPreferredFramesPerBuffer == |
| + kOpusPreferredSamplingRate * |
| + kMediaStreamTrackBufferDurationMs / |
| + 1000, |
| + "kOpusPreferredFramesPerBuffer should be ==" |
| + "kOpusPreferredSamplingRate * kMediaStreamTrackBufferDurationMs"); |
| + |
| +// Tries to encode |data_in|'s |num_samples| into |data_out|. |
| +bool DoEncode(OpusEncoder* opus_encoder, |
| + float* data_in, |
| + int num_samples, |
| + std::string* data_out) { |
| + DCHECK_EQ(kOpusPreferredFramesPerBuffer, num_samples); |
| + |
| + data_out->resize(kOpusMaxDataBytes); |
| + const opus_int32 result = opus_encode_float( |
| + opus_encoder, data_in, num_samples, |
| + reinterpret_cast<uint8_t*>(string_as_array(data_out)), kOpusMaxDataBytes); |
| + |
| + if (result > 1) { |
| + // TODO(ajose): Investigate improving this. http://crbug.com/547918 |
| + data_out->resize(result); |
| + return true; |
| + } |
| + // If |result| in {0,1}, do nothing; the documentation says that a return |
| + // value of zero or one means the packet does not need to be transmitted. |
| + // Otherwise, we have an error. |
| + DLOG_IF(ERROR, result < 0) << " encode failed: " << opus_strerror(result); |
| + return false; |
| +} |
| + |
| +// Interleaves |audio_bus| channels() of floats into a single output linear |
| +// |buffer|. (AudioBus::ToInterleaved() does not support float). |
|
miu
2016/01/22 00:14:53
It probably should! ;)
This code was originally
mcasas
2016/01/22 22:03:53
Bug it is. Happy to do it or it can be a GoodFirstBug
|
| +void ToInterleaved(media::AudioBus* audio_bus, float* buffer) { |
| + for (int ch = 0; ch < audio_bus->channels(); ++ch) { |
| + const float* src = audio_bus->channel(ch); |
| + const float* const src_end = src + audio_bus->frames(); |
| + float* dest = buffer + ch; |
| + for (; src < src_end; ++src, dest += audio_bus->channels()) |
| + *dest = *src; |
| + } |
| +} |
| } // anonymous namespace |
| -// Nested class encapsulating opus-related encoding details. |
| -// AudioEncoder is created and destroyed on ATR's main thread (usually the |
| -// main render thread) but otherwise should operate entirely on |
| -// |encoder_thread_|, which is owned by AudioTrackRecorder. Be sure to delete |
| -// |encoder_thread_| before deleting the AudioEncoder using it. |
| +// Nested class encapsulating opus-related encoding details. It contains an |
| +// AudioConverter to adapt incoming data to the format Opus likes to have. |
| +// AudioEncoder is created and destroyed on ATR's main thread (usually the main |
| +// render thread) but otherwise should operate entirely on |encoder_thread_|, |
| +// which is owned by AudioTrackRecorder. Be sure to delete |encoder_thread_| |
| +// before deleting the AudioEncoder using it. |
| class AudioTrackRecorder::AudioEncoder |
| - : public base::RefCountedThreadSafe<AudioEncoder> { |
| + : public base::RefCountedThreadSafe<AudioEncoder>, |
| + public media::AudioConverter::InputCallback { |
| public: |
| explicit AudioEncoder(const OnEncodedAudioCB& on_encoded_audio_cb) |
| : on_encoded_audio_cb_(on_encoded_audio_cb), opus_encoder_(nullptr) { |
| @@ -54,6 +110,10 @@ class AudioTrackRecorder::AudioEncoder |
| encoder_thread_checker_.DetachFromThread(); |
| } |
| + // media::AudioConverter::InputCallback implementation. |
| + double ProvideInput(media::AudioBus* audio_bus, |
|
miu
2016/01/22 00:14:53
This should be private, since it's only meant to b
mcasas
2016/01/22 22:03:53
Done.
|
| + base::TimeDelta buffer_delay) override; |
| + |
| void OnSetFormat(const media::AudioParameters& params); |
| void EncodeAudio(scoped_ptr<media::AudioBus> audio_bus, |
| @@ -62,33 +122,25 @@ class AudioTrackRecorder::AudioEncoder |
| private: |
| friend class base::RefCountedThreadSafe<AudioEncoder>; |
| - ~AudioEncoder(); |
| + ~AudioEncoder() override; |
| bool is_initialized() const { return !!opus_encoder_; } |
| void DestroyExistingOpusEncoder(); |
| - void TransferSamplesIntoBuffer(const media::AudioBus* audio_bus, |
| - int source_offset, |
| - int buffer_fill_offset, |
| - int num_samples); |
| - bool EncodeFromFilledBuffer(std::string* out); |
| - |
| const OnEncodedAudioCB on_encoded_audio_cb_; |
| base::ThreadChecker encoder_thread_checker_; |
| - // In the case where a call to EncodeAudio() cannot completely fill the |
| - // buffer, this points to the position at which to populate data in a later |
| - // call. |
| - int buffer_fill_end_; |
| - |
| - int frames_per_buffer_; |
| + // Track Audio (ingress) and Opus encoder input parameters, respectively. They |
| + // only differ in their sample_rate() and frames_per_buffer(): output is |
| + // 48ksamples/s and 480, respectively. |
| + media::AudioParameters input_params_; |
| + media::AudioParameters output_params_; |
| - // The duration of one set of frames of encoded audio samples. |
| - base::TimeDelta buffer_duration_; |
| - |
| - media::AudioParameters audio_params_; |
| + // Adapts the provided sampling rate to the one OpusEncoder supports. |
| + scoped_ptr<media::AudioConverter> converter_; |
| + scoped_ptr<media::AudioFifo> fifo_; |
| // Buffer for passing AudioBus data to OpusEncoder. |
| scoped_ptr<float[]> buffer_; |
| @@ -104,46 +156,66 @@ AudioTrackRecorder::AudioEncoder::~AudioEncoder() { |
| DestroyExistingOpusEncoder(); |
| } |
| +double AudioTrackRecorder::AudioEncoder::ProvideInput( |
| + media::AudioBus* audio_bus, |
| + base::TimeDelta buffer_delay) { |
| + if (fifo_->frames() >= audio_bus->frames()) |
| + fifo_->Consume(audio_bus, 0, audio_bus->frames()); |
| + else |
| + audio_bus->Zero(); |
| + // Return volume greater than zero to indicate we have more data. |
| + return 1.0; |
| +} |
| + |
| void AudioTrackRecorder::AudioEncoder::OnSetFormat( |
| - const media::AudioParameters& params) { |
| + const media::AudioParameters& input_params) { |
| + DVLOG(1) << __FUNCTION__; |
| DCHECK(encoder_thread_checker_.CalledOnValidThread()); |
| - if (audio_params_.Equals(params)) |
| + if (input_params_.Equals(input_params)) |
| return; |
| DestroyExistingOpusEncoder(); |
| - if (!params.IsValid()) { |
| - DLOG(ERROR) << "Invalid audio params: " << params.AsHumanReadableString(); |
| - return; |
| - } |
| - |
| - buffer_duration_ = base::TimeDelta::FromMilliseconds( |
| - AudioTrackRecorder::GetOpusBufferDuration(params.sample_rate())); |
| - if (buffer_duration_ == base::TimeDelta()) { |
| - DLOG(ERROR) << "Could not find a valid |buffer_duration| for the given " |
| - << "sample rate: " << params.sample_rate(); |
| - return; |
| - } |
| - |
| - frames_per_buffer_ = |
| - params.sample_rate() * buffer_duration_.InMilliseconds() / 1000; |
| - if (frames_per_buffer_ * params.channels() > MAX_SAMPLES_PER_BUFFER) { |
| - DLOG(ERROR) << "Invalid |frames_per_buffer_|: " << frames_per_buffer_; |
| + if (!input_params.IsValid() || input_params.channels() > 2) { |
| + DLOG(ERROR) << "Invalid params: " << input_params.AsHumanReadableString(); |
| return; |
| } |
| - |
| - // Initialize AudioBus buffer for OpusEncoder. |
| - buffer_fill_end_ = 0; |
| - buffer_.reset(new float[params.channels() * frames_per_buffer_]); |
| + input_params_ = input_params; |
| + input_params_.set_frames_per_buffer(input_params_.sample_rate() * |
|
miu
2016/01/22 00:14:53
Shouldn't this be:
input_params_.set_frames_per
mcasas
2016/01/22 22:03:53
No, input is always 10ms, the input sampling rate
|
| + kMediaStreamTrackBufferDurationMs / |
| + 1000); |
| + |
| + output_params_ = media::AudioParameters( |
| + media::AudioParameters::AUDIO_PCM_LOW_LATENCY, |
| + media::GuessChannelLayout(input_params_.channels()), |
| + kOpusPreferredSamplingRate, |
| + input_params_.bits_per_sample(), |
| + kOpusPreferredFramesPerBuffer); |
| + DVLOG(1) << "|input_params_|:" << input_params_.AsHumanReadableString() |
| + << " -->|output_params_|:" << output_params_.AsHumanReadableString(); |
| + |
| + converter_.reset(new media::AudioConverter(input_params_, output_params_, |
| + false /* disable_fifo */)); |
| + converter_->AddInput(this); |
|
mcasas
2016/01/22 22:03:53
add here
|converter_->PrimeWithSilence()|
|
| + |
| + fifo_.reset(new media::AudioFifo( |
| + input_params_.channels(), |
| + kMaxNumberOfFifoBuffers * input_params_.frames_per_buffer())); |
| + |
| + buffer_.reset( |
| + new float[output_params_.channels() * kOpusPreferredFramesPerBuffer * |
| + output_params_.bits_per_sample() / 8]); |
|
miu
2016/01/22 00:14:53
bits_per_sample() is erroneous (it really should b
mcasas
2016/01/22 22:03:53
Done.
|
| // Initialize OpusEncoder. |
| int opus_result; |
| - opus_encoder_ = opus_encoder_create(params.sample_rate(), params.channels(), |
| - OPUS_APPLICATION_AUDIO, &opus_result); |
| + opus_encoder_ = opus_encoder_create(output_params_.sample_rate(), |
| + output_params_.channels(), |
| + OPUS_APPLICATION_AUDIO, |
| + &opus_result); |
| if (opus_result < 0) { |
| DLOG(ERROR) << "Couldn't init opus encoder: " << opus_strerror(opus_result) |
| - << ", sample rate: " << params.sample_rate() |
| - << ", channels: " << params.channels(); |
| + << ", sample rate: " << output_params_.sample_rate() |
| + << ", channels: " << output_params_.channels(); |
| return; |
| } |
| @@ -155,45 +227,32 @@ void AudioTrackRecorder::AudioEncoder::OnSetFormat( |
| DLOG(ERROR) << "Failed to set opus bitrate."; |
| return; |
| } |
| - |
| - audio_params_ = params; |
| } |
| void AudioTrackRecorder::AudioEncoder::EncodeAudio( |
| - scoped_ptr<media::AudioBus> audio_bus, |
| + scoped_ptr<media::AudioBus> input_bus, |
| const base::TimeTicks& capture_time) { |
| + DVLOG(1) << __FUNCTION__ << ", #frames " << input_bus->frames(); |
| DCHECK(encoder_thread_checker_.CalledOnValidThread()); |
| - DCHECK_EQ(audio_bus->channels(), audio_params_.channels()); |
| + DCHECK_EQ(input_bus->channels(), input_params_.channels()); |
| + DCHECK_EQ(input_bus->frames(), input_params_.frames_per_buffer()); |
| + DCHECK(!capture_time.is_null()); |
| + DCHECK(converter_); |
| if (!is_initialized()) |
| return; |
| - |
| - base::TimeDelta buffer_fill_duration = |
| - buffer_fill_end_ * buffer_duration_ / frames_per_buffer_; |
| - base::TimeTicks buffer_capture_time = capture_time - buffer_fill_duration; |
| - |
| - // Encode all audio in |audio_bus| into zero or more packets. |
| - int src_pos = 0; |
| - while (src_pos < audio_bus->frames()) { |
| - const int num_samples_to_xfer = std::min( |
| - frames_per_buffer_ - buffer_fill_end_, audio_bus->frames() - src_pos); |
| - TransferSamplesIntoBuffer(audio_bus.get(), src_pos, buffer_fill_end_, |
| - num_samples_to_xfer); |
| - src_pos += num_samples_to_xfer; |
| - buffer_fill_end_ += num_samples_to_xfer; |
| - |
| - if (buffer_fill_end_ < frames_per_buffer_) |
| - break; |
| - |
| - scoped_ptr<std::string> encoded_data(new std::string()); |
| - if (EncodeFromFilledBuffer(encoded_data.get())) { |
| - on_encoded_audio_cb_.Run(audio_params_, std::move(encoded_data), |
| - buffer_capture_time); |
| - } |
| - |
| - // Reset the capture timestamp and internal buffer for next set of frames. |
| - buffer_capture_time += buffer_duration_; |
| - buffer_fill_end_ = 0; |
| + fifo_->Push(input_bus.release()); // Push received data into |fifo_|. |
| + |
| + scoped_ptr<media::AudioBus> audio_bus = media::AudioBus::Create( |
| + output_params_.channels(), kOpusPreferredFramesPerBuffer); |
| + converter_->Convert(audio_bus.get()); |
|
miu
2016/01/22 00:14:53
You can't call convert until you know there are en
mcasas
2016/01/22 22:03:53
Noted. This is experimented in another unrelated C
|
| + ToInterleaved(audio_bus.release(), buffer_.get()); |
| + |
| + scoped_ptr<std::string> encoded_data(new std::string()); |
| + if (DoEncode(opus_encoder_, buffer_.get(), kOpusPreferredFramesPerBuffer, |
| + encoded_data.get())) { |
| + on_encoded_audio_cb_.Run(output_params_, std::move(encoded_data), |
| + capture_time); |
| } |
| } |
| @@ -206,48 +265,6 @@ void AudioTrackRecorder::AudioEncoder::DestroyExistingOpusEncoder() { |
| } |
| } |
| -void AudioTrackRecorder::AudioEncoder::TransferSamplesIntoBuffer( |
| - const media::AudioBus* audio_bus, |
| - int source_offset, |
| - int buffer_fill_offset, |
| - int num_samples) { |
| - // TODO(ajose): Consider replacing with AudioBus::ToInterleaved(). |
| - // http://crbug.com/547918 |
| - DCHECK(encoder_thread_checker_.CalledOnValidThread()); |
| - DCHECK(is_initialized()); |
| - // Opus requires channel-interleaved samples in a single array. |
| - for (int ch = 0; ch < audio_bus->channels(); ++ch) { |
| - const float* src = audio_bus->channel(ch) + source_offset; |
| - const float* const src_end = src + num_samples; |
| - float* dest = |
| - buffer_.get() + buffer_fill_offset * audio_params_.channels() + ch; |
| - for (; src < src_end; ++src, dest += audio_params_.channels()) |
| - *dest = *src; |
| - } |
| -} |
| - |
| -bool AudioTrackRecorder::AudioEncoder::EncodeFromFilledBuffer( |
| - std::string* out) { |
| - DCHECK(encoder_thread_checker_.CalledOnValidThread()); |
| - DCHECK(is_initialized()); |
| - |
| - out->resize(OPUS_MAX_PAYLOAD_SIZE); |
| - const opus_int32 result = opus_encode_float( |
| - opus_encoder_, buffer_.get(), frames_per_buffer_, |
| - reinterpret_cast<uint8_t*>(string_as_array(out)), OPUS_MAX_PAYLOAD_SIZE); |
| - if (result > 1) { |
| - // TODO(ajose): Investigate improving this. http://crbug.com/547918 |
| - out->resize(result); |
| - return true; |
| - } |
| - // If |result| in {0,1}, do nothing; the documentation says that a return |
| - // value of zero or one means the packet does not need to be transmitted. |
| - // Otherwise, we have an error. |
| - DLOG_IF(ERROR, result < 0) << __FUNCTION__ |
| - << " failed: " << opus_strerror(result); |
| - return false; |
| -} |
| - |
| AudioTrackRecorder::AudioTrackRecorder( |
| const blink::WebMediaStreamTrack& track, |
| const OnEncodedAudioCB& on_encoded_audio_cb) |
| @@ -297,22 +314,4 @@ void AudioTrackRecorder::OnData(const media::AudioBus& audio_bus, |
| base::Passed(&audio_data), capture_time)); |
| } |
| -int AudioTrackRecorder::GetOpusBufferDuration(int sample_rate) { |
| - // Valid buffer durations in millseconds. Note there are other valid |
| - // durations for Opus, see https://tools.ietf.org/html/rfc6716#section-2.1.4 |
| - // Descending order as longer durations can increase compression performance. |
| - const std::vector<int> opus_valid_buffer_durations_ms = {60, 40, 20, 10}; |
| - |
| - // Search for a duration such that |sample_rate| % |buffers_per_second| == 0, |
| - // where |buffers_per_second| = 1000ms / |possible_duration|. |
| - for (auto possible_duration : opus_valid_buffer_durations_ms) { |
| - if (sample_rate * possible_duration % 1000 == 0) { |
| - return possible_duration; |
| - } |
| - } |
| - |
| - // Otherwise, couldn't find a good duration. |
| - return 0; |
| -} |
| - |
| } // namespace content |