Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1261)

Unified Diff: content/browser/speech/speech_recognizer_impl.cc

Issue 1211203006: Fixes issue where Web Speech API drops a frame every 5.1 seconds (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: nits Created 5 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | media/base/audio_converter.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: content/browser/speech/speech_recognizer_impl.cc
diff --git a/content/browser/speech/speech_recognizer_impl.cc b/content/browser/speech/speech_recognizer_impl.cc
index a08ffe17661af3b5ecaeffa381fa22b8a0c6308f..b38963dc91e90e7be68b8493f2bfc0884ce2e277 100644
--- a/content/browser/speech/speech_recognizer_impl.cc
+++ b/content/browser/speech/speech_recognizer_impl.cc
@@ -42,6 +42,8 @@ class SpeechRecognizerImpl::OnDataConverter
// |output_parameters_|.
scoped_refptr<AudioChunk> Convert(const AudioBus* data);
+ bool data_was_converted() const { return data_was_converted_; }
+
private:
// media::AudioConverter::InputCallback implementation.
double ProvideInput(AudioBus* dest, base::TimeDelta buffer_delay) override;
@@ -54,7 +56,7 @@ class SpeechRecognizerImpl::OnDataConverter
scoped_ptr<AudioBus> output_bus_;
const AudioParameters input_parameters_;
const AudioParameters output_parameters_;
- bool waiting_for_input_;
+ bool data_was_converted_;
DISALLOW_COPY_AND_ASSIGN(OnDataConverter);
};
@@ -119,8 +121,9 @@ SpeechRecognizerImpl::OnDataConverter::OnDataConverter(
output_bus_(AudioBus::Create(output_params)),
input_parameters_(input_params),
output_parameters_(output_params),
- waiting_for_input_(false) {
+ data_was_converted_(false) {
audio_converter_.AddInput(this);
+ audio_converter_.PrimeWithSilence();
}
SpeechRecognizerImpl::OnDataConverter::~OnDataConverter() {
@@ -132,12 +135,18 @@ SpeechRecognizerImpl::OnDataConverter::~OnDataConverter() {
scoped_refptr<AudioChunk> SpeechRecognizerImpl::OnDataConverter::Convert(
const AudioBus* data) {
CHECK_EQ(data->frames(), input_parameters_.frames_per_buffer());
-
+ data_was_converted_ = false;
+ // Copy recorded audio to the |input_bus_| for later use in ProvideInput().
data->CopyTo(input_bus_.get());
-
- waiting_for_input_ = true;
+ // Convert the audio and place the result in |output_bus_|. This call will
+ // result in a ProvideInput() callback where the actual input is provided.
+ // However, it can happen that the converter contains enough cached data
+ // to return a result without calling ProvideInput(). The caller of this
+ // method should check the state of data_was_converted_() and make an
+ // additional call if it is set to false at return.
+ // See http://crbug.com/506051 for details.
audio_converter_.Convert(output_bus_.get());
-
+ // Create an audio chunk based on the converted result.
scoped_refptr<AudioChunk> chunk(
new AudioChunk(output_parameters_.GetBytesPerBuffer(),
output_parameters_.bits_per_sample() / 8));
@@ -149,16 +158,10 @@ scoped_refptr<AudioChunk> SpeechRecognizerImpl::OnDataConverter::Convert(
double SpeechRecognizerImpl::OnDataConverter::ProvideInput(
AudioBus* dest, base::TimeDelta buffer_delay) {
- // The audio converted should never ask for more than one bus in each call
- // to Convert(). If so, we have a serious issue in our design since we might
- // miss recorded chunks of 100 ms audio data.
- CHECK(waiting_for_input_);
-
// Read from the input bus to feed the converter.
input_bus_->CopyTo(dest);
-
- // |input_bus_| should only be provide once.
- waiting_for_input_ = false;
+ // Indicate that the recorded audio has in fact been used by the converter.
+ data_was_converted_ = true;
return 1;
}
@@ -273,10 +276,20 @@ void SpeechRecognizerImpl::OnData(AudioInputController* controller,
// Convert audio from native format to fixed format used by WebSpeech.
FSMEventArgs event_args(EVENT_AUDIO_DATA);
event_args.audio_data = audio_converter_->Convert(data);
-
BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
base::Bind(&SpeechRecognizerImpl::DispatchEvent,
this, event_args));
+ // See http://crbug.com/506051 regarding why one extra convert call can
+ // sometimes be required. It should be a rare case.
+ if (!audio_converter_->data_was_converted()) {
+ event_args.audio_data = audio_converter_->Convert(data);
+ BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
+ base::Bind(&SpeechRecognizerImpl::DispatchEvent,
+ this, event_args));
+ }
+ // Something is seriously wrong here and we are most likely missing some
+ // audio segments.
+ CHECK(audio_converter_->data_was_converted());
}
void SpeechRecognizerImpl::OnAudioClosed(AudioInputController*) {}
@@ -523,6 +536,8 @@ SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) {
AudioParameters output_parameters = AudioParameters(
AudioParameters::AUDIO_PCM_LOW_LATENCY, kChannelLayout, kAudioSampleRate,
kNumBitsPerAudioSample, frames_per_buffer);
+ DVLOG(1) << "SRI::output_parameters: "
+ << output_parameters.AsHumanReadableString();
// Audio converter will receive audio based on these parameters as input.
// On Windows we start by verifying that Core Audio is supported. If not,
@@ -543,17 +558,17 @@ SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) {
// We rely on internal buffers in the audio back-end to fulfill this request
// and the idea is to simplify the audio conversion since each Convert()
// call will then render exactly one ProvideInput() call.
- // Due to implementation details in the audio converter, 2 milliseconds
- // are added to the default frame size (100 ms) to ensure there is enough
- // data to generate 100 ms of output when resampling.
+ // in_params.sample_rate()
frames_per_buffer =
- ((in_params.sample_rate() * (chunk_duration_ms + 2)) / 1000.0) + 0.5;
+ ((in_params.sample_rate() * chunk_duration_ms) / 1000.0) + 0.5;
input_parameters.Reset(in_params.format(),
in_params.channel_layout(),
in_params.channels(),
in_params.sample_rate(),
in_params.bits_per_sample(),
frames_per_buffer);
+ DVLOG(1) << "SRI::input_parameters: "
+ << input_parameters.AsHumanReadableString();
}
// Create an audio converter which converts data between native input format
« no previous file with comments | « no previous file | media/base/audio_converter.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698