| Index: content/browser/speech/speech_recognizer_impl.cc
|
| diff --git a/content/browser/speech/speech_recognizer_impl.cc b/content/browser/speech/speech_recognizer_impl.cc
|
| index a08ffe17661af3b5ecaeffa381fa22b8a0c6308f..b38963dc91e90e7be68b8493f2bfc0884ce2e277 100644
|
| --- a/content/browser/speech/speech_recognizer_impl.cc
|
| +++ b/content/browser/speech/speech_recognizer_impl.cc
|
| @@ -42,6 +42,8 @@ class SpeechRecognizerImpl::OnDataConverter
|
| // |output_parameters_|.
|
| scoped_refptr<AudioChunk> Convert(const AudioBus* data);
|
|
|
| + bool data_was_converted() const { return data_was_converted_; }
|
| +
|
| private:
|
| // media::AudioConverter::InputCallback implementation.
|
| double ProvideInput(AudioBus* dest, base::TimeDelta buffer_delay) override;
|
| @@ -54,7 +56,7 @@ class SpeechRecognizerImpl::OnDataConverter
|
| scoped_ptr<AudioBus> output_bus_;
|
| const AudioParameters input_parameters_;
|
| const AudioParameters output_parameters_;
|
| - bool waiting_for_input_;
|
| + bool data_was_converted_;
|
|
|
| DISALLOW_COPY_AND_ASSIGN(OnDataConverter);
|
| };
|
| @@ -119,8 +121,9 @@ SpeechRecognizerImpl::OnDataConverter::OnDataConverter(
|
| output_bus_(AudioBus::Create(output_params)),
|
| input_parameters_(input_params),
|
| output_parameters_(output_params),
|
| - waiting_for_input_(false) {
|
| + data_was_converted_(false) {
|
| audio_converter_.AddInput(this);
|
| + audio_converter_.PrimeWithSilence();
|
| }
|
|
|
| SpeechRecognizerImpl::OnDataConverter::~OnDataConverter() {
|
| @@ -132,12 +135,18 @@ SpeechRecognizerImpl::OnDataConverter::~OnDataConverter() {
|
| scoped_refptr<AudioChunk> SpeechRecognizerImpl::OnDataConverter::Convert(
|
| const AudioBus* data) {
|
| CHECK_EQ(data->frames(), input_parameters_.frames_per_buffer());
|
| -
|
| + data_was_converted_ = false;
|
| + // Copy recorded audio to the |input_bus_| for later use in ProvideInput().
|
| data->CopyTo(input_bus_.get());
|
| -
|
| - waiting_for_input_ = true;
|
| + // Convert the audio and place the result in |output_bus_|. This call will
|
| + // result in a ProvideInput() callback where the actual input is provided.
|
| + // However, it can happen that the converter contains enough cached data
|
| + // to return a result without calling ProvideInput(). The caller of this
|
| + // method should check the state of data_was_converted_() and make an
|
| + // additional call if it is set to false at return.
|
| + // See http://crbug.com/506051 for details.
|
| audio_converter_.Convert(output_bus_.get());
|
| -
|
| + // Create an audio chunk based on the converted result.
|
| scoped_refptr<AudioChunk> chunk(
|
| new AudioChunk(output_parameters_.GetBytesPerBuffer(),
|
| output_parameters_.bits_per_sample() / 8));
|
| @@ -149,16 +158,10 @@ scoped_refptr<AudioChunk> SpeechRecognizerImpl::OnDataConverter::Convert(
|
|
|
| double SpeechRecognizerImpl::OnDataConverter::ProvideInput(
|
| AudioBus* dest, base::TimeDelta buffer_delay) {
|
| - // The audio converted should never ask for more than one bus in each call
|
| - // to Convert(). If so, we have a serious issue in our design since we might
|
| - // miss recorded chunks of 100 ms audio data.
|
| - CHECK(waiting_for_input_);
|
| -
|
| // Read from the input bus to feed the converter.
|
| input_bus_->CopyTo(dest);
|
| -
|
| - // |input_bus_| should only be provide once.
|
| - waiting_for_input_ = false;
|
| + // Indicate that the recorded audio has in fact been used by the converter.
|
| + data_was_converted_ = true;
|
| return 1;
|
| }
|
|
|
| @@ -273,10 +276,20 @@ void SpeechRecognizerImpl::OnData(AudioInputController* controller,
|
| // Convert audio from native format to fixed format used by WebSpeech.
|
| FSMEventArgs event_args(EVENT_AUDIO_DATA);
|
| event_args.audio_data = audio_converter_->Convert(data);
|
| -
|
| BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
|
| base::Bind(&SpeechRecognizerImpl::DispatchEvent,
|
| this, event_args));
|
| + // See http://crbug.com/506051 regarding why one extra convert call can
|
| + // sometimes be required. It should be a rare case.
|
| + if (!audio_converter_->data_was_converted()) {
|
| + event_args.audio_data = audio_converter_->Convert(data);
|
| + BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
|
| + base::Bind(&SpeechRecognizerImpl::DispatchEvent,
|
| + this, event_args));
|
| + }
|
| + // Something is seriously wrong here and we are most likely missing some
|
| + // audio segments.
|
| + CHECK(audio_converter_->data_was_converted());
|
| }
|
|
|
| void SpeechRecognizerImpl::OnAudioClosed(AudioInputController*) {}
|
| @@ -523,6 +536,8 @@ SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) {
|
| AudioParameters output_parameters = AudioParameters(
|
| AudioParameters::AUDIO_PCM_LOW_LATENCY, kChannelLayout, kAudioSampleRate,
|
| kNumBitsPerAudioSample, frames_per_buffer);
|
| + DVLOG(1) << "SRI::output_parameters: "
|
| + << output_parameters.AsHumanReadableString();
|
|
|
| // Audio converter will receive audio based on these parameters as input.
|
| // On Windows we start by verifying that Core Audio is supported. If not,
|
| @@ -543,17 +558,17 @@ SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) {
|
| // We rely on internal buffers in the audio back-end to fulfill this request
|
| // and the idea is to simplify the audio conversion since each Convert()
|
| // call will then render exactly one ProvideInput() call.
|
| - // Due to implementation details in the audio converter, 2 milliseconds
|
| - // are added to the default frame size (100 ms) to ensure there is enough
|
| - // data to generate 100 ms of output when resampling.
|
| + // in_params.sample_rate()
|
| frames_per_buffer =
|
| - ((in_params.sample_rate() * (chunk_duration_ms + 2)) / 1000.0) + 0.5;
|
| + ((in_params.sample_rate() * chunk_duration_ms) / 1000.0) + 0.5;
|
| input_parameters.Reset(in_params.format(),
|
| in_params.channel_layout(),
|
| in_params.channels(),
|
| in_params.sample_rate(),
|
| in_params.bits_per_sample(),
|
| frames_per_buffer);
|
| + DVLOG(1) << "SRI::input_parameters: "
|
| + << input_parameters.AsHumanReadableString();
|
| }
|
|
|
| // Create an audio converter which converts data between native input format
|
|
|