Chromium Code Reviews| Index: content/browser/speech/speech_recognizer_impl.cc |
| diff --git a/content/browser/speech/speech_recognizer_impl.cc b/content/browser/speech/speech_recognizer_impl.cc |
| index a08ffe17661af3b5ecaeffa381fa22b8a0c6308f..0f9632f58a5d0b86bff58a1b9137805ef4860658 100644 |
| --- a/content/browser/speech/speech_recognizer_impl.cc |
| +++ b/content/browser/speech/speech_recognizer_impl.cc |
| @@ -42,6 +42,8 @@ class SpeechRecognizerImpl::OnDataConverter |
| // |output_parameters_|. |
| scoped_refptr<AudioChunk> Convert(const AudioBus* data); |
| + bool data_was_converted() const { return data_was_converted_; } |
| + |
| private: |
| // media::AudioConverter::InputCallback implementation. |
| double ProvideInput(AudioBus* dest, base::TimeDelta buffer_delay) override; |
| @@ -54,7 +56,7 @@ class SpeechRecognizerImpl::OnDataConverter |
| scoped_ptr<AudioBus> output_bus_; |
| const AudioParameters input_parameters_; |
| const AudioParameters output_parameters_; |
| - bool waiting_for_input_; |
| + bool data_was_converted_; |
| DISALLOW_COPY_AND_ASSIGN(OnDataConverter); |
| }; |
| @@ -119,8 +121,9 @@ SpeechRecognizerImpl::OnDataConverter::OnDataConverter( |
| output_bus_(AudioBus::Create(output_params)), |
| input_parameters_(input_params), |
| output_parameters_(output_params), |
| - waiting_for_input_(false) { |
| + data_was_converted_(false) { |
| audio_converter_.AddInput(this); |
| + audio_converter_.PrimeWithSilence(); |
| } |
| SpeechRecognizerImpl::OnDataConverter::~OnDataConverter() { |
| @@ -132,12 +135,18 @@ SpeechRecognizerImpl::OnDataConverter::~OnDataConverter() { |
| scoped_refptr<AudioChunk> SpeechRecognizerImpl::OnDataConverter::Convert( |
| const AudioBus* data) { |
| CHECK_EQ(data->frames(), input_parameters_.frames_per_buffer()); |
| - |
| + data_was_converted_ = false; |
| + // Copy recorded audio to the |input_bus_| for later use in ProvideInput(). |
| data->CopyTo(input_bus_.get()); |
| - |
| - waiting_for_input_ = true; |
| + // Convert the audio and place the result in |output_bus_|. This call will |
| + // result in a ProvideInput() callback where the actual input is provided. |
| + // However, it can happen that the converter contains enough cached data |
| + // to return a result without calling ProvideInput(). The caller of this |
| + // method should check the state of data_was_converted_() and make an |
| + // additional call if it is set to false at return. |
| + // See http://crbug.com/506051 for details. |
| audio_converter_.Convert(output_bus_.get()); |
| - |
| + // Create an audio chunk based on the converted result. |
| scoped_refptr<AudioChunk> chunk( |
| new AudioChunk(output_parameters_.GetBytesPerBuffer(), |
| output_parameters_.bits_per_sample() / 8)); |
| @@ -149,16 +158,10 @@ scoped_refptr<AudioChunk> SpeechRecognizerImpl::OnDataConverter::Convert( |
| double SpeechRecognizerImpl::OnDataConverter::ProvideInput( |
| AudioBus* dest, base::TimeDelta buffer_delay) { |
| - // The audio converted should never ask for more than one bus in each call |
| - // to Convert(). If so, we have a serious issue in our design since we might |
| - // miss recorded chunks of 100 ms audio data. |
| - CHECK(waiting_for_input_); |
| - |
| // Read from the input bus to feed the converter. |
| input_bus_->CopyTo(dest); |
| - |
| - // |input_bus_| should only be provide once. |
| - waiting_for_input_ = false; |
| + // Indicate that the recorded audio has in fact been used by the converter. |
| + data_was_converted_ = true; |
| return 1; |
| } |
| @@ -273,10 +276,22 @@ void SpeechRecognizerImpl::OnData(AudioInputController* controller, |
| // Convert audio from native format to fixed format used by WebSpeech. |
| FSMEventArgs event_args(EVENT_AUDIO_DATA); |
| event_args.audio_data = audio_converter_->Convert(data); |
| - |
| BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, |
| base::Bind(&SpeechRecognizerImpl::DispatchEvent, |
| this, event_args)); |
| + // See http://crbug.com/506051 regarding why one extra convert call can |
|
DaleCurtis
2015/07/08 18:41:47
Oh, you could just do a while (!data_was_converted
henrika (OOO until Aug 14)
2015/07/09 10:06:09
Great point, tried that. But what then happens is:
|
| + // sometimes be required. It should be a rare case. |
| + if (!audio_converter_->data_was_converted()) { |
| + DCHECK(false); |
|
DaleCurtis
2015/07/08 18:41:47
Remove dcheck and dlog, I don't think it's unexpec
henrika (OOO until Aug 14)
2015/07/09 10:06:09
Done.
|
| + DLOG(WARNING) << "One extra convert call is required"; |
| + event_args.audio_data = audio_converter_->Convert(data); |
| + BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, |
| + base::Bind(&SpeechRecognizerImpl::DispatchEvent, |
| + this, event_args)); |
| + } |
| + // Something is seriously wrong here and we are most likely missing some |
| + // audio segments. |
| + CHECK(audio_converter_->data_was_converted()); |
| } |
| void SpeechRecognizerImpl::OnAudioClosed(AudioInputController*) {} |
| @@ -523,6 +538,8 @@ SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) { |
| AudioParameters output_parameters = AudioParameters( |
| AudioParameters::AUDIO_PCM_LOW_LATENCY, kChannelLayout, kAudioSampleRate, |
| kNumBitsPerAudioSample, frames_per_buffer); |
| + DVLOG(1) << "SRI::output_parameters: " |
| + << output_parameters.AsHumanReadableString(); |
| // Audio converter will receive audio based on these parameters as input. |
| // On Windows we start by verifying that Core Audio is supported. If not, |
| @@ -543,17 +560,17 @@ SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) { |
| // We rely on internal buffers in the audio back-end to fulfill this request |
| // and the idea is to simplify the audio conversion since each Convert() |
| // call will then render exactly one ProvideInput() call. |
| - // Due to implementation details in the audio converter, 2 milliseconds |
| - // are added to the default frame size (100 ms) to ensure there is enough |
| - // data to generate 100 ms of output when resampling. |
| + // in_params.sample_rate() |
| frames_per_buffer = |
| - ((in_params.sample_rate() * (chunk_duration_ms + 2)) / 1000.0) + 0.5; |
| + ((in_params.sample_rate() * chunk_duration_ms) / 1000.0) + 0.5; |
|
DaleCurtis
2015/07/08 18:39:46
One last thing, can verify that the chunk size for
henrika (OOO until Aug 14)
2015/07/09 10:06:09
SRI::output_parameters: format: 1 channels: 1 chan
|
| input_parameters.Reset(in_params.format(), |
| in_params.channel_layout(), |
| in_params.channels(), |
| in_params.sample_rate(), |
| in_params.bits_per_sample(), |
| frames_per_buffer); |
| + DVLOG(1) << "SRI::input_parameters: " |
| + << input_parameters.AsHumanReadableString(); |
| } |
| // Create an audio converter which converts data between native input format |