Chromium Code Reviews| Index: content/browser/speech/speech_recognizer_impl.cc |
| diff --git a/content/browser/speech/speech_recognizer_impl.cc b/content/browser/speech/speech_recognizer_impl.cc |
| index a08ffe17661af3b5ecaeffa381fa22b8a0c6308f..96b0876bf5cfbb8425c558e168ab469ac75690ac 100644 |
| --- a/content/browser/speech/speech_recognizer_impl.cc |
| +++ b/content/browser/speech/speech_recognizer_impl.cc |
| @@ -55,6 +55,7 @@ class SpeechRecognizerImpl::OnDataConverter |
| const AudioParameters input_parameters_; |
| const AudioParameters output_parameters_; |
| bool waiting_for_input_; |
| + int convert_count_; |
| DISALLOW_COPY_AND_ASSIGN(OnDataConverter); |
| }; |
| @@ -119,8 +120,18 @@ SpeechRecognizerImpl::OnDataConverter::OnDataConverter( |
| output_bus_(AudioBus::Create(output_params)), |
| input_parameters_(input_params), |
| output_parameters_(output_params), |
| - waiting_for_input_(false) { |
| + waiting_for_input_(false), |
| + convert_count_(0) { |
| audio_converter_.AddInput(this); |
| + DVLOG(1) << "SRI::AudioConverter::ChunkSize " << audio_converter_.ChunkSize(); |
| + |
| + // Initial priming with zeros... |
| + // waiting_for_input_ = true; |
| + // input_bus_->Zero(); |
| + // audio_converter_.Convert(output_bus_.get()); |
| + |
| + audio_converter_.PrimeWithSilence(); |
| + DVLOG(1) << "SRI::AudioConverter::ChunkSize " << audio_converter_.ChunkSize(); |
| } |
| SpeechRecognizerImpl::OnDataConverter::~OnDataConverter() { |
| @@ -132,11 +143,18 @@ SpeechRecognizerImpl::OnDataConverter::~OnDataConverter() { |
| scoped_refptr<AudioChunk> SpeechRecognizerImpl::OnDataConverter::Convert( |
| const AudioBus* data) { |
| CHECK_EQ(data->frames(), input_parameters_.frames_per_buffer()); |
| - |
| + DVLOG(1) << "SRI::ODC::Convert..."; |
| + // Data should always have been provided by ProvideInput(). If not, the |
| + // previous call to Convert() could produce converted data using cached |
| + // data. But that means that we will miss one large frame of recorded audio |
| + // samples. |
| + CHECK(!waiting_for_input_); |
| data->CopyTo(input_bus_.get()); |
| waiting_for_input_ = true; |
| + convert_count_++; |
| audio_converter_.Convert(output_bus_.get()); |
|
DaleCurtis
2015/07/07 16:38:17
As mentioned in the email thread, you should proba
henrika (OOO until Aug 14)
2015/07/07 19:30:05
Smart. But note that I have modified the input siz
DaleCurtis
2015/07/07 20:40:44
Well, I'm not confident your modification always e
henrika (OOO until Aug 14)
2015/07/07 21:14:08
Got it. I will test your scheme using 102 where I
DaleCurtis
2015/07/07 21:44:15
I think it's fine to use 100ms like you do w/ prim
henrika (OOO until Aug 14)
2015/07/08 12:22:59
Done. Hope you are OK with how I handle the extra
|
| + DVLOG(1) << "SRI::ODC::Convert done (" << convert_count_ << ")"; |
|
tommi (sloooow) - chröme
2015/07/07 12:40:21
seems like this will always log a value higher tha
henrika (OOO until Aug 14)
2015/07/07 12:56:54
Got it. Plan is to remove this counter. Just wante
|
| scoped_refptr<AudioChunk> chunk( |
| new AudioChunk(output_parameters_.GetBytesPerBuffer(), |
| @@ -149,6 +167,7 @@ scoped_refptr<AudioChunk> SpeechRecognizerImpl::OnDataConverter::Convert( |
| double SpeechRecognizerImpl::OnDataConverter::ProvideInput( |
| AudioBus* dest, base::TimeDelta buffer_delay) { |
| + DVLOG(1) << "SRI::ODC::ProvideInput"; |
| // The audio converter should never ask for more than one bus in each call |
| // to Convert(). If so, we have a serious issue in our design since we might |
| // miss recorded chunks of 100 ms audio data. |
| @@ -523,6 +542,8 @@ SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) { |
| AudioParameters output_parameters = AudioParameters( |
| AudioParameters::AUDIO_PCM_LOW_LATENCY, kChannelLayout, kAudioSampleRate, |
| kNumBitsPerAudioSample, frames_per_buffer); |
| + DVLOG(1) << "SRI::output_parameters: " |
| + << output_parameters.AsHumanReadableString(); |
| // Audio converter will receive audio based on these parameters as input. |
| // On Windows we start by verifying that Core Audio is supported. If not, |
| @@ -546,14 +567,18 @@ SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) { |
| // Due to implementation details in the audio converter, 2 milliseconds |
| // are added to the default frame size (100 ms) to ensure there is enough |
| // data to generate 100 ms of output when resampling. |
| + // frames_per_buffer = |
| + // ((in_params.sample_rate() * (chunk_duration_ms + 2)) / 1000.0) + 0.5; |
| frames_per_buffer = |
| - ((in_params.sample_rate() * (chunk_duration_ms + 2)) / 1000.0) + 0.5; |
| + ((in_params.sample_rate() * chunk_duration_ms) / 1000.0) + 0.5; |
| input_parameters.Reset(in_params.format(), |
| in_params.channel_layout(), |
| in_params.channels(), |
| in_params.sample_rate(), |
| in_params.bits_per_sample(), |
| frames_per_buffer); |
| + DVLOG(1) << "SRI::input_parameters: " |
| + << input_parameters.AsHumanReadableString(); |
| } |
| // Create an audio converter which converts data between native input format |