Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "content/browser/speech/speech_recognizer_impl.h" | 5 #include "content/browser/speech/speech_recognizer_impl.h" |
| 6 | 6 |
| 7 #include "base/basictypes.h" | 7 #include "base/basictypes.h" |
| 8 #include "base/bind.h" | 8 #include "base/bind.h" |
| 9 #include "base/time/time.h" | 9 #include "base/time/time.h" |
| 10 #include "content/browser/browser_main_loop.h" | 10 #include "content/browser/browser_main_loop.h" |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 48 | 48 |
| 49 // Handles resampling, buffering, and channel mixing between input and output | 49 // Handles resampling, buffering, and channel mixing between input and output |
| 50 // parameters. | 50 // parameters. |
| 51 AudioConverter audio_converter_; | 51 AudioConverter audio_converter_; |
| 52 | 52 |
| 53 scoped_ptr<AudioBus> input_bus_; | 53 scoped_ptr<AudioBus> input_bus_; |
| 54 scoped_ptr<AudioBus> output_bus_; | 54 scoped_ptr<AudioBus> output_bus_; |
| 55 const AudioParameters input_parameters_; | 55 const AudioParameters input_parameters_; |
| 56 const AudioParameters output_parameters_; | 56 const AudioParameters output_parameters_; |
| 57 bool waiting_for_input_; | 57 bool waiting_for_input_; |
| 58 int convert_count_; | |
| 58 | 59 |
| 59 DISALLOW_COPY_AND_ASSIGN(OnDataConverter); | 60 DISALLOW_COPY_AND_ASSIGN(OnDataConverter); |
| 60 }; | 61 }; |
| 61 | 62 |
| 62 namespace { | 63 namespace { |
| 63 | 64 |
| 64 // The following constants are related to the volume level indicator shown in | 65 // The following constants are related to the volume level indicator shown in |
| 65 // the UI for recorded audio. | 66 // the UI for recorded audio. |
| 66 // Multiplier used when new volume is greater than previous level. | 67 // Multiplier used when new volume is greater than previous level. |
| 67 const float kUpSmoothingFactor = 1.0f; | 68 const float kUpSmoothingFactor = 1.0f; |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 112 // SpeechRecognizerImpl::OnDataConverter implementation | 113 // SpeechRecognizerImpl::OnDataConverter implementation |
| 113 | 114 |
| 114 SpeechRecognizerImpl::OnDataConverter::OnDataConverter( | 115 SpeechRecognizerImpl::OnDataConverter::OnDataConverter( |
| 115 const AudioParameters& input_params, | 116 const AudioParameters& input_params, |
| 116 const AudioParameters& output_params) | 117 const AudioParameters& output_params) |
| 117 : audio_converter_(input_params, output_params, false), | 118 : audio_converter_(input_params, output_params, false), |
| 118 input_bus_(AudioBus::Create(input_params)), | 119 input_bus_(AudioBus::Create(input_params)), |
| 119 output_bus_(AudioBus::Create(output_params)), | 120 output_bus_(AudioBus::Create(output_params)), |
| 120 input_parameters_(input_params), | 121 input_parameters_(input_params), |
| 121 output_parameters_(output_params), | 122 output_parameters_(output_params), |
| 122 waiting_for_input_(false) { | 123 waiting_for_input_(false), |
| 124 convert_count_(0) { | |
| 123 audio_converter_.AddInput(this); | 125 audio_converter_.AddInput(this); |
| 126 DVLOG(1) << "SRI::AudioConverter::ChunkSize " << audio_converter_.ChunkSize(); | |
| 127 | |
| 128 // Initial priming with zeros... | |
| 129 // waiting_for_input_ = true; | |
| 130 // input_bus_->Zero(); | |
| 131 // audio_converter_.Convert(output_bus_.get()); | |
| 132 | |
| 133 audio_converter_.PrimeWithSilence(); | |
| 134 DVLOG(1) << "SRI::AudioConverter::ChunkSize " << audio_converter_.ChunkSize(); | |
| 124 } | 135 } |
| 125 | 136 |
| 126 SpeechRecognizerImpl::OnDataConverter::~OnDataConverter() { | 137 SpeechRecognizerImpl::OnDataConverter::~OnDataConverter() { |
| 127 // It should now be safe to unregister the converter since no more OnData() | 138 // It should now be safe to unregister the converter since no more OnData() |
| 128 // callbacks are outstanding at this point. | 139 // callbacks are outstanding at this point. |
| 129 audio_converter_.RemoveInput(this); | 140 audio_converter_.RemoveInput(this); |
| 130 } | 141 } |
| 131 | 142 |
| 132 scoped_refptr<AudioChunk> SpeechRecognizerImpl::OnDataConverter::Convert( | 143 scoped_refptr<AudioChunk> SpeechRecognizerImpl::OnDataConverter::Convert( |
| 133 const AudioBus* data) { | 144 const AudioBus* data) { |
| 134 CHECK_EQ(data->frames(), input_parameters_.frames_per_buffer()); | 145 CHECK_EQ(data->frames(), input_parameters_.frames_per_buffer()); |
| 135 | 146 DVLOG(1) << "SRI::ODC::Convert..."; |
| 147 // Data should always have been provided by ProvideInput(). If not, the | |
| 148 // previous call to Convert() could produce converted data using cached | |
| 149 // data. But that means that we will miss one large frame of recorded audio | |
| 150 // samples. | |
| 151 CHECK(!waiting_for_input_); | |
| 136 data->CopyTo(input_bus_.get()); | 152 data->CopyTo(input_bus_.get()); |
| 137 | 153 |
| 138 waiting_for_input_ = true; | 154 waiting_for_input_ = true; |
| 155 convert_count_++; | |
| 139 audio_converter_.Convert(output_bus_.get()); | 156 audio_converter_.Convert(output_bus_.get()); |
|
DaleCurtis
2015/07/07 16:38:17
As mentioned in the email thread, you should proba
henrika (OOO until Aug 14)
2015/07/07 19:30:05
Smart. But note that I have modified the input siz
DaleCurtis
2015/07/07 20:40:44
Well, I'm not confident your modification always e
henrika (OOO until Aug 14)
2015/07/07 21:14:08
Got it. I will test your scheme using 102 where I
DaleCurtis
2015/07/07 21:44:15
I think it's fine to use 100ms like you do w/ prim
henrika (OOO until Aug 14)
2015/07/08 12:22:59
Done. Hope you are OK with how I handle the extra
| |
| 157 DVLOG(1) << "SRI::ODC::Convert done (" << convert_count_ << ")"; | |
|
tommi (sloooow) - chröme
2015/07/07 12:40:21
seems like this will always log a value higher tha
henrika (OOO until Aug 14)
2015/07/07 12:56:54
Got it. Plan is to remove this counter. Just wante
| |
| 140 | 158 |
| 141 scoped_refptr<AudioChunk> chunk( | 159 scoped_refptr<AudioChunk> chunk( |
| 142 new AudioChunk(output_parameters_.GetBytesPerBuffer(), | 160 new AudioChunk(output_parameters_.GetBytesPerBuffer(), |
| 143 output_parameters_.bits_per_sample() / 8)); | 161 output_parameters_.bits_per_sample() / 8)); |
| 144 output_bus_->ToInterleaved(output_bus_->frames(), | 162 output_bus_->ToInterleaved(output_bus_->frames(), |
| 145 output_parameters_.bits_per_sample() / 8, | 163 output_parameters_.bits_per_sample() / 8, |
| 146 chunk->writable_data()); | 164 chunk->writable_data()); |
| 147 return chunk; | 165 return chunk; |
| 148 } | 166 } |
| 149 | 167 |
| 150 double SpeechRecognizerImpl::OnDataConverter::ProvideInput( | 168 double SpeechRecognizerImpl::OnDataConverter::ProvideInput( |
| 151 AudioBus* dest, base::TimeDelta buffer_delay) { | 169 AudioBus* dest, base::TimeDelta buffer_delay) { |
| 170 DVLOG(1) << "SRI::ODC::ProvideInput"; | |
| 152 // The audio converter should never ask for more than one bus in each call | 171 // The audio converter should never ask for more than one bus in each call |
| 153 // to Convert(). If so, we have a serious issue in our design since we might | 172 // to Convert(). If so, we have a serious issue in our design since we might |
| 154 // miss recorded chunks of 100 ms audio data. | 173 // miss recorded chunks of 100 ms audio data. |
| 155 CHECK(waiting_for_input_); | 174 CHECK(waiting_for_input_); |
| 156 | 175 |
| 157 // Read from the input bus to feed the converter. | 176 // Read from the input bus to feed the converter. |
| 158 input_bus_->CopyTo(dest); | 177 input_bus_->CopyTo(dest); |
| 159 | 178 |
| 160 // |input_bus_| should only be provided once. | 179 // |input_bus_| should only be provided once. |
| 161 waiting_for_input_ = false; | 180 waiting_for_input_ = false; |
| (...skipping 354 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 516 return Abort( | 535 return Abort( |
| 517 SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE)); | 536 SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE)); |
| 518 } | 537 } |
| 519 | 538 |
| 520 // Audio converter shall provide audio based on these parameters as output. | 539 // Audio converter shall provide audio based on these parameters as output. |
| 521 // Hard coded, WebSpeech specific parameters are utilized here. | 540 // Hard coded, WebSpeech specific parameters are utilized here. |
| 522 int frames_per_buffer = (kAudioSampleRate * chunk_duration_ms) / 1000; | 541 int frames_per_buffer = (kAudioSampleRate * chunk_duration_ms) / 1000; |
| 523 AudioParameters output_parameters = AudioParameters( | 542 AudioParameters output_parameters = AudioParameters( |
| 524 AudioParameters::AUDIO_PCM_LOW_LATENCY, kChannelLayout, kAudioSampleRate, | 543 AudioParameters::AUDIO_PCM_LOW_LATENCY, kChannelLayout, kAudioSampleRate, |
| 525 kNumBitsPerAudioSample, frames_per_buffer); | 544 kNumBitsPerAudioSample, frames_per_buffer); |
| 545 DVLOG(1) << "SRI::output_parameters: " | |
| 546 << output_parameters.AsHumanReadableString(); | |
| 526 | 547 |
| 527 // Audio converter will receive audio based on these parameters as input. | 548 // Audio converter will receive audio based on these parameters as input. |
| 528 // On Windows we start by verifying that Core Audio is supported. If not, | 549 // On Windows we start by verifying that Core Audio is supported. If not, |
| 529 // the WaveIn API is used and we might as well avoid all audio conversions | 550 // the WaveIn API is used and we might as well avoid all audio conversions |
| 530 // since WaveIn does the conversion for us. | 551 // since WaveIn does the conversion for us. |
| 531 // TODO(henrika): this code should be moved to platform dependent audio | 552 // TODO(henrika): this code should be moved to platform dependent audio |
| 532 // managers. | 553 // managers. |
| 533 bool use_native_audio_params = true; | 554 bool use_native_audio_params = true; |
| 534 #if defined(OS_WIN) | 555 #if defined(OS_WIN) |
|
DaleCurtis
2015/07/07 20:40:43
I'm surprised we're not using the native params fo
henrika (OOO until Aug 14)
2015/07/07 21:14:08
Hmm, but we are on all but Win XP. All others use
DaleCurtis
2015/07/07 21:44:15
Ah no, I misread and inverted the check.
| |
| 535 use_native_audio_params = media::CoreAudioUtil::IsSupported(); | 556 use_native_audio_params = media::CoreAudioUtil::IsSupported(); |
| 536 DVLOG_IF(1, !use_native_audio_params) << "Reverting to WaveIn for WebSpeech"; | 557 DVLOG_IF(1, !use_native_audio_params) << "Reverting to WaveIn for WebSpeech"; |
| 537 #endif | 558 #endif |
| 538 | 559 |
| 539 AudioParameters input_parameters = output_parameters; | 560 AudioParameters input_parameters = output_parameters; |
| 540 if (use_native_audio_params && !unit_test_is_active) { | 561 if (use_native_audio_params && !unit_test_is_active) { |
| 541 // Use native audio parameters but avoid opening up at the native buffer | 562 // Use native audio parameters but avoid opening up at the native buffer |
| 542 // size. Instead use same frame size (in milliseconds) as WebSpeech uses. | 563 // size. Instead use same frame size (in milliseconds) as WebSpeech uses. |
| 543 // We rely on internal buffers in the audio back-end to fulfill this request | 564 // We rely on internal buffers in the audio back-end to fulfill this request |
| 544 // and the idea is to simplify the audio conversion since each Convert() | 565 // and the idea is to simplify the audio conversion since each Convert() |
| 545 // call will then render exactly one ProvideInput() call. | 566 // call will then render exactly one ProvideInput() call. |
| 546 // Due to implementation details in the audio converter, 2 milliseconds | 567 // Due to implementation details in the audio converter, 2 milliseconds |
| 547 // are added to the default frame size (100 ms) to ensure there is enough | 568 // are added to the default frame size (100 ms) to ensure there is enough |
| 548 // data to generate 100 ms of output when resampling. | 569 // data to generate 100 ms of output when resampling. |
| 570 // frames_per_buffer = | |
| 571 // ((in_params.sample_rate() * (chunk_duration_ms + 2)) / 1000.0) + 0.5; | |
| 549 frames_per_buffer = | 572 frames_per_buffer = |
| 550 ((in_params.sample_rate() * (chunk_duration_ms + 2)) / 1000.0) + 0.5; | 573 ((in_params.sample_rate() * chunk_duration_ms) / 1000.0) + 0.5; |
| 551 input_parameters.Reset(in_params.format(), | 574 input_parameters.Reset(in_params.format(), |
| 552 in_params.channel_layout(), | 575 in_params.channel_layout(), |
| 553 in_params.channels(), | 576 in_params.channels(), |
| 554 in_params.sample_rate(), | 577 in_params.sample_rate(), |
| 555 in_params.bits_per_sample(), | 578 in_params.bits_per_sample(), |
| 556 frames_per_buffer); | 579 frames_per_buffer); |
| 580 DVLOG(1) << "SRI::input_parameters: " | |
| 581 << input_parameters.AsHumanReadableString(); | |
| 557 } | 582 } |
| 558 | 583 |
| 559 // Create an audio converter which converts data between native input format | 584 // Create an audio converter which converts data between native input format |
| 560 // and WebSpeech specific output format. | 585 // and WebSpeech specific output format. |
| 561 audio_converter_.reset( | 586 audio_converter_.reset( |
| 562 new OnDataConverter(input_parameters, output_parameters)); | 587 new OnDataConverter(input_parameters, output_parameters)); |
| 563 | 588 |
| 564 audio_controller_ = AudioInputController::Create( | 589 audio_controller_ = AudioInputController::Create( |
| 565 audio_manager, this, input_parameters, device_id_, NULL); | 590 audio_manager, this, input_parameters, device_id_, NULL); |
| 566 | 591 |
| (...skipping 245 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 812 SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value) | 837 SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value) |
| 813 : event(event_value), | 838 : event(event_value), |
| 814 audio_data(NULL), | 839 audio_data(NULL), |
| 815 engine_error(SPEECH_RECOGNITION_ERROR_NONE) { | 840 engine_error(SPEECH_RECOGNITION_ERROR_NONE) { |
| 816 } | 841 } |
| 817 | 842 |
| 818 SpeechRecognizerImpl::FSMEventArgs::~FSMEventArgs() { | 843 SpeechRecognizerImpl::FSMEventArgs::~FSMEventArgs() { |
| 819 } | 844 } |
| 820 | 845 |
| 821 } // namespace content | 846 } // namespace content |
| OLD | NEW |