OLD | NEW |
---|---|
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "content/browser/speech/speech_recognizer_impl.h" | 5 #include "content/browser/speech/speech_recognizer_impl.h" |
6 | 6 |
7 #include "base/basictypes.h" | 7 #include "base/basictypes.h" |
8 #include "base/bind.h" | 8 #include "base/bind.h" |
9 #include "base/time/time.h" | 9 #include "base/time/time.h" |
10 #include "content/browser/browser_main_loop.h" | 10 #include "content/browser/browser_main_loop.h" |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
48 | 48 |
49 // Handles resampling, buffering, and channel mixing between input and output | 49 // Handles resampling, buffering, and channel mixing between input and output |
50 // parameters. | 50 // parameters. |
51 AudioConverter audio_converter_; | 51 AudioConverter audio_converter_; |
52 | 52 |
53 scoped_ptr<AudioBus> input_bus_; | 53 scoped_ptr<AudioBus> input_bus_; |
54 scoped_ptr<AudioBus> output_bus_; | 54 scoped_ptr<AudioBus> output_bus_; |
55 const AudioParameters input_parameters_; | 55 const AudioParameters input_parameters_; |
56 const AudioParameters output_parameters_; | 56 const AudioParameters output_parameters_; |
57 bool waiting_for_input_; | 57 bool waiting_for_input_; |
58 int convert_count_; | |
58 | 59 |
59 DISALLOW_COPY_AND_ASSIGN(OnDataConverter); | 60 DISALLOW_COPY_AND_ASSIGN(OnDataConverter); |
60 }; | 61 }; |
61 | 62 |
62 namespace { | 63 namespace { |
63 | 64 |
64 // The following constants are related to the volume level indicator shown in | 65 // The following constants are related to the volume level indicator shown in |
65 // the UI for recorded audio. | 66 // the UI for recorded audio. |
66 // Multiplier used when new volume is greater than previous level. | 67 // Multiplier used when new volume is greater than previous level. |
67 const float kUpSmoothingFactor = 1.0f; | 68 const float kUpSmoothingFactor = 1.0f; |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
112 // SpeechRecognizerImpl::OnDataConverter implementation | 113 // SpeechRecognizerImpl::OnDataConverter implementation |
113 | 114 |
114 SpeechRecognizerImpl::OnDataConverter::OnDataConverter( | 115 SpeechRecognizerImpl::OnDataConverter::OnDataConverter( |
115 const AudioParameters& input_params, | 116 const AudioParameters& input_params, |
116 const AudioParameters& output_params) | 117 const AudioParameters& output_params) |
117 : audio_converter_(input_params, output_params, false), | 118 : audio_converter_(input_params, output_params, false), |
118 input_bus_(AudioBus::Create(input_params)), | 119 input_bus_(AudioBus::Create(input_params)), |
119 output_bus_(AudioBus::Create(output_params)), | 120 output_bus_(AudioBus::Create(output_params)), |
120 input_parameters_(input_params), | 121 input_parameters_(input_params), |
121 output_parameters_(output_params), | 122 output_parameters_(output_params), |
122 waiting_for_input_(false) { | 123 waiting_for_input_(false), |
124 convert_count_(0) { | |
123 audio_converter_.AddInput(this); | 125 audio_converter_.AddInput(this); |
126 DVLOG(1) << "SRI::AudioConverter::ChunkSize " << audio_converter_.ChunkSize(); | |
127 | |
128 // Initial priming with zeros... | |
129 // waiting_for_input_ = true; | |
130 // input_bus_->Zero(); | |
131 // audio_converter_.Convert(output_bus_.get()); | |
132 | |
133 audio_converter_.PrimeWithSilence(); | |
134 DVLOG(1) << "SRI::AudioConverter::ChunkSize " << audio_converter_.ChunkSize(); | |
124 } | 135 } |
125 | 136 |
126 SpeechRecognizerImpl::OnDataConverter::~OnDataConverter() { | 137 SpeechRecognizerImpl::OnDataConverter::~OnDataConverter() { |
127 // It should now be safe to unregister the converter since no more OnData() | 138 // It should now be safe to unregister the converter since no more OnData() |
128 // callbacks are outstanding at this point. | 139 // callbacks are outstanding at this point. |
129 audio_converter_.RemoveInput(this); | 140 audio_converter_.RemoveInput(this); |
130 } | 141 } |
131 | 142 |
132 scoped_refptr<AudioChunk> SpeechRecognizerImpl::OnDataConverter::Convert( | 143 scoped_refptr<AudioChunk> SpeechRecognizerImpl::OnDataConverter::Convert( |
133 const AudioBus* data) { | 144 const AudioBus* data) { |
134 CHECK_EQ(data->frames(), input_parameters_.frames_per_buffer()); | 145 CHECK_EQ(data->frames(), input_parameters_.frames_per_buffer()); |
135 | 146 DVLOG(1) << "SRI::ODC::Convert..."; |
147 // Data should always have been provided by ProvideInput(). If not, the | |
148 // previous call to Convert() could produce converted data using cached | |
149 // data. But that means that we will miss one large frame of recorded audio | |
150 // samples. | |
151 CHECK(!waiting_for_input_); | |
136 data->CopyTo(input_bus_.get()); | 152 data->CopyTo(input_bus_.get()); |
137 | 153 |
138 waiting_for_input_ = true; | 154 waiting_for_input_ = true; |
155 convert_count_++; | |
139 audio_converter_.Convert(output_bus_.get()); | 156 audio_converter_.Convert(output_bus_.get()); |
DaleCurtis
2015/07/07 16:38:17
As mentioned in the email thread, you should proba
henrika (OOO until Aug 14)
2015/07/07 19:30:05
Smart. But note that I have modified the input siz
DaleCurtis
2015/07/07 20:40:44
Well, I'm not confident your modification always e
henrika (OOO until Aug 14)
2015/07/07 21:14:08
Got it. I will test your scheme using 102 where I
DaleCurtis
2015/07/07 21:44:15
I think it's fine to use 100ms like you do w/ prim
henrika (OOO until Aug 14)
2015/07/08 12:22:59
Done. Hope you are OK with how I handle the extra
| |
157 DVLOG(1) << "SRI::ODC::Convert done (" << convert_count_ << ")"; | |
tommi (sloooow) - chröme
2015/07/07 12:40:21
seems like this will always log a value higher tha
henrika (OOO until Aug 14)
2015/07/07 12:56:54
Got it. Plan is to remove this counter. Just wante
| |
140 | 158 |
141 scoped_refptr<AudioChunk> chunk( | 159 scoped_refptr<AudioChunk> chunk( |
142 new AudioChunk(output_parameters_.GetBytesPerBuffer(), | 160 new AudioChunk(output_parameters_.GetBytesPerBuffer(), |
143 output_parameters_.bits_per_sample() / 8)); | 161 output_parameters_.bits_per_sample() / 8)); |
144 output_bus_->ToInterleaved(output_bus_->frames(), | 162 output_bus_->ToInterleaved(output_bus_->frames(), |
145 output_parameters_.bits_per_sample() / 8, | 163 output_parameters_.bits_per_sample() / 8, |
146 chunk->writable_data()); | 164 chunk->writable_data()); |
147 return chunk; | 165 return chunk; |
148 } | 166 } |
149 | 167 |
150 double SpeechRecognizerImpl::OnDataConverter::ProvideInput( | 168 double SpeechRecognizerImpl::OnDataConverter::ProvideInput( |
151 AudioBus* dest, base::TimeDelta buffer_delay) { | 169 AudioBus* dest, base::TimeDelta buffer_delay) { |
170 DVLOG(1) << "SRI::ODC::ProvideInput"; | |
152 // The audio converter should never ask for more than one bus in each call | 171 // The audio converter should never ask for more than one bus in each call |
153 // to Convert(). If so, we have a serious issue in our design since we might | 172 // to Convert(). If so, we have a serious issue in our design since we might |
154 // miss recorded chunks of 100 ms audio data. | 173 // miss recorded chunks of 100 ms audio data. |
155 CHECK(waiting_for_input_); | 174 CHECK(waiting_for_input_); |
156 | 175 |
157 // Read from the input bus to feed the converter. | 176 // Read from the input bus to feed the converter. |
158 input_bus_->CopyTo(dest); | 177 input_bus_->CopyTo(dest); |
159 | 178 |
160 // |input_bus_| should only be provided once. | 179 // |input_bus_| should only be provided once. |
161 waiting_for_input_ = false; | 180 waiting_for_input_ = false; |
(...skipping 354 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
516 return Abort( | 535 return Abort( |
517 SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE)); | 536 SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE)); |
518 } | 537 } |
519 | 538 |
520 // Audio converter shall provide audio based on these parameters as output. | 539 // Audio converter shall provide audio based on these parameters as output. |
521 // Hard coded, WebSpeech specific parameters are utilized here. | 540 // Hard coded, WebSpeech specific parameters are utilized here. |
522 int frames_per_buffer = (kAudioSampleRate * chunk_duration_ms) / 1000; | 541 int frames_per_buffer = (kAudioSampleRate * chunk_duration_ms) / 1000; |
523 AudioParameters output_parameters = AudioParameters( | 542 AudioParameters output_parameters = AudioParameters( |
524 AudioParameters::AUDIO_PCM_LOW_LATENCY, kChannelLayout, kAudioSampleRate, | 543 AudioParameters::AUDIO_PCM_LOW_LATENCY, kChannelLayout, kAudioSampleRate, |
525 kNumBitsPerAudioSample, frames_per_buffer); | 544 kNumBitsPerAudioSample, frames_per_buffer); |
545 DVLOG(1) << "SRI::output_parameters: " | |
546 << output_parameters.AsHumanReadableString(); | |
526 | 547 |
527 // Audio converter will receive audio based on these parameters as input. | 548 // Audio converter will receive audio based on these parameters as input. |
528 // On Windows we start by verifying that Core Audio is supported. If not, | 549 // On Windows we start by verifying that Core Audio is supported. If not, |
529 // the WaveIn API is used and we might as well avoid all audio conversions | 550 // the WaveIn API is used and we might as well avoid all audio conversions |
530 // since WaveIn does the conversion for us. | 551 // since WaveIn does the conversion for us. |
531 // TODO(henrika): this code should be moved to platform dependent audio | 552 // TODO(henrika): this code should be moved to platform dependent audio |
532 // managers. | 553 // managers. |
533 bool use_native_audio_params = true; | 554 bool use_native_audio_params = true; |
534 #if defined(OS_WIN) | 555 #if defined(OS_WIN) |
DaleCurtis
2015/07/07 20:40:43
I'm surprised we're not using the native params fo
henrika (OOO until Aug 14)
2015/07/07 21:14:08
Hmm, but we are on all but Win XP. All others use
DaleCurtis
2015/07/07 21:44:15
Ah no, I misread and inverted the check.
| |
535 use_native_audio_params = media::CoreAudioUtil::IsSupported(); | 556 use_native_audio_params = media::CoreAudioUtil::IsSupported(); |
536 DVLOG_IF(1, !use_native_audio_params) << "Reverting to WaveIn for WebSpeech"; | 557 DVLOG_IF(1, !use_native_audio_params) << "Reverting to WaveIn for WebSpeech"; |
537 #endif | 558 #endif |
538 | 559 |
539 AudioParameters input_parameters = output_parameters; | 560 AudioParameters input_parameters = output_parameters; |
540 if (use_native_audio_params && !unit_test_is_active) { | 561 if (use_native_audio_params && !unit_test_is_active) { |
541 // Use native audio parameters but avoid opening up at the native buffer | 562 // Use native audio parameters but avoid opening up at the native buffer |
542 // size. Instead use same frame size (in milliseconds) as WebSpeech uses. | 563 // size. Instead use same frame size (in milliseconds) as WebSpeech uses. |
543 // We rely on internal buffers in the audio back-end to fulfill this request | 564 // We rely on internal buffers in the audio back-end to fulfill this request |
544 // and the idea is to simplify the audio conversion since each Convert() | 565 // and the idea is to simplify the audio conversion since each Convert() |
545 // call will then render exactly one ProvideInput() call. | 566 // call will then render exactly one ProvideInput() call. |
546 // Due to implementation details in the audio converter, 2 milliseconds | 567 // Due to implementation details in the audio converter, 2 milliseconds |
547 // are added to the default frame size (100 ms) to ensure there is enough | 568 // are added to the default frame size (100 ms) to ensure there is enough |
548 // data to generate 100 ms of output when resampling. | 569 // data to generate 100 ms of output when resampling. |
570 // frames_per_buffer = | |
571 // ((in_params.sample_rate() * (chunk_duration_ms + 2)) / 1000.0) + 0.5; | |
549 frames_per_buffer = | 572 frames_per_buffer = |
550 ((in_params.sample_rate() * (chunk_duration_ms + 2)) / 1000.0) + 0.5; | 573 ((in_params.sample_rate() * chunk_duration_ms) / 1000.0) + 0.5; |
551 input_parameters.Reset(in_params.format(), | 574 input_parameters.Reset(in_params.format(), |
552 in_params.channel_layout(), | 575 in_params.channel_layout(), |
553 in_params.channels(), | 576 in_params.channels(), |
554 in_params.sample_rate(), | 577 in_params.sample_rate(), |
555 in_params.bits_per_sample(), | 578 in_params.bits_per_sample(), |
556 frames_per_buffer); | 579 frames_per_buffer); |
580 DVLOG(1) << "SRI::input_parameters: " | |
581 << input_parameters.AsHumanReadableString(); | |
557 } | 582 } |
558 | 583 |
559 // Create an audio converter which converts data between native input format | 584 // Create an audio converter which converts data between native input format |
560 // and WebSpeech specific output format. | 585 // and WebSpeech specific output format. |
561 audio_converter_.reset( | 586 audio_converter_.reset( |
562 new OnDataConverter(input_parameters, output_parameters)); | 587 new OnDataConverter(input_parameters, output_parameters)); |
563 | 588 |
564 audio_controller_ = AudioInputController::Create( | 589 audio_controller_ = AudioInputController::Create( |
565 audio_manager, this, input_parameters, device_id_, NULL); | 590 audio_manager, this, input_parameters, device_id_, NULL); |
566 | 591 |
(...skipping 245 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
812 SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value) | 837 SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value) |
813 : event(event_value), | 838 : event(event_value), |
814 audio_data(NULL), | 839 audio_data(NULL), |
815 engine_error(SPEECH_RECOGNITION_ERROR_NONE) { | 840 engine_error(SPEECH_RECOGNITION_ERROR_NONE) { |
816 } | 841 } |
817 | 842 |
818 SpeechRecognizerImpl::FSMEventArgs::~FSMEventArgs() { | 843 SpeechRecognizerImpl::FSMEventArgs::~FSMEventArgs() { |
819 } | 844 } |
820 | 845 |
821 } // namespace content | 846 } // namespace content |
OLD | NEW |