| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "content/browser/speech/speech_recognizer_impl.h" | 5 #include "content/browser/speech/speech_recognizer_impl.h" |
| 6 | 6 |
| 7 #include "base/basictypes.h" |
| 7 #include "base/bind.h" | 8 #include "base/bind.h" |
| 8 #include "base/time.h" | 9 #include "base/time.h" |
| 9 #include "content/browser/browser_main_loop.h" | 10 #include "content/browser/browser_main_loop.h" |
| 10 #include "content/browser/speech/audio_buffer.h" | 11 #include "content/browser/speech/audio_buffer.h" |
| 11 #include "content/browser/speech/google_one_shot_remote_engine.h" | 12 #include "content/browser/speech/google_one_shot_remote_engine.h" |
| 12 #include "content/public/browser/browser_thread.h" | 13 #include "content/public/browser/browser_thread.h" |
| 13 #include "content/public/browser/speech_recognition_event_listener.h" | 14 #include "content/public/browser/speech_recognition_event_listener.h" |
| 14 #include "content/public/browser/speech_recognizer.h" | 15 #include "content/public/browser/speech_recognizer.h" |
| 15 #include "content/public/common/speech_recognition_error.h" | 16 #include "content/public/common/speech_recognition_error.h" |
| 16 #include "content/public/common/speech_recognition_result.h" | 17 #include "content/public/common/speech_recognition_result.h" |
| 17 #include "net/url_request/url_request_context_getter.h" | 18 #include "net/url_request/url_request_context_getter.h" |
| 18 | 19 |
| 19 using content::BrowserMainLoop; | 20 using content::BrowserMainLoop; |
| 20 using content::BrowserThread; | 21 using content::BrowserThread; |
| 21 using content::SpeechRecognitionError; | 22 using content::SpeechRecognitionError; |
| 22 using content::SpeechRecognitionEventListener; | 23 using content::SpeechRecognitionEventListener; |
| 23 using content::SpeechRecognitionResult; | 24 using content::SpeechRecognitionResult; |
| 24 using content::SpeechRecognizer; | 25 using content::SpeechRecognizer; |
| 25 using media::AudioInputController; | 26 using media::AudioInputController; |
| 26 using media::AudioManager; | 27 using media::AudioManager; |
| 28 using media::AudioParameters; |
| 27 | 29 |
| 28 namespace { | 30 namespace { |
| 29 | 31 |
| 30 // The following constants are related to the volume level indicator shown in | 32 // The following constants are related to the volume level indicator shown in |
| 31 // the UI for recorded audio. | 33 // the UI for recorded audio. |
| 32 // Multiplier used when new volume is greater than previous level. | 34 // Multiplier used when new volume is greater than previous level. |
| 33 const float kUpSmoothingFactor = 1.0f; | 35 const float kUpSmoothingFactor = 1.0f; |
| 34 // Multiplier used when new volume is less than previous level. | 36 // Multiplier used when new volume is less than previous level. |
| 35 const float kDownSmoothingFactor = 0.7f; | 37 const float kDownSmoothingFactor = 0.7f; |
| 36 // RMS dB value of a maximum (unclipped) sine wave for int16 samples. | 38 // RMS dB value of a maximum (unclipped) sine wave for int16 samples. |
| 37 const float kAudioMeterMaxDb = 90.31f; | 39 const float kAudioMeterMaxDb = 90.31f; |
| 38 // This value corresponds to RMS dB for int16 with 6 most-significant-bits = 0. | 40 // This value corresponds to RMS dB for int16 with 6 most-significant-bits = 0. |
| 39 // Values lower than this will display as an empty level-meter. | 41 // Values lower than this will display as an empty level-meter. |
| 40 const float kAudioMeterMinDb = 30.0f; | 42 const float kAudioMeterMinDb = 30.0f; |
| 41 const float kAudioMeterDbRange = kAudioMeterMaxDb - kAudioMeterMinDb; | 43 const float kAudioMeterDbRange = kAudioMeterMaxDb - kAudioMeterMinDb; |
| 42 | 44 |
| 43 // Maximum level to draw to display unclipped meter. (1.0f displays clipping.) | 45 // Maximum level to draw to display unclipped meter. (1.0f displays clipping.) |
| 44 const float kAudioMeterRangeMaxUnclipped = 47.0f / 48.0f; | 46 const float kAudioMeterRangeMaxUnclipped = 47.0f / 48.0f; |
| 45 | 47 |
| 46 // Returns true if more than 5% of the samples are at min or max value. | 48 // Returns true if more than 5% of the samples are at min or max value. |
| 47 bool DetectClipping(const speech::AudioChunk& chunk) { | 49 bool DetectClipping(const speech::AudioChunk& chunk) { |
| 48 const int num_samples = chunk.NumSamples(); | 50 const int num_samples = chunk.NumSamples(); |
| 49 const int16* samples = chunk.SamplesData16(); | 51 const int16* samples = chunk.SamplesData16(); |
| 50 const int kThreshold = num_samples / 20; | 52 const int kThreshold = num_samples / 20; |
| 51 int clipping_samples = 0; | 53 int clipping_samples = 0; |
| 54 |
| 52 for (int i = 0; i < num_samples; ++i) { | 55 for (int i = 0; i < num_samples; ++i) { |
| 53 if (samples[i] <= -32767 || samples[i] >= 32767) { | 56 if (samples[i] <= -32767 || samples[i] >= 32767) { |
| 54 if (++clipping_samples > kThreshold) | 57 if (++clipping_samples > kThreshold) |
| 55 return true; | 58 return true; |
| 56 } | 59 } |
| 57 } | 60 } |
| 58 return false; | 61 return false; |
| 59 } | 62 } |
| 60 | 63 |
| 61 } // namespace | 64 } // namespace |
| 62 | 65 |
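
Note on DetectClipping() above: it flags a chunk once more than 5% of its
samples sit at the int16 rails, returning early as soon as the count passes
num_samples / 20. A minimal standalone sketch of the same logic on a plain
int16_t buffer (illustrative only; the real code operates on
speech::AudioChunk):

    #include <cstdint>

    // Mirrors DetectClipping(): true if more than 5% of |samples| are at
    // (or beyond) the int16 full-scale values.
    bool IsClipped(const int16_t* samples, int num_samples) {
      const int threshold = num_samples / 20;  // 5% of the chunk.
      int clipped = 0;
      for (int i = 0; i < num_samples; ++i) {
        if (samples[i] <= -32767 || samples[i] >= 32767) {
          if (++clipped > threshold)
            return true;  // Early out once past the 5% budget.
        }
      }
      return false;
    }

For a 1000-sample chunk the threshold is 50, so clipping is reported on the
51st clipped sample.
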
| 63 SpeechRecognizer* SpeechRecognizer::Create( | 66 SpeechRecognizer* SpeechRecognizer::Create( |
| 64 SpeechRecognitionEventListener* listener, | 67 SpeechRecognitionEventListener* listener, |
| 65 int caller_id, | 68 int caller_id, |
| 66 const std::string& language, | 69 const std::string& language, |
| 67 const std::string& grammar, | 70 const std::string& grammar, |
| 68 net::URLRequestContextGetter* context_getter, | 71 net::URLRequestContextGetter* context_getter, |
| 69 bool filter_profanities, | 72 bool filter_profanities, |
| 70 const std::string& hardware_info, | 73 const std::string& hardware_info, |
| 71 const std::string& origin_url) { | 74 const std::string& origin_url) { |
| 75 speech::GoogleOneShotRemoteEngineConfig remote_engine_config; |
| 76 remote_engine_config.language = language; |
| 77 remote_engine_config.grammar = grammar; |
| 78 remote_engine_config.audio_sample_rate = |
| 79 speech::SpeechRecognizerImpl::kAudioSampleRate; |
| 80 remote_engine_config.audio_num_bits_per_sample = |
| 81 speech::SpeechRecognizerImpl::kNumBitsPerAudioSample; |
| 82 remote_engine_config.filter_profanities = filter_profanities; |
| 83 remote_engine_config.hardware_info = hardware_info; |
| 84 remote_engine_config.origin_url = origin_url; |
| 85 |
| 86 // SpeechRecognizerImpl takes ownership of google_remote_engine. |
| 87 speech::GoogleOneShotRemoteEngine* google_remote_engine = |
| 88 new speech::GoogleOneShotRemoteEngine(context_getter); |
| 89 google_remote_engine->SetConfig(remote_engine_config); |
| 90 |
| 72 return new speech::SpeechRecognizerImpl(listener, | 91 return new speech::SpeechRecognizerImpl(listener, |
| 73 caller_id, | 92 caller_id, |
| 74 language, | 93 google_remote_engine); |
| 75 grammar, | |
| 76 context_getter, | |
| 77 filter_profanities, | |
| 78 hardware_info, | |
| 79 origin_url); | |
| 80 } | 94 } |
| 81 | 95 |
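
Note: with this change Create() assembles the GoogleOneShotRemoteEngineConfig
itself and hands the configured engine to SpeechRecognizerImpl, which takes
ownership. A hedged sketch of a call site, assuming StartRecognition() is
exposed on the public SpeechRecognizer interface as the implementation below
suggests (the argument values are hypothetical; only the signature comes from
this file):

    // |listener| implements content::SpeechRecognitionEventListener and
    // must stay valid while the recognizer is active.
    content::SpeechRecognizer* recognizer =
        content::SpeechRecognizer::Create(listener,
                                          caller_id,       // session id
                                          "en-US",         // language
                                          std::string(),   // grammar
                                          context_getter,  // request context
                                          false,           // filter_profanities
                                          std::string(),   // hardware_info
                                          origin_url);
    recognizer->StartRecognition();  // Posts EVENT_START to the IO thread.
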
| 82 namespace speech { | 96 namespace speech { |
| 83 | 97 |
| 84 const int SpeechRecognizerImpl::kAudioSampleRate = 16000; | 98 const int SpeechRecognizerImpl::kAudioSampleRate = 16000; |
| 85 const ChannelLayout SpeechRecognizerImpl::kChannelLayout = CHANNEL_LAYOUT_MONO; | 99 const ChannelLayout SpeechRecognizerImpl::kChannelLayout = CHANNEL_LAYOUT_MONO; |
| 86 const int SpeechRecognizerImpl::kNumBitsPerAudioSample = 16; | 100 const int SpeechRecognizerImpl::kNumBitsPerAudioSample = 16; |
| 87 const int SpeechRecognizerImpl::kNoSpeechTimeoutMs = 8000; | 101 const int SpeechRecognizerImpl::kNoSpeechTimeoutMs = 8000; |
| 88 const int SpeechRecognizerImpl::kEndpointerEstimationTimeMs = 300; | 102 const int SpeechRecognizerImpl::kEndpointerEstimationTimeMs = 300; |
| 89 | 103 |
| 104 COMPILE_ASSERT(SpeechRecognizerImpl::kNumBitsPerAudioSample % 8 == 0, |
| 105 kNumBitsPerAudioSample_must_be_a_multiple_of_8); |
| 106 |
| 90 SpeechRecognizerImpl::SpeechRecognizerImpl( | 107 SpeechRecognizerImpl::SpeechRecognizerImpl( |
| 91 SpeechRecognitionEventListener* listener, | 108 SpeechRecognitionEventListener* listener, |
| 92 int caller_id, | 109 int caller_id, |
| 93 const std::string& language, | 110 SpeechRecognitionEngine* engine) |
| 94 const std::string& grammar, | |
| 95 net::URLRequestContextGetter* context_getter, | |
| 96 bool filter_profanities, | |
| 97 const std::string& hardware_info, | |
| 98 const std::string& origin_url) | |
| 99 : listener_(listener), | 111 : listener_(listener), |
| 100 testing_audio_manager_(NULL), | 112 testing_audio_manager_(NULL), |
| 113 recognition_engine_(engine), |
| 101 endpointer_(kAudioSampleRate), | 114 endpointer_(kAudioSampleRate), |
| 102 context_getter_(context_getter), | |
| 103 caller_id_(caller_id), | 115 caller_id_(caller_id), |
| 104 language_(language), | 116 is_dispatching_event_(false), |
| 105 grammar_(grammar), | 117 state_(STATE_IDLE) { |
| 106 filter_profanities_(filter_profanities), | |
| 107 hardware_info_(hardware_info), | |
| 108 origin_url_(origin_url), | |
| 109 num_samples_recorded_(0), | |
| 110 audio_level_(0.0f) { | |
| 111 DCHECK(listener_ != NULL); | 118 DCHECK(listener_ != NULL); |
| 119 DCHECK(recognition_engine_ != NULL); |
| 112 endpointer_.set_speech_input_complete_silence_length( | 120 endpointer_.set_speech_input_complete_silence_length( |
| 113 base::Time::kMicrosecondsPerSecond / 2); | 121 base::Time::kMicrosecondsPerSecond / 2); |
| 114 endpointer_.set_long_speech_input_complete_silence_length( | 122 endpointer_.set_long_speech_input_complete_silence_length( |
| 115 base::Time::kMicrosecondsPerSecond); | 123 base::Time::kMicrosecondsPerSecond); |
| 116 endpointer_.set_long_speech_length(3 * base::Time::kMicrosecondsPerSecond); | 124 endpointer_.set_long_speech_length(3 * base::Time::kMicrosecondsPerSecond); |
| 117 endpointer_.StartSession(); | 125 endpointer_.StartSession(); |
| 126 recognition_engine_->set_delegate(this); |
| 118 } | 127 } |
| 119 | 128 |
| 120 SpeechRecognizerImpl::~SpeechRecognizerImpl() { | 129 SpeechRecognizerImpl::~SpeechRecognizerImpl() { |
| 121 // Recording should have stopped earlier due to the endpointer or | |
| 122 // |StopRecording| being called. | |
| 123 DCHECK(!audio_controller_.get()); | |
| 124 DCHECK(!recognition_engine_.get() || | |
| 125 !recognition_engine_->IsRecognitionPending()); | |
| 126 endpointer_.EndSession(); | 130 endpointer_.EndSession(); |
| 127 } | 131 } |
| 128 | 132 |
| 133 // ------- Methods that trigger Finite State Machine (FSM) events ------------ |
| 134 |
| 135 // NOTE: all external events and requests should be enqueued (PostTask), even |
| 136 // if they come from the same (IO) thread, in order to preserve the causal |
| 137 // ordering of events and avoid interleaved event processing due to |
| 138 // synchronous callbacks. |
| 139 |
| 129 void SpeechRecognizerImpl::StartRecognition() { | 140 void SpeechRecognizerImpl::StartRecognition() { |
| 141 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, |
| 142 base::Bind(&SpeechRecognizerImpl::DispatchEvent, |
| 143 this, FSMEventArgs(EVENT_START))); |
| 144 } |
| 145 |
| 146 void SpeechRecognizerImpl::AbortRecognition() { |
| 147 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, |
| 148 base::Bind(&SpeechRecognizerImpl::DispatchEvent, |
| 149 this, FSMEventArgs(EVENT_ABORT))); |
| 150 } |
| 151 |
| 152 void SpeechRecognizerImpl::StopAudioCapture() { |
| 153 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, |
| 154 base::Bind(&SpeechRecognizerImpl::DispatchEvent, |
| 155 this, FSMEventArgs(EVENT_STOP_CAPTURE))); |
| 156 } |
| 157 |
| 158 bool SpeechRecognizerImpl::IsActive() const { |
| 159 // Checking the FSM state from another thread (thus, while the FSM is |
| 160 // potentially concurrently evolving) is meaningless. |
| 130 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | 161 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
| 131 DCHECK(!audio_controller_.get()); | 162 return state_ != STATE_IDLE; |
| 132 DCHECK(!recognition_engine_.get() || | 163 } |
| 133 !recognition_engine_->IsRecognitionPending()); | 164 |
| 134 | 165 bool SpeechRecognizerImpl::IsCapturingAudio() const { |
| 135 // The endpointer needs to estimate the environment/background noise before | 166 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); // See IsActive(). |
| 136 // starting to treat the audio as user input. In |HandleOnData| we wait until | 167 const bool is_capturing_audio = state_ >= STATE_STARTING && |
| 137 // such time has passed before switching to user input mode. | 168 state_ <= STATE_RECOGNIZING; |
| 138 endpointer_.SetEnvironmentEstimationMode(); | 169 DCHECK((is_capturing_audio && (audio_controller_.get() != NULL)) || |
| 139 | 170 (!is_capturing_audio && audio_controller_.get() == NULL)); |
| 140 AudioManager* audio_manager = (testing_audio_manager_ != NULL) ? | 171 return is_capturing_audio; |
| 141 testing_audio_manager_ : BrowserMainLoop::GetAudioManager(); | |
| 142 const int samples_per_packet = kAudioSampleRate * | |
| 143 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs / 1000; | |
| 144 media::AudioParameters params( | |
| 145 media::AudioParameters::AUDIO_PCM_LINEAR, kChannelLayout, | |
| 146 kAudioSampleRate, kNumBitsPerAudioSample, samples_per_packet); | |
| 147 audio_controller_ = AudioInputController::Create(audio_manager, this, params); | |
| 148 DCHECK(audio_controller_.get()); | |
| 149 VLOG(1) << "SpeechRecognizer starting record."; | |
| 150 num_samples_recorded_ = 0; | |
| 151 audio_controller_->Record(); | |
| 152 } | |
| 153 | |
| 154 void SpeechRecognizerImpl::AbortRecognition() { | |
| 155 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
| 156 DCHECK(audio_controller_.get() || recognition_engine_.get()); | |
| 157 | |
| 158 // Stop recording if required. | |
| 159 if (audio_controller_.get()) { | |
| 160 CloseAudioControllerAsynchronously(); | |
| 161 } | |
| 162 | |
| 163 VLOG(1) << "SpeechRecognizer canceling recognition."; | |
| 164 recognition_engine_.reset(); | |
| 165 } | |
| 166 | |
| 167 void SpeechRecognizerImpl::StopAudioCapture() { | |
| 168 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
| 169 | |
| 170 // If audio recording has already stopped and we are in recognition phase, | |
| 171 // silently ignore any more calls to stop recording. | |
| 172 if (!audio_controller_.get()) | |
| 173 return; | |
| 174 | |
| 175 CloseAudioControllerAsynchronously(); | |
| 176 listener_->OnSoundEnd(caller_id_); | |
| 177 listener_->OnAudioEnd(caller_id_); | |
| 178 | |
| 179 // If we haven't got any audio yet end the recognition sequence here. | |
| 180 if (recognition_engine_ == NULL) { | |
| 181 // Guard against the listener freeing us until we finish our job. | |
| 182 scoped_refptr<SpeechRecognizerImpl> me(this); | |
| 183 listener_->OnRecognitionEnd(caller_id_); | |
| 184 } else { | |
| 185 recognition_engine_->AudioChunksEnded(); | |
| 186 } | |
| 187 } | 172 } |
| 188 | 173 |
| 189 // Invoked in the audio thread. | 174 // Invoked in the audio thread. |
| 190 void SpeechRecognizerImpl::OnError(AudioInputController* controller, | 175 void SpeechRecognizerImpl::OnError(AudioInputController* controller, |
| 191 int error_code) { | 176 int error_code) { |
| 192 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | 177 FSMEventArgs event_args(EVENT_AUDIO_ERROR); |
| 193 base::Bind(&SpeechRecognizerImpl::HandleOnError, | 178 event_args.audio_error_code = error_code; |
| 194 this, error_code)); | 179 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, |
| 195 } | 180 base::Bind(&SpeechRecognizerImpl::DispatchEvent, |
| 196 | 181 this, event_args)); |
| 197 void SpeechRecognizerImpl::HandleOnError(int error_code) { | |
| 198 LOG(WARNING) << "SpeechRecognizer::HandleOnError, code=" << error_code; | |
| 199 | |
| 200 // Check if we are still recording before canceling recognition, as | |
| 201 // recording might have been stopped after this error was posted to the queue | |
| 202 // by |OnError|. | |
| 203 if (!audio_controller_.get()) | |
| 204 return; | |
| 205 | |
| 206 InformErrorAndAbortRecognition(content::SPEECH_RECOGNITION_ERROR_AUDIO); | |
| 207 } | 182 } |
| 208 | 183 |
| 209 void SpeechRecognizerImpl::OnData(AudioInputController* controller, | 184 void SpeechRecognizerImpl::OnData(AudioInputController* controller, |
| 210 const uint8* data, uint32 size) { | 185 const uint8* data, uint32 size) { |
| 211 if (size == 0) // This could happen when recording stops and is normal. | 186 if (size == 0) // This can happen when audio capture stops; it is normal. |
| 212 return; | 187 return; |
| 213 scoped_refptr<AudioChunk> raw_audio( | 188 |
| 214 new AudioChunk(data, | 189 FSMEventArgs event_args(EVENT_AUDIO_DATA); |
| 215 static_cast<size_t>(size), | 190 event_args.audio_data = new AudioChunk(data, static_cast<size_t>(size), |
| 216 kNumBitsPerAudioSample / 8)); | 191 kNumBitsPerAudioSample / 8); |
| 217 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | 192 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, |
| 218 base::Bind(&SpeechRecognizerImpl::HandleOnData, | 193 base::Bind(&SpeechRecognizerImpl::DispatchEvent, |
| 219 this, raw_audio)); | 194 this, event_args)); |
| 220 } | |
| 221 | |
| 222 void SpeechRecognizerImpl::HandleOnData(scoped_refptr<AudioChunk> raw_audio) { | |
| 223 // Check if we are still recording and if not discard this buffer, as | |
| 224 // recording might have been stopped after this buffer was posted to the queue | |
| 225 // by |OnData|. | |
| 226 if (!audio_controller_.get()) | |
| 227 return; | |
| 228 | |
| 229 bool speech_was_heard_before_packet = endpointer_.DidStartReceivingSpeech(); | |
| 230 | |
| 231 float rms; | |
| 232 endpointer_.ProcessAudio(*raw_audio, &rms); | |
| 233 bool did_clip = DetectClipping(*raw_audio); | |
| 234 num_samples_recorded_ += raw_audio->NumSamples(); | |
| 235 | |
| 236 if (recognition_engine_ == NULL) { | |
| 237 // This was the first audio packet recorded, so start a request to the | |
| 238 // server to send the data and inform the listener. | |
| 239 listener_->OnAudioStart(caller_id_); | |
| 240 GoogleOneShotRemoteEngineConfig google_sr_config; | |
| 241 google_sr_config.language = language_; | |
| 242 google_sr_config.grammar = grammar_; | |
| 243 google_sr_config.audio_sample_rate = kAudioSampleRate; | |
| 244 google_sr_config.audio_num_bits_per_sample = kNumBitsPerAudioSample; | |
| 245 google_sr_config.filter_profanities = filter_profanities_; | |
| 246 google_sr_config.hardware_info = hardware_info_; | |
| 247 google_sr_config.origin_url = origin_url_; | |
| 248 GoogleOneShotRemoteEngine* google_sr_engine = | |
| 249 new GoogleOneShotRemoteEngine(context_getter_.get()); | |
| 250 google_sr_engine->SetConfig(google_sr_config); | |
| 251 recognition_engine_.reset(google_sr_engine); | |
| 252 recognition_engine_->set_delegate(this); | |
| 253 recognition_engine_->StartRecognition(); | |
| 254 } | |
| 255 | |
| 256 recognition_engine_->TakeAudioChunk(*raw_audio); | |
| 257 | |
| 258 if (endpointer_.IsEstimatingEnvironment()) { | |
| 259 // Check if we have gathered enough audio for the endpointer to do | |
| 260 // environment estimation and should move on to detect speech/end of speech. | |
| 261 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs * | |
| 262 kAudioSampleRate) / 1000) { | |
| 263 endpointer_.SetUserInputMode(); | |
| 264 listener_->OnEnvironmentEstimationComplete(caller_id_); | |
| 265 } | |
| 266 return; // No more processing since we are still estimating environment. | |
| 267 } | |
| 268 | |
| 269 // Check if we have waited too long without hearing any speech. | |
| 270 bool speech_was_heard_after_packet = endpointer_.DidStartReceivingSpeech(); | |
| 271 if (!speech_was_heard_after_packet && | |
| 272 num_samples_recorded_ >= (kNoSpeechTimeoutMs / 1000) * kAudioSampleRate) { | |
| 273 InformErrorAndAbortRecognition( | |
| 274 content::SPEECH_RECOGNITION_ERROR_NO_SPEECH); | |
| 275 return; | |
| 276 } | |
| 277 | |
| 278 if (!speech_was_heard_before_packet && speech_was_heard_after_packet) | |
| 279 listener_->OnSoundStart(caller_id_); | |
| 280 | |
| 281 // Calculate the input volume to display in the UI, smoothing towards the | |
| 282 // new level. | |
| 283 float level = (rms - kAudioMeterMinDb) / | |
| 284 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); | |
| 285 level = std::min(std::max(0.0f, level), kAudioMeterRangeMaxUnclipped); | |
| 286 if (level > audio_level_) { | |
| 287 audio_level_ += (level - audio_level_) * kUpSmoothingFactor; | |
| 288 } else { | |
| 289 audio_level_ += (level - audio_level_) * kDownSmoothingFactor; | |
| 290 } | |
| 291 | |
| 292 float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) / | |
| 293 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); | |
| 294 noise_level = std::min(std::max(0.0f, noise_level), | |
| 295 kAudioMeterRangeMaxUnclipped); | |
| 296 | |
| 297 listener_->OnAudioLevelsChange(caller_id_, did_clip ? 1.0f : audio_level_, | |
| 298 noise_level); | |
| 299 | |
| 300 if (endpointer_.speech_input_complete()) | |
| 301 StopAudioCapture(); | |
| 302 } | 195 } |
| 303 | 196 |
| 304 void SpeechRecognizerImpl::OnAudioClosed(AudioInputController*) {} | 197 void SpeechRecognizerImpl::OnAudioClosed(AudioInputController*) {} |
| 305 | 198 |
| 306 void SpeechRecognizerImpl::OnSpeechRecognitionEngineResult( | 199 void SpeechRecognizerImpl::OnSpeechRecognitionEngineResult( |
| 307 const content::SpeechRecognitionResult& result) { | 200 const content::SpeechRecognitionResult& result) { |
| 308 // Guard against the listener freeing us until we finish our job. | 201 FSMEventArgs event_args(EVENT_ENGINE_RESULT); |
| 202 event_args.engine_result = result; |
| 203 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, |
| 204 base::Bind(&SpeechRecognizerImpl::DispatchEvent, |
| 205 this, event_args)); |
| 206 } |
| 207 |
| 208 void SpeechRecognizerImpl::OnSpeechRecognitionEngineError( |
| 209 const content::SpeechRecognitionError& error) { |
| 210 FSMEventArgs event_args(EVENT_ENGINE_ERROR); |
| 211 event_args.engine_error = error; |
| 212 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, |
| 213 base::Bind(&SpeechRecognizerImpl::DispatchEvent, |
| 214 this, event_args)); |
| 215 } |
| 216 |
| 217 // ----------------------- Core FSM implementation --------------------------- |
| 218 // TODO(primiano) After the changes in the media package (r129173), this class |
| 219 // slightly violates the SpeechRecognitionEventListener interface contract. In |
| 220 // particular, it is no longer true that this class can be freed after the |
| 221 // OnRecognitionEnd event, since the asynchronous audio_controller_.Close() |
| 222 // call can still be in progress after the end event. Currently this is not a |
| 223 // problem for the browser itself, since refcounting protects us against such |
| 224 // race conditions. However, we should fix this in the next CLs. For instance, |
| 225 // tests currently pass only because TestAudioInputController does not close |
| 226 // asynchronously as the real controller does, but they will become flaky once |
| 227 // TestAudioInputController is fixed. |
| 228 |
| 229 void SpeechRecognizerImpl::DispatchEvent(const FSMEventArgs& event_args) { |
| 230 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
| 231 DCHECK_LE(event_args.event, EVENT_MAX_VALUE); |
| 232 DCHECK_LE(state_, STATE_MAX_VALUE); |
| 233 |
| 234 // Event dispatching must be sequential, otherwise it would break the rules |
| 235 // and assumptions of the finite-state machine model. |
| 236 DCHECK(!is_dispatching_event_); |
| 237 is_dispatching_event_ = true; |
| 238 |
| 239 // Guard against the delegate freeing us until we finish processing the event. |
| 309 scoped_refptr<SpeechRecognizerImpl> me(this); | 240 scoped_refptr<SpeechRecognizerImpl> me(this); |
| 241 |
| 242 if (event_args.event == EVENT_AUDIO_DATA) { |
| 243 DCHECK(event_args.audio_data.get() != NULL); |
| 244 ProcessAudioPipeline(*event_args.audio_data); |
| 245 } |
| 246 |
| 247 // The audio pipeline must be processed before the event dispatch, otherwise |
| 248 // it would take actions according to the future state instead of the current one. |
| 249 state_ = ExecuteTransitionAndGetNextState(event_args); |
| 250 |
| 251 is_dispatching_event_ = false; |
| 252 } |
| 253 |
| 254 SpeechRecognizerImpl::FSMState |
| 255 SpeechRecognizerImpl::ExecuteTransitionAndGetNextState( |
| 256 const FSMEventArgs& event_args) { |
| 257 const FSMEvent event = event_args.event; |
| 258 switch (state_) { |
| 259 case STATE_IDLE: |
| 260 switch (event) { |
| 261 // TODO(primiano) restore UNREACHABLE_CONDITION on EVENT_ABORT and |
| 262 // EVENT_STOP_CAPTURE below once speech input extensions are fixed. |
| 263 case EVENT_ABORT: |
| 264 return DoNothing(event_args); |
| 265 case EVENT_START: |
| 266 return StartRecording(event_args); |
| 267 case EVENT_STOP_CAPTURE: // Corner cases related to queued messages |
| 268 case EVENT_AUDIO_DATA: // being dispatched late. |
| 269 case EVENT_ENGINE_RESULT: |
| 270 case EVENT_ENGINE_ERROR: |
| 271 case EVENT_AUDIO_ERROR: |
| 272 return DoNothing(event_args); |
| 273 } |
| 274 break; |
| 275 case STATE_STARTING: |
| 276 switch (event) { |
| 277 case EVENT_ABORT: |
| 278 return Abort(event_args); |
| 279 case EVENT_START: |
| 280 return NotFeasible(event_args); |
| 281 case EVENT_STOP_CAPTURE: |
| 282 return Abort(event_args); |
| 283 case EVENT_AUDIO_DATA: |
| 284 return StartRecognitionEngine(event_args); |
| 285 case EVENT_ENGINE_RESULT: |
| 286 return NotFeasible(event_args); |
| 287 case EVENT_ENGINE_ERROR: |
| 288 case EVENT_AUDIO_ERROR: |
| 289 return Abort(event_args); |
| 290 } |
| 291 break; |
| 292 case STATE_ESTIMATING_ENVIRONMENT: |
| 293 switch (event) { |
| 294 case EVENT_ABORT: |
| 295 return Abort(event_args); |
| 296 case EVENT_START: |
| 297 return NotFeasible(event_args); |
| 298 case EVENT_STOP_CAPTURE: |
| 299 return StopCaptureAndWaitForResult(event_args); |
| 300 case EVENT_AUDIO_DATA: |
| 301 return WaitEnvironmentEstimationCompletion(event_args); |
| 302 case EVENT_ENGINE_RESULT: |
| 303 return ProcessIntermediateResult(event_args); |
| 304 case EVENT_ENGINE_ERROR: |
| 305 case EVENT_AUDIO_ERROR: |
| 306 return Abort(event_args); |
| 307 } |
| 308 break; |
| 309 case STATE_WAITING_FOR_SPEECH: |
| 310 switch (event) { |
| 311 case EVENT_ABORT: |
| 312 return Abort(event_args); |
| 313 case EVENT_START: |
| 314 return NotFeasible(event_args); |
| 315 case EVENT_STOP_CAPTURE: |
| 316 return StopCaptureAndWaitForResult(event_args); |
| 317 case EVENT_AUDIO_DATA: |
| 318 return DetectUserSpeechOrTimeout(event_args); |
| 319 case EVENT_ENGINE_RESULT: |
| 320 return ProcessIntermediateResult(event_args); |
| 321 case EVENT_ENGINE_ERROR: |
| 322 case EVENT_AUDIO_ERROR: |
| 323 return Abort(event_args); |
| 324 } |
| 325 break; |
| 326 case STATE_RECOGNIZING: |
| 327 switch (event) { |
| 328 case EVENT_ABORT: |
| 329 return Abort(event_args); |
| 330 case EVENT_START: |
| 331 return NotFeasible(event_args); |
| 332 case EVENT_STOP_CAPTURE: |
| 333 return StopCaptureAndWaitForResult(event_args); |
| 334 case EVENT_AUDIO_DATA: |
| 335 return DetectEndOfSpeech(event_args); |
| 336 case EVENT_ENGINE_RESULT: |
| 337 return ProcessIntermediateResult(event_args); |
| 338 case EVENT_ENGINE_ERROR: |
| 339 case EVENT_AUDIO_ERROR: |
| 340 return Abort(event_args); |
| 341 } |
| 342 break; |
| 343 case STATE_WAITING_FINAL_RESULT: |
| 344 switch (event) { |
| 345 case EVENT_ABORT: |
| 346 return Abort(event_args); |
| 347 case EVENT_START: |
| 348 return NotFeasible(event_args); |
| 349 case EVENT_STOP_CAPTURE: |
| 350 case EVENT_AUDIO_DATA: |
| 351 return DoNothing(event_args); |
| 352 case EVENT_ENGINE_RESULT: |
| 353 return ProcessFinalResult(event_args); |
| 354 case EVENT_ENGINE_ERROR: |
| 355 case EVENT_AUDIO_ERROR: |
| 356 return Abort(event_args); |
| 357 } |
| 358 break; |
| 359 } |
| 360 return NotFeasible(event_args); |
| 361 } |
| 362 |
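
For reference, the switch above condensed into a transition table (rows are
states, columns are events; the last column also covers EVENT_ENGINE_ERROR and
EVENT_AUDIO_ERROR; NotFeasible means NOTREACHED; StopCapture* abbreviates
StopCaptureAndWaitForResult):

    state \ event           START           STOP_CAPTURE  AUDIO_DATA                 ENGINE_RESULT              ABORT/errors
    IDLE                    StartRecording  DoNothing     DoNothing                  DoNothing                  DoNothing
    STARTING                NotFeasible     Abort         StartRecognitionEngine     NotFeasible                Abort
    ESTIMATING_ENVIRONMENT  NotFeasible     StopCapture*  WaitEnvironmentEstimation  ProcessIntermediateResult  Abort
    WAITING_FOR_SPEECH      NotFeasible     StopCapture*  DetectUserSpeechOrTimeout  ProcessIntermediateResult  Abort
    RECOGNIZING             NotFeasible     StopCapture*  DetectEndOfSpeech          ProcessIntermediateResult  Abort
    WAITING_FINAL_RESULT    NotFeasible     DoNothing     DoNothing                  ProcessFinalResult         Abort
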
| 363 // ----------- Contract for all the FSM evolution functions below ------------- |
| 364 // - Are guaranteed to be executed in the IO thread; |
| 365 // - Are guaranteed not to be reentrant (with themselves or with each other); |
| 366 // - event_args members are guaranteed to be stable during the call; |
| 367 // - The class won't be freed in the meanwhile due to callbacks; |
| 368 // - IsCapturingAudio() returns true if and only if audio_controller_ != NULL. |
| 369 |
| 370 // TODO(primiano) the audio pipeline is currently serial. However, the |
| 371 // clipper->endpointer->vumeter chain and the sr_engine could be parallelized. |
| 372 // We should profile the execution to see whether it would be worth it. |
| 373 void SpeechRecognizerImpl::ProcessAudioPipeline(const AudioChunk& raw_audio) { |
| 374 const bool route_to_endpointer = state_ >= STATE_ESTIMATING_ENVIRONMENT && |
| 375 state_ <= STATE_RECOGNIZING; |
| 376 const bool route_to_sr_engine = route_to_endpointer; |
| 377 const bool route_to_vumeter = state_ >= STATE_WAITING_FOR_SPEECH && |
| 378 state_ <= STATE_RECOGNIZING; |
| 379 const bool clip_detected = DetectClipping(raw_audio); |
| 380 float rms = 0.0f; |
| 381 |
| 382 num_samples_recorded_ += raw_audio.NumSamples(); |
| 383 |
| 384 if (route_to_endpointer) |
| 385 endpointer_.ProcessAudio(raw_audio, &rms); |
| 386 |
| 387 if (route_to_vumeter) { |
| 388 DCHECK(route_to_endpointer); // Depends on endpointer due to |rms|. |
| 389 UpdateSignalAndNoiseLevels(rms, clip_detected); |
| 390 } |
| 391 if (route_to_sr_engine) { |
| 392 DCHECK(recognition_engine_.get() != NULL); |
| 393 recognition_engine_->TakeAudioChunk(raw_audio); |
| 394 } |
| 395 } |
| 396 |
| 397 SpeechRecognizerImpl::FSMState |
| 398 SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) { |
| 399 DCHECK(recognition_engine_.get() != NULL); |
| 400 DCHECK(!IsCapturingAudio()); |
| 401 AudioManager* audio_manager = (testing_audio_manager_ != NULL) ? |
| 402 testing_audio_manager_ : |
| 403 BrowserMainLoop::GetAudioManager(); |
| 404 DCHECK(audio_manager != NULL); |
| 405 |
| 406 DVLOG(1) << "SpeechRecognizerImpl starting audio capture."; |
| 407 num_samples_recorded_ = 0; |
| 408 audio_level_ = 0; |
| 409 listener_->OnRecognitionStart(caller_id_); |
| 410 |
| 411 if (!audio_manager->HasAudioInputDevices()) { |
| 412 return AbortWithError(SpeechRecognitionError( |
| 413 content::SPEECH_RECOGNITION_ERROR_AUDIO, |
| 414 content::SPEECH_AUDIO_ERROR_DETAILS_NO_MIC)); |
| 415 } |
| 416 |
| 417 if (audio_manager->IsRecordingInProcess()) { |
| 418 return AbortWithError(SpeechRecognitionError( |
| 419 content::SPEECH_RECOGNITION_ERROR_AUDIO, |
| 420 content::SPEECH_AUDIO_ERROR_DETAILS_IN_USE)); |
| 421 } |
| 422 |
| 423 const int samples_per_packet = (kAudioSampleRate * |
| 424 recognition_engine_->GetDesiredAudioChunkDurationMs()) / 1000; |
| 425 AudioParameters params(AudioParameters::AUDIO_PCM_LINEAR, kChannelLayout, |
| 426 kAudioSampleRate, kNumBitsPerAudioSample, |
| 427 samples_per_packet); |
| 428 audio_controller_ = AudioInputController::Create(audio_manager, this, params); |
| 429 |
| 430 if (audio_controller_.get() == NULL) { |
| 431 return AbortWithError( |
| 432 SpeechRecognitionError(content::SPEECH_RECOGNITION_ERROR_AUDIO)); |
| 433 } |
| 434 |
| 435 // The endpointer needs to estimate the environment/background noise before |
| 436 // starting to treat the audio as user input. We wait in the state |
| 437 // ESTIMATING_ENVIRONMENT until that interval has elapsed before switching |
| 438 // to user input mode. |
| 439 endpointer_.SetEnvironmentEstimationMode(); |
| 440 audio_controller_->Record(); |
| 441 return STATE_STARTING; |
| 442 } |
| 443 |
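
Note on samples_per_packet above: it is a plain duration-to-samples
conversion. Assuming, hypothetically, that GetDesiredAudioChunkDurationMs()
returns 100 ms, then samples_per_packet = (16000 * 100) / 1000 = 1600 samples,
i.e. 3200 bytes per packet at the 16 bits per sample used here.
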
| 444 SpeechRecognizerImpl::FSMState |
| 445 SpeechRecognizerImpl::StartRecognitionEngine(const FSMEventArgs& event_args) { |
| 446 // This is the first audio packet captured, so the recognition engine is |
| 447 // started and the delegate notified about the event. |
| 448 DCHECK(recognition_engine_.get() != NULL); |
| 449 recognition_engine_->StartRecognition(); |
| 450 listener_->OnAudioStart(caller_id_); |
| 451 |
| 452 // This is a little hack, since TakeAudioChunk() is already called by |
| 453 // ProcessAudioPipeline(). It is the best tradeoff, unless we allow dropping |
| 454 // the first audio chunk captured after opening the audio device. |
| 455 recognition_engine_->TakeAudioChunk(*(event_args.audio_data)); |
| 456 return STATE_ESTIMATING_ENVIRONMENT; |
| 457 } |
| 458 |
| 459 SpeechRecognizerImpl::FSMState |
| 460 SpeechRecognizerImpl::WaitEnvironmentEstimationCompletion(const FSMEventArgs&) { |
| 461 DCHECK(endpointer_.IsEstimatingEnvironment()); |
| 462 if (GetElapsedTimeMs() >= kEndpointerEstimationTimeMs) { |
| 463 endpointer_.SetUserInputMode(); |
| 464 listener_->OnEnvironmentEstimationComplete(caller_id_); |
| 465 return STATE_WAITING_FOR_SPEECH; |
| 466 } else { |
| 467 return STATE_ESTIMATING_ENVIRONMENT; |
| 468 } |
| 469 } |
| 470 |
| 471 SpeechRecognizerImpl::FSMState |
| 472 SpeechRecognizerImpl::DetectUserSpeechOrTimeout(const FSMEventArgs&) { |
| 473 if (endpointer_.DidStartReceivingSpeech()) { |
| 474 listener_->OnSoundStart(caller_id_); |
| 475 return STATE_RECOGNIZING; |
| 476 } else if (GetElapsedTimeMs() >= kNoSpeechTimeoutMs) { |
| 477 return AbortWithError( |
| 478 SpeechRecognitionError(content::SPEECH_RECOGNITION_ERROR_NO_SPEECH)); |
| 479 } |
| 480 return STATE_WAITING_FOR_SPEECH; |
| 481 } |
| 482 |
| 483 SpeechRecognizerImpl::FSMState |
| 484 SpeechRecognizerImpl::DetectEndOfSpeech(const FSMEventArgs& event_args) { |
| 485 if (endpointer_.speech_input_complete()) { |
| 486 return StopCaptureAndWaitForResult(event_args); |
| 487 } |
| 488 return STATE_RECOGNIZING; |
| 489 } |
| 490 |
| 491 SpeechRecognizerImpl::FSMState |
| 492 SpeechRecognizerImpl::StopCaptureAndWaitForResult(const FSMEventArgs&) { |
| 493 DCHECK(state_ >= STATE_ESTIMATING_ENVIRONMENT && state_ <= STATE_RECOGNIZING); |
| 494 |
| 495 DVLOG(1) << "Concluding recognition"; |
| 496 CloseAudioControllerAsynchronously(); |
| 497 recognition_engine_->AudioChunksEnded(); |
| 498 |
| 499 if (state_ > STATE_WAITING_FOR_SPEECH) |
| 500 listener_->OnSoundEnd(caller_id_); |
| 501 |
| 502 listener_->OnAudioEnd(caller_id_); |
| 503 return STATE_WAITING_FINAL_RESULT; |
| 504 } |
| 505 |
| 506 SpeechRecognizerImpl::FSMState |
| 507 SpeechRecognizerImpl::Abort(const FSMEventArgs& event_args) { |
| 508 // TODO(primiano) Should raise SPEECH_RECOGNITION_ERROR_ABORTED in the absence |
| 509 // of other specific error sources (i.e. when it was an explicit abort |
| 510 // request). However, SPEECH_RECOGNITION_ERROR_ABORTED is not currently caught |
| 511 // by ChromeSpeechRecognitionManagerDelegate and would cause an exception. |
| 512 // JS support will probably need it in the future. |
| 513 if (event_args.event == EVENT_AUDIO_ERROR) { |
| 514 return AbortWithError( |
| 515 SpeechRecognitionError(content::SPEECH_RECOGNITION_ERROR_AUDIO)); |
| 516 } else if (event_args.event == EVENT_ENGINE_ERROR) { |
| 517 return AbortWithError(event_args.engine_error); |
| 518 } |
| 519 return AbortWithError(NULL); |
| 520 } |
| 521 |
| 522 SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::AbortWithError( |
| 523 const SpeechRecognitionError& error) { |
| 524 return AbortWithError(&error); |
| 525 } |
| 526 |
| 527 SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::AbortWithError( |
| 528 const SpeechRecognitionError* error) { |
| 529 if (IsCapturingAudio()) |
| 530 CloseAudioControllerAsynchronously(); |
| 531 |
| 532 DVLOG(1) << "SpeechRecognizerImpl canceling recognition. "; |
| 533 |
| 534 // The recognition engine is initialized only after STATE_STARTING. |
| 535 if (state_ > STATE_STARTING) { |
| 536 DCHECK(recognition_engine_.get() != NULL); |
| 537 recognition_engine_->EndRecognition(); |
| 538 } |
| 539 |
| 540 if (state_ > STATE_WAITING_FOR_SPEECH && state_ < STATE_WAITING_FINAL_RESULT) |
| 541 listener_->OnSoundEnd(caller_id_); |
| 542 |
| 543 if (state_ > STATE_STARTING && state_ < STATE_WAITING_FINAL_RESULT) |
| 544 listener_->OnAudioEnd(caller_id_); |
| 545 |
| 546 if (error != NULL) |
| 547 listener_->OnRecognitionError(caller_id_, *error); |
| 548 |
| 549 listener_->OnRecognitionEnd(caller_id_); |
| 550 |
| 551 return STATE_IDLE; |
| 552 } |
| 553 |
| 554 SpeechRecognizerImpl::FSMState |
| 555 SpeechRecognizerImpl::ProcessIntermediateResult(const FSMEventArgs&) { |
| 556 // This is in preparation for future speech recognition functions. |
| 557 NOTREACHED(); |
| 558 return state_; |
| 559 } |
| 560 |
| 561 SpeechRecognizerImpl::FSMState |
| 562 SpeechRecognizerImpl::ProcessFinalResult(const FSMEventArgs& event_args) { |
| 563 const SpeechRecognitionResult& result = event_args.engine_result; |
| 564 DVLOG(1) << "Got valid result"; |
| 565 recognition_engine_->EndRecognition(); |
| 310 listener_->OnRecognitionResult(caller_id_, result); | 566 listener_->OnRecognitionResult(caller_id_, result); |
| 311 listener_->OnRecognitionEnd(caller_id_); | 567 listener_->OnRecognitionEnd(caller_id_); |
| 312 } | 568 return STATE_IDLE; |
| 313 | 569 } |
| 314 void SpeechRecognizerImpl::OnSpeechRecognitionEngineError( | 570 |
| 315 const content::SpeechRecognitionError& error) { | 571 SpeechRecognizerImpl::FSMState |
| 316 InformErrorAndAbortRecognition(error.code); | 572 SpeechRecognizerImpl::DoNothing(const FSMEventArgs&) const { |
| 317 } | 573 return state_; // Just keep the current state. |
| 318 | 574 } |
| 319 void SpeechRecognizerImpl::InformErrorAndAbortRecognition( | 575 |
| 320 content::SpeechRecognitionErrorCode error) { | 576 SpeechRecognizerImpl::FSMState |
| 321 DCHECK_NE(error, content::SPEECH_RECOGNITION_ERROR_NONE); | 577 SpeechRecognizerImpl::NotFeasible(const FSMEventArgs& event_args) { |
| 322 AbortRecognition(); | 578 NOTREACHED() << "Unfeasible event " << event_args.event |
| 323 | 579 << " in state " << state_; |
| 324 // Guard against the listener freeing us until we finish our job. | 580 return state_; |
| 325 scoped_refptr<SpeechRecognizerImpl> me(this); | |
| 326 listener_->OnRecognitionError(caller_id_, error); | |
| 327 } | 581 } |
| 328 | 582 |
| 329 void SpeechRecognizerImpl::CloseAudioControllerAsynchronously() { | 583 void SpeechRecognizerImpl::CloseAudioControllerAsynchronously() { |
| 330 VLOG(1) << "SpeechRecognizer stopping record."; | 584 DCHECK(IsCapturingAudio()); |
| 585 DVLOG(1) << "SpeechRecognizerImpl stopping audio capture."; |
| 331 // Issues a Close on the audio controller, passing an empty callback. The only | 586 // Issues a Close on the audio controller, passing an empty callback. The only |
| 332 // purpose of such callback is to keep the audio controller refcounted until | 587 // purpose of such callback is to keep the audio controller refcounted until |
| 333 // Close has completed (in the audio thread) and automatically destroy it | 588 // Close has completed (in the audio thread) and automatically destroy it |
| 334 // afterwards (upon return from OnAudioClosed). | 589 // afterwards (upon return from OnAudioClosed). |
| 335 audio_controller_->Close(base::Bind(&SpeechRecognizerImpl::OnAudioClosed, | 590 audio_controller_->Close(base::Bind(&SpeechRecognizerImpl::OnAudioClosed, |
| 336 this, audio_controller_)); | 591 this, audio_controller_)); |
| 337 audio_controller_ = NULL; // The controller is still refcounted by Bind. | 592 audio_controller_ = NULL; // The controller is still refcounted by Bind. |
| 338 } | 593 } |
| 339 | 594 |
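
The Close() call above relies on a general keep-alive idiom: binding a
refcounted pointer into the completion callback keeps the object alive until
the callback has run, even after the owner drops its reference. A standalone
sketch of the same idiom using std::shared_ptr and a lambda (an illustration,
not Chromium code):

    #include <functional>
    #include <memory>

    struct Controller {
      // Stand-in for an asynchronous close; invokes |done| when finished.
      void Close(std::function<void()> done) { done(); }
    };

    void CloseAsynchronously(std::shared_ptr<Controller>* controller) {
      // The lambda captures a shared_ptr copy, so the Controller stays
      // alive until Close() has run the callback, even though the caller's
      // reference is cleared right away.
      std::shared_ptr<Controller> keep_alive = *controller;
      keep_alive->Close([keep_alive] {});
      controller->reset();  // Still refcounted by the lambda capture.
    }
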
| 340 bool SpeechRecognizerImpl::IsActive() const { | 595 int SpeechRecognizerImpl::GetElapsedTimeMs() const { |
| 341 return (recognition_engine_.get() != NULL); | 596 return (num_samples_recorded_ * 1000) / kAudioSampleRate; |
| 342 } | 597 } |
| 343 | 598 |
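
Since GetElapsedTimeMs() derives time purely from the number of captured
samples, the FSM timeouts translate directly into sample counts at the fixed
16 kHz rate: kEndpointerEstimationTimeMs (300 ms) corresponds to
16000 * 300 / 1000 = 4800 samples, and kNoSpeechTimeoutMs (8000 ms) to
128000 samples.
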
| 344 bool SpeechRecognizerImpl::IsCapturingAudio() const { | 599 void SpeechRecognizerImpl::UpdateSignalAndNoiseLevels(const float& rms, |
| 345 return (audio_controller_.get() != NULL); | 600 bool clip_detected) { |
| 601 // Calculate the input volume to display in the UI, smoothing towards the |
| 602 // new level. |
| 603 // TODO(primiano) Do we really need all this floating-point arithmetic here? |
| 604 // It might be quite expensive on mobile. |
| 605 float level = (rms - kAudioMeterMinDb) / |
| 606 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); |
| 607 level = std::min(std::max(0.0f, level), kAudioMeterRangeMaxUnclipped); |
| 608 const float smoothing_factor = (level > audio_level_) ? kUpSmoothingFactor : |
| 609 kDownSmoothingFactor; |
| 610 audio_level_ += (level - audio_level_) * smoothing_factor; |
| 611 |
| 612 float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) / |
| 613 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); |
| 614 noise_level = std::min(std::max(0.0f, noise_level), |
| 615 kAudioMeterRangeMaxUnclipped); |
| 616 |
| 617 listener_->OnAudioLevelsChange( |
| 618 caller_id_, clip_detected ? 1.0f : audio_level_, noise_level); |
| 346 } | 619 } |
| 347 | 620 |
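
Worked example for UpdateSignalAndNoiseLevels(): with kAudioMeterMinDb = 30.0,
kAudioMeterDbRange = 60.31 and kAudioMeterRangeMaxUnclipped = 47/48 (about
0.979), an rms of 60 dB maps to level = (60 - 30) / (60.31 / 0.979), roughly
0.487, then clamped to [0, 0.979]. Rising levels are tracked instantly
(kUpSmoothingFactor = 1.0), falling levels move 70% of the way toward the new
value on each chunk (kDownSmoothingFactor = 0.7), and a detected clip
overrides the smoothed value with 1.0.
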
| 348 const SpeechRecognitionEngine& | 621 const SpeechRecognitionEngine& |
| 349 SpeechRecognizerImpl::recognition_engine() const { | 622 SpeechRecognizerImpl::recognition_engine() const { |
| 350 return *(recognition_engine_.get()); | 623 return *(recognition_engine_.get()); |
| 351 } | 624 } |
| 352 | 625 |
| 353 void SpeechRecognizerImpl::SetAudioManagerForTesting( | 626 void SpeechRecognizerImpl::SetAudioManagerForTesting( |
| 354 AudioManager* audio_manager) { | 627 AudioManager* audio_manager) { |
| 355 testing_audio_manager_ = audio_manager; | 628 testing_audio_manager_ = audio_manager; |
| 356 } | 629 } |
| 357 | 630 |
| 631 SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value) |
| 632 : event(event_value), |
| 633 audio_error_code(0), |
| 634 audio_data(NULL), |
| 635 engine_error(content::SPEECH_RECOGNITION_ERROR_NONE) { |
| 636 } |
| 637 |
| 638 SpeechRecognizerImpl::FSMEventArgs::~FSMEventArgs() { |
| 639 } |
| 358 | 640 |
| 359 } // namespace speech | 641 } // namespace speech |