Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "content/browser/speech/speech_recognizer_impl.h" | 5 #include "content/browser/speech/speech_recognizer_impl.h" |
| 6 | 6 |
| 7 #include "base/basictypes.h" | |
| 7 #include "base/bind.h" | 8 #include "base/bind.h" |
| 8 #include "base/time.h" | 9 #include "base/time.h" |
| 9 #include "content/browser/browser_main_loop.h" | 10 #include "content/browser/browser_main_loop.h" |
| 10 #include "content/browser/speech/audio_buffer.h" | 11 #include "content/browser/speech/audio_buffer.h" |
| 11 #include "content/browser/speech/google_one_shot_remote_engine.h" | 12 #include "content/browser/speech/google_one_shot_remote_engine.h" |
| 12 #include "content/public/browser/browser_thread.h" | 13 #include "content/public/browser/browser_thread.h" |
| 13 #include "content/public/browser/speech_recognition_event_listener.h" | 14 #include "content/public/browser/speech_recognition_event_listener.h" |
| 14 #include "content/public/browser/speech_recognizer.h" | 15 #include "content/public/browser/speech_recognizer.h" |
| 15 #include "content/public/common/speech_recognition_error.h" | 16 #include "content/public/common/speech_recognition_error.h" |
| 16 #include "content/public/common/speech_recognition_result.h" | 17 #include "content/public/common/speech_recognition_result.h" |
| 17 #include "net/url_request/url_request_context_getter.h" | 18 #include "net/url_request/url_request_context_getter.h" |
| 18 | 19 |
| 20 #define BIND(x) base::Bind(&SpeechRecognizerImpl::x, this) | |
|
hans
2012/04/02 16:05:59
Hmm, not super happy about this macro and the use...
Primiano Tucci (use gerrit)
2012/04/03 10:16:39
Reverted to switch-style FSM as agreed.
| |
| 21 | |
| 19 using content::BrowserMainLoop; | 22 using content::BrowserMainLoop; |
| 20 using content::BrowserThread; | 23 using content::BrowserThread; |
| 21 using content::SpeechRecognitionError; | 24 using content::SpeechRecognitionError; |
| 22 using content::SpeechRecognitionEventListener; | 25 using content::SpeechRecognitionEventListener; |
| 23 using content::SpeechRecognitionResult; | 26 using content::SpeechRecognitionResult; |
| 24 using content::SpeechRecognizer; | 27 using content::SpeechRecognizer; |
| 25 using media::AudioInputController; | 28 using media::AudioInputController; |
| 26 | 29 |
| 27 namespace { | 30 namespace { |
| 28 | 31 |
| (...skipping 12 matching lines...) | |
| 41 | 44 |
| 42 // Maximum level to draw to display unclipped meter. (1.0f displays clipping.) | 45 // Maximum level to draw to display unclipped meter. (1.0f displays clipping.) |
| 43 const float kAudioMeterRangeMaxUnclipped = 47.0f / 48.0f; | 46 const float kAudioMeterRangeMaxUnclipped = 47.0f / 48.0f; |
| 44 | 47 |
| 45 // Returns true if more than 5% of the samples are at min or max value. | 48 // Returns true if more than 5% of the samples are at min or max value. |
| 46 bool DetectClipping(const speech::AudioChunk& chunk) { | 49 bool DetectClipping(const speech::AudioChunk& chunk) { |
| 47 const int num_samples = chunk.NumSamples(); | 50 const int num_samples = chunk.NumSamples(); |
| 48 const int16* samples = chunk.SamplesData16(); | 51 const int16* samples = chunk.SamplesData16(); |
| 49 const int kThreshold = num_samples / 20; | 52 const int kThreshold = num_samples / 20; |
| 50 int clipping_samples = 0; | 53 int clipping_samples = 0; |
| 54 | |
| 51 for (int i = 0; i < num_samples; ++i) { | 55 for (int i = 0; i < num_samples; ++i) { |
| 52 if (samples[i] <= -32767 || samples[i] >= 32767) { | 56 if (samples[i] <= -32767 || samples[i] >= 32767) { |
| 53 if (++clipping_samples > kThreshold) | 57 if (++clipping_samples > kThreshold) |
| 54 return true; | 58 return true; |
| 55 } | 59 } |
| 56 } | 60 } |
| 57 return false; | 61 return false; |
| 58 } | 62 } |
| 59 | 63 |
| 60 } // namespace | 64 } // namespace |
| 61 | 65 |
| 62 SpeechRecognizer* SpeechRecognizer::Create( | 66 SpeechRecognizer* SpeechRecognizer::Create( |
| 63 SpeechRecognitionEventListener* listener, | 67 SpeechRecognitionEventListener* listener, |
| 64 int caller_id, | 68 int caller_id, |
| 65 const std::string& language, | 69 const std::string& language, |
| 66 const std::string& grammar, | 70 const std::string& grammar, |
| 67 net::URLRequestContextGetter* context_getter, | 71 net::URLRequestContextGetter* context_getter, |
| 68 bool filter_profanities, | 72 bool filter_profanities, |
| 69 const std::string& hardware_info, | 73 const std::string& hardware_info, |
| 70 const std::string& origin_url) { | 74 const std::string& origin_url) { |
| 75 speech::GoogleOneShotRemoteEngineConfig google_sr_config; | |
| 76 google_sr_config.language = language; | |
| 77 google_sr_config.grammar = grammar; | |
| 78 google_sr_config.audio_sample_rate = | |
| 79 speech::SpeechRecognizerImpl::kAudioSampleRate; | |
| 80 google_sr_config.audio_num_bits_per_sample = | |
| 81 speech::SpeechRecognizerImpl::kNumBitsPerAudioSample; | |
| 82 google_sr_config.filter_profanities = filter_profanities; | |
| 83 google_sr_config.hardware_info = hardware_info; | |
| 84 google_sr_config.origin_url = origin_url; | |
| 85 | |
| 86 speech::GoogleOneShotRemoteEngine* google_sr_engine = | |
| 87 new speech::GoogleOneShotRemoteEngine(context_getter); | |
| 88 google_sr_engine->SetConfig(google_sr_config); | |
| 89 | |
| 71 return new speech::SpeechRecognizerImpl(listener, | 90 return new speech::SpeechRecognizerImpl(listener, |
| 72 caller_id, | 91 caller_id, |
| 73 language, | 92 google_sr_engine); |
| 74 grammar, | |
| 75 context_getter, | |
| 76 filter_profanities, | |
| 77 hardware_info, | |
| 78 origin_url); | |
| 79 } | 93 } |
| 80 | 94 |
| 81 namespace speech { | 95 namespace speech { |
| 82 | 96 |
| 83 const int SpeechRecognizerImpl::kAudioSampleRate = 16000; | 97 const int SpeechRecognizerImpl::kAudioSampleRate = 16000; |
| 84 const ChannelLayout SpeechRecognizerImpl::kChannelLayout = CHANNEL_LAYOUT_MONO; | 98 const ChannelLayout SpeechRecognizerImpl::kChannelLayout = CHANNEL_LAYOUT_MONO; |
| 85 const int SpeechRecognizerImpl::kNumBitsPerAudioSample = 16; | 99 const int SpeechRecognizerImpl::kNumBitsPerAudioSample = 16; |
| 86 const int SpeechRecognizerImpl::kNoSpeechTimeoutMs = 8000; | 100 const int SpeechRecognizerImpl::kNoSpeechTimeoutMs = 8000; |
| 87 const int SpeechRecognizerImpl::kEndpointerEstimationTimeMs = 300; | 101 const int SpeechRecognizerImpl::kEndpointerEstimationTimeMs = 300; |
| 88 | 102 |
| 103 COMPILE_ASSERT((SpeechRecognizerImpl::kNumBitsPerAudioSample & 0x7) == 0, | |
|
hans
2012/04/02 16:05:59
I think using the % operator instead of & would ma...
Primiano Tucci (use gerrit)
2012/04/03 10:16:39
Done.
| |
| 104 kNumBitsPerAudioSample_must_be_a_multiple_of_8); | |
| 105 | |
| 89 SpeechRecognizerImpl::SpeechRecognizerImpl( | 106 SpeechRecognizerImpl::SpeechRecognizerImpl( |
| 90 SpeechRecognitionEventListener* listener, | 107 SpeechRecognitionEventListener* listener, |
| 91 int caller_id, | 108 int caller_id, |
| 92 const std::string& language, | 109 SpeechRecognitionEngine* engine) |
| 93 const std::string& grammar, | |
| 94 net::URLRequestContextGetter* context_getter, | |
| 95 bool filter_profanities, | |
| 96 const std::string& hardware_info, | |
| 97 const std::string& origin_url) | |
| 98 : listener_(listener), | 110 : listener_(listener), |
| 99 testing_audio_manager_(NULL), | 111 testing_audio_manager_(NULL), |
| 112 recognition_engine_(engine), | |
| 100 endpointer_(kAudioSampleRate), | 113 endpointer_(kAudioSampleRate), |
| 101 context_getter_(context_getter), | |
| 102 caller_id_(caller_id), | 114 caller_id_(caller_id), |
| 103 language_(language), | 115 in_event_dispatching_(false), |
| 104 grammar_(grammar), | 116 state_(STATE_IDLE) { |
| 105 filter_profanities_(filter_profanities), | |
| 106 hardware_info_(hardware_info), | |
| 107 origin_url_(origin_url), | |
| 108 num_samples_recorded_(0), | |
| 109 audio_level_(0.0f) { | |
| 110 DCHECK(listener_ != NULL); | 117 DCHECK(listener_ != NULL); |
| 118 DCHECK(recognition_engine_ != NULL); | |
| 119 InitializeFSM(); | |
| 111 endpointer_.set_speech_input_complete_silence_length( | 120 endpointer_.set_speech_input_complete_silence_length( |
| 112 base::Time::kMicrosecondsPerSecond / 2); | 121 base::Time::kMicrosecondsPerSecond / 2); |
| 113 endpointer_.set_long_speech_input_complete_silence_length( | 122 endpointer_.set_long_speech_input_complete_silence_length( |
| 114 base::Time::kMicrosecondsPerSecond); | 123 base::Time::kMicrosecondsPerSecond); |
| 115 endpointer_.set_long_speech_length(3 * base::Time::kMicrosecondsPerSecond); | 124 endpointer_.set_long_speech_length(3 * base::Time::kMicrosecondsPerSecond); |
| 116 endpointer_.StartSession(); | 125 endpointer_.StartSession(); |
| 126 recognition_engine_->set_delegate(this); | |
| 117 } | 127 } |
| 118 | 128 |
| 119 SpeechRecognizerImpl::~SpeechRecognizerImpl() { | 129 SpeechRecognizerImpl::~SpeechRecognizerImpl() { |
| 120 // Recording should have stopped earlier due to the endpointer or | |
| 121 // |StopRecording| being called. | |
| 122 DCHECK(!audio_controller_.get()); | |
| 123 DCHECK(!recognition_engine_.get() || | |
| 124 !recognition_engine_->IsRecognitionPending()); | |
| 125 endpointer_.EndSession(); | 130 endpointer_.EndSession(); |
| 126 } | 131 } |
| 127 | 132 |
| 133 // ------- Methods that trigger Finite State Machine (FSM) events ------------ | |
| 134 | |
| 135 // NOTE: all the external events and request should be enqueued (PostTask), even | |
|
hans
2012/04/02 16:05:59
s/request/requests/ ?
Primiano Tucci (use gerrit)
2012/04/03 10:16:39
Done.
| |
| 136 // if they come from the same (IO) thread, in order to preserve the relationship | |
| 137 // of causality between events and avoid interleaved event processing due to | |
| 138 // synchronous callbacks. | |
| 139 | |
| 128 void SpeechRecognizerImpl::StartRecognition() { | 140 void SpeechRecognizerImpl::StartRecognition() { |
| 141 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | |
| 142 base::Bind(&SpeechRecognizerImpl::DispatchEvent, | |
| 143 this, EVENT_START, FSMEventArgs())); | |
| 144 } | |
| 145 | |
| 146 void SpeechRecognizerImpl::AbortRecognition() { | |
| 147 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | |
| 148 base::Bind(&SpeechRecognizerImpl::DispatchEvent, | |
| 149 this, EVENT_ABORT, FSMEventArgs())); | |
| 150 } | |
| 151 | |
| 152 void SpeechRecognizerImpl::StopAudioCapture() { | |
| 153 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | |
| 154 base::Bind(&SpeechRecognizerImpl::DispatchEvent, | |
| 155 this, EVENT_STOP_CAPTURE, | |
| 156 FSMEventArgs())); | |
| 157 } | |
| 158 | |
| 159 bool SpeechRecognizerImpl::IsActive() const { | |
| 160 // Checking the FSM state from another thread (thus, while the FSM is | |
| 161 // potentially concurrently evolving) is meaningless. | |
| 162 // If you're doing it, probably you have some design issues. | |
|
hans
2012/04/02 16:05:59
i'm not sure this comment adds much.. i think the...
Primiano Tucci (use gerrit)
2012/04/03 10:16:39
Agree, removed the last line.
| |
| 129 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | 163 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
| 130 DCHECK(!audio_controller_.get()); | 164 return state_ != STATE_IDLE; |
| 131 DCHECK(!recognition_engine_.get() || | 165 } |
| 132 !recognition_engine_->IsRecognitionPending()); | 166 |
| 133 | 167 bool SpeechRecognizerImpl::IsCapturingAudio() const { |
| 134 // The endpointer needs to estimate the environment/background noise before | 168 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); // See IsActive(). |
| 135 // starting to treat the audio as user input. In |HandleOnData| we wait until | 169 const bool is_capturing_audio = state_ >= STATE_STARTING && |
| 136 // such time has passed before switching to user input mode. | 170 state_ <= STATE_RECOGNIZING; |
| 137 endpointer_.SetEnvironmentEstimationMode(); | 171 DCHECK((is_capturing_audio && (audio_controller_.get() != NULL)) || |
| 138 | 172 (!is_capturing_audio && audio_controller_.get() == NULL)); |
| 173 return is_capturing_audio; | |
| 174 } | |
| 175 | |
| 176 // Invoked in the audio thread. | |
| 177 void SpeechRecognizerImpl::OnError(AudioInputController* controller, | |
| 178 int error_code) { | |
| 179 FSMEventArgs args; | |
| 180 args.audio_error_code = error_code; | |
| 181 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | |
| 182 base::Bind(&SpeechRecognizerImpl::DispatchEvent, | |
| 183 this, EVENT_AUDIO_ERROR, args)); | |
| 184 } | |
| 185 | |
| 186 void SpeechRecognizerImpl::OnData(AudioInputController* controller, | |
| 187 const uint8* data, uint32 size) { | |
| 188 if (size == 0) // This could happen when audio capture stops and is normal. | |
| 189 return; | |
| 190 | |
| 191 FSMEventArgs args; | |
| 192 args.audio_data = new AudioChunk(data, static_cast<size_t>(size), | |
| 193 kNumBitsPerAudioSample / 8); | |
| 194 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | |
| 195 base::Bind(&SpeechRecognizerImpl::DispatchEvent, | |
| 196 this, EVENT_AUDIO_DATA, args)); | |
| 197 } | |
| 198 | |
| 199 void SpeechRecognizerImpl::OnAudioClosed(AudioInputController*) {} | |
| 200 | |
| 201 void SpeechRecognizerImpl::OnSpeechRecognitionEngineResult( | |
| 202 const content::SpeechRecognitionResult& result) { | |
| 203 FSMEventArgs args; | |
| 204 args.engine_result = result; | |
| 205 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | |
| 206 base::Bind(&SpeechRecognizerImpl::DispatchEvent, | |
| 207 this, EVENT_ENGINE_RESULT, args)); | |
| 208 } | |
| 209 | |
| 210 void SpeechRecognizerImpl::OnSpeechRecognitionEngineError( | |
| 211 const content::SpeechRecognitionError& error) { | |
| 212 FSMEventArgs args; | |
| 213 args.engine_error = error; | |
| 214 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | |
| 215 base::Bind(&SpeechRecognizerImpl::DispatchEvent, | |
| 216 this, EVENT_ENGINE_ERROR, args)); | |
| 217 } | |
| 218 | |
| 219 // ----------------------- Core FSM implementation --------------------------- | |
| 220 // TODO(primiano) After the changes in the media package (r129173), this class | |
| 221 // slightly violates the SpeechRecognitionEventListener interface contract. In | |
| 222 // particular, it is not true anymore that this class can be freed after the | |
| 223 // OnRecognitionEnd event, since the audio_controller_.Close() asynchronous | |
| 224 // call can be still in progress after the end event. Currently, it does not | |
| 225 // represent a problem for the browser itself, since since refcounting protects | |
|
hans
2012/04/02 16:05:59
s/since since/since/
Primiano Tucci (use gerrit)
2012/04/03 10:16:39
Done.
| |
| 226 // us against such race conditions. However, we should fix this in the next CLs. | |
| 227 // For instance, tests are currently working just because the | |
| 228 // TestAudioInputController is not closing asynchronously as the real controller | |
| 229 // does, but they will become flaky if TestAudioInputController will be fixed. | |
| 230 | |
| 231 void SpeechRecognizerImpl::InitializeFSM() { | |
| 232 fsm[STATE_IDLE][EVENT_ABORT] = BIND(DoNothing); | |
| 233 fsm[STATE_IDLE][EVENT_START] = BIND(StartRecording); | |
| 234 fsm[STATE_IDLE][EVENT_STOP_CAPTURE] = BIND(DoNothing); | |
| 235 fsm[STATE_IDLE][EVENT_AUDIO_DATA] = BIND(DoNothing); | |
| 236 fsm[STATE_IDLE][EVENT_ENGINE_RESULT] = BIND(DoNothing); | |
| 237 fsm[STATE_IDLE][EVENT_ENGINE_ERROR] = BIND(DoNothing); | |
| 238 fsm[STATE_IDLE][EVENT_AUDIO_ERROR] = BIND(DoNothing); | |
| 239 | |
| 240 fsm[STATE_STARTING][EVENT_ABORT] = BIND(Abort); | |
| 241 fsm[STATE_STARTING][EVENT_START] = kUnfeasibleTransition; | |
| 242 fsm[STATE_STARTING][EVENT_STOP_CAPTURE] = BIND(Abort); | |
| 243 fsm[STATE_STARTING][EVENT_AUDIO_DATA] = BIND(StartRecognitionEngine); | |
| 244 fsm[STATE_STARTING][EVENT_ENGINE_RESULT] = kUnfeasibleTransition; | |
| 245 fsm[STATE_STARTING][EVENT_ENGINE_ERROR] = BIND(Abort); | |
| 246 fsm[STATE_STARTING][EVENT_AUDIO_ERROR] = BIND(Abort); | |
| 247 | |
| 248 fsm[STATE_ESTIMATING_ENVIRONMENT][EVENT_ABORT] = BIND(Abort); | |
| 249 fsm[STATE_ESTIMATING_ENVIRONMENT][EVENT_START] = kUnfeasibleTransition; | |
| 250 fsm[STATE_ESTIMATING_ENVIRONMENT][EVENT_STOP_CAPTURE] = | |
| 251 BIND(StopCaptureAndWaitResult); | |
| 252 fsm[STATE_ESTIMATING_ENVIRONMENT][EVENT_AUDIO_DATA] = | |
| 253 BIND(WaitEnvironmentEstimationCompletion); | |
| 254 fsm[STATE_ESTIMATING_ENVIRONMENT][EVENT_ENGINE_RESULT] = | |
| 255 BIND(ProcessIntermediateResult); | |
| 256 fsm[STATE_ESTIMATING_ENVIRONMENT][EVENT_ENGINE_ERROR] = BIND(Abort); | |
| 257 fsm[STATE_ESTIMATING_ENVIRONMENT][EVENT_AUDIO_ERROR] = BIND(Abort); | |
| 258 | |
| 259 fsm[STATE_WAITING_FOR_SPEECH][EVENT_ABORT] = BIND(Abort); | |
| 260 fsm[STATE_WAITING_FOR_SPEECH][EVENT_START] = kUnfeasibleTransition; | |
| 261 fsm[STATE_WAITING_FOR_SPEECH][EVENT_STOP_CAPTURE] = | |
| 262 BIND(StopCaptureAndWaitResult); | |
| 263 fsm[STATE_WAITING_FOR_SPEECH][EVENT_AUDIO_DATA] = | |
| 264 BIND(DetectUserSpeechOrTimeout); | |
| 265 fsm[STATE_WAITING_FOR_SPEECH][EVENT_ENGINE_RESULT] = | |
| 266 BIND(ProcessIntermediateResult); | |
| 267 fsm[STATE_WAITING_FOR_SPEECH][EVENT_ENGINE_ERROR] = BIND(Abort); | |
| 268 fsm[STATE_WAITING_FOR_SPEECH][EVENT_AUDIO_ERROR] = BIND(Abort); | |
| 269 | |
| 270 fsm[STATE_RECOGNIZING][EVENT_ABORT] = BIND(Abort); | |
| 271 fsm[STATE_RECOGNIZING][EVENT_START] = kUnfeasibleTransition; | |
| 272 fsm[STATE_RECOGNIZING][EVENT_STOP_CAPTURE] = BIND(StopCaptureAndWaitResult); | |
| 273 fsm[STATE_RECOGNIZING][EVENT_AUDIO_DATA] = BIND(DetectEndOfSpeech); | |
| 274 fsm[STATE_RECOGNIZING][EVENT_ENGINE_RESULT] = BIND(ProcessIntermediateResult); | |
| 275 fsm[STATE_RECOGNIZING][EVENT_ENGINE_ERROR] = BIND(Abort); | |
| 276 fsm[STATE_RECOGNIZING][EVENT_AUDIO_ERROR] = BIND(Abort); | |
| 277 | |
| 278 fsm[STATE_WAITING_FINAL_RESULT][EVENT_ABORT] = BIND(Abort); | |
| 279 fsm[STATE_WAITING_FINAL_RESULT][EVENT_START] = kUnfeasibleTransition; | |
| 280 fsm[STATE_WAITING_FINAL_RESULT][EVENT_STOP_CAPTURE] = BIND(DoNothing); | |
| 281 fsm[STATE_WAITING_FINAL_RESULT][EVENT_AUDIO_DATA] = BIND(DoNothing); | |
| 282 fsm[STATE_WAITING_FINAL_RESULT][EVENT_ENGINE_RESULT] = | |
| 283 BIND(ProcessFinalResult); | |
| 284 fsm[STATE_WAITING_FINAL_RESULT][EVENT_ENGINE_ERROR] = BIND(Abort); | |
| 285 fsm[STATE_WAITING_FINAL_RESULT][EVENT_AUDIO_ERROR] = BIND(Abort); | |
| 286 } | |
| 287 | |
| 288 void SpeechRecognizerImpl::DispatchEvent(FSMEvent event, FSMEventArgs args) { | |
| 289 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
| 290 DCHECK_LE(event, EVENT_MAX); | |
| 291 DCHECK_LE(state_, STATE_MAX); | |
| 292 | |
| 293 // Event dispatching must be sequential, otherwise it will break all the rules | |
| 294 // and the assumptions of the finite state automata model. | |
| 295 DCHECK(!in_event_dispatching_); | |
| 296 in_event_dispatching_ = true; | |
| 297 | |
| 298 // Guard against the delegate freeing us until we finish processing the event. | |
| 299 scoped_refptr<SpeechRecognizerImpl> me(this); | |
| 300 | |
| 301 args.event = event; | |
| 302 | |
| 303 if (event == EVENT_AUDIO_DATA) { | |
| 304 DCHECK(args.audio_data.get() != NULL); | |
| 305 ProcessAudioPipeline(*(args.audio_data.get())); | |
|
hans
2012/04/02 16:05:59
I think you can just do ProcessAudioPipeline(*args...
Primiano Tucci (use gerrit)
2012/04/03 10:16:39
Done.
| |
| 306 } | |
| 307 | |
| 308 // The audio pipeline must be processed before the event dispatch, otherwise | |
| 309 // it would take actions according to the future state instead of the current. | |
| 310 const TransitionFunction& transition = fsm[state_][event]; | |
|
hans
2012/04/02 16:05:59
i liked the switch-case better
Satish
2012/04/02 21:57:09
I was thinking earlier that a table would be appea...
| |
| 311 if(transition.Equals(kUnfeasibleTransition)) { | |
| 312 NOTREACHED() << "Unfeasible event " << event << " in state " << state_; | |
| 313 } else { | |
| 314 state_ = transition.Run(args); | |
| 315 } | |
| 316 | |
| 317 in_event_dispatching_ = false; | |
| 318 } | |
| 319 | |
| 320 // ----------- Contract for all the FSM evolution functions below ------------- | |
| 321 // - Are guaranteed to be executed in the IO thread; | |
| 322 // - Are guaranteed to be not reentrant (themselves and each other); | |
| 323 // - event_args members are guaranteed to be stable during the call; | |
| 324 // - The class won't be freed in the meanwhile due to callbacks; | |
| 325 // - IsCapturingAudio() returns true if and only if audio_controller_ != NULL. | |
| 326 | |
| 327 // TODO(primiano) the audio pipeline is currently serial. However, the | |
| 328 // clipper->endpointer->vumeter chain and the sr_engine could be parallelized. | |
| 329 // We should profile the execution to see if it would be worth or not. | |
| 330 void SpeechRecognizerImpl::ProcessAudioPipeline(const AudioChunk& raw_audio) { | |
| 331 const bool route_to_endpointer = state_ >= STATE_ESTIMATING_ENVIRONMENT && | |
| 332 state_ <= STATE_RECOGNIZING; | |
| 333 const bool route_to_sr_engine = route_to_endpointer; | |
| 334 const bool route_to_vumeter = state_ >= STATE_WAITING_FOR_SPEECH && | |
| 335 state_ <= STATE_RECOGNIZING; | |
| 336 const bool clip_detected = DetectClipping(raw_audio); | |
| 337 float rms = 0; | |
| 338 | |
| 339 num_samples_recorded_ += raw_audio.NumSamples(); | |
| 340 | |
| 341 if (route_to_endpointer) { | |
| 342 endpointer_.ProcessAudio(raw_audio, &rms); | |
| 343 } | |
| 344 if (route_to_vumeter) { | |
| 345 DCHECK(route_to_endpointer); // Depends on endpointer due to |rms|. | |
| 346 UpdateSignalAndNoiseLevels(rms, clip_detected); | |
| 347 } | |
| 348 if (route_to_sr_engine) { | |
| 349 DCHECK(recognition_engine_.get()); | |
| 350 recognition_engine_->TakeAudioChunk(raw_audio); | |
| 351 } | |
| 352 } | |
| 353 | |
| 354 SpeechRecognizerImpl::FSMState | |
| 355 SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) { | |
| 356 DCHECK(recognition_engine_.get()); | |
| 357 DCHECK(!IsCapturingAudio()); | |
| 139 AudioManager* audio_manager = (testing_audio_manager_ != NULL) ? | 358 AudioManager* audio_manager = (testing_audio_manager_ != NULL) ? |
| 140 testing_audio_manager_ : | 359 testing_audio_manager_ : |
| 141 BrowserMainLoop::GetAudioManager(); | 360 BrowserMainLoop::GetAudioManager(); |
| 142 const int samples_per_packet = kAudioSampleRate * | 361 DCHECK(audio_manager != NULL); |
| 143 GoogleOneShotRemoteEngine::kAudioPacketIntervalMs / 1000; | 362 |
| 363 VLOG(1) << "SpeechRecognizerImpl starting audio capture."; | |
| 364 num_samples_recorded_ = 0; | |
| 365 audio_level_ = 0; | |
| 366 listener_->OnRecognitionStart(caller_id_); | |
| 367 | |
| 368 if (!audio_manager->HasAudioInputDevices()) { | |
| 369 return AbortWithError(SpeechRecognitionError( | |
| 370 content::SPEECH_RECOGNITION_ERROR_AUDIO, | |
| 371 content::SPEECH_AUDIO_ERROR_DETAILS_NO_MIC)); | |
| 372 } | |
| 373 | |
| 374 if (audio_manager->IsRecordingInProcess()) { | |
| 375 return AbortWithError(SpeechRecognitionError( | |
| 376 content::SPEECH_RECOGNITION_ERROR_AUDIO, | |
| 377 content::SPEECH_AUDIO_ERROR_DETAILS_IN_USE)); | |
| 378 } | |
| 379 | |
| 380 const int samples_per_packet = (kAudioSampleRate * | |
| 381 recognition_engine_->GetDesiredAudioChunkDurationMs()) / 1000; | |
| 144 AudioParameters params(AudioParameters::AUDIO_PCM_LINEAR, kChannelLayout, | 382 AudioParameters params(AudioParameters::AUDIO_PCM_LINEAR, kChannelLayout, |
| 145 kAudioSampleRate, kNumBitsPerAudioSample, | 383 kAudioSampleRate, kNumBitsPerAudioSample, |
| 146 samples_per_packet); | 384 samples_per_packet); |
| 147 audio_controller_ = AudioInputController::Create(audio_manager, this, params); | 385 audio_controller_ = AudioInputController::Create(audio_manager, this, params); |
| 148 DCHECK(audio_controller_.get()); | 386 |
| 149 VLOG(1) << "SpeechRecognizer starting record."; | 387 if (audio_controller_.get() == NULL) { |
| 150 num_samples_recorded_ = 0; | 388 return AbortWithError( |
| 389 SpeechRecognitionError(content::SPEECH_RECOGNITION_ERROR_AUDIO)); | |
| 390 } | |
| 391 | |
| 392 // The endpointer needs to estimate the environment/background noise before | |
| 393 // starting to treat the audio as user input. We wait in the state | |
| 394 // ESTIMATING_ENVIRONMENT until such interval has elapsed before switching | |
| 395 // to user input mode. | |
| 396 endpointer_.SetEnvironmentEstimationMode(); | |
| 151 audio_controller_->Record(); | 397 audio_controller_->Record(); |
| 152 } | 398 return STATE_STARTING; |
| 153 | 399 } |
| 154 void SpeechRecognizerImpl::AbortRecognition() { | 400 |
| 155 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | 401 SpeechRecognizerImpl::FSMState |
| 156 DCHECK(audio_controller_.get() || recognition_engine_.get()); | 402 SpeechRecognizerImpl::StartRecognitionEngine(const FSMEventArgs& event_args) { |
| 157 | 403 // This is the first audio packet captured, so the recognition engine is |
| 158 // Stop recording if required. | 404 // started and the delegate notifies about the event. |
|
hans
2012/04/02 16:05:59
s/notifies/notified/
Primiano Tucci (use gerrit)
2012/04/03 10:16:39
Done.
| |
| 159 if (audio_controller_.get()) { | 405 DCHECK(recognition_engine_.get()); |
| 406 recognition_engine_->StartRecognition(); | |
| 407 listener_->OnAudioStart(caller_id_); | |
| 408 | |
| 409 // This is a little hack, since TakeAudioChunk() is already called by | |
| 410 // ProcessAudioPipeline(). It is the best tradeoff, unless we allow dropping | |
| 411 // the first audio chunk captured after opening the audio device. | |
| 412 recognition_engine_->TakeAudioChunk(*(event_args.audio_data)); | |
| 413 return STATE_ESTIMATING_ENVIRONMENT; | |
| 414 } | |
| 415 | |
| 416 SpeechRecognizerImpl::FSMState | |
| 417 SpeechRecognizerImpl::WaitEnvironmentEstimationCompletion(const FSMEventArgs&) { | |
| 418 DCHECK(endpointer_.IsEstimatingEnvironment()); | |
| 419 if (GetElapsedTimeMs() >= kEndpointerEstimationTimeMs) { | |
| 420 endpointer_.SetUserInputMode(); | |
| 421 listener_->OnEnvironmentEstimationComplete(caller_id_); | |
| 422 return STATE_WAITING_FOR_SPEECH; | |
| 423 } else { | |
| 424 return STATE_ESTIMATING_ENVIRONMENT; | |
| 425 } | |
| 426 } | |
| 427 | |
| 428 SpeechRecognizerImpl::FSMState | |
| 429 SpeechRecognizerImpl::DetectUserSpeechOrTimeout(const FSMEventArgs&) { | |
| 430 if (endpointer_.DidStartReceivingSpeech()) { | |
| 431 listener_->OnSoundStart(caller_id_); | |
| 432 return STATE_RECOGNIZING; | |
| 433 } else if (GetElapsedTimeMs() >= kNoSpeechTimeoutMs) { | |
| 434 return AbortWithError( | |
| 435 SpeechRecognitionError(content::SPEECH_RECOGNITION_ERROR_NO_SPEECH)); | |
| 436 } else { | |
| 437 return STATE_WAITING_FOR_SPEECH; | |
| 438 } | |
| 439 } | |
| 440 | |
| 441 SpeechRecognizerImpl::FSMState | |
| 442 SpeechRecognizerImpl::DetectEndOfSpeech(const FSMEventArgs& event_args) { | |
| 443 if (endpointer_.speech_input_complete()) { | |
| 444 return StopCaptureAndWaitResult(event_args); | |
| 445 } else { | |
| 446 return STATE_RECOGNIZING; | |
| 447 } | |
| 448 } | |
| 449 | |
| 450 SpeechRecognizerImpl::FSMState | |
| 451 SpeechRecognizerImpl::StopCaptureAndWaitResult(const FSMEventArgs&) { | |
| 452 DCHECK(state_ >= STATE_ESTIMATING_ENVIRONMENT && state_ <= STATE_RECOGNIZING); | |
| 453 | |
| 454 VLOG(1) << "Concluding recognition"; | |
| 455 CloseAudioControllerAsynchronously(); | |
| 456 recognition_engine_->AudioChunksEnded(); | |
| 457 | |
| 458 if (state_ > STATE_WAITING_FOR_SPEECH) | |
| 459 listener_->OnSoundEnd(caller_id_); | |
| 460 | |
| 461 listener_->OnAudioEnd(caller_id_); | |
| 462 return STATE_WAITING_FINAL_RESULT; | |
| 463 } | |
| 464 | |
| 465 SpeechRecognizerImpl::FSMState | |
| 466 SpeechRecognizerImpl::Abort(const FSMEventArgs& event_args) { | |
| 467 // TODO(primiano) Should raise SPEECH_RECOGNITION_ERROR_ABORTED in lack of | |
| 468 // other specific error sources (so that it was an explicit abort request). | |
| 469 // However, SPEECH_RECOGNITION_ERROR_ABORTED is not caught in UI layers | |
| 470 // and currently would cause an exception. JS will probably need it in future. | |
| 471 if (event_args.event == EVENT_AUDIO_ERROR) { | |
| 472 return AbortWithError( | |
| 473 SpeechRecognitionError(content::SPEECH_RECOGNITION_ERROR_AUDIO)); | |
| 474 } else if (event_args.event == EVENT_ENGINE_ERROR) { | |
| 475 return AbortWithError(event_args.engine_error); | |
| 476 } | |
| 477 return AbortWithError(NULL); | |
| 478 } | |
| 479 | |
| 480 SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::AbortWithError( | |
| 481 const SpeechRecognitionError& error) { | |
| 482 return AbortWithError(&error); | |
| 483 } | |
| 484 | |
| 485 SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::AbortWithError( | |
| 486 const SpeechRecognitionError* error) { | |
| 487 if (IsCapturingAudio()) | |
| 160 CloseAudioControllerAsynchronously(); | 488 CloseAudioControllerAsynchronously(); |
| 161 } | 489 |
| 162 | 490 VLOG(1) << "SpeechRecognizerImpl canceling recognition. "; |
| 163 VLOG(1) << "SpeechRecognizer canceling recognition."; | 491 |
| 164 recognition_engine_.reset(); | 492 // The recognition engine is initialized only after STATE_STARTING. |
| 165 } | 493 if (state_ > STATE_STARTING) { |
| 166 | 494 DCHECK(recognition_engine_.get()); |
| 167 void SpeechRecognizerImpl::StopAudioCapture() { | 495 recognition_engine_->EndRecognition(); |
| 168 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | 496 } |
| 169 | 497 |
| 170 // If audio recording has already stopped and we are in recognition phase, | 498 if (state_ > STATE_WAITING_FOR_SPEECH && state_ < STATE_WAITING_FINAL_RESULT) |
| 171 // silently ignore any more calls to stop recording. | 499 listener_->OnSoundEnd(caller_id_); |
| 172 if (!audio_controller_.get()) | 500 |
| 173 return; | 501 if (state_ > STATE_STARTING && state_ < STATE_WAITING_FINAL_RESULT) |
| 174 | 502 listener_->OnAudioEnd(caller_id_); |
| 175 CloseAudioControllerAsynchronously(); | 503 |
| 176 listener_->OnSoundEnd(caller_id_); | 504 if (error != NULL) |
| 177 listener_->OnAudioEnd(caller_id_); | 505 listener_->OnRecognitionError(caller_id_, *error); |
|
hans
2012/04/02 16:05:59
just a thought (maybe for the future).. i wonder w...
Primiano Tucci (use gerrit)
2012/04/03 10:16:39
We should think on the implications that it might...
| |
| 178 | 506 |
| 179 // If we haven't got any audio yet end the recognition sequence here. | 507 listener_->OnRecognitionEnd(caller_id_); |
| 180 if (recognition_engine_ == NULL) { | 508 |
| 181 // Guard against the listener freeing us until we finish our job. | 509 return STATE_IDLE; |
| 182 scoped_refptr<SpeechRecognizerImpl> me(this); | 510 } |
| 183 listener_->OnRecognitionEnd(caller_id_); | 511 |
| 184 } else { | 512 SpeechRecognizerImpl::FSMState |
| 185 recognition_engine_->AudioChunksEnded(); | 513 SpeechRecognizerImpl::ProcessIntermediateResult(const FSMEventArgs&) { |
| 186 } | 514 // This is in preparation for future speech recognition functions. |
| 187 } | 515 NOTREACHED(); |
| 188 | 516 return state_; |
| 189 // Invoked in the audio thread. | 517 } |
| 190 void SpeechRecognizerImpl::OnError(AudioInputController* controller, | 518 |
| 191 int error_code) { | 519 SpeechRecognizerImpl::FSMState |
| 192 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | 520 SpeechRecognizerImpl::ProcessFinalResult(const FSMEventArgs& event_args) { |
| 193 base::Bind(&SpeechRecognizerImpl::HandleOnError, | 521 const SpeechRecognitionResult& result = event_args.engine_result; |
| 194 this, error_code)); | 522 VLOG(1) << "Got valid result"; |
| 195 } | 523 recognition_engine_->EndRecognition(); |
| 196 | |
| 197 void SpeechRecognizerImpl::HandleOnError(int error_code) { | |
| 198 LOG(WARNING) << "SpeechRecognizer::HandleOnError, code=" << error_code; | |
| 199 | |
| 200 // Check if we are still recording before canceling recognition, as | |
| 201 // recording might have been stopped after this error was posted to the queue | |
| 202 // by |OnError|. | |
| 203 if (!audio_controller_.get()) | |
| 204 return; | |
| 205 | |
| 206 InformErrorAndAbortRecognition(content::SPEECH_RECOGNITION_ERROR_AUDIO); | |
| 207 } | |
| 208 | |
| 209 void SpeechRecognizerImpl::OnData(AudioInputController* controller, | |
| 210 const uint8* data, uint32 size) { | |
| 211 if (size == 0) // This could happen when recording stops and is normal. | |
| 212 return; | |
| 213 scoped_refptr<AudioChunk> raw_audio( | |
| 214 new AudioChunk(data, | |
| 215 static_cast<size_t>(size), | |
| 216 kNumBitsPerAudioSample / 8)); | |
| 217 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | |
| 218 base::Bind(&SpeechRecognizerImpl::HandleOnData, | |
| 219 this, raw_audio)); | |
| 220 } | |
| 221 | |
| 222 void SpeechRecognizerImpl::HandleOnData(scoped_refptr<AudioChunk> raw_audio) { | |
| 223 // Check if we are still recording and if not discard this buffer, as | |
| 224 // recording might have been stopped after this buffer was posted to the queue | |
| 225 // by |OnData|. | |
| 226 if (!audio_controller_.get()) | |
| 227 return; | |
| 228 | |
| 229 bool speech_was_heard_before_packet = endpointer_.DidStartReceivingSpeech(); | |
| 230 | |
| 231 float rms; | |
| 232 endpointer_.ProcessAudio(*raw_audio, &rms); | |
| 233 bool did_clip = DetectClipping(*raw_audio); | |
| 234 num_samples_recorded_ += raw_audio->NumSamples(); | |
| 235 | |
| 236 if (recognition_engine_ == NULL) { | |
| 237 // This was the first audio packet recorded, so start a request to the | |
| 238 // server to send the data and inform the listener. | |
| 239 listener_->OnAudioStart(caller_id_); | |
| 240 GoogleOneShotRemoteEngineConfig google_sr_config; | |
| 241 google_sr_config.language = language_; | |
| 242 google_sr_config.grammar = grammar_; | |
| 243 google_sr_config.audio_sample_rate = kAudioSampleRate; | |
| 244 google_sr_config.audio_num_bits_per_sample = kNumBitsPerAudioSample; | |
| 245 google_sr_config.filter_profanities = filter_profanities_; | |
| 246 google_sr_config.hardware_info = hardware_info_; | |
| 247 google_sr_config.origin_url = origin_url_; | |
| 248 GoogleOneShotRemoteEngine* google_sr_engine = | |
| 249 new GoogleOneShotRemoteEngine(context_getter_.get()); | |
| 250 google_sr_engine->SetConfig(google_sr_config); | |
| 251 recognition_engine_.reset(google_sr_engine); | |
| 252 recognition_engine_->set_delegate(this); | |
| 253 recognition_engine_->StartRecognition(); | |
| 254 } | |
| 255 | |
| 256 recognition_engine_->TakeAudioChunk(*raw_audio); | |
| 257 | |
| 258 if (endpointer_.IsEstimatingEnvironment()) { | |
| 259 // Check if we have gathered enough audio for the endpointer to do | |
| 260 // environment estimation and should move on to detect speech/end of speech. | |
| 261 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs * | |
| 262 kAudioSampleRate) / 1000) { | |
| 263 endpointer_.SetUserInputMode(); | |
| 264 listener_->OnEnvironmentEstimationComplete(caller_id_); | |
| 265 } | |
| 266 return; // No more processing since we are still estimating environment. | |
| 267 } | |
| 268 | |
| 269 // Check if we have waited too long without hearing any speech. | |
| 270 bool speech_was_heard_after_packet = endpointer_.DidStartReceivingSpeech(); | |
| 271 if (!speech_was_heard_after_packet && | |
| 272 num_samples_recorded_ >= (kNoSpeechTimeoutMs / 1000) * kAudioSampleRate) { | |
| 273 InformErrorAndAbortRecognition( | |
| 274 content::SPEECH_RECOGNITION_ERROR_NO_SPEECH); | |
| 275 return; | |
| 276 } | |
| 277 | |
| 278 if (!speech_was_heard_before_packet && speech_was_heard_after_packet) | |
| 279 listener_->OnSoundStart(caller_id_); | |
| 280 | |
| 281 // Calculate the input volume to display in the UI, smoothing towards the | |
| 282 // new level. | |
| 283 float level = (rms - kAudioMeterMinDb) / | |
| 284 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); | |
| 285 level = std::min(std::max(0.0f, level), kAudioMeterRangeMaxUnclipped); | |
| 286 if (level > audio_level_) { | |
| 287 audio_level_ += (level - audio_level_) * kUpSmoothingFactor; | |
| 288 } else { | |
| 289 audio_level_ += (level - audio_level_) * kDownSmoothingFactor; | |
| 290 } | |
| 291 | |
| 292 float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) / | |
| 293 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); | |
| 294 noise_level = std::min(std::max(0.0f, noise_level), | |
| 295 kAudioMeterRangeMaxUnclipped); | |
| 296 | |
| 297 listener_->OnAudioLevelsChange(caller_id_, did_clip ? 1.0f : audio_level_, | |
| 298 noise_level); | |
| 299 | |
| 300 if (endpointer_.speech_input_complete()) | |
| 301 StopAudioCapture(); | |
| 302 } | |
| 303 | |
| 304 void SpeechRecognizerImpl::OnAudioClosed(AudioInputController*) {} | |
| 305 | |
| 306 void SpeechRecognizerImpl::OnSpeechRecognitionEngineResult( | |
| 307 const content::SpeechRecognitionResult& result) { | |
| 308 // Guard against the listener freeing us until we finish our job. | |
| 309 scoped_refptr<SpeechRecognizerImpl> me(this); | |
| 310 listener_->OnRecognitionResult(caller_id_, result); | 524 listener_->OnRecognitionResult(caller_id_, result); |
| 311 listener_->OnRecognitionEnd(caller_id_); | 525 listener_->OnRecognitionEnd(caller_id_); |
| 312 } | 526 return STATE_IDLE; |
| 313 | 527 } |
| 314 void SpeechRecognizerImpl::OnSpeechRecognitionEngineError( | 528 |
| 315 const content::SpeechRecognitionError& error) { | 529 SpeechRecognizerImpl::FSMState |
| 316 InformErrorAndAbortRecognition(error.code); | 530 SpeechRecognizerImpl::DoNothing(const FSMEventArgs&) const { |
| 317 } | 531 return state_; // Just keep the current state. |
| 318 | |
| 319 void SpeechRecognizerImpl::InformErrorAndAbortRecognition( | |
| 320 content::SpeechRecognitionErrorCode error) { | |
| 321 DCHECK_NE(error, content::SPEECH_RECOGNITION_ERROR_NONE); | |
| 322 AbortRecognition(); | |
| 323 | |
| 324 // Guard against the listener freeing us until we finish our job. | |
| 325 scoped_refptr<SpeechRecognizerImpl> me(this); | |
| 326 listener_->OnRecognitionError(caller_id_, error); | |
| 327 } | 532 } |
| 328 | 533 |
| 329 void SpeechRecognizerImpl::CloseAudioControllerAsynchronously() { | 534 void SpeechRecognizerImpl::CloseAudioControllerAsynchronously() { |
| 330 VLOG(1) << "SpeechRecognizer stopping record."; | 535 DCHECK(IsCapturingAudio()); |
| 536 VLOG(1) << "SpeechRecognizerImpl stopping audio capture."; | |
| 331 // Issues a Close on the audio controller, passing an empty callback. The only | 537 // Issues a Close on the audio controller, passing an empty callback. The only |
| 332 // purpose of such callback is to keep the audio controller refcounted until | 538 // purpose of such callback is to keep the audio controller refcounted until |
| 333 // Close has completed (in the audio thread) and automatically destroy it | 539 // Close has completed (in the audio thread) and automatically destroy it |
| 334 // afterwards (upon return from OnAudioClosed). | 540 // afterwards (upon return from OnAudioClosed). |
| 335 audio_controller_->Close(base::Bind(&SpeechRecognizerImpl::OnAudioClosed, | 541 audio_controller_->Close(base::Bind(&SpeechRecognizerImpl::OnAudioClosed, |
| 336 this, audio_controller_)); | 542 this, audio_controller_)); |
| 337 audio_controller_ = NULL; // The controller is still refcounted by Bind. | 543 audio_controller_ = NULL; // The controller is still refcounted by Bind. |
| 338 } | 544 } |
| 339 | 545 |
| 340 bool SpeechRecognizerImpl::IsActive() const { | 546 int SpeechRecognizerImpl::GetElapsedTimeMs() const { |
| 341 return (recognition_engine_.get() != NULL); | 547 return (num_samples_recorded_ * 1000) / kAudioSampleRate; |
| 342 } | 548 } |
| 343 | 549 |
| 344 bool SpeechRecognizerImpl::IsCapturingAudio() const { | 550 void SpeechRecognizerImpl::UpdateSignalAndNoiseLevels(const float& rms, |
| 345 return (audio_controller_.get() != NULL); | 551 bool clip_detected) { |
| 552 // Calculate the input volume to display in the UI, smoothing towards the | |
| 553 // new level. | |
| 554 // TODO(primiano) Do we really need all this floating point arith here? | |
| 555 // Perhaps it might be quite expensive on mobile. | |
| 556 float level = (rms - kAudioMeterMinDb) / | |
| 557 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); | |
| 558 level = std::min(std::max(0.0f, level), kAudioMeterRangeMaxUnclipped); | |
| 559 if (level > audio_level_) { | |
| 560 audio_level_ += (level - audio_level_) * kUpSmoothingFactor; | |
| 561 } else { | |
| 562 audio_level_ += (level - audio_level_) * kDownSmoothingFactor; | |
| 563 } | |
| 564 | |
| 565 float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) / | |
| 566 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); | |
| 567 noise_level = std::min(std::max(0.0f, noise_level), | |
| 568 kAudioMeterRangeMaxUnclipped); | |
| 569 | |
| 570 listener_->OnAudioLevelsChange( | |
| 571 caller_id_, clip_detected ? 1.0f : audio_level_, noise_level); | |
| 346 } | 572 } |
| 347 | 573 |
| 348 const SpeechRecognitionEngine& | 574 const SpeechRecognitionEngine& |
| 349 SpeechRecognizerImpl::recognition_engine() const { | 575 SpeechRecognizerImpl::recognition_engine() const { |
| 350 return *(recognition_engine_.get()); | 576 return *(recognition_engine_.get()); |
| 351 } | 577 } |
| 352 | 578 |
| 353 void SpeechRecognizerImpl::SetAudioManagerForTesting( | 579 void SpeechRecognizerImpl::SetAudioManagerForTesting( |
| 354 AudioManager* audio_manager) { | 580 AudioManager* audio_manager) { |
| 355 testing_audio_manager_ = audio_manager; | 581 testing_audio_manager_ = audio_manager; |
| 356 } | 582 } |
| 357 | 583 |
| 584 SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs() | |
| 585 : audio_error_code(0), | |
| 586 audio_data(NULL), | |
| 587 engine_error(content::SPEECH_RECOGNITION_ERROR_NONE) { | |
| 588 } | |
| 589 | |
| 590 SpeechRecognizerImpl::FSMEventArgs::~FSMEventArgs() { | |
| 591 } | |
| 358 | 592 |
| 359 } // namespace speech | 593 } // namespace speech |
| OLD | NEW |