OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "content/browser/speech/speech_recognition_manager_impl.h" | 5 #include "content/browser/speech/speech_recognition_manager_impl.h" |
6 | 6 |
7 #include "base/bind.h" | 7 #include "base/bind.h" |
| 8 #include "base/memory/singleton.h" |
8 #include "content/browser/browser_main_loop.h" | 9 #include "content/browser/browser_main_loop.h" |
9 #include "content/browser/renderer_host/render_view_host_impl.h" | 10 #include "content/browser/speech/google_one_shot_remote_engine.h" |
10 #include "content/browser/speech/input_tag_speech_dispatcher_host.h" | 11 #include "content/browser/speech/speech_recognition_engine.h" |
| 12 #include "content/browser/speech/speech_recognizer_impl.h" |
11 #include "content/public/browser/browser_thread.h" | 13 #include "content/public/browser/browser_thread.h" |
12 #include "content/public/browser/content_browser_client.h" | 14 #include "content/public/browser/content_browser_client.h" |
13 #include "content/public/browser/speech_recognizer.h" | |
14 #include "content/public/browser/render_view_host_delegate.h" | |
15 #include "content/public/browser/resource_context.h" | 15 #include "content/public/browser/resource_context.h" |
| 16 #include "content/public/browser/speech_recognition_event_listener.h" |
16 #include "content/public/browser/speech_recognition_manager_delegate.h" | 17 #include "content/public/browser/speech_recognition_manager_delegate.h" |
17 #include "content/public/browser/speech_recognition_preferences.h" | 18 #include "content/public/browser/speech_recognition_session_config.h" |
18 #include "content/public/common/view_type.h" | 19 #include "content/public/browser/speech_recognition_session_context.h" |
| 20 #include "content/public/common/speech_recognition_result.h" |
19 #include "media/audio/audio_manager.h" | 21 #include "media/audio/audio_manager.h" |
20 | 22 |
| 23 using base::Callback; |
| 24 using base::Unretained; |
21 using content::BrowserMainLoop; | 25 using content::BrowserMainLoop; |
22 using content::BrowserThread; | 26 using content::BrowserThread; |
23 using content::RenderViewHostImpl; | 27 using content::SpeechRecognitionError; |
| 28 using content::SpeechRecognitionEventListener; |
24 using content::SpeechRecognitionManager; | 29 using content::SpeechRecognitionManager; |
25 using content::SpeechRecognitionManagerDelegate; | 30 using content::SpeechRecognitionResult; |
| 31 using content::SpeechRecognitionSessionContext; |
| 32 using content::SpeechRecognitionSessionConfig; |
| 33 |
| 34 namespace content { |
| 35 const int SpeechRecognitionManager::kSessionIDInvalid = 0; |
26 | 36 |
27 SpeechRecognitionManager* SpeechRecognitionManager::GetInstance() { | 37 SpeechRecognitionManager* SpeechRecognitionManager::GetInstance() { |
28 return speech::SpeechRecognitionManagerImpl::GetInstance(); | 38 return speech::SpeechRecognitionManagerImpl::GetInstance(); |
29 } | 39 } |
| 40 } // namespace content |
30 | 41 |
31 namespace speech { | 42 namespace speech { |
32 | 43 |
33 struct SpeechRecognitionManagerImpl::SpeechRecognitionParams { | |
34 SpeechRecognitionParams( | |
35 InputTagSpeechDispatcherHost* delegate, | |
36 int session_id, | |
37 int render_process_id, | |
38 int render_view_id, | |
39 const gfx::Rect& element_rect, | |
40 const std::string& language, | |
41 const std::string& grammar, | |
42 const std::string& origin_url, | |
43 net::URLRequestContextGetter* context_getter, | |
44 content::SpeechRecognitionPreferences* recognition_prefs) | |
45 : delegate(delegate), | |
46 session_id(session_id), | |
47 render_process_id(render_process_id), | |
48 render_view_id(render_view_id), | |
49 element_rect(element_rect), | |
50 language(language), | |
51 grammar(grammar), | |
52 origin_url(origin_url), | |
53 context_getter(context_getter), | |
54 recognition_prefs(recognition_prefs) { | |
55 } | |
56 | |
57 InputTagSpeechDispatcherHost* delegate; | |
58 int session_id; | |
59 int render_process_id; | |
60 int render_view_id; | |
61 gfx::Rect element_rect; | |
62 std::string language; | |
63 std::string grammar; | |
64 std::string origin_url; | |
65 net::URLRequestContextGetter* context_getter; | |
66 content::SpeechRecognitionPreferences* recognition_prefs; | |
67 }; | |
68 | |
69 SpeechRecognitionManagerImpl* SpeechRecognitionManagerImpl::GetInstance() { | 44 SpeechRecognitionManagerImpl* SpeechRecognitionManagerImpl::GetInstance() { |
70 return Singleton<SpeechRecognitionManagerImpl>::get(); | 45 return Singleton<SpeechRecognitionManagerImpl>::get(); |
71 } | 46 } |
72 | 47 |
73 SpeechRecognitionManagerImpl::SpeechRecognitionManagerImpl() | 48 SpeechRecognitionManagerImpl::SpeechRecognitionManagerImpl() |
74 : can_report_metrics_(false), | 49 : interactive_session_id_(kSessionIDInvalid), |
75 recording_session_id_(0) { | 50 last_session_id_(kSessionIDInvalid), |
76 delegate_.reset(content::GetContentClient()->browser()-> | 51 is_dispatching_event_(false) { |
77 GetSpeechRecognitionManagerDelegate()); | 52 delegate_ = content::GetContentClient()->browser()-> |
| 53 GetSpeechRecognitionManagerDelegate(); |
78 } | 54 } |
79 | 55 |
80 SpeechRecognitionManagerImpl::~SpeechRecognitionManagerImpl() { | 56 SpeechRecognitionManagerImpl::~SpeechRecognitionManagerImpl() { |
81 while (requests_.begin() != requests_.end()) | 57 // Recognition sessions will be aborted by the corresponding destructors. |
82 CancelRecognition(requests_.begin()->first); | 58 sessions_.clear(); |
83 } | 59 } |
| 60 |
| 61 int SpeechRecognitionManagerImpl::CreateSession( |
| 62 const SpeechRecognitionSessionConfig& config, |
| 63 SpeechRecognitionEventListener* event_listener) { |
| 64 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
| 65 |
| 66 const int session_id = GetNextSessionID(); |
| 67 DCHECK(!SessionExists(session_id)); |
| 68 // Set-up the new session. |
| 69 Session& session = sessions_[session_id]; |
| 70 session.id = session_id; |
| 71 session.event_listener = event_listener; |
| 72 session.context = config.initial_context; |
| 73 |
| 74 std::string hardware_info; |
| 75 bool can_report_metrics = false; |
| 76 if (delegate_) |
| 77 delegate_->GetDiagnosticInformation(&can_report_metrics, &hardware_info); |
| 78 |
| 79 GoogleOneShotRemoteEngineConfig remote_engine_config; |
| 80 remote_engine_config.language = config.language; |
| 81 remote_engine_config.grammar = config.grammar; |
| 82 remote_engine_config.audio_sample_rate = |
| 83 SpeechRecognizerImpl::kAudioSampleRate; |
| 84 remote_engine_config.audio_num_bits_per_sample = |
| 85 SpeechRecognizerImpl::kNumBitsPerAudioSample; |
| 86 remote_engine_config.filter_profanities = config.filter_profanities; |
| 87 remote_engine_config.hardware_info = hardware_info; |
| 88 remote_engine_config.origin_url = can_report_metrics ? config.origin_url : ""; |
| 89 |
| 90 GoogleOneShotRemoteEngine* google_remote_engine = |
| 91 new GoogleOneShotRemoteEngine(config.url_request_context_getter); |
| 92 google_remote_engine->SetConfig(remote_engine_config); |
| 93 |
| 94 session.recognizer = new SpeechRecognizerImpl(this, |
| 95 session_id, |
| 96 google_remote_engine); |
| 97 return session_id; |
| 98 } |
| 99 |
| 100 void SpeechRecognitionManagerImpl::StartSession(int session_id) { |
| 101 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
| 102 DCHECK(SessionExists(session_id)); |
| 103 |
| 104 // If there is another interactive session, send it to background. |
| 105 if (interactive_session_id_ != kSessionIDInvalid && |
| 106 interactive_session_id_ != session_id) { |
| 107 SendSessionToBackground(interactive_session_id_); |
| 108 } |
| 109 |
| 110 if (delegate_) |
| 111 delegate_->CheckRecognitionIsAllowed( |
| 112 session_id, |
| 113 base::Bind(&SpeechRecognitionManagerImpl::RecognitionAllowedCallback, |
| 114 base::Unretained(this))); |
| 115 } |
| 116 |
| 117 void SpeechRecognitionManagerImpl::RecognitionAllowedCallback(int session_id, |
| 118 bool is_allowed) { |
| 119 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
| 120 DCHECK(SessionExists(session_id)); |
| 121 if (is_allowed) { |
| 122 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, |
| 123 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, |
| 124 Unretained(this), session_id, FSMEventArgs(EVENT_START))); |
| 125 } else { |
| 126 sessions_.erase(session_id); |
| 127 } |
| 128 } |
| 129 |
| 130 void SpeechRecognitionManagerImpl::AbortSession(int session_id) { |
| 131 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
| 132 DCHECK(SessionExists(session_id)); |
| 133 |
| 134 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, |
| 135 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this), |
| 136 session_id, FSMEventArgs(EVENT_ABORT))); |
| 137 } |
| 138 |
| 139 void SpeechRecognitionManagerImpl::StopAudioCaptureForSession(int session_id) { |
| 140 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
| 141 DCHECK(SessionExists(session_id)); |
| 142 |
| 143 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, |
| 144 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this), |
| 145 session_id, FSMEventArgs(EVENT_STOP_CAPTURE))); |
| 146 } |
| 147 |
| 148 void SpeechRecognitionManagerImpl::SendSessionToBackground(int session_id) { |
| 149 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
| 150 DCHECK(SessionExists(session_id)); |
| 151 |
| 152 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, |
| 153 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this), |
| 154 session_id, FSMEventArgs(EVENT_SET_BACKGROUND))); |
| 155 } |
| 156 |
| 157 // Here begins the SpeechRecognitionEventListener interface implementation, |
| 158 // which will simply relay the events to the proper listener registered for the |
| 159 // particular session (most likely InputTagSpeechDispatcherHost) and intercept |
| 160 // some of them to provide UI notifications. |
| 161 |
| 162 void SpeechRecognitionManagerImpl::OnRecognitionStart(int session_id) { |
| 163 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
| 164 if (!SessionExists(session_id)) |
| 165 return; |
| 166 |
| 167 DCHECK_EQ(interactive_session_id_, session_id); |
| 168 if (delegate_) |
| 169 delegate_->ShowWarmUp(session_id); |
| 170 GetListener(session_id)->OnRecognitionStart(session_id); |
| 171 } |
| 172 |
| 173 void SpeechRecognitionManagerImpl::OnAudioStart(int session_id) { |
| 174 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
| 175 if (!SessionExists(session_id)) |
| 176 return; |
| 177 |
| 178 DCHECK_EQ(interactive_session_id_, session_id); |
| 179 if (delegate_) |
| 180 delegate_->ShowRecording(session_id); |
| 181 GetListener(session_id)->OnAudioStart(session_id); |
| 182 } |
| 183 |
| 184 void SpeechRecognitionManagerImpl::OnEnvironmentEstimationComplete( |
| 185 int session_id) { |
| 186 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
| 187 if (!SessionExists(session_id)) |
| 188 return; |
| 189 |
| 190 DCHECK_EQ(interactive_session_id_, session_id); |
| 191 GetListener(session_id)->OnEnvironmentEstimationComplete(session_id); |
| 192 } |
| 193 |
| 194 void SpeechRecognitionManagerImpl::OnSoundStart(int session_id) { |
| 195 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
| 196 if (!SessionExists(session_id)) |
| 197 return; |
| 198 |
| 199 DCHECK_EQ(interactive_session_id_, session_id); |
| 200 GetListener(session_id)->OnSoundStart(session_id); |
| 201 } |
| 202 |
| 203 void SpeechRecognitionManagerImpl::OnSoundEnd(int session_id) { |
| 204 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
| 205 if (!SessionExists(session_id)) |
| 206 return; |
| 207 |
| 208 GetListener(session_id)->OnSoundEnd(session_id); |
| 209 } |
| 210 |
| 211 void SpeechRecognitionManagerImpl::OnAudioEnd(int session_id) { |
| 212 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
| 213 if (!SessionExists(session_id)) |
| 214 return; |
| 215 |
| 216 // OnAudioEnd can also be raised after an abort request, when the session is |
| 217 // not interactive anymore. |
| 218 if (interactive_session_id_ == session_id && delegate_) |
| 219 delegate_->ShowRecognizing(session_id); |
| 220 |
| 221 GetListener(session_id)->OnAudioEnd(session_id); |
| 222 } |
| 223 |
| 224 void SpeechRecognitionManagerImpl::OnRecognitionResult( |
| 225 int session_id, const content::SpeechRecognitionResult& result) { |
| 226 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
| 227 if (!SessionExists(session_id)) |
| 228 return; |
| 229 |
| 230 GetListener(session_id)->OnRecognitionResult(session_id, result); |
| 231 FSMEventArgs event_args(EVENT_RECOGNITION_RESULT); |
| 232 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, |
| 233 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this), |
| 234 session_id, event_args)); |
| 235 } |
| 236 |
| 237 void SpeechRecognitionManagerImpl::OnRecognitionError( |
| 238 int session_id, const content::SpeechRecognitionError& error) { |
| 239 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
| 240 if (!SessionExists(session_id)) |
| 241 return; |
| 242 |
| 243 GetListener(session_id)->OnRecognitionError(session_id, error); |
| 244 FSMEventArgs event_args(EVENT_RECOGNITION_ERROR); |
| 245 event_args.speech_error = error; |
| 246 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, |
| 247 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this), |
| 248 session_id, event_args)); |
| 249 } |
| 250 |
| 251 void SpeechRecognitionManagerImpl::OnAudioLevelsChange( |
| 252 int session_id, float volume, float noise_volume) { |
| 253 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
| 254 if (!SessionExists(session_id)) |
| 255 return; |
| 256 |
| 257 if (delegate_) |
| 258 delegate_->ShowInputVolume(session_id, volume, noise_volume); |
| 259 |
| 260 GetListener(session_id)->OnAudioLevelsChange(session_id, volume, |
| 261 noise_volume); |
| 262 } |
| 263 |
| 264 void SpeechRecognitionManagerImpl::OnRecognitionEnd(int session_id) { |
| 265 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
| 266 if (!SessionExists(session_id)) |
| 267 return; |
| 268 |
| 269 GetListener(session_id)->OnRecognitionEnd(session_id); |
| 270 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, |
| 271 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this), |
| 272 session_id, FSMEventArgs(EVENT_RECOGNITION_ENDED))); |
| 273 } |
| 274 |
| 275 // TODO(primiano) After CL2: if we see that both InputTagDispatcherHost and |
| 276 // SpeechRecognitionDispatcherHost do the same lookup operations, implement the |
| 277 // lookup method directly here. |
| 278 int SpeechRecognitionManagerImpl::LookupSessionByContext( |
| 279 Callback<bool(const SpeechRecognitionSessionContext&)> matcher) const { |
| 280 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
| 281 SessionsTable::const_iterator iter; |
| 282 // Note: the callback (matcher) must NEVER perform non-const calls on us. |
| 283 for(iter = sessions_.begin(); iter != sessions_.end(); ++iter) { |
| 284 const int session_id = iter->first; |
| 285 const Session& session = iter->second; |
| 286 bool matches = matcher.Run(session.context); |
| 287 if (matches) |
| 288 return session_id; |
| 289 } |
| 290 return kSessionIDInvalid; |
| 291 } |
| 292 |
| 293 SpeechRecognitionSessionContext |
| 294 SpeechRecognitionManagerImpl::GetSessionContext(int session_id) const { |
| 295 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
| 296 SessionsTable::const_iterator iter = sessions_.find(session_id); |
| 297 DCHECK(iter != sessions_.end()); |
| 298 return iter->second.context; |
| 299 } |
| 300 |
| 301 void SpeechRecognitionManagerImpl::AbortAllSessionsForListener( |
| 302 SpeechRecognitionEventListener* listener) { |
| 303 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
| 304 |
| 305 // AbortSession is asynchronous and the session will not be removed from the |
| 306 // collection while we are iterating over it. |
| 307 for (SessionsTable::iterator it = sessions_.begin(); it != sessions_.end(); |
| 308 ++it) { |
| 309 if (it->second.event_listener == listener) |
| 310 AbortSession(it->first); |
| 311 } |
| 312 } |
| 313 |
| 314 // ----------------------- Core FSM implementation --------------------------- |
| 315 void SpeechRecognitionManagerImpl::DispatchEvent(int session_id, |
| 316 FSMEventArgs event_args) { |
| 317 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
| 318 if (!SessionExists(session_id)) |
| 319 return; |
| 320 |
| 321 Session& session = sessions_[session_id]; |
| 322 DCHECK_LE(session.state, STATE_MAX_VALUE); |
| 323 DCHECK_LE(event_args.event, EVENT_MAX_VALUE); |
| 324 |
| 325 // Event dispatching must be sequential, otherwise it will break all the rules |
| 326 // and the assumptions of the finite state automata model. |
| 327 DCHECK(!is_dispatching_event_); |
| 328 is_dispatching_event_ = true; |
| 329 |
| 330 // Pedantic preconditions consistency checks. |
| 331 if (session.state == STATE_INTERACTIVE) |
| 332 DCHECK_EQ(interactive_session_id_, session_id); |
| 333 |
| 334 if (session.state == STATE_BACKGROUND || |
| 335 session.state == STATE_WAITING_FOR_DELETION) { |
| 336 DCHECK_NE(interactive_session_id_, session_id); |
| 337 } |
| 338 |
| 339 session.state = ExecuteTransitionAndGetNextState(session, event_args); |
| 340 |
| 341 is_dispatching_event_ = false; |
| 342 } |
| 343 |
| 344 // This FSM handles the evolution of each session, from the viewpoint of the |
| 345 // interaction with the user (that may be either the browser end-user which |
| 346 // interacts with UI bubbles, or JS developer interacting with JS methods). |
| 347 // All the events received by the SpeechRecognizerImpl instances (one for each |
| 348 // session) are always routed to the SpeechRecognitionEventListener(s) |
| 349 // regardless of the choices taken in this FSM. |
| 350 SpeechRecognitionManagerImpl::FSMState |
| 351 SpeechRecognitionManagerImpl::ExecuteTransitionAndGetNextState( |
| 352 Session& session, const FSMEventArgs& event_args) { |
| 353 // Some notes for the code below: |
| 354 // - A session can be deleted only if it is not active, thus only if it ended |
| 355 // spontaneously or we issued a prior SessionAbort. In these cases, we must |
| 356 // wait for a RECOGNITION_ENDED event (which is guaranteed to come always at |
| 357 // last by the SpeechRecognizer) in order to free resources gracefully. |
| 358 // - Use SessionDelete only when absolutely sure that the recognizer is not |
| 359 // active. Prefer SessionAbort, which will do it gracefully, otherwise. |
| 360 // - Since this class methods are publicly exported, START, ABORT, |
| 361 // STOP_CAPTURE and SET_BACKGROUND events can arrive in every moment from |
| 362 // the outside wild world, even if they make no sense. |
| 363 const FSMEvent event = event_args.event; |
| 364 switch (session.state) { |
| 365 case STATE_IDLE: |
| 366 // Session has just been created or had an error while interactive. |
| 367 switch (event) { |
| 368 case EVENT_START: |
| 369 return SessionStart(session, event_args); |
| 370 case EVENT_ABORT: |
| 371 case EVENT_SET_BACKGROUND: |
| 372 return SessionAbort(session, event_args); |
| 373 case EVENT_STOP_CAPTURE: |
| 374 case EVENT_RECOGNITION_ENDED: |
| 375 // In case of error, we come back in this state before receiving the |
| 376 // OnRecognitionEnd event, thus EVENT_RECOGNITION_ENDED is feasible. |
| 377 return DoNothing(session, event_args); |
| 378 case EVENT_RECOGNITION_RESULT: |
| 379 case EVENT_RECOGNITION_ERROR: |
| 380 return NotFeasible(session, event_args); |
| 381 } |
| 382 break; |
| 383 case STATE_INTERACTIVE: |
| 384 // The recognizer can be either capturing audio or waiting for a result. |
| 385 switch (event) { |
| 386 case EVENT_RECOGNITION_RESULT: |
| 387 // TODO(primiano) Valid only in single shot mode. Review in next CLs. |
| 388 return SessionSetBackground(session, event_args); |
| 389 case EVENT_SET_BACKGROUND: |
| 390 return SessionAbortIfCapturingAudioOrBackground(session, event_args); |
| 391 case EVENT_STOP_CAPTURE: |
| 392 return SessionStopAudioCapture(session, event_args); |
| 393 case EVENT_ABORT: |
| 394 return SessionAbort(session, event_args); |
| 395 case EVENT_RECOGNITION_ERROR: |
| 396 return SessionReportError(session, event_args); |
| 397 case EVENT_RECOGNITION_ENDED: |
| 398 // If we're still interactive it means that no result was received |
| 399 // in the meanwhile (otherwise we'd have been sent to background). |
| 400 return SessionReportNoMatch(session, event_args); |
| 401 case EVENT_START: |
| 402 return DoNothing(session, event_args); |
| 403 } |
| 404 break; |
| 405 case STATE_BACKGROUND: |
| 406 switch (event) { |
| 407 case EVENT_ABORT: |
| 408 return SessionAbort(session, event_args); |
| 409 case EVENT_RECOGNITION_ENDED: |
| 410 return SessionDelete(session, event_args); |
| 411 case EVENT_START: |
| 412 case EVENT_STOP_CAPTURE: |
| 413 case EVENT_RECOGNITION_RESULT: |
| 414 case EVENT_RECOGNITION_ERROR: |
| 415 return DoNothing(session, event_args); |
| 416 case EVENT_SET_BACKGROUND: |
| 417 return NotFeasible(session, event_args); |
| 418 } |
| 419 break; |
| 420 case STATE_WAITING_FOR_DELETION: |
| 421 switch (event) { |
| 422 case EVENT_RECOGNITION_ENDED: |
| 423 return SessionDelete(session, event_args); |
| 424 case EVENT_ABORT: |
| 425 case EVENT_START: |
| 426 case EVENT_STOP_CAPTURE: |
| 427 case EVENT_SET_BACKGROUND: |
| 428 case EVENT_RECOGNITION_RESULT: |
| 429 case EVENT_RECOGNITION_ERROR: |
| 430 return DoNothing(session, event_args); |
| 431 } |
| 432 break; |
| 433 } |
| 434 return NotFeasible(session, event_args); |
| 435 } |
| 436 |
| 437 // ----------- Contract for all the FSM evolution functions below ------------- |
| 438 // - Are guaranteed to be executed in the IO thread; |
| 439 // - Are guaranteed to be not reentrant (themselves and each other); |
| 440 // - event_args members are guaranteed to be stable during the call; |
| 441 |
| 442 SpeechRecognitionManagerImpl::FSMState |
| 443 SpeechRecognitionManagerImpl::SessionStart(Session& session, |
| 444 const FSMEventArgs& event_args) { |
| 445 if (interactive_session_id_ != kSessionIDInvalid && delegate_) |
| 446 delegate_->DoClose(interactive_session_id_); |
| 447 interactive_session_id_ = session.id; |
| 448 if (delegate_) |
| 449 delegate_->ShowRecognitionRequested(session.id); |
| 450 session.recognizer->StartRecognition(); |
| 451 return STATE_INTERACTIVE; |
| 452 } |
| 453 |
| 454 SpeechRecognitionManagerImpl::FSMState |
| 455 SpeechRecognitionManagerImpl::SessionAbort(Session& session, |
| 456 const FSMEventArgs& event_args) { |
| 457 if (interactive_session_id_ == session.id) { |
| 458 interactive_session_id_ = kSessionIDInvalid; |
| 459 if (delegate_) |
| 460 delegate_->DoClose(session.id); |
| 461 } |
| 462 |
| 463 // If abort was requested while the recognizer was inactive, delete directly. |
| 464 if (session.recognizer == NULL || !session.recognizer->IsActive()) |
| 465 return SessionDelete(session, event_args); |
| 466 |
| 467 // Otherwise issue an abort and delete gracefully, waiting for a |
| 468 // RECOGNITION_ENDED event first. |
| 469 session.recognizer->AbortRecognition(); |
| 470 return STATE_WAITING_FOR_DELETION; |
| 471 } |
| 472 |
| 473 SpeechRecognitionManagerImpl::FSMState |
| 474 SpeechRecognitionManagerImpl::SessionStopAudioCapture( |
| 475 Session& session, const FSMEventArgs& event_args) { |
| 476 DCHECK(session.recognizer != NULL); |
| 477 DCHECK(session.recognizer->IsActive()); |
| 478 if (session.recognizer->IsCapturingAudio()) |
| 479 session.recognizer->StopAudioCapture(); |
| 480 return STATE_INTERACTIVE; |
| 481 } |
| 482 |
| 483 SpeechRecognitionManagerImpl::FSMState |
| 484 SpeechRecognitionManagerImpl::SessionAbortIfCapturingAudioOrBackground( |
| 485 Session& session, const FSMEventArgs& event_args) { |
| 486 DCHECK_EQ(interactive_session_id_, session.id); |
| 487 |
| 488 DCHECK(session.recognizer != NULL); |
| 489 DCHECK(session.recognizer->IsActive()); |
| 490 if (session.recognizer->IsCapturingAudio()) |
| 491 return SessionAbort(session, event_args); |
| 492 |
| 493 interactive_session_id_ = kSessionIDInvalid; |
| 494 if (delegate_) |
| 495 delegate_->DoClose(session.id); |
| 496 return STATE_BACKGROUND; |
| 497 } |
| 498 |
| 499 |
| 500 SpeechRecognitionManagerImpl::FSMState |
| 501 SpeechRecognitionManagerImpl::SessionSetBackground( |
| 502 Session& session, const FSMEventArgs& event_args) { |
| 503 DCHECK_EQ(interactive_session_id_, session.id); |
| 504 interactive_session_id_ = kSessionIDInvalid; |
| 505 if (delegate_) |
| 506 delegate_->DoClose(session.id); |
| 507 return STATE_BACKGROUND; |
| 508 } |
| 509 |
| 510 SpeechRecognitionManagerImpl::FSMState |
| 511 SpeechRecognitionManagerImpl::SessionReportError( |
| 512 Session& session, const FSMEventArgs& event_args) { |
| 513 DCHECK_EQ(interactive_session_id_, session.id); |
| 514 if (delegate_) |
| 515 delegate_->ShowError(session.id, event_args.speech_error); |
| 516 return STATE_IDLE; |
| 517 } |
| 518 |
| 519 SpeechRecognitionManagerImpl::FSMState |
| 520 SpeechRecognitionManagerImpl::SessionReportNoMatch( |
| 521 Session& session, const FSMEventArgs& event_args) { |
| 522 DCHECK_EQ(interactive_session_id_, session.id); |
| 523 if (delegate_) { |
| 524 delegate_->ShowError( |
| 525 session.id, |
| 526 SpeechRecognitionError(content::SPEECH_RECOGNITION_ERROR_NO_MATCH)); |
| 527 } |
| 528 return STATE_IDLE; |
| 529 } |
| 530 |
| 531 SpeechRecognitionManagerImpl::FSMState |
| 532 SpeechRecognitionManagerImpl::SessionDelete(Session& session, |
| 533 const FSMEventArgs& event_args) { |
| 534 DCHECK(session.recognizer == NULL || !session.recognizer->IsActive()); |
| 535 if (interactive_session_id_ == session.id) { |
| 536 interactive_session_id_ = kSessionIDInvalid; |
| 537 if (delegate_) |
| 538 delegate_->DoClose(session.id); |
| 539 } |
| 540 sessions_.erase(session.id); |
| 541 // Next state is irrelevant, the session will be deleted afterwards. |
| 542 return STATE_WAITING_FOR_DELETION; |
| 543 } |
| 544 |
| 545 SpeechRecognitionManagerImpl::FSMState |
| 546 SpeechRecognitionManagerImpl::DoNothing(Session& session, |
| 547 const FSMEventArgs& event_args) { |
| 548 return session.state; |
| 549 } |
| 550 |
| 551 SpeechRecognitionManagerImpl::FSMState |
| 552 SpeechRecognitionManagerImpl::NotFeasible(Session& session, |
| 553 const FSMEventArgs& event_args) { |
| 554 NOTREACHED() << "Unfeasible event " << event_args.event |
| 555 << " in state " << session.state |
| 556 << " for session " << session.id; |
| 557 return session.state; |
| 558 } |
| 559 |
| 560 int SpeechRecognitionManagerImpl::GetNextSessionID() { |
| 561 ++last_session_id_; |
| 562 // Deal with wrapping of last_session_id_. (How civilized). |
| 563 if (last_session_id_ <= 0) |
| 564 last_session_id_ = 1; |
| 565 return last_session_id_; |
| 566 } |
| 567 |
| 568 bool SpeechRecognitionManagerImpl::SessionExists(int session_id) const { |
| 569 return sessions_.find(session_id) != sessions_.end(); |
| 570 } |
| 571 |
| 572 SpeechRecognitionEventListener* SpeechRecognitionManagerImpl::GetListener( |
| 573 int session_id) const { |
| 574 SessionsTable::const_iterator iter = sessions_.find(session_id); |
| 575 DCHECK(iter != sessions_.end()); |
| 576 return iter->second.event_listener; |
| 577 } |
| 578 |
84 | 579 |
85 bool SpeechRecognitionManagerImpl::HasAudioInputDevices() { | 580 bool SpeechRecognitionManagerImpl::HasAudioInputDevices() { |
86 return BrowserMainLoop::GetAudioManager()->HasAudioInputDevices(); | 581 return BrowserMainLoop::GetAudioManager()->HasAudioInputDevices(); |
87 } | 582 } |
88 | 583 |
89 bool SpeechRecognitionManagerImpl::IsCapturingAudio() { | 584 bool SpeechRecognitionManagerImpl::IsCapturingAudio() { |
90 return BrowserMainLoop::GetAudioManager()->IsRecordingInProcess(); | 585 return BrowserMainLoop::GetAudioManager()->IsRecordingInProcess(); |
91 } | 586 } |
92 | 587 |
93 string16 SpeechRecognitionManagerImpl::GetAudioInputDeviceModel() { | 588 string16 SpeechRecognitionManagerImpl::GetAudioInputDeviceModel() { |
94 return BrowserMainLoop::GetAudioManager()->GetAudioInputDeviceModel(); | 589 return BrowserMainLoop::GetAudioManager()->GetAudioInputDeviceModel(); |
95 } | 590 } |
96 | 591 |
97 bool SpeechRecognitionManagerImpl::HasPendingRequest(int session_id) const { | |
98 return requests_.find(session_id) != requests_.end(); | |
99 } | |
100 | |
101 InputTagSpeechDispatcherHost* SpeechRecognitionManagerImpl::GetDelegate( | |
102 int session_id) const { | |
103 return requests_.find(session_id)->second.delegate; | |
104 } | |
105 | |
106 void SpeechRecognitionManagerImpl::ShowAudioInputSettings() { | 592 void SpeechRecognitionManagerImpl::ShowAudioInputSettings() { |
107 // Since AudioManager::ShowAudioInputSettings can potentially launch external | 593 // Since AudioManager::ShowAudioInputSettings can potentially launch external |
108 // processes, do that in the FILE thread to not block the calling threads. | 594 // processes, do that in the FILE thread to not block the calling threads. |
109 if (!BrowserThread::CurrentlyOn(BrowserThread::FILE)) { | 595 if (!BrowserThread::CurrentlyOn(BrowserThread::FILE)) { |
110 BrowserThread::PostTask( | 596 BrowserThread::PostTask( |
111 BrowserThread::FILE, FROM_HERE, | 597 BrowserThread::FILE, FROM_HERE, |
112 base::Bind(&SpeechRecognitionManagerImpl::ShowAudioInputSettings, | 598 base::Bind(&SpeechRecognitionManagerImpl::ShowAudioInputSettings, |
113 base::Unretained(this))); | 599 base::Unretained(this))); |
114 return; | 600 return; |
115 } | 601 } |
116 | 602 |
117 media::AudioManager* audio_manager = BrowserMainLoop::GetAudioManager(); | 603 media::AudioManager* audio_manager = BrowserMainLoop::GetAudioManager(); |
118 DCHECK(audio_manager->CanShowAudioInputSettings()); | 604 DCHECK(audio_manager->CanShowAudioInputSettings()); |
119 if (audio_manager->CanShowAudioInputSettings()) | 605 if (audio_manager->CanShowAudioInputSettings()) |
120 audio_manager->ShowAudioInputSettings(); | 606 audio_manager->ShowAudioInputSettings(); |
121 } | 607 } |
122 | 608 |
123 void SpeechRecognitionManagerImpl::StartRecognition( | 609 SpeechRecognitionManagerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value) |
124 InputTagSpeechDispatcherHost* delegate, | 610 : event(event_value), |
125 int session_id, | 611 speech_error(content::SPEECH_RECOGNITION_ERROR_NONE) { |
126 int render_process_id, | 612 } |
127 int render_view_id, | 613 |
128 const gfx::Rect& element_rect, | 614 SpeechRecognitionManagerImpl::FSMEventArgs::~FSMEventArgs() { |
129 const std::string& language, | 615 } |
130 const std::string& grammar, | 616 |
131 const std::string& origin_url, | 617 SpeechRecognitionManagerImpl::Session::Session() |
132 net::URLRequestContextGetter* context_getter, | 618 : id(kSessionIDInvalid), |
133 content::SpeechRecognitionPreferences* recognition_prefs) { | 619 event_listener(NULL), |
134 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | 620 state(STATE_IDLE) { |
135 BrowserThread::PostTask( | 621 } |
136 BrowserThread::UI, FROM_HERE, | 622 |
137 base::Bind( | 623 SpeechRecognitionManagerImpl::Session::~Session() { |
138 &SpeechRecognitionManagerImpl::CheckRenderViewTypeAndStartRecognition, | |
139 base::Unretained(this), | |
140 SpeechRecognitionParams( | |
141 delegate, session_id, render_process_id, render_view_id, | |
142 element_rect, language, grammar, origin_url, context_getter, | |
143 recognition_prefs))); | |
144 } | |
145 | |
146 void SpeechRecognitionManagerImpl::CheckRenderViewTypeAndStartRecognition( | |
147 const SpeechRecognitionParams& params) { | |
148 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); | |
149 | |
150 RenderViewHostImpl* render_view_host = RenderViewHostImpl::FromID( | |
151 params.render_process_id, params.render_view_id); | |
152 if (!render_view_host || !render_view_host->GetDelegate()) | |
153 return; | |
154 | |
155 // For host delegates other than VIEW_TYPE_WEB_CONTENTS we can't reliably show | |
156 // a popup, including the speech input bubble. In these cases for privacy | |
157 // reasons we don't want to start recording if the user can't be properly | |
158 // notified. An example of this is trying to show the speech input bubble | |
159 // within an extension popup: http://crbug.com/92083. In these situations the | |
160 // speech input extension API should be used instead. | |
161 if (render_view_host->GetDelegate()->GetRenderViewType() == | |
162 content::VIEW_TYPE_WEB_CONTENTS) { | |
163 BrowserThread::PostTask( | |
164 BrowserThread::IO, FROM_HERE, | |
165 base::Bind(&SpeechRecognitionManagerImpl::ProceedStartingRecognition, | |
166 base::Unretained(this), params)); | |
167 } | |
168 } | |
169 | |
// IO thread: registers the session, creates its recognizer and starts it.
// Only reached after the UI thread approved the render view type.
void SpeechRecognitionManagerImpl::ProceedStartingRecognition(
    const SpeechRecognitionParams& params) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
  DCHECK(!HasPendingRequest(params.session_id));

  if (delegate_.get()) {
    // Let the embedder show its "speech input requested" UI, then collect
    // |can_report_metrics_| / |request_info_| which are consumed below when
    // building the recognizer.
    delegate_->ShowRecognitionRequested(
        params.session_id, params.render_process_id, params.render_view_id,
        params.element_rect);
    delegate_->GetRequestInfo(&can_report_metrics_, &request_info_);
  }

  Request* request = &requests_[params.session_id];
  request->delegate = params.delegate;
  // The origin URL is passed to the recognizer only when metrics reporting is
  // allowed; otherwise an empty string is sent.
  request->recognizer = content::SpeechRecognizer::Create(
      this, params.session_id, params.language, params.grammar,
      params.context_getter, params.recognition_prefs->FilterProfanities(),
      request_info_, can_report_metrics_ ? params.origin_url : "");
  // The request becomes active in StartRecognitionForRequest().
  request->is_active = false;

  StartRecognitionForRequest(params.session_id);
}
192 | |
193 void SpeechRecognitionManagerImpl::StartRecognitionForRequest(int session_id) { | |
194 SpeechRecognizerMap::iterator request = requests_.find(session_id); | |
195 if (request == requests_.end()) { | |
196 NOTREACHED(); | |
197 return; | |
198 } | |
199 | |
200 // We should not currently be recording for the session. | |
201 CHECK(recording_session_id_ != session_id); | |
202 | |
203 // If we are currently recording audio for another session, abort it cleanly. | |
204 if (recording_session_id_) | |
205 CancelRecognitionAndInformDelegate(recording_session_id_); | |
206 recording_session_id_ = session_id; | |
207 requests_[session_id].is_active = true; | |
208 requests_[session_id].recognizer->StartRecognition(); | |
209 if (delegate_.get()) | |
210 delegate_->ShowWarmUp(session_id); | |
211 } | |
212 | |
213 void SpeechRecognitionManagerImpl::CancelRecognitionForRequest(int session_id) { | |
214 // Ignore if the session id was not in our active recognizers list because the | |
215 // user might have clicked more than once, or recognition could have been | |
216 // ended due to other reasons before the user click was processed. | |
217 if (!HasPendingRequest(session_id)) | |
218 return; | |
219 | |
220 CancelRecognitionAndInformDelegate(session_id); | |
221 } | |
222 | |
223 void SpeechRecognitionManagerImpl::FocusLostForRequest(int session_id) { | |
224 // See above comment. | |
225 if (!HasPendingRequest(session_id)) | |
226 return; | |
227 | |
228 // If this is an ongoing recording or if we were displaying an error message | |
229 // to the user, abort it since user has switched focus. Otherwise | |
230 // recognition has started and keep that going so user can start speaking to | |
231 // another element while this gets the results in parallel. | |
232 if (recording_session_id_ == session_id || !requests_[session_id].is_active) | |
233 CancelRecognitionAndInformDelegate(session_id); | |
234 } | |
235 | |
236 void SpeechRecognitionManagerImpl::CancelRecognition(int session_id) { | |
237 DCHECK(HasPendingRequest(session_id)); | |
238 if (requests_[session_id].is_active) | |
239 requests_[session_id].recognizer->AbortRecognition(); | |
240 requests_.erase(session_id); | |
241 if (recording_session_id_ == session_id) | |
242 recording_session_id_ = 0; | |
243 if (delegate_.get()) | |
244 delegate_->DoClose(session_id); | |
245 } | |
246 | |
247 void SpeechRecognitionManagerImpl::CancelAllRequestsWithDelegate( | |
248 InputTagSpeechDispatcherHost* delegate) { | |
249 SpeechRecognizerMap::iterator it = requests_.begin(); | |
250 while (it != requests_.end()) { | |
251 if (it->second.delegate == delegate) { | |
252 CancelRecognition(it->first); | |
253 // This map will have very few elements so it is simpler to restart. | |
254 it = requests_.begin(); | |
255 } else { | |
256 ++it; | |
257 } | |
258 } | |
259 } | |
260 | |
261 void SpeechRecognitionManagerImpl::StopRecording(int session_id) { | |
262 // No pending requests on extension popups. | |
263 if (!HasPendingRequest(session_id)) | |
264 return; | |
265 | |
266 requests_[session_id].recognizer->StopAudioCapture(); | |
267 } | |
268 | |
269 // -------- SpeechRecognitionEventListener interface implementation. --------- | |
270 | |
// Relays a recognition result from the recognizer to the dispatcher host
// owning the session.
void SpeechRecognitionManagerImpl::OnRecognitionResult(
    int session_id, const content::SpeechRecognitionResult& result) {
  DCHECK(HasPendingRequest(session_id));
  GetDelegate(session_id)->SetRecognitionResult(session_id, result);
}
276 | |
277 void SpeechRecognitionManagerImpl::OnAudioEnd(int session_id) { | |
278 if (recording_session_id_ != session_id) | |
279 return; | |
280 DCHECK_EQ(recording_session_id_, session_id); | |
281 DCHECK(HasPendingRequest(session_id)); | |
282 if (!requests_[session_id].is_active) | |
283 return; | |
284 recording_session_id_ = 0; | |
285 GetDelegate(session_id)->DidCompleteRecording(session_id); | |
286 if (delegate_.get()) | |
287 delegate_->ShowRecognizing(session_id); | |
288 } | |
289 | |
290 void SpeechRecognitionManagerImpl::OnRecognitionEnd(int session_id) { | |
291 if (!HasPendingRequest(session_id) || !requests_[session_id].is_active) | |
292 return; | |
293 GetDelegate(session_id)->DidCompleteRecognition(session_id); | |
294 requests_.erase(session_id); | |
295 if (delegate_.get()) | |
296 delegate_->DoClose(session_id); | |
297 } | |
298 | |
// Sound-start events are not surfaced to the UI for input-tag speech.
void SpeechRecognitionManagerImpl::OnSoundStart(int session_id) {
}
301 | |
// Sound-end events are not surfaced to the UI for input-tag speech.
void SpeechRecognitionManagerImpl::OnSoundEnd(int session_id) {
}
304 | |
305 void SpeechRecognitionManagerImpl::OnRecognitionError( | |
306 int session_id, const content::SpeechRecognitionError& error) { | |
307 DCHECK(HasPendingRequest(session_id)); | |
308 if (session_id == recording_session_id_) | |
309 recording_session_id_ = 0; | |
310 requests_[session_id].is_active = false; | |
311 if (delegate_.get()) { | |
312 if (error.code == content::SPEECH_RECOGNITION_ERROR_AUDIO && | |
313 error.details == content::SPEECH_AUDIO_ERROR_DETAILS_NO_MIC) { | |
314 delegate_->ShowMicError(session_id, | |
315 SpeechRecognitionManagerDelegate::MIC_ERROR_NO_DEVICE_AVAILABLE); | |
316 } else if (error.code == content::SPEECH_RECOGNITION_ERROR_AUDIO && | |
317 error.details == content::SPEECH_AUDIO_ERROR_DETAILS_IN_USE) { | |
318 delegate_->ShowMicError(session_id, | |
319 SpeechRecognitionManagerDelegate::MIC_ERROR_DEVICE_IN_USE); | |
320 } else { | |
321 delegate_->ShowRecognizerError(session_id, error.code); | |
322 } | |
323 } | |
324 } | |
325 | |
// Invoked when the recognizer starts capturing audio; switches the UI to the
// "recording" state.
void SpeechRecognitionManagerImpl::OnAudioStart(int session_id) {
  DCHECK(HasPendingRequest(session_id));
  DCHECK_EQ(recording_session_id_, session_id);
  if (delegate_.get())
    delegate_->ShowRecording(session_id);
}
332 | |
// No UI action on recognition start; the "recognizing" state is shown from
// OnAudioEnd() instead.
void SpeechRecognitionManagerImpl::OnRecognitionStart(int session_id) {
}
335 | |
// Only sanity-checks session state; no UI notification is sent for the end of
// the environment-estimation phase.
void SpeechRecognitionManagerImpl::OnEnvironmentEstimationComplete(
    int session_id) {
  DCHECK(HasPendingRequest(session_id));
  DCHECK_EQ(recording_session_id_, session_id);
}
341 | |
// Forwards microphone volume updates to the embedder UI (e.g. a level meter
// in the speech input bubble).
void SpeechRecognitionManagerImpl::OnAudioLevelsChange(
    int session_id, float volume, float noise_volume) {
  DCHECK(HasPendingRequest(session_id));
  DCHECK_EQ(recording_session_id_, session_id);
  if (delegate_.get())
    delegate_->ShowInputVolume(session_id, volume, noise_volume);
}
349 | |
// Cancels |session_id| and then tells its dispatcher host that both recording
// and recognition completed, so the renderer side can reset its state.
void SpeechRecognitionManagerImpl::CancelRecognitionAndInformDelegate(
    int session_id) {
  // Grab the delegate first: CancelRecognition() erases the request entry,
  // after which GetDelegate(session_id) would no longer find it.
  InputTagSpeechDispatcherHost* cur_delegate = GetDelegate(session_id);
  CancelRecognition(session_id);
  cur_delegate->DidCompleteRecording(session_id);
  cur_delegate->DidCompleteRecognition(session_id);
}
357 | |
// Requests start inactive; StartRecognitionForRequest() flips |is_active|
// once the recognizer is started.
SpeechRecognitionManagerImpl::Request::Request()
    : is_active(false) {
}
361 | |
// Out-of-line destructor; members clean themselves up via their own
// destructors.
SpeechRecognitionManagerImpl::Request::~Request() {
}
364 | 625 |
365 } // namespace speech | 626 } // namespace speech |
OLD | NEW |