OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "content/browser/speech/speech_recognition_manager_impl.h" | 5 #include "content/browser/speech/speech_recognition_manager_impl.h" |
6 | 6 |
7 #include "base/bind.h" | 7 #include "base/bind.h" |
8 #include "base/memory/singleton.h" | |
8 #include "content/browser/browser_main_loop.h" | 9 #include "content/browser/browser_main_loop.h" |
9 #include "content/browser/renderer_host/render_view_host_impl.h" | 10 #include "content/browser/speech/google_one_shot_remote_engine.h" |
10 #include "content/browser/speech/input_tag_speech_dispatcher_host.h" | 11 #include "content/browser/speech/speech_recognition_engine.h" |
12 #include "content/browser/speech/speech_recognizer_impl.h" | |
11 #include "content/public/browser/browser_thread.h" | 13 #include "content/public/browser/browser_thread.h" |
12 #include "content/public/browser/content_browser_client.h" | 14 #include "content/public/browser/content_browser_client.h" |
13 #include "content/public/browser/speech_recognizer.h" | |
14 #include "content/public/browser/render_view_host_delegate.h" | |
15 #include "content/public/browser/resource_context.h" | 15 #include "content/public/browser/resource_context.h" |
16 #include "content/public/browser/speech_recognition_event_listener.h" | |
16 #include "content/public/browser/speech_recognition_manager_delegate.h" | 17 #include "content/public/browser/speech_recognition_manager_delegate.h" |
17 #include "content/public/browser/speech_recognition_preferences.h" | 18 #include "content/public/browser/speech_recognition_session_config.h" |
18 #include "content/public/common/view_type.h" | 19 #include "content/public/browser/speech_recognition_session_context.h" |
20 #include "content/public/common/speech_recognition_result.h" | |
19 #include "media/audio/audio_manager.h" | 21 #include "media/audio/audio_manager.h" |
20 | 22 |
23 using base::Callback; | |
24 using base::Unretained; | |
21 using content::BrowserMainLoop; | 25 using content::BrowserMainLoop; |
22 using content::BrowserThread; | 26 using content::BrowserThread; |
23 using content::RenderViewHostImpl; | 27 using content::SpeechRecognitionError; |
28 using content::SpeechRecognitionEventListener; | |
24 using content::SpeechRecognitionManager; | 29 using content::SpeechRecognitionManager; |
25 using content::SpeechRecognitionManagerDelegate; | 30 using content::SpeechRecognitionResult; |
31 using content::SpeechRecognitionSessionContext; | |
32 using content::SpeechRecognitionSessionConfig; | |
33 | |
34 namespace content { | |
35 const int SpeechRecognitionManager::kSessionIDInvalid = 0; | |
26 | 36 |
27 SpeechRecognitionManager* SpeechRecognitionManager::GetInstance() { | 37 SpeechRecognitionManager* SpeechRecognitionManager::GetInstance() { |
28 return speech::SpeechRecognitionManagerImpl::GetInstance(); | 38 return speech::SpeechRecognitionManagerImpl::GetInstance(); |
29 } | 39 } |
40 } // namespace content | |
30 | 41 |
31 namespace speech { | 42 namespace speech { |
32 | 43 |
33 struct SpeechRecognitionManagerImpl::SpeechRecognitionParams { | |
34 SpeechRecognitionParams( | |
35 InputTagSpeechDispatcherHost* delegate, | |
36 int session_id, | |
37 int render_process_id, | |
38 int render_view_id, | |
39 const gfx::Rect& element_rect, | |
40 const std::string& language, | |
41 const std::string& grammar, | |
42 const std::string& origin_url, | |
43 net::URLRequestContextGetter* context_getter, | |
44 content::SpeechRecognitionPreferences* recognition_prefs) | |
45 : delegate(delegate), | |
46 session_id(session_id), | |
47 render_process_id(render_process_id), | |
48 render_view_id(render_view_id), | |
49 element_rect(element_rect), | |
50 language(language), | |
51 grammar(grammar), | |
52 origin_url(origin_url), | |
53 context_getter(context_getter), | |
54 recognition_prefs(recognition_prefs) { | |
55 } | |
56 | |
57 InputTagSpeechDispatcherHost* delegate; | |
58 int session_id; | |
59 int render_process_id; | |
60 int render_view_id; | |
61 gfx::Rect element_rect; | |
62 std::string language; | |
63 std::string grammar; | |
64 std::string origin_url; | |
65 net::URLRequestContextGetter* context_getter; | |
66 content::SpeechRecognitionPreferences* recognition_prefs; | |
67 }; | |
68 | |
69 SpeechRecognitionManagerImpl* SpeechRecognitionManagerImpl::GetInstance() { | 44 SpeechRecognitionManagerImpl* SpeechRecognitionManagerImpl::GetInstance() { |
70 return Singleton<SpeechRecognitionManagerImpl>::get(); | 45 return Singleton<SpeechRecognitionManagerImpl>::get(); |
71 } | 46 } |
72 | 47 |
73 SpeechRecognitionManagerImpl::SpeechRecognitionManagerImpl() | 48 SpeechRecognitionManagerImpl::SpeechRecognitionManagerImpl() |
74 : can_report_metrics_(false), | 49 : interactive_session_id_(kSessionIDInvalid), |
75 recording_session_id_(0) { | 50 last_session_id_(kSessionIDInvalid), |
76 delegate_.reset(content::GetContentClient()->browser()-> | 51 is_dispatching_event_(false) { |
77 GetSpeechRecognitionManagerDelegate()); | 52 delegate_ = content::GetContentClient()->browser()-> |
53 GetSpeechRecognitionManagerDelegate(); | |
78 } | 54 } |
79 | 55 |
80 SpeechRecognitionManagerImpl::~SpeechRecognitionManagerImpl() { | 56 SpeechRecognitionManagerImpl::~SpeechRecognitionManagerImpl() { |
81 while (requests_.begin() != requests_.end()) | 57 // Recognition sessions will be aborted by the corresponding destructors. |
82 CancelRecognition(requests_.begin()->first); | 58 sessions_.clear(); |
83 } | 59 } |
60 | |
61 int SpeechRecognitionManagerImpl::CreateSession( | |
62 const SpeechRecognitionSessionConfig& config, | |
63 SpeechRecognitionEventListener* event_listener) { | |
64 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
65 | |
66 const int session_id = GetNextSessionID(); | |
67 DCHECK(!SessionExists(session_id)); | |
68 // Set up the new session. | |
69 Session& session = sessions_[session_id]; | |
70 session.id = session_id; | |
71 session.event_listener = event_listener; | |
72 session.context = config.initial_context; | |
73 | |
74 std::string hardware_info; | |
75 bool can_report_metrics = false; | |
76 if (delegate_ != NULL) | |
jam
2012/04/25 15:14:08
nit: all the
if (delegate_ != NULL)
should just be
Primiano Tucci (use gerrit)
2012/04/25 16:55:49
Done.
| |
77 delegate_->GetDiagnosticInformation(&can_report_metrics, &hardware_info); | |
78 | |
79 GoogleOneShotRemoteEngineConfig remote_engine_config; | |
80 remote_engine_config.language = config.language; | |
81 remote_engine_config.grammar = config.grammar; | |
82 remote_engine_config.audio_sample_rate = | |
83 SpeechRecognizerImpl::kAudioSampleRate; | |
84 remote_engine_config.audio_num_bits_per_sample = | |
85 SpeechRecognizerImpl::kNumBitsPerAudioSample; | |
86 remote_engine_config.filter_profanities = config.filter_profanities; | |
87 remote_engine_config.hardware_info = hardware_info; | |
88 remote_engine_config.origin_url = can_report_metrics ? config.origin_url : ""; | |
89 | |
90 GoogleOneShotRemoteEngine* google_remote_engine = | |
91 new GoogleOneShotRemoteEngine(config.url_request_context_getter); | |
92 google_remote_engine->SetConfig(remote_engine_config); | |
93 | |
94 session.recognizer = new SpeechRecognizerImpl(this, | |
95 session_id, | |
96 google_remote_engine); | |
97 return session_id; | |
98 } | |
99 | |
100 void SpeechRecognitionManagerImpl::StartSession(int session_id) { | |
101 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
102 DCHECK(SessionExists(session_id)); | |
103 | |
104 // If there is another interactive session, send it to background. | |
105 if (interactive_session_id_ != kSessionIDInvalid && | |
106 interactive_session_id_ != session_id) { | |
107 SendSessionToBackground(interactive_session_id_); | |
108 } | |
109 | |
110 if (delegate_ != NULL) | |
111 delegate_->CheckRecognitionIsAllowedAsync( | |
112 session_id, | |
113 base::Bind(&SpeechRecognitionManagerImpl::RecognitionAllowedCallback, | |
114 base::Unretained(this))); | |
115 } | |
116 | |
117 void SpeechRecognitionManagerImpl::RecognitionAllowedCallback(int session_id, | |
118 bool is_allowed) { | |
119 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
120 DCHECK(SessionExists(session_id)); | |
121 if (is_allowed) { | |
122 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | |
123 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, | |
124 Unretained(this), session_id, FSMEventArgs(EVENT_START))); | |
125 } else { | |
126 sessions_.erase(session_id); | |
127 } | |
128 } | |
129 | |
130 void SpeechRecognitionManagerImpl::AbortSession(int session_id) { | |
131 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
132 DCHECK(SessionExists(session_id)); | |
133 | |
134 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | |
135 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this), | |
136 session_id, FSMEventArgs(EVENT_ABORT))); | |
137 } | |
138 | |
139 void SpeechRecognitionManagerImpl::StopAudioCaptureForSession(int session_id) { | |
140 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
141 DCHECK(SessionExists(session_id)); | |
142 | |
143 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | |
144 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this), | |
145 session_id, FSMEventArgs(EVENT_STOP_CAPTURE))); | |
146 } | |
147 | |
148 void SpeechRecognitionManagerImpl::SendSessionToBackground(int session_id) { | |
149 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
150 DCHECK(SessionExists(session_id)); | |
151 | |
152 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | |
153 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this), | |
154 session_id, FSMEventArgs(EVENT_SET_BACKGROUND))); | |
155 } | |
156 | |
157 // Here begins the SpeechRecognitionEventListener interface implementation, | |
158 // which will simply relay the events to the proper listener registered for the | |
159 // particular session (most likely InputTagSpeechDispatcherHost) and intercept | |
160 // some of them to provide UI notifications. | |
161 | |
162 void SpeechRecognitionManagerImpl::OnRecognitionStart(int session_id) { | |
163 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
164 if (!SessionExists(session_id)) | |
165 return; | |
166 | |
167 DCHECK_EQ(interactive_session_id_, session_id); | |
168 if (delegate_ != NULL) | |
169 delegate_->ShowWarmUp(session_id); | |
170 GetListener(session_id)->OnRecognitionStart(session_id); | |
171 } | |
172 | |
173 void SpeechRecognitionManagerImpl::OnAudioStart(int session_id) { | |
174 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
175 if (!SessionExists(session_id)) | |
176 return; | |
177 | |
178 DCHECK_EQ(interactive_session_id_, session_id); | |
179 if (delegate_ != NULL) | |
180 delegate_->ShowRecording(session_id); | |
181 GetListener(session_id)->OnAudioStart(session_id); | |
182 } | |
183 | |
184 void SpeechRecognitionManagerImpl::OnEnvironmentEstimationComplete( | |
185 int session_id) { | |
186 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
187 if (!SessionExists(session_id)) | |
188 return; | |
189 | |
190 DCHECK_EQ(interactive_session_id_, session_id); | |
191 GetListener(session_id)->OnEnvironmentEstimationComplete(session_id); | |
192 } | |
193 | |
194 void SpeechRecognitionManagerImpl::OnSoundStart(int session_id) { | |
195 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
196 if (!SessionExists(session_id)) | |
197 return; | |
198 | |
199 DCHECK_EQ(interactive_session_id_, session_id); | |
200 GetListener(session_id)->OnSoundStart(session_id); | |
201 } | |
202 | |
203 void SpeechRecognitionManagerImpl::OnSoundEnd(int session_id) { | |
204 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
205 if (!SessionExists(session_id)) | |
206 return; | |
207 | |
208 GetListener(session_id)->OnSoundEnd(session_id); | |
209 } | |
210 | |
211 void SpeechRecognitionManagerImpl::OnAudioEnd(int session_id) { | |
212 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
213 if (!SessionExists(session_id)) | |
214 return; | |
215 | |
216 // OnAudioEnd can also be raised after an abort request, when the session is | |
217 // not interactive anymore. | |
218 if (interactive_session_id_ == session_id && delegate_ != NULL) | |
219 delegate_->ShowRecognizing(session_id); | |
220 | |
221 GetListener(session_id)->OnAudioEnd(session_id); | |
222 } | |
223 | |
224 void SpeechRecognitionManagerImpl::OnRecognitionResult( | |
225 int session_id, const content::SpeechRecognitionResult& result) { | |
226 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
227 if (!SessionExists(session_id)) | |
228 return; | |
229 | |
230 GetListener(session_id)->OnRecognitionResult(session_id, result); | |
231 FSMEventArgs event_args(EVENT_RECOGNITION_RESULT); | |
232 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | |
233 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this), | |
234 session_id, event_args)); | |
235 } | |
236 | |
237 void SpeechRecognitionManagerImpl::OnRecognitionError( | |
238 int session_id, const content::SpeechRecognitionError& error) { | |
239 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
240 if (!SessionExists(session_id)) | |
241 return; | |
242 | |
243 GetListener(session_id)->OnRecognitionError(session_id, error); | |
244 FSMEventArgs event_args(EVENT_RECOGNITION_ERROR); | |
245 event_args.speech_error = error; | |
246 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | |
247 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this), | |
248 session_id, event_args)); | |
249 } | |
250 | |
251 void SpeechRecognitionManagerImpl::OnAudioLevelsChange( | |
252 int session_id, float volume, float noise_volume) { | |
253 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
254 if (!SessionExists(session_id)) | |
255 return; | |
256 | |
257 if (delegate_ != NULL) | |
258 delegate_->ShowInputVolume(session_id, volume, noise_volume); | |
259 | |
260 GetListener(session_id)->OnAudioLevelsChange(session_id, volume, | |
261 noise_volume); | |
262 } | |
263 | |
264 void SpeechRecognitionManagerImpl::OnRecognitionEnd(int session_id) { | |
265 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
266 if (!SessionExists(session_id)) | |
267 return; | |
268 | |
269 GetListener(session_id)->OnRecognitionEnd(session_id); | |
270 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | |
271 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this), | |
272 session_id, FSMEventArgs(EVENT_RECOGNITION_ENDED))); | |
273 } | |
274 | |
275 // TODO(primiano) After CL2: if we see that both InputTagDispatcherHost and | |
276 // SpeechRecognitionDispatcherHost do the same lookup operations, implement the | |
277 // lookup method directly here. | |
278 int SpeechRecognitionManagerImpl::LookupSessionByContext( | |
279 Callback<bool(const SpeechRecognitionSessionContext&)> matcher) const { | |
280 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
281 SessionsTable::const_iterator iter; | |
282 // Note: the callback (matcher) must NEVER perform non-const calls on us. | |
283 for(iter = sessions_.begin(); iter != sessions_.end(); ++iter) { | |
284 const int session_id = iter->first; | |
285 const Session& session = iter->second; | |
286 bool matches = matcher.Run(session.context); | |
287 if (matches) | |
288 return session_id; | |
289 } | |
290 return kSessionIDInvalid; | |
291 } | |
292 | |
293 SpeechRecognitionSessionContext | |
294 SpeechRecognitionManagerImpl::GetSessionContext(int session_id) const { | |
295 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
296 SessionsTable::const_iterator iter = sessions_.find(session_id); | |
297 DCHECK(iter != sessions_.end()); | |
298 return iter->second.context; | |
299 } | |
300 | |
301 void SpeechRecognitionManagerImpl::AbortAllSessionsForListener( | |
302 SpeechRecognitionEventListener* listener) { | |
303 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
304 | |
305 // AbortSession is asynchronous and the session will not be removed from the | |
306 // collection while we are iterating over it. | |
307 for (SessionsTable::iterator it = sessions_.begin(); it != sessions_.end(); | |
308 ++it) { | |
309 if (it->second.event_listener == listener) | |
310 AbortSession(it->first); | |
311 } | |
312 } | |
313 | |
314 // ----------------------- Core FSM implementation --------------------------- | |
315 void SpeechRecognitionManagerImpl::DispatchEvent(int session_id, | |
316 FSMEventArgs event_args) { | |
317 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
318 if (!SessionExists(session_id)) | |
319 return; | |
320 | |
321 Session& session = sessions_[session_id]; | |
322 DCHECK_LE(session.state, STATE_MAX_VALUE); | |
323 DCHECK_LE(event_args.event, EVENT_MAX_VALUE); | |
324 | |
325 // Event dispatching must be sequential, otherwise it will break all the rules | |
326 // and the assumptions of the finite state automaton model. | |
327 DCHECK(!is_dispatching_event_); | |
328 is_dispatching_event_ = true; | |
329 | |
330 // Pedantic preconditions consistency checks. | |
331 if (session.state == STATE_INTERACTIVE) | |
332 DCHECK_EQ(interactive_session_id_, session_id); | |
333 | |
334 if (session.state == STATE_BACKGROUND || | |
335 session.state == STATE_WAITING_FOR_DELETION) { | |
336 DCHECK_NE(interactive_session_id_, session_id); | |
337 } | |
338 | |
339 session.state = ExecuteTransitionAndGetNextState(session, event_args); | |
340 | |
341 is_dispatching_event_ = false; | |
342 } | |
343 | |
344 // This FSM handles the evolution of each session, from the viewpoint of the | |
345 // interaction with the user (who may be either the browser end-user, who | |
346 // interacts with UI bubbles, or a JS developer interacting with JS methods). | |
347 // All the events received by the SpeechRecognizerImpl instances (one for each | |
348 // session) are always routed to the SpeechRecognitionEventListener(s) | |
349 // regardless of the choices taken in this FSM. | |
350 SpeechRecognitionManagerImpl::FSMState | |
351 SpeechRecognitionManagerImpl::ExecuteTransitionAndGetNextState( | |
352 Session& session, const FSMEventArgs& event_args) { | |
353 // Some notes for the code below: | |
354 // - A session can be deleted only if it is not active, thus only if it ended | |
355 // spontaneously or we issued a prior SessionAbort. In these cases, we must | |
356 // wait for a RECOGNITION_ENDED event (which the SpeechRecognizer guarantees | |
357 // will always come last) in order to free resources gracefully. | |
358 // - Use SessionDelete only when absolutely sure that the recognizer is not | |
359 // active. Prefer SessionAbort, which will do it gracefully, otherwise. | |
360 // - Since this class's methods are publicly exported, START, ABORT, | |
361 // STOP_CAPTURE and SET_BACKGROUND events can arrive at any moment from | |
362 // the outside wild world, even if they make no sense. | |
363 const FSMEvent event = event_args.event; | |
364 switch (session.state) { | |
365 case STATE_IDLE: | |
366 // Session has just been created or had an error while interactive. | |
367 switch (event) { | |
368 case EVENT_START: | |
369 return SessionStart(session, event_args); | |
370 case EVENT_ABORT: | |
371 case EVENT_SET_BACKGROUND: | |
372 return SessionAbort(session, event_args); | |
373 case EVENT_STOP_CAPTURE: | |
374 case EVENT_RECOGNITION_ENDED: | |
375 // In case of error, we come back to this state before receiving the | |
376 // OnRecognitionEnd event, thus EVENT_RECOGNITION_ENDED is feasible. | |
377 return DoNothing(session, event_args); | |
378 case EVENT_RECOGNITION_RESULT: | |
379 case EVENT_RECOGNITION_ERROR: | |
380 return NotFeasible(session, event_args); | |
381 } | |
382 break; | |
383 case STATE_INTERACTIVE: | |
384 // The recognizer can be either capturing audio or waiting for a result. | |
385 switch (event) { | |
386 case EVENT_RECOGNITION_RESULT: | |
387 // TODO(primiano) Valid only in single shot mode. Review in next CLs. | |
388 return SessionSetBackground(session, event_args); | |
389 case EVENT_SET_BACKGROUND: | |
390 return SessionAbortIfCapturingAudioOrBackground(session, event_args); | |
391 case EVENT_STOP_CAPTURE: | |
392 return SessionStopAudioCapture(session, event_args); | |
393 case EVENT_ABORT: | |
394 return SessionAbort(session, event_args); | |
395 case EVENT_RECOGNITION_ERROR: | |
396 return SessionReportError(session, event_args); | |
397 case EVENT_RECOGNITION_ENDED: | |
398 // If we're still interactive it means that no result was received | |
399 // in the meanwhile (otherwise we'd have been sent to background). | |
400 return SessionReportNoMatch(session, event_args); | |
401 case EVENT_START: | |
402 return DoNothing(session, event_args); | |
403 } | |
404 break; | |
405 case STATE_BACKGROUND: | |
406 switch (event) { | |
407 case EVENT_ABORT: | |
408 return SessionAbort(session, event_args); | |
409 case EVENT_RECOGNITION_ENDED: | |
410 return SessionDelete(session, event_args); | |
411 case EVENT_START: | |
412 case EVENT_STOP_CAPTURE: | |
413 case EVENT_RECOGNITION_RESULT: | |
414 case EVENT_RECOGNITION_ERROR: | |
415 return DoNothing(session, event_args); | |
416 case EVENT_SET_BACKGROUND: | |
417 return NotFeasible(session, event_args); | |
418 } | |
419 break; | |
420 case STATE_WAITING_FOR_DELETION: | |
421 switch (event) { | |
422 case EVENT_RECOGNITION_ENDED: | |
423 return SessionDelete(session, event_args); | |
424 case EVENT_ABORT: | |
425 case EVENT_START: | |
426 case EVENT_STOP_CAPTURE: | |
427 case EVENT_SET_BACKGROUND: | |
428 case EVENT_RECOGNITION_RESULT: | |
429 case EVENT_RECOGNITION_ERROR: | |
430 return DoNothing(session, event_args); | |
431 } | |
432 break; | |
433 } | |
434 return NotFeasible(session, event_args); | |
435 } | |
436 | |
437 // ----------- Contract for all the FSM evolution functions below ------------- | |
438 // - Are guaranteed to be executed in the IO thread; | |
439 // - Are guaranteed to be not reentrant (themselves and each other); | |
440 // - event_args members are guaranteed to be stable during the call; | |
441 | |
442 SpeechRecognitionManagerImpl::FSMState | |
443 SpeechRecognitionManagerImpl::SessionStart(Session& session, | |
444 const FSMEventArgs& event_args) { | |
445 if (interactive_session_id_ != kSessionIDInvalid && delegate_ != NULL) | |
446 delegate_->DoClose(interactive_session_id_); | |
447 interactive_session_id_ = session.id; | |
448 if (delegate_ != NULL) | |
449 delegate_->ShowRecognitionRequested(session.id); | |
450 session.recognizer->StartRecognition(); | |
451 return STATE_INTERACTIVE; | |
452 } | |
453 | |
454 SpeechRecognitionManagerImpl::FSMState | |
455 SpeechRecognitionManagerImpl::SessionAbort(Session& session, | |
456 const FSMEventArgs& event_args) { | |
457 if (interactive_session_id_ == session.id) { | |
458 interactive_session_id_ = kSessionIDInvalid; | |
459 if (delegate_ != NULL) | |
460 delegate_->DoClose(session.id); | |
461 } | |
462 | |
463 // If abort was requested while the recognizer was inactive, delete directly. | |
464 if (session.recognizer == NULL || !session.recognizer->IsActive()) | |
465 return SessionDelete(session, event_args); | |
466 | |
467 // Otherwise issue an abort and delete gracefully, waiting for a | |
468 // RECOGNITION_ENDED event first. | |
469 session.recognizer->AbortRecognition(); | |
470 return STATE_WAITING_FOR_DELETION; | |
471 } | |
472 | |
473 SpeechRecognitionManagerImpl::FSMState | |
474 SpeechRecognitionManagerImpl::SessionStopAudioCapture( | |
475 Session& session, const FSMEventArgs& event_args) { | |
476 DCHECK(session.recognizer != NULL); | |
477 DCHECK(session.recognizer->IsActive()); | |
478 if (session.recognizer->IsCapturingAudio()) | |
479 session.recognizer->StopAudioCapture(); | |
480 return STATE_INTERACTIVE; | |
481 } | |
482 | |
483 SpeechRecognitionManagerImpl::FSMState | |
484 SpeechRecognitionManagerImpl::SessionAbortIfCapturingAudioOrBackground( | |
485 Session& session, const FSMEventArgs& event_args) { | |
486 DCHECK_EQ(interactive_session_id_, session.id); | |
487 | |
488 DCHECK(session.recognizer != NULL); | |
489 DCHECK(session.recognizer->IsActive()); | |
490 if (session.recognizer->IsCapturingAudio()) | |
491 return SessionAbort(session, event_args); | |
492 | |
493 interactive_session_id_ = kSessionIDInvalid; | |
494 if (delegate_ != NULL) | |
495 delegate_->DoClose(session.id); | |
496 return STATE_BACKGROUND; | |
497 } | |
498 | |
499 | |
500 SpeechRecognitionManagerImpl::FSMState | |
501 SpeechRecognitionManagerImpl::SessionSetBackground( | |
502 Session& session, const FSMEventArgs& event_args) { | |
503 DCHECK_EQ(interactive_session_id_, session.id); | |
504 interactive_session_id_ = kSessionIDInvalid; | |
505 if (delegate_ != NULL) | |
506 delegate_->DoClose(session.id); | |
507 return STATE_BACKGROUND; | |
508 } | |
509 | |
510 SpeechRecognitionManagerImpl::FSMState | |
511 SpeechRecognitionManagerImpl::SessionReportError( | |
512 Session& session, const FSMEventArgs& event_args) { | |
513 DCHECK_EQ(interactive_session_id_, session.id); | |
514 if (delegate_ != NULL) | |
515 delegate_->ShowError(session.id, event_args.speech_error); | |
516 return STATE_IDLE; | |
517 } | |
518 | |
519 SpeechRecognitionManagerImpl::FSMState | |
520 SpeechRecognitionManagerImpl::SessionReportNoMatch( | |
521 Session& session, const FSMEventArgs& event_args) { | |
522 DCHECK_EQ(interactive_session_id_, session.id); | |
523 if (delegate_ != NULL) { | |
524 delegate_->ShowError( | |
525 session.id, | |
526 SpeechRecognitionError(content::SPEECH_RECOGNITION_ERROR_NO_MATCH)); | |
527 } | |
528 return STATE_IDLE; | |
529 } | |
530 | |
531 SpeechRecognitionManagerImpl::FSMState | |
532 SpeechRecognitionManagerImpl::SessionDelete(Session& session, | |
533 const FSMEventArgs& event_args) { | |
534 DCHECK(session.recognizer == NULL || !session.recognizer->IsActive()); | |
535 if (interactive_session_id_ == session.id) { | |
536 interactive_session_id_ = kSessionIDInvalid; | |
537 if (delegate_ != NULL) | |
538 delegate_->DoClose(session.id); | |
539 } | |
540 sessions_.erase(session.id); | |
541 // Next state is irrelevant, the session will be deleted afterwards. | |
542 return STATE_WAITING_FOR_DELETION; | |
543 } | |
544 | |
545 SpeechRecognitionManagerImpl::FSMState | |
546 SpeechRecognitionManagerImpl::DoNothing(Session& session, | |
547 const FSMEventArgs& event_args) { | |
548 return session.state; | |
549 } | |
550 | |
551 SpeechRecognitionManagerImpl::FSMState | |
552 SpeechRecognitionManagerImpl::NotFeasible(Session& session, | |
553 const FSMEventArgs& event_args) { | |
554 NOTREACHED() << "Unfeasible event " << event_args.event | |
555 << " in state " << session.state | |
556 << " for session " << session.id; | |
557 return session.state; | |
558 } | |
559 | |
560 int SpeechRecognitionManagerImpl::GetNextSessionID() { | |
561 ++last_session_id_; | |
562 // Deal with wrapping of last_session_id_. (How civilized). | |
563 if (last_session_id_ <= 0) | |
564 last_session_id_ = 1; | |
565 return last_session_id_; | |
566 } | |
567 | |
568 bool SpeechRecognitionManagerImpl::SessionExists(int session_id) const { | |
569 return sessions_.find(session_id) != sessions_.end(); | |
570 } | |
571 | |
572 SpeechRecognitionEventListener* SpeechRecognitionManagerImpl::GetListener( | |
573 int session_id) const { | |
574 SessionsTable::const_iterator iter = sessions_.find(session_id); | |
575 DCHECK(iter != sessions_.end()); | |
576 return iter->second.event_listener; | |
577 } | |
578 | |
84 | 579 |
85 bool SpeechRecognitionManagerImpl::HasAudioInputDevices() { | 580 bool SpeechRecognitionManagerImpl::HasAudioInputDevices() { |
86 return BrowserMainLoop::GetAudioManager()->HasAudioInputDevices(); | 581 return BrowserMainLoop::GetAudioManager()->HasAudioInputDevices(); |
87 } | 582 } |
88 | 583 |
89 bool SpeechRecognitionManagerImpl::IsCapturingAudio() { | 584 bool SpeechRecognitionManagerImpl::IsCapturingAudio() { |
90 return BrowserMainLoop::GetAudioManager()->IsRecordingInProcess(); | 585 return BrowserMainLoop::GetAudioManager()->IsRecordingInProcess(); |
91 } | 586 } |
92 | 587 |
93 string16 SpeechRecognitionManagerImpl::GetAudioInputDeviceModel() { | 588 string16 SpeechRecognitionManagerImpl::GetAudioInputDeviceModel() { |
94 return BrowserMainLoop::GetAudioManager()->GetAudioInputDeviceModel(); | 589 return BrowserMainLoop::GetAudioManager()->GetAudioInputDeviceModel(); |
95 } | 590 } |
96 | 591 |
97 bool SpeechRecognitionManagerImpl::HasPendingRequest(int session_id) const { | |
98 return requests_.find(session_id) != requests_.end(); | |
99 } | |
100 | |
101 InputTagSpeechDispatcherHost* SpeechRecognitionManagerImpl::GetDelegate( | |
102 int session_id) const { | |
103 return requests_.find(session_id)->second.delegate; | |
104 } | |
105 | |
106 void SpeechRecognitionManagerImpl::ShowAudioInputSettings() { | 592 void SpeechRecognitionManagerImpl::ShowAudioInputSettings() { |
107 // Since AudioManager::ShowAudioInputSettings can potentially launch external | 593 // Since AudioManager::ShowAudioInputSettings can potentially launch external |
108 // processes, do that in the FILE thread to not block the calling threads. | 594 // processes, do that in the FILE thread to not block the calling threads. |
109 if (!BrowserThread::CurrentlyOn(BrowserThread::FILE)) { | 595 if (!BrowserThread::CurrentlyOn(BrowserThread::FILE)) { |
110 BrowserThread::PostTask( | 596 BrowserThread::PostTask( |
111 BrowserThread::FILE, FROM_HERE, | 597 BrowserThread::FILE, FROM_HERE, |
112 base::Bind(&SpeechRecognitionManagerImpl::ShowAudioInputSettings, | 598 base::Bind(&SpeechRecognitionManagerImpl::ShowAudioInputSettings, |
113 base::Unretained(this))); | 599 base::Unretained(this))); |
114 return; | 600 return; |
115 } | 601 } |
116 | 602 |
117 media::AudioManager* audio_manager = BrowserMainLoop::GetAudioManager(); | 603 media::AudioManager* audio_manager = BrowserMainLoop::GetAudioManager(); |
118 DCHECK(audio_manager->CanShowAudioInputSettings()); | 604 DCHECK(audio_manager->CanShowAudioInputSettings()); |
119 if (audio_manager->CanShowAudioInputSettings()) | 605 if (audio_manager->CanShowAudioInputSettings()) |
120 audio_manager->ShowAudioInputSettings(); | 606 audio_manager->ShowAudioInputSettings(); |
121 } | 607 } |
122 | 608 |
123 void SpeechRecognitionManagerImpl::StartRecognition( | 609 SpeechRecognitionManagerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value) |
124 InputTagSpeechDispatcherHost* delegate, | 610 : event(event_value), |
125 int session_id, | 611 speech_error(content::SPEECH_RECOGNITION_ERROR_NONE) { |
126 int render_process_id, | 612 } |
127 int render_view_id, | 613 |
128 const gfx::Rect& element_rect, | 614 SpeechRecognitionManagerImpl::FSMEventArgs::~FSMEventArgs() { |
129 const std::string& language, | 615 } |
130 const std::string& grammar, | 616 |
131 const std::string& origin_url, | 617 SpeechRecognitionManagerImpl::Session::Session() |
132 net::URLRequestContextGetter* context_getter, | 618 : id(kSessionIDInvalid), |
133 content::SpeechRecognitionPreferences* recognition_prefs) { | 619 event_listener(NULL), |
134 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | 620 state(STATE_IDLE) { |
135 BrowserThread::PostTask( | 621 } |
136 BrowserThread::UI, FROM_HERE, | 622 |
137 base::Bind( | 623 SpeechRecognitionManagerImpl::Session::~Session() { |
138 &SpeechRecognitionManagerImpl::CheckRenderViewTypeAndStartRecognition, | |
139 base::Unretained(this), | |
140 SpeechRecognitionParams( | |
141 delegate, session_id, render_process_id, render_view_id, | |
142 element_rect, language, grammar, origin_url, context_getter, | |
143 recognition_prefs))); | |
144 } | |
145 | |
146 void SpeechRecognitionManagerImpl::CheckRenderViewTypeAndStartRecognition( | |
147 const SpeechRecognitionParams& params) { | |
148 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); | |
149 | |
150 RenderViewHostImpl* render_view_host = RenderViewHostImpl::FromID( | |
151 params.render_process_id, params.render_view_id); | |
152 if (!render_view_host || !render_view_host->GetDelegate()) | |
153 return; | |
154 | |
155 // For host delegates other than VIEW_TYPE_WEB_CONTENTS we can't reliably show | |
156 // a popup, including the speech input bubble. In these cases for privacy | |
157 // reasons we don't want to start recording if the user can't be properly | |
158 // notified. An example of this is trying to show the speech input bubble | |
159 // within an extension popup: http://crbug.com/92083. In these situations the | |
160 // speech input extension API should be used instead. | |
161 if (render_view_host->GetDelegate()->GetRenderViewType() == | |
162 content::VIEW_TYPE_WEB_CONTENTS) { | |
163 BrowserThread::PostTask( | |
164 BrowserThread::IO, FROM_HERE, | |
165 base::Bind(&SpeechRecognitionManagerImpl::ProceedStartingRecognition, | |
166 base::Unretained(this), params)); | |
167 } | |
168 } | |
169 | |
170 void SpeechRecognitionManagerImpl::ProceedStartingRecognition( | |
171 const SpeechRecognitionParams& params) { | |
172 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
173 DCHECK(!HasPendingRequest(params.session_id)); | |
174 | |
175 if (delegate_.get()) { | |
176 delegate_->ShowRecognitionRequested( | |
177 params.session_id, params.render_process_id, params.render_view_id, | |
178 params.element_rect); | |
179 delegate_->GetRequestInfo(&can_report_metrics_, &request_info_); | |
180 } | |
181 | |
182 Request* request = &requests_[params.session_id]; | |
183 request->delegate = params.delegate; | |
184 request->recognizer = content::SpeechRecognizer::Create( | |
185 this, params.session_id, params.language, params.grammar, | |
186 params.context_getter, params.recognition_prefs->FilterProfanities(), | |
187 request_info_, can_report_metrics_ ? params.origin_url : ""); | |
188 request->is_active = false; | |
189 | |
190 StartRecognitionForRequest(params.session_id); | |
191 } | |
192 | |
193 void SpeechRecognitionManagerImpl::StartRecognitionForRequest(int session_id) { | |
194 SpeechRecognizerMap::iterator request = requests_.find(session_id); | |
195 if (request == requests_.end()) { | |
196 NOTREACHED(); | |
197 return; | |
198 } | |
199 | |
200 // We should not currently be recording for the session. | |
201 CHECK(recording_session_id_ != session_id); | |
202 | |
203 // If we are currently recording audio for another session, abort it cleanly. | |
204 if (recording_session_id_) | |
205 CancelRecognitionAndInformDelegate(recording_session_id_); | |
206 recording_session_id_ = session_id; | |
207 requests_[session_id].is_active = true; | |
208 requests_[session_id].recognizer->StartRecognition(); | |
209 if (delegate_.get()) | |
210 delegate_->ShowWarmUp(session_id); | |
211 } | |
212 | |
213 void SpeechRecognitionManagerImpl::CancelRecognitionForRequest(int session_id) { | |
214 // Ignore if the session id was not in our active recognizers list because the | |
215 // user might have clicked more than once, or recognition could have been | |
216 // ended due to other reasons before the user click was processed. | |
217 if (!HasPendingRequest(session_id)) | |
218 return; | |
219 | |
220 CancelRecognitionAndInformDelegate(session_id); | |
221 } | |
222 | |
223 void SpeechRecognitionManagerImpl::FocusLostForRequest(int session_id) { | |
224 // See above comment. | |
225 if (!HasPendingRequest(session_id)) | |
226 return; | |
227 | |
228 // If this is an ongoing recording or if we were displaying an error message | |
229 // to the user, abort it since user has switched focus. Otherwise | |
230 // recognition has started and keep that going so user can start speaking to | |
231 // another element while this gets the results in parallel. | |
232 if (recording_session_id_ == session_id || !requests_[session_id].is_active) | |
233 CancelRecognitionAndInformDelegate(session_id); | |
234 } | |
235 | |
236 void SpeechRecognitionManagerImpl::CancelRecognition(int session_id) { | |
237 DCHECK(HasPendingRequest(session_id)); | |
238 if (requests_[session_id].is_active) | |
239 requests_[session_id].recognizer->AbortRecognition(); | |
240 requests_.erase(session_id); | |
241 if (recording_session_id_ == session_id) | |
242 recording_session_id_ = 0; | |
243 if (delegate_.get()) | |
244 delegate_->DoClose(session_id); | |
245 } | |
246 | |
247 void SpeechRecognitionManagerImpl::CancelAllRequestsWithDelegate( | |
248 InputTagSpeechDispatcherHost* delegate) { | |
249 SpeechRecognizerMap::iterator it = requests_.begin(); | |
250 while (it != requests_.end()) { | |
251 if (it->second.delegate == delegate) { | |
252 CancelRecognition(it->first); | |
253 // This map will have very few elements so it is simpler to restart. | |
254 it = requests_.begin(); | |
255 } else { | |
256 ++it; | |
257 } | |
258 } | |
259 } | |
260 | |
261 void SpeechRecognitionManagerImpl::StopRecording(int session_id) { | |
262 // No pending requests on extension popups. | |
263 if (!HasPendingRequest(session_id)) | |
264 return; | |
265 | |
266 requests_[session_id].recognizer->StopAudioCapture(); | |
267 } | |
268 | |
269 // -------- SpeechRecognitionEventListener interface implementation. --------- | |
270 | |
271 void SpeechRecognitionManagerImpl::OnRecognitionResult( | |
272 int session_id, const content::SpeechRecognitionResult& result) { | |
273 DCHECK(HasPendingRequest(session_id)); | |
274 GetDelegate(session_id)->SetRecognitionResult(session_id, result); | |
275 } | |
276 | |
277 void SpeechRecognitionManagerImpl::OnAudioEnd(int session_id) { | |
278 if (recording_session_id_ != session_id) | |
279 return; | |
280 DCHECK_EQ(recording_session_id_, session_id); | |
281 DCHECK(HasPendingRequest(session_id)); | |
282 if (!requests_[session_id].is_active) | |
283 return; | |
284 recording_session_id_ = 0; | |
285 GetDelegate(session_id)->DidCompleteRecording(session_id); | |
286 if (delegate_.get()) | |
287 delegate_->ShowRecognizing(session_id); | |
288 } | |
289 | |
290 void SpeechRecognitionManagerImpl::OnRecognitionEnd(int session_id) { | |
291 if (!HasPendingRequest(session_id) || !requests_[session_id].is_active) | |
292 return; | |
293 GetDelegate(session_id)->DidCompleteRecognition(session_id); | |
294 requests_.erase(session_id); | |
295 if (delegate_.get()) | |
296 delegate_->DoClose(session_id); | |
297 } | |
298 | |
299 void SpeechRecognitionManagerImpl::OnSoundStart(int session_id) { | |
300 } | |
301 | |
302 void SpeechRecognitionManagerImpl::OnSoundEnd(int session_id) { | |
303 } | |
304 | |
305 void SpeechRecognitionManagerImpl::OnRecognitionError( | |
306 int session_id, const content::SpeechRecognitionError& error) { | |
307 DCHECK(HasPendingRequest(session_id)); | |
308 if (session_id == recording_session_id_) | |
309 recording_session_id_ = 0; | |
310 requests_[session_id].is_active = false; | |
311 if (delegate_.get()) { | |
312 if (error.code == content::SPEECH_RECOGNITION_ERROR_AUDIO && | |
313 error.details == content::SPEECH_AUDIO_ERROR_DETAILS_NO_MIC) { | |
314 delegate_->ShowMicError(session_id, | |
315 SpeechRecognitionManagerDelegate::MIC_ERROR_NO_DEVICE_AVAILABLE); | |
316 } else if (error.code == content::SPEECH_RECOGNITION_ERROR_AUDIO && | |
317 error.details == content::SPEECH_AUDIO_ERROR_DETAILS_IN_USE) { | |
318 delegate_->ShowMicError(session_id, | |
319 SpeechRecognitionManagerDelegate::MIC_ERROR_DEVICE_IN_USE); | |
320 } else { | |
321 delegate_->ShowRecognizerError(session_id, error.code); | |
322 } | |
323 } | |
324 } | |
325 | |
326 void SpeechRecognitionManagerImpl::OnAudioStart(int session_id) { | |
327 DCHECK(HasPendingRequest(session_id)); | |
328 DCHECK_EQ(recording_session_id_, session_id); | |
329 if (delegate_.get()) | |
330 delegate_->ShowRecording(session_id); | |
331 } | |
332 | |
333 void SpeechRecognitionManagerImpl::OnRecognitionStart(int session_id) { | |
334 } | |
335 | |
336 void SpeechRecognitionManagerImpl::OnEnvironmentEstimationComplete( | |
337 int session_id) { | |
338 DCHECK(HasPendingRequest(session_id)); | |
339 DCHECK_EQ(recording_session_id_, session_id); | |
340 } | |
341 | |
342 void SpeechRecognitionManagerImpl::OnAudioLevelsChange( | |
343 int session_id, float volume, float noise_volume) { | |
344 DCHECK(HasPendingRequest(session_id)); | |
345 DCHECK_EQ(recording_session_id_, session_id); | |
346 if (delegate_.get()) | |
347 delegate_->ShowInputVolume(session_id, volume, noise_volume); | |
348 } | |
349 | |
350 void SpeechRecognitionManagerImpl::CancelRecognitionAndInformDelegate( | |
351 int session_id) { | |
352 InputTagSpeechDispatcherHost* cur_delegate = GetDelegate(session_id); | |
353 CancelRecognition(session_id); | |
354 cur_delegate->DidCompleteRecording(session_id); | |
355 cur_delegate->DidCompleteRecognition(session_id); | |
356 } | |
357 | |
358 SpeechRecognitionManagerImpl::Request::Request() | |
359 : is_active(false) { | |
360 } | |
361 | |
362 SpeechRecognitionManagerImpl::Request::~Request() { | |
363 } | 624 } |
364 | 625 |
365 } // namespace speech | 626 } // namespace speech |
OLD | NEW |