OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "content/browser/speech/speech_recognition_manager_impl.h" | 5 #include "content/browser/speech/speech_recognition_manager_impl.h" |
6 | 6 |
7 #include "base/bind.h" | 7 #include "base/bind.h" |
8 #include "base/memory/singleton.h" | |
8 #include "content/browser/browser_main_loop.h" | 9 #include "content/browser/browser_main_loop.h" |
9 #include "content/browser/renderer_host/render_view_host_impl.h" | 10 #include "content/browser/speech/google_one_shot_remote_engine.h" |
10 #include "content/browser/speech/input_tag_speech_dispatcher_host.h" | 11 #include "content/browser/speech/speech_recognition_engine.h" |
12 #include "content/browser/speech/speech_recognizer_impl.h" | |
11 #include "content/public/browser/browser_thread.h" | 13 #include "content/public/browser/browser_thread.h" |
12 #include "content/public/browser/content_browser_client.h" | 14 #include "content/public/browser/content_browser_client.h" |
13 #include "content/public/browser/speech_recognizer.h" | |
14 #include "content/public/browser/render_view_host_delegate.h" | |
15 #include "content/public/browser/resource_context.h" | 15 #include "content/public/browser/resource_context.h" |
16 #include "content/public/browser/speech_recognition_event_listener.h" | |
16 #include "content/public/browser/speech_recognition_manager_delegate.h" | 17 #include "content/public/browser/speech_recognition_manager_delegate.h" |
17 #include "content/public/browser/speech_recognition_preferences.h" | 18 #include "content/public/browser/speech_recognition_session_config.h" |
18 #include "content/public/common/view_type.h" | 19 #include "content/public/browser/speech_recognition_session_context.h" |
20 #include "content/public/common/speech_recognition_result.h" | |
19 #include "media/audio/audio_manager.h" | 21 #include "media/audio/audio_manager.h" |
20 | 22 |
23 using base::Callback; | |
24 using base::Unretained; | |
21 using content::BrowserMainLoop; | 25 using content::BrowserMainLoop; |
22 using content::BrowserThread; | 26 using content::BrowserThread; |
23 using content::RenderViewHostImpl; | 27 using content::SpeechRecognitionError; |
28 using content::SpeechRecognitionEventListener; | |
24 using content::SpeechRecognitionManager; | 29 using content::SpeechRecognitionManager; |
25 using content::SpeechRecognitionManagerDelegate; | 30 using content::SpeechRecognitionResult; |
31 using content::SpeechRecognitionSessionContext; | |
32 using content::SpeechRecognitionSessionConfig; | |
33 | |
34 namespace { | |
35 | |
36 // A dummy implementation of the SpeechRecognitionManagerDelegate interface | |
37 // used when no delegate has been passed to the SpeechRecognitionManagerImpl. | |
38 class VoidRecognitionManagerDelegate : | |
39 public content::SpeechRecognitionManagerDelegate { | |
40 public: | |
41 static VoidRecognitionManagerDelegate* GetInstance() { | |
42 return Singleton<VoidRecognitionManagerDelegate>::get(); | |
43 } | |
44 virtual void GetDiagnosticInformation( | |
45 bool* can_report_metrics, std::string* request_info) OVERRIDE {} | |
46 virtual bool IsRecognitionAllowed(int session_id) OVERRIDE { return false; } | |
47 virtual void ShowRecognitionRequested(int session_id) OVERRIDE {} | |
48 virtual void ShowWarmUp(int session_id) OVERRIDE {} | |
49 virtual void ShowRecognizing(int session_id) OVERRIDE {} | |
50 virtual void ShowRecording(int session_id) OVERRIDE {} | |
51 virtual void ShowInputVolume( | |
52 int session_id, float volume, float noise_volume) OVERRIDE {} | |
53 virtual void ShowError(int session_id, | |
54 const content::SpeechRecognitionError& error) OVERRIDE {} | |
55 virtual void DoClose(int session_id) OVERRIDE {} | |
56 | |
57 private: | |
58 VoidRecognitionManagerDelegate() {} | |
59 virtual ~VoidRecognitionManagerDelegate() {} | |
60 friend struct DefaultSingletonTraits<VoidRecognitionManagerDelegate>; | |
61 }; | |
62 | |
63 } // namespace | |
64 | |
65 namespace content { | |
66 const int SpeechRecognitionManager::kSessionIDInvalid = 0; | |
26 | 67 |
27 SpeechRecognitionManager* SpeechRecognitionManager::GetInstance() { | 68 SpeechRecognitionManager* SpeechRecognitionManager::GetInstance() { |
28 return speech::SpeechRecognitionManagerImpl::GetInstance(); | 69 return speech::SpeechRecognitionManagerImpl::GetInstance(); |
29 } | 70 } |
71 } // namespace content | |
30 | 72 |
31 namespace speech { | 73 namespace speech { |
32 | 74 |
33 struct SpeechRecognitionManagerImpl::SpeechRecognitionParams { | |
34 SpeechRecognitionParams( | |
35 InputTagSpeechDispatcherHost* delegate, | |
36 int session_id, | |
37 int render_process_id, | |
38 int render_view_id, | |
39 const gfx::Rect& element_rect, | |
40 const std::string& language, | |
41 const std::string& grammar, | |
42 const std::string& origin_url, | |
43 net::URLRequestContextGetter* context_getter, | |
44 content::SpeechRecognitionPreferences* recognition_prefs) | |
45 : delegate(delegate), | |
46 session_id(session_id), | |
47 render_process_id(render_process_id), | |
48 render_view_id(render_view_id), | |
49 element_rect(element_rect), | |
50 language(language), | |
51 grammar(grammar), | |
52 origin_url(origin_url), | |
53 context_getter(context_getter), | |
54 recognition_prefs(recognition_prefs) { | |
55 } | |
56 | |
57 InputTagSpeechDispatcherHost* delegate; | |
58 int session_id; | |
59 int render_process_id; | |
60 int render_view_id; | |
61 gfx::Rect element_rect; | |
62 std::string language; | |
63 std::string grammar; | |
64 std::string origin_url; | |
65 net::URLRequestContextGetter* context_getter; | |
66 content::SpeechRecognitionPreferences* recognition_prefs; | |
67 }; | |
68 | |
69 SpeechRecognitionManagerImpl* SpeechRecognitionManagerImpl::GetInstance() { | 75 SpeechRecognitionManagerImpl* SpeechRecognitionManagerImpl::GetInstance() { |
70 return Singleton<SpeechRecognitionManagerImpl>::get(); | 76 return Singleton<SpeechRecognitionManagerImpl>::get(); |
71 } | 77 } |
72 | 78 |
73 SpeechRecognitionManagerImpl::SpeechRecognitionManagerImpl() | 79 SpeechRecognitionManagerImpl::SpeechRecognitionManagerImpl() |
74 : can_report_metrics_(false), | 80 : interactive_session_id_(kSessionIDInvalid), |
75 recording_session_id_(0) { | 81 last_session_id_(kSessionIDInvalid), |
76 delegate_.reset(content::GetContentClient()->browser()-> | 82 is_dispatching_event_(false) { |
77 GetSpeechRecognitionManagerDelegate()); | 83 delegate_ = content::GetContentClient()->browser()-> |
84 GetSpeechRecognitionManagerDelegate(); | |
85 // If none has been provided, instantiate a void delegate so we can avoid | |
86 // unaesthetic "if (delegate_ != NULL)" statements. | |
87 if (delegate_ == NULL) | |
88 delegate_ = VoidRecognitionManagerDelegate::GetInstance(); | |
78 } | 89 } |
79 | 90 |
80 SpeechRecognitionManagerImpl::~SpeechRecognitionManagerImpl() { | 91 SpeechRecognitionManagerImpl::~SpeechRecognitionManagerImpl() { |
81 while (requests_.begin() != requests_.end()) | 92 // Recognition sessions will be aborted by the corresponding destructors. |
82 CancelRecognition(requests_.begin()->first); | 93 sessions_.clear(); |
83 } | 94 } |
95 | |
96 int SpeechRecognitionManagerImpl::CreateSession( | |
97 const SpeechRecognitionSessionConfig& config, | |
98 SpeechRecognitionEventListener* event_listener) { | |
99 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
100 | |
101 const int session_id = GetNextSessionID(); | |
102 DCHECK(!SessionExists(session_id)); | |
103 // Set-up the new session. | |
104 Session& session = sessions_[session_id]; | |
105 session.id = session_id; | |
106 session.event_listener = event_listener; | |
107 session.context = config.initial_context; | |
108 | |
109 // TODO(primiano) Is this check enough just on creation or shall we move/copy | |
110 // it on SessionStart in order to repeat the check every time?. | |
hans
2012/04/23 16:04:14
both ? and .
Primiano Tucci (use gerrit)
2012/04/23 18:32:17
Done.
| |
111 if (!delegate_->IsRecognitionAllowed(session_id)) { | |
112 sessions_.erase(session_id); | |
113 return kSessionIDInvalid; | |
114 } | |
115 | |
116 std::string hardware_info; | |
117 bool can_report_metrics; | |
118 delegate_->GetDiagnosticInformation(&can_report_metrics, &hardware_info); | |
119 | |
120 GoogleOneShotRemoteEngineConfig remote_engine_config; | |
121 remote_engine_config.language = config.language; | |
122 remote_engine_config.grammar = config.grammar; | |
123 remote_engine_config.audio_sample_rate = | |
124 SpeechRecognizerImpl::kAudioSampleRate; | |
125 remote_engine_config.audio_num_bits_per_sample = | |
126 SpeechRecognizerImpl::kNumBitsPerAudioSample; | |
127 remote_engine_config.filter_profanities = config.filter_profanities; | |
128 remote_engine_config.hardware_info = hardware_info; | |
129 remote_engine_config.origin_url = can_report_metrics ? config.origin_url : ""; | |
130 | |
131 GoogleOneShotRemoteEngine* google_remote_engine = | |
132 new GoogleOneShotRemoteEngine(config.url_request_context_getter); | |
133 google_remote_engine->SetConfig(remote_engine_config); | |
134 | |
135 session.recognizer = new SpeechRecognizerImpl(this, | |
136 session_id, | |
137 google_remote_engine); | |
138 return session_id; | |
139 } | |
140 | |
141 void SpeechRecognitionManagerImpl::StartSession(int session_id) { | |
142 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
143 DCHECK(SessionExists(session_id)); | |
144 | |
145 // If another session is interactive, detach it before starting the new one. | |
146 if (interactive_session_id_ != kSessionIDInvalid && | |
147 interactive_session_id_ != session_id) { | |
148 DetachSession(interactive_session_id_); | |
149 } | |
150 | |
151 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | |
152 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this), | |
153 session_id, FSMEventArgs(EVENT_START))); | |
154 } | |
155 | |
156 void SpeechRecognitionManagerImpl::AbortSession(int session_id) { | |
157 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
158 DCHECK(SessionExists(session_id)); | |
159 | |
160 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | |
161 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this), | |
162 session_id, FSMEventArgs(EVENT_ABORT))); | |
163 } | |
164 | |
165 void SpeechRecognitionManagerImpl::StopAudioCaptureForSession(int session_id) { | |
166 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
167 DCHECK(SessionExists(session_id)); | |
168 | |
169 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | |
170 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this), | |
171 session_id, FSMEventArgs(EVENT_STOP_CAPTURE))); | |
172 } | |
173 | |
174 void SpeechRecognitionManagerImpl::DetachSession(int session_id) { | |
175 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
176 DCHECK(SessionExists(session_id)); | |
177 | |
178 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | |
179 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this), | |
180 session_id, FSMEventArgs(EVENT_DETACH))); | |
181 } | |
182 | |
183 // Here begins the SpeechRecognitionEventListener interface implementation, | |
184 // which will simply relay the events to the proper listener registered for the | |
185 // particular session (most likely InputTagSpeechDispatcherHost) and intercept | |
186 // some of them to provide UI notifications. | |
187 | |
188 void SpeechRecognitionManagerImpl::OnRecognitionStart(int session_id) { | |
189 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
190 if (!SessionExists(session_id)) | |
191 return; | |
192 | |
193 DCHECK_EQ(interactive_session_id_, session_id); | |
194 delegate_->ShowWarmUp(session_id); | |
195 GetListener(session_id)->OnRecognitionStart(session_id); | |
196 } | |
197 | |
198 void SpeechRecognitionManagerImpl::OnAudioStart(int session_id) { | |
199 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
200 if (!SessionExists(session_id)) | |
201 return; | |
202 | |
203 DCHECK_EQ(interactive_session_id_, session_id); | |
204 delegate_->ShowRecording(session_id); | |
205 GetListener(session_id)->OnAudioStart(session_id); | |
206 } | |
207 | |
208 void SpeechRecognitionManagerImpl::OnEnvironmentEstimationComplete( | |
209 int session_id) { | |
210 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
211 if (!SessionExists(session_id)) | |
212 return; | |
213 | |
214 DCHECK_EQ(interactive_session_id_, session_id); | |
215 GetListener(session_id)->OnEnvironmentEstimationComplete(session_id); | |
216 } | |
217 | |
218 void SpeechRecognitionManagerImpl::OnSoundStart(int session_id) { | |
219 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
220 if (!SessionExists(session_id)) | |
221 return; | |
222 | |
223 DCHECK_EQ(interactive_session_id_, session_id); | |
224 GetListener(session_id)->OnSoundStart(session_id); | |
225 } | |
226 | |
227 void SpeechRecognitionManagerImpl::OnSoundEnd(int session_id) { | |
228 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
229 if (!SessionExists(session_id)) | |
230 return; | |
231 | |
232 GetListener(session_id)->OnSoundEnd(session_id); | |
233 } | |
234 | |
235 void SpeechRecognitionManagerImpl::OnAudioEnd(int session_id) { | |
236 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
237 if (!SessionExists(session_id)) | |
238 return; | |
239 | |
240 // OnAudioEnd can also be raised after an abort request, when the session is | |
241 // not interactive anymore. | |
242 if (interactive_session_id_ == session_id) | |
243 delegate_->ShowRecognizing(session_id); | |
244 | |
245 GetListener(session_id)->OnAudioEnd(session_id); | |
246 } | |
247 | |
248 void SpeechRecognitionManagerImpl::OnRecognitionResult( | |
249 int session_id, const content::SpeechRecognitionResult& result) { | |
250 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
251 if (!SessionExists(session_id)) | |
252 return; | |
253 | |
254 GetListener(session_id)->OnRecognitionResult(session_id, result); | |
255 FSMEventArgs event_args(EVENT_RECOGNITION_RESULT); | |
256 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | |
257 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this), | |
258 session_id, event_args)); | |
259 } | |
260 | |
261 void SpeechRecognitionManagerImpl::OnRecognitionError( | |
262 int session_id, const content::SpeechRecognitionError& error) { | |
263 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
264 if (!SessionExists(session_id)) | |
265 return; | |
266 | |
267 GetListener(session_id)->OnRecognitionError(session_id, error); | |
268 FSMEventArgs event_args(EVENT_RECOGNITION_ERROR); | |
269 event_args.speech_error = error; | |
270 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | |
271 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this), | |
272 session_id, event_args)); | |
273 } | |
274 | |
275 void SpeechRecognitionManagerImpl::OnAudioLevelsChange( | |
276 int session_id, float volume, float noise_volume) { | |
277 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
278 if (!SessionExists(session_id)) | |
279 return; | |
280 | |
281 delegate_->ShowInputVolume(session_id, volume, noise_volume); | |
282 GetListener(session_id)->OnAudioLevelsChange(session_id, volume, | |
283 noise_volume); | |
284 } | |
285 | |
286 void SpeechRecognitionManagerImpl::OnRecognitionEnd(int session_id) { | |
287 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
288 if (!SessionExists(session_id)) | |
289 return; | |
290 | |
291 GetListener(session_id)->OnRecognitionEnd(session_id); | |
292 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | |
293 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this), | |
294 session_id, FSMEventArgs(EVENT_RECOGNITION_ENDED))); | |
295 } | |
296 | |
297 // TODO(primiano) After CL2: if we see that both InputTagDispatcherHost and | |
298 // SpeechRecognitionDispatcherHost do the same lookup operations, implement the | |
299 // lookup method directly here. | |
300 int SpeechRecognitionManagerImpl::LookupSessionByContext( | |
301 Callback<bool(const SpeechRecognitionSessionContext&)> matcher) const { | |
302 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
303 SessionsTable::const_iterator iter; | |
304 // Note: the callback (matcher) must NEVER perform non-const calls on us. | |
305 for(iter = sessions_.begin(); iter != sessions_.end(); iter++) { | |
hans
2012/04/23 16:04:14
please use the prefix increment operator: ++iter
Primiano Tucci (use gerrit)
2012/04/23 18:32:17
Done.
| |
306 const int session_id = iter->first; | |
307 const Session& session = iter->second; | |
308 bool matches = matcher.Run(session.context); | |
309 if (matches) | |
310 return session_id; | |
311 } | |
312 return kSessionIDInvalid; | |
313 } | |
314 | |
315 SpeechRecognitionSessionContext | |
316 SpeechRecognitionManagerImpl::GetSessionContext(int session_id) const { | |
317 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
318 SessionsTable::const_iterator iter = sessions_.find(session_id); | |
319 DCHECK(iter != sessions_.end()); | |
320 return const_cast<SpeechRecognitionSessionContext&>(iter->second.context); | |
hans
2012/04/23 16:04:14
i'm curious why the const_cast is needed here
Primiano Tucci (use gerrit)
2012/04/23 18:32:17
Uh right. Some patches ago it was returning a refe
| |
321 } | |
322 | |
323 void SpeechRecognitionManagerImpl::AbortAllSessionsForListener( | |
324 SpeechRecognitionEventListener* listener) { | |
325 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
326 SessionsTable::iterator it = sessions_.begin(); | |
327 // AbortSession is asynchronous and the session will not be removed from the | |
328 // collection while we are iterating over it. | |
329 while (it != sessions_.end()) { | |
hans
2012/04/23 16:04:14
i'd prefer a for loop
Primiano Tucci (use gerrit)
2012/04/23 18:32:17
Definitely agree.
| |
330 if (it->second.event_listener == listener) | |
331 AbortSession(it->first); | |
332 ++it; | |
333 } | |
334 } | |
335 | |
336 // ----------------------- Core FSM implementation --------------------------- | |
337 void SpeechRecognitionManagerImpl::DispatchEvent(int session_id, | |
338 FSMEventArgs event_args) { | |
339 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
340 if (!SessionExists(session_id)) | |
341 return; | |
342 | |
343 Session& session = sessions_[session_id]; | |
344 DCHECK_LE(session.state, STATE_MAX_VALUE); | |
345 DCHECK_LE(event_args.event, EVENT_MAX_VALUE); | |
346 | |
347 // Event dispatching must be sequential, otherwise it will break all the rules | |
348 // and the assumptions of the finite-state automaton model. | |
349 DCHECK(!is_dispatching_event_); | |
350 is_dispatching_event_ = true; | |
351 | |
352 // Pedantic preconditions consistency checks. | |
353 if (session.state == STATE_INTERACTIVE) | |
354 DCHECK_EQ(interactive_session_id_, session_id); | |
355 | |
356 if (session.state == STATE_DETACHED || | |
357 session.state == STATE_WAITING_FOR_DELETION) { | |
358 DCHECK_NE(interactive_session_id_, session_id); | |
359 } | |
360 | |
361 session.state = ExecuteTransitionAndGetNextState(session, event_args); | |
362 | |
363 is_dispatching_event_ = false; | |
364 } | |
365 | |
366 // This FSM handles the evolution of each session, from the viewpoint of the | |
367 // interaction with the user (that may be either the browser end-user which | |
368 // interacts with UI bubbles, or JS developer interacting with JS methods). | |
369 // All the events received by the SpeechRecognizerImpl instances (one for each | |
370 // session) are always routed to the SpeechRecognitionEventListener(s) | |
371 // regardless of the choices taken in this FSM. | |
372 SpeechRecognitionManagerImpl::FSMState | |
373 SpeechRecognitionManagerImpl::ExecuteTransitionAndGetNextState( | |
374 Session& session, const FSMEventArgs& event_args) { | |
375 // Some notes for the code below: | |
376 // - A session can be deleted only if it is not active, thus only if it ended | |
377 // spontaneously or we issued a prior SessionAbort. In these cases, we must | |
378 // wait for a RECOGNITION_ENDED event (which the SpeechRecognizer guarantees | |
379 // to always be the last one) in order to free resources gracefully. | |
380 // - Use SessionDelete only when absolutely sure that the recognizer is not | |
381 // active. Prefer SessionAbort, which will do it gracefully, otherwise. | |
382 const FSMEvent event = event_args.event; | |
383 switch (session.state) { | |
384 case STATE_IDLE: | |
385 // Session has just been created or had an error while interactive (thus, | |
386 // it is still interactive). | |
387 switch (event) { | |
388 case EVENT_START: | |
389 return SessionStart(session, event_args); | |
390 case EVENT_ABORT: | |
391 case EVENT_DETACH: | |
392 return SessionAbort(session, event_args); | |
393 case EVENT_STOP_CAPTURE: | |
394 case EVENT_RECOGNITION_ENDED: | |
395 return DoNothing(session, event_args); | |
396 case EVENT_RECOGNITION_RESULT: | |
397 case EVENT_RECOGNITION_ERROR: | |
398 return NotFeasible(session, event_args); | |
399 } | |
400 break; | |
401 case STATE_INTERACTIVE: | |
402 // The recognizer can be either capturing audio or waiting for a result. | |
403 switch (event) { | |
404 case EVENT_RECOGNITION_RESULT: | |
405 // TODO(primiano) Valid only in single shot mode. Review in next CLs. | |
406 return SessionDetach(session, event_args); | |
407 case EVENT_DETACH: | |
408 return SessionAbortIfCapturingAudioOrDetach(session, event_args); | |
409 case EVENT_STOP_CAPTURE: | |
410 return SessionStopAudioCapture(session, event_args); | |
411 case EVENT_ABORT: | |
412 return SessionAbort(session, event_args); | |
413 case EVENT_RECOGNITION_ERROR: | |
414 return SessionReportError(session, event_args); | |
415 case EVENT_RECOGNITION_ENDED: | |
416 // If we're still interactive it means that no result was received | |
417 // in the meanwhile (otherwise we'd have been detached). | |
418 return SessionReportNoMatch(session, event_args); | |
419 case EVENT_START: | |
420 return DoNothing(session, event_args); | |
421 } | |
422 break; | |
423 case STATE_DETACHED: | |
424 switch (event) { | |
425 case EVENT_ABORT: | |
426 return SessionAbort(session, event_args); | |
427 case EVENT_RECOGNITION_ENDED: | |
428 return SessionDelete(session, event_args); | |
429 case EVENT_START: | |
430 case EVENT_STOP_CAPTURE: | |
431 case EVENT_RECOGNITION_RESULT: | |
432 case EVENT_RECOGNITION_ERROR: | |
433 return DoNothing(session, event_args); | |
434 case EVENT_DETACH: | |
435 return NotFeasible(session, event_args); | |
436 } | |
437 break; | |
438 case STATE_WAITING_FOR_DELETION: | |
439 switch (event) { | |
440 case EVENT_RECOGNITION_ENDED: | |
441 return SessionDelete(session, event_args); | |
442 case EVENT_ABORT: | |
443 case EVENT_START: | |
444 case EVENT_STOP_CAPTURE: | |
445 case EVENT_DETACH: | |
446 case EVENT_RECOGNITION_RESULT: | |
447 case EVENT_RECOGNITION_ERROR: | |
448 return DoNothing(session, event_args); | |
449 } | |
450 break; | |
451 } | |
452 return NotFeasible(session, event_args); | |
453 } | |
454 | |
455 // ----------- Contract for all the FSM evolution functions below ------------- | |
456 // - Are guaranteed to be executed in the IO thread; | |
457 // - Are guaranteed to be not reentrant (themselves and each other); | |
458 // - event_args members are guaranteed to be stable during the call; | |
459 | |
460 SpeechRecognitionManagerImpl::FSMState | |
461 SpeechRecognitionManagerImpl::SessionStart(Session& session, | |
462 const FSMEventArgs& event_args) { | |
463 if (interactive_session_id_ != 0) | |
hans
2012/04/23 16:04:14
indent is off by one
hans
2012/04/23 16:04:14
s/0/kInvalidSessionID/ ?
Primiano Tucci (use gerrit)
2012/04/23 18:32:17
Done.
Primiano Tucci (use gerrit)
2012/04/23 18:32:17
Done.
| |
464 delegate_->DoClose(interactive_session_id_); | |
465 interactive_session_id_ = session.id; | |
466 delegate_->ShowRecognitionRequested(session.id); | |
467 session.recognizer->StartRecognition(); | |
468 return STATE_INTERACTIVE; | |
469 } | |
470 | |
471 SpeechRecognitionManagerImpl::FSMState | |
472 SpeechRecognitionManagerImpl::SessionAbort(Session& session, | |
473 const FSMEventArgs& event_args) { | |
474 if (interactive_session_id_ == session.id) { | |
475 interactive_session_id_ = kSessionIDInvalid; | |
476 delegate_->DoClose(session.id); | |
477 } | |
478 | |
479 // If abort was requested while the recognizer was inactive, delete directly. | |
480 if (session.recognizer == NULL || !session.recognizer->IsActive()) | |
481 return SessionDelete(session, event_args); | |
482 | |
483 // Otherwise issue an abort and delete gracefully, waiting for a | |
484 // RECOGNITION_ENDED event first. | |
485 session.recognizer->AbortRecognition(); | |
486 return STATE_WAITING_FOR_DELETION; | |
487 } | |
488 | |
489 SpeechRecognitionManagerImpl::FSMState | |
490 SpeechRecognitionManagerImpl::SessionStopAudioCapture( | |
491 Session& session, const FSMEventArgs& event_args) { | |
492 DCHECK(session.recognizer != NULL); | |
493 DCHECK(session.recognizer->IsActive()); | |
494 if (session.recognizer->IsCapturingAudio()) | |
495 session.recognizer->StopAudioCapture(); | |
496 return STATE_INTERACTIVE; | |
497 } | |
498 | |
499 SpeechRecognitionManagerImpl::FSMState | |
500 SpeechRecognitionManagerImpl::SessionAbortIfCapturingAudioOrDetach( | |
501 Session& session, const FSMEventArgs& event_args) { | |
502 DCHECK_EQ(interactive_session_id_, session.id); | |
503 | |
504 DCHECK(session.recognizer != NULL); | |
505 DCHECK(session.recognizer->IsActive()); | |
506 if (session.recognizer->IsCapturingAudio()) | |
507 return SessionAbort(session, event_args); | |
508 | |
509 interactive_session_id_ = kSessionIDInvalid; | |
510 delegate_->DoClose(session.id); | |
511 return STATE_DETACHED; | |
512 } | |
513 | |
514 | |
515 SpeechRecognitionManagerImpl::FSMState | |
516 SpeechRecognitionManagerImpl::SessionDetach(Session& session, | |
517 const FSMEventArgs& event_args) { | |
518 DCHECK_EQ(interactive_session_id_, session.id); | |
519 interactive_session_id_ = kSessionIDInvalid; | |
520 delegate_->DoClose(session.id); | |
521 return STATE_DETACHED; | |
522 } | |
523 | |
524 SpeechRecognitionManagerImpl::FSMState | |
525 SpeechRecognitionManagerImpl::SessionReportError( | |
526 Session& session, const FSMEventArgs& event_args) { | |
527 DCHECK_EQ(interactive_session_id_, session.id); | |
528 delegate_->ShowError(session.id, event_args.speech_error); | |
529 return STATE_IDLE; | |
530 } | |
531 | |
532 SpeechRecognitionManagerImpl::FSMState | |
533 SpeechRecognitionManagerImpl::SessionReportNoMatch( | |
534 Session& session, const FSMEventArgs& event_args) { | |
535 DCHECK_EQ(interactive_session_id_, session.id); | |
536 delegate_->ShowError( | |
537 session.id, | |
538 SpeechRecognitionError(content::SPEECH_RECOGNITION_ERROR_NO_MATCH)); | |
539 return STATE_IDLE; | |
540 } | |
541 | |
542 SpeechRecognitionManagerImpl::FSMState | |
543 SpeechRecognitionManagerImpl::SessionDelete(Session& session, | |
544 const FSMEventArgs& event_args) { | |
545 DCHECK(session.recognizer == NULL || !session.recognizer->IsActive()); | |
546 if (interactive_session_id_ == session.id) { | |
547 interactive_session_id_ = kSessionIDInvalid; | |
548 delegate_->DoClose(session.id); | |
549 } | |
550 sessions_.erase(session.id); | |
551 // Next state is ininfluent, the session will be deleted afterwards. | |
hans
2012/04/23 16:04:14
ininfluent?
Primiano Tucci (use gerrit)
2012/04/23 18:32:17
irrelevant! :)
| |
552 return STATE_WAITING_FOR_DELETION; | |
553 } | |
554 | |
555 SpeechRecognitionManagerImpl::FSMState | |
556 SpeechRecognitionManagerImpl::DoNothing(Session& session, | |
557 const FSMEventArgs& event_args) { | |
558 return session.state; | |
559 } | |
560 | |
561 SpeechRecognitionManagerImpl::FSMState | |
562 SpeechRecognitionManagerImpl::NotFeasible(Session& session, | |
563 const FSMEventArgs& event_args) { | |
564 NOTREACHED() << "Unfeasible event " << event_args.event | |
565 << " in state " << session.state | |
566 << " for session " << session.id; | |
567 return session.state; | |
568 } | |
569 | |
570 int SpeechRecognitionManagerImpl::GetNextSessionID() { | |
571 ++last_session_id_; | |
572 // Deal with wrapping of last_session_id_. (How civilized). | |
573 if (last_session_id_ <= 0) | |
574 last_session_id_ = 1; | |
575 return last_session_id_; | |
576 } | |
577 | |
578 bool SpeechRecognitionManagerImpl::SessionExists(int session_id) const { | |
579 return sessions_.find(session_id) != sessions_.end(); | |
580 } | |
581 | |
582 SpeechRecognitionEventListener* SpeechRecognitionManagerImpl::GetListener( | |
583 int session_id) const { | |
584 SessionsTable::const_iterator iter = sessions_.find(session_id); | |
585 DCHECK(iter != sessions_.end()); | |
586 return iter->second.event_listener; | |
587 } | |
588 | |
84 | 589 |
85 bool SpeechRecognitionManagerImpl::HasAudioInputDevices() { | 590 bool SpeechRecognitionManagerImpl::HasAudioInputDevices() { |
86 return BrowserMainLoop::GetAudioManager()->HasAudioInputDevices(); | 591 return BrowserMainLoop::GetAudioManager()->HasAudioInputDevices(); |
87 } | 592 } |
88 | 593 |
89 bool SpeechRecognitionManagerImpl::IsCapturingAudio() { | 594 bool SpeechRecognitionManagerImpl::IsCapturingAudio() { |
90 return BrowserMainLoop::GetAudioManager()->IsRecordingInProcess(); | 595 return BrowserMainLoop::GetAudioManager()->IsRecordingInProcess(); |
91 } | 596 } |
92 | 597 |
93 string16 SpeechRecognitionManagerImpl::GetAudioInputDeviceModel() { | 598 string16 SpeechRecognitionManagerImpl::GetAudioInputDeviceModel() { |
94 return BrowserMainLoop::GetAudioManager()->GetAudioInputDeviceModel(); | 599 return BrowserMainLoop::GetAudioManager()->GetAudioInputDeviceModel(); |
95 } | 600 } |
96 | 601 |
97 bool SpeechRecognitionManagerImpl::HasPendingRequest(int session_id) const { | |
98 return requests_.find(session_id) != requests_.end(); | |
99 } | |
100 | |
101 InputTagSpeechDispatcherHost* SpeechRecognitionManagerImpl::GetDelegate( | |
102 int session_id) const { | |
103 return requests_.find(session_id)->second.delegate; | |
104 } | |
105 | |
106 void SpeechRecognitionManagerImpl::ShowAudioInputSettings() { | 602 void SpeechRecognitionManagerImpl::ShowAudioInputSettings() { |
107 // Since AudioManager::ShowAudioInputSettings can potentially launch external | 603 // Since AudioManager::ShowAudioInputSettings can potentially launch external |
108 // processes, do that in the FILE thread to not block the calling threads. | 604 // processes, do that in the FILE thread to not block the calling threads. |
109 if (!BrowserThread::CurrentlyOn(BrowserThread::FILE)) { | 605 if (!BrowserThread::CurrentlyOn(BrowserThread::FILE)) { |
110 BrowserThread::PostTask( | 606 BrowserThread::PostTask( |
111 BrowserThread::FILE, FROM_HERE, | 607 BrowserThread::FILE, FROM_HERE, |
112 base::Bind(&SpeechRecognitionManagerImpl::ShowAudioInputSettings, | 608 base::Bind(&SpeechRecognitionManagerImpl::ShowAudioInputSettings, |
113 base::Unretained(this))); | 609 base::Unretained(this))); |
114 return; | 610 return; |
115 } | 611 } |
116 | 612 |
117 media::AudioManager* audio_manager = BrowserMainLoop::GetAudioManager(); | 613 media::AudioManager* audio_manager = BrowserMainLoop::GetAudioManager(); |
118 DCHECK(audio_manager->CanShowAudioInputSettings()); | 614 DCHECK(audio_manager->CanShowAudioInputSettings()); |
119 if (audio_manager->CanShowAudioInputSettings()) | 615 if (audio_manager->CanShowAudioInputSettings()) |
120 audio_manager->ShowAudioInputSettings(); | 616 audio_manager->ShowAudioInputSettings(); |
121 } | 617 } |
122 | 618 |
123 void SpeechRecognitionManagerImpl::StartRecognition( | 619 SpeechRecognitionManagerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value) |
124 InputTagSpeechDispatcherHost* delegate, | 620 : event(event_value), |
125 int session_id, | 621 speech_error(content::SPEECH_RECOGNITION_ERROR_NONE) { |
126 int render_process_id, | 622 } |
127 int render_view_id, | 623 |
128 const gfx::Rect& element_rect, | 624 SpeechRecognitionManagerImpl::FSMEventArgs::~FSMEventArgs() { |
129 const std::string& language, | 625 } |
130 const std::string& grammar, | 626 |
131 const std::string& origin_url, | 627 SpeechRecognitionManagerImpl::Session::Session() |
132 net::URLRequestContextGetter* context_getter, | 628 : id(kSessionIDInvalid), |
133 content::SpeechRecognitionPreferences* recognition_prefs) { | 629 event_listener(NULL), |
134 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | 630 state(STATE_IDLE) { |
135 BrowserThread::PostTask( | 631 } |
136 BrowserThread::UI, FROM_HERE, | 632 |
137 base::Bind( | 633 SpeechRecognitionManagerImpl::Session::~Session() { |
138 &SpeechRecognitionManagerImpl::CheckRenderViewTypeAndStartRecognition, | |
139 base::Unretained(this), | |
140 SpeechRecognitionParams( | |
141 delegate, session_id, render_process_id, render_view_id, | |
142 element_rect, language, grammar, origin_url, context_getter, | |
143 recognition_prefs))); | |
144 } | |
145 | |
146 void SpeechRecognitionManagerImpl::CheckRenderViewTypeAndStartRecognition( | |
147 const SpeechRecognitionParams& params) { | |
148 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); | |
149 | |
150 RenderViewHostImpl* render_view_host = RenderViewHostImpl::FromID( | |
151 params.render_process_id, params.render_view_id); | |
152 if (!render_view_host || !render_view_host->GetDelegate()) | |
153 return; | |
154 | |
155 // For host delegates other than VIEW_TYPE_WEB_CONTENTS we can't reliably show | |
156 // a popup, including the speech input bubble. In these cases for privacy | |
157 // reasons we don't want to start recording if the user can't be properly | |
158 // notified. An example of this is trying to show the speech input bubble | |
159 // within an extension popup: http://crbug.com/92083. In these situations the | |
160 // speech input extension API should be used instead. | |
161 if (render_view_host->GetDelegate()->GetRenderViewType() == | |
162 content::VIEW_TYPE_WEB_CONTENTS) { | |
163 BrowserThread::PostTask( | |
164 BrowserThread::IO, FROM_HERE, | |
165 base::Bind(&SpeechRecognitionManagerImpl::ProceedStartingRecognition, | |
166 base::Unretained(this), params)); | |
167 } | |
168 } | |
169 | |
170 void SpeechRecognitionManagerImpl::ProceedStartingRecognition( | |
171 const SpeechRecognitionParams& params) { | |
172 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | |
173 DCHECK(!HasPendingRequest(params.session_id)); | |
174 | |
175 if (delegate_.get()) { | |
176 delegate_->ShowRecognitionRequested( | |
177 params.session_id, params.render_process_id, params.render_view_id, | |
178 params.element_rect); | |
179 delegate_->GetRequestInfo(&can_report_metrics_, &request_info_); | |
180 } | |
181 | |
182 Request* request = &requests_[params.session_id]; | |
183 request->delegate = params.delegate; | |
184 request->recognizer = content::SpeechRecognizer::Create( | |
185 this, params.session_id, params.language, params.grammar, | |
186 params.context_getter, params.recognition_prefs->FilterProfanities(), | |
187 request_info_, can_report_metrics_ ? params.origin_url : ""); | |
188 request->is_active = false; | |
189 | |
190 StartRecognitionForRequest(params.session_id); | |
191 } | |
192 | |
193 void SpeechRecognitionManagerImpl::StartRecognitionForRequest(int session_id) { | |
194 SpeechRecognizerMap::iterator request = requests_.find(session_id); | |
195 if (request == requests_.end()) { | |
196 NOTREACHED(); | |
197 return; | |
198 } | |
199 | |
200 // We should not currently be recording for the session. | |
201 CHECK(recording_session_id_ != session_id); | |
202 | |
203 // If we are currently recording audio for another session, abort it cleanly. | |
204 if (recording_session_id_) | |
205 CancelRecognitionAndInformDelegate(recording_session_id_); | |
206 recording_session_id_ = session_id; | |
207 requests_[session_id].is_active = true; | |
208 requests_[session_id].recognizer->StartRecognition(); | |
209 if (delegate_.get()) | |
210 delegate_->ShowWarmUp(session_id); | |
211 } | |
212 | |
213 void SpeechRecognitionManagerImpl::CancelRecognitionForRequest(int session_id) { | |
214 // Ignore if the session id was not in our active recognizers list because the | |
215 // user might have clicked more than once, or recognition could have been | |
216 // ended due to other reasons before the user click was processed. | |
217 if (!HasPendingRequest(session_id)) | |
218 return; | |
219 | |
220 CancelRecognitionAndInformDelegate(session_id); | |
221 } | |
222 | |
223 void SpeechRecognitionManagerImpl::FocusLostForRequest(int session_id) { | |
224 // See above comment. | |
225 if (!HasPendingRequest(session_id)) | |
226 return; | |
227 | |
228 // If this is an ongoing recording or if we were displaying an error message | |
229 // to the user, abort it since user has switched focus. Otherwise | |
230 // recognition has started and keep that going so user can start speaking to | |
231 // another element while this gets the results in parallel. | |
232 if (recording_session_id_ == session_id || !requests_[session_id].is_active) | |
233 CancelRecognitionAndInformDelegate(session_id); | |
234 } | |
235 | |
236 void SpeechRecognitionManagerImpl::CancelRecognition(int session_id) { | |
237 DCHECK(HasPendingRequest(session_id)); | |
238 if (requests_[session_id].is_active) | |
239 requests_[session_id].recognizer->AbortRecognition(); | |
240 requests_.erase(session_id); | |
241 if (recording_session_id_ == session_id) | |
242 recording_session_id_ = 0; | |
243 if (delegate_.get()) | |
244 delegate_->DoClose(session_id); | |
245 } | |
246 | |
247 void SpeechRecognitionManagerImpl::CancelAllRequestsWithDelegate( | |
248 InputTagSpeechDispatcherHost* delegate) { | |
249 SpeechRecognizerMap::iterator it = requests_.begin(); | |
250 while (it != requests_.end()) { | |
251 if (it->second.delegate == delegate) { | |
252 CancelRecognition(it->first); | |
253 // This map will have very few elements so it is simpler to restart. | |
254 it = requests_.begin(); | |
255 } else { | |
256 ++it; | |
257 } | |
258 } | |
259 } | |
260 | |
261 void SpeechRecognitionManagerImpl::StopRecording(int session_id) { | |
262 // No pending requests on extension popups. | |
263 if (!HasPendingRequest(session_id)) | |
264 return; | |
265 | |
266 requests_[session_id].recognizer->StopAudioCapture(); | |
267 } | |
268 | |
269 // -------- SpeechRecognitionEventListener interface implementation. --------- | |
270 | |
271 void SpeechRecognitionManagerImpl::OnRecognitionResult( | |
272 int session_id, const content::SpeechRecognitionResult& result) { | |
273 DCHECK(HasPendingRequest(session_id)); | |
274 GetDelegate(session_id)->SetRecognitionResult(session_id, result); | |
275 } | |
276 | |
277 void SpeechRecognitionManagerImpl::OnAudioEnd(int session_id) { | |
278 if (recording_session_id_ != session_id) | |
279 return; | |
280 DCHECK_EQ(recording_session_id_, session_id); | |
281 DCHECK(HasPendingRequest(session_id)); | |
282 if (!requests_[session_id].is_active) | |
283 return; | |
284 recording_session_id_ = 0; | |
285 GetDelegate(session_id)->DidCompleteRecording(session_id); | |
286 if (delegate_.get()) | |
287 delegate_->ShowRecognizing(session_id); | |
288 } | |
289 | |
290 void SpeechRecognitionManagerImpl::OnRecognitionEnd(int session_id) { | |
291 if (!HasPendingRequest(session_id) || !requests_[session_id].is_active) | |
292 return; | |
293 GetDelegate(session_id)->DidCompleteRecognition(session_id); | |
294 requests_.erase(session_id); | |
295 if (delegate_.get()) | |
296 delegate_->DoClose(session_id); | |
297 } | |
298 | |
299 void SpeechRecognitionManagerImpl::OnSoundStart(int session_id) { | |
300 } | |
301 | |
302 void SpeechRecognitionManagerImpl::OnSoundEnd(int session_id) { | |
303 } | |
304 | |
305 void SpeechRecognitionManagerImpl::OnRecognitionError( | |
306 int session_id, const content::SpeechRecognitionError& error) { | |
307 DCHECK(HasPendingRequest(session_id)); | |
308 if (session_id == recording_session_id_) | |
309 recording_session_id_ = 0; | |
310 requests_[session_id].is_active = false; | |
311 if (delegate_.get()) { | |
312 if (error.code == content::SPEECH_RECOGNITION_ERROR_AUDIO && | |
313 error.details == content::SPEECH_AUDIO_ERROR_DETAILS_NO_MIC) { | |
314 delegate_->ShowMicError(session_id, | |
315 SpeechRecognitionManagerDelegate::MIC_ERROR_NO_DEVICE_AVAILABLE); | |
316 } else if (error.code == content::SPEECH_RECOGNITION_ERROR_AUDIO && | |
317 error.details == content::SPEECH_AUDIO_ERROR_DETAILS_IN_USE) { | |
318 delegate_->ShowMicError(session_id, | |
319 SpeechRecognitionManagerDelegate::MIC_ERROR_DEVICE_IN_USE); | |
320 } else { | |
321 delegate_->ShowRecognizerError(session_id, error.code); | |
322 } | |
323 } | |
324 } | |
325 | |
326 void SpeechRecognitionManagerImpl::OnAudioStart(int session_id) { | |
327 DCHECK(HasPendingRequest(session_id)); | |
328 DCHECK_EQ(recording_session_id_, session_id); | |
329 if (delegate_.get()) | |
330 delegate_->ShowRecording(session_id); | |
331 } | |
332 | |
333 void SpeechRecognitionManagerImpl::OnRecognitionStart(int session_id) { | |
334 } | |
335 | |
336 void SpeechRecognitionManagerImpl::OnEnvironmentEstimationComplete( | |
337 int session_id) { | |
338 DCHECK(HasPendingRequest(session_id)); | |
339 DCHECK_EQ(recording_session_id_, session_id); | |
340 } | |
341 | |
342 void SpeechRecognitionManagerImpl::OnAudioLevelsChange( | |
343 int session_id, float volume, float noise_volume) { | |
344 DCHECK(HasPendingRequest(session_id)); | |
345 DCHECK_EQ(recording_session_id_, session_id); | |
346 if (delegate_.get()) | |
347 delegate_->ShowInputVolume(session_id, volume, noise_volume); | |
348 } | |
349 | |
350 void SpeechRecognitionManagerImpl::CancelRecognitionAndInformDelegate( | |
351 int session_id) { | |
352 InputTagSpeechDispatcherHost* cur_delegate = GetDelegate(session_id); | |
353 CancelRecognition(session_id); | |
354 cur_delegate->DidCompleteRecording(session_id); | |
355 cur_delegate->DidCompleteRecognition(session_id); | |
356 } | |
357 | |
358 SpeechRecognitionManagerImpl::Request::Request() | |
359 : is_active(false) { | |
360 } | |
361 | |
362 SpeechRecognitionManagerImpl::Request::~Request() { | |
363 } | 634 } |
364 | 635 |
365 } // namespace speech | 636 } // namespace speech |
OLD | NEW |