Side by Side Diff: content/browser/speech/speech_recognition_manager_impl.cc

Issue 9972011: Speech refactoring: Reimplemented SpeechRecognitionManagerImpl as a FSM. (CL1.7) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Further Satish comments. Created 8 years, 8 months ago
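For orientation while reading the diff: this CL drives each recognition session through a small finite state machine. The state and event names used throughout the new code are declared in speech_recognition_manager_impl.h, which is not part of this diff, so the sketch below is reconstructed from their uses in this file; the ordering and the *_MAX_VALUE aliases are assumptions.

    // Sketch only; the real declarations live in the (unshown) header.
    enum FSMState {
      STATE_IDLE = 0,              // Just created, or back from an error.
      STATE_INTERACTIVE,           // Owns the UI; capturing audio or waiting for a result.
      STATE_BACKGROUND,            // No longer interactive; still receiving recognizer events.
      STATE_WAITING_FOR_DELETION,  // Aborted; waiting for RECOGNITION_ENDED to free resources.
      STATE_MAX_VALUE = STATE_WAITING_FOR_DELETION
    };
    enum FSMEvent {
      EVENT_START = 0,
      EVENT_ABORT,
      EVENT_STOP_CAPTURE,
      EVENT_SET_BACKGROUND,
      EVENT_RECOGNITION_RESULT,
      EVENT_RECOGNITION_ERROR,
      EVENT_RECOGNITION_ENDED,
      EVENT_MAX_VALUE = EVENT_RECOGNITION_ENDED
    };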
OLD | NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "content/browser/speech/speech_recognition_manager_impl.h" 5 #include "content/browser/speech/speech_recognition_manager_impl.h"
6 6
7 #include "base/bind.h" 7 #include "base/bind.h"
8 #include "base/memory/singleton.h"
8 #include "content/browser/browser_main_loop.h" 9 #include "content/browser/browser_main_loop.h"
9 #include "content/browser/renderer_host/render_view_host_impl.h" 10 #include "content/browser/speech/google_one_shot_remote_engine.h"
10 #include "content/browser/speech/input_tag_speech_dispatcher_host.h" 11 #include "content/browser/speech/speech_recognition_engine.h"
12 #include "content/browser/speech/speech_recognizer_impl.h"
11 #include "content/public/browser/browser_thread.h" 13 #include "content/public/browser/browser_thread.h"
12 #include "content/public/browser/content_browser_client.h" 14 #include "content/public/browser/content_browser_client.h"
13 #include "content/public/browser/speech_recognizer.h"
14 #include "content/public/browser/render_view_host_delegate.h"
15 #include "content/public/browser/resource_context.h" 15 #include "content/public/browser/resource_context.h"
16 #include "content/public/browser/speech_recognition_event_listener.h"
16 #include "content/public/browser/speech_recognition_manager_delegate.h" 17 #include "content/public/browser/speech_recognition_manager_delegate.h"
17 #include "content/public/browser/speech_recognition_preferences.h" 18 #include "content/public/browser/speech_recognition_session_config.h"
18 #include "content/public/common/view_type.h" 19 #include "content/public/browser/speech_recognition_session_context.h"
20 #include "content/public/common/speech_recognition_result.h"
19 #include "media/audio/audio_manager.h" 21 #include "media/audio/audio_manager.h"
20 22
23 using base::Callback;
24 using base::Unretained;
21 using content::BrowserMainLoop; 25 using content::BrowserMainLoop;
22 using content::BrowserThread; 26 using content::BrowserThread;
23 using content::RenderViewHostImpl; 27 using content::SpeechRecognitionError;
28 using content::SpeechRecognitionEventListener;
24 using content::SpeechRecognitionManager; 29 using content::SpeechRecognitionManager;
25 using content::SpeechRecognitionManagerDelegate; 30 using content::SpeechRecognitionResult;
31 using content::SpeechRecognitionSessionContext;
32 using content::SpeechRecognitionSessionConfig;
33
34 namespace {
35
36 // A dummy implementation of the SpeechRecognitionManagerDelegate interface
37 // used when no delegate has been passed to the SpeechRecognitionManagerImpl.
38 class VoidRecognitionManagerDelegate :
jam 2012/04/24 15:56:32 the convention we have for other delegates is to n
Primiano Tucci (use gerrit) 2012/04/25 11:30:03 I thought it was more efficient and "robust" (no n
39 public content::SpeechRecognitionManagerDelegate {
40 public:
41 static VoidRecognitionManagerDelegate* GetInstance() {
42 return Singleton<VoidRecognitionManagerDelegate>::get();
43 }
44 virtual void GetDiagnosticInformation(
45 bool* can_report_metrics, std::string* request_info) OVERRIDE {}
46 virtual bool IsRecognitionAllowed(int session_id) OVERRIDE { return false; }
47 virtual void ShowRecognitionRequested(int session_id) OVERRIDE {}
48 virtual void ShowWarmUp(int session_id) OVERRIDE {}
49 virtual void ShowRecognizing(int session_id) OVERRIDE {}
50 virtual void ShowRecording(int session_id) OVERRIDE {}
51 virtual void ShowInputVolume(
52 int session_id, float volume, float noise_volume) OVERRIDE {}
53 virtual void ShowError(int session_id,
54 const content::SpeechRecognitionError& error) OVERRIDE {}
55 virtual void DoClose(int session_id) OVERRIDE {}
56
57 private:
58 VoidRecognitionManagerDelegate() {}
59 virtual ~VoidRecognitionManagerDelegate() {}
60 friend struct DefaultSingletonTraits<VoidRecognitionManagerDelegate>;
61 };
62
63 } // namespace
64
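The class above is a null-object implementation of the delegate interface: the manager's constructor falls back to it when the embedder supplies no delegate, so call sites can invoke delegate_ unconditionally. A minimal sketch of the effect at a call site (illustrative, not part of the CL):

    // Without the fallback, every notification needs a guard:
    if (delegate_ != NULL)
      delegate_->ShowWarmUp(session_id);
    // With the void delegate installed in the constructor, the call is unconditional:
    delegate_->ShowWarmUp(session_id);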
65 namespace content {
66 const int SpeechRecognitionManager::kSessionIDInvalid = 0;
26 67
27 SpeechRecognitionManager* SpeechRecognitionManager::GetInstance() { 68 SpeechRecognitionManager* SpeechRecognitionManager::GetInstance() {
28 return speech::SpeechRecognitionManagerImpl::GetInstance(); 69 return speech::SpeechRecognitionManagerImpl::GetInstance();
29 } 70 }
71 } // namespace content
30 72
31 namespace speech { 73 namespace speech {
32 74
33 struct SpeechRecognitionManagerImpl::SpeechRecognitionParams {
34 SpeechRecognitionParams(
35 InputTagSpeechDispatcherHost* delegate,
36 int session_id,
37 int render_process_id,
38 int render_view_id,
39 const gfx::Rect& element_rect,
40 const std::string& language,
41 const std::string& grammar,
42 const std::string& origin_url,
43 net::URLRequestContextGetter* context_getter,
44 content::SpeechRecognitionPreferences* recognition_prefs)
45 : delegate(delegate),
46 session_id(session_id),
47 render_process_id(render_process_id),
48 render_view_id(render_view_id),
49 element_rect(element_rect),
50 language(language),
51 grammar(grammar),
52 origin_url(origin_url),
53 context_getter(context_getter),
54 recognition_prefs(recognition_prefs) {
55 }
56
57 InputTagSpeechDispatcherHost* delegate;
58 int session_id;
59 int render_process_id;
60 int render_view_id;
61 gfx::Rect element_rect;
62 std::string language;
63 std::string grammar;
64 std::string origin_url;
65 net::URLRequestContextGetter* context_getter;
66 content::SpeechRecognitionPreferences* recognition_prefs;
67 };
68
69 SpeechRecognitionManagerImpl* SpeechRecognitionManagerImpl::GetInstance() { 75 SpeechRecognitionManagerImpl* SpeechRecognitionManagerImpl::GetInstance() {
70 return Singleton<SpeechRecognitionManagerImpl>::get(); 76 return Singleton<SpeechRecognitionManagerImpl>::get();
71 } 77 }
72 78
73 SpeechRecognitionManagerImpl::SpeechRecognitionManagerImpl() 79 SpeechRecognitionManagerImpl::SpeechRecognitionManagerImpl()
74 : can_report_metrics_(false), 80 : interactive_session_id_(kSessionIDInvalid),
75 recording_session_id_(0) { 81 last_session_id_(kSessionIDInvalid),
76 delegate_.reset(content::GetContentClient()->browser()-> 82 is_dispatching_event_(false) {
77 GetSpeechRecognitionManagerDelegate()); 83 delegate_ = content::GetContentClient()->browser()->
84 GetSpeechRecognitionManagerDelegate();
 85 // If no delegate has been provided, instantiate a void delegate so we can
 86 // avoid unaesthetic "if (delegate_ != NULL)" statements.
87 if (delegate_ == NULL)
88 delegate_ = VoidRecognitionManagerDelegate::GetInstance();
78 } 89 }
79 90
80 SpeechRecognitionManagerImpl::~SpeechRecognitionManagerImpl() { 91 SpeechRecognitionManagerImpl::~SpeechRecognitionManagerImpl() {
81 while (requests_.begin() != requests_.end()) 92 // Recognition sessions will be aborted by the corresponding destructors.
82 CancelRecognition(requests_.begin()->first); 93 sessions_.clear();
83 } 94 }
95
96 int SpeechRecognitionManagerImpl::CreateSession(
97 const SpeechRecognitionSessionConfig& config,
98 SpeechRecognitionEventListener* event_listener) {
99 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
100
101 const int session_id = GetNextSessionID();
102 DCHECK(!SessionExists(session_id));
103 // Set-up the new session.
104 Session& session = sessions_[session_id];
105 session.id = session_id;
106 session.event_listener = event_listener;
107 session.context = config.initial_context;
108
 109 // TODO(primiano) Is this check enough just on creation, or shall we move/copy
 110 // it to SessionStart in order to repeat the check every time?
111 if (!delegate_->IsRecognitionAllowed(session_id)) {
112 sessions_.erase(session_id);
113 return kSessionIDInvalid;
114 }
115
116 std::string hardware_info;
117 bool can_report_metrics;
118 delegate_->GetDiagnosticInformation(&can_report_metrics, &hardware_info);
119
120 GoogleOneShotRemoteEngineConfig remote_engine_config;
121 remote_engine_config.language = config.language;
122 remote_engine_config.grammar = config.grammar;
123 remote_engine_config.audio_sample_rate =
124 SpeechRecognizerImpl::kAudioSampleRate;
125 remote_engine_config.audio_num_bits_per_sample =
126 SpeechRecognizerImpl::kNumBitsPerAudioSample;
127 remote_engine_config.filter_profanities = config.filter_profanities;
128 remote_engine_config.hardware_info = hardware_info;
129 remote_engine_config.origin_url = can_report_metrics ? config.origin_url : "";
130
131 GoogleOneShotRemoteEngine* google_remote_engine =
132 new GoogleOneShotRemoteEngine(config.url_request_context_getter);
133 google_remote_engine->SetConfig(remote_engine_config);
134
135 session.recognizer = new SpeechRecognizerImpl(this,
136 session_id,
137 google_remote_engine);
138 return session_id;
139 }
140
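For reviewers coming from the old StartRecognition() entry point, here is a rough sketch of how a caller (e.g. a dispatcher host that implements SpeechRecognitionEventListener) is expected to drive the new two-step API. The config fields are the ones read by CreateSession above; the concrete values and the surrounding variables are illustrative.

    content::SpeechRecognitionSessionConfig config;
    config.language = "en-US";                        // illustrative
    config.grammar = grammar;
    config.filter_profanities = filter_profanities;
    config.origin_url = origin_url;
    config.url_request_context_getter = context_getter;
    config.initial_context = context;                 // a SpeechRecognitionSessionContext

    speech::SpeechRecognitionManagerImpl* manager =
        speech::SpeechRecognitionManagerImpl::GetInstance();
    int session_id = manager->CreateSession(config, this /* event listener */);
    if (session_id == content::SpeechRecognitionManager::kSessionIDInvalid)
      return;  // The delegate denied recognition for this session.
    manager->StartSession(session_id);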
141 void SpeechRecognitionManagerImpl::StartSession(int session_id) {
142 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
143 DCHECK(SessionExists(session_id));
144
145 // If there is another interactive session, send it to background.
146 if (interactive_session_id_ != kSessionIDInvalid &&
147 interactive_session_id_ != session_id) {
148 SendSessionToBackground(interactive_session_id_);
149 }
150
151 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
152 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this),
153 session_id, FSMEventArgs(EVENT_START)));
154 }
155
156 void SpeechRecognitionManagerImpl::AbortSession(int session_id) {
157 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
158 DCHECK(SessionExists(session_id));
159
160 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
161 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this),
162 session_id, FSMEventArgs(EVENT_ABORT)));
163 }
164
165 void SpeechRecognitionManagerImpl::StopAudioCaptureForSession(int session_id) {
166 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
167 DCHECK(SessionExists(session_id));
168
169 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
170 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this),
171 session_id, FSMEventArgs(EVENT_STOP_CAPTURE)));
172 }
173
174 void SpeechRecognitionManagerImpl::SendSessionToBackground(int session_id) {
175 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
176 DCHECK(SessionExists(session_id));
177
178 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
179 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this),
180 session_id, FSMEventArgs(EVENT_SET_BACKGROUND)));
181 }
182
183 // Here begins the SpeechRecognitionEventListener interface implementation,
184 // which will simply relay the events to the proper listener registered for the
185 // particular session (most likely InputTagSpeechDispatcherHost) and intercept
186 // some of them to provide UI notifications.
187
188 void SpeechRecognitionManagerImpl::OnRecognitionStart(int session_id) {
189 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
190 if (!SessionExists(session_id))
191 return;
192
193 DCHECK_EQ(interactive_session_id_, session_id);
194 delegate_->ShowWarmUp(session_id);
195 GetListener(session_id)->OnRecognitionStart(session_id);
196 }
197
198 void SpeechRecognitionManagerImpl::OnAudioStart(int session_id) {
199 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
200 if (!SessionExists(session_id))
201 return;
202
203 DCHECK_EQ(interactive_session_id_, session_id);
204 delegate_->ShowRecording(session_id);
205 GetListener(session_id)->OnAudioStart(session_id);
206 }
207
208 void SpeechRecognitionManagerImpl::OnEnvironmentEstimationComplete(
209 int session_id) {
210 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
211 if (!SessionExists(session_id))
212 return;
213
214 DCHECK_EQ(interactive_session_id_, session_id);
215 GetListener(session_id)->OnEnvironmentEstimationComplete(session_id);
216 }
217
218 void SpeechRecognitionManagerImpl::OnSoundStart(int session_id) {
219 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
220 if (!SessionExists(session_id))
221 return;
222
223 DCHECK_EQ(interactive_session_id_, session_id);
224 GetListener(session_id)->OnSoundStart(session_id);
225 }
226
227 void SpeechRecognitionManagerImpl::OnSoundEnd(int session_id) {
228 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
229 if (!SessionExists(session_id))
230 return;
231
232 GetListener(session_id)->OnSoundEnd(session_id);
233 }
234
235 void SpeechRecognitionManagerImpl::OnAudioEnd(int session_id) {
236 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
237 if (!SessionExists(session_id))
238 return;
239
240 // OnAudioEnd can also be raised after an abort request, when the session is
241 // not interactive anymore.
242 if (interactive_session_id_ == session_id)
243 delegate_->ShowRecognizing(session_id);
244
245 GetListener(session_id)->OnAudioEnd(session_id);
246 }
247
248 void SpeechRecognitionManagerImpl::OnRecognitionResult(
249 int session_id, const content::SpeechRecognitionResult& result) {
250 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
251 if (!SessionExists(session_id))
252 return;
253
254 GetListener(session_id)->OnRecognitionResult(session_id, result);
255 FSMEventArgs event_args(EVENT_RECOGNITION_RESULT);
256 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
257 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this),
258 session_id, event_args));
259 }
260
261 void SpeechRecognitionManagerImpl::OnRecognitionError(
262 int session_id, const content::SpeechRecognitionError& error) {
263 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
264 if (!SessionExists(session_id))
265 return;
266
267 GetListener(session_id)->OnRecognitionError(session_id, error);
268 FSMEventArgs event_args(EVENT_RECOGNITION_ERROR);
269 event_args.speech_error = error;
270 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
271 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this),
272 session_id, event_args));
273 }
274
275 void SpeechRecognitionManagerImpl::OnAudioLevelsChange(
276 int session_id, float volume, float noise_volume) {
277 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
278 if (!SessionExists(session_id))
279 return;
280
281 delegate_->ShowInputVolume(session_id, volume, noise_volume);
282 GetListener(session_id)->OnAudioLevelsChange(session_id, volume,
283 noise_volume);
284 }
285
286 void SpeechRecognitionManagerImpl::OnRecognitionEnd(int session_id) {
287 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
288 if (!SessionExists(session_id))
289 return;
290
291 GetListener(session_id)->OnRecognitionEnd(session_id);
292 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
293 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent, Unretained(this),
294 session_id, FSMEventArgs(EVENT_RECOGNITION_ENDED)));
295 }
296
 297 // TODO(primiano) After CL2: if we see that both InputTagSpeechDispatcherHost
 298 // and SpeechRecognitionDispatcherHost do the same lookup operations, implement
 299 // the lookup method directly here.
300 int SpeechRecognitionManagerImpl::LookupSessionByContext(
301 Callback<bool(const SpeechRecognitionSessionContext&)> matcher) const {
302 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
303 SessionsTable::const_iterator iter;
304 // Note: the callback (matcher) must NEVER perform non-const calls on us.
 305   for (iter = sessions_.begin(); iter != sessions_.end(); ++iter) {
306 const int session_id = iter->first;
307 const Session& session = iter->second;
308 bool matches = matcher.Run(session.context);
309 if (matches)
310 return session_id;
311 }
312 return kSessionIDInvalid;
313 }
314
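Callers are expected to pass a matcher bound with base::Bind; only the callback plumbing below mirrors the method above, while the context member being compared is purely hypothetical (the real fields live in speech_recognition_session_context.h, outside this diff).

    // Hypothetical matcher: find the session created for a given render process.
    static bool ContextMatches(int expected_render_process_id,
                               const content::SpeechRecognitionSessionContext& context) {
      return context.render_process_id == expected_render_process_id;  // assumed field
    }

    int session_id = manager->LookupSessionByContext(
        base::Bind(&ContextMatches, render_process_id));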
315 SpeechRecognitionSessionContext
316 SpeechRecognitionManagerImpl::GetSessionContext(int session_id) const {
317 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
318 SessionsTable::const_iterator iter = sessions_.find(session_id);
319 DCHECK(iter != sessions_.end());
320 return iter->second.context;
321 }
322
323 void SpeechRecognitionManagerImpl::AbortAllSessionsForListener(
324 SpeechRecognitionEventListener* listener) {
325 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
326
327 // AbortSession is asynchronous and the session will not be removed from the
328 // collection while we are iterating over it.
329 for (SessionsTable::iterator it = sessions_.begin(); it != sessions_.end();
330 ++it) {
331 if (it->second.event_listener == listener)
332 AbortSession(it->first);
333 }
334 }
335
336 // ----------------------- Core FSM implementation ---------------------------
337 void SpeechRecognitionManagerImpl::DispatchEvent(int session_id,
338 FSMEventArgs event_args) {
339 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
340 if (!SessionExists(session_id))
341 return;
342
343 Session& session = sessions_[session_id];
344 DCHECK_LE(session.state, STATE_MAX_VALUE);
345 DCHECK_LE(event_args.event, EVENT_MAX_VALUE);
346
 347   // Event dispatching must be sequential; otherwise it would break the rules
 348   // and assumptions of the finite state automaton model.
349 DCHECK(!is_dispatching_event_);
350 is_dispatching_event_ = true;
351
 352   // Pedantic precondition consistency checks.
353 if (session.state == STATE_INTERACTIVE)
354 DCHECK_EQ(interactive_session_id_, session_id);
355
356 if (session.state == STATE_BACKGROUND ||
357 session.state == STATE_WAITING_FOR_DELETION) {
358 DCHECK_NE(interactive_session_id_, session_id);
359 }
360
361 session.state = ExecuteTransitionAndGetNextState(session, event_args);
362
363 is_dispatching_event_ = false;
364 }
365
 366 // This FSM handles the evolution of each session, from the viewpoint of the
 367 // interaction with the user (who may be either the browser end-user
 368 // interacting with UI bubbles, or a JS developer interacting with JS methods).
 369 // All the events received by the SpeechRecognizerImpl instances (one for each
 370 // session) are always routed to the SpeechRecognitionEventListener(s),
 371 // regardless of the choices taken in this FSM.
372 SpeechRecognitionManagerImpl::FSMState
373 SpeechRecognitionManagerImpl::ExecuteTransitionAndGetNextState(
374 Session& session, const FSMEventArgs& event_args) {
375 // Some notes for the code below:
376 // - A session can be deleted only if it is not active, thus only if it ended
377 // spontaneously or we issued a prior SessionAbort. In these cases, we must
 378   //   wait for a RECOGNITION_ENDED event (which the SpeechRecognizer guarantees
 379   //   to always deliver last) in order to free resources gracefully.
 380   // - Use SessionDelete only when absolutely sure that the recognizer is not
 381   //   active. Otherwise prefer SessionAbort, which shuts down gracefully.
 382   // - Since this class's methods are publicly exported, START, ABORT,
 383   //   STOP_CAPTURE and SET_BACKGROUND events can arrive at any moment from
 384   //   the outside wild world, even if they make no sense.
385 const FSMEvent event = event_args.event;
386 switch (session.state) {
387 case STATE_IDLE:
388 // Session has just been created or had an error while interactive.
389 switch (event) {
390 case EVENT_START:
391 return SessionStart(session, event_args);
392 case EVENT_ABORT:
393 case EVENT_SET_BACKGROUND:
394 return SessionAbort(session, event_args);
395 case EVENT_STOP_CAPTURE:
396 case EVENT_RECOGNITION_ENDED:
 397           // In case of error, we come back to this state before receiving the
398 // OnRecognitionEnd event, thus EVENT_RECOGNITION_ENDED is feasible.
399 return DoNothing(session, event_args);
400 case EVENT_RECOGNITION_RESULT:
401 case EVENT_RECOGNITION_ERROR:
402 return NotFeasible(session, event_args);
403 }
404 break;
405 case STATE_INTERACTIVE:
406 // The recognizer can be either capturing audio or waiting for a result.
407 switch (event) {
408 case EVENT_RECOGNITION_RESULT:
409 // TODO(primiano) Valid only in single shot mode. Review in next CLs.
410 return SessionSetBackground(session, event_args);
411 case EVENT_SET_BACKGROUND:
412 return SessionAbortIfCapturingAudioOrBackground(session, event_args);
413 case EVENT_STOP_CAPTURE:
414 return SessionStopAudioCapture(session, event_args);
415 case EVENT_ABORT:
416 return SessionAbort(session, event_args);
417 case EVENT_RECOGNITION_ERROR:
418 return SessionReportError(session, event_args);
419 case EVENT_RECOGNITION_ENDED:
420 // If we're still interactive it means that no result was received
 421           // in the meantime (otherwise we'd have been sent to background).
422 return SessionReportNoMatch(session, event_args);
423 case EVENT_START:
424 return DoNothing(session, event_args);
425 }
426 break;
427 case STATE_BACKGROUND:
428 switch (event) {
429 case EVENT_ABORT:
430 return SessionAbort(session, event_args);
431 case EVENT_RECOGNITION_ENDED:
432 return SessionDelete(session, event_args);
433 case EVENT_START:
434 case EVENT_STOP_CAPTURE:
435 case EVENT_RECOGNITION_RESULT:
436 case EVENT_RECOGNITION_ERROR:
437 return DoNothing(session, event_args);
438 case EVENT_SET_BACKGROUND:
439 return NotFeasible(session, event_args);
440 }
441 break;
442 case STATE_WAITING_FOR_DELETION:
443 switch (event) {
444 case EVENT_RECOGNITION_ENDED:
445 return SessionDelete(session, event_args);
446 case EVENT_ABORT:
447 case EVENT_START:
448 case EVENT_STOP_CAPTURE:
449 case EVENT_SET_BACKGROUND:
450 case EVENT_RECOGNITION_RESULT:
451 case EVENT_RECOGNITION_ERROR:
452 return DoNothing(session, event_args);
453 }
454 break;
455 }
456 return NotFeasible(session, event_args);
457 }
458
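As a reading aid, the transitions implemented by the switch in ExecuteTransitionAndGetNextState reduce to the following table (derived directly from the code above):

    STATE_IDLE:
      EVENT_START                                        -> SessionStart
      EVENT_ABORT, EVENT_SET_BACKGROUND                  -> SessionAbort
      EVENT_STOP_CAPTURE, EVENT_RECOGNITION_ENDED        -> DoNothing
      EVENT_RECOGNITION_RESULT, EVENT_RECOGNITION_ERROR  -> NotFeasible
    STATE_INTERACTIVE:
      EVENT_RECOGNITION_RESULT   -> SessionSetBackground
      EVENT_SET_BACKGROUND       -> SessionAbortIfCapturingAudioOrBackground
      EVENT_STOP_CAPTURE         -> SessionStopAudioCapture
      EVENT_ABORT                -> SessionAbort
      EVENT_RECOGNITION_ERROR    -> SessionReportError
      EVENT_RECOGNITION_ENDED    -> SessionReportNoMatch
      EVENT_START                -> DoNothing
    STATE_BACKGROUND:
      EVENT_ABORT                -> SessionAbort
      EVENT_RECOGNITION_ENDED    -> SessionDelete
      EVENT_SET_BACKGROUND       -> NotFeasible
      all other events           -> DoNothing
    STATE_WAITING_FOR_DELETION:
      EVENT_RECOGNITION_ENDED    -> SessionDelete
      all other events           -> DoNothing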
459 // ----------- Contract for all the FSM evolution functions below -------------
 460 // - They are guaranteed to be executed on the IO thread;
 461 // - They are guaranteed not to be reentrant (with themselves and each other);
 462 // - event_args members are guaranteed to be stable during the call.
463
464 SpeechRecognitionManagerImpl::FSMState
465 SpeechRecognitionManagerImpl::SessionStart(Session& session,
466 const FSMEventArgs& event_args) {
467 if (interactive_session_id_ != kSessionIDInvalid)
468 delegate_->DoClose(interactive_session_id_);
469 interactive_session_id_ = session.id;
470 delegate_->ShowRecognitionRequested(session.id);
471 session.recognizer->StartRecognition();
472 return STATE_INTERACTIVE;
473 }
474
475 SpeechRecognitionManagerImpl::FSMState
476 SpeechRecognitionManagerImpl::SessionAbort(Session& session,
477 const FSMEventArgs& event_args) {
478 if (interactive_session_id_ == session.id) {
479 interactive_session_id_ = kSessionIDInvalid;
480 delegate_->DoClose(session.id);
481 }
482
483 // If abort was requested while the recognizer was inactive, delete directly.
484 if (session.recognizer == NULL || !session.recognizer->IsActive())
485 return SessionDelete(session, event_args);
486
487 // Otherwise issue an abort and delete gracefully, waiting for a
488 // RECOGNITION_ENDED event first.
489 session.recognizer->AbortRecognition();
490 return STATE_WAITING_FOR_DELETION;
491 }
492
493 SpeechRecognitionManagerImpl::FSMState
494 SpeechRecognitionManagerImpl::SessionStopAudioCapture(
495 Session& session, const FSMEventArgs& event_args) {
496 DCHECK(session.recognizer != NULL);
497 DCHECK(session.recognizer->IsActive());
498 if (session.recognizer->IsCapturingAudio())
499 session.recognizer->StopAudioCapture();
500 return STATE_INTERACTIVE;
501 }
502
503 SpeechRecognitionManagerImpl::FSMState
504 SpeechRecognitionManagerImpl::SessionAbortIfCapturingAudioOrBackground(
505 Session& session, const FSMEventArgs& event_args) {
506 DCHECK_EQ(interactive_session_id_, session.id);
507
508 DCHECK(session.recognizer != NULL);
509 DCHECK(session.recognizer->IsActive());
510 if (session.recognizer->IsCapturingAudio())
511 return SessionAbort(session, event_args);
512
513 interactive_session_id_ = kSessionIDInvalid;
514 delegate_->DoClose(session.id);
515 return STATE_BACKGROUND;
516 }
517
518
519 SpeechRecognitionManagerImpl::FSMState
520 SpeechRecognitionManagerImpl::SessionSetBackground(
521 Session& session, const FSMEventArgs& event_args) {
522 DCHECK_EQ(interactive_session_id_, session.id);
523 interactive_session_id_ = kSessionIDInvalid;
524 delegate_->DoClose(session.id);
525 return STATE_BACKGROUND;
526 }
527
528 SpeechRecognitionManagerImpl::FSMState
529 SpeechRecognitionManagerImpl::SessionReportError(
530 Session& session, const FSMEventArgs& event_args) {
531 DCHECK_EQ(interactive_session_id_, session.id);
532 delegate_->ShowError(session.id, event_args.speech_error);
533 return STATE_IDLE;
534 }
535
536 SpeechRecognitionManagerImpl::FSMState
537 SpeechRecognitionManagerImpl::SessionReportNoMatch(
538 Session& session, const FSMEventArgs& event_args) {
539 DCHECK_EQ(interactive_session_id_, session.id);
540 delegate_->ShowError(
541 session.id,
542 SpeechRecognitionError(content::SPEECH_RECOGNITION_ERROR_NO_MATCH));
543 return STATE_IDLE;
544 }
545
546 SpeechRecognitionManagerImpl::FSMState
547 SpeechRecognitionManagerImpl::SessionDelete(Session& session,
548 const FSMEventArgs& event_args) {
549 DCHECK(session.recognizer == NULL || !session.recognizer->IsActive());
550 if (interactive_session_id_ == session.id) {
551 interactive_session_id_ = kSessionIDInvalid;
552 delegate_->DoClose(session.id);
553 }
554 sessions_.erase(session.id);
 555   // The next state is irrelevant; the session will be deleted afterwards.
556 return STATE_WAITING_FOR_DELETION;
557 }
558
559 SpeechRecognitionManagerImpl::FSMState
560 SpeechRecognitionManagerImpl::DoNothing(Session& session,
561 const FSMEventArgs& event_args) {
562 return session.state;
563 }
564
565 SpeechRecognitionManagerImpl::FSMState
566 SpeechRecognitionManagerImpl::NotFeasible(Session& session,
567 const FSMEventArgs& event_args) {
568 NOTREACHED() << "Unfeasible event " << event_args.event
569 << " in state " << session.state
570 << " for session " << session.id;
571 return session.state;
572 }
573
574 int SpeechRecognitionManagerImpl::GetNextSessionID() {
575 ++last_session_id_;
576 // Deal with wrapping of last_session_id_. (How civilized).
577 if (last_session_id_ <= 0)
578 last_session_id_ = 1;
579 return last_session_id_;
580 }
581
582 bool SpeechRecognitionManagerImpl::SessionExists(int session_id) const {
583 return sessions_.find(session_id) != sessions_.end();
584 }
585
586 SpeechRecognitionEventListener* SpeechRecognitionManagerImpl::GetListener(
587 int session_id) const {
588 SessionsTable::const_iterator iter = sessions_.find(session_id);
589 DCHECK(iter != sessions_.end());
590 return iter->second.event_listener;
591 }
592
84 593
85 bool SpeechRecognitionManagerImpl::HasAudioInputDevices() { 594 bool SpeechRecognitionManagerImpl::HasAudioInputDevices() {
86 return BrowserMainLoop::GetAudioManager()->HasAudioInputDevices(); 595 return BrowserMainLoop::GetAudioManager()->HasAudioInputDevices();
87 } 596 }
88 597
89 bool SpeechRecognitionManagerImpl::IsCapturingAudio() { 598 bool SpeechRecognitionManagerImpl::IsCapturingAudio() {
90 return BrowserMainLoop::GetAudioManager()->IsRecordingInProcess(); 599 return BrowserMainLoop::GetAudioManager()->IsRecordingInProcess();
91 } 600 }
92 601
93 string16 SpeechRecognitionManagerImpl::GetAudioInputDeviceModel() { 602 string16 SpeechRecognitionManagerImpl::GetAudioInputDeviceModel() {
94 return BrowserMainLoop::GetAudioManager()->GetAudioInputDeviceModel(); 603 return BrowserMainLoop::GetAudioManager()->GetAudioInputDeviceModel();
95 } 604 }
96 605
97 bool SpeechRecognitionManagerImpl::HasPendingRequest(int session_id) const {
98 return requests_.find(session_id) != requests_.end();
99 }
100
101 InputTagSpeechDispatcherHost* SpeechRecognitionManagerImpl::GetDelegate(
102 int session_id) const {
103 return requests_.find(session_id)->second.delegate;
104 }
105
106 void SpeechRecognitionManagerImpl::ShowAudioInputSettings() { 606 void SpeechRecognitionManagerImpl::ShowAudioInputSettings() {
107 // Since AudioManager::ShowAudioInputSettings can potentially launch external 607 // Since AudioManager::ShowAudioInputSettings can potentially launch external
108 // processes, do that in the FILE thread to not block the calling threads. 608 // processes, do that in the FILE thread to not block the calling threads.
109 if (!BrowserThread::CurrentlyOn(BrowserThread::FILE)) { 609 if (!BrowserThread::CurrentlyOn(BrowserThread::FILE)) {
110 BrowserThread::PostTask( 610 BrowserThread::PostTask(
111 BrowserThread::FILE, FROM_HERE, 611 BrowserThread::FILE, FROM_HERE,
112 base::Bind(&SpeechRecognitionManagerImpl::ShowAudioInputSettings, 612 base::Bind(&SpeechRecognitionManagerImpl::ShowAudioInputSettings,
113 base::Unretained(this))); 613 base::Unretained(this)));
114 return; 614 return;
115 } 615 }
116 616
117 media::AudioManager* audio_manager = BrowserMainLoop::GetAudioManager(); 617 media::AudioManager* audio_manager = BrowserMainLoop::GetAudioManager();
118 DCHECK(audio_manager->CanShowAudioInputSettings()); 618 DCHECK(audio_manager->CanShowAudioInputSettings());
119 if (audio_manager->CanShowAudioInputSettings()) 619 if (audio_manager->CanShowAudioInputSettings())
120 audio_manager->ShowAudioInputSettings(); 620 audio_manager->ShowAudioInputSettings();
121 } 621 }
122 622
123 void SpeechRecognitionManagerImpl::StartRecognition( 623 SpeechRecognitionManagerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value)
124 InputTagSpeechDispatcherHost* delegate, 624 : event(event_value),
125 int session_id, 625 speech_error(content::SPEECH_RECOGNITION_ERROR_NONE) {
126 int render_process_id, 626 }
127 int render_view_id, 627
128 const gfx::Rect& element_rect, 628 SpeechRecognitionManagerImpl::FSMEventArgs::~FSMEventArgs() {
129 const std::string& language, 629 }
130 const std::string& grammar, 630
131 const std::string& origin_url, 631 SpeechRecognitionManagerImpl::Session::Session()
132 net::URLRequestContextGetter* context_getter, 632 : id(kSessionIDInvalid),
133 content::SpeechRecognitionPreferences* recognition_prefs) { 633 event_listener(NULL),
134 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 634 state(STATE_IDLE) {
135 BrowserThread::PostTask( 635 }
136 BrowserThread::UI, FROM_HERE, 636
137 base::Bind( 637 SpeechRecognitionManagerImpl::Session::~Session() {
138 &SpeechRecognitionManagerImpl::CheckRenderViewTypeAndStartRecognition,
139 base::Unretained(this),
140 SpeechRecognitionParams(
141 delegate, session_id, render_process_id, render_view_id,
142 element_rect, language, grammar, origin_url, context_getter,
143 recognition_prefs)));
144 }
145
146 void SpeechRecognitionManagerImpl::CheckRenderViewTypeAndStartRecognition(
147 const SpeechRecognitionParams& params) {
148 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
149
150 RenderViewHostImpl* render_view_host = RenderViewHostImpl::FromID(
151 params.render_process_id, params.render_view_id);
152 if (!render_view_host || !render_view_host->GetDelegate())
153 return;
154
155 // For host delegates other than VIEW_TYPE_WEB_CONTENTS we can't reliably show
156 // a popup, including the speech input bubble. In these cases for privacy
157 // reasons we don't want to start recording if the user can't be properly
158 // notified. An example of this is trying to show the speech input bubble
159 // within an extension popup: http://crbug.com/92083. In these situations the
160 // speech input extension API should be used instead.
161 if (render_view_host->GetDelegate()->GetRenderViewType() ==
162 content::VIEW_TYPE_WEB_CONTENTS) {
163 BrowserThread::PostTask(
164 BrowserThread::IO, FROM_HERE,
165 base::Bind(&SpeechRecognitionManagerImpl::ProceedStartingRecognition,
166 base::Unretained(this), params));
167 }
168 }
169
170 void SpeechRecognitionManagerImpl::ProceedStartingRecognition(
171 const SpeechRecognitionParams& params) {
172 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
173 DCHECK(!HasPendingRequest(params.session_id));
174
175 if (delegate_.get()) {
176 delegate_->ShowRecognitionRequested(
177 params.session_id, params.render_process_id, params.render_view_id,
178 params.element_rect);
179 delegate_->GetRequestInfo(&can_report_metrics_, &request_info_);
180 }
181
182 Request* request = &requests_[params.session_id];
183 request->delegate = params.delegate;
184 request->recognizer = content::SpeechRecognizer::Create(
185 this, params.session_id, params.language, params.grammar,
186 params.context_getter, params.recognition_prefs->FilterProfanities(),
187 request_info_, can_report_metrics_ ? params.origin_url : "");
188 request->is_active = false;
189
190 StartRecognitionForRequest(params.session_id);
191 }
192
193 void SpeechRecognitionManagerImpl::StartRecognitionForRequest(int session_id) {
194 SpeechRecognizerMap::iterator request = requests_.find(session_id);
195 if (request == requests_.end()) {
196 NOTREACHED();
197 return;
198 }
199
200 // We should not currently be recording for the session.
201 CHECK(recording_session_id_ != session_id);
202
203 // If we are currently recording audio for another session, abort it cleanly.
204 if (recording_session_id_)
205 CancelRecognitionAndInformDelegate(recording_session_id_);
206 recording_session_id_ = session_id;
207 requests_[session_id].is_active = true;
208 requests_[session_id].recognizer->StartRecognition();
209 if (delegate_.get())
210 delegate_->ShowWarmUp(session_id);
211 }
212
213 void SpeechRecognitionManagerImpl::CancelRecognitionForRequest(int session_id) {
214 // Ignore if the session id was not in our active recognizers list because the
215 // user might have clicked more than once, or recognition could have been
216 // ended due to other reasons before the user click was processed.
217 if (!HasPendingRequest(session_id))
218 return;
219
220 CancelRecognitionAndInformDelegate(session_id);
221 }
222
223 void SpeechRecognitionManagerImpl::FocusLostForRequest(int session_id) {
224 // See above comment.
225 if (!HasPendingRequest(session_id))
226 return;
227
228 // If this is an ongoing recording or if we were displaying an error message
229 // to the user, abort it since user has switched focus. Otherwise
230 // recognition has started and keep that going so user can start speaking to
231 // another element while this gets the results in parallel.
232 if (recording_session_id_ == session_id || !requests_[session_id].is_active)
233 CancelRecognitionAndInformDelegate(session_id);
234 }
235
236 void SpeechRecognitionManagerImpl::CancelRecognition(int session_id) {
237 DCHECK(HasPendingRequest(session_id));
238 if (requests_[session_id].is_active)
239 requests_[session_id].recognizer->AbortRecognition();
240 requests_.erase(session_id);
241 if (recording_session_id_ == session_id)
242 recording_session_id_ = 0;
243 if (delegate_.get())
244 delegate_->DoClose(session_id);
245 }
246
247 void SpeechRecognitionManagerImpl::CancelAllRequestsWithDelegate(
248 InputTagSpeechDispatcherHost* delegate) {
249 SpeechRecognizerMap::iterator it = requests_.begin();
250 while (it != requests_.end()) {
251 if (it->second.delegate == delegate) {
252 CancelRecognition(it->first);
253 // This map will have very few elements so it is simpler to restart.
254 it = requests_.begin();
255 } else {
256 ++it;
257 }
258 }
259 }
260
261 void SpeechRecognitionManagerImpl::StopRecording(int session_id) {
262 // No pending requests on extension popups.
263 if (!HasPendingRequest(session_id))
264 return;
265
266 requests_[session_id].recognizer->StopAudioCapture();
267 }
268
269 // -------- SpeechRecognitionEventListener interface implementation. ---------
270
271 void SpeechRecognitionManagerImpl::OnRecognitionResult(
272 int session_id, const content::SpeechRecognitionResult& result) {
273 DCHECK(HasPendingRequest(session_id));
274 GetDelegate(session_id)->SetRecognitionResult(session_id, result);
275 }
276
277 void SpeechRecognitionManagerImpl::OnAudioEnd(int session_id) {
278 if (recording_session_id_ != session_id)
279 return;
280 DCHECK_EQ(recording_session_id_, session_id);
281 DCHECK(HasPendingRequest(session_id));
282 if (!requests_[session_id].is_active)
283 return;
284 recording_session_id_ = 0;
285 GetDelegate(session_id)->DidCompleteRecording(session_id);
286 if (delegate_.get())
287 delegate_->ShowRecognizing(session_id);
288 }
289
290 void SpeechRecognitionManagerImpl::OnRecognitionEnd(int session_id) {
291 if (!HasPendingRequest(session_id) || !requests_[session_id].is_active)
292 return;
293 GetDelegate(session_id)->DidCompleteRecognition(session_id);
294 requests_.erase(session_id);
295 if (delegate_.get())
296 delegate_->DoClose(session_id);
297 }
298
299 void SpeechRecognitionManagerImpl::OnSoundStart(int session_id) {
300 }
301
302 void SpeechRecognitionManagerImpl::OnSoundEnd(int session_id) {
303 }
304
305 void SpeechRecognitionManagerImpl::OnRecognitionError(
306 int session_id, const content::SpeechRecognitionError& error) {
307 DCHECK(HasPendingRequest(session_id));
308 if (session_id == recording_session_id_)
309 recording_session_id_ = 0;
310 requests_[session_id].is_active = false;
311 if (delegate_.get()) {
312 if (error.code == content::SPEECH_RECOGNITION_ERROR_AUDIO &&
313 error.details == content::SPEECH_AUDIO_ERROR_DETAILS_NO_MIC) {
314 delegate_->ShowMicError(session_id,
315 SpeechRecognitionManagerDelegate::MIC_ERROR_NO_DEVICE_AVAILABLE);
316 } else if (error.code == content::SPEECH_RECOGNITION_ERROR_AUDIO &&
317 error.details == content::SPEECH_AUDIO_ERROR_DETAILS_IN_USE) {
318 delegate_->ShowMicError(session_id,
319 SpeechRecognitionManagerDelegate::MIC_ERROR_DEVICE_IN_USE);
320 } else {
321 delegate_->ShowRecognizerError(session_id, error.code);
322 }
323 }
324 }
325
326 void SpeechRecognitionManagerImpl::OnAudioStart(int session_id) {
327 DCHECK(HasPendingRequest(session_id));
328 DCHECK_EQ(recording_session_id_, session_id);
329 if (delegate_.get())
330 delegate_->ShowRecording(session_id);
331 }
332
333 void SpeechRecognitionManagerImpl::OnRecognitionStart(int session_id) {
334 }
335
336 void SpeechRecognitionManagerImpl::OnEnvironmentEstimationComplete(
337 int session_id) {
338 DCHECK(HasPendingRequest(session_id));
339 DCHECK_EQ(recording_session_id_, session_id);
340 }
341
342 void SpeechRecognitionManagerImpl::OnAudioLevelsChange(
343 int session_id, float volume, float noise_volume) {
344 DCHECK(HasPendingRequest(session_id));
345 DCHECK_EQ(recording_session_id_, session_id);
346 if (delegate_.get())
347 delegate_->ShowInputVolume(session_id, volume, noise_volume);
348 }
349
350 void SpeechRecognitionManagerImpl::CancelRecognitionAndInformDelegate(
351 int session_id) {
352 InputTagSpeechDispatcherHost* cur_delegate = GetDelegate(session_id);
353 CancelRecognition(session_id);
354 cur_delegate->DidCompleteRecording(session_id);
355 cur_delegate->DidCompleteRecognition(session_id);
356 }
357
358 SpeechRecognitionManagerImpl::Request::Request()
359 : is_active(false) {
360 }
361
362 SpeechRecognitionManagerImpl::Request::~Request() {
363 } 638 }
364 639
365 } // namespace speech 640 } // namespace speech