OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ | 5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ |
6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ | 6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ |
7 | 7 |
8 #include <map> | 8 #include <map> |
9 #include <string> | 9 #include <string> |
10 | 10 |
11 #include "base/basictypes.h" | 11 #include "base/basictypes.h" |
12 #include "base/callback.h" | |
12 #include "base/compiler_specific.h" | 13 #include "base/compiler_specific.h" |
13 #include "base/memory/ref_counted.h" | |
14 #include "base/memory/scoped_ptr.h" | |
15 #include "base/memory/singleton.h" | 14 #include "base/memory/singleton.h" |
15 #include "content/browser/speech/speech_recognizer_impl.h" | |
16 #include "content/common/content_export.h" | |
16 #include "content/public/browser/speech_recognition_event_listener.h" | 17 #include "content/public/browser/speech_recognition_event_listener.h" |
17 #include "content/public/browser/speech_recognition_manager.h" | 18 #include "content/public/browser/speech_recognition_manager.h" |
18 #include "ui/gfx/rect.h" | 19 #include "content/public/browser/speech_recognition_session_context.h" |
20 #include "content/public/common/speech_recognition_error.h" | |
19 | 21 |
20 namespace content { | 22 namespace content { |
21 class ResourceContext; | 23 class ResourceContext; |
22 class SpeechRecognitionManagerDelegate; | 24 class SpeechRecognitionManagerDelegate; |
23 class SpeechRecognitionPreferences; | 25 struct SpeechRecognitionSessionConfig; |
26 struct SpeechRecognitionError; | |
Satish
2012/04/19 13:03:19
order alphabetically
Primiano Tucci (use gerrit)
2012/04/20 16:06:43
Done.
| |
24 struct SpeechRecognitionResult; | 27 struct SpeechRecognitionResult; |
25 class SpeechRecognizer; | |
26 } | |
27 | |
28 namespace net { | |
29 class URLRequestContextGetter; | |
30 } | 28 } |
31 | 29 |
32 namespace speech { | 30 namespace speech { |
33 | 31 |
34 class InputTagSpeechDispatcherHost; | 32 // This is the manager for speech recognition in the browser process. |
35 | 33 // The manager is unique for each renderer, and can serve several requests. Each |
36 class CONTENT_EXPORT SpeechRecognitionManagerImpl | 34 // recognition request corresponds to a session, initiated via |CreateSession|. |
37 : NON_EXPORTED_BASE(public content::SpeechRecognitionManager), | 35 // At any moment the manager has at most one "interactive" session (identified |
38 NON_EXPORTED_BASE(public content::SpeechRecognitionEventListener) { | 36 // by |interactive_session_id_|), that is the session that is currently holding |
37 // user attention (showing a bubble). For privacy reasons, only the interactive | |
Satish
2012/04/19 13:03:19
since the bubble concept is not applicable for all
Primiano Tucci (use gerrit)
2012/04/20 16:06:43
Done.
| |
38 // session is allowed to capture audio from the microphone. However, after audio | |
39 // capture is completed, a session can be detached and can live in parallel | |
40 // with other sessions, while waiting for its results. | |
41 // | |
42 // In more detail, SpeechRecognitionManager has the following responsibilities: | |
43 // - Handles requests received from various render views and makes sure only | |
44 // one of them accesses the audio device at any given time. | |
Satish
2012/04/19 13:03:19
coherently -> at any given time
Primiano Tucci (use gerrit)
2012/04/20 16:06:43
Done.
| |
45 // - Relays recognition results/status/error events of each session to the | |
46 // corresponding listener (demuxing on the basis of their session_id). | |
47 // - Handles the instantiation of SpeechRecognitionEngine objects when | |
48 // requested by SpeechRecognitionSessions. | |
49 class CONTENT_EXPORT SpeechRecognitionManagerImpl : | |
50 public NON_EXPORTED_BASE(content::SpeechRecognitionManager), | |
51 public NON_EXPORTED_BASE(content::SpeechRecognitionEventListener) { | |
39 public: | 52 public: |
40 static SpeechRecognitionManagerImpl* GetInstance(); | 53 static SpeechRecognitionManagerImpl* GetInstance(); |
41 | 54 |
42 // SpeechRecognitionManager implementation: | 55 // SpeechRecognitionManager implementation. |
43 virtual void StartRecognitionForRequest(int session_id) OVERRIDE; | 56 virtual int CreateSession( |
44 virtual void CancelRecognitionForRequest(int session_id) OVERRIDE; | 57 content::SpeechRecognitionSessionConfig& config, |
45 virtual void FocusLostForRequest(int session_id) OVERRIDE; | 58 SpeechRecognitionEventListener* event_listener) OVERRIDE; |
59 virtual void StartSession(int session_id) OVERRIDE; | |
60 virtual void AbortSession(int session_id) OVERRIDE; | |
61 virtual void AbortAllSessionsForListener( | |
62 content::SpeechRecognitionEventListener* listener) OVERRIDE; | |
63 virtual void StopAudioCaptureForSession(int session_id) OVERRIDE; | |
64 virtual void DetachSession(int session_id) OVERRIDE; | |
65 virtual content::SpeechRecognitionSessionContext& GetSessionContext( | |
66 int session_id) const OVERRIDE; | |
67 virtual int LookupSessionByContext( | |
68 base::Callback<bool( | |
69 const content::SpeechRecognitionSessionContext&)> matcher) | |
70 const OVERRIDE; | |
46 virtual bool HasAudioInputDevices() OVERRIDE; | 71 virtual bool HasAudioInputDevices() OVERRIDE; |
47 virtual bool IsCapturingAudio() OVERRIDE; | 72 virtual bool IsCapturingAudio() OVERRIDE; |
48 virtual string16 GetAudioInputDeviceModel() OVERRIDE; | 73 virtual string16 GetAudioInputDeviceModel() OVERRIDE; |
49 virtual void ShowAudioInputSettings() OVERRIDE; | 74 virtual void ShowAudioInputSettings() OVERRIDE; |
50 | 75 |
51 // Handlers for requests from render views. | |
52 | |
53 // |delegate| is a weak pointer and should remain valid until | |
54 // its |DidCompleteRecognition| method is called or recognition is cancelled. | |
55 // |render_process_id| is the ID of the renderer process initiating the | |
56 // request. | |
57 // |element_rect| is the display bounds of the html element requesting speech | |
58 // input (in page coordinates). | |
59 virtual void StartRecognition( | |
60 InputTagSpeechDispatcherHost* delegate, | |
61 int session_id, | |
62 int render_process_id, | |
63 int render_view_id, | |
64 const gfx::Rect& element_rect, | |
65 const std::string& language, | |
66 const std::string& grammar, | |
67 const std::string& origin_url, | |
68 net::URLRequestContextGetter* context_getter, | |
69 content::SpeechRecognitionPreferences* speech_recognition_prefs); | |
70 virtual void CancelRecognition(int session_id); | |
71 virtual void CancelAllRequestsWithDelegate( | |
72 InputTagSpeechDispatcherHost* delegate); | |
73 virtual void StopRecording(int session_id); | |
74 | |
75 // SpeechRecognitionEventListener methods. | 76 // SpeechRecognitionEventListener methods. |
76 virtual void OnRecognitionStart(int session_id) OVERRIDE; | 77 virtual void OnRecognitionStart(int session_id) OVERRIDE; |
77 virtual void OnAudioStart(int session_id) OVERRIDE; | 78 virtual void OnAudioStart(int session_id) OVERRIDE; |
78 virtual void OnEnvironmentEstimationComplete(int session_id) OVERRIDE; | 79 virtual void OnEnvironmentEstimationComplete(int session_id) OVERRIDE; |
79 virtual void OnSoundStart(int session_id) OVERRIDE; | 80 virtual void OnSoundStart(int session_id) OVERRIDE; |
80 virtual void OnSoundEnd(int session_id) OVERRIDE; | 81 virtual void OnSoundEnd(int session_id) OVERRIDE; |
81 virtual void OnAudioEnd(int session_id) OVERRIDE; | 82 virtual void OnAudioEnd(int session_id) OVERRIDE; |
82 virtual void OnRecognitionEnd(int session_id) OVERRIDE; | 83 virtual void OnRecognitionEnd(int session_id) OVERRIDE; |
83 virtual void OnRecognitionResult( | 84 virtual void OnRecognitionResult( |
84 int session_id, const content::SpeechRecognitionResult& result) OVERRIDE; | 85 int session_id, const content::SpeechRecognitionResult& result) OVERRIDE; |
85 virtual void OnRecognitionError( | 86 virtual void OnRecognitionError( |
86 int session_id, const content::SpeechRecognitionError& error) OVERRIDE; | 87 int session_id, const content::SpeechRecognitionError& error) OVERRIDE; |
87 virtual void OnAudioLevelsChange( | 88 virtual void OnAudioLevelsChange(int session_id, float volume, |
88 int session_id, float volume, float noise_volume) OVERRIDE; | 89 float noise_volume) OVERRIDE; |
89 | 90 |
90 protected: | 91 protected: |
91 // Private constructor to enforce singleton. | 92 // Private constructor to enforce singleton. |
92 friend struct DefaultSingletonTraits<SpeechRecognitionManagerImpl>; | 93 friend struct DefaultSingletonTraits<SpeechRecognitionManagerImpl>; |
93 SpeechRecognitionManagerImpl(); | 94 SpeechRecognitionManagerImpl(); |
94 virtual ~SpeechRecognitionManagerImpl(); | 95 virtual ~SpeechRecognitionManagerImpl(); |
95 | 96 |
96 bool HasPendingRequest(int session_id) const; | |
97 | |
98 private: | 97 private: |
99 struct Request { | 98 enum FSMState { |
100 Request(); | 99 STATE_IDLE = 0, |
101 ~Request(); | 100 STATE_INTERACTIVE, |
102 | 101 STATE_DETACHABLE, |
103 InputTagSpeechDispatcherHost* delegate; | 102 STATE_DETACHED, |
104 scoped_refptr<content::SpeechRecognizer> recognizer; | 103 STATE_ABORTING, |
105 bool is_active; // Set to true when recording or recognition is going on. | 104 STATE_ENDED_WITH_ERROR, |
105 STATE_MAX_VALUE = STATE_ENDED_WITH_ERROR | |
106 }; | 106 }; |
107 | 107 |
108 struct SpeechRecognitionParams; | 108 enum FSMEvent { |
109 EVENT_ABORT = 0, | |
110 EVENT_START, | |
111 EVENT_STOP_CAPTURE, | |
112 EVENT_DETACH, | |
113 EVENT_AUDIO_ENDED, | |
114 EVENT_RECOGNITION_ENDED, | |
115 EVENT_RECOGNITION_RESULT, | |
116 EVENT_RECOGNITION_ERROR, | |
117 EVENT_MAX_VALUE = EVENT_RECOGNITION_ERROR | |
118 }; | |
109 | 119 |
110 InputTagSpeechDispatcherHost* GetDelegate(int session_id) const; | 120 struct Session { |
121 Session(); | |
122 ~Session(); | |
111 | 123 |
112 void CheckRenderViewTypeAndStartRecognition( | 124 int id; |
113 const SpeechRecognitionParams& params); | 125 content::SpeechRecognitionEventListener* event_listener; |
114 void ProceedStartingRecognition(const SpeechRecognitionParams& params); | 126 content::SpeechRecognitionSessionContext context; |
127 scoped_refptr<SpeechRecognizerImpl> recognizer; | |
128 FSMState state; | |
129 bool error_occurred; | |
130 }; | |
115 | 131 |
116 void CancelRecognitionAndInformDelegate(int session_id); | 132 struct FSMEventArgs { |
133 explicit FSMEventArgs(FSMEvent event_value); | |
134 ~FSMEventArgs(); | |
117 | 135 |
118 typedef std::map<int, Request> SpeechRecognizerMap; | 136 FSMEvent event; |
119 SpeechRecognizerMap requests_; | 137 const content::SpeechRecognitionResult* speech_result; |
120 std::string request_info_; | 138 const content::SpeechRecognitionError* speech_error; |
121 bool can_report_metrics_; | 139 }; |
122 int recording_session_id_; | 140 |
123 scoped_ptr<content::SpeechRecognitionManagerDelegate> delegate_; | 141 // Entry point for pushing any external event into the session handling FSM. |
142 void DispatchEvent(int session_id, FSMEventArgs args); | |
143 | |
144 // Defines the behavior of the session handling FSM, selecting the appropriate | |
145 // transition according to the session, its current state and the event. | |
146 FSMState ExecuteTransitionAndGetNextState(Session& session, | |
147 const FSMEventArgs& event_args); | |
148 | |
149 // The methods below handle transitions of the session handling FSM. | |
150 FSMState SessionStart(Session& session, const FSMEventArgs& event_args); | |
151 FSMState SessionAbort(Session& session, const FSMEventArgs& event_args); | |
152 FSMState SessionStopAudioCapture(Session& session, | |
153 const FSMEventArgs& event_args); | |
154 FSMState SessionDetach(Session& session, const FSMEventArgs& event_args); | |
155 FSMState SessionReportError(Session& session, const FSMEventArgs& event_args); | |
156 FSMState SessionReportNoMatch(Session& session, | |
157 const FSMEventArgs& event_args); | |
158 FSMState SessionDelete(Session& session, const FSMEventArgs& event_args); | |
159 FSMState SessionSetDetachable(Session& session, | |
160 const FSMEventArgs& event_args); | |
161 FSMState DoNothing(Session& session, const FSMEventArgs& event_args); | |
162 FSMState NotFeasible(Session& session, const FSMEventArgs& event_args); | |
163 | |
164 bool SessionExists(int session_id) const; | |
165 content::SpeechRecognitionEventListener* GetListener(int session_id) const; | |
166 int GetNextSessionID(); | |
167 | |
168 typedef std::map<int, Session> SessionsTable; | |
169 SessionsTable sessions_; | |
170 int interactive_session_id_; | |
Satish
2012/04/19 13:03:19
interactive -> active ?
Primiano Tucci (use gerrit)
2012/04/20 16:06:43
A session can be active but not interactive (while
| |
171 int last_session_id_; | |
172 bool is_dispatching_event_; | |
173 content::SpeechRecognitionManagerDelegate* delegate_; | |
124 }; | 174 }; |
125 | 175 |
126 } // namespace speech | 176 } // namespace speech |
127 | 177 |
128 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ | 178 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ |
OLD | NEW |