OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ | 5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ |
6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ | 6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ |
7 | 7 |
8 #include <map> | 8 #include <map> |
9 #include <string> | 9 #include <string> |
10 | 10 |
11 #include "base/basictypes.h" | 11 #include "base/basictypes.h" |
12 #include "base/callback.h" | |
12 #include "base/compiler_specific.h" | 13 #include "base/compiler_specific.h" |
13 #include "base/memory/ref_counted.h" | |
14 #include "base/memory/scoped_ptr.h" | |
15 #include "base/memory/singleton.h" | 14 #include "base/memory/singleton.h" |
15 #include "content/browser/speech/speech_recognizer_impl.h" | |
jam
2012/04/24 15:56:32
nit: just forward declare instead of including the
Primiano Tucci (use gerrit)
2012/04/25 11:30:03
Done.
| |
16 #include "content/common/content_export.h" | |
jam
2012/04/24 15:56:32
nit: not needed
Primiano Tucci (use gerrit)
2012/04/25 11:30:03
Done.
| |
16 #include "content/public/browser/speech_recognition_event_listener.h" | 17 #include "content/public/browser/speech_recognition_event_listener.h" |
17 #include "content/public/browser/speech_recognition_manager.h" | 18 #include "content/public/browser/speech_recognition_manager.h" |
18 #include "ui/gfx/rect.h" | 19 #include "content/public/browser/speech_recognition_session_context.h" |
20 #include "content/public/common/speech_recognition_error.h" | |
19 | 21 |
20 namespace content { | 22 namespace content { |
23 struct SpeechRecognitionError; | |
jam
2012/04/24 15:56:32
are you sure you need all these forward declared g
Primiano Tucci (use gerrit)
2012/04/25 11:30:03
Done.
| |
24 class SpeechRecognitionManagerDelegate; | |
25 struct SpeechRecognitionResult; | |
26 struct SpeechRecognitionSessionConfig; | |
21 class ResourceContext; | 27 class ResourceContext; |
22 class SpeechRecognitionManagerDelegate; | |
23 class SpeechRecognitionPreferences; | |
24 struct SpeechRecognitionResult; | |
25 class SpeechRecognizer; | |
26 } | |
27 | |
28 namespace net { | |
29 class URLRequestContextGetter; | |
30 } | 28 } |
31 | 29 |
32 namespace speech { | 30 namespace speech { |
33 | 31 |
34 class InputTagSpeechDispatcherHost; | 32 // This is the manager for speech recognition. It is a singleton instance in |
35 | 33 // the browser process and can serve several requests. Each recognition request |
36 class CONTENT_EXPORT SpeechRecognitionManagerImpl | 34 // corresponds to a session, initiated via |CreateSession|. |
37 : NON_EXPORTED_BASE(public content::SpeechRecognitionManager), | 35 // At any moment the manager has at most one "interactive" session (identified |
38 NON_EXPORTED_BASE(public content::SpeechRecognitionEventListener) { | 36 // by |interactive_session_id_|), that is the session that is currently holding |
37 // user attention. For privacy reasons, only the interactive session is allowed | |
38 // to capture audio from the microphone. However, after audio capture is | |
39 // completed, a session can be sent to background and can live in parallel with | |
40 // other sessions, while waiting for its results. | |
41 // | |
42 // In more detail, SpeechRecognitionManager has the following responsibilities: | |
43 // - Handles requests received from various render views and makes sure only | |
44 // one of them accesses the audio device at any given time. | |
45 // - Relays recognition results/status/error events of each session to the | |
46 // corresponding listener (demuxing on the basis of their session_id). | |
47 // - Handles the instantiation of SpeechRecognitionEngine objects when | |
48 // requested by SpeechRecognitionSessions. | |
49 class CONTENT_EXPORT SpeechRecognitionManagerImpl : | |
50 public NON_EXPORTED_BASE(content::SpeechRecognitionManager), | |
51 public NON_EXPORTED_BASE(content::SpeechRecognitionEventListener) { | |
39 public: | 52 public: |
40 static SpeechRecognitionManagerImpl* GetInstance(); | 53 static SpeechRecognitionManagerImpl* GetInstance(); |
41 | 54 |
42 // SpeechRecognitionManager implementation: | 55 // SpeechRecognitionManager implementation. |
43 virtual void StartRecognitionForRequest(int session_id) OVERRIDE; | 56 virtual int CreateSession( |
44 virtual void CancelRecognitionForRequest(int session_id) OVERRIDE; | 57 const content::SpeechRecognitionSessionConfig& config, |
45 virtual void FocusLostForRequest(int session_id) OVERRIDE; | 58 SpeechRecognitionEventListener* event_listener) OVERRIDE; |
59 virtual void StartSession(int session_id) OVERRIDE; | |
60 virtual void AbortSession(int session_id) OVERRIDE; | |
61 virtual void AbortAllSessionsForListener( | |
62 content::SpeechRecognitionEventListener* listener) OVERRIDE; | |
63 virtual void StopAudioCaptureForSession(int session_id) OVERRIDE; | |
64 virtual void SendSessionToBackground(int session_id) OVERRIDE; | |
65 virtual content::SpeechRecognitionSessionContext GetSessionContext( | |
66 int session_id) const OVERRIDE; | |
67 virtual int LookupSessionByContext( | |
68 base::Callback<bool( | |
69 const content::SpeechRecognitionSessionContext&)> matcher) | |
70 const OVERRIDE; | |
46 virtual bool HasAudioInputDevices() OVERRIDE; | 71 virtual bool HasAudioInputDevices() OVERRIDE; |
47 virtual bool IsCapturingAudio() OVERRIDE; | 72 virtual bool IsCapturingAudio() OVERRIDE; |
48 virtual string16 GetAudioInputDeviceModel() OVERRIDE; | 73 virtual string16 GetAudioInputDeviceModel() OVERRIDE; |
49 virtual void ShowAudioInputSettings() OVERRIDE; | 74 virtual void ShowAudioInputSettings() OVERRIDE; |
50 | 75 |
51 // Handlers for requests from render views. | |
52 | |
53 // |delegate| is a weak pointer and should remain valid until | |
54 // its |DidCompleteRecognition| method is called or recognition is cancelled. | |
55 // |render_process_id| is the ID of the renderer process initiating the | |
56 // request. | |
57 // |element_rect| is the display bounds of the html element requesting speech | |
58 // input (in page coordinates). | |
59 virtual void StartRecognition( | |
60 InputTagSpeechDispatcherHost* delegate, | |
61 int session_id, | |
62 int render_process_id, | |
63 int render_view_id, | |
64 const gfx::Rect& element_rect, | |
65 const std::string& language, | |
66 const std::string& grammar, | |
67 const std::string& origin_url, | |
68 net::URLRequestContextGetter* context_getter, | |
69 content::SpeechRecognitionPreferences* speech_recognition_prefs); | |
70 virtual void CancelRecognition(int session_id); | |
71 virtual void CancelAllRequestsWithDelegate( | |
72 InputTagSpeechDispatcherHost* delegate); | |
73 virtual void StopRecording(int session_id); | |
74 | |
75 // SpeechRecognitionEventListener methods. | 76 // SpeechRecognitionEventListener methods. |
76 virtual void OnRecognitionStart(int session_id) OVERRIDE; | 77 virtual void OnRecognitionStart(int session_id) OVERRIDE; |
77 virtual void OnAudioStart(int session_id) OVERRIDE; | 78 virtual void OnAudioStart(int session_id) OVERRIDE; |
78 virtual void OnEnvironmentEstimationComplete(int session_id) OVERRIDE; | 79 virtual void OnEnvironmentEstimationComplete(int session_id) OVERRIDE; |
79 virtual void OnSoundStart(int session_id) OVERRIDE; | 80 virtual void OnSoundStart(int session_id) OVERRIDE; |
80 virtual void OnSoundEnd(int session_id) OVERRIDE; | 81 virtual void OnSoundEnd(int session_id) OVERRIDE; |
81 virtual void OnAudioEnd(int session_id) OVERRIDE; | 82 virtual void OnAudioEnd(int session_id) OVERRIDE; |
82 virtual void OnRecognitionEnd(int session_id) OVERRIDE; | 83 virtual void OnRecognitionEnd(int session_id) OVERRIDE; |
83 virtual void OnRecognitionResult( | 84 virtual void OnRecognitionResult( |
84 int session_id, const content::SpeechRecognitionResult& result) OVERRIDE; | 85 int session_id, const content::SpeechRecognitionResult& result) OVERRIDE; |
85 virtual void OnRecognitionError( | 86 virtual void OnRecognitionError( |
86 int session_id, const content::SpeechRecognitionError& error) OVERRIDE; | 87 int session_id, const content::SpeechRecognitionError& error) OVERRIDE; |
87 virtual void OnAudioLevelsChange( | 88 virtual void OnAudioLevelsChange(int session_id, float volume, |
88 int session_id, float volume, float noise_volume) OVERRIDE; | 89 float noise_volume) OVERRIDE; |
89 | 90 |
90 protected: | 91 protected: |
91 // Protected constructor to enforce singleton. | 92 // Protected constructor to enforce singleton. |
92 friend struct DefaultSingletonTraits<SpeechRecognitionManagerImpl>; | 93 friend struct DefaultSingletonTraits<SpeechRecognitionManagerImpl>; |
93 SpeechRecognitionManagerImpl(); | 94 SpeechRecognitionManagerImpl(); |
94 virtual ~SpeechRecognitionManagerImpl(); | 95 virtual ~SpeechRecognitionManagerImpl(); |
95 | 96 |
96 bool HasPendingRequest(int session_id) const; | |
97 | |
98 private: | 97 private: |
99 struct Request { | 98 enum FSMState { |
jam
2012/04/24 15:56:32
nit: you have FSM all over but what does it stand
Primiano Tucci (use gerrit)
2012/04/25 11:30:03
Right. Added comment.
| |
100 Request(); | 99 STATE_IDLE = 0, |
101 ~Request(); | 100 STATE_INTERACTIVE, |
102 | 101 STATE_BACKGROUND, |
103 InputTagSpeechDispatcherHost* delegate; | 102 STATE_WAITING_FOR_DELETION, |
104 scoped_refptr<content::SpeechRecognizer> recognizer; | 103 STATE_MAX_VALUE = STATE_WAITING_FOR_DELETION |
105 bool is_active; // Set to true when recording or recognition is going on. | |
106 }; | 104 }; |
107 | 105 |
108 struct SpeechRecognitionParams; | 106 enum FSMEvent { |
107 EVENT_ABORT = 0, | |
108 EVENT_START, | |
109 EVENT_STOP_CAPTURE, | |
110 EVENT_SET_BACKGROUND, | |
111 EVENT_RECOGNITION_ENDED, | |
112 EVENT_RECOGNITION_RESULT, | |
113 EVENT_RECOGNITION_ERROR, | |
114 EVENT_MAX_VALUE = EVENT_RECOGNITION_ERROR | |
115 }; | |
109 | 116 |
110 InputTagSpeechDispatcherHost* GetDelegate(int session_id) const; | 117 struct Session { |
118 Session(); | |
119 ~Session(); | |
111 | 120 |
112 void CheckRenderViewTypeAndStartRecognition( | 121 int id; |
113 const SpeechRecognitionParams& params); | 122 content::SpeechRecognitionEventListener* event_listener; |
114 void ProceedStartingRecognition(const SpeechRecognitionParams& params); | 123 content::SpeechRecognitionSessionContext context; |
124 scoped_refptr<SpeechRecognizerImpl> recognizer; | |
125 FSMState state; | |
126 bool error_occurred; | |
127 }; | |
115 | 128 |
116 void CancelRecognitionAndInformDelegate(int session_id); | 129 struct FSMEventArgs { |
130 explicit FSMEventArgs(FSMEvent event_value); | |
131 ~FSMEventArgs(); | |
117 | 132 |
118 typedef std::map<int, Request> SpeechRecognizerMap; | 133 FSMEvent event; |
119 SpeechRecognizerMap requests_; | 134 content::SpeechRecognitionError speech_error; |
120 std::string request_info_; | 135 }; |
121 bool can_report_metrics_; | 136 |
122 int recording_session_id_; | 137 // Entry point for pushing any external event into the session handling FSM. |
123 scoped_ptr<content::SpeechRecognitionManagerDelegate> delegate_; | 138 void DispatchEvent(int session_id, FSMEventArgs args); |
139 | |
140 // Defines the behavior of the session handling FSM, selecting the appropriate | |
141 // transition according to the session, its current state and the event. | |
142 FSMState ExecuteTransitionAndGetNextState(Session& session, | |
143 const FSMEventArgs& event_args); | |
144 | |
145 // The methods below handle transitions of the session handling FSM. | |
146 FSMState SessionStart(Session& session, const FSMEventArgs& event_args); | |
147 FSMState SessionAbort(Session& session, const FSMEventArgs& event_args); | |
148 FSMState SessionStopAudioCapture(Session& session, | |
149 const FSMEventArgs& event_args); | |
150 FSMState SessionAbortIfCapturingAudioOrBackground( | |
151 Session& session, const FSMEventArgs& event_args); | |
152 FSMState SessionSetBackground(Session& session, | |
153 const FSMEventArgs& event_args); | |
154 FSMState SessionReportError(Session& session, const FSMEventArgs& event_args); | |
155 FSMState SessionReportNoMatch(Session& session, | |
156 const FSMEventArgs& event_args); | |
157 FSMState SessionDelete(Session& session, const FSMEventArgs& event_args); | |
158 FSMState DoNothing(Session& session, const FSMEventArgs& event_args); | |
159 FSMState NotFeasible(Session& session, const FSMEventArgs& event_args); | |
160 | |
161 bool SessionExists(int session_id) const; | |
162 content::SpeechRecognitionEventListener* GetListener(int session_id) const; | |
163 int GetNextSessionID(); | |
164 | |
165 typedef std::map<int, Session> SessionsTable; | |
166 SessionsTable sessions_; | |
167 int interactive_session_id_; | |
168 int last_session_id_; | |
169 bool is_dispatching_event_; | |
170 content::SpeechRecognitionManagerDelegate* delegate_; | |
124 }; | 171 }; |
125 | 172 |
126 } // namespace speech | 173 } // namespace speech |
127 | 174 |
128 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ | 175 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ |
OLD | NEW |