OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ | 5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ |
6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ | 6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ |
7 | 7 |
8 #include <map> | 8 #include <map> |
9 #include <string> | 9 #include <string> |
10 | 10 |
11 #include "base/basictypes.h" | 11 #include "base/basictypes.h" |
| 12 #include "base/callback.h" |
12 #include "base/compiler_specific.h" | 13 #include "base/compiler_specific.h" |
13 #include "base/memory/ref_counted.h" | |
14 #include "base/memory/scoped_ptr.h" | |
15 #include "base/memory/singleton.h" | 14 #include "base/memory/singleton.h" |
16 #include "content/public/browser/speech_recognition_event_listener.h" | 15 #include "content/public/browser/speech_recognition_event_listener.h" |
17 #include "content/public/browser/speech_recognition_manager.h" | 16 #include "content/public/browser/speech_recognition_manager.h" |
18 #include "ui/gfx/rect.h" | 17 #include "content/public/browser/speech_recognition_session_context.h" |
| 18 #include "content/public/common/speech_recognition_error.h" |
19 | 19 |
20 namespace content { | 20 namespace content { |
21 class ResourceContext; | |
22 class SpeechRecognitionManagerDelegate; | 21 class SpeechRecognitionManagerDelegate; |
23 class SpeechRecognitionPreferences; | |
24 struct SpeechRecognitionResult; | |
25 class SpeechRecognizer; | |
26 } | |
27 | |
28 namespace net { | |
29 class URLRequestContextGetter; | |
30 } | 22 } |
31 | 23 |
32 namespace speech { | 24 namespace speech { |
33 | 25 |
34 class InputTagSpeechDispatcherHost; | 26 class SpeechRecognizerImpl; |
35 | 27 |
36 class CONTENT_EXPORT SpeechRecognitionManagerImpl | 28 // This is the manager for speech recognition. It is a singleton instance in |
37 : NON_EXPORTED_BASE(public content::SpeechRecognitionManager), | 29 // the browser process and can serve several requests. Each recognition request |
38 NON_EXPORTED_BASE(public content::SpeechRecognitionEventListener) { | 30 // corresponds to a session, initiated via |CreateSession|. |
 | 31 // At any moment the manager has at most one "interactive" session (identified |
 | 32 // by |interactive_session_id_|), i.e. the session that currently holds the |
 | 33 // user's attention. For privacy reasons, only the interactive session is allowed |
 | 34 // to capture audio from the microphone. However, after audio capture is |
 | 35 // completed, a session can be sent to the background and live in parallel with |
 | 36 // other sessions while waiting for its results. |
| 37 // |
 | 38 // In more detail, SpeechRecognitionManager has the following responsibilities: |
| 39 // - Handles requests received from various render views and makes sure only |
| 40 // one of them accesses the audio device at any given time. |
| 41 // - Relays recognition results/status/error events of each session to the |
 | 42 // corresponding listener (demuxing on the basis of their session_id). |
| 43 // - Handles the instantiation of SpeechRecognitionEngine objects when |
| 44 // requested by SpeechRecognitionSessions. |
| 45 class CONTENT_EXPORT SpeechRecognitionManagerImpl : |
| 46 public NON_EXPORTED_BASE(content::SpeechRecognitionManager), |
| 47 public NON_EXPORTED_BASE(content::SpeechRecognitionEventListener) { |
39 public: | 48 public: |
40 static SpeechRecognitionManagerImpl* GetInstance(); | 49 static SpeechRecognitionManagerImpl* GetInstance(); |
41 | 50 |
42 // SpeechRecognitionManager implementation: | 51 // SpeechRecognitionManager implementation. |
43 virtual void StartRecognitionForRequest(int session_id) OVERRIDE; | 52 virtual int CreateSession( |
44 virtual void CancelRecognitionForRequest(int session_id) OVERRIDE; | 53 const content::SpeechRecognitionSessionConfig& config, |
45 virtual void FocusLostForRequest(int session_id) OVERRIDE; | 54 SpeechRecognitionEventListener* event_listener) OVERRIDE; |
| 55 virtual void StartSession(int session_id) OVERRIDE; |
| 56 virtual void AbortSession(int session_id) OVERRIDE; |
| 57 virtual void AbortAllSessionsForListener( |
| 58 content::SpeechRecognitionEventListener* listener) OVERRIDE; |
| 59 virtual void StopAudioCaptureForSession(int session_id) OVERRIDE; |
| 60 virtual void SendSessionToBackground(int session_id) OVERRIDE; |
| 61 virtual content::SpeechRecognitionSessionContext GetSessionContext( |
| 62 int session_id) const OVERRIDE; |
| 63 virtual int LookupSessionByContext( |
| 64 base::Callback<bool( |
| 65 const content::SpeechRecognitionSessionContext&)> matcher) |
| 66 const OVERRIDE; |
46 virtual bool HasAudioInputDevices() OVERRIDE; | 67 virtual bool HasAudioInputDevices() OVERRIDE; |
47 virtual bool IsCapturingAudio() OVERRIDE; | 68 virtual bool IsCapturingAudio() OVERRIDE; |
48 virtual string16 GetAudioInputDeviceModel() OVERRIDE; | 69 virtual string16 GetAudioInputDeviceModel() OVERRIDE; |
49 virtual void ShowAudioInputSettings() OVERRIDE; | 70 virtual void ShowAudioInputSettings() OVERRIDE; |
50 | 71 |
51 // Handlers for requests from render views. | |
52 | |
53 // |delegate| is a weak pointer and should remain valid until | |
54 // its |DidCompleteRecognition| method is called or recognition is cancelled. | |
55 // |render_process_id| is the ID of the renderer process initiating the | |
56 // request. | |
57 // |element_rect| is the display bounds of the html element requesting speech | |
58 // input (in page coordinates). | |
59 virtual void StartRecognition( | |
60 InputTagSpeechDispatcherHost* delegate, | |
61 int session_id, | |
62 int render_process_id, | |
63 int render_view_id, | |
64 const gfx::Rect& element_rect, | |
65 const std::string& language, | |
66 const std::string& grammar, | |
67 const std::string& origin_url, | |
68 net::URLRequestContextGetter* context_getter, | |
69 content::SpeechRecognitionPreferences* speech_recognition_prefs); | |
70 virtual void CancelRecognition(int session_id); | |
71 virtual void CancelAllRequestsWithDelegate( | |
72 InputTagSpeechDispatcherHost* delegate); | |
73 virtual void StopRecording(int session_id); | |
74 | |
75 // SpeechRecognitionEventListener methods. | 72 // SpeechRecognitionEventListener methods. |
76 virtual void OnRecognitionStart(int session_id) OVERRIDE; | 73 virtual void OnRecognitionStart(int session_id) OVERRIDE; |
77 virtual void OnAudioStart(int session_id) OVERRIDE; | 74 virtual void OnAudioStart(int session_id) OVERRIDE; |
78 virtual void OnEnvironmentEstimationComplete(int session_id) OVERRIDE; | 75 virtual void OnEnvironmentEstimationComplete(int session_id) OVERRIDE; |
79 virtual void OnSoundStart(int session_id) OVERRIDE; | 76 virtual void OnSoundStart(int session_id) OVERRIDE; |
80 virtual void OnSoundEnd(int session_id) OVERRIDE; | 77 virtual void OnSoundEnd(int session_id) OVERRIDE; |
81 virtual void OnAudioEnd(int session_id) OVERRIDE; | 78 virtual void OnAudioEnd(int session_id) OVERRIDE; |
82 virtual void OnRecognitionEnd(int session_id) OVERRIDE; | 79 virtual void OnRecognitionEnd(int session_id) OVERRIDE; |
83 virtual void OnRecognitionResult( | 80 virtual void OnRecognitionResult( |
84 int session_id, const content::SpeechRecognitionResult& result) OVERRIDE; | 81 int session_id, const content::SpeechRecognitionResult& result) OVERRIDE; |
85 virtual void OnRecognitionError( | 82 virtual void OnRecognitionError( |
86 int session_id, const content::SpeechRecognitionError& error) OVERRIDE; | 83 int session_id, const content::SpeechRecognitionError& error) OVERRIDE; |
87 virtual void OnAudioLevelsChange( | 84 virtual void OnAudioLevelsChange(int session_id, float volume, |
88 int session_id, float volume, float noise_volume) OVERRIDE; | 85 float noise_volume) OVERRIDE; |
89 | 86 |
90 protected: | 87 protected: |
91 // Private constructor to enforce singleton. | 88 // Private constructor to enforce singleton. |
92 friend struct DefaultSingletonTraits<SpeechRecognitionManagerImpl>; | 89 friend struct DefaultSingletonTraits<SpeechRecognitionManagerImpl>; |
93 SpeechRecognitionManagerImpl(); | 90 SpeechRecognitionManagerImpl(); |
94 virtual ~SpeechRecognitionManagerImpl(); | 91 virtual ~SpeechRecognitionManagerImpl(); |
95 | 92 |
96 bool HasPendingRequest(int session_id) const; | |
97 | |
98 private: | 93 private: |
99 struct Request { | 94 // Data types for the internal Finite State Machine (FSM). |
100 Request(); | 95 enum FSMState { |
101 ~Request(); | 96 STATE_IDLE = 0, |
102 | 97 STATE_INTERACTIVE, |
103 InputTagSpeechDispatcherHost* delegate; | 98 STATE_BACKGROUND, |
104 scoped_refptr<content::SpeechRecognizer> recognizer; | 99 STATE_WAITING_FOR_DELETION, |
105 bool is_active; // Set to true when recording or recognition is going on. | 100 STATE_MAX_VALUE = STATE_WAITING_FOR_DELETION |
106 }; | 101 }; |
107 | 102 |
108 struct SpeechRecognitionParams; | 103 enum FSMEvent { |
| 104 EVENT_ABORT = 0, |
| 105 EVENT_START, |
| 106 EVENT_STOP_CAPTURE, |
| 107 EVENT_SET_BACKGROUND, |
| 108 EVENT_RECOGNITION_ENDED, |
| 109 EVENT_RECOGNITION_RESULT, |
| 110 EVENT_RECOGNITION_ERROR, |
| 111 EVENT_MAX_VALUE = EVENT_RECOGNITION_ERROR |
| 112 }; |
109 | 113 |
110 InputTagSpeechDispatcherHost* GetDelegate(int session_id) const; | 114 struct Session { |
| 115 Session(); |
| 116 ~Session(); |
111 | 117 |
112 void CheckRenderViewTypeAndStartRecognition( | 118 int id; |
113 const SpeechRecognitionParams& params); | 119 content::SpeechRecognitionEventListener* event_listener; |
114 void ProceedStartingRecognition(const SpeechRecognitionParams& params); | 120 content::SpeechRecognitionSessionContext context; |
| 121 scoped_refptr<SpeechRecognizerImpl> recognizer; |
| 122 FSMState state; |
| 123 bool error_occurred; |
| 124 }; |
115 | 125 |
116 void CancelRecognitionAndInformDelegate(int session_id); | 126 struct FSMEventArgs { |
| 127 explicit FSMEventArgs(FSMEvent event_value); |
| 128 ~FSMEventArgs(); |
117 | 129 |
118 typedef std::map<int, Request> SpeechRecognizerMap; | 130 FSMEvent event; |
119 SpeechRecognizerMap requests_; | 131 content::SpeechRecognitionError speech_error; |
120 std::string request_info_; | 132 }; |
121 bool can_report_metrics_; | 133 |
122 int recording_session_id_; | 134 // Callback issued by the SpeechRecognitionManagerDelegate for asynchronously |
123 scoped_ptr<content::SpeechRecognitionManagerDelegate> delegate_; | 135 // reporting the result of the CheckRecognitionIsAllowed call. |
| 136 void RecognitionAllowedCallback(int session_id, bool is_allowed); |
| 137 |
| 138 // Entry point for pushing any external event into the session handling FSM. |
| 139 void DispatchEvent(int session_id, FSMEventArgs args); |
| 140 |
| 141 // Defines the behavior of the session handling FSM, selecting the appropriate |
| 142 // transition according to the session, its current state and the event. |
| 143 FSMState ExecuteTransitionAndGetNextState(Session& session, |
| 144 const FSMEventArgs& event_args); |
| 145 |
| 146 // The methods below handle transitions of the session handling FSM. |
| 147 FSMState SessionStart(Session& session, const FSMEventArgs& event_args); |
| 148 FSMState SessionAbort(Session& session, const FSMEventArgs& event_args); |
| 149 FSMState SessionStopAudioCapture(Session& session, |
| 150 const FSMEventArgs& event_args); |
| 151 FSMState SessionAbortIfCapturingAudioOrBackground( |
| 152 Session& session, const FSMEventArgs& event_args); |
| 153 FSMState SessionSetBackground(Session& session, |
| 154 const FSMEventArgs& event_args); |
| 155 FSMState SessionReportError(Session& session, const FSMEventArgs& event_args); |
| 156 FSMState SessionReportNoMatch(Session& session, |
| 157 const FSMEventArgs& event_args); |
| 158 FSMState SessionDelete(Session& session, const FSMEventArgs& event_args); |
| 159 FSMState DoNothing(Session& session, const FSMEventArgs& event_args); |
| 160 FSMState NotFeasible(Session& session, const FSMEventArgs& event_args); |
| 161 |
| 162 bool SessionExists(int session_id) const; |
| 163 content::SpeechRecognitionEventListener* GetListener(int session_id) const; |
| 164 int GetNextSessionID(); |
| 165 |
| 166 typedef std::map<int, Session> SessionsTable; |
| 167 SessionsTable sessions_; |
| 168 int interactive_session_id_; |
| 169 int last_session_id_; |
| 170 bool is_dispatching_event_; |
| 171 content::SpeechRecognitionManagerDelegate* delegate_; |
124 }; | 172 }; |
125 | 173 |
126 } // namespace speech | 174 } // namespace speech |
127 | 175 |
128 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ | 176 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ |
OLD | NEW |