Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ | 5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ |
| 6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ | 6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ |
| 7 | 7 |
| 8 #include <map> | 8 #include <map> |
| 9 #include <string> | 9 #include <string> |
| 10 | 10 |
| 11 #include "base/basictypes.h" | 11 #include "base/basictypes.h" |
| 12 #include "base/callback.h" | |
| 12 #include "base/compiler_specific.h" | 13 #include "base/compiler_specific.h" |
| 13 #include "base/memory/ref_counted.h" | |
| 14 #include "base/memory/scoped_ptr.h" | |
| 15 #include "base/memory/singleton.h" | 14 #include "base/memory/singleton.h" |
| 15 #include "content/browser/speech/speech_recognizer_impl.h" | |
|
jam
2012/04/24 15:56:32
nit: just forward declare instead of including the…
Primiano Tucci (use gerrit)
2012/04/25 11:30:03
Done.
| |
| 16 #include "content/common/content_export.h" | |
|
jam
2012/04/24 15:56:32
nit: not needed
Primiano Tucci (use gerrit)
2012/04/25 11:30:03
Done.
| |
| 16 #include "content/public/browser/speech_recognition_event_listener.h" | 17 #include "content/public/browser/speech_recognition_event_listener.h" |
| 17 #include "content/public/browser/speech_recognition_manager.h" | 18 #include "content/public/browser/speech_recognition_manager.h" |
| 18 #include "ui/gfx/rect.h" | 19 #include "content/public/browser/speech_recognition_session_context.h" |
| 20 #include "content/public/common/speech_recognition_error.h" | |
| 19 | 21 |
| 20 namespace content { | 22 namespace content { |
| 23 struct SpeechRecognitionError; | |
|
jam
2012/04/24 15:56:32
are you sure you need all these forward declared g…
Primiano Tucci (use gerrit)
2012/04/25 11:30:03
Done.
| |
| 24 class SpeechRecognitionManagerDelegate; | |
| 25 struct SpeechRecognitionResult; | |
| 26 struct SpeechRecognitionSessionConfig; | |
| 21 class ResourceContext; | 27 class ResourceContext; |
| 22 class SpeechRecognitionManagerDelegate; | |
| 23 class SpeechRecognitionPreferences; | |
| 24 struct SpeechRecognitionResult; | |
| 25 class SpeechRecognizer; | |
| 26 } | |
| 27 | |
| 28 namespace net { | |
| 29 class URLRequestContextGetter; | |
| 30 } | 28 } |
| 31 | 29 |
| 32 namespace speech { | 30 namespace speech { |
| 33 | 31 |
| 34 class InputTagSpeechDispatcherHost; | 32 // This is the manager for speech recognition. It is a singleton instance in |
| 35 | 33 // the browser process and can serve several requests. Each recognition request |
| 36 class CONTENT_EXPORT SpeechRecognitionManagerImpl | 34 // corresponds to a session, initiated via |CreateSession|. |
| 37 : NON_EXPORTED_BASE(public content::SpeechRecognitionManager), | 35 // At any moment the manager has at most one "interactive" session (identified |
| 38 NON_EXPORTED_BASE(public content::SpeechRecognitionEventListener) { | 36 // by |interactive_session_id_|), that is the session that is currently holding |
| 37 // user attention. For privacy reasons, only the interactive session is allowed | |
| 38 // to capture audio from the microphone. However, after audio capture is | |
| 39 // completed, a session can be sent to background and can live in parallel with | |
| 40 // other sessions, while waiting for its results. | |
| 41 // | |
| 42 // In more detail, SpeechRecognitionManager has the following responsibilities: | |
| 43 // - Handles requests received from various render views and makes sure only | |
| 44 // one of them accesses the audio device at any given time. | |
| 45 // - Relays recognition results/status/error events of each session to the | |
| 46 // corresponding listener (demuxing on the basis of their session_id). | |
| 47 // - Handles the instantiation of SpeechRecognitionEngine objects when | |
| 48 // requested by SpeechRecognitionSessions. | |
| 49 class CONTENT_EXPORT SpeechRecognitionManagerImpl : | |
| 50 public NON_EXPORTED_BASE(content::SpeechRecognitionManager), | |
| 51 public NON_EXPORTED_BASE(content::SpeechRecognitionEventListener) { | |
| 39 public: | 52 public: |
| 40 static SpeechRecognitionManagerImpl* GetInstance(); | 53 static SpeechRecognitionManagerImpl* GetInstance(); |
| 41 | 54 |
| 42 // SpeechRecognitionManager implementation: | 55 // SpeechRecognitionManager implementation. |
| 43 virtual void StartRecognitionForRequest(int session_id) OVERRIDE; | 56 virtual int CreateSession( |
| 44 virtual void CancelRecognitionForRequest(int session_id) OVERRIDE; | 57 const content::SpeechRecognitionSessionConfig& config, |
| 45 virtual void FocusLostForRequest(int session_id) OVERRIDE; | 58 SpeechRecognitionEventListener* event_listener) OVERRIDE; |
| 59 virtual void StartSession(int session_id) OVERRIDE; | |
| 60 virtual void AbortSession(int session_id) OVERRIDE; | |
| 61 virtual void AbortAllSessionsForListener( | |
| 62 content::SpeechRecognitionEventListener* listener) OVERRIDE; | |
| 63 virtual void StopAudioCaptureForSession(int session_id) OVERRIDE; | |
| 64 virtual void SendSessionToBackground(int session_id) OVERRIDE; | |
| 65 virtual content::SpeechRecognitionSessionContext GetSessionContext( | |
| 66 int session_id) const OVERRIDE; | |
| 67 virtual int LookupSessionByContext( | |
| 68 base::Callback<bool( | |
| 69 const content::SpeechRecognitionSessionContext&)> matcher) | |
| 70 const OVERRIDE; | |
| 46 virtual bool HasAudioInputDevices() OVERRIDE; | 71 virtual bool HasAudioInputDevices() OVERRIDE; |
| 47 virtual bool IsCapturingAudio() OVERRIDE; | 72 virtual bool IsCapturingAudio() OVERRIDE; |
| 48 virtual string16 GetAudioInputDeviceModel() OVERRIDE; | 73 virtual string16 GetAudioInputDeviceModel() OVERRIDE; |
| 49 virtual void ShowAudioInputSettings() OVERRIDE; | 74 virtual void ShowAudioInputSettings() OVERRIDE; |
| 50 | 75 |
| 51 // Handlers for requests from render views. | |
| 52 | |
| 53 // |delegate| is a weak pointer and should remain valid until | |
| 54 // its |DidCompleteRecognition| method is called or recognition is cancelled. | |
| 55 // |render_process_id| is the ID of the renderer process initiating the | |
| 56 // request. | |
| 57 // |element_rect| is the display bounds of the html element requesting speech | |
| 58 // input (in page coordinates). | |
| 59 virtual void StartRecognition( | |
| 60 InputTagSpeechDispatcherHost* delegate, | |
| 61 int session_id, | |
| 62 int render_process_id, | |
| 63 int render_view_id, | |
| 64 const gfx::Rect& element_rect, | |
| 65 const std::string& language, | |
| 66 const std::string& grammar, | |
| 67 const std::string& origin_url, | |
| 68 net::URLRequestContextGetter* context_getter, | |
| 69 content::SpeechRecognitionPreferences* speech_recognition_prefs); | |
| 70 virtual void CancelRecognition(int session_id); | |
| 71 virtual void CancelAllRequestsWithDelegate( | |
| 72 InputTagSpeechDispatcherHost* delegate); | |
| 73 virtual void StopRecording(int session_id); | |
| 74 | |
| 75 // SpeechRecognitionEventListener methods. | 76 // SpeechRecognitionEventListener methods. |
| 76 virtual void OnRecognitionStart(int session_id) OVERRIDE; | 77 virtual void OnRecognitionStart(int session_id) OVERRIDE; |
| 77 virtual void OnAudioStart(int session_id) OVERRIDE; | 78 virtual void OnAudioStart(int session_id) OVERRIDE; |
| 78 virtual void OnEnvironmentEstimationComplete(int session_id) OVERRIDE; | 79 virtual void OnEnvironmentEstimationComplete(int session_id) OVERRIDE; |
| 79 virtual void OnSoundStart(int session_id) OVERRIDE; | 80 virtual void OnSoundStart(int session_id) OVERRIDE; |
| 80 virtual void OnSoundEnd(int session_id) OVERRIDE; | 81 virtual void OnSoundEnd(int session_id) OVERRIDE; |
| 81 virtual void OnAudioEnd(int session_id) OVERRIDE; | 82 virtual void OnAudioEnd(int session_id) OVERRIDE; |
| 82 virtual void OnRecognitionEnd(int session_id) OVERRIDE; | 83 virtual void OnRecognitionEnd(int session_id) OVERRIDE; |
| 83 virtual void OnRecognitionResult( | 84 virtual void OnRecognitionResult( |
| 84 int session_id, const content::SpeechRecognitionResult& result) OVERRIDE; | 85 int session_id, const content::SpeechRecognitionResult& result) OVERRIDE; |
| 85 virtual void OnRecognitionError( | 86 virtual void OnRecognitionError( |
| 86 int session_id, const content::SpeechRecognitionError& error) OVERRIDE; | 87 int session_id, const content::SpeechRecognitionError& error) OVERRIDE; |
| 87 virtual void OnAudioLevelsChange( | 88 virtual void OnAudioLevelsChange(int session_id, float volume, |
| 88 int session_id, float volume, float noise_volume) OVERRIDE; | 89 float noise_volume) OVERRIDE; |
| 89 | 90 |
| 90 protected: | 91 protected: |
| 91 // Private constructor to enforce singleton. | 92 // Private constructor to enforce singleton. |
| 92 friend struct DefaultSingletonTraits<SpeechRecognitionManagerImpl>; | 93 friend struct DefaultSingletonTraits<SpeechRecognitionManagerImpl>; |
| 93 SpeechRecognitionManagerImpl(); | 94 SpeechRecognitionManagerImpl(); |
| 94 virtual ~SpeechRecognitionManagerImpl(); | 95 virtual ~SpeechRecognitionManagerImpl(); |
| 95 | 96 |
| 96 bool HasPendingRequest(int session_id) const; | |
| 97 | |
| 98 private: | 97 private: |
| 99 struct Request { | 98 enum FSMState { |
|
jam
2012/04/24 15:56:32
nit: you have FSM all over but what does it stand…
Primiano Tucci (use gerrit)
2012/04/25 11:30:03
Right. Added comment.
| |
| 100 Request(); | 99 STATE_IDLE = 0, |
| 101 ~Request(); | 100 STATE_INTERACTIVE, |
| 102 | 101 STATE_BACKGROUND, |
| 103 InputTagSpeechDispatcherHost* delegate; | 102 STATE_WAITING_FOR_DELETION, |
| 104 scoped_refptr<content::SpeechRecognizer> recognizer; | 103 STATE_MAX_VALUE = STATE_WAITING_FOR_DELETION |
| 105 bool is_active; // Set to true when recording or recognition is going on. | |
| 106 }; | 104 }; |
| 107 | 105 |
| 108 struct SpeechRecognitionParams; | 106 enum FSMEvent { |
| 107 EVENT_ABORT = 0, | |
| 108 EVENT_START, | |
| 109 EVENT_STOP_CAPTURE, | |
| 110 EVENT_SET_BACKGROUND, | |
| 111 EVENT_RECOGNITION_ENDED, | |
| 112 EVENT_RECOGNITION_RESULT, | |
| 113 EVENT_RECOGNITION_ERROR, | |
| 114 EVENT_MAX_VALUE = EVENT_RECOGNITION_ERROR | |
| 115 }; | |
| 109 | 116 |
| 110 InputTagSpeechDispatcherHost* GetDelegate(int session_id) const; | 117 struct Session { |
| 118 Session(); | |
| 119 ~Session(); | |
| 111 | 120 |
| 112 void CheckRenderViewTypeAndStartRecognition( | 121 int id; |
| 113 const SpeechRecognitionParams& params); | 122 content::SpeechRecognitionEventListener* event_listener; |
| 114 void ProceedStartingRecognition(const SpeechRecognitionParams& params); | 123 content::SpeechRecognitionSessionContext context; |
| 124 scoped_refptr<SpeechRecognizerImpl> recognizer; | |
| 125 FSMState state; | |
| 126 bool error_occurred; | |
| 127 }; | |
| 115 | 128 |
| 116 void CancelRecognitionAndInformDelegate(int session_id); | 129 struct FSMEventArgs { |
| 130 explicit FSMEventArgs(FSMEvent event_value); | |
| 131 ~FSMEventArgs(); | |
| 117 | 132 |
| 118 typedef std::map<int, Request> SpeechRecognizerMap; | 133 FSMEvent event; |
| 119 SpeechRecognizerMap requests_; | 134 content::SpeechRecognitionError speech_error; |
| 120 std::string request_info_; | 135 }; |
| 121 bool can_report_metrics_; | 136 |
| 122 int recording_session_id_; | 137 // Entry point for pushing any external event into the session handling FSM. |
| 123 scoped_ptr<content::SpeechRecognitionManagerDelegate> delegate_; | 138 void DispatchEvent(int session_id, FSMEventArgs args); |
| 139 | |
| 140 // Defines the behavior of the session handling FSM, selecting the appropriate | |
| 141 // transition according to the session, its current state and the event. | |
| 142 FSMState ExecuteTransitionAndGetNextState(Session& session, | |
| 143 const FSMEventArgs& event_args); | |
| 144 | |
| 145 // The methods below handle transitions of the session handling FSM. | |
| 146 FSMState SessionStart(Session& session, const FSMEventArgs& event_args); | |
| 147 FSMState SessionAbort(Session& session, const FSMEventArgs& event_args); | |
| 148 FSMState SessionStopAudioCapture(Session& session, | |
| 149 const FSMEventArgs& event_args); | |
| 150 FSMState SessionAbortIfCapturingAudioOrBackground( | |
| 151 Session& session, const FSMEventArgs& event_args); | |
| 152 FSMState SessionSetBackground(Session& session, | |
| 153 const FSMEventArgs& event_args); | |
| 154 FSMState SessionReportError(Session& session, const FSMEventArgs& event_args); | |
| 155 FSMState SessionReportNoMatch(Session& session, | |
| 156 const FSMEventArgs& event_args); | |
| 157 FSMState SessionDelete(Session& session, const FSMEventArgs& event_args); | |
| 158 FSMState DoNothing(Session& session, const FSMEventArgs& event_args); | |
| 159 FSMState NotFeasible(Session& session, const FSMEventArgs& event_args); | |
| 160 | |
| 161 bool SessionExists(int session_id) const; | |
| 162 content::SpeechRecognitionEventListener* GetListener(int session_id) const; | |
| 163 int GetNextSessionID(); | |
| 164 | |
| 165 typedef std::map<int, Session> SessionsTable; | |
| 166 SessionsTable sessions_; | |
| 167 int interactive_session_id_; | |
| 168 int last_session_id_; | |
| 169 bool is_dispatching_event_; | |
| 170 content::SpeechRecognitionManagerDelegate* delegate_; | |
| 124 }; | 171 }; |
| 125 | 172 |
| 126 } // namespace speech | 173 } // namespace speech |
| 127 | 174 |
| 128 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ | 175 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ |
| OLD | NEW |