Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ | 5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ |
| 6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ | 6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ |
| 7 | 7 |
| 8 #include <map> | 8 #include <map> |
| 9 #include <string> | 9 #include <string> |
| 10 | 10 |
| 11 #include "base/basictypes.h" | 11 #include "base/basictypes.h" |
| 12 #include "base/callback.h" | |
| 12 #include "base/compiler_specific.h" | 13 #include "base/compiler_specific.h" |
| 13 #include "base/memory/ref_counted.h" | |
| 14 #include "base/memory/scoped_ptr.h" | |
| 15 #include "base/memory/singleton.h" | 14 #include "base/memory/singleton.h" |
| 15 #include "content/browser/speech/speech_recognizer_impl.h" | |
| 16 #include "content/common/content_export.h" | |
| 16 #include "content/public/browser/speech_recognition_event_listener.h" | 17 #include "content/public/browser/speech_recognition_event_listener.h" |
| 17 #include "content/public/browser/speech_recognition_manager.h" | 18 #include "content/public/browser/speech_recognition_manager.h" |
| 18 #include "ui/gfx/rect.h" | 19 #include "content/public/browser/speech_recognition_session_context.h" |
| 20 #include "content/public/common/speech_recognition_error.h" | |
| 19 | 21 |
| 20 namespace content { | 22 namespace content { |
| 21 class ResourceContext; | 23 class ResourceContext; |
| 22 class SpeechRecognitionManagerDelegate; | 24 class SpeechRecognitionManagerDelegate; |
| 23 class SpeechRecognitionPreferences; | 25 struct SpeechRecognitionSessionConfig; |
| 26 struct SpeechRecognitionError; | |
|
Satish
2012/04/19 13:03:19
order alphabetically
Primiano Tucci (use gerrit)
2012/04/20 16:06:43
Done.
| |
| 24 struct SpeechRecognitionResult; | 27 struct SpeechRecognitionResult; |
| 25 class SpeechRecognizer; | |
| 26 } | |
| 27 | |
| 28 namespace net { | |
| 29 class URLRequestContextGetter; | |
| 30 } | 28 } |
| 31 | 29 |
| 32 namespace speech { | 30 namespace speech { |
| 33 | 31 |
| 34 class InputTagSpeechDispatcherHost; | 32 // This is the manager for speech recognition in the browser process. |
| 35 | 33 // The manager is unique for each renderer, and can serve several requests. Each |
| 36 class CONTENT_EXPORT SpeechRecognitionManagerImpl | 34 // recognition request corresponds to a session, initiated via |CreateSession|. |
| 37 : NON_EXPORTED_BASE(public content::SpeechRecognitionManager), | 35 // At any moment the manager has at most one "interactive" session (identified |
| 38 NON_EXPORTED_BASE(public content::SpeechRecognitionEventListener) { | 36 // by |interactive_session_id_|), that is the session that is currently holding |
| 37 // user attention (showing a bubble). For privacy reasons, only the interactive | |
|
Satish
2012/04/19 13:03:19
since the bubble concept is not applicable for all
Primiano Tucci (use gerrit)
2012/04/20 16:06:43
Done.
| |
| 38 // session is allowed to capture audio from the microphone. However, after audio | |
| 39 // capture is completed, a session can be detached and can live in parallel | |
| 40 // with other sessions, while waiting for its results. | |
| 41 // | |
| 42 // In more detail, SpeechRecognitionManager has the following responsibilities: | |
| 43 // - Handles requests received from various render views and makes sure only | |
| 44 // one of them accesses the audio device coherently. | |
|
Satish
2012/04/19 13:03:19
coherently -> at any given time
Primiano Tucci (use gerrit)
2012/04/20 16:06:43
Done.
| |
| 45 // - Relays recognition results/status/error events of each session to the | |
| 46 // corresponding listener (demuxing on the basis of their session_id). | |
| 47 // - Handles the instantiation of SpeechRecognitionEngine objects when | |
| 48 // requested by SpeechRecognitionSessions. | |
| 49 class CONTENT_EXPORT SpeechRecognitionManagerImpl : | |
| 50 public NON_EXPORTED_BASE(content::SpeechRecognitionManager), | |
| 51 public NON_EXPORTED_BASE(content::SpeechRecognitionEventListener) { | |
| 39 public: | 52 public: |
| 40 static SpeechRecognitionManagerImpl* GetInstance(); | 53 static SpeechRecognitionManagerImpl* GetInstance(); |
| 41 | 54 |
| 42 // SpeechRecognitionManager implementation: | 55 // SpeechRecognitionManager implementation. |
| 43 virtual void StartRecognitionForRequest(int session_id) OVERRIDE; | 56 virtual int CreateSession( |
| 44 virtual void CancelRecognitionForRequest(int session_id) OVERRIDE; | 57 content::SpeechRecognitionSessionConfig& config, |
| 45 virtual void FocusLostForRequest(int session_id) OVERRIDE; | 58 SpeechRecognitionEventListener* event_listener) OVERRIDE; |
| 59 virtual void StartSession(int session_id) OVERRIDE; | |
| 60 virtual void AbortSession(int session_id) OVERRIDE; | |
| 61 virtual void AbortAllSessionsForListener( | |
| 62 content::SpeechRecognitionEventListener* listener) OVERRIDE; | |
| 63 virtual void StopAudioCaptureForSession(int session_id) OVERRIDE; | |
| 64 virtual void DetachSession(int session_id) OVERRIDE; | |
| 65 virtual content::SpeechRecognitionSessionContext& GetSessionContext( | |
| 66 int session_id) const OVERRIDE; | |
| 67 virtual int LookupSessionByContext( | |
| 68 base::Callback<bool( | |
| 69 const content::SpeechRecognitionSessionContext&)> matcher) | |
| 70 const OVERRIDE; | |
| 46 virtual bool HasAudioInputDevices() OVERRIDE; | 71 virtual bool HasAudioInputDevices() OVERRIDE; |
| 47 virtual bool IsCapturingAudio() OVERRIDE; | 72 virtual bool IsCapturingAudio() OVERRIDE; |
| 48 virtual string16 GetAudioInputDeviceModel() OVERRIDE; | 73 virtual string16 GetAudioInputDeviceModel() OVERRIDE; |
| 49 virtual void ShowAudioInputSettings() OVERRIDE; | 74 virtual void ShowAudioInputSettings() OVERRIDE; |
| 50 | 75 |
| 51 // Handlers for requests from render views. | |
| 52 | |
| 53 // |delegate| is a weak pointer and should remain valid until | |
| 54 // its |DidCompleteRecognition| method is called or recognition is cancelled. | |
| 55 // |render_process_id| is the ID of the renderer process initiating the | |
| 56 // request. | |
| 57 // |element_rect| is the display bounds of the html element requesting speech | |
| 58 // input (in page coordinates). | |
| 59 virtual void StartRecognition( | |
| 60 InputTagSpeechDispatcherHost* delegate, | |
| 61 int session_id, | |
| 62 int render_process_id, | |
| 63 int render_view_id, | |
| 64 const gfx::Rect& element_rect, | |
| 65 const std::string& language, | |
| 66 const std::string& grammar, | |
| 67 const std::string& origin_url, | |
| 68 net::URLRequestContextGetter* context_getter, | |
| 69 content::SpeechRecognitionPreferences* speech_recognition_prefs); | |
| 70 virtual void CancelRecognition(int session_id); | |
| 71 virtual void CancelAllRequestsWithDelegate( | |
| 72 InputTagSpeechDispatcherHost* delegate); | |
| 73 virtual void StopRecording(int session_id); | |
| 74 | |
| 75 // SpeechRecognitionEventListener methods. | 76 // SpeechRecognitionEventListener methods. |
| 76 virtual void OnRecognitionStart(int session_id) OVERRIDE; | 77 virtual void OnRecognitionStart(int session_id) OVERRIDE; |
| 77 virtual void OnAudioStart(int session_id) OVERRIDE; | 78 virtual void OnAudioStart(int session_id) OVERRIDE; |
| 78 virtual void OnEnvironmentEstimationComplete(int session_id) OVERRIDE; | 79 virtual void OnEnvironmentEstimationComplete(int session_id) OVERRIDE; |
| 79 virtual void OnSoundStart(int session_id) OVERRIDE; | 80 virtual void OnSoundStart(int session_id) OVERRIDE; |
| 80 virtual void OnSoundEnd(int session_id) OVERRIDE; | 81 virtual void OnSoundEnd(int session_id) OVERRIDE; |
| 81 virtual void OnAudioEnd(int session_id) OVERRIDE; | 82 virtual void OnAudioEnd(int session_id) OVERRIDE; |
| 82 virtual void OnRecognitionEnd(int session_id) OVERRIDE; | 83 virtual void OnRecognitionEnd(int session_id) OVERRIDE; |
| 83 virtual void OnRecognitionResult( | 84 virtual void OnRecognitionResult( |
| 84 int session_id, const content::SpeechRecognitionResult& result) OVERRIDE; | 85 int session_id, const content::SpeechRecognitionResult& result) OVERRIDE; |
| 85 virtual void OnRecognitionError( | 86 virtual void OnRecognitionError( |
| 86 int session_id, const content::SpeechRecognitionError& error) OVERRIDE; | 87 int session_id, const content::SpeechRecognitionError& error) OVERRIDE; |
| 87 virtual void OnAudioLevelsChange( | 88 virtual void OnAudioLevelsChange(int session_id, float volume, |
| 88 int session_id, float volume, float noise_volume) OVERRIDE; | 89 float noise_volume) OVERRIDE; |
| 89 | 90 |
| 90 protected: | 91 protected: |
| 91 // Private constructor to enforce singleton. | 92 // Private constructor to enforce singleton. |
| 92 friend struct DefaultSingletonTraits<SpeechRecognitionManagerImpl>; | 93 friend struct DefaultSingletonTraits<SpeechRecognitionManagerImpl>; |
| 93 SpeechRecognitionManagerImpl(); | 94 SpeechRecognitionManagerImpl(); |
| 94 virtual ~SpeechRecognitionManagerImpl(); | 95 virtual ~SpeechRecognitionManagerImpl(); |
| 95 | 96 |
| 96 bool HasPendingRequest(int session_id) const; | |
| 97 | |
| 98 private: | 97 private: |
| 99 struct Request { | 98 enum FSMState { |
| 100 Request(); | 99 STATE_IDLE = 0, |
| 101 ~Request(); | 100 STATE_INTERACTIVE, |
| 102 | 101 STATE_DETACHABLE, |
| 103 InputTagSpeechDispatcherHost* delegate; | 102 STATE_DETACHED, |
| 104 scoped_refptr<content::SpeechRecognizer> recognizer; | 103 STATE_ABORTING, |
| 105 bool is_active; // Set to true when recording or recognition is going on. | 104 STATE_ENDED_WITH_ERROR, |
| 105 STATE_MAX_VALUE = STATE_ENDED_WITH_ERROR | |
| 106 }; | 106 }; |
| 107 | 107 |
| 108 struct SpeechRecognitionParams; | 108 enum FSMEvent { |
| 109 EVENT_ABORT = 0, | |
| 110 EVENT_START, | |
| 111 EVENT_STOP_CAPTURE, | |
| 112 EVENT_DETACH, | |
| 113 EVENT_AUDIO_ENDED, | |
| 114 EVENT_RECOGNITION_ENDED, | |
| 115 EVENT_RECOGNITION_RESULT, | |
| 116 EVENT_RECOGNITION_ERROR, | |
| 117 EVENT_MAX_VALUE = EVENT_RECOGNITION_ERROR | |
| 118 }; | |
| 109 | 119 |
| 110 InputTagSpeechDispatcherHost* GetDelegate(int session_id) const; | 120 struct Session { |
| 121 Session(); | |
| 122 ~Session(); | |
| 111 | 123 |
| 112 void CheckRenderViewTypeAndStartRecognition( | 124 int id; |
| 113 const SpeechRecognitionParams& params); | 125 content::SpeechRecognitionEventListener* event_listener; |
| 114 void ProceedStartingRecognition(const SpeechRecognitionParams& params); | 126 content::SpeechRecognitionSessionContext context; |
| 127 scoped_refptr<SpeechRecognizerImpl> recognizer; | |
| 128 FSMState state; | |
| 129 bool error_occurred; | |
| 130 }; | |
| 115 | 131 |
| 116 void CancelRecognitionAndInformDelegate(int session_id); | 132 struct FSMEventArgs { |
| 133 explicit FSMEventArgs(FSMEvent event_value); | |
| 134 ~FSMEventArgs(); | |
| 117 | 135 |
| 118 typedef std::map<int, Request> SpeechRecognizerMap; | 136 FSMEvent event; |
| 119 SpeechRecognizerMap requests_; | 137 const content::SpeechRecognitionResult* speech_result; |
| 120 std::string request_info_; | 138 const content::SpeechRecognitionError* speech_error; |
| 121 bool can_report_metrics_; | 139 }; |
| 122 int recording_session_id_; | 140 |
| 123 scoped_ptr<content::SpeechRecognitionManagerDelegate> delegate_; | 141 // Entry point for pushing any external event into the session handling FSM. |
| 142 void DispatchEvent(int session_id, FSMEventArgs args); | |
| 143 | |
| 144 // Defines the behavior of the session handling FSM, selecting the appropriate | |
| 145 // transition according to the session, its current state and the event. | |
| 146 FSMState ExecuteTransitionAndGetNextState(Session& session, | |
| 147 const FSMEventArgs& event_args); | |
| 148 | |
| 149 // The methods below handle transitions of the session handling FSM. | |
| 150 FSMState SessionStart(Session& session, const FSMEventArgs& event_args); | |
| 151 FSMState SessionAbort(Session& session, const FSMEventArgs& event_args); | |
| 152 FSMState SessionStopAudioCapture(Session& session, | |
| 153 const FSMEventArgs& event_args); | |
| 154 FSMState SessionDetach(Session& session, const FSMEventArgs& event_args); | |
| 155 FSMState SessionReportError(Session& session, const FSMEventArgs& event_args); | |
| 156 FSMState SessionReportNoMatch(Session& session, | |
| 157 const FSMEventArgs& event_args); | |
| 158 FSMState SessionDelete(Session& session, const FSMEventArgs& event_args); | |
| 159 FSMState SessionSetDetachable(Session& session, | |
| 160 const FSMEventArgs& event_args); | |
| 161 FSMState DoNothing(Session& session, const FSMEventArgs& event_args); | |
| 162 FSMState NotFeasible(Session& session, const FSMEventArgs& event_args); | |
| 163 | |
| 164 bool SessionExists(int session_id) const; | |
| 165 content::SpeechRecognitionEventListener* GetListener(int session_id) const; | |
| 166 int GetNextSessionID(); | |
| 167 | |
| 168 typedef std::map<int, Session> SessionsTable; | |
| 169 SessionsTable sessions_; | |
| 170 int interactive_session_id_; | |
|
Satish
2012/04/19 13:03:19
interactive -> active ?
Primiano Tucci (use gerrit)
2012/04/20 16:06:43
A session can be active but not interactive (while
| |
| 171 int last_session_id_; | |
| 172 bool is_dispatching_event_; | |
| 173 content::SpeechRecognitionManagerDelegate* delegate_; | |
| 124 }; | 174 }; |
| 125 | 175 |
| 126 } // namespace speech | 176 } // namespace speech |
| 127 | 177 |
| 128 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ | 178 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ |
| OLD | NEW |