content/browser/speech/speech_recognition_manager_impl.h - Issue 9972011: Speech refactoring: Reimplemented SpeechRecognitionManagerImpl as a FSM. (CL1.7)

Side by Side Diff: content/browser/speech/speech_recognition_manager_impl.h

Issue 9972011: Speech refactoring: Reimplemented SpeechRecognitionManagerImpl as a FSM. (CL1.7) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Rebased from master. Created 8 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« no previous file with comments | « content/browser/speech/speech_recognition_browsertest.cc ('k') | content/browser/speech/speech_recognition_manager_impl.cc » ('j') | content/browser/speech/speech_recognition_manager_impl.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_	5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_

6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_	6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_

7	7

8 #include <map>	8 #include <map>

9 #include <string>	9 #include <string>

10	10

11 #include "base/basictypes.h"	11 #include "base/basictypes.h"

	12 #include "base/callback.h"

12 #include "base/compiler_specific.h"	13 #include "base/compiler_specific.h"

13 #include "base/memory/ref_counted.h"

14 #include "base/memory/scoped_ptr.h"

15 #include "base/memory/singleton.h"	14 #include "base/memory/singleton.h"

	15 #include "content/browser/speech/speech_recognizer_impl.h"

	16 #include "content/common/content_export.h"

16 #include "content/public/browser/speech_recognition_event_listener.h"	17 #include "content/public/browser/speech_recognition_event_listener.h"

17 #include "content/public/browser/speech_recognition_manager.h"	18 #include "content/public/browser/speech_recognition_manager.h"

18 #include "ui/gfx/rect.h"	19 #include "content/public/browser/speech_recognition_session_context.h"

	20 #include "content/public/common/speech_recognition_error.h"

19	21

20 namespace content {	22 namespace content {

21 class ResourceContext;	23 class ResourceContext;

22 class SpeechRecognitionManagerDelegate;	24 class SpeechRecognitionManagerDelegate;

23 class SpeechRecognitionPreferences;	25 struct SpeechRecognitionSessionConfig;

	26 struct SpeechRecognitionError;
	Satish 2012/04/19 13:03:19: order alphabetically
	Primiano Tucci (use gerrit) 2012/04/20 16:06:43: On 2012/04/19 13:03:19, Satish wrote: > order alphabetically. Reply: Done.
24 struct SpeechRecognitionResult;	27 struct SpeechRecognitionResult;

25 class SpeechRecognizer;

26 }

27

28 namespace net {

29 class URLRequestContextGetter;

30 }	28 }

31	29

32 namespace speech {	30 namespace speech {

33	31

34 class InputTagSpeechDispatcherHost;	32 // This is the manager for speech recognition in the browser process.

35	33 // The manager is unique for each renderer, and can serve several requests. Each

36 class CONTENT_EXPORT SpeechRecognitionManagerImpl	34 // recognition request corresponds to a session, initiated via |CreateSession|.

37 : NON_EXPORTED_BASE(public content::SpeechRecognitionManager),	35 // At any given moment the manager has at most one "interactive" session (identified

38 NON_EXPORTED_BASE(public content::SpeechRecognitionEventListener) {	36 // by |interactive_session_id_|), that is the session that is currently holding

	37 // user attention (showing a bubble). For privacy reasons, only the interactive
	Satish 2012/04/19 13:03:19: since the bubble concept is not applicable for all users of this class (e.g. extension api and in future continuous recognition) suggest removing that from the comment
	Primiano Tucci (use gerrit) 2012/04/20 16:06:43: On 2012/04/19 13:03:19, Satish wrote: > since the bubble concept is not applicable for all users of this class (e.g. extension api and in future continuous recognition) suggest removing that from the comment. Reply: Done.
	38 // session is allowed to capture audio from the microphone. However, after audio

	39 // capture is completed, a session can be detached and can live in parallel

	40 // with other sessions, while waiting for its results.

	41 //

	42 // In more detail, SpeechRecognitionManager has the following responsibilities:

	43 // - Handles requests received from various render views and makes sure only

	44 // one of them accesses the audio device coherently.
	Satish 2012/04/19 13:03:19: coherently -> at any given time
	Primiano Tucci (use gerrit) 2012/04/20 16:06:43: On 2012/04/19 13:03:19, Satish wrote: > coherently -> at any given time. Reply: Done.
	45 // - Relays recognition results/status/error events of each session to the

	46 // corresponding listener (demuxing on the basis of their session_id).

	47 // - Handles the instantiation of SpeechRecognitionEngine objects when

	48 // requested by SpeechRecognitionSessions.

	49 class CONTENT_EXPORT SpeechRecognitionManagerImpl :

	50 public NON_EXPORTED_BASE(content::SpeechRecognitionManager),

	51 public NON_EXPORTED_BASE(content::SpeechRecognitionEventListener) {

39 public:	52 public:

40 static SpeechRecognitionManagerImpl* GetInstance();	53 static SpeechRecognitionManagerImpl* GetInstance();

41	54

42 // SpeechRecognitionManager implementation:	55 // SpeechRecognitionManager implementation.

43 virtual void StartRecognitionForRequest(int session_id) OVERRIDE;	56 virtual int CreateSession(

44 virtual void CancelRecognitionForRequest(int session_id) OVERRIDE;	57 content::SpeechRecognitionSessionConfig& config,

45 virtual void FocusLostForRequest(int session_id) OVERRIDE;	58 SpeechRecognitionEventListener* event_listener) OVERRIDE;

	59 virtual void StartSession(int session_id) OVERRIDE;

	60 virtual void AbortSession(int session_id) OVERRIDE;

	61 virtual void AbortAllSessionsForListener(

	62 content::SpeechRecognitionEventListener* listener) OVERRIDE;

	63 virtual void StopAudioCaptureForSession(int session_id) OVERRIDE;

	64 virtual void DetachSession(int session_id) OVERRIDE;

	65 virtual content::SpeechRecognitionSessionContext& GetSessionContext(

	66 int session_id) const OVERRIDE;

	67 virtual int LookupSessionByContext(

	68 base::Callback<bool(

	69 const content::SpeechRecognitionSessionContext&)> matcher)

	70 const OVERRIDE;

46 virtual bool HasAudioInputDevices() OVERRIDE;	71 virtual bool HasAudioInputDevices() OVERRIDE;

47 virtual bool IsCapturingAudio() OVERRIDE;	72 virtual bool IsCapturingAudio() OVERRIDE;

48 virtual string16 GetAudioInputDeviceModel() OVERRIDE;	73 virtual string16 GetAudioInputDeviceModel() OVERRIDE;

49 virtual void ShowAudioInputSettings() OVERRIDE;	74 virtual void ShowAudioInputSettings() OVERRIDE;

50	75

51 // Handlers for requests from render views.

52

53 // |delegate| is a weak pointer and should remain valid until

54 // its |DidCompleteRecognition| method is called or recognition is cancelled.

55 // |render_process_id| is the ID of the renderer process initiating the

56 // request.

57 // |element_rect| is the display bounds of the html element requesting speech

58 // input (in page coordinates).

59 virtual void StartRecognition(

60 InputTagSpeechDispatcherHost* delegate,

61 int session_id,

62 int render_process_id,

63 int render_view_id,

64 const gfx::Rect& element_rect,

65 const std::string& language,

66 const std::string& grammar,

67 const std::string& origin_url,

68 net::URLRequestContextGetter* context_getter,

69 content::SpeechRecognitionPreferences* speech_recognition_prefs);

70 virtual void CancelRecognition(int session_id);

71 virtual void CancelAllRequestsWithDelegate(

72 InputTagSpeechDispatcherHost* delegate);

73 virtual void StopRecording(int session_id);

74

75 // SpeechRecognitionEventListener methods.	76 // SpeechRecognitionEventListener methods.

76 virtual void OnRecognitionStart(int session_id) OVERRIDE;	77 virtual void OnRecognitionStart(int session_id) OVERRIDE;

77 virtual void OnAudioStart(int session_id) OVERRIDE;	78 virtual void OnAudioStart(int session_id) OVERRIDE;

78 virtual void OnEnvironmentEstimationComplete(int session_id) OVERRIDE;	79 virtual void OnEnvironmentEstimationComplete(int session_id) OVERRIDE;

79 virtual void OnSoundStart(int session_id) OVERRIDE;	80 virtual void OnSoundStart(int session_id) OVERRIDE;

80 virtual void OnSoundEnd(int session_id) OVERRIDE;	81 virtual void OnSoundEnd(int session_id) OVERRIDE;

81 virtual void OnAudioEnd(int session_id) OVERRIDE;	82 virtual void OnAudioEnd(int session_id) OVERRIDE;

82 virtual void OnRecognitionEnd(int session_id) OVERRIDE;	83 virtual void OnRecognitionEnd(int session_id) OVERRIDE;

83 virtual void OnRecognitionResult(	84 virtual void OnRecognitionResult(

84 int session_id, const content::SpeechRecognitionResult& result) OVERRIDE;	85 int session_id, const content::SpeechRecognitionResult& result) OVERRIDE;

85 virtual void OnRecognitionError(	86 virtual void OnRecognitionError(

86 int session_id, const content::SpeechRecognitionError& error) OVERRIDE;	87 int session_id, const content::SpeechRecognitionError& error) OVERRIDE;

87 virtual void OnAudioLevelsChange(	88 virtual void OnAudioLevelsChange(int session_id, float volume,

88 int session_id, float volume, float noise_volume) OVERRIDE;	89 float noise_volume) OVERRIDE;

89	90

90 protected:	91 protected:

91 // Private constructor to enforce singleton.	92 // Private constructor to enforce singleton.

92 friend struct DefaultSingletonTraits<SpeechRecognitionManagerImpl>;	93 friend struct DefaultSingletonTraits<SpeechRecognitionManagerImpl>;

93 SpeechRecognitionManagerImpl();	94 SpeechRecognitionManagerImpl();

94 virtual ~SpeechRecognitionManagerImpl();	95 virtual ~SpeechRecognitionManagerImpl();

95	96

96 bool HasPendingRequest(int session_id) const;

97

98 private:	97 private:

99 struct Request {	98 enum FSMState {

100 Request();	99 STATE_IDLE = 0,

101 ~Request();	100 STATE_INTERACTIVE,

102	101 STATE_DETACHABLE,

103 InputTagSpeechDispatcherHost* delegate;	102 STATE_DETACHED,

104 scoped_refptr<content::SpeechRecognizer> recognizer;	103 STATE_ABORTING,

105 bool is_active; // Set to true when recording or recognition is going on.	104 STATE_ENDED_WITH_ERROR,

	105 STATE_MAX_VALUE = STATE_ENDED_WITH_ERROR

106 };	106 };

107	107

108 struct SpeechRecognitionParams;	108 enum FSMEvent {

	109 EVENT_ABORT = 0,

	110 EVENT_START,

	111 EVENT_STOP_CAPTURE,

	112 EVENT_DETACH,

	113 EVENT_AUDIO_ENDED,

	114 EVENT_RECOGNITION_ENDED,

	115 EVENT_RECOGNITION_RESULT,

	116 EVENT_RECOGNITION_ERROR,

	117 EVENT_MAX_VALUE = EVENT_RECOGNITION_ERROR

	118 };

109	119

110 InputTagSpeechDispatcherHost* GetDelegate(int session_id) const;	120 struct Session {

	121 Session();

	122 ~Session();

111	123

112 void CheckRenderViewTypeAndStartRecognition(	124 int id;

113 const SpeechRecognitionParams& params);	125 content::SpeechRecognitionEventListener* event_listener;

114 void ProceedStartingRecognition(const SpeechRecognitionParams& params);	126 content::SpeechRecognitionSessionContext context;

	127 scoped_refptr<SpeechRecognizerImpl> recognizer;

	128 FSMState state;

	129 bool error_occurred;

	130 };

115	131

116 void CancelRecognitionAndInformDelegate(int session_id);	132 struct FSMEventArgs {

	133 explicit FSMEventArgs(FSMEvent event_value);

	134 ~FSMEventArgs();

117	135

118 typedef std::map<int, Request> SpeechRecognizerMap;	136 FSMEvent event;

119 SpeechRecognizerMap requests_;	137 const content::SpeechRecognitionResult* speech_result;

120 std::string request_info_;	138 const content::SpeechRecognitionError* speech_error;

121 bool can_report_metrics_;	139 };

122 int recording_session_id_;	140

123 scoped_ptr<content::SpeechRecognitionManagerDelegate> delegate_;	141 // Entry point for pushing any external event into the session handling FSM.

	142 void DispatchEvent(int session_id, FSMEventArgs args);

	143

	144 // Defines the behavior of the session handling FSM, selecting the appropriate

	145 // transition according to the session, its current state and the event.

	146 FSMState ExecuteTransitionAndGetNextState(Session& session,

	147 const FSMEventArgs& event_args);

	148

	149 // The methods below handle transitions of the session handling FSM.

	150 FSMState SessionStart(Session& session, const FSMEventArgs& event_args);

	151 FSMState SessionAbort(Session& session, const FSMEventArgs& event_args);

	152 FSMState SessionStopAudioCapture(Session& session,

	153 const FSMEventArgs& event_args);

	154 FSMState SessionDetach(Session& session, const FSMEventArgs& event_args);

	155 FSMState SessionReportError(Session& session, const FSMEventArgs& event_args);

	156 FSMState SessionReportNoMatch(Session& session,

	157 const FSMEventArgs& event_args);

	158 FSMState SessionDelete(Session& session, const FSMEventArgs& event_args);

	159 FSMState SessionSetDetachable(Session& session,

	160 const FSMEventArgs& event_args);

	161 FSMState DoNothing(Session& session, const FSMEventArgs& event_args);

	162 FSMState NotFeasible(Session& session, const FSMEventArgs& event_args);

	163

	164 bool SessionExists(int session_id) const;

	165 content::SpeechRecognitionEventListener* GetListener(int session_id) const;

	166 int GetNextSessionID();

	167

	168 typedef std::map<int, Session> SessionsTable;

	169 SessionsTable sessions_;

	170 int interactive_session_id_;
	Satish 2012/04/19 13:03:19: interactive -> active ?
	Primiano Tucci (use gerrit) 2012/04/20 16:06:43: On 2012/04/19 13:03:19, Satish wrote: > interactive -> active ? Reply: A session can be active but not interactive (while waiting for a result)
	171 int last_session_id_;

	172 bool is_dispatching_event_;

	173 content::SpeechRecognitionManagerDelegate* delegate_;

124 };	174 };

125	175

126 } // namespace speech	176 } // namespace speech

127	177

128 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_	178 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_

OLD	NEW