content/browser/speech/speech_recognizer_impl.h - Issue 2675713002: Switch Speech Recognition to asynchronous callback-based AudioManager interactions.

Side by Side Diff: content/browser/speech/speech_recognizer_impl.h

Issue 2675713002: Switch Speech Recognition to asynchronous callback-based AudioManager interactions. (Closed)

Patch Set: Created 3 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« no previous file with comments | « content/browser/speech/speech_recognition_manager_impl.cc ('k') | content/browser/speech/speech_recognizer_impl.cc » ('j') | content/browser/speech/speech_recognizer_impl.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_	5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_

6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_	6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_

7	7

8 #include <memory>	8 #include <memory>

9	9

10 #include "base/macros.h"	10 #include "base/macros.h"

	11 #include "base/memory/weak_ptr.h"

11 #include "content/browser/speech/endpointer/endpointer.h"	12 #include "content/browser/speech/endpointer/endpointer.h"

12 #include "content/browser/speech/speech_recognition_engine.h"	13 #include "content/browser/speech/speech_recognition_engine.h"

13 #include "content/browser/speech/speech_recognizer.h"	14 #include "content/browser/speech/speech_recognizer.h"

14 #include "content/public/common/speech_recognition_error.h"	15 #include "content/public/common/speech_recognition_error.h"

15 #include "content/public/common/speech_recognition_result.h"	16 #include "content/public/common/speech_recognition_result.h"

16 #include "media/audio/audio_input_controller.h"	17 #include "media/audio/audio_input_controller.h"

17 #include "media/audio/audio_logging.h"	18 #include "media/audio/audio_logging.h"

18 #include "net/url_request/url_request_context_getter.h"	19 #include "net/url_request/url_request_context_getter.h"

19	20

20 namespace media {	21 namespace media {

21 class AudioBus;	22 class AudioBus;

22 class AudioManager;	23 class AudioSystem;

23 }	24 }

24	25

25 namespace content {	26 namespace content {

26	27

27 class SpeechRecognitionEventListener;	28 class SpeechRecognitionEventListener;

28	29

29 // Handles speech recognition for a session (identified by |session_id|), taking	30 // Handles speech recognition for a session (identified by |session_id|), taking

30 // care of audio capture, silence detection/endpointer and interaction with the	31 // care of audio capture, silence detection/endpointer and interaction with the

31 // SpeechRecognitionEngine.	32 // SpeechRecognitionEngine.

32 class CONTENT_EXPORT SpeechRecognizerImpl	33 class CONTENT_EXPORT SpeechRecognizerImpl

33 : public SpeechRecognizer,	34 : public SpeechRecognizer,

34 public media::AudioInputController::EventHandler,	35 public media::AudioInputController::EventHandler,

35 public media::AudioInputController::SyncWriter,	36 public media::AudioInputController::SyncWriter,

36 public NON_EXPORTED_BASE(SpeechRecognitionEngine::Delegate) {	37 public NON_EXPORTED_BASE(SpeechRecognitionEngine::Delegate) {

37 public:	38 public:

38 static const int kAudioSampleRate;	39 static const int kAudioSampleRate;

39 static const media::ChannelLayout kChannelLayout;	40 static const media::ChannelLayout kChannelLayout;

40 static const int kNumBitsPerAudioSample;	41 static const int kNumBitsPerAudioSample;

41 static const int kNoSpeechTimeoutMs;	42 static const int kNoSpeechTimeoutMs;

42 static const int kEndpointerEstimationTimeMs;	43 static const int kEndpointerEstimationTimeMs;

43	44

44 static void SetAudioManagerForTesting(media::AudioManager* audio_manager);	45 static void SetAudioSystemForTesting(media::AudioSystem* audio_system);

45	46

46 SpeechRecognizerImpl(SpeechRecognitionEventListener* listener,	47 SpeechRecognizerImpl(SpeechRecognitionEventListener* listener,

	48 media::AudioSystem* audio_system,

47 int session_id,	49 int session_id,

48 bool continuous,	50 bool continuous,

49 bool provisional_results,	51 bool provisional_results,

50 SpeechRecognitionEngine* engine);	52 SpeechRecognitionEngine* engine);

51	53

52 void StartRecognition(const std::string& device_id) override;	54 void StartRecognition(const std::string& device_id) override;

53 void AbortRecognition() override;	55 void AbortRecognition() override;

54 void StopAudioCapture() override;	56 void StopAudioCapture() override;

55 bool IsActive() const override;	57 bool IsActive() const override;

56 bool IsCapturingAudio() const override;	58 bool IsCapturingAudio() const override;

57 const SpeechRecognitionEngine& recognition_engine() const;	59 const SpeechRecognitionEngine& recognition_engine() const;

58	60

59 private:	61 private:

60 friend class SpeechRecognizerTest;	62 friend class SpeechRecognizerTest;

61	63

62 enum FSMState {	64 enum FSMState {

63 STATE_IDLE = 0,	65 STATE_IDLE = 0,

	66 STATE_PREPARING,

64 STATE_STARTING,	67 STATE_STARTING,

65 STATE_ESTIMATING_ENVIRONMENT,	68 STATE_ESTIMATING_ENVIRONMENT,

66 STATE_WAITING_FOR_SPEECH,	69 STATE_WAITING_FOR_SPEECH,

67 STATE_RECOGNIZING,	70 STATE_RECOGNIZING,

68 STATE_WAITING_FINAL_RESULT,	71 STATE_WAITING_FINAL_RESULT,

69 STATE_ENDED,	72 STATE_ENDED,

70 STATE_MAX_VALUE = STATE_ENDED	73 STATE_MAX_VALUE = STATE_ENDED

71 };	74 };

72	75

73 enum FSMEvent {	76 enum FSMEvent {

74 EVENT_ABORT = 0,	77 EVENT_ABORT = 0,

	78 EVENT_PREPARE,

75 EVENT_START,	79 EVENT_START,

76 EVENT_STOP_CAPTURE,	80 EVENT_STOP_CAPTURE,

77 EVENT_AUDIO_DATA,	81 EVENT_AUDIO_DATA,

78 EVENT_ENGINE_RESULT,	82 EVENT_ENGINE_RESULT,

79 EVENT_ENGINE_ERROR,	83 EVENT_ENGINE_ERROR,

80 EVENT_AUDIO_ERROR,	84 EVENT_AUDIO_ERROR,

81 EVENT_MAX_VALUE = EVENT_AUDIO_ERROR	85 EVENT_MAX_VALUE = EVENT_AUDIO_ERROR

82 };	86 };

83	87

84 struct FSMEventArgs {	88 struct FSMEventArgs {

(...skipping 12 matching lines...) Expand all Loading...
97 // Entry point for pushing any new external event into the recognizer FSM.	101 // Entry point for pushing any new external event into the recognizer FSM.

98 void DispatchEvent(const FSMEventArgs& event_args);	102 void DispatchEvent(const FSMEventArgs& event_args);

99	103

100 // Defines the behavior of the recognizer FSM, selecting the appropriate	104 // Defines the behavior of the recognizer FSM, selecting the appropriate

101 // transition according to the current state and event.	105 // transition according to the current state and event.

102 FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& args);	106 FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& args);

103	107

104 // Process a new audio chunk in the audio pipeline (endpointer, vumeter, etc).	108 // Process a new audio chunk in the audio pipeline (endpointer, vumeter, etc).

105 void ProcessAudioPipeline(const AudioChunk& raw_audio);	109 void ProcessAudioPipeline(const AudioChunk& raw_audio);

106	110

	111 // Callback from AudioSystem.

	112 void OnDeviceInfo(const media::AudioParameters& params);

	113

107 // The methods below handle transitions of the recognizer FSM.	114 // The methods below handle transitions of the recognizer FSM.

	115 FSMState PrepareRecognition(const FSMEventArgs&);

108 FSMState StartRecording(const FSMEventArgs& event_args);	116 FSMState StartRecording(const FSMEventArgs& event_args);

109 FSMState StartRecognitionEngine(const FSMEventArgs& event_args);	117 FSMState StartRecognitionEngine(const FSMEventArgs& event_args);

110 FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args);	118 FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args);

111 FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args);	119 FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args);

112 FSMState StopCaptureAndWaitForResult(const FSMEventArgs& event_args);	120 FSMState StopCaptureAndWaitForResult(const FSMEventArgs& event_args);

113 FSMState ProcessIntermediateResult(const FSMEventArgs& event_args);	121 FSMState ProcessIntermediateResult(const FSMEventArgs& event_args);

114 FSMState ProcessFinalResult(const FSMEventArgs& event_args);	122 FSMState ProcessFinalResult(const FSMEventArgs& event_args);

115 FSMState AbortSilently(const FSMEventArgs& event_args);	123 FSMState AbortSilently(const FSMEventArgs& event_args);

116 FSMState AbortWithError(const FSMEventArgs& event_args);	124 FSMState AbortWithError(const FSMEventArgs& event_args);

117 FSMState Abort(const SpeechRecognitionError& error);	125 FSMState Abort(const SpeechRecognitionError& error);

(...skipping 27 matching lines...) Expand all Loading...
145 uint32_t hardware_delay_bytes) override;	153 uint32_t hardware_delay_bytes) override;

146 void Close() override;	154 void Close() override;

147	155

148 // SpeechRecognitionEngineDelegate methods.	156 // SpeechRecognitionEngineDelegate methods.

149 void OnSpeechRecognitionEngineResults(	157 void OnSpeechRecognitionEngineResults(

150 const SpeechRecognitionResults& results) override;	158 const SpeechRecognitionResults& results) override;

151 void OnSpeechRecognitionEngineEndOfUtterance() override;	159 void OnSpeechRecognitionEngineEndOfUtterance() override;

152 void OnSpeechRecognitionEngineError(	160 void OnSpeechRecognitionEngineError(

153 const SpeechRecognitionError& error) override;	161 const SpeechRecognitionError& error) override;

154	162

155 static media::AudioManager* audio_manager_for_tests_;	163 media::AudioSystem* GetAudioSystem();

156	164

	165 static media::AudioSystem* audio_system_for_tests_;

	166 media::AudioSystem* audio_system_;

157 std::unique_ptr<SpeechRecognitionEngine> recognition_engine_;	167 std::unique_ptr<SpeechRecognitionEngine> recognition_engine_;

158 Endpointer endpointer_;	168 Endpointer endpointer_;

159 scoped_refptr<media::AudioInputController> audio_controller_;	169 scoped_refptr<media::AudioInputController> audio_controller_;

160 std::unique_ptr<media::AudioLog> audio_log_;	170 std::unique_ptr<media::AudioLog> audio_log_;

161 int num_samples_recorded_;	171 int num_samples_recorded_;

162 float audio_level_;	172 float audio_level_;

163 bool is_dispatching_event_;	173 bool is_dispatching_event_;

164 bool provisional_results_;	174 bool provisional_results_;

165 bool end_of_utterance_;	175 bool end_of_utterance_;

166 FSMState state_;	176 FSMState state_;

167 std::string device_id_;	177 std::string device_id_;

	178 media::AudioParameters device_params_;

168	179

169 class OnDataConverter;	180 class OnDataConverter;

170	181

171 // Converts data between native input format and a WebSpeech specific	182 // Converts data between native input format and a WebSpeech specific

172 // output format.	183 // output format.

173 std::unique_ptr<SpeechRecognizerImpl::OnDataConverter> audio_converter_;	184 std::unique_ptr<SpeechRecognizerImpl::OnDataConverter> audio_converter_;

174	185

	186 base::WeakPtrFactory<SpeechRecognizerImpl> weak_ptr_factory_;

175 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);	187 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);

176 };	188 };

177	189

178 } // namespace content	190 } // namespace content

179	191

180 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_	192 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_

OLD	NEW