content/browser/speech/speech_recognizer_impl.h - Issue 2675713002: Switch Speech Recognition to asynchronous callback-based AudioManager interactions.

Side by Side Diff: content/browser/speech/speech_recognizer_impl.h

Issue 2675713002: Switch Speech Recognition to asynchronous callback-based AudioManager interactions. (Closed)

Patch Set: review comments addressed Created 3 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_	5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_

6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_	6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_

7	7

8 #include <memory>	8 #include <memory>

9 #include <string>	9 #include <string>

10	10

11 #include "base/macros.h"	11 #include "base/macros.h"

	12 #include "base/memory/weak_ptr.h"

12 #include "content/browser/speech/endpointer/endpointer.h"	13 #include "content/browser/speech/endpointer/endpointer.h"

13 #include "content/browser/speech/speech_recognition_engine.h"	14 #include "content/browser/speech/speech_recognition_engine.h"

14 #include "content/browser/speech/speech_recognizer.h"	15 #include "content/browser/speech/speech_recognizer.h"

15 #include "content/public/common/speech_recognition_error.h"	16 #include "content/public/common/speech_recognition_error.h"

16 #include "content/public/common/speech_recognition_result.h"	17 #include "content/public/common/speech_recognition_result.h"

17 #include "media/audio/audio_input_controller.h"	18 #include "media/audio/audio_input_controller.h"

18 #include "media/audio/audio_logging.h"	19 #include "media/audio/audio_logging.h"

19 #include "net/url_request/url_request_context_getter.h"	20 #include "net/url_request/url_request_context_getter.h"

20	21

21 namespace media {	22 namespace media {

22 class AudioBus;	23 class AudioBus;

23 class AudioManager;	24 class AudioSystem;

24 }	25 }

25	26

26 namespace content {	27 namespace content {

27	28

28 class SpeechRecognitionEventListener;	29 class SpeechRecognitionEventListener;

29	30

30 // Handles speech recognition for a session (identified by |session_id|), taking	31 // Handles speech recognition for a session (identified by |session_id|), taking

31 // care of audio capture, silence detection/endpointer and interaction with the	32 // care of audio capture, silence detection/endpointer and interaction with the

32 // SpeechRecognitionEngine.	33 // SpeechRecognitionEngine.

33 class CONTENT_EXPORT SpeechRecognizerImpl	34 class CONTENT_EXPORT SpeechRecognizerImpl

34 : public SpeechRecognizer,	35 : public SpeechRecognizer,

35 public media::AudioInputController::EventHandler,	36 public media::AudioInputController::EventHandler,

36 public media::AudioInputController::SyncWriter,	37 public media::AudioInputController::SyncWriter,

37 public NON_EXPORTED_BASE(SpeechRecognitionEngine::Delegate) {	38 public NON_EXPORTED_BASE(SpeechRecognitionEngine::Delegate) {

38 public:	39 public:

39 static const int kAudioSampleRate;	40 static const int kAudioSampleRate;

40 static const media::ChannelLayout kChannelLayout;	41 static const media::ChannelLayout kChannelLayout;

41 static const int kNumBitsPerAudioSample;	42 static const int kNumBitsPerAudioSample;

42 static const int kNoSpeechTimeoutMs;	43 static const int kNoSpeechTimeoutMs;

43 static const int kEndpointerEstimationTimeMs;	44 static const int kEndpointerEstimationTimeMs;

44	45

45 static void SetAudioManagerForTesting(media::AudioManager* audio_manager);	46 static void SetAudioSystemForTesting(media::AudioSystem* audio_system);

46	47

47 SpeechRecognizerImpl(SpeechRecognitionEventListener* listener,	48 SpeechRecognizerImpl(SpeechRecognitionEventListener* listener,

	49 media::AudioSystem* audio_system,

48 int session_id,	50 int session_id,

49 bool continuous,	51 bool continuous,

50 bool provisional_results,	52 bool provisional_results,

51 SpeechRecognitionEngine* engine);	53 SpeechRecognitionEngine* engine);

52	54

53 void StartRecognition(const std::string& device_id) override;	55 void StartRecognition(const std::string& device_id) override;

54 void AbortRecognition() override;	56 void AbortRecognition() override;

55 void StopAudioCapture() override;	57 void StopAudioCapture() override;

56 bool IsActive() const override;	58 bool IsActive() const override;

57 bool IsCapturingAudio() const override;	59 bool IsCapturingAudio() const override;

58 const SpeechRecognitionEngine& recognition_engine() const;	60 const SpeechRecognitionEngine& recognition_engine() const;

59	61

60 private:	62 private:

61 friend class SpeechRecognizerTest;	63 friend class SpeechRecognizerTest;

62	64

63 enum FSMState {	65 enum FSMState {

64 STATE_IDLE = 0,	66 STATE_IDLE = 0,

	67 STATE_PREPARING,

65 STATE_STARTING,	68 STATE_STARTING,

66 STATE_ESTIMATING_ENVIRONMENT,	69 STATE_ESTIMATING_ENVIRONMENT,

67 STATE_WAITING_FOR_SPEECH,	70 STATE_WAITING_FOR_SPEECH,

68 STATE_RECOGNIZING,	71 STATE_RECOGNIZING,

69 STATE_WAITING_FINAL_RESULT,	72 STATE_WAITING_FINAL_RESULT,

70 STATE_ENDED,	73 STATE_ENDED,

71 STATE_MAX_VALUE = STATE_ENDED	74 STATE_MAX_VALUE = STATE_ENDED

72 };	75 };

73	76

74 enum FSMEvent {	77 enum FSMEvent {

75 EVENT_ABORT = 0,	78 EVENT_ABORT = 0,

	79 EVENT_PREPARE,

76 EVENT_START,	80 EVENT_START,

77 EVENT_STOP_CAPTURE,	81 EVENT_STOP_CAPTURE,

78 EVENT_AUDIO_DATA,	82 EVENT_AUDIO_DATA,

79 EVENT_ENGINE_RESULT,	83 EVENT_ENGINE_RESULT,

80 EVENT_ENGINE_ERROR,	84 EVENT_ENGINE_ERROR,

81 EVENT_AUDIO_ERROR,	85 EVENT_AUDIO_ERROR,

82 EVENT_MAX_VALUE = EVENT_AUDIO_ERROR	86 EVENT_MAX_VALUE = EVENT_AUDIO_ERROR

83 };	87 };

84	88

85 struct FSMEventArgs {	89 struct FSMEventArgs {

(...skipping 12 matching lines...)
98 // Entry point for pushing any new external event into the recognizer FSM.	102 // Entry point for pushing any new external event into the recognizer FSM.

99 void DispatchEvent(const FSMEventArgs& event_args);	103 void DispatchEvent(const FSMEventArgs& event_args);

100	104

101 // Defines the behavior of the recognizer FSM, selecting the appropriate	105 // Defines the behavior of the recognizer FSM, selecting the appropriate

102 // transition according to the current state and event.	106 // transition according to the current state and event.

103 FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& args);	107 FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& args);

104	108

105 // Process a new audio chunk in the audio pipeline (endpointer, vumeter, etc).	109 // Process a new audio chunk in the audio pipeline (endpointer, vumeter, etc).

106 void ProcessAudioPipeline(const AudioChunk& raw_audio);	110 void ProcessAudioPipeline(const AudioChunk& raw_audio);

107	111

	112 // Callback from AudioSystem.

	113 void OnDeviceInfo(const media::AudioParameters& params);

	114

108 // The methods below handle transitions of the recognizer FSM.	115 // The methods below handle transitions of the recognizer FSM.

	116 FSMState PrepareRecognition(const FSMEventArgs&);

109 FSMState StartRecording(const FSMEventArgs& event_args);	117 FSMState StartRecording(const FSMEventArgs& event_args);

110 FSMState StartRecognitionEngine(const FSMEventArgs& event_args);	118 FSMState StartRecognitionEngine(const FSMEventArgs& event_args);

111 FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args);	119 FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args);

112 FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args);	120 FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args);

113 FSMState StopCaptureAndWaitForResult(const FSMEventArgs& event_args);	121 FSMState StopCaptureAndWaitForResult(const FSMEventArgs& event_args);

114 FSMState ProcessIntermediateResult(const FSMEventArgs& event_args);	122 FSMState ProcessIntermediateResult(const FSMEventArgs& event_args);

115 FSMState ProcessFinalResult(const FSMEventArgs& event_args);	123 FSMState ProcessFinalResult(const FSMEventArgs& event_args);

116 FSMState AbortSilently(const FSMEventArgs& event_args);	124 FSMState AbortSilently(const FSMEventArgs& event_args);

117 FSMState AbortWithError(const FSMEventArgs& event_args);	125 FSMState AbortWithError(const FSMEventArgs& event_args);

118 FSMState Abort(const SpeechRecognitionError& error);	126 FSMState Abort(const SpeechRecognitionError& error);

(...skipping 27 matching lines...)
146 uint32_t hardware_delay_bytes) override;	154 uint32_t hardware_delay_bytes) override;

147 void Close() override;	155 void Close() override;

148	156

149 // SpeechRecognitionEngineDelegate methods.	157 // SpeechRecognitionEngineDelegate methods.

150 void OnSpeechRecognitionEngineResults(	158 void OnSpeechRecognitionEngineResults(

151 const SpeechRecognitionResults& results) override;	159 const SpeechRecognitionResults& results) override;

152 void OnSpeechRecognitionEngineEndOfUtterance() override;	160 void OnSpeechRecognitionEngineEndOfUtterance() override;

153 void OnSpeechRecognitionEngineError(	161 void OnSpeechRecognitionEngineError(

154 const SpeechRecognitionError& error) override;	162 const SpeechRecognitionError& error) override;

155	163

156 static media::AudioManager* audio_manager_for_tests_;	164 media::AudioSystem* GetAudioSystem();

157	165

	166 // Substitutes the real audio system in browser tests.

	167 static media::AudioSystem* audio_system_for_tests_;

	168 media::AudioSystem* audio_system_;

158 std::unique_ptr<SpeechRecognitionEngine> recognition_engine_;	169 std::unique_ptr<SpeechRecognitionEngine> recognition_engine_;

159 Endpointer endpointer_;	170 Endpointer endpointer_;

160 scoped_refptr<media::AudioInputController> audio_controller_;	171 scoped_refptr<media::AudioInputController> audio_controller_;

161 std::unique_ptr<media::AudioLog> audio_log_;	172 std::unique_ptr<media::AudioLog> audio_log_;

162 int num_samples_recorded_;	173 int num_samples_recorded_;

163 float audio_level_;	174 float audio_level_;

164 bool is_dispatching_event_;	175 bool is_dispatching_event_;

165 bool provisional_results_;	176 bool provisional_results_;

166 bool end_of_utterance_;	177 bool end_of_utterance_;

167 FSMState state_;	178 FSMState state_;

168 std::string device_id_;	179 std::string device_id_;

	180 media::AudioParameters device_params_;

169	181

170 class OnDataConverter;	182 class OnDataConverter;

171	183

172 // Converts data between native input format and a WebSpeech specific	184 // Converts data between native input format and a WebSpeech specific

173 // output format.	185 // output format.

174 std::unique_ptr<SpeechRecognizerImpl::OnDataConverter> audio_converter_;	186 std::unique_ptr<SpeechRecognizerImpl::OnDataConverter> audio_converter_;

175	187

	188 base::WeakPtrFactory<SpeechRecognizerImpl> weak_ptr_factory_;

176 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);	189 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);

177 };	190 };

178	191

179 } // namespace content	192 } // namespace content

180	193

181 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_	194 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_

OLD	NEW