OLD | NEW |
---|---|
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_ | 5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_ |
6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_ | 6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_ |
7 | 7 |
8 #include <list> | 8 #include <list> |
9 #include <string> | 9 #include <string> |
10 #include <utility> | 10 #include <utility> |
(...skipping 12 matching lines...) Expand all Loading... | |
23 | 23 |
24 namespace speech_input { | 24 namespace speech_input { |
25 | 25 |
26 // Records audio, sends recorded audio to server and translates server response | 26 // Records audio, sends recorded audio to server and translates server response |
27 // to recognition result. | 27 // to recognition result. |
28 class CONTENT_EXPORT SpeechRecognizer | 28 class CONTENT_EXPORT SpeechRecognizer |
29 : public base::RefCountedThreadSafe<SpeechRecognizer>, | 29 : public base::RefCountedThreadSafe<SpeechRecognizer>, |
30 public media::AudioInputController::EventHandler, | 30 public media::AudioInputController::EventHandler, |
31 public SpeechRecognitionRequestDelegate { | 31 public SpeechRecognitionRequestDelegate { |
32 public: | 32 public: |
33 enum ErrorCode { | |
34 RECOGNIZER_NO_ERROR, | |
35 RECOGNIZER_ERROR_CAPTURE, | |
36 RECOGNIZER_ERROR_NO_SPEECH, | |
37 RECOGNIZER_ERROR_NO_RESULTS, | |
38 RECOGNIZER_ERROR_NETWORK, | |
39 }; | |
40 | |
41 // Implemented by the caller to receive recognition events. | 33 // Implemented by the caller to receive recognition events. |
42 class CONTENT_EXPORT Delegate { | 34 class CONTENT_EXPORT Delegate { |
43 public: | 35 public: |
44 virtual void SetRecognitionResult( | 36 virtual void SetRecognitionResult( |
45 int caller_id, | 37 int caller_id, |
46 bool error, | 38 bool error, |
47 const SpeechInputResultArray& result) = 0; | 39 const SpeechInputResult& result) = 0; |
48 | 40 |
49 // Invoked when the first audio packet was received from the audio capture | 41 // Invoked when the first audio packet was received from the audio capture |
50 // device. | 42 // device. |
51 virtual void DidStartReceivingAudio(int caller_id) = 0; | 43 virtual void DidStartReceivingAudio(int caller_id) = 0; |
52 | 44 |
53 // Invoked when audio recording stops, either due to the end pointer | 45 // Invoked when audio recording stops, either due to the end pointer |
54 // detecting silence in user input or if |StopRecording| was called. The | 46 // detecting silence in user input or if |StopRecording| was called. The |
55 // delegate has to wait until |DidCompleteRecognition| is invoked before | 47 // delegate has to wait until |DidCompleteRecognition| is invoked before |
56 // destroying the |SpeechRecognizer| object. | 48 // destroying the |SpeechRecognizer| object. |
57 virtual void DidCompleteRecording(int caller_id) = 0; | 49 virtual void DidCompleteRecording(int caller_id) = 0; |
58 | 50 |
59 // This is guaranteed to be the last method invoked in the recognition | 51 // This is guaranteed to be the last method invoked in the recognition |
60 // sequence and the |SpeechRecognizer| object can be freed up if necessary. | 52 // sequence and the |SpeechRecognizer| object can be freed up if necessary. |
61 virtual void DidCompleteRecognition(int caller_id) = 0; | 53 virtual void DidCompleteRecognition(int caller_id) = 0; |
62 | 54 |
55 // Informs that the end pointer has started detecting input speech. | |
Satish
2011/10/06 09:09:06
"input speech" -> "speech"
Leandro Graciá Gil
2011/10/06 18:26:25
Done.
| |
56 virtual void DidStartReceivingSpeech(int caller_id) = 0; | |
57 | |
58 // Informs that the end pointer has stopped detecting input speech. | |
Satish
2011/10/06 09:09:06
ditto
Leandro Graciá Gil
2011/10/06 18:26:25
Done.
| |
59 virtual void DidStopReceivingSpeech(int caller_id) = 0; | |
60 | |
63 // Invoked if there was an error while recording or recognizing audio. The | 61 // Invoked if there was an error while recording or recognizing audio. The |
64 // session has already been cancelled when this call is made and the DidXxxx | 62 // session has already been cancelled when this call is made and the DidXxxx |
65 // callbacks will not be issued. It is safe to destroy/release the | 63 // callbacks will not be issued. It is safe to destroy/release the |
66 // |SpeechRecognizer| object while processing this call. | 64 // |SpeechRecognizer| object while processing this call. |
67 virtual void OnRecognizerError(int caller_id, | 65 virtual void OnRecognizerError(int caller_id, |
68 SpeechRecognizer::ErrorCode error) = 0; | 66 SpeechInputError error) = 0; |
69 | 67 |
70 // At the start of recognition, a short amount of audio is recorded to | 68 // At the start of recognition, a short amount of audio is recorded to |
71 // estimate the environment/background noise and this callback is issued | 69 // estimate the environment/background noise and this callback is issued |
72 // after that is complete. Typically the delegate brings up any speech | 70 // after that is complete. Typically the delegate brings up any speech |
73 // recognition UI once this callback is received. | 71 // recognition UI once this callback is received. |
74 virtual void DidCompleteEnvironmentEstimation(int caller_id) = 0; | 72 virtual void DidCompleteEnvironmentEstimation(int caller_id) = 0; |
75 | 73 |
76 // Informs of a change in the captured audio level, useful if displaying | 74 // Informs of a change in the captured audio level, useful if displaying |
77 // a microphone volume indicator while recording. | 75 // a microphone volume indicator while recording. |
78 // The value of |volume| and |noise_volume| is in the [0.0, 1.0] range. | 76 // The value of |volume| and |noise_volume| is in the [0.0, 1.0] range. |
79 virtual void SetInputVolume(int caller_id, float volume, | 77 virtual void SetInputVolume(int caller_id, float volume, |
80 float noise_volume) = 0; | 78 float noise_volume) = 0; |
81 | 79 |
82 protected: | 80 protected: |
83 virtual ~Delegate() {} | 81 virtual ~Delegate() {} |
84 }; | 82 }; |
85 | 83 |
86 SpeechRecognizer(Delegate* delegate, | 84 SpeechRecognizer(Delegate* delegate, |
87 int caller_id, | 85 int caller_id, |
88 const std::string& language, | 86 const std::string& language, |
89 const std::string& grammar, | 87 const std::string& grammar, |
90 net::URLRequestContextGetter* context_getter, | 88 net::URLRequestContextGetter* context_getter, |
91 bool censor_results, | 89 bool censor_results, |
92 const std::string& hardware_info, | 90 const std::string& hardware_info, |
93 const std::string& origin_url); | 91 const std::string& origin_url); |
92 | |
94 virtual ~SpeechRecognizer(); | 93 virtual ~SpeechRecognizer(); |
95 | 94 |
96 // Starts audio recording and does recognition after recording ends. The same | 95 // Starts audio recording and does recognition after recording ends. The same |
97 // SpeechRecognizer instance can be used multiple times for speech recognition | 96 // SpeechRecognizer instance can be used multiple times for speech recognition |
98 // though each recognition request can be made only after the previous one | 97 // though each recognition request can be made only after the previous one |
99 // completes (i.e. after receiving Delegate::DidCompleteRecognition). | 98 // completes (i.e. after receiving Delegate::DidCompleteRecognition). |
100 bool StartRecording(); | 99 bool StartRecording(); |
101 | 100 |
102 // Stops recording audio and starts recognition. | 101 // Stops recording audio and starts recognition. |
103 void StopRecording(); | 102 void StopRecording(); |
104 | 103 |
105 // Stops recording audio and cancels recognition. Any audio recorded so far | 104 // Stops recording audio and cancels recognition. Any audio recorded so far |
106 // gets discarded. | 105 // gets discarded. |
107 void CancelRecognition(); | 106 void CancelRecognition(); |
108 | 107 |
109 // AudioInputController::EventHandler methods. | 108 // AudioInputController::EventHandler methods. |
110 virtual void OnCreated(media::AudioInputController* controller) { } | 109 virtual void OnCreated(media::AudioInputController* controller) { } |
111 virtual void OnRecording(media::AudioInputController* controller) { } | 110 virtual void OnRecording(media::AudioInputController* controller) { } |
112 virtual void OnError(media::AudioInputController* controller, int error_code); | 111 virtual void OnError(media::AudioInputController* controller, int error_code); |
113 virtual void OnData(media::AudioInputController* controller, | 112 virtual void OnData(media::AudioInputController* controller, |
114 const uint8* data, | 113 const uint8* data, |
115 uint32 size); | 114 uint32 size); |
116 | 115 |
117 // SpeechRecognitionRequest::Delegate methods. | 116 // SpeechRecognitionRequest::Delegate methods. |
118 virtual void SetRecognitionResult(bool error, | 117 virtual void SetRecognitionResult(bool error, |
119 const SpeechInputResultArray& result); | 118 const SpeechInputResult& result); |
Satish
2011/10/06 09:09:06
could align with the previous param?
Leandro Graciá Gil
2011/10/06 18:26:25
Done.
| |
120 | 119 |
121 static const int kAudioSampleRate; | 120 static const int kAudioSampleRate; |
122 static const int kAudioPacketIntervalMs; // Duration of each audio packet. | 121 static const int kAudioPacketIntervalMs; // Duration of each audio packet. |
123 static const ChannelLayout kChannelLayout; | 122 static const ChannelLayout kChannelLayout; |
124 static const int kNumBitsPerAudioSample; | 123 static const int kNumBitsPerAudioSample; |
125 static const int kNoSpeechTimeoutSec; | 124 static const int kNoSpeechTimeoutSec; |
126 static const int kEndpointerEstimationTimeMs; | 125 static const int kEndpointerEstimationTimeMs; |
127 | 126 |
128 private: | 127 private: |
129 void InformErrorAndCancelRecognition(ErrorCode error); | 128 void InformErrorAndCancelRecognition(SpeechInputError error); |
130 void SendRecordedAudioToServer(); | 129 void SendRecordedAudioToServer(); |
131 | 130 |
132 void HandleOnError(int error_code); // Handles OnError in the IO thread. | 131 void HandleOnError(int error_code); // Handles OnError in the IO thread. |
133 | 132 |
134 // Handles OnData in the IO thread. Takes ownership of |data|. | 133 // Handles OnData in the IO thread. Takes ownership of |data|. |
135 void HandleOnData(std::string* data); | 134 void HandleOnData(std::string* data); |
136 | 135 |
137 Delegate* delegate_; | 136 Delegate* delegate_; |
138 int caller_id_; | 137 int caller_id_; |
139 std::string language_; | 138 std::string language_; |
(...skipping 16 matching lines...) Expand all Loading... | |
156 | 155 |
157 // This typedef is to work around the issue with certain versions of | 156 // This typedef is to work around the issue with certain versions of |
158 // Visual Studio where it gets confused between multiple Delegate | 157 // Visual Studio where it gets confused between multiple Delegate |
159 // classes and gives a C2500 error. (I saw this error on the try bots - | 158 // classes and gives a C2500 error. (I saw this error on the try bots - |
160 // the workaround was not needed for my machine). | 159 // the workaround was not needed for my machine). |
161 typedef SpeechRecognizer::Delegate SpeechRecognizerDelegate; | 160 typedef SpeechRecognizer::Delegate SpeechRecognizerDelegate; |
162 | 161 |
163 } // namespace speech_input | 162 } // namespace speech_input |
164 | 163 |
165 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_ | 164 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_ |
OLD | NEW |