| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_ | 5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_ |
| 6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_ | 6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_ |
| 7 | 7 |
| 8 #include <list> | 8 #include <list> |
| 9 #include <string> | 9 #include <string> |
| 10 #include <utility> | 10 #include <utility> |
| 11 | 11 |
| 12 #include "base/memory/ref_counted.h" | 12 #include "base/memory/ref_counted.h" |
| 13 #include "base/memory/scoped_ptr.h" | 13 #include "base/memory/scoped_ptr.h" |
| 14 #include "content/browser/speech/audio_encoder.h" | 14 #include "content/browser/speech/audio_encoder.h" |
| 15 #include "content/browser/speech/endpointer/endpointer.h" | 15 #include "content/browser/speech/endpointer/endpointer.h" |
| 16 #include "content/browser/speech/speech_recognition_request.h" | 16 #include "content/browser/speech/speech_recognition_request.h" |
| 17 #include "content/common/content_export.h" | 17 #include "content/common/content_export.h" |
| 18 #include "content/public/common/speech_input_result.h" | 18 #include "content/public/common/speech_input_result.h" |
| 19 #include "media/audio/audio_input_controller.h" | 19 #include "media/audio/audio_input_controller.h" |
| 20 | 20 |
| 21 namespace content { |
| 22 class SpeechRecognizerDelegate; |
| 23 } |
| 24 |
| 21 namespace net { | 25 namespace net { |
| 22 class URLRequestContextGetter; | 26 class URLRequestContextGetter; |
| 23 } | 27 } |
| 24 | 28 |
| 25 namespace speech_input { | 29 namespace speech_input { |
| 26 | 30 |
| 27 // Records audio, sends recorded audio to server and translates server response | 31 // Records audio, sends recorded audio to server and translates server response |
| 28 // to recognition result. | 32 // to recognition result. |
| 29 class CONTENT_EXPORT SpeechRecognizer | 33 class CONTENT_EXPORT SpeechRecognizer |
| 30 : public base::RefCountedThreadSafe<SpeechRecognizer>, | 34 : public base::RefCountedThreadSafe<SpeechRecognizer>, |
| 31 public media::AudioInputController::EventHandler, | 35 public media::AudioInputController::EventHandler, |
| 32 public SpeechRecognitionRequestDelegate { | 36 public SpeechRecognitionRequestDelegate { |
| 33 public: | 37 public: |
| 34 // Implemented by the caller to receive recognition events. | 38 SpeechRecognizer(content::SpeechRecognizerDelegate* delegate, |
| 35 class CONTENT_EXPORT Delegate { | |
| 36 public: | |
| 37 virtual void SetRecognitionResult( | |
| 38 int caller_id, | |
| 39 const content::SpeechInputResult& result) = 0; | |
| 40 | |
| 41 // Invoked when the first audio packet was received from the audio capture | |
| 42 // device. | |
| 43 virtual void DidStartReceivingAudio(int caller_id) = 0; | |
| 44 | |
| 45 // Invoked when audio recording stops, either due to the end pointer | |
| 46 // detecting silence in user input or if |StopRecording| was called. The | |
| 47 // delegate has to wait until |DidCompleteRecognition| is invoked before | |
| 48 // destroying the |SpeechRecognizer| object. | |
| 49 virtual void DidCompleteRecording(int caller_id) = 0; | |
| 50 | |
| 51 // This is guaranteed to be the last method invoked in the recognition | |
| 52 // sequence and the |SpeechRecognizer| object can be freed up if necessary. | |
| 53 virtual void DidCompleteRecognition(int caller_id) = 0; | |
| 54 | |
| 55 // Informs that the end pointer has started detecting speech. | |
| 56 virtual void DidStartReceivingSpeech(int caller_id) = 0; | |
| 57 | |
| 58 // Informs that the end pointer has stopped detecting speech. | |
| 59 virtual void DidStopReceivingSpeech(int caller_id) = 0; | |
| 60 | |
| 61 // Invoked if there was an error while recording or recognizing audio. The | |
| 62 // session has already been cancelled when this call is made and the DidXxxx | |
| 63 // callbacks will not be issued. It is safe to destroy/release the | |
| 64 // |SpeechRecognizer| object while processing this call. | |
| 65 virtual void OnRecognizerError(int caller_id, | |
| 66 content::SpeechInputError error) = 0; | |
| 67 | |
| 68 // At the start of recognition, a short amount of audio is recorded to | |
| 69 // estimate the environment/background noise and this callback is issued | |
| 70 // after that is complete. Typically the delegate brings up any speech | |
| 71 // recognition UI once this callback is received. | |
| 72 virtual void DidCompleteEnvironmentEstimation(int caller_id) = 0; | |
| 73 | |
| 74 // Informs of a change in the captured audio level, useful if displaying | |
| 75 // a microphone volume indicator while recording. | |
| 76 // The value of |volume| and |noise_volume| is in the [0.0, 1.0] range. | |
| 77 virtual void SetInputVolume(int caller_id, float volume, | |
| 78 float noise_volume) = 0; | |
| 79 | |
| 80 protected: | |
| 81 virtual ~Delegate() {} | |
| 82 }; | |
| 83 | |
| 84 SpeechRecognizer(Delegate* delegate, | |
| 85 int caller_id, | 39 int caller_id, |
| 86 const std::string& language, | 40 const std::string& language, |
| 87 const std::string& grammar, | 41 const std::string& grammar, |
| 88 net::URLRequestContextGetter* context_getter, | 42 net::URLRequestContextGetter* context_getter, |
| 89 AudioManager* audio_manager, | 43 AudioManager* audio_manager, |
| 90 bool filter_profanities, | 44 bool filter_profanities, |
| 91 const std::string& hardware_info, | 45 const std::string& hardware_info, |
| 92 const std::string& origin_url); | 46 const std::string& origin_url); |
| 93 | 47 |
| 94 virtual ~SpeechRecognizer(); | 48 virtual ~SpeechRecognizer(); |
| 95 | 49 |
| 96 // Starts audio recording and does recognition after recording ends. The same | 50 // Starts audio recording and does recognition after recording ends. The same |
| 97 // SpeechRecognizer instance can be used multiple times for speech recognition | 51 // SpeechRecognizer instance can be used multiple times for speech recognition |
| 98 // though each recognition request can be made only after the previous one | 52 // though each recognition request can be made only after the previous one |
| 99 // completes (i.e. after receiving Delegate::DidCompleteRecognition). | 53 // completes (i.e. after receiving |
| 54 // SpeechRecognizerDelegate::DidCompleteRecognition). |
| 100 bool StartRecording(); | 55 bool StartRecording(); |
| 101 | 56 |
| 102 // Stops recording audio and starts recognition. | 57 // Stops recording audio and starts recognition. |
| 103 void StopRecording(); | 58 void StopRecording(); |
| 104 | 59 |
| 105 // Stops recording audio and cancels recognition. Any audio recorded so far | 60 // Stops recording audio and cancels recognition. Any audio recorded so far |
| 106 // gets discarded. | 61 // gets discarded. |
| 107 void CancelRecognition(); | 62 void CancelRecognition(); |
| 108 | 63 |
| 109 // AudioInputController::EventHandler methods. | 64 // AudioInputController::EventHandler methods. |
| (...skipping 23 matching lines...) |
| 133 void SendRecordedAudioToServer(); | 88 void SendRecordedAudioToServer(); |
| 134 | 89 |
| 135 void HandleOnError(int error_code); // Handles OnError in the IO thread. | 90 void HandleOnError(int error_code); // Handles OnError in the IO thread. |
| 136 | 91 |
| 137 // Handles OnData in the IO thread. Takes ownership of |data|. | 92 // Handles OnData in the IO thread. Takes ownership of |data|. |
| 138 void HandleOnData(std::string* data); | 93 void HandleOnData(std::string* data); |
| 139 | 94 |
| 140 // Helper method which closes the audio controller and blocks until done. | 95 // Helper method which closes the audio controller and blocks until done. |
| 141 void CloseAudioControllerSynchronously(); | 96 void CloseAudioControllerSynchronously(); |
| 142 | 97 |
| 143 Delegate* delegate_; | 98 content::SpeechRecognizerDelegate* delegate_; |
| 144 int caller_id_; | 99 int caller_id_; |
| 145 std::string language_; | 100 std::string language_; |
| 146 std::string grammar_; | 101 std::string grammar_; |
| 147 bool filter_profanities_; | 102 bool filter_profanities_; |
| 148 std::string hardware_info_; | 103 std::string hardware_info_; |
| 149 std::string origin_url_; | 104 std::string origin_url_; |
| 150 | 105 |
| 151 scoped_ptr<SpeechRecognitionRequest> request_; | 106 scoped_ptr<SpeechRecognitionRequest> request_; |
| 152 scoped_refptr<media::AudioInputController> audio_controller_; | 107 scoped_refptr<media::AudioInputController> audio_controller_; |
| 153 scoped_refptr<net::URLRequestContextGetter> context_getter_; | 108 scoped_refptr<net::URLRequestContextGetter> context_getter_; |
| 154 scoped_refptr<AudioManager> audio_manager_; | 109 scoped_refptr<AudioManager> audio_manager_; |
| 155 AudioEncoder::Codec codec_; | 110 AudioEncoder::Codec codec_; |
| 156 scoped_ptr<AudioEncoder> encoder_; | 111 scoped_ptr<AudioEncoder> encoder_; |
| 157 Endpointer endpointer_; | 112 Endpointer endpointer_; |
| 158 int num_samples_recorded_; | 113 int num_samples_recorded_; |
| 159 float audio_level_; | 114 float audio_level_; |
| 160 | 115 |
| 161 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizer); | 116 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizer); |
| 162 }; | 117 }; |
| 163 | 118 |
| 164 // This typedef is to workaround the issue with certain versions of | |
| 165 // Visual Studio where it gets confused between multiple Delegate | |
| 166 // classes and gives a C2500 error. (I saw this error on the try bots - | |
| 167 // the workaround was not needed for my machine). | |
| 168 typedef SpeechRecognizer::Delegate SpeechRecognizerDelegate; | |
| 169 | |
| 170 } // namespace speech_input | 119 } // namespace speech_input |
| 171 | 120 |
| 172 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_ | 121 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_ |
| OLD | NEW |