| OLD | NEW |
| 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef CHROME_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_ | 5 #ifndef CHROME_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_ |
| 6 #define CHROME_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_ | 6 #define CHROME_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_ |
| 7 | 7 |
| 8 #include <list> | 8 #include <list> |
| 9 #include <string> | 9 #include <string> |
| 10 #include <utility> | 10 #include <utility> |
| 11 | 11 |
| 12 #include "base/ref_counted.h" | 12 #include "base/ref_counted.h" |
| 13 #include "base/scoped_ptr.h" | 13 #include "base/scoped_ptr.h" |
| 14 #include "chrome/browser/speech/endpointer/endpointer.h" | 14 #include "chrome/browser/speech/endpointer/endpointer.h" |
| 15 #include "chrome/browser/speech/speech_recognition_request.h" | 15 #include "chrome/browser/speech/speech_recognition_request.h" |
| 16 #include "media/audio/audio_input_controller.h" | 16 #include "media/audio/audio_input_controller.h" |
| 17 | 17 |
| 18 namespace speech_input { | 18 namespace speech_input { |
| 19 | 19 |
| 20 class SpeexEncoder; | 20 class SpeexEncoder; |
| 21 | 21 |
| 22 // Records audio, sends recorded audio to server and translates server response | 22 // Records audio, sends recorded audio to server and translates server response |
| 23 // to recognition result. | 23 // to recognition result. |
| 24 class SpeechRecognizer | 24 class SpeechRecognizer |
| 25 : public base::RefCountedThreadSafe<SpeechRecognizer>, | 25 : public base::RefCountedThreadSafe<SpeechRecognizer>, |
| 26 public media::AudioInputController::EventHandler, | 26 public media::AudioInputController::EventHandler, |
| 27 public SpeechRecognitionRequestDelegate { | 27 public SpeechRecognitionRequestDelegate { |
| 28 public: | 28 public: |
| 29 enum ErrorCode { |
| 30 RECOGNIZER_NO_ERROR, |
| 31 RECOGNIZER_ERROR_CAPTURE, |
| 32 RECOGNIZER_ERROR_NO_SPEECH, |
| 33 RECOGNIZER_ERROR_NO_RESULTS, |
| 34 }; |
| 35 |
| 29 // Implemented by the caller to receive recognition events. | 36 // Implemented by the caller to receive recognition events. |
| 30 class Delegate { | 37 class Delegate { |
| 31 public: | 38 public: |
| 32 virtual void SetRecognitionResult(int caller_id, | 39 virtual void SetRecognitionResult(int caller_id, |
| 33 bool error, | 40 bool error, |
| 34 const string16& value) = 0; | 41 const string16& value) = 0; |
| 35 | 42 |
| 36 // Invoked when audio recording stops, either because the endpointer | 43 // Invoked when audio recording stops, either because the endpointer |
| 37 // detected silence in user input or because |StopRecording| was called. The | 44 // detected silence in user input or because |StopRecording| was called. The |
| 38 // delegate has to wait until |DidCompleteRecognition| is invoked before | 45 // delegate has to wait until |DidCompleteRecognition| is invoked before |
| 39 // destroying the |SpeechRecognizer| object. | 46 // destroying the |SpeechRecognizer| object. |
| 40 virtual void DidCompleteRecording(int caller_id) = 0; | 47 virtual void DidCompleteRecording(int caller_id) = 0; |
| 41 | 48 |
| 42 // This is guaranteed to be the last method invoked in the recognition | 49 // This is guaranteed to be the last method invoked in the recognition |
| 43 // sequence and the |SpeechRecognizer| object can be freed up if necessary. | 50 // sequence and the |SpeechRecognizer| object can be freed up if necessary. |
| 44 virtual void DidCompleteRecognition(int caller_id) = 0; | 51 virtual void DidCompleteRecognition(int caller_id) = 0; |
| 45 | 52 |
| 46 // Invoked if there was an error while recording or recognizing audio. The | 53 // Invoked if there was an error while recording or recognizing audio. The |
| 47 // session may get terminated and the DidXxxx callbacks may be issued after | 54 // session is terminated when this call is made and the DidXxxx callbacks |
| 48 // this call. | 55 // are issued after this call. |
| 49 virtual void OnRecognizerError(int caller_id) = 0; | 56 virtual void OnRecognizerError(int caller_id, |
| 57 SpeechRecognizer::ErrorCode error) = 0; |
| 58 |
| 59 // At the start of recognition, a short amount of audio is recorded to |
| 60 // estimate the environment/background noise and this callback is issued |
| 61 // after that is complete. Typically the delegate brings up any speech |
| 62 // recognition UI once this callback is received. |
| 63 virtual void DidCompleteEnvironmentEstimation(int caller_id) = 0; |
| 50 | 64 |
| 51 protected: | 65 protected: |
| 52 virtual ~Delegate() {} | 66 virtual ~Delegate() {} |
| 53 }; | 67 }; |
| 54 | 68 |
| 55 SpeechRecognizer(Delegate* delegate, int caller_id); | 69 SpeechRecognizer(Delegate* delegate, int caller_id); |
| 56 ~SpeechRecognizer(); | 70 ~SpeechRecognizer(); |
| 57 | 71 |
| 58 // Starts audio recording and does recognition after recording ends. The same | 72 // Starts audio recording and does recognition after recording ends. The same |
| 59 // SpeechRecognizer instance can be used multiple times for speech recognition | 73 // SpeechRecognizer instance can be used multiple times for speech recognition |
| (...skipping 11 matching lines...) Expand all Loading... |
| 71 // AudioInputController::EventHandler methods. | 85 // AudioInputController::EventHandler methods. |
| 72 void OnCreated(media::AudioInputController* controller) { } | 86 void OnCreated(media::AudioInputController* controller) { } |
| 73 void OnRecording(media::AudioInputController* controller) { } | 87 void OnRecording(media::AudioInputController* controller) { } |
| 74 void OnError(media::AudioInputController* controller, int error_code); | 88 void OnError(media::AudioInputController* controller, int error_code); |
| 75 void OnData(media::AudioInputController* controller, const uint8* data, | 89 void OnData(media::AudioInputController* controller, const uint8* data, |
| 76 uint32 size); | 90 uint32 size); |
| 77 | 91 |
| 78 // SpeechRecognitionRequest::Delegate methods. | 92 // SpeechRecognitionRequest::Delegate methods. |
| 79 void SetRecognitionResult(bool error, const string16& value); | 93 void SetRecognitionResult(bool error, const string16& value); |
| 80 | 94 |
| 95 static const int kAudioSampleRate; |
| 96 static const int kAudioPacketIntervalMs; // Duration of each audio packet. |
| 97 static const int kNumAudioChannels; |
| 98 static const int kNumBitsPerAudioSample; |
| 99 static const int kNoSpeechTimeoutSec; |
| 100 |
| 81 private: | 101 private: |
| 82 void ReleaseAudioBuffers(); | 102 void ReleaseAudioBuffers(); |
| 103 void InformErrorAndCancelRecognition(ErrorCode error); |
| 83 | 104 |
| 84 void HandleOnError(int error_code); // Handles OnError in the IO thread. | 105 void HandleOnError(int error_code); // Handles OnError in the IO thread. |
| 85 | 106 |
| 86 // Handles OnData in the IO thread. Takes ownership of |data|. | 107 // Handles OnData in the IO thread. Takes ownership of |data|. |
| 87 void HandleOnData(std::string* data); | 108 void HandleOnData(std::string* data); |
| 88 | 109 |
| 89 Delegate* delegate_; | 110 Delegate* delegate_; |
| 90 int caller_id_; | 111 int caller_id_; |
| 91 | 112 |
| 92 // Buffer holding the recorded audio. Owns the strings inside the list. | 113 // Buffer holding the recorded audio. Owns the strings inside the list. |
| 93 typedef std::list<std::string*> AudioBufferQueue; | 114 typedef std::list<std::string*> AudioBufferQueue; |
| 94 AudioBufferQueue audio_buffers_; | 115 AudioBufferQueue audio_buffers_; |
| 95 | 116 |
| 96 scoped_ptr<SpeechRecognitionRequest> request_; | 117 scoped_ptr<SpeechRecognitionRequest> request_; |
| 97 scoped_refptr<media::AudioInputController> audio_controller_; | 118 scoped_refptr<media::AudioInputController> audio_controller_; |
| 98 scoped_ptr<SpeexEncoder> encoder_; | 119 scoped_ptr<SpeexEncoder> encoder_; |
| 99 Endpointer endpointer_; | 120 Endpointer endpointer_; |
| 121 int num_samples_recorded_; |
| 100 | 122 |
| 101 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizer); | 123 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizer); |
| 102 }; | 124 }; |
| 103 | 125 |
| 104 // This typedef is to work around the issue with certain versions of | 126 // This typedef is to work around the issue with certain versions of |
| 105 // Visual Studio where it gets confused between multiple Delegate | 127 // Visual Studio where it gets confused between multiple Delegate |
| 106 // classes and gives a C2500 error. (I saw this error on the try bots - | 128 // classes and gives a C2500 error. (I saw this error on the try bots - |
| 107 // the workaround was not needed for my machine). | 129 // the workaround was not needed for my machine). |
| 108 typedef SpeechRecognizer::Delegate SpeechRecognizerDelegate; | 130 typedef SpeechRecognizer::Delegate SpeechRecognizerDelegate; |
| 109 | 131 |
| 110 } // namespace speech_input | 132 } // namespace speech_input |
| 111 | 133 |
| 112 #endif // CHROME_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_ | 134 #endif // CHROME_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_ |
| OLD | NEW |