Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(322)

Side by Side Diff: chrome/browser/speech/speech_recognizer.h

Issue 3341020: Speech input: Do environment estimation and detect the no-speech case. (Closed)
Patch Set: . Created 10 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef CHROME_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_ 5 #ifndef CHROME_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_
6 #define CHROME_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_ 6 #define CHROME_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_
7 7
8 #include <list> 8 #include <list>
9 #include <string> 9 #include <string>
10 #include <utility> 10 #include <utility>
11 11
12 #include "base/ref_counted.h" 12 #include "base/ref_counted.h"
13 #include "base/scoped_ptr.h" 13 #include "base/scoped_ptr.h"
14 #include "chrome/browser/speech/endpointer/endpointer.h" 14 #include "chrome/browser/speech/endpointer/endpointer.h"
15 #include "chrome/browser/speech/speech_recognition_request.h" 15 #include "chrome/browser/speech/speech_recognition_request.h"
16 #include "media/audio/audio_input_controller.h" 16 #include "media/audio/audio_input_controller.h"
17 17
18 namespace speech_input { 18 namespace speech_input {
19 19
20 class SpeexEncoder; 20 class SpeexEncoder;
21 21
22 // Records audio, sends recorded audio to server and translates server response 22 // Records audio, sends recorded audio to server and translates server response
23 // to recognition result. 23 // to recognition result.
24 class SpeechRecognizer 24 class SpeechRecognizer
25 : public base::RefCountedThreadSafe<SpeechRecognizer>, 25 : public base::RefCountedThreadSafe<SpeechRecognizer>,
26 public media::AudioInputController::EventHandler, 26 public media::AudioInputController::EventHandler,
27 public SpeechRecognitionRequestDelegate { 27 public SpeechRecognitionRequestDelegate {
28 public: 28 public:
29 enum ErrorCode {
30 RECOGNIZER_NO_ERROR,
31 RECOGNIZER_ERROR_CAPTURE,
32 RECOGNIZER_ERROR_NO_SPEECH,
33 RECOGNIZER_ERROR_NO_RESULTS,
34 };
35
29 // Implemented by the caller to receive recognition events. 36 // Implemented by the caller to receive recognition events.
30 class Delegate { 37 class Delegate {
31 public: 38 public:
32 virtual void SetRecognitionResult(int caller_id, 39 virtual void SetRecognitionResult(int caller_id,
33 bool error, 40 bool error,
34 const string16& value) = 0; 41 const string16& value) = 0;
35 42
36 // Invoked when audio recording stops, either due to the end pointer 43 // Invoked when audio recording stops, either due to the end pointer
37 // detecting silence in user input or if |StopRecording| was called. The 44 // detecting silence in user input or if |StopRecording| was called. The
38 // delegate has to wait until |DidCompleteRecognition| is invoked before 45 // delegate has to wait until |DidCompleteRecognition| is invoked before
39 // destroying the |SpeechRecognizer| object. 46 // destroying the |SpeechRecognizer| object.
40 virtual void DidCompleteRecording(int caller_id) = 0; 47 virtual void DidCompleteRecording(int caller_id) = 0;
41 48
42 // This is guaranteed to be the last method invoked in the recognition 49 // This is guaranteed to be the last method invoked in the recognition
43 // sequence and the |SpeechRecognizer| object can be freed up if necessary. 50 // sequence and the |SpeechRecognizer| object can be freed up if necessary.
44 virtual void DidCompleteRecognition(int caller_id) = 0; 51 virtual void DidCompleteRecognition(int caller_id) = 0;
45 52
46 // Invoked if there was an error while recording or recognizing audio. The 53 // Invoked if there was an error while recording or recognizing audio. The
47 // session may get terminated and the DidXxxx callbacks may be issued after 54 // session is terminated when this call is made and the DidXxxx callbacks
48 // this call. 55 // are issued after this call.
49 virtual void OnRecognizerError(int caller_id) = 0; 56 virtual void OnRecognizerError(int caller_id,
57 SpeechRecognizer::ErrorCode error) = 0;
58
59 // At the start of recognition, a short amount of audio is recorded to
60 // estimate the environment/background noise and this callback is issued
61 // after that is complete. Typically the delegate brings up any speech
62 // recognition UI once this callback is received.
63 virtual void DidCompleteEnvironmentEstimation(int caller_id) = 0;
50 64
51 protected: 65 protected:
52 virtual ~Delegate() {} 66 virtual ~Delegate() {}
53 }; 67 };
54 68
55 SpeechRecognizer(Delegate* delegate, int caller_id); 69 SpeechRecognizer(Delegate* delegate, int caller_id);
56 ~SpeechRecognizer(); 70 ~SpeechRecognizer();
57 71
58 // Starts audio recording and does recognition after recording ends. The same 72 // Starts audio recording and does recognition after recording ends. The same
59 // SpeechRecognizer instance can be used multiple times for speech recognition 73 // SpeechRecognizer instance can be used multiple times for speech recognition
(...skipping 11 matching lines...) Expand all
71 // AudioInputController::EventHandler methods. 85 // AudioInputController::EventHandler methods.
72 void OnCreated(media::AudioInputController* controller) { } 86 void OnCreated(media::AudioInputController* controller) { }
73 void OnRecording(media::AudioInputController* controller) { } 87 void OnRecording(media::AudioInputController* controller) { }
74 void OnError(media::AudioInputController* controller, int error_code); 88 void OnError(media::AudioInputController* controller, int error_code);
75 void OnData(media::AudioInputController* controller, const uint8* data, 89 void OnData(media::AudioInputController* controller, const uint8* data,
76 uint32 size); 90 uint32 size);
77 91
78 // SpeechRecognitionRequest::Delegate methods. 92 // SpeechRecognitionRequest::Delegate methods.
79 void SetRecognitionResult(bool error, const string16& value); 93 void SetRecognitionResult(bool error, const string16& value);
80 94
95 static const int kAudioSampleRate;
96 static const int kAudioPacketIntervalMs; // Duration of each audio packet.
97 static const int kNumAudioChannels;
98 static const int kNumBitsPerAudioSample;
99 static const int kNoSpeechTimeoutSec;
100
81 private: 101 private:
82 void ReleaseAudioBuffers(); 102 void ReleaseAudioBuffers();
103 void InformErrorAndCancelRecognition(ErrorCode error);
83 104
84 void HandleOnError(int error_code); // Handles OnError in the IO thread. 105 void HandleOnError(int error_code); // Handles OnError in the IO thread.
85 106
86 // Handles OnData in the IO thread. Takes ownership of |data|. 107 // Handles OnData in the IO thread. Takes ownership of |data|.
87 void HandleOnData(std::string* data); 108 void HandleOnData(std::string* data);
88 109
89 Delegate* delegate_; 110 Delegate* delegate_;
90 int caller_id_; 111 int caller_id_;
91 112
92 // Buffer holding the recorded audio. Owns the strings inside the list. 113 // Buffer holding the recorded audio. Owns the strings inside the list.
93 typedef std::list<std::string*> AudioBufferQueue; 114 typedef std::list<std::string*> AudioBufferQueue;
94 AudioBufferQueue audio_buffers_; 115 AudioBufferQueue audio_buffers_;
95 116
96 scoped_ptr<SpeechRecognitionRequest> request_; 117 scoped_ptr<SpeechRecognitionRequest> request_;
97 scoped_refptr<media::AudioInputController> audio_controller_; 118 scoped_refptr<media::AudioInputController> audio_controller_;
98 scoped_ptr<SpeexEncoder> encoder_; 119 scoped_ptr<SpeexEncoder> encoder_;
99 Endpointer endpointer_; 120 Endpointer endpointer_;
121 int num_samples_recorded_;
100 122
101 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizer); 123 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizer);
102 }; 124 };
103 125
104 // This typedef is to work around the issue with certain versions of 126 // This typedef is to work around the issue with certain versions of
105 // Visual Studio where it gets confused between multiple Delegate 127 // Visual Studio where it gets confused between multiple Delegate
106 // classes and gives a C2500 error. (I saw this error on the try bots - 128 // classes and gives a C2500 error. (I saw this error on the try bots -
107 // the workaround was not needed for my machine). 129 // the workaround was not needed for my machine).
108 typedef SpeechRecognizer::Delegate SpeechRecognizerDelegate; 130 typedef SpeechRecognizer::Delegate SpeechRecognizerDelegate;
109 131
110 } // namespace speech_input 132 } // namespace speech_input
111 133
112 #endif // CHROME_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_ 134 #endif // CHROME_BROWSER_SPEECH_SPEECH_RECOGNIZER_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698