OLD | NEW |
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/speech/speech_recognizer.h" | 5 #include "chrome/browser/speech/speech_recognizer.h" |
6 | 6 |
7 #include "base/ref_counted.h" | 7 #include "base/ref_counted.h" |
8 #include "base/scoped_ptr.h" | 8 #include "base/scoped_ptr.h" |
9 #include "base/time.h" | 9 #include "base/time.h" |
10 #include "chrome/browser/browser_thread.h" | 10 #include "chrome/browser/browser_thread.h" |
(...skipping 123 matching lines...) |
134 // |StopRecording| being called. | 134 // |StopRecording| being called. |
135 DCHECK(!audio_controller_.get()); | 135 DCHECK(!audio_controller_.get()); |
136 DCHECK(!request_.get() || !request_->HasPendingRequest()); | 136 DCHECK(!request_.get() || !request_->HasPendingRequest()); |
137 DCHECK(audio_buffers_.empty()); | 137 DCHECK(audio_buffers_.empty()); |
138 endpointer_.EndSession(); | 138 endpointer_.EndSession(); |
139 } | 139 } |
140 | 140 |
141 bool SpeechRecognizer::StartRecording() { | 141 bool SpeechRecognizer::StartRecording() { |
142 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | 142 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
143 DCHECK(!audio_controller_.get()); | 143 DCHECK(!audio_controller_.get()); |
144 DCHECK(!request_.get() || !request_->HasPendingRequest()); | 144 DCHECK(!request_.get()); // || !request_->HasPendingRequest()); |
145 | 145 |
146 // The endpointer needs to estimate the environment/background noise before | 146 // The endpointer needs to estimate the environment/background noise before |
147 // starting to treat the audio as user input. In |HandleOnData| we wait until | 147 // starting to treat the audio as user input. In |HandleOnData| we wait until |
148 // such time has passed before switching to user input mode. | 148 // such time has passed before switching to user input mode. |
149 endpointer_.SetEnvironmentEstimationMode(); | 149 endpointer_.SetEnvironmentEstimationMode(); |
150 | 150 |
151 int samples_per_packet = (kAudioSampleRate * kAudioPacketIntervalMs) / 1000; | 151 int samples_per_packet = (kAudioSampleRate * kAudioPacketIntervalMs) / 1000; |
152 DCHECK((samples_per_packet % encoder_->samples_per_frame()) == 0); | 152 DCHECK((samples_per_packet % encoder_->samples_per_frame()) == 0); |
153 AudioParameters params(AudioParameters::AUDIO_PCM_LINEAR, kNumAudioChannels, | 153 AudioParameters params(AudioParameters::AUDIO_PCM_LINEAR, kNumAudioChannels, |
154 kAudioSampleRate, kNumBitsPerAudioSample, | 154 kAudioSampleRate, kNumBitsPerAudioSample, |
155 samples_per_packet); | 155 samples_per_packet); |
156 audio_controller_ = AudioInputController::Create(this, params); | 156 audio_controller_ = AudioInputController::Create(this, params); |
157 DCHECK(audio_controller_.get()); | 157 DCHECK(audio_controller_.get()); |
158 VLOG(1) << "SpeechRecognizer starting record."; | 158 VLOG(1) << "SpeechRecognizer starting record."; |
159 num_samples_recorded_ = 0; | 159 num_samples_recorded_ = 0; |
160 audio_controller_->Record(); | 160 audio_controller_->Record(); |
161 | 161 |
| 162 request_.reset(new SpeechRecognitionRequest( |
| 163 Profile::GetDefaultRequestContext(), this)); |
| 164 request_->Start(language_, grammar_, hardware_info_, kContentTypeSpeex); |
| 165 |
162 return true; | 166 return true; |
163 } | 167 } |
164 | 168 |
165 void SpeechRecognizer::CancelRecognition() { | 169 void SpeechRecognizer::CancelRecognition() { |
166 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | 170 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
167 DCHECK(audio_controller_.get() || request_.get()); | 171 DCHECK(audio_controller_.get() || request_.get()); |
168 | 172 |
169 // Stop recording if required. | 173 // Stop recording if required. |
170 if (audio_controller_.get()) { | 174 if (audio_controller_.get()) { |
171 VLOG(1) << "SpeechRecognizer stopping record."; | 175 VLOG(1) << "SpeechRecognizer stopping record."; |
(...skipping 12 matching lines...) |
184 // If audio recording has already stopped and we are in recognition phase, | 188 // If audio recording has already stopped and we are in recognition phase, |
185 // silently ignore any more calls to stop recording. | 189 // silently ignore any more calls to stop recording. |
186 if (!audio_controller_.get()) | 190 if (!audio_controller_.get()) |
187 return; | 191 return; |
188 | 192 |
189 VLOG(1) << "SpeechRecognizer stopping record."; | 193 VLOG(1) << "SpeechRecognizer stopping record."; |
190 audio_controller_->Close(); | 194 audio_controller_->Close(); |
191 audio_controller_ = NULL; // Releases the ref ptr. | 195 audio_controller_ = NULL; // Releases the ref ptr. |
192 | 196 |
193 delegate_->DidCompleteRecording(caller_id_); | 197 delegate_->DidCompleteRecording(caller_id_); |
194 | 198 DCHECK(request_.get()); |
| 199 request_->FinishAudioUpload(); |
| 200 /* |
195 // If we haven't got any audio yet end the recognition sequence here. | 201 // If we haven't got any audio yet end the recognition sequence here. |
196 if (audio_buffers_.empty()) { | 202 if (audio_buffers_.empty()) { |
197 // Guard against the delegate freeing us until we finish our job. | 203 // Guard against the delegate freeing us until we finish our job. |
198 scoped_refptr<SpeechRecognizer> me(this); | 204 scoped_refptr<SpeechRecognizer> me(this); |
199 delegate_->DidCompleteRecognition(caller_id_); | 205 delegate_->DidCompleteRecognition(caller_id_); |
200 return; | 206 return; |
201 } | 207 } |
202 | 208 |
203 // We now have recorded audio in our buffers, so start a recognition request. | 209 // We now have recorded audio in our buffers, so start a recognition request. |
204 // Since the http request takes a single string as POST data, allocate | 210 // Since the http request takes a single string as POST data, allocate |
205 // one and copy over bytes from the audio buffers to the string. | 211 // one and copy over bytes from the audio buffers to the string. |
206 int audio_buffer_length = 0; | 212 int audio_buffer_length = 0; |
207 for (AudioBufferQueue::iterator it = audio_buffers_.begin(); | 213 for (AudioBufferQueue::iterator it = audio_buffers_.begin(); |
208 it != audio_buffers_.end(); it++) { | 214 it != audio_buffers_.end(); it++) { |
209 audio_buffer_length += (*it)->length(); | 215 audio_buffer_length += (*it)->length(); |
210 } | 216 } |
211 string data; | 217 string data; |
212 data.reserve(audio_buffer_length); | 218 data.reserve(audio_buffer_length); |
213 for (AudioBufferQueue::iterator it = audio_buffers_.begin(); | 219 for (AudioBufferQueue::iterator it = audio_buffers_.begin(); |
214 it != audio_buffers_.end(); it++) { | 220 it != audio_buffers_.end(); it++) { |
215 data.append(*(*it)); | 221 data.append(*(*it)); |
216 } | 222 } |
217 | 223 |
218 DCHECK(!request_.get()); | |
219 request_.reset(new SpeechRecognitionRequest( | |
220 Profile::GetDefaultRequestContext(), this)); | |
221 request_->Send(language_, grammar_, hardware_info_, kContentTypeSpeex, data); | |
222 ReleaseAudioBuffers(); // No need to keep the audio anymore. | 224 ReleaseAudioBuffers(); // No need to keep the audio anymore. |
| 225 */ |
223 } | 226 } |
224 | 227 |
225 void SpeechRecognizer::ReleaseAudioBuffers() { | 228 void SpeechRecognizer::ReleaseAudioBuffers() { |
226 for (AudioBufferQueue::iterator it = audio_buffers_.begin(); | 229 for (AudioBufferQueue::iterator it = audio_buffers_.begin(); |
227 it != audio_buffers_.end(); it++) | 230 it != audio_buffers_.end(); it++) |
228 delete *it; | 231 delete *it; |
229 audio_buffers_.clear(); | 232 audio_buffers_.clear(); |
230 } | 233 } |
231 | 234 |
232 // Invoked in the audio thread. | 235 // Invoked in the audio thread. |
(...skipping 41 matching lines...) |
274 const short* samples = reinterpret_cast<const short*>(data->data()); | 277 const short* samples = reinterpret_cast<const short*>(data->data()); |
275 DCHECK((data->length() % sizeof(short)) == 0); | 278 DCHECK((data->length() % sizeof(short)) == 0); |
276 int num_samples = data->length() / sizeof(short); | 279 int num_samples = data->length() / sizeof(short); |
277 | 280 |
278 encoder_->Encode(samples, num_samples, &audio_buffers_); | 281 encoder_->Encode(samples, num_samples, &audio_buffers_); |
279 float rms; | 282 float rms; |
280 endpointer_.ProcessAudio(samples, num_samples, &rms); | 283 endpointer_.ProcessAudio(samples, num_samples, &rms); |
281 delete data; | 284 delete data; |
282 num_samples_recorded_ += num_samples; | 285 num_samples_recorded_ += num_samples; |
283 | 286 |
| 287 DCHECK(request_.get()); |
| 288 request_->UploadAudioChunk(**audio_buffers_.begin()); |
| 289 ReleaseAudioBuffers(); |
| 290 |
284 if (endpointer_.IsEstimatingEnvironment()) { | 291 if (endpointer_.IsEstimatingEnvironment()) { |
285 // Check if we have gathered enough audio for the endpointer to do | 292 // Check if we have gathered enough audio for the endpointer to do |
286 // environment estimation and should move on to detect speech/end of speech. | 293 // environment estimation and should move on to detect speech/end of speech. |
287 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs * | 294 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs * |
288 kAudioSampleRate) / 1000) { | 295 kAudioSampleRate) / 1000) { |
289 endpointer_.SetUserInputMode(); | 296 endpointer_.SetUserInputMode(); |
290 delegate_->DidCompleteEnvironmentEstimation(caller_id_); | 297 delegate_->DidCompleteEnvironmentEstimation(caller_id_); |
291 } | 298 } |
292 return; // No more processing since we are still estimating environment. | 299 return; // No more processing since we are still estimating environment. |
293 } | 300 } |
(...skipping 40 matching lines...) |
334 | 341 |
335 void SpeechRecognizer::InformErrorAndCancelRecognition(ErrorCode error) { | 342 void SpeechRecognizer::InformErrorAndCancelRecognition(ErrorCode error) { |
336 CancelRecognition(); | 343 CancelRecognition(); |
337 | 344 |
338 // Guard against the delegate freeing us until we finish our job. | 345 // Guard against the delegate freeing us until we finish our job. |
339 scoped_refptr<SpeechRecognizer> me(this); | 346 scoped_refptr<SpeechRecognizer> me(this); |
340 delegate_->OnRecognizerError(caller_id_, error); | 347 delegate_->OnRecognizerError(caller_id_, error); |
341 } | 348 } |
342 | 349 |
343 } // namespace speech_input | 350 } // namespace speech_input |
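Note: the call sites added in this patch (request_->Start(...), request_->UploadAudioChunk(...), request_->FinishAudioUpload(), and the HasPendingRequest() checks in the DCHECKs) suggest SpeechRecognitionRequest is moving from a single Send() POST to a chunked-upload interface. The sketch below is only an inference from those call sites; the method signatures and the std::string chunk type are assumptions, not the actual header.

// Hypothetical shape of the streaming interface assumed by this patch;
// the real speech_recognition_request.h declaration may differ.
class SpeechRecognitionRequest {
 public:
  // Opens the recognition request and sends language/grammar/hardware info;
  // the audio payload follows in chunks rather than as one POST body.
  void Start(const std::string& language,
             const std::string& grammar,
             const std::string& hardware_info,
             const std::string& content_type);

  // Appends one encoded audio packet to the in-flight upload.
  void UploadAudioChunk(const std::string& audio_chunk);

  // Signals the end of the audio stream so the server can return a result.
  void FinishAudioUpload();

  // True while an upload/recognition round trip is still outstanding.
  bool HasPendingRequest() const;
};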