| OLD | NEW |
| 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/browser/speech/speech_recognizer.h" | 5 #include "chrome/browser/speech/speech_recognizer.h" |
| 6 | 6 |
| 7 #include "base/ref_counted.h" | 7 #include "base/ref_counted.h" |
| 8 #include "base/scoped_ptr.h" | 8 #include "base/scoped_ptr.h" |
| 9 #include "base/time.h" | 9 #include "base/time.h" |
| 10 #include "chrome/browser/browser_thread.h" | 10 #include "chrome/browser/browser_thread.h" |
| (...skipping 123 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 134 // |StopRecording| being called. | 134 // |StopRecording| being called. |
| 135 DCHECK(!audio_controller_.get()); | 135 DCHECK(!audio_controller_.get()); |
| 136 DCHECK(!request_.get() || !request_->HasPendingRequest()); | 136 DCHECK(!request_.get() || !request_->HasPendingRequest()); |
| 137 DCHECK(audio_buffers_.empty()); | 137 DCHECK(audio_buffers_.empty()); |
| 138 endpointer_.EndSession(); | 138 endpointer_.EndSession(); |
| 139 } | 139 } |
| 140 | 140 |
| 141 bool SpeechRecognizer::StartRecording() { | 141 bool SpeechRecognizer::StartRecording() { |
| 142 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | 142 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
| 143 DCHECK(!audio_controller_.get()); | 143 DCHECK(!audio_controller_.get()); |
| 144 DCHECK(!request_.get() || !request_->HasPendingRequest()); | 144 DCHECK(!request_.get()); // || !request_->HasPendingRequest()); |
| 145 | 145 |
| 146 // The endpointer needs to estimate the environment/background noise before | 146 // The endpointer needs to estimate the environment/background noise before |
| 147 // starting to treat the audio as user input. In |HandleOnData| we wait until | 147 // starting to treat the audio as user input. In |HandleOnData| we wait until |
| 148 // such time has passed before switching to user input mode. | 148 // such time has passed before switching to user input mode. |
| 149 endpointer_.SetEnvironmentEstimationMode(); | 149 endpointer_.SetEnvironmentEstimationMode(); |
| 150 | 150 |
| 151 int samples_per_packet = (kAudioSampleRate * kAudioPacketIntervalMs) / 1000; | 151 int samples_per_packet = (kAudioSampleRate * kAudioPacketIntervalMs) / 1000; |
| 152 DCHECK((samples_per_packet % encoder_->samples_per_frame()) == 0); | 152 DCHECK((samples_per_packet % encoder_->samples_per_frame()) == 0); |
| 153 AudioParameters params(AudioParameters::AUDIO_PCM_LINEAR, kNumAudioChannels, | 153 AudioParameters params(AudioParameters::AUDIO_PCM_LINEAR, kNumAudioChannels, |
| 154 kAudioSampleRate, kNumBitsPerAudioSample, | 154 kAudioSampleRate, kNumBitsPerAudioSample, |
| 155 samples_per_packet); | 155 samples_per_packet); |
| 156 audio_controller_ = AudioInputController::Create(this, params); | 156 audio_controller_ = AudioInputController::Create(this, params); |
| 157 DCHECK(audio_controller_.get()); | 157 DCHECK(audio_controller_.get()); |
| 158 VLOG(1) << "SpeechRecognizer starting record."; | 158 VLOG(1) << "SpeechRecognizer starting record."; |
| 159 num_samples_recorded_ = 0; | 159 num_samples_recorded_ = 0; |
| 160 audio_controller_->Record(); | 160 audio_controller_->Record(); |
| 161 | 161 |
| 162 request_.reset(new SpeechRecognitionRequest( |
| 163 Profile::GetDefaultRequestContext(), this)); |
| 164 request_->Start(language_, grammar_, hardware_info_, kContentTypeSpeex); |
| 165 |
| 162 return true; | 166 return true; |
| 163 } | 167 } |
| 164 | 168 |
| 165 void SpeechRecognizer::CancelRecognition() { | 169 void SpeechRecognizer::CancelRecognition() { |
| 166 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | 170 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
| 167 DCHECK(audio_controller_.get() || request_.get()); | 171 DCHECK(audio_controller_.get() || request_.get()); |
| 168 | 172 |
| 169 // Stop recording if required. | 173 // Stop recording if required. |
| 170 if (audio_controller_.get()) { | 174 if (audio_controller_.get()) { |
| 171 VLOG(1) << "SpeechRecognizer stopping record."; | 175 VLOG(1) << "SpeechRecognizer stopping record."; |
| (...skipping 12 matching lines...) Expand all Loading... |
| 184 // If audio recording has already stopped and we are in recognition phase, | 188 // If audio recording has already stopped and we are in recognition phase, |
| 185 // silently ignore any more calls to stop recording. | 189 // silently ignore any more calls to stop recording. |
| 186 if (!audio_controller_.get()) | 190 if (!audio_controller_.get()) |
| 187 return; | 191 return; |
| 188 | 192 |
| 189 VLOG(1) << "SpeechRecognizer stopping record."; | 193 VLOG(1) << "SpeechRecognizer stopping record."; |
| 190 audio_controller_->Close(); | 194 audio_controller_->Close(); |
| 191 audio_controller_ = NULL; // Releases the ref ptr. | 195 audio_controller_ = NULL; // Releases the ref ptr. |
| 192 | 196 |
| 193 delegate_->DidCompleteRecording(caller_id_); | 197 delegate_->DidCompleteRecording(caller_id_); |
| 194 | 198 DCHECK(request_.get()); |
| 199 request_->FinishAudioUpload(); |
| 200 /* |
| 195 // If we haven't got any audio yet end the recognition sequence here. | 201 // If we haven't got any audio yet end the recognition sequence here. |
| 196 if (audio_buffers_.empty()) { | 202 if (audio_buffers_.empty()) { |
| 197 // Guard against the delegate freeing us until we finish our job. | 203 // Guard against the delegate freeing us until we finish our job. |
| 198 scoped_refptr<SpeechRecognizer> me(this); | 204 scoped_refptr<SpeechRecognizer> me(this); |
| 199 delegate_->DidCompleteRecognition(caller_id_); | 205 delegate_->DidCompleteRecognition(caller_id_); |
| 200 return; | 206 return; |
| 201 } | 207 } |
| 202 | 208 |
| 203 // We now have recorded audio in our buffers, so start a recognition request. | 209 // We now have recorded audio in our buffers, so start a recognition request. |
| 204 // Since the http request takes a single string as POST data, allocate | 210 // Since the http request takes a single string as POST data, allocate |
| 205 // one and copy over bytes from the audio buffers to the string. | 211 // one and copy over bytes from the audio buffers to the string. |
| 206 int audio_buffer_length = 0; | 212 int audio_buffer_length = 0; |
| 207 for (AudioBufferQueue::iterator it = audio_buffers_.begin(); | 213 for (AudioBufferQueue::iterator it = audio_buffers_.begin(); |
| 208 it != audio_buffers_.end(); it++) { | 214 it != audio_buffers_.end(); it++) { |
| 209 audio_buffer_length += (*it)->length(); | 215 audio_buffer_length += (*it)->length(); |
| 210 } | 216 } |
| 211 string data; | 217 string data; |
| 212 data.reserve(audio_buffer_length); | 218 data.reserve(audio_buffer_length); |
| 213 for (AudioBufferQueue::iterator it = audio_buffers_.begin(); | 219 for (AudioBufferQueue::iterator it = audio_buffers_.begin(); |
| 214 it != audio_buffers_.end(); it++) { | 220 it != audio_buffers_.end(); it++) { |
| 215 data.append(*(*it)); | 221 data.append(*(*it)); |
| 216 } | 222 } |
| 217 | 223 |
| 218 DCHECK(!request_.get()); | |
| 219 request_.reset(new SpeechRecognitionRequest( | |
| 220 Profile::GetDefaultRequestContext(), this)); | |
| 221 request_->Send(language_, grammar_, hardware_info_, kContentTypeSpeex, data); | |
| 222 ReleaseAudioBuffers(); // No need to keep the audio anymore. | 224 ReleaseAudioBuffers(); // No need to keep the audio anymore. |
| 225 */ |
| 223 } | 226 } |
| 224 | 227 |
| 225 void SpeechRecognizer::ReleaseAudioBuffers() { | 228 void SpeechRecognizer::ReleaseAudioBuffers() { |
| 226 for (AudioBufferQueue::iterator it = audio_buffers_.begin(); | 229 for (AudioBufferQueue::iterator it = audio_buffers_.begin(); |
| 227 it != audio_buffers_.end(); it++) | 230 it != audio_buffers_.end(); it++) |
| 228 delete *it; | 231 delete *it; |
| 229 audio_buffers_.clear(); | 232 audio_buffers_.clear(); |
| 230 } | 233 } |
| 231 | 234 |
| 232 // Invoked in the audio thread. | 235 // Invoked in the audio thread. |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 274 const short* samples = reinterpret_cast<const short*>(data->data()); | 277 const short* samples = reinterpret_cast<const short*>(data->data()); |
| 275 DCHECK((data->length() % sizeof(short)) == 0); | 278 DCHECK((data->length() % sizeof(short)) == 0); |
| 276 int num_samples = data->length() / sizeof(short); | 279 int num_samples = data->length() / sizeof(short); |
| 277 | 280 |
| 278 encoder_->Encode(samples, num_samples, &audio_buffers_); | 281 encoder_->Encode(samples, num_samples, &audio_buffers_); |
| 279 float rms; | 282 float rms; |
| 280 endpointer_.ProcessAudio(samples, num_samples, &rms); | 283 endpointer_.ProcessAudio(samples, num_samples, &rms); |
| 281 delete data; | 284 delete data; |
| 282 num_samples_recorded_ += num_samples; | 285 num_samples_recorded_ += num_samples; |
| 283 | 286 |
| 287 DCHECK(request_.get()); |
| 288 request_->UploadAudioChunk(**audio_buffers_.begin()); |
| 289 ReleaseAudioBuffers(); |
| 290 |
| 284 if (endpointer_.IsEstimatingEnvironment()) { | 291 if (endpointer_.IsEstimatingEnvironment()) { |
| 285 // Check if we have gathered enough audio for the endpointer to do | 292 // Check if we have gathered enough audio for the endpointer to do |
| 286 // environment estimation and should move on to detect speech/end of speech. | 293 // environment estimation and should move on to detect speech/end of speech. |
| 287 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs * | 294 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs * |
| 288 kAudioSampleRate) / 1000) { | 295 kAudioSampleRate) / 1000) { |
| 289 endpointer_.SetUserInputMode(); | 296 endpointer_.SetUserInputMode(); |
| 290 delegate_->DidCompleteEnvironmentEstimation(caller_id_); | 297 delegate_->DidCompleteEnvironmentEstimation(caller_id_); |
| 291 } | 298 } |
| 292 return; // No more processing since we are still estimating environment. | 299 return; // No more processing since we are still estimating environment. |
| 293 } | 300 } |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 334 | 341 |
| 335 void SpeechRecognizer::InformErrorAndCancelRecognition(ErrorCode error) { | 342 void SpeechRecognizer::InformErrorAndCancelRecognition(ErrorCode error) { |
| 336 CancelRecognition(); | 343 CancelRecognition(); |
| 337 | 344 |
| 338 // Guard against the delegate freeing us until we finish our job. | 345 // Guard against the delegate freeing us until we finish our job. |
| 339 scoped_refptr<SpeechRecognizer> me(this); | 346 scoped_refptr<SpeechRecognizer> me(this); |
| 340 delegate_->OnRecognizerError(caller_id_, error); | 347 delegate_->OnRecognizerError(caller_id_, error); |
| 341 } | 348 } |
| 342 | 349 |
| 343 } // namespace speech_input | 350 } // namespace speech_input |
| OLD | NEW |