Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "content/browser/speech/speech_recognizer.h" | 5 #include "content/browser/speech/speech_recognizer.h" |
| 6 | 6 |
| 7 #include "base/time.h" | 7 #include "base/time.h" |
| 8 #include "chrome/browser/profiles/profile.h" | 8 #include "chrome/browser/profiles/profile.h" |
| 9 #include "chrome/common/net/url_request_context_getter.h" | 9 #include "chrome/common/net/url_request_context_getter.h" |
| 10 #include "content/browser/browser_thread.h" | 10 #include "content/browser/browser_thread.h" |
| (...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 103 kNumBitsPerAudioSample)); | 103 kNumBitsPerAudioSample)); |
| 104 int samples_per_packet = (kAudioSampleRate * kAudioPacketIntervalMs) / 1000; | 104 int samples_per_packet = (kAudioSampleRate * kAudioPacketIntervalMs) / 1000; |
| 105 AudioParameters params(AudioParameters::AUDIO_PCM_LINEAR, kNumAudioChannels, | 105 AudioParameters params(AudioParameters::AUDIO_PCM_LINEAR, kNumAudioChannels, |
| 106 kAudioSampleRate, kNumBitsPerAudioSample, | 106 kAudioSampleRate, kNumBitsPerAudioSample, |
| 107 samples_per_packet); | 107 samples_per_packet); |
| 108 audio_controller_ = AudioInputController::Create(this, params); | 108 audio_controller_ = AudioInputController::Create(this, params); |
| 109 DCHECK(audio_controller_.get()); | 109 DCHECK(audio_controller_.get()); |
| 110 VLOG(1) << "SpeechRecognizer starting record."; | 110 VLOG(1) << "SpeechRecognizer starting record."; |
| 111 num_samples_recorded_ = 0; | 111 num_samples_recorded_ = 0; |
| 112 audio_controller_->Record(); | 112 audio_controller_->Record(); |
| 113 previous_audio_chunk_.clear(); | |
| 113 | 114 |
| 114 return true; | 115 return true; |
| 115 } | 116 } |
| 116 | 117 |
| 117 void SpeechRecognizer::CancelRecognition() { | 118 void SpeechRecognizer::CancelRecognition() { |
| 118 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | 119 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
| 119 DCHECK(audio_controller_.get() || request_.get()); | 120 DCHECK(audio_controller_.get() || request_.get()); |
| 120 | 121 |
| 121 // Stop recording if required. | 122 // Stop recording if required. |
| 122 if (audio_controller_.get()) { | 123 if (audio_controller_.get()) { |
| (...skipping 11 matching lines...) Expand all Loading... | |
| 134 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | 135 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
| 135 | 136 |
| 136 // If audio recording has already stopped and we are in recognition phase, | 137 // If audio recording has already stopped and we are in recognition phase, |
| 137 // silently ignore any more calls to stop recording. | 138 // silently ignore any more calls to stop recording. |
| 138 if (!audio_controller_.get()) | 139 if (!audio_controller_.get()) |
| 139 return; | 140 return; |
| 140 | 141 |
| 141 VLOG(1) << "SpeechRecognizer stopping record."; | 142 VLOG(1) << "SpeechRecognizer stopping record."; |
| 142 audio_controller_->Close(); | 143 audio_controller_->Close(); |
| 143 audio_controller_ = NULL; // Releases the ref ptr. | 144 audio_controller_ = NULL; // Releases the ref ptr. |
| 144 encoder_->Flush(); | |
| 145 | 145 |
| 146 delegate_->DidCompleteRecording(caller_id_); | 146 delegate_->DidCompleteRecording(caller_id_); |
| 147 | 147 |
| 148 // Since the http request takes a single string as POST data, allocate | 148 // Get any last bits of encoded data left. |
| 149 // one and copy over bytes from the audio buffers to the string. | 149 encoder_->Flush(); |
| 150 // And If we haven't got any audio yet end the recognition sequence here. | 150 string encoded_data; |
| 151 string mime_type = encoder_->mime_type(); | 151 encoder_->GetEncodedDataAndClear(&encoded_data); |
| 152 string data; | |
| 153 encoder_->GetEncodedData(&data); | |
| 154 encoder_.reset(); | 152 encoder_.reset(); |
| 155 | 153 |
| 156 if (data.empty()) { | 154 // If we haven't got any audio yet end the recognition sequence here. |
| 155 if (request_ == NULL) { | |
| 157 // Guard against the delegate freeing us until we finish our job. | 156 // Guard against the delegate freeing us until we finish our job. |
| 158 scoped_refptr<SpeechRecognizer> me(this); | 157 scoped_refptr<SpeechRecognizer> me(this); |
| 159 delegate_->DidCompleteRecognition(caller_id_); | 158 delegate_->DidCompleteRecognition(caller_id_); |
| 160 } else { | 159 } else { |
| 161 DCHECK(!request_.get()); | 160 // UploadAudioChunk requires a non-empty buffer. So we check if there was |
| 162 request_.reset(new SpeechRecognitionRequest( | 161 // any data available since last time we sent and if not try to send the |
| 163 Profile::GetDefaultRequestContext(), this)); | 162 // last chunk again (i.e. repeat the last 100ms of audio). If nothing was |
| 164 request_->Send(language_, grammar_, hardware_info_, origin_url_, | 163 // recorded yet, we just send a whitespace string. |
|
> **bulach** (2011/03/03 20:05:00): hmm, I haven't followed the other CL this one depe… [comment truncated in source]
| |
| 165 mime_type, data); | 164 if (encoded_data.empty()) { |
| 165 encoded_data = !previous_audio_chunk_.empty() ? | |
| 166 previous_audio_chunk_ : " "; | |
| 167 } | |
| 168 request_->UploadAudioChunk(encoded_data, true); | |
| 166 } | 169 } |
| 167 } | 170 } |
| 168 | 171 |
| 169 void SpeechRecognizer::ReleaseAudioBuffers() { | |
| 170 } | |
| 171 | |
| 172 // Invoked in the audio thread. | 172 // Invoked in the audio thread. |
| 173 void SpeechRecognizer::OnError(AudioInputController* controller, | 173 void SpeechRecognizer::OnError(AudioInputController* controller, |
| 174 int error_code) { | 174 int error_code) { |
| 175 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | 175 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, |
| 176 NewRunnableMethod(this, | 176 NewRunnableMethod(this, |
| 177 &SpeechRecognizer::HandleOnError, | 177 &SpeechRecognizer::HandleOnError, |
| 178 error_code)); | 178 error_code)); |
| 179 } | 179 } |
| 180 | 180 |
| 181 void SpeechRecognizer::HandleOnError(int error_code) { | 181 void SpeechRecognizer::HandleOnError(int error_code) { |
| (...skipping 25 matching lines...) Expand all Loading... | |
| 207 // recording might have been stopped after this buffer was posted to the queue | 207 // recording might have been stopped after this buffer was posted to the queue |
| 208 // by |OnData|. | 208 // by |OnData|. |
| 209 if (!audio_controller_.get()) { | 209 if (!audio_controller_.get()) { |
| 210 delete data; | 210 delete data; |
| 211 return; | 211 return; |
| 212 } | 212 } |
| 213 | 213 |
| 214 const short* samples = reinterpret_cast<const short*>(data->data()); | 214 const short* samples = reinterpret_cast<const short*>(data->data()); |
| 215 DCHECK((data->length() % sizeof(short)) == 0); | 215 DCHECK((data->length() % sizeof(short)) == 0); |
| 216 int num_samples = data->length() / sizeof(short); | 216 int num_samples = data->length() / sizeof(short); |
| 217 | |
| 218 encoder_->Encode(samples, num_samples); | 217 encoder_->Encode(samples, num_samples); |
| 219 float rms; | 218 float rms; |
| 220 endpointer_.ProcessAudio(samples, num_samples, &rms); | 219 endpointer_.ProcessAudio(samples, num_samples, &rms); |
| 221 bool did_clip = Clipping(samples, num_samples); | 220 bool did_clip = Clipping(samples, num_samples); |
| 222 delete data; | 221 delete data; |
| 223 num_samples_recorded_ += num_samples; | 222 num_samples_recorded_ += num_samples; |
| 224 | 223 |
| 224 if (request_ == NULL) { | |
| 225 // This was the first audio packet recorded, so start a request to the | |
| 226 // server to send the data. | |
| 227 request_.reset(new SpeechRecognitionRequest( | |
| 228 Profile::GetDefaultRequestContext(), this)); | |
| 229 request_->Start(language_, grammar_, hardware_info_, origin_url_, | |
| 230 encoder_->mime_type()); | |
| 231 } | |
| 232 | |
| 233 string encoded_data; | |
| 234 encoder_->GetEncodedDataAndClear(&encoded_data); | |
| 235 DCHECK(!encoded_data.empty()); | |
| 236 request_->UploadAudioChunk(encoded_data, false); | |
| 237 previous_audio_chunk_ = encoded_data; | |
| 238 | |
| 225 if (endpointer_.IsEstimatingEnvironment()) { | 239 if (endpointer_.IsEstimatingEnvironment()) { |
| 226 // Check if we have gathered enough audio for the endpointer to do | 240 // Check if we have gathered enough audio for the endpointer to do |
| 227 // environment estimation and should move on to detect speech/end of speech. | 241 // environment estimation and should move on to detect speech/end of speech. |
| 228 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs * | 242 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs * |
| 229 kAudioSampleRate) / 1000) { | 243 kAudioSampleRate) / 1000) { |
| 230 endpointer_.SetUserInputMode(); | 244 endpointer_.SetUserInputMode(); |
| 231 delegate_->DidCompleteEnvironmentEstimation(caller_id_); | 245 delegate_->DidCompleteEnvironmentEstimation(caller_id_); |
| 232 } | 246 } |
| 233 return; // No more processing since we are still estimating environment. | 247 return; // No more processing since we are still estimating environment. |
| 234 } | 248 } |
| (...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 283 | 297 |
| 284 void SpeechRecognizer::InformErrorAndCancelRecognition(ErrorCode error) { | 298 void SpeechRecognizer::InformErrorAndCancelRecognition(ErrorCode error) { |
| 285 CancelRecognition(); | 299 CancelRecognition(); |
| 286 | 300 |
| 287 // Guard against the delegate freeing us until we finish our job. | 301 // Guard against the delegate freeing us until we finish our job. |
| 288 scoped_refptr<SpeechRecognizer> me(this); | 302 scoped_refptr<SpeechRecognizer> me(this); |
| 289 delegate_->OnRecognizerError(caller_id_, error); | 303 delegate_->OnRecognizerError(caller_id_, error); |
| 290 } | 304 } |
| 291 | 305 |
| 292 } // namespace speech_input | 306 } // namespace speech_input |
| OLD | NEW |