Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "content/browser/speech/speech_recognizer.h" | 5 #include "content/browser/speech/speech_recognizer.h" |
| 6 | 6 |
| 7 #include "base/time.h" | 7 #include "base/time.h" |
| 8 #include "content/browser/browser_thread.h" | 8 #include "content/browser/browser_thread.h" |
| 9 #include "net/url_request/url_request_context_getter.h" | 9 #include "net/url_request/url_request_context_getter.h" |
| 10 | 10 |
| (...skipping 151 matching lines...) | |
| 162 if (request_ == NULL) { | 162 if (request_ == NULL) { |
| 163 // Guard against the delegate freeing us until we finish our job. | 163 // Guard against the delegate freeing us until we finish our job. |
| 164 scoped_refptr<SpeechRecognizer> me(this); | 164 scoped_refptr<SpeechRecognizer> me(this); |
| 165 delegate_->DidCompleteRecognition(caller_id_); | 165 delegate_->DidCompleteRecognition(caller_id_); |
| 166 } else { | 166 } else { |
| 167 request_->UploadAudioChunk(encoded_data, true /* is_last_chunk */); | 167 request_->UploadAudioChunk(encoded_data, true /* is_last_chunk */); |
| 168 } | 168 } |
| 169 } | 169 } |
| 170 | 170 |
| 171 // Invoked in the audio thread. | 171 // Invoked in the audio thread. |
| 172 void SpeechRecognizer::OnRecording(AudioInputController* controller) { | |
| 173 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | |
| 174 NewRunnableMethod(this, &SpeechRecognizer::HandleOnRecording)); | |
| 175 } | |
| 176 | |
| 177 void SpeechRecognizer::HandleOnRecording() { | |
| 178 // Guard against the delegate freeing us until we finish our job. | |
| 179 scoped_refptr<SpeechRecognizer> me(this); | |
| 180 delegate_->OnRecording(caller_id_); | |
| 181 } | |
| 182 | |
| 183 // Invoked in the audio thread. | |
| 172 void SpeechRecognizer::OnError(AudioInputController* controller, | 184 void SpeechRecognizer::OnError(AudioInputController* controller, |
| 173 int error_code) { | 185 int error_code) { |
| 174 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | 186 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, |
| 175 NewRunnableMethod(this, | 187 NewRunnableMethod(this, |
| 176 &SpeechRecognizer::HandleOnError, | 188 &SpeechRecognizer::HandleOnError, |
| 177 error_code)); | 189 error_code)); |
| 178 } | 190 } |
| 179 | 191 |
| 180 void SpeechRecognizer::HandleOnError(int error_code) { | 192 void SpeechRecognizer::HandleOnError(int error_code) { |
| 181 LOG(WARNING) << "SpeechRecognizer::HandleOnError, code=" << error_code; | 193 LOG(WARNING) << "SpeechRecognizer::HandleOnError, code=" << error_code; |
| (...skipping 21 matching lines...) | |
| 203 | 215 |
| 204 void SpeechRecognizer::HandleOnData(string* data) { | 216 void SpeechRecognizer::HandleOnData(string* data) { |
| 205 // Check if we are still recording and if not discard this buffer, as | 217 // Check if we are still recording and if not discard this buffer, as |
| 206 // recording might have been stopped after this buffer was posted to the queue | 218 // recording might have been stopped after this buffer was posted to the queue |
| 207 // by |OnData|. | 219 // by |OnData|. |
| 208 if (!audio_controller_.get()) { | 220 if (!audio_controller_.get()) { |
| 209 delete data; | 221 delete data; |
| 210 return; | 222 return; |
| 211 } | 223 } |
| 212 | 224 |
| 225 bool speech_started = endpointer_.DidStartReceivingSpeech(); | |
|
Satish
2011/10/04 20:36:33
suggest renaming to speech_was_heard to indicate t…
Leandro Graciá Gil
2011/10/05 22:09:00
Done.
| |
| 226 | |
| 213 const short* samples = reinterpret_cast<const short*>(data->data()); | 227 const short* samples = reinterpret_cast<const short*>(data->data()); |
| 214 DCHECK((data->length() % sizeof(short)) == 0); | 228 DCHECK((data->length() % sizeof(short)) == 0); |
| 215 int num_samples = data->length() / sizeof(short); | 229 int num_samples = data->length() / sizeof(short); |
| 216 encoder_->Encode(samples, num_samples); | 230 encoder_->Encode(samples, num_samples); |
| 217 float rms; | 231 float rms; |
| 218 endpointer_.ProcessAudio(samples, num_samples, &rms); | 232 endpointer_.ProcessAudio(samples, num_samples, &rms); |
| 219 bool did_clip = Clipping(samples, num_samples); | 233 bool did_clip = Clipping(samples, num_samples); |
| 220 delete data; | 234 delete data; |
| 221 num_samples_recorded_ += num_samples; | 235 num_samples_recorded_ += num_samples; |
| 222 | 236 |
| (...skipping 16 matching lines...) | |
| 239 // environment estimation and should move on to detect speech/end of speech. | 253 // environment estimation and should move on to detect speech/end of speech. |
| 240 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs * | 254 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs * |
| 241 kAudioSampleRate) / 1000) { | 255 kAudioSampleRate) / 1000) { |
| 242 endpointer_.SetUserInputMode(); | 256 endpointer_.SetUserInputMode(); |
| 243 delegate_->DidCompleteEnvironmentEstimation(caller_id_); | 257 delegate_->DidCompleteEnvironmentEstimation(caller_id_); |
| 244 } | 258 } |
| 245 return; // No more processing since we are still estimating environment. | 259 return; // No more processing since we are still estimating environment. |
| 246 } | 260 } |
| 247 | 261 |
| 248 // Check if we have waited too long without hearing any speech. | 262 // Check if we have waited too long without hearing any speech. |
| 249 if (!endpointer_.DidStartReceivingSpeech() && | 263 if (!endpointer_.DidStartReceivingSpeech() && |
|
Satish
2011/10/04 20:36:33
perhaps assign the value of DidStartReceivingSpeech…
Leandro Graciá Gil
2011/10/05 22:09:00
Done.
| |
| 250 num_samples_recorded_ >= kNoSpeechTimeoutSec * kAudioSampleRate) { | 264 num_samples_recorded_ >= kNoSpeechTimeoutSec * kAudioSampleRate) { |
| 251 InformErrorAndCancelRecognition(RECOGNIZER_ERROR_NO_SPEECH); | 265 InformErrorAndCancelRecognition(RECOGNIZER_ERROR_NO_SPEECH); |
| 252 return; | 266 return; |
| 253 } | 267 } |
| 254 | 268 |
| 269 if (!speech_started && endpointer_.DidStartReceivingSpeech()) | |
| 270 delegate_->DidSpeechInputStart(caller_id_); | |
| 271 | |
| 255 // Calculate the input volume to display in the UI, smoothing towards the | 272 // Calculate the input volume to display in the UI, smoothing towards the |
| 256 // new level. | 273 // new level. |
| 257 float level = (rms - kAudioMeterMinDb) / | 274 float level = (rms - kAudioMeterMinDb) / |
| 258 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); | 275 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); |
| 259 level = std::min(std::max(0.0f, level), kAudioMeterRangeMaxUnclipped); | 276 level = std::min(std::max(0.0f, level), kAudioMeterRangeMaxUnclipped); |
| 260 if (level > audio_level_) { | 277 if (level > audio_level_) { |
| 261 audio_level_ += (level - audio_level_) * kUpSmoothingFactor; | 278 audio_level_ += (level - audio_level_) * kUpSmoothingFactor; |
| 262 } else { | 279 } else { |
| 263 audio_level_ += (level - audio_level_) * kDownSmoothingFactor; | 280 audio_level_ += (level - audio_level_) * kDownSmoothingFactor; |
| 264 } | 281 } |
| 265 | 282 |
| 266 float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) / | 283 float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) / |
| 267 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); | 284 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); |
| 268 noise_level = std::min(std::max(0.0f, noise_level), | 285 noise_level = std::min(std::max(0.0f, noise_level), |
| 269 kAudioMeterRangeMaxUnclipped); | 286 kAudioMeterRangeMaxUnclipped); |
| 270 | 287 |
| 271 delegate_->SetInputVolume(caller_id_, did_clip ? 1.0f : audio_level_, | 288 delegate_->SetInputVolume(caller_id_, did_clip ? 1.0f : audio_level_, |
| 272 noise_level); | 289 noise_level); |
| 273 | 290 |
| 274 if (endpointer_.speech_input_complete()) { | 291 if (endpointer_.speech_input_complete()) { |
| 275 StopRecording(); | 292 StopRecording(); |
| 293 delegate_->DidSpeechInputStop(caller_id_); | |
|
Satish
2011/10/04 20:36:33
instead of invoking this here do it inside StopRecording…
Leandro Graciá Gil
2011/10/05 22:09:00
Done.
| |
| 276 } | 294 } |
| 277 | 295 |
| 278 // TODO(satish): Once we have streaming POST, start sending the data received | 296 // TODO(satish): Once we have streaming POST, start sending the data received |
| 279 // here as POST chunks. | 297 // here as POST chunks. |
| 280 } | 298 } |
| 281 | 299 |
| 282 void SpeechRecognizer::SetRecognitionResult( | 300 void SpeechRecognizer::SetRecognitionResult( |
| 283 bool error, const SpeechInputResultArray& result) { | 301 bool error, const SpeechInputResult& result) { |
| 284 if (error || result.empty()) { | 302 if (error) { |
| 285 InformErrorAndCancelRecognition(error ? RECOGNIZER_ERROR_NETWORK : | 303 // Request failed or received an invalid response that couldn't be parsed. |
| 286 RECOGNIZER_ERROR_NO_RESULTS); | 304 InformErrorAndCancelRecognition(RECOGNIZER_ERROR_NETWORK); |
| 287 return; | 305 return; |
| 288 } | 306 } |
| 289 | 307 |
| 290 delegate_->SetRecognitionResult(caller_id_, error, result); | 308 switch (result.status) { |
| 309 case kStatusSuccess: | |
| 310 break; | |
| 311 | |
| 312 case kStatusAborted: | |
| 313 InformErrorAndCancelRecognition(RECOGNIZER_ERROR_INTERNAL); | |
| 314 return; | |
| 315 | |
| 316 case kStatusBadGrammar: | |
| 317 InformErrorAndCancelRecognition(RECOGNIZER_ERROR_INVALID_PARAMS); | |
| 318 return; | |
| 319 | |
| 320 case kStatusAudio: | |
| 321 InformErrorAndCancelRecognition(RECOGNIZER_ERROR_CAPTURE); | |
| 322 return; | |
| 323 | |
| 324 case kStatusNetwork: | |
| 325 InformErrorAndCancelRecognition(RECOGNIZER_ERROR_NETWORK); | |
| 326 return; | |
| 327 | |
| 328 case kStatusNoSpeech: | |
| 329 InformErrorAndCancelRecognition(RECOGNIZER_ERROR_NO_SPEECH); | |
| 330 return; | |
| 331 | |
| 332 case kStatusNoMatch: | |
| 333 InformErrorAndCancelRecognition(RECOGNIZER_ERROR_NO_RESULTS); | |
| 334 return; | |
| 335 | |
| 336 default: | |
| 337 NOTREACHED(); | |
| 338 } | |
| 339 | |
| 340 StopRecording(); | |
|
Satish
2011/10/04 20:36:33
why is this call required here? wouldn't recogniti…
Leandro Graciá Gil
2011/10/05 22:09:00
Done.
| |
| 291 | 341 |
| 292 // Guard against the delegate freeing us until we finish our job. | 342 // Guard against the delegate freeing us until we finish our job. |
| 293 scoped_refptr<SpeechRecognizer> me(this); | 343 scoped_refptr<SpeechRecognizer> me(this); |
| 344 delegate_->SetRecognitionResult(caller_id_, error, result); | |
| 294 delegate_->DidCompleteRecognition(caller_id_); | 345 delegate_->DidCompleteRecognition(caller_id_); |
| 295 } | 346 } |
| 296 | 347 |
| 297 void SpeechRecognizer::InformErrorAndCancelRecognition(ErrorCode error) { | 348 void SpeechRecognizer::InformErrorAndCancelRecognition(ErrorCode error) { |
| 298 CancelRecognition(); | 349 CancelRecognition(); |
| 299 | 350 |
| 300 // Guard against the delegate freeing us until we finish our job. | 351 // Guard against the delegate freeing us until we finish our job. |
| 301 scoped_refptr<SpeechRecognizer> me(this); | 352 scoped_refptr<SpeechRecognizer> me(this); |
| 302 delegate_->OnRecognizerError(caller_id_, error); | 353 delegate_->OnRecognizerError(caller_id_, error); |
| 303 } | 354 } |
| 304 | 355 |
| 305 } // namespace speech_input | 356 } // namespace speech_input |
| OLD | NEW |