OLD | NEW |
---|---|
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "content/browser/speech/speech_recognizer.h" | 5 #include "content/browser/speech/speech_recognizer.h" |
6 | 6 |
7 #include "base/time.h" | 7 #include "base/time.h" |
8 #include "content/browser/browser_thread.h" | 8 #include "content/browser/browser_thread.h" |
9 #include "net/url_request/url_request_context_getter.h" | 9 #include "net/url_request/url_request_context_getter.h" |
10 | 10 |
(...skipping 151 matching lines...) | |
162 if (request_ == NULL) { | 162 if (request_ == NULL) { |
163 // Guard against the delegate freeing us until we finish our job. | 163 // Guard against the delegate freeing us until we finish our job. |
164 scoped_refptr<SpeechRecognizer> me(this); | 164 scoped_refptr<SpeechRecognizer> me(this); |
165 delegate_->DidCompleteRecognition(caller_id_); | 165 delegate_->DidCompleteRecognition(caller_id_); |
166 } else { | 166 } else { |
167 request_->UploadAudioChunk(encoded_data, true /* is_last_chunk */); | 167 request_->UploadAudioChunk(encoded_data, true /* is_last_chunk */); |
168 } | 168 } |
169 } | 169 } |
170 | 170 |
171 // Invoked in the audio thread. | 171 // Invoked in the audio thread. |
172 void SpeechRecognizer::OnRecording(AudioInputController* controller) { | |
173 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | |
174 NewRunnableMethod(this, &SpeechRecognizer::HandleOnRecording)); | |
175 } | |
176 | |
177 void SpeechRecognizer::HandleOnRecording() { | |
178 // Guard against the delegate freeing us until we finish our job. | |
179 scoped_refptr<SpeechRecognizer> me(this); | |
180 delegate_->OnRecording(caller_id_); | |
181 } | |
182 | |
183 // Invoked in the audio thread. | |
172 void SpeechRecognizer::OnError(AudioInputController* controller, | 184 void SpeechRecognizer::OnError(AudioInputController* controller, |
173 int error_code) { | 185 int error_code) { |
174 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | 186 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, |
175 NewRunnableMethod(this, | 187 NewRunnableMethod(this, |
176 &SpeechRecognizer::HandleOnError, | 188 &SpeechRecognizer::HandleOnError, |
177 error_code)); | 189 error_code)); |
178 } | 190 } |
179 | 191 |
180 void SpeechRecognizer::HandleOnError(int error_code) { | 192 void SpeechRecognizer::HandleOnError(int error_code) { |
181 LOG(WARNING) << "SpeechRecognizer::HandleOnError, code=" << error_code; | 193 LOG(WARNING) << "SpeechRecognizer::HandleOnError, code=" << error_code; |
(...skipping 21 matching lines...) | |
203 | 215 |
204 void SpeechRecognizer::HandleOnData(string* data) { | 216 void SpeechRecognizer::HandleOnData(string* data) { |
205 // Check if we are still recording and if not discard this buffer, as | 217 // Check if we are still recording and if not discard this buffer, as |
206 // recording might have been stopped after this buffer was posted to the queue | 218 // recording might have been stopped after this buffer was posted to the queue |
207 // by |OnData|. | 219 // by |OnData|. |
208 if (!audio_controller_.get()) { | 220 if (!audio_controller_.get()) { |
209 delete data; | 221 delete data; |
210 return; | 222 return; |
211 } | 223 } |
212 | 224 |
225 bool speech_started = endpointer_.DidStartReceivingSpeech(); | |
Satish
2011/10/04 20:36:33
suggest renaming to speech_was_heard to indicate t[...] (remainder of this review comment was truncated in the page capture)
Leandro Graciá Gil
2011/10/05 22:09:00
Done.
| |
226 | |
213 const short* samples = reinterpret_cast<const short*>(data->data()); | 227 const short* samples = reinterpret_cast<const short*>(data->data()); |
214 DCHECK((data->length() % sizeof(short)) == 0); | 228 DCHECK((data->length() % sizeof(short)) == 0); |
215 int num_samples = data->length() / sizeof(short); | 229 int num_samples = data->length() / sizeof(short); |
216 encoder_->Encode(samples, num_samples); | 230 encoder_->Encode(samples, num_samples); |
217 float rms; | 231 float rms; |
218 endpointer_.ProcessAudio(samples, num_samples, &rms); | 232 endpointer_.ProcessAudio(samples, num_samples, &rms); |
219 bool did_clip = Clipping(samples, num_samples); | 233 bool did_clip = Clipping(samples, num_samples); |
220 delete data; | 234 delete data; |
221 num_samples_recorded_ += num_samples; | 235 num_samples_recorded_ += num_samples; |
222 | 236 |
(...skipping 16 matching lines...) | |
239 // environment estimation and should move on to detect speech/end of speech. | 253 // environment estimation and should move on to detect speech/end of speech. |
240 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs * | 254 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs * |
241 kAudioSampleRate) / 1000) { | 255 kAudioSampleRate) / 1000) { |
242 endpointer_.SetUserInputMode(); | 256 endpointer_.SetUserInputMode(); |
243 delegate_->DidCompleteEnvironmentEstimation(caller_id_); | 257 delegate_->DidCompleteEnvironmentEstimation(caller_id_); |
244 } | 258 } |
245 return; // No more processing since we are still estimating environment. | 259 return; // No more processing since we are still estimating environment. |
246 } | 260 } |
247 | 261 |
248 // Check if we have waited too long without hearing any speech. | 262 // Check if we have waited too long without hearing any speech. |
249 if (!endpointer_.DidStartReceivingSpeech() && | 263 if (!endpointer_.DidStartReceivingSpeech() && |
Satish
2011/10/04 20:36:33
perhaps assign the value of DidStartReceivingSpeec[...] (remainder of this review comment was truncated in the page capture)
Leandro Graciá Gil
2011/10/05 22:09:00
Done.
| |
250 num_samples_recorded_ >= kNoSpeechTimeoutSec * kAudioSampleRate) { | 264 num_samples_recorded_ >= kNoSpeechTimeoutSec * kAudioSampleRate) { |
251 InformErrorAndCancelRecognition(RECOGNIZER_ERROR_NO_SPEECH); | 265 InformErrorAndCancelRecognition(RECOGNIZER_ERROR_NO_SPEECH); |
252 return; | 266 return; |
253 } | 267 } |
254 | 268 |
269 if (!speech_started && endpointer_.DidStartReceivingSpeech()) | |
270 delegate_->DidSpeechInputStart(caller_id_); | |
271 | |
255 // Calculate the input volume to display in the UI, smoothing towards the | 272 // Calculate the input volume to display in the UI, smoothing towards the |
256 // new level. | 273 // new level. |
257 float level = (rms - kAudioMeterMinDb) / | 274 float level = (rms - kAudioMeterMinDb) / |
258 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); | 275 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); |
259 level = std::min(std::max(0.0f, level), kAudioMeterRangeMaxUnclipped); | 276 level = std::min(std::max(0.0f, level), kAudioMeterRangeMaxUnclipped); |
260 if (level > audio_level_) { | 277 if (level > audio_level_) { |
261 audio_level_ += (level - audio_level_) * kUpSmoothingFactor; | 278 audio_level_ += (level - audio_level_) * kUpSmoothingFactor; |
262 } else { | 279 } else { |
263 audio_level_ += (level - audio_level_) * kDownSmoothingFactor; | 280 audio_level_ += (level - audio_level_) * kDownSmoothingFactor; |
264 } | 281 } |
265 | 282 |
266 float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) / | 283 float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) / |
267 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); | 284 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); |
268 noise_level = std::min(std::max(0.0f, noise_level), | 285 noise_level = std::min(std::max(0.0f, noise_level), |
269 kAudioMeterRangeMaxUnclipped); | 286 kAudioMeterRangeMaxUnclipped); |
270 | 287 |
271 delegate_->SetInputVolume(caller_id_, did_clip ? 1.0f : audio_level_, | 288 delegate_->SetInputVolume(caller_id_, did_clip ? 1.0f : audio_level_, |
272 noise_level); | 289 noise_level); |
273 | 290 |
274 if (endpointer_.speech_input_complete()) { | 291 if (endpointer_.speech_input_complete()) { |
275 StopRecording(); | 292 StopRecording(); |
293 delegate_->DidSpeechInputStop(caller_id_); | |
Satish
2011/10/04 20:36:33
instead of invoking this here do it inside StopRec[...] (remainder of this review comment was truncated in the page capture)
Leandro Graciá Gil
2011/10/05 22:09:00
Done.
| |
276 } | 294 } |
277 | 295 |
278 // TODO(satish): Once we have streaming POST, start sending the data received | 296 // TODO(satish): Once we have streaming POST, start sending the data received |
279 // here as POST chunks. | 297 // here as POST chunks. |
280 } | 298 } |
281 | 299 |
282 void SpeechRecognizer::SetRecognitionResult( | 300 void SpeechRecognizer::SetRecognitionResult( |
283 bool error, const SpeechInputResultArray& result) { | 301 bool error, const SpeechInputResult& result) { |
284 if (error || result.empty()) { | 302 if (error) { |
285 InformErrorAndCancelRecognition(error ? RECOGNIZER_ERROR_NETWORK : | 303 // Request failed or received an invalid response that couldn't be parsed. |
286 RECOGNIZER_ERROR_NO_RESULTS); | 304 InformErrorAndCancelRecognition(RECOGNIZER_ERROR_NETWORK); |
287 return; | 305 return; |
288 } | 306 } |
289 | 307 |
290 delegate_->SetRecognitionResult(caller_id_, error, result); | 308 switch (result.status) { |
309 case kStatusSuccess: | |
310 break; | |
311 | |
312 case kStatusAborted: | |
313 InformErrorAndCancelRecognition(RECOGNIZER_ERROR_INTERNAL); | |
314 return; | |
315 | |
316 case kStatusBadGrammar: | |
317 InformErrorAndCancelRecognition(RECOGNIZER_ERROR_INVALID_PARAMS); | |
318 return; | |
319 | |
320 case kStatusAudio: | |
321 InformErrorAndCancelRecognition(RECOGNIZER_ERROR_CAPTURE); | |
322 return; | |
323 | |
324 case kStatusNetwork: | |
325 InformErrorAndCancelRecognition(RECOGNIZER_ERROR_NETWORK); | |
326 return; | |
327 | |
328 case kStatusNoSpeech: | |
329 InformErrorAndCancelRecognition(RECOGNIZER_ERROR_NO_SPEECH); | |
330 return; | |
331 | |
332 case kStatusNoMatch: | |
333 InformErrorAndCancelRecognition(RECOGNIZER_ERROR_NO_RESULTS); | |
334 return; | |
335 | |
336 default: | |
337 NOTREACHED(); | |
338 } | |
339 | |
340 StopRecording(); | |
Satish
2011/10/04 20:36:33
why is this call required here? wouldn't recogniti[...] (remainder of this review comment was truncated in the page capture)
Leandro Graciá Gil
2011/10/05 22:09:00
Done.
| |
291 | 341 |
292 // Guard against the delegate freeing us until we finish our job. | 342 // Guard against the delegate freeing us until we finish our job. |
293 scoped_refptr<SpeechRecognizer> me(this); | 343 scoped_refptr<SpeechRecognizer> me(this); |
344 delegate_->SetRecognitionResult(caller_id_, error, result); | |
294 delegate_->DidCompleteRecognition(caller_id_); | 345 delegate_->DidCompleteRecognition(caller_id_); |
295 } | 346 } |
296 | 347 |
297 void SpeechRecognizer::InformErrorAndCancelRecognition(ErrorCode error) { | 348 void SpeechRecognizer::InformErrorAndCancelRecognition(ErrorCode error) { |
298 CancelRecognition(); | 349 CancelRecognition(); |
299 | 350 |
300 // Guard against the delegate freeing us until we finish our job. | 351 // Guard against the delegate freeing us until we finish our job. |
301 scoped_refptr<SpeechRecognizer> me(this); | 352 scoped_refptr<SpeechRecognizer> me(this); |
302 delegate_->OnRecognizerError(caller_id_, error); | 353 delegate_->OnRecognizerError(caller_id_, error); |
303 } | 354 } |
304 | 355 |
305 } // namespace speech_input | 356 } // namespace speech_input |
OLD | NEW |