OLD | NEW |
---|---|
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "content/browser/speech/speech_recognizer_impl.h" | 5 #include "content/browser/speech/speech_recognizer_impl.h" |
6 | 6 |
7 #include <stdint.h> | 7 #include <stdint.h> |
8 | 8 |
9 #include "base/bind.h" | 9 #include "base/bind.h" |
10 #include "base/macros.h" | 10 #include "base/macros.h" |
11 #include "base/time/time.h" | 11 #include "base/time/time.h" |
12 #include "build/build_config.h" | 12 #include "build/build_config.h" |
13 #include "content/browser/browser_main_loop.h" | 13 #include "content/browser/browser_main_loop.h" |
14 #include "content/browser/media/media_internals.h" | 14 #include "content/browser/media/media_internals.h" |
15 #include "content/browser/speech/audio_buffer.h" | 15 #include "content/browser/speech/audio_buffer.h" |
16 #include "content/public/browser/speech_recognition_event_listener.h" | 16 #include "content/public/browser/speech_recognition_event_listener.h" |
17 #include "media/audio/audio_manager.h" | |
18 #include "media/audio/audio_system.h" | |
17 #include "media/base/audio_converter.h" | 19 #include "media/base/audio_converter.h" |
18 | 20 |
19 #if defined(OS_WIN) | 21 #if defined(OS_WIN) |
20 #include "media/audio/win/core_audio_util_win.h" | 22 #include "media/audio/win/core_audio_util_win.h" |
21 #endif | 23 #endif |
22 | 24 |
23 using media::AudioBus; | 25 using media::AudioBus; |
24 using media::AudioConverter; | 26 using media::AudioConverter; |
25 using media::AudioInputController; | 27 using media::AudioInputController; |
26 using media::AudioManager; | 28 using media::AudioManager; |
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
102 } | 104 } |
103 | 105 |
104 } // namespace | 106 } // namespace |
105 | 107 |
106 const int SpeechRecognizerImpl::kAudioSampleRate = 16000; | 108 const int SpeechRecognizerImpl::kAudioSampleRate = 16000; |
107 const ChannelLayout SpeechRecognizerImpl::kChannelLayout = | 109 const ChannelLayout SpeechRecognizerImpl::kChannelLayout = |
108 media::CHANNEL_LAYOUT_MONO; | 110 media::CHANNEL_LAYOUT_MONO; |
109 const int SpeechRecognizerImpl::kNumBitsPerAudioSample = 16; | 111 const int SpeechRecognizerImpl::kNumBitsPerAudioSample = 16; |
110 const int SpeechRecognizerImpl::kNoSpeechTimeoutMs = 8000; | 112 const int SpeechRecognizerImpl::kNoSpeechTimeoutMs = 8000; |
111 const int SpeechRecognizerImpl::kEndpointerEstimationTimeMs = 300; | 113 const int SpeechRecognizerImpl::kEndpointerEstimationTimeMs = 300; |
112 media::AudioManager* SpeechRecognizerImpl::audio_manager_for_tests_ = NULL; | 114 media::AudioSystem* SpeechRecognizerImpl::audio_system_for_tests_ = nullptr; |
113 | 115 |
114 static_assert(SpeechRecognizerImpl::kNumBitsPerAudioSample % 8 == 0, | 116 static_assert(SpeechRecognizerImpl::kNumBitsPerAudioSample % 8 == 0, |
115 "kNumBitsPerAudioSample must be a multiple of 8"); | 117 "kNumBitsPerAudioSample must be a multiple of 8"); |
116 | 118 |
117 // SpeechRecognizerImpl::OnDataConverter implementation | 119 // SpeechRecognizerImpl::OnDataConverter implementation |
118 | 120 |
119 SpeechRecognizerImpl::OnDataConverter::OnDataConverter( | 121 SpeechRecognizerImpl::OnDataConverter::OnDataConverter( |
120 const AudioParameters& input_params, | 122 const AudioParameters& input_params, |
121 const AudioParameters& output_params) | 123 const AudioParameters& output_params) |
122 : audio_converter_(input_params, output_params, false), | 124 : audio_converter_(input_params, output_params, false), |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
166 input_bus_->CopyTo(dest); | 168 input_bus_->CopyTo(dest); |
167 // Indicate that the recorded audio has in fact been used by the converter. | 169 // Indicate that the recorded audio has in fact been used by the converter. |
168 data_was_converted_ = true; | 170 data_was_converted_ = true; |
169 return 1; | 171 return 1; |
170 } | 172 } |
171 | 173 |
172 // SpeechRecognizerImpl implementation | 174 // SpeechRecognizerImpl implementation |
173 | 175 |
174 SpeechRecognizerImpl::SpeechRecognizerImpl( | 176 SpeechRecognizerImpl::SpeechRecognizerImpl( |
175 SpeechRecognitionEventListener* listener, | 177 SpeechRecognitionEventListener* listener, |
178 media::AudioSystem* audio_system, | |
176 int session_id, | 179 int session_id, |
177 bool continuous, | 180 bool continuous, |
178 bool provisional_results, | 181 bool provisional_results, |
179 SpeechRecognitionEngine* engine) | 182 SpeechRecognitionEngine* engine) |
180 : SpeechRecognizer(listener, session_id), | 183 : SpeechRecognizer(listener, session_id), |
184 audio_system_(audio_system), | |
181 recognition_engine_(engine), | 185 recognition_engine_(engine), |
182 endpointer_(kAudioSampleRate), | 186 endpointer_(kAudioSampleRate), |
183 audio_log_(MediaInternals::GetInstance()->CreateAudioLog( | 187 audio_log_(MediaInternals::GetInstance()->CreateAudioLog( |
184 media::AudioLogFactory::AUDIO_INPUT_CONTROLLER)), | 188 media::AudioLogFactory::AUDIO_INPUT_CONTROLLER)), |
185 is_dispatching_event_(false), | 189 is_dispatching_event_(false), |
186 provisional_results_(provisional_results), | 190 provisional_results_(provisional_results), |
187 end_of_utterance_(false), | 191 end_of_utterance_(false), |
188 state_(STATE_IDLE) { | 192 state_(STATE_IDLE), |
189 DCHECK(recognition_engine_ != NULL); | 193 weak_ptr_factory_(this) { |
194 DCHECK(recognition_engine_ != nullptr); | |
195 DCHECK(audio_system_ != nullptr); | |
190 if (!continuous) { | 196 if (!continuous) { |
191 // In single shot (non-continuous) recognition, | 197 // In single shot (non-continuous) recognition, |
192 // the session is automatically ended after: | 198 // the session is automatically ended after: |
193 // - 0.5 seconds of silence if time < 3 seconds | 199 // - 0.5 seconds of silence if time < 3 seconds |
194 // - 1 second of silence if time >= 3 seconds | 200 // - 1 second of silence if time >= 3 seconds |
195 endpointer_.set_speech_input_complete_silence_length( | 201 endpointer_.set_speech_input_complete_silence_length( |
196 base::Time::kMicrosecondsPerSecond / 2); | 202 base::Time::kMicrosecondsPerSecond / 2); |
197 endpointer_.set_long_speech_input_complete_silence_length( | 203 endpointer_.set_long_speech_input_complete_silence_length( |
198 base::Time::kMicrosecondsPerSecond); | 204 base::Time::kMicrosecondsPerSecond); |
199 endpointer_.set_long_speech_length(3 * base::Time::kMicrosecondsPerSecond); | 205 endpointer_.set_long_speech_length(3 * base::Time::kMicrosecondsPerSecond); |
(...skipping 13 matching lines...) Expand all Loading... | |
213 // NOTE: all the external events and requests should be enqueued (PostTask), even | 219 // NOTE: all the external events and requests should be enqueued (PostTask), even |
214 // if they come from the same (IO) thread, in order to preserve the relationship | 220 // if they come from the same (IO) thread, in order to preserve the relationship |
215 // of causality between events and avoid interleaved event processing due to | 221 // of causality between events and avoid interleaved event processing due to |
216 // synchronous callbacks. | 222 // synchronous callbacks. |
217 | 223 |
218 void SpeechRecognizerImpl::StartRecognition(const std::string& device_id) { | 224 void SpeechRecognizerImpl::StartRecognition(const std::string& device_id) { |
219 DCHECK(!device_id.empty()); | 225 DCHECK(!device_id.empty()); |
220 device_id_ = device_id; | 226 device_id_ = device_id; |
221 | 227 |
222 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | 228 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, |
223 base::Bind(&SpeechRecognizerImpl::DispatchEvent, | 229 base::Bind(&SpeechRecognizerImpl::DispatchEvent, this, |
224 this, FSMEventArgs(EVENT_START))); | 230 FSMEventArgs(EVENT_PREPARE))); |
225 } | 231 } |
226 | 232 |
227 void SpeechRecognizerImpl::AbortRecognition() { | 233 void SpeechRecognizerImpl::AbortRecognition() { |
228 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | 234 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, |
229 base::Bind(&SpeechRecognizerImpl::DispatchEvent, | 235 base::Bind(&SpeechRecognizerImpl::DispatchEvent, |
230 this, FSMEventArgs(EVENT_ABORT))); | 236 this, FSMEventArgs(EVENT_ABORT))); |
231 } | 237 } |
232 | 238 |
233 void SpeechRecognizerImpl::StopAudioCapture() { | 239 void SpeechRecognizerImpl::StopAudioCapture() { |
234 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | 240 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, |
(...skipping 131 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
366 SpeechRecognizerImpl::ExecuteTransitionAndGetNextState( | 372 SpeechRecognizerImpl::ExecuteTransitionAndGetNextState( |
367 const FSMEventArgs& event_args) { | 373 const FSMEventArgs& event_args) { |
368 const FSMEvent event = event_args.event; | 374 const FSMEvent event = event_args.event; |
369 switch (state_) { | 375 switch (state_) { |
370 case STATE_IDLE: | 376 case STATE_IDLE: |
371 switch (event) { | 377 switch (event) { |
372 // TODO(primiano): restore UNREACHABLE_CONDITION on EVENT_ABORT and | 378 // TODO(primiano): restore UNREACHABLE_CONDITION on EVENT_ABORT and |
373 // EVENT_STOP_CAPTURE below once speech input extensions are fixed. | 379 // EVENT_STOP_CAPTURE below once speech input extensions are fixed. |
374 case EVENT_ABORT: | 380 case EVENT_ABORT: |
375 return AbortSilently(event_args); | 381 return AbortSilently(event_args); |
382 case EVENT_PREPARE: | |
383 return PrepareRecognition(event_args); | |
384 case EVENT_START: | |
385 return NotFeasible(event_args); | |
386 case EVENT_STOP_CAPTURE: | |
387 return AbortSilently(event_args); | |
388 case EVENT_AUDIO_DATA: // Corner cases related to queued messages | |
389 case EVENT_ENGINE_RESULT: // being lately dispatched. | |
390 case EVENT_ENGINE_ERROR: | |
391 case EVENT_AUDIO_ERROR: | |
392 return DoNothing(event_args); | |
393 } | |
394 break; | |
395 case STATE_PREPARING: | |
396 switch (event) { | |
397 case EVENT_ABORT: | |
398 return AbortSilently(event_args); | |
399 case EVENT_PREPARE: | |
400 return NotFeasible(event_args); | |
376 case EVENT_START: | 401 case EVENT_START: |
377 return StartRecording(event_args); | 402 return StartRecording(event_args); |
378 case EVENT_STOP_CAPTURE: | 403 case EVENT_STOP_CAPTURE: |
379 return AbortSilently(event_args); | 404 return AbortSilently(event_args); |
380 case EVENT_AUDIO_DATA: // Corner cases related to queued messages | 405 case EVENT_AUDIO_DATA: // Corner cases related to queued messages |
381 case EVENT_ENGINE_RESULT: // being lately dispatched. | 406 case EVENT_ENGINE_RESULT: // being lately dispatched. |
382 case EVENT_ENGINE_ERROR: | 407 case EVENT_ENGINE_ERROR: |
383 case EVENT_AUDIO_ERROR: | 408 case EVENT_AUDIO_ERROR: |
384 return DoNothing(event_args); | 409 return DoNothing(event_args); |
385 } | 410 } |
386 break; | 411 break; |
387 case STATE_STARTING: | 412 case STATE_STARTING: |
388 switch (event) { | 413 switch (event) { |
389 case EVENT_ABORT: | 414 case EVENT_ABORT: |
390 return AbortWithError(event_args); | 415 return AbortWithError(event_args); |
416 case EVENT_PREPARE: | |
417 return NotFeasible(event_args); | |
391 case EVENT_START: | 418 case EVENT_START: |
392 return NotFeasible(event_args); | 419 return NotFeasible(event_args); |
393 case EVENT_STOP_CAPTURE: | 420 case EVENT_STOP_CAPTURE: |
394 return AbortSilently(event_args); | 421 return AbortSilently(event_args); |
395 case EVENT_AUDIO_DATA: | 422 case EVENT_AUDIO_DATA: |
396 return StartRecognitionEngine(event_args); | 423 return StartRecognitionEngine(event_args); |
397 case EVENT_ENGINE_RESULT: | 424 case EVENT_ENGINE_RESULT: |
398 return NotFeasible(event_args); | 425 return NotFeasible(event_args); |
399 case EVENT_ENGINE_ERROR: | 426 case EVENT_ENGINE_ERROR: |
400 case EVENT_AUDIO_ERROR: | 427 case EVENT_AUDIO_ERROR: |
401 return AbortWithError(event_args); | 428 return AbortWithError(event_args); |
402 } | 429 } |
403 break; | 430 break; |
404 case STATE_ESTIMATING_ENVIRONMENT: | 431 case STATE_ESTIMATING_ENVIRONMENT: |
405 switch (event) { | 432 switch (event) { |
406 case EVENT_ABORT: | 433 case EVENT_ABORT: |
407 return AbortWithError(event_args); | 434 return AbortWithError(event_args); |
435 case EVENT_PREPARE: | |
436 return NotFeasible(event_args); | |
408 case EVENT_START: | 437 case EVENT_START: |
409 return NotFeasible(event_args); | 438 return NotFeasible(event_args); |
410 case EVENT_STOP_CAPTURE: | 439 case EVENT_STOP_CAPTURE: |
411 return StopCaptureAndWaitForResult(event_args); | 440 return StopCaptureAndWaitForResult(event_args); |
412 case EVENT_AUDIO_DATA: | 441 case EVENT_AUDIO_DATA: |
413 return WaitEnvironmentEstimationCompletion(event_args); | 442 return WaitEnvironmentEstimationCompletion(event_args); |
414 case EVENT_ENGINE_RESULT: | 443 case EVENT_ENGINE_RESULT: |
415 return ProcessIntermediateResult(event_args); | 444 return ProcessIntermediateResult(event_args); |
416 case EVENT_ENGINE_ERROR: | 445 case EVENT_ENGINE_ERROR: |
417 case EVENT_AUDIO_ERROR: | 446 case EVENT_AUDIO_ERROR: |
418 return AbortWithError(event_args); | 447 return AbortWithError(event_args); |
419 } | 448 } |
420 break; | 449 break; |
421 case STATE_WAITING_FOR_SPEECH: | 450 case STATE_WAITING_FOR_SPEECH: |
422 switch (event) { | 451 switch (event) { |
423 case EVENT_ABORT: | 452 case EVENT_ABORT: |
424 return AbortWithError(event_args); | 453 return AbortWithError(event_args); |
454 case EVENT_PREPARE: | |
455 return NotFeasible(event_args); | |
425 case EVENT_START: | 456 case EVENT_START: |
426 return NotFeasible(event_args); | 457 return NotFeasible(event_args); |
427 case EVENT_STOP_CAPTURE: | 458 case EVENT_STOP_CAPTURE: |
428 return StopCaptureAndWaitForResult(event_args); | 459 return StopCaptureAndWaitForResult(event_args); |
429 case EVENT_AUDIO_DATA: | 460 case EVENT_AUDIO_DATA: |
430 return DetectUserSpeechOrTimeout(event_args); | 461 return DetectUserSpeechOrTimeout(event_args); |
431 case EVENT_ENGINE_RESULT: | 462 case EVENT_ENGINE_RESULT: |
432 return ProcessIntermediateResult(event_args); | 463 return ProcessIntermediateResult(event_args); |
433 case EVENT_ENGINE_ERROR: | 464 case EVENT_ENGINE_ERROR: |
434 case EVENT_AUDIO_ERROR: | 465 case EVENT_AUDIO_ERROR: |
435 return AbortWithError(event_args); | 466 return AbortWithError(event_args); |
436 } | 467 } |
437 break; | 468 break; |
438 case STATE_RECOGNIZING: | 469 case STATE_RECOGNIZING: |
439 switch (event) { | 470 switch (event) { |
440 case EVENT_ABORT: | 471 case EVENT_ABORT: |
441 return AbortWithError(event_args); | 472 return AbortWithError(event_args); |
473 case EVENT_PREPARE: | |
474 return NotFeasible(event_args); | |
442 case EVENT_START: | 475 case EVENT_START: |
443 return NotFeasible(event_args); | 476 return NotFeasible(event_args); |
444 case EVENT_STOP_CAPTURE: | 477 case EVENT_STOP_CAPTURE: |
445 return StopCaptureAndWaitForResult(event_args); | 478 return StopCaptureAndWaitForResult(event_args); |
446 case EVENT_AUDIO_DATA: | 479 case EVENT_AUDIO_DATA: |
447 return DetectEndOfSpeech(event_args); | 480 return DetectEndOfSpeech(event_args); |
448 case EVENT_ENGINE_RESULT: | 481 case EVENT_ENGINE_RESULT: |
449 return ProcessIntermediateResult(event_args); | 482 return ProcessIntermediateResult(event_args); |
450 case EVENT_ENGINE_ERROR: | 483 case EVENT_ENGINE_ERROR: |
451 case EVENT_AUDIO_ERROR: | 484 case EVENT_AUDIO_ERROR: |
452 return AbortWithError(event_args); | 485 return AbortWithError(event_args); |
453 } | 486 } |
454 break; | 487 break; |
455 case STATE_WAITING_FINAL_RESULT: | 488 case STATE_WAITING_FINAL_RESULT: |
456 switch (event) { | 489 switch (event) { |
457 case EVENT_ABORT: | 490 case EVENT_ABORT: |
458 return AbortWithError(event_args); | 491 return AbortWithError(event_args); |
492 case EVENT_PREPARE: | |
493 return NotFeasible(event_args); | |
459 case EVENT_START: | 494 case EVENT_START: |
460 return NotFeasible(event_args); | 495 return NotFeasible(event_args); |
461 case EVENT_STOP_CAPTURE: | 496 case EVENT_STOP_CAPTURE: |
462 case EVENT_AUDIO_DATA: | 497 case EVENT_AUDIO_DATA: |
463 return DoNothing(event_args); | 498 return DoNothing(event_args); |
464 case EVENT_ENGINE_RESULT: | 499 case EVENT_ENGINE_RESULT: |
465 return ProcessFinalResult(event_args); | 500 return ProcessFinalResult(event_args); |
466 case EVENT_ENGINE_ERROR: | 501 case EVENT_ENGINE_ERROR: |
467 case EVENT_AUDIO_ERROR: | 502 case EVENT_AUDIO_ERROR: |
468 return AbortWithError(event_args); | 503 return AbortWithError(event_args); |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
505 if (route_to_vumeter) { | 540 if (route_to_vumeter) { |
506 DCHECK(route_to_endpointer); // Depends on endpointer due to |rms|. | 541 DCHECK(route_to_endpointer); // Depends on endpointer due to |rms|. |
507 UpdateSignalAndNoiseLevels(rms, clip_detected); | 542 UpdateSignalAndNoiseLevels(rms, clip_detected); |
508 } | 543 } |
509 if (route_to_sr_engine) { | 544 if (route_to_sr_engine) { |
510 DCHECK(recognition_engine_.get() != NULL); | 545 DCHECK(recognition_engine_.get() != NULL); |
511 recognition_engine_->TakeAudioChunk(raw_audio); | 546 recognition_engine_->TakeAudioChunk(raw_audio); |
512 } | 547 } |
513 } | 548 } |
514 | 549 |
550 void SpeechRecognizerImpl::OnDeviceInfo(const media::AudioParameters& params) { | |
551 DCHECK_CURRENTLY_ON(BrowserThread::IO); | |
552 device_params_ = params; | |
553 DVLOG(1) << "Device parameters: " << device_params_.AsHumanReadableString(); | |
554 DispatchEvent(FSMEventArgs(EVENT_START)); | |
555 } | |
556 | |
557 SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::PrepareRecognition( | |
558 const FSMEventArgs&) { | |
559 DCHECK(state_ == STATE_IDLE); | |
560 DCHECK(recognition_engine_.get() != NULL); | |
561 DCHECK(!IsCapturingAudio()); | |
562 GetAudioSystem()->GetInputStreamParameters( | |
tommi (sloooow) - chröme
2017/02/02 16:27:18
Instead of adding AudioSystem etc, could you post
| |
563 device_id_, base::Bind(&SpeechRecognizerImpl::OnDeviceInfo, | |
564 weak_ptr_factory_.GetWeakPtr())); | |
565 | |
566 listener()->OnRecognitionStart(session_id()); | |
567 return STATE_PREPARING; | |
568 } | |
569 | |
515 SpeechRecognizerImpl::FSMState | 570 SpeechRecognizerImpl::FSMState |
516 SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) { | 571 SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) { |
517 DCHECK(state_ == STATE_IDLE); | 572 DCHECK(state_ == STATE_PREPARING); |
518 DCHECK(recognition_engine_.get() != NULL); | 573 DCHECK(recognition_engine_.get() != NULL); |
519 DCHECK(!IsCapturingAudio()); | 574 DCHECK(!IsCapturingAudio()); |
520 const bool unit_test_is_active = (audio_manager_for_tests_ != NULL); | |
521 AudioManager* audio_manager = unit_test_is_active ? | |
522 audio_manager_for_tests_ : | |
523 AudioManager::Get(); | |
524 DCHECK(audio_manager != NULL); | |
525 | 575 |
526 DVLOG(1) << "SpeechRecognizerImpl starting audio capture."; | 576 DVLOG(1) << "SpeechRecognizerImpl starting audio capture."; |
527 num_samples_recorded_ = 0; | 577 num_samples_recorded_ = 0; |
528 audio_level_ = 0; | 578 audio_level_ = 0; |
529 end_of_utterance_ = false; | 579 end_of_utterance_ = false; |
530 listener()->OnRecognitionStart(session_id()); | |
531 | 580 |
532 // TODO(xians): Check if the OS has the device with |device_id_|, return | 581 int chunk_duration_ms = recognition_engine_->GetDesiredAudioChunkDurationMs(); |
533 // |SPEECH_AUDIO_ERROR_DETAILS_NO_MIC| if the target device does not exist. | 582 |
534 if (!audio_manager->HasAudioInputDevices()) { | 583 if (!device_params_.IsValid()) { |
584 DLOG(ERROR) << "Audio input device not found"; | |
535 return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE, | 585 return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE, |
536 SPEECH_AUDIO_ERROR_DETAILS_NO_MIC)); | 586 SPEECH_AUDIO_ERROR_DETAILS_NO_MIC)); |
537 } | 587 } |
538 | 588 |
539 int chunk_duration_ms = recognition_engine_->GetDesiredAudioChunkDurationMs(); | |
540 | |
541 AudioParameters in_params = audio_manager->GetInputStreamParameters( | |
542 device_id_); | |
543 if (!in_params.IsValid() && !unit_test_is_active) { | |
544 DLOG(ERROR) << "Invalid native audio input parameters"; | |
545 return Abort( | |
546 SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE)); | |
547 } | |
548 | |
549 // Audio converter shall provide audio based on these parameters as output. | 589 // Audio converter shall provide audio based on these parameters as output. |
550 // Hard coded, WebSpeech specific parameters are utilized here. | 590 // Hard coded, WebSpeech specific parameters are utilized here. |
551 int frames_per_buffer = (kAudioSampleRate * chunk_duration_ms) / 1000; | 591 int frames_per_buffer = (kAudioSampleRate * chunk_duration_ms) / 1000; |
552 AudioParameters output_parameters = AudioParameters( | 592 AudioParameters output_parameters = AudioParameters( |
553 AudioParameters::AUDIO_PCM_LOW_LATENCY, kChannelLayout, kAudioSampleRate, | 593 AudioParameters::AUDIO_PCM_LOW_LATENCY, kChannelLayout, kAudioSampleRate, |
554 kNumBitsPerAudioSample, frames_per_buffer); | 594 kNumBitsPerAudioSample, frames_per_buffer); |
555 DVLOG(1) << "SRI::output_parameters: " | 595 DVLOG(1) << "SRI::output_parameters: " |
556 << output_parameters.AsHumanReadableString(); | 596 << output_parameters.AsHumanReadableString(); |
557 | 597 |
558 // Audio converter will receive audio based on these parameters as input. | 598 // Audio converter will receive audio based on these parameters as input. |
559 // On Windows we start by verifying that Core Audio is supported. If not, | 599 // On Windows we start by verifying that Core Audio is supported. If not, |
560 // the WaveIn API is used and we might as well avoid all audio conversions | 600 // the WaveIn API is used and we might as well avoid all audio conversions |
561 // since WaveIn does the conversion for us. | 601 // since WaveIn does the conversion for us. |
562 // TODO(henrika): this code should be moved to platform dependent audio | 602 // TODO(henrika): this code should be moved to platform dependent audio |
563 // managers. | 603 // managers. |
564 bool use_native_audio_params = true; | 604 bool use_native_audio_params = true; |
565 #if defined(OS_WIN) | 605 #if defined(OS_WIN) |
566 use_native_audio_params = media::CoreAudioUtil::IsSupported(); | 606 use_native_audio_params = media::CoreAudioUtil::IsSupported(); |
567 DVLOG_IF(1, !use_native_audio_params) << "Reverting to WaveIn for WebSpeech"; | 607 DVLOG_IF(1, !use_native_audio_params) << "Reverting to WaveIn for WebSpeech"; |
568 #endif | 608 #endif |
569 | 609 |
570 AudioParameters input_parameters = output_parameters; | 610 AudioParameters input_parameters = output_parameters; |
571 if (use_native_audio_params && !unit_test_is_active) { | 611 |
612 // AUDIO_FAKE means we are running a test. | |
613 if (use_native_audio_params && | |
614 device_params_.format() != media::AudioParameters::AUDIO_FAKE) { | |
572 // Use native audio parameters but avoid opening up at the native buffer | 615 // Use native audio parameters but avoid opening up at the native buffer |
573 // size. Instead use same frame size (in milliseconds) as WebSpeech uses. | 616 // size. Instead use same frame size (in milliseconds) as WebSpeech uses. |
574 // We rely on internal buffers in the audio back-end to fulfill this request | 617 // We rely on internal buffers in the audio back-end to fulfill this request |
575 // and the idea is to simplify the audio conversion since each Convert() | 618 // and the idea is to simplify the audio conversion since each Convert() |
576 // call will then render exactly one ProvideInput() call. | 619 // call will then render exactly one ProvideInput() call. |
577 // in_params.sample_rate() | 620 input_parameters = device_params_; |
578 input_parameters = in_params; | |
579 frames_per_buffer = | 621 frames_per_buffer = |
580 ((in_params.sample_rate() * chunk_duration_ms) / 1000.0) + 0.5; | 622 ((input_parameters.sample_rate() * chunk_duration_ms) / 1000.0) + 0.5; |
581 input_parameters.set_frames_per_buffer(frames_per_buffer); | 623 input_parameters.set_frames_per_buffer(frames_per_buffer); |
582 DVLOG(1) << "SRI::input_parameters: " | 624 DVLOG(1) << "SRI::input_parameters: " |
583 << input_parameters.AsHumanReadableString(); | 625 << input_parameters.AsHumanReadableString(); |
584 } | 626 } |
585 | 627 |
586 // Create an audio converter which converts data between native input format | 628 // Create an audio converter which converts data between native input format |
587 // and WebSpeech specific output format. | 629 // and WebSpeech specific output format. |
588 audio_converter_.reset( | 630 audio_converter_.reset( |
589 new OnDataConverter(input_parameters, output_parameters)); | 631 new OnDataConverter(input_parameters, output_parameters)); |
590 | 632 |
591 audio_controller_ = AudioInputController::Create( | 633 audio_controller_ = |
592 audio_manager, this, this, input_parameters, device_id_, NULL); | 634 AudioInputController::Create(GetAudioSystem()->GetAudioManager(), this, |
635 this, input_parameters, device_id_, NULL); | |
593 | 636 |
594 if (!audio_controller_.get()) { | 637 if (!audio_controller_.get()) { |
595 return Abort( | 638 return Abort( |
596 SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE)); | 639 SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE)); |
597 } | 640 } |
598 | 641 |
599 audio_log_->OnCreated(0, input_parameters, device_id_); | 642 audio_log_->OnCreated(0, input_parameters, device_id_); |
600 | 643 |
601 // The endpointer needs to estimate the environment/background noise before | 644 // The endpointer needs to estimate the environment/background noise before |
602 // starting to treat the audio as user input. We wait in the state | 645 // starting to treat the audio as user input. We wait in the state |
(...skipping 78 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
681 return Abort( | 724 return Abort( |
682 SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE)); | 725 SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE)); |
683 } else if (event_args.event == EVENT_ENGINE_ERROR) { | 726 } else if (event_args.event == EVENT_ENGINE_ERROR) { |
684 return Abort(event_args.engine_error); | 727 return Abort(event_args.engine_error); |
685 } | 728 } |
686 return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_ABORTED)); | 729 return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_ABORTED)); |
687 } | 730 } |
688 | 731 |
689 SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::Abort( | 732 SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::Abort( |
690 const SpeechRecognitionError& error) { | 733 const SpeechRecognitionError& error) { |
734 DCHECK_CURRENTLY_ON(BrowserThread::IO); | |
735 | |
691 if (IsCapturingAudio()) | 736 if (IsCapturingAudio()) |
692 CloseAudioControllerAsynchronously(); | 737 CloseAudioControllerAsynchronously(); |
693 | 738 |
694 DVLOG(1) << "SpeechRecognizerImpl canceling recognition. "; | 739 DVLOG(1) << "SpeechRecognizerImpl canceling recognition. "; |
695 | 740 |
741 if (state_ == STATE_PREPARING) { | |
742 // Cancel an outstanding reply from AudioSystem. | |
743 weak_ptr_factory_.InvalidateWeakPtrs(); | |
744 } | |
745 | |
696 // The recognition engine is initialized only after STATE_STARTING. | 746 // The recognition engine is initialized only after STATE_STARTING. |
697 if (state_ > STATE_STARTING) { | 747 if (state_ > STATE_STARTING) { |
698 DCHECK(recognition_engine_.get() != NULL); | 748 DCHECK(recognition_engine_.get() != NULL); |
699 recognition_engine_->EndRecognition(); | 749 recognition_engine_->EndRecognition(); |
700 } | 750 } |
701 | 751 |
702 if (state_ > STATE_WAITING_FOR_SPEECH && state_ < STATE_WAITING_FINAL_RESULT) | 752 if (state_ > STATE_WAITING_FOR_SPEECH && state_ < STATE_WAITING_FINAL_RESULT) |
703 listener()->OnSoundEnd(session_id()); | 753 listener()->OnSoundEnd(session_id()); |
704 | 754 |
705 if (state_ > STATE_STARTING && state_ < STATE_WAITING_FINAL_RESULT) | 755 if (state_ > STATE_STARTING && state_ < STATE_WAITING_FINAL_RESULT) |
(...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
822 | 872 |
823 float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) / | 873 float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) / |
824 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); | 874 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); |
825 noise_level = std::min(std::max(0.0f, noise_level), | 875 noise_level = std::min(std::max(0.0f, noise_level), |
826 kAudioMeterRangeMaxUnclipped); | 876 kAudioMeterRangeMaxUnclipped); |
827 | 877 |
828 listener()->OnAudioLevelsChange( | 878 listener()->OnAudioLevelsChange( |
829 session_id(), clip_detected ? 1.0f : audio_level_, noise_level); | 879 session_id(), clip_detected ? 1.0f : audio_level_, noise_level); |
830 } | 880 } |
831 | 881 |
832 void SpeechRecognizerImpl::SetAudioManagerForTesting( | 882 void SpeechRecognizerImpl::SetAudioSystemForTesting( |
833 AudioManager* audio_manager) { | 883 media::AudioSystem* audio_system) { |
834 audio_manager_for_tests_ = audio_manager; | 884 audio_system_for_tests_ = audio_system; |
885 } | |
886 | |
887 media::AudioSystem* SpeechRecognizerImpl::GetAudioSystem() { | |
888 return audio_system_for_tests_ ? audio_system_for_tests_ : audio_system_; | |
835 } | 889 } |
836 | 890 |
837 SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value) | 891 SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value) |
838 : event(event_value), | 892 : event(event_value), |
839 audio_data(NULL), | 893 audio_data(NULL), |
840 engine_error(SPEECH_RECOGNITION_ERROR_NONE) { | 894 engine_error(SPEECH_RECOGNITION_ERROR_NONE) { |
841 } | 895 } |
842 | 896 |
843 SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(const FSMEventArgs& other) = | 897 SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(const FSMEventArgs& other) = |
844 default; | 898 default; |
845 | 899 |
846 SpeechRecognizerImpl::FSMEventArgs::~FSMEventArgs() { | 900 SpeechRecognizerImpl::FSMEventArgs::~FSMEventArgs() { |
847 } | 901 } |
848 | 902 |
849 } // namespace content | 903 } // namespace content |
OLD | NEW |