Chromium Code Reviews

Side by Side Diff: content/browser/speech/speech_recognizer_impl.cc

Issue 2675713002: Switch Speech Recognition to asynchronous callback-based AudioManager interactions. (Closed)
Patch Set: Created 3 years, 10 months ago
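
At its core, this CL replaces the synchronous input-parameter query on media::AudioManager with the callback-based media::AudioSystem interface, which replies asynchronously on the IO thread. A minimal before/after sketch of that pattern, simplified from the diff below (surrounding setup omitted; not the exact CL code):

    // Before: synchronous query, parameters available immediately.
    media::AudioParameters in_params =
        audio_manager->GetInputStreamParameters(device_id_);

    // After: asynchronous query; the recognizer waits in a new
    // STATE_PREPARING state until OnDeviceInfo() receives the parameters.
    audio_system_->GetInputStreamParameters(
        device_id_, base::Bind(&SpeechRecognizerImpl::OnDeviceInfo,
                               weak_ptr_factory_.GetWeakPtr()));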
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "content/browser/speech/speech_recognizer_impl.h" 5 #include "content/browser/speech/speech_recognizer_impl.h"
6 6
7 #include <stdint.h> 7 #include <stdint.h>
8 8
9 #include "base/bind.h" 9 #include "base/bind.h"
10 #include "base/macros.h" 10 #include "base/macros.h"
11 #include "base/time/time.h" 11 #include "base/time/time.h"
12 #include "build/build_config.h" 12 #include "build/build_config.h"
13 #include "content/browser/browser_main_loop.h" 13 #include "content/browser/browser_main_loop.h"
14 #include "content/browser/media/media_internals.h" 14 #include "content/browser/media/media_internals.h"
15 #include "content/browser/speech/audio_buffer.h" 15 #include "content/browser/speech/audio_buffer.h"
16 #include "content/public/browser/speech_recognition_event_listener.h" 16 #include "content/public/browser/speech_recognition_event_listener.h"
17 #include "media/audio/audio_manager.h"
18 #include "media/audio/audio_system.h"
17 #include "media/base/audio_converter.h" 19 #include "media/base/audio_converter.h"
18 20
19 #if defined(OS_WIN) 21 #if defined(OS_WIN)
20 #include "media/audio/win/core_audio_util_win.h" 22 #include "media/audio/win/core_audio_util_win.h"
21 #endif 23 #endif
22 24
23 using media::AudioBus; 25 using media::AudioBus;
24 using media::AudioConverter; 26 using media::AudioConverter;
25 using media::AudioInputController; 27 using media::AudioInputController;
26 using media::AudioManager; 28 using media::AudioManager;
(...skipping 75 matching lines...)
102 } 104 }
103 105
104 } // namespace 106 } // namespace
105 107
106 const int SpeechRecognizerImpl::kAudioSampleRate = 16000; 108 const int SpeechRecognizerImpl::kAudioSampleRate = 16000;
107 const ChannelLayout SpeechRecognizerImpl::kChannelLayout = 109 const ChannelLayout SpeechRecognizerImpl::kChannelLayout =
108 media::CHANNEL_LAYOUT_MONO; 110 media::CHANNEL_LAYOUT_MONO;
109 const int SpeechRecognizerImpl::kNumBitsPerAudioSample = 16; 111 const int SpeechRecognizerImpl::kNumBitsPerAudioSample = 16;
110 const int SpeechRecognizerImpl::kNoSpeechTimeoutMs = 8000; 112 const int SpeechRecognizerImpl::kNoSpeechTimeoutMs = 8000;
111 const int SpeechRecognizerImpl::kEndpointerEstimationTimeMs = 300; 113 const int SpeechRecognizerImpl::kEndpointerEstimationTimeMs = 300;
112 media::AudioManager* SpeechRecognizerImpl::audio_manager_for_tests_ = NULL; 114 media::AudioSystem* SpeechRecognizerImpl::audio_system_for_tests_ = nullptr;
113 115
114 static_assert(SpeechRecognizerImpl::kNumBitsPerAudioSample % 8 == 0, 116 static_assert(SpeechRecognizerImpl::kNumBitsPerAudioSample % 8 == 0,
115 "kNumBitsPerAudioSample must be a multiple of 8"); 117 "kNumBitsPerAudioSample must be a multiple of 8");
116 118
117 // SpeechRecognizerImpl::OnDataConverter implementation 119 // SpeechRecognizerImpl::OnDataConverter implementation
118 120
119 SpeechRecognizerImpl::OnDataConverter::OnDataConverter( 121 SpeechRecognizerImpl::OnDataConverter::OnDataConverter(
120 const AudioParameters& input_params, 122 const AudioParameters& input_params,
121 const AudioParameters& output_params) 123 const AudioParameters& output_params)
122 : audio_converter_(input_params, output_params, false), 124 : audio_converter_(input_params, output_params, false),
(...skipping 43 matching lines...)
166 input_bus_->CopyTo(dest); 168 input_bus_->CopyTo(dest);
167 // Indicate that the recorded audio has in fact been used by the converter. 169 // Indicate that the recorded audio has in fact been used by the converter.
168 data_was_converted_ = true; 170 data_was_converted_ = true;
169 return 1; 171 return 1;
170 } 172 }
171 173
172 // SpeechRecognizerImpl implementation 174 // SpeechRecognizerImpl implementation
173 175
174 SpeechRecognizerImpl::SpeechRecognizerImpl( 176 SpeechRecognizerImpl::SpeechRecognizerImpl(
175 SpeechRecognitionEventListener* listener, 177 SpeechRecognitionEventListener* listener,
178 media::AudioSystem* audio_system,
176 int session_id, 179 int session_id,
177 bool continuous, 180 bool continuous,
178 bool provisional_results, 181 bool provisional_results,
179 SpeechRecognitionEngine* engine) 182 SpeechRecognitionEngine* engine)
180 : SpeechRecognizer(listener, session_id), 183 : SpeechRecognizer(listener, session_id),
184 audio_system_(audio_system),
181 recognition_engine_(engine), 185 recognition_engine_(engine),
182 endpointer_(kAudioSampleRate), 186 endpointer_(kAudioSampleRate),
183 audio_log_(MediaInternals::GetInstance()->CreateAudioLog( 187 audio_log_(MediaInternals::GetInstance()->CreateAudioLog(
184 media::AudioLogFactory::AUDIO_INPUT_CONTROLLER)), 188 media::AudioLogFactory::AUDIO_INPUT_CONTROLLER)),
185 is_dispatching_event_(false), 189 is_dispatching_event_(false),
186 provisional_results_(provisional_results), 190 provisional_results_(provisional_results),
187 end_of_utterance_(false), 191 end_of_utterance_(false),
188 state_(STATE_IDLE) { 192 state_(STATE_IDLE),
189 DCHECK(recognition_engine_ != NULL); 193 weak_ptr_factory_(this) {
194 DCHECK(recognition_engine_ != nullptr);
195 DCHECK(audio_system_ != nullptr);
190 if (!continuous) { 196 if (!continuous) {
191 // In single shot (non-continous) recognition, 197 // In single shot (non-continous) recognition,
192 // the session is automatically ended after: 198 // the session is automatically ended after:
193 // - 0.5 seconds of silence if time < 3 seconds 199 // - 0.5 seconds of silence if time < 3 seconds
194 // - 1 seconds of silence if time >= 3 seconds 200 // - 1 seconds of silence if time >= 3 seconds
195 endpointer_.set_speech_input_complete_silence_length( 201 endpointer_.set_speech_input_complete_silence_length(
196 base::Time::kMicrosecondsPerSecond / 2); 202 base::Time::kMicrosecondsPerSecond / 2);
197 endpointer_.set_long_speech_input_complete_silence_length( 203 endpointer_.set_long_speech_input_complete_silence_length(
198 base::Time::kMicrosecondsPerSecond); 204 base::Time::kMicrosecondsPerSecond);
199 endpointer_.set_long_speech_length(3 * base::Time::kMicrosecondsPerSecond); 205 endpointer_.set_long_speech_length(3 * base::Time::kMicrosecondsPerSecond);
(...skipping 13 matching lines...)
213 // NOTE:all the external events and requests should be enqueued (PostTask), even 219 // NOTE:all the external events and requests should be enqueued (PostTask), even
214 // if they come from the same (IO) thread, in order to preserve the relationship 220 // if they come from the same (IO) thread, in order to preserve the relationship
215 // of causality between events and avoid interleaved event processing due to 221 // of causality between events and avoid interleaved event processing due to
216 // synchronous callbacks. 222 // synchronous callbacks.
217 223
218 void SpeechRecognizerImpl::StartRecognition(const std::string& device_id) { 224 void SpeechRecognizerImpl::StartRecognition(const std::string& device_id) {
219 DCHECK(!device_id.empty()); 225 DCHECK(!device_id.empty());
220 device_id_ = device_id; 226 device_id_ = device_id;
221 227
222 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, 228 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
223 base::Bind(&SpeechRecognizerImpl::DispatchEvent, 229 base::Bind(&SpeechRecognizerImpl::DispatchEvent, this,
224 this, FSMEventArgs(EVENT_START))); 230 FSMEventArgs(EVENT_PREPARE)));
225 } 231 }
226 232
227 void SpeechRecognizerImpl::AbortRecognition() { 233 void SpeechRecognizerImpl::AbortRecognition() {
228 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, 234 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
229 base::Bind(&SpeechRecognizerImpl::DispatchEvent, 235 base::Bind(&SpeechRecognizerImpl::DispatchEvent,
230 this, FSMEventArgs(EVENT_ABORT))); 236 this, FSMEventArgs(EVENT_ABORT)));
231 } 237 }
232 238
233 void SpeechRecognizerImpl::StopAudioCapture() { 239 void SpeechRecognizerImpl::StopAudioCapture() {
234 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, 240 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
(...skipping 131 matching lines...)
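
Note the subtle change in StartRecognition() above: it now enqueues EVENT_PREPARE instead of EVENT_START, so the first FSM step is the asynchronous parameter fetch. The NOTE at the top of this chunk still applies: every external request is re-posted to the IO thread rather than dispatched inline, as in this pattern repeated from the code above:

    BrowserThread::PostTask(
        BrowserThread::IO, FROM_HERE,
        base::Bind(&SpeechRecognizerImpl::DispatchEvent, this,
                   FSMEventArgs(EVENT_PREPARE)));

Posting even for same-thread callers keeps event ordering causal and avoids interleaved processing from synchronous callbacks.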
366 SpeechRecognizerImpl::ExecuteTransitionAndGetNextState( 372 SpeechRecognizerImpl::ExecuteTransitionAndGetNextState(
367 const FSMEventArgs& event_args) { 373 const FSMEventArgs& event_args) {
368 const FSMEvent event = event_args.event; 374 const FSMEvent event = event_args.event;
369 switch (state_) { 375 switch (state_) {
370 case STATE_IDLE: 376 case STATE_IDLE:
371 switch (event) { 377 switch (event) {
372 // TODO(primiano): restore UNREACHABLE_CONDITION on EVENT_ABORT and 378 // TODO(primiano): restore UNREACHABLE_CONDITION on EVENT_ABORT and
373 // EVENT_STOP_CAPTURE below once speech input extensions are fixed. 379 // EVENT_STOP_CAPTURE below once speech input extensions are fixed.
374 case EVENT_ABORT: 380 case EVENT_ABORT:
375 return AbortSilently(event_args); 381 return AbortSilently(event_args);
382 case EVENT_PREPARE:
383 return PrepareRecognition(event_args);
384 case EVENT_START:
385 return NotFeasible(event_args);
386 case EVENT_STOP_CAPTURE:
387 return AbortSilently(event_args);
388 case EVENT_AUDIO_DATA: // Corner cases related to queued messages
389 case EVENT_ENGINE_RESULT: // being lately dispatched.
390 case EVENT_ENGINE_ERROR:
391 case EVENT_AUDIO_ERROR:
392 return DoNothing(event_args);
393 }
394 break;
395 case STATE_PREPARING:
396 switch (event) {
397 case EVENT_ABORT:
398 return AbortSilently(event_args);
399 case EVENT_PREPARE:
400 return NotFeasible(event_args);
376 case EVENT_START: 401 case EVENT_START:
377 return StartRecording(event_args); 402 return StartRecording(event_args);
378 case EVENT_STOP_CAPTURE: 403 case EVENT_STOP_CAPTURE:
379 return AbortSilently(event_args); 404 return AbortSilently(event_args);
380 case EVENT_AUDIO_DATA: // Corner cases related to queued messages 405 case EVENT_AUDIO_DATA: // Corner cases related to queued messages
381 case EVENT_ENGINE_RESULT: // being lately dispatched. 406 case EVENT_ENGINE_RESULT: // being lately dispatched.
382 case EVENT_ENGINE_ERROR: 407 case EVENT_ENGINE_ERROR:
383 case EVENT_AUDIO_ERROR: 408 case EVENT_AUDIO_ERROR:
384 return DoNothing(event_args); 409 return DoNothing(event_args);
385 } 410 }
386 break; 411 break;
387 case STATE_STARTING: 412 case STATE_STARTING:
388 switch (event) { 413 switch (event) {
389 case EVENT_ABORT: 414 case EVENT_ABORT:
390 return AbortWithError(event_args); 415 return AbortWithError(event_args);
416 case EVENT_PREPARE:
417 return NotFeasible(event_args);
391 case EVENT_START: 418 case EVENT_START:
392 return NotFeasible(event_args); 419 return NotFeasible(event_args);
393 case EVENT_STOP_CAPTURE: 420 case EVENT_STOP_CAPTURE:
394 return AbortSilently(event_args); 421 return AbortSilently(event_args);
395 case EVENT_AUDIO_DATA: 422 case EVENT_AUDIO_DATA:
396 return StartRecognitionEngine(event_args); 423 return StartRecognitionEngine(event_args);
397 case EVENT_ENGINE_RESULT: 424 case EVENT_ENGINE_RESULT:
398 return NotFeasible(event_args); 425 return NotFeasible(event_args);
399 case EVENT_ENGINE_ERROR: 426 case EVENT_ENGINE_ERROR:
400 case EVENT_AUDIO_ERROR: 427 case EVENT_AUDIO_ERROR:
401 return AbortWithError(event_args); 428 return AbortWithError(event_args);
402 } 429 }
403 break; 430 break;
404 case STATE_ESTIMATING_ENVIRONMENT: 431 case STATE_ESTIMATING_ENVIRONMENT:
405 switch (event) { 432 switch (event) {
406 case EVENT_ABORT: 433 case EVENT_ABORT:
407 return AbortWithError(event_args); 434 return AbortWithError(event_args);
435 case EVENT_PREPARE:
436 return NotFeasible(event_args);
408 case EVENT_START: 437 case EVENT_START:
409 return NotFeasible(event_args); 438 return NotFeasible(event_args);
410 case EVENT_STOP_CAPTURE: 439 case EVENT_STOP_CAPTURE:
411 return StopCaptureAndWaitForResult(event_args); 440 return StopCaptureAndWaitForResult(event_args);
412 case EVENT_AUDIO_DATA: 441 case EVENT_AUDIO_DATA:
413 return WaitEnvironmentEstimationCompletion(event_args); 442 return WaitEnvironmentEstimationCompletion(event_args);
414 case EVENT_ENGINE_RESULT: 443 case EVENT_ENGINE_RESULT:
415 return ProcessIntermediateResult(event_args); 444 return ProcessIntermediateResult(event_args);
416 case EVENT_ENGINE_ERROR: 445 case EVENT_ENGINE_ERROR:
417 case EVENT_AUDIO_ERROR: 446 case EVENT_AUDIO_ERROR:
418 return AbortWithError(event_args); 447 return AbortWithError(event_args);
419 } 448 }
420 break; 449 break;
421 case STATE_WAITING_FOR_SPEECH: 450 case STATE_WAITING_FOR_SPEECH:
422 switch (event) { 451 switch (event) {
423 case EVENT_ABORT: 452 case EVENT_ABORT:
424 return AbortWithError(event_args); 453 return AbortWithError(event_args);
454 case EVENT_PREPARE:
455 return NotFeasible(event_args);
425 case EVENT_START: 456 case EVENT_START:
426 return NotFeasible(event_args); 457 return NotFeasible(event_args);
427 case EVENT_STOP_CAPTURE: 458 case EVENT_STOP_CAPTURE:
428 return StopCaptureAndWaitForResult(event_args); 459 return StopCaptureAndWaitForResult(event_args);
429 case EVENT_AUDIO_DATA: 460 case EVENT_AUDIO_DATA:
430 return DetectUserSpeechOrTimeout(event_args); 461 return DetectUserSpeechOrTimeout(event_args);
431 case EVENT_ENGINE_RESULT: 462 case EVENT_ENGINE_RESULT:
432 return ProcessIntermediateResult(event_args); 463 return ProcessIntermediateResult(event_args);
433 case EVENT_ENGINE_ERROR: 464 case EVENT_ENGINE_ERROR:
434 case EVENT_AUDIO_ERROR: 465 case EVENT_AUDIO_ERROR:
435 return AbortWithError(event_args); 466 return AbortWithError(event_args);
436 } 467 }
437 break; 468 break;
438 case STATE_RECOGNIZING: 469 case STATE_RECOGNIZING:
439 switch (event) { 470 switch (event) {
440 case EVENT_ABORT: 471 case EVENT_ABORT:
441 return AbortWithError(event_args); 472 return AbortWithError(event_args);
473 case EVENT_PREPARE:
474 return NotFeasible(event_args);
442 case EVENT_START: 475 case EVENT_START:
443 return NotFeasible(event_args); 476 return NotFeasible(event_args);
444 case EVENT_STOP_CAPTURE: 477 case EVENT_STOP_CAPTURE:
445 return StopCaptureAndWaitForResult(event_args); 478 return StopCaptureAndWaitForResult(event_args);
446 case EVENT_AUDIO_DATA: 479 case EVENT_AUDIO_DATA:
447 return DetectEndOfSpeech(event_args); 480 return DetectEndOfSpeech(event_args);
448 case EVENT_ENGINE_RESULT: 481 case EVENT_ENGINE_RESULT:
449 return ProcessIntermediateResult(event_args); 482 return ProcessIntermediateResult(event_args);
450 case EVENT_ENGINE_ERROR: 483 case EVENT_ENGINE_ERROR:
451 case EVENT_AUDIO_ERROR: 484 case EVENT_AUDIO_ERROR:
452 return AbortWithError(event_args); 485 return AbortWithError(event_args);
453 } 486 }
454 break; 487 break;
455 case STATE_WAITING_FINAL_RESULT: 488 case STATE_WAITING_FINAL_RESULT:
456 switch (event) { 489 switch (event) {
457 case EVENT_ABORT: 490 case EVENT_ABORT:
458 return AbortWithError(event_args); 491 return AbortWithError(event_args);
492 case EVENT_PREPARE:
493 return NotFeasible(event_args);
459 case EVENT_START: 494 case EVENT_START:
460 return NotFeasible(event_args); 495 return NotFeasible(event_args);
461 case EVENT_STOP_CAPTURE: 496 case EVENT_STOP_CAPTURE:
462 case EVENT_AUDIO_DATA: 497 case EVENT_AUDIO_DATA:
463 return DoNothing(event_args); 498 return DoNothing(event_args);
464 case EVENT_ENGINE_RESULT: 499 case EVENT_ENGINE_RESULT:
465 return ProcessFinalResult(event_args); 500 return ProcessFinalResult(event_args);
466 case EVENT_ENGINE_ERROR: 501 case EVENT_ENGINE_ERROR:
467 case EVENT_AUDIO_ERROR: 502 case EVENT_AUDIO_ERROR:
468 return AbortWithError(event_args); 503 return AbortWithError(event_args);
(...skipping 36 matching lines...)
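
With the EVENT_PREPARE/STATE_PREPARING additions above, the happy path through the FSM gains one hop before audio capture starts. A sketch of the resulting sequence, derived from the transition table (later states behave as before this CL):

    STATE_IDLE      --EVENT_PREPARE----> PrepareRecognition() -> STATE_PREPARING
    STATE_PREPARING --EVENT_START------> StartRecording()     -> STATE_STARTING
                      (EVENT_START is now dispatched from OnDeviceInfo(), once
                       AudioSystem has delivered the device parameters)
    STATE_STARTING  --EVENT_AUDIO_DATA-> StartRecognitionEngine()  ...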
505 if (route_to_vumeter) { 540 if (route_to_vumeter) {
506 DCHECK(route_to_endpointer); // Depends on endpointer due to |rms|. 541 DCHECK(route_to_endpointer); // Depends on endpointer due to |rms|.
507 UpdateSignalAndNoiseLevels(rms, clip_detected); 542 UpdateSignalAndNoiseLevels(rms, clip_detected);
508 } 543 }
509 if (route_to_sr_engine) { 544 if (route_to_sr_engine) {
510 DCHECK(recognition_engine_.get() != NULL); 545 DCHECK(recognition_engine_.get() != NULL);
511 recognition_engine_->TakeAudioChunk(raw_audio); 546 recognition_engine_->TakeAudioChunk(raw_audio);
512 } 547 }
513 } 548 }
514 549
550 void SpeechRecognizerImpl::OnDeviceInfo(const media::AudioParameters& params) {
551 DCHECK_CURRENTLY_ON(BrowserThread::IO);
552 device_params_ = params;
553 DVLOG(1) << "Device parameters: " << device_params_.AsHumanReadableString();
554 DispatchEvent(FSMEventArgs(EVENT_START));
555 }
556
557 SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::PrepareRecognition(
558 const FSMEventArgs&) {
559 DCHECK(state_ == STATE_IDLE);
560 DCHECK(recognition_engine_.get() != NULL);
561 DCHECK(!IsCapturingAudio());
562 GetAudioSystem()->GetInputStreamParameters(
tommi (sloooow) - chröme 2017/02/02 16:27:18 Instead of adding AudioSystem etc, could you post
563 device_id_, base::Bind(&SpeechRecognizerImpl::OnDeviceInfo,
564 weak_ptr_factory_.GetWeakPtr()));
565
566 listener()->OnRecognitionStart(session_id());
567 return STATE_PREPARING;
568 }
569
515 SpeechRecognizerImpl::FSMState 570 SpeechRecognizerImpl::FSMState
516 SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) { 571 SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) {
517 DCHECK(state_ == STATE_IDLE); 572 DCHECK(state_ == STATE_PREPARING);
518 DCHECK(recognition_engine_.get() != NULL); 573 DCHECK(recognition_engine_.get() != NULL);
519 DCHECK(!IsCapturingAudio()); 574 DCHECK(!IsCapturingAudio());
520 const bool unit_test_is_active = (audio_manager_for_tests_ != NULL);
521 AudioManager* audio_manager = unit_test_is_active ?
522 audio_manager_for_tests_ :
523 AudioManager::Get();
524 DCHECK(audio_manager != NULL);
525 575
526 DVLOG(1) << "SpeechRecognizerImpl starting audio capture."; 576 DVLOG(1) << "SpeechRecognizerImpl starting audio capture.";
527 num_samples_recorded_ = 0; 577 num_samples_recorded_ = 0;
528 audio_level_ = 0; 578 audio_level_ = 0;
529 end_of_utterance_ = false; 579 end_of_utterance_ = false;
530 listener()->OnRecognitionStart(session_id());
531 580
532 // TODO(xians): Check if the OS has the device with |device_id_|, return 581 int chunk_duration_ms = recognition_engine_->GetDesiredAudioChunkDurationMs();
533 // |SPEECH_AUDIO_ERROR_DETAILS_NO_MIC| if the target device does not exist. 582
534 if (!audio_manager->HasAudioInputDevices()) { 583 if (!device_params_.IsValid()) {
584 DLOG(ERROR) << "Audio input device not found";
535 return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE, 585 return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE,
536 SPEECH_AUDIO_ERROR_DETAILS_NO_MIC)); 586 SPEECH_AUDIO_ERROR_DETAILS_NO_MIC));
537 } 587 }
538 588
539 int chunk_duration_ms = recognition_engine_->GetDesiredAudioChunkDurationMs();
540
541 AudioParameters in_params = audio_manager->GetInputStreamParameters(
542 device_id_);
543 if (!in_params.IsValid() && !unit_test_is_active) {
544 DLOG(ERROR) << "Invalid native audio input parameters";
545 return Abort(
546 SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE));
547 }
548
549 // Audio converter shall provide audio based on these parameters as output. 589 // Audio converter shall provide audio based on these parameters as output.
550 // Hard coded, WebSpeech specific parameters are utilized here. 590 // Hard coded, WebSpeech specific parameters are utilized here.
551 int frames_per_buffer = (kAudioSampleRate * chunk_duration_ms) / 1000; 591 int frames_per_buffer = (kAudioSampleRate * chunk_duration_ms) / 1000;
552 AudioParameters output_parameters = AudioParameters( 592 AudioParameters output_parameters = AudioParameters(
553 AudioParameters::AUDIO_PCM_LOW_LATENCY, kChannelLayout, kAudioSampleRate, 593 AudioParameters::AUDIO_PCM_LOW_LATENCY, kChannelLayout, kAudioSampleRate,
554 kNumBitsPerAudioSample, frames_per_buffer); 594 kNumBitsPerAudioSample, frames_per_buffer);
555 DVLOG(1) << "SRI::output_parameters: " 595 DVLOG(1) << "SRI::output_parameters: "
556 << output_parameters.AsHumanReadableString(); 596 << output_parameters.AsHumanReadableString();
557 597
558 // Audio converter will receive audio based on these parameters as input. 598 // Audio converter will receive audio based on these parameters as input.
559 // On Windows we start by verifying that Core Audio is supported. If not, 599 // On Windows we start by verifying that Core Audio is supported. If not,
560 // the WaveIn API is used and we might as well avoid all audio conversations 600 // the WaveIn API is used and we might as well avoid all audio conversations
561 // since WaveIn does the conversion for us. 601 // since WaveIn does the conversion for us.
562 // TODO(henrika): this code should be moved to platform dependent audio 602 // TODO(henrika): this code should be moved to platform dependent audio
563 // managers. 603 // managers.
564 bool use_native_audio_params = true; 604 bool use_native_audio_params = true;
565 #if defined(OS_WIN) 605 #if defined(OS_WIN)
566 use_native_audio_params = media::CoreAudioUtil::IsSupported(); 606 use_native_audio_params = media::CoreAudioUtil::IsSupported();
567 DVLOG_IF(1, !use_native_audio_params) << "Reverting to WaveIn for WebSpeech"; 607 DVLOG_IF(1, !use_native_audio_params) << "Reverting to WaveIn for WebSpeech";
568 #endif 608 #endif
569 609
570 AudioParameters input_parameters = output_parameters; 610 AudioParameters input_parameters = output_parameters;
571 if (use_native_audio_params && !unit_test_is_active) { 611
612 // AUDIO_FAKE means we are running a test.
613 if (use_native_audio_params &&
614 device_params_.format() != media::AudioParameters::AUDIO_FAKE) {
572 // Use native audio parameters but avoid opening up at the native buffer 615 // Use native audio parameters but avoid opening up at the native buffer
573 // size. Instead use same frame size (in milliseconds) as WebSpeech uses. 616 // size. Instead use same frame size (in milliseconds) as WebSpeech uses.
574 // We rely on internal buffers in the audio back-end to fulfill this request 617 // We rely on internal buffers in the audio back-end to fulfill this request
575 // and the idea is to simplify the audio conversion since each Convert() 618 // and the idea is to simplify the audio conversion since each Convert()
576 // call will then render exactly one ProvideInput() call. 619 // call will then render exactly one ProvideInput() call.
577 // in_params.sample_rate() 620 input_parameters = device_params_;
578 input_parameters = in_params;
579 frames_per_buffer = 621 frames_per_buffer =
580 ((in_params.sample_rate() * chunk_duration_ms) / 1000.0) + 0.5; 622 ((input_parameters.sample_rate() * chunk_duration_ms) / 1000.0) + 0.5;
581 input_parameters.set_frames_per_buffer(frames_per_buffer); 623 input_parameters.set_frames_per_buffer(frames_per_buffer);
582 DVLOG(1) << "SRI::input_parameters: " 624 DVLOG(1) << "SRI::input_parameters: "
583 << input_parameters.AsHumanReadableString(); 625 << input_parameters.AsHumanReadableString();
584 } 626 }
585 627
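
For concreteness, the buffer sizing above works out as follows, assuming a hypothetical 100 ms chunk duration from GetDesiredAudioChunkDurationMs() and a 44.1 kHz native device rate (both numbers are illustrative, not taken from this CL):

    // Output side (fixed WebSpeech format): 16000 * 100 / 1000 = 1600 frames.
    // Native input side at 44100 Hz: 44100 * 100 / 1000.0 = 4410.0; adding 0.5
    // and truncating to int rounds to the nearest frame, giving 4410 frames.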
586 // Create an audio converter which converts data between native input format 628 // Create an audio converter which converts data between native input format
587 // and WebSpeech specific output format. 629 // and WebSpeech specific output format.
588 audio_converter_.reset( 630 audio_converter_.reset(
589 new OnDataConverter(input_parameters, output_parameters)); 631 new OnDataConverter(input_parameters, output_parameters));
590 632
591 audio_controller_ = AudioInputController::Create( 633 audio_controller_ =
592 audio_manager, this, this, input_parameters, device_id_, NULL); 634 AudioInputController::Create(GetAudioSystem()->GetAudioManager(), this,
635 this, input_parameters, device_id_, NULL);
593 636
594 if (!audio_controller_.get()) { 637 if (!audio_controller_.get()) {
595 return Abort( 638 return Abort(
596 SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE)); 639 SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE));
597 } 640 }
598 641
599 audio_log_->OnCreated(0, input_parameters, device_id_); 642 audio_log_->OnCreated(0, input_parameters, device_id_);
600 643
601 // The endpointer needs to estimate the environment/background noise before 644 // The endpointer needs to estimate the environment/background noise before
602 // starting to treat the audio as user input. We wait in the state 645 // starting to treat the audio as user input. We wait in the state
(...skipping 78 matching lines...)
681 return Abort( 724 return Abort(
682 SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE)); 725 SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE));
683 } else if (event_args.event == EVENT_ENGINE_ERROR) { 726 } else if (event_args.event == EVENT_ENGINE_ERROR) {
684 return Abort(event_args.engine_error); 727 return Abort(event_args.engine_error);
685 } 728 }
686 return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_ABORTED)); 729 return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_ABORTED));
687 } 730 }
688 731
689 SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::Abort( 732 SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::Abort(
690 const SpeechRecognitionError& error) { 733 const SpeechRecognitionError& error) {
734 DCHECK_CURRENTLY_ON(BrowserThread::IO);
735
691 if (IsCapturingAudio()) 736 if (IsCapturingAudio())
692 CloseAudioControllerAsynchronously(); 737 CloseAudioControllerAsynchronously();
693 738
694 DVLOG(1) << "SpeechRecognizerImpl canceling recognition. "; 739 DVLOG(1) << "SpeechRecognizerImpl canceling recognition. ";
695 740
741 if (state_ == STATE_PREPARING) {
742 // Cancel an outstanding reply from AudioSystem.
743 weak_ptr_factory_.InvalidateWeakPtrs();
744 }
745
696 // The recognition engine is initialized only after STATE_STARTING. 746 // The recognition engine is initialized only after STATE_STARTING.
697 if (state_ > STATE_STARTING) { 747 if (state_ > STATE_STARTING) {
698 DCHECK(recognition_engine_.get() != NULL); 748 DCHECK(recognition_engine_.get() != NULL);
699 recognition_engine_->EndRecognition(); 749 recognition_engine_->EndRecognition();
700 } 750 }
701 751
702 if (state_ > STATE_WAITING_FOR_SPEECH && state_ < STATE_WAITING_FINAL_RESULT) 752 if (state_ > STATE_WAITING_FOR_SPEECH && state_ < STATE_WAITING_FINAL_RESULT)
703 listener()->OnSoundEnd(session_id()); 753 listener()->OnSoundEnd(session_id());
704 754
705 if (state_ > STATE_STARTING && state_ < STATE_WAITING_FINAL_RESULT) 755 if (state_ > STATE_STARTING && state_ < STATE_WAITING_FINAL_RESULT)
(...skipping 116 matching lines...)
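
The new STATE_PREPARING branch in Abort() above is what makes the asynchronous parameter fetch cancellable: OnDeviceInfo() was bound through a WeakPtr, so invalidating the factory turns a still-in-flight AudioSystem reply into a no-op instead of letting it dispatch EVENT_START into an aborted recognizer. A minimal standalone sketch of that base::WeakPtrFactory behaviour (illustrative class, not CL code):

    #include "base/bind.h"
    #include "base/callback.h"
    #include "base/memory/weak_ptr.h"

    class Preparer {
     public:
      Preparer() : weak_factory_(this) {}
      base::Closure MakeReplyCallback() {
        // Bound through a WeakPtr: base::Bind skips the call if the pointer
        // has been invalidated by the time the closure runs.
        return base::Bind(&Preparer::OnReply, weak_factory_.GetWeakPtr());
      }
      void Cancel() { weak_factory_.InvalidateWeakPtrs(); }

     private:
      void OnReply() { /* would dispatch EVENT_START here */ }
      base::WeakPtrFactory<Preparer> weak_factory_;
    };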
822 872
823 float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) / 873 float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) /
824 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); 874 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped);
825 noise_level = std::min(std::max(0.0f, noise_level), 875 noise_level = std::min(std::max(0.0f, noise_level),
826 kAudioMeterRangeMaxUnclipped); 876 kAudioMeterRangeMaxUnclipped);
827 877
828 listener()->OnAudioLevelsChange( 878 listener()->OnAudioLevelsChange(
829 session_id(), clip_detected ? 1.0f : audio_level_, noise_level); 879 session_id(), clip_detected ? 1.0f : audio_level_, noise_level);
830 } 880 }
831 881
832 void SpeechRecognizerImpl::SetAudioManagerForTesting( 882 void SpeechRecognizerImpl::SetAudioSystemForTesting(
833 AudioManager* audio_manager) { 883 media::AudioSystem* audio_system) {
834 audio_manager_for_tests_ = audio_manager; 884 audio_system_for_tests_ = audio_system;
885 }
886
887 media::AudioSystem* SpeechRecognizerImpl::GetAudioSystem() {
888 return audio_system_for_tests_ ? audio_system_for_tests_ : audio_system_;
835 } 889 }
836 890
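
SetAudioSystemForTesting() and GetAudioSystem() above replace the old static audio_manager_for_tests_ hook, so unit tests now inject a fake media::AudioSystem instead of a fake AudioManager. A hedged sketch of how a test might use the hook (the mock name is illustrative, and it assumes the setter stays static like its AudioManager predecessor):

    // In the test fixture's SetUp():
    SpeechRecognizerImpl::SetAudioSystemForTesting(&mock_audio_system_);
    // ... exercise the recognizer; GetAudioSystem() now returns the mock ...
    // In TearDown(), clear the hook so later tests use the real AudioSystem:
    SpeechRecognizerImpl::SetAudioSystemForTesting(nullptr);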
837 SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value) 891 SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value)
838 : event(event_value), 892 : event(event_value),
839 audio_data(NULL), 893 audio_data(NULL),
840 engine_error(SPEECH_RECOGNITION_ERROR_NONE) { 894 engine_error(SPEECH_RECOGNITION_ERROR_NONE) {
841 } 895 }
842 896
843 SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(const FSMEventArgs& other) = 897 SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(const FSMEventArgs& other) =
844 default; 898 default;
845 899
846 SpeechRecognizerImpl::FSMEventArgs::~FSMEventArgs() { 900 SpeechRecognizerImpl::FSMEventArgs::~FSMEventArgs() {
847 } 901 }
848 902
849 } // namespace content 903 } // namespace content