Chromium Code Reviews

Side by Side Diff: content/browser/speech/speech_recognizer_impl.cc

Issue 2675713002: Switch Speech Recognition to asynchronous callback-based AudioManager interactions. (Closed)
Patch Set: review comments addressed Created 3 years, 10 months ago
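The heart of this change: device capture parameters are no longer fetched with a blocking AudioManager::GetInputStreamParameters() call on the IO thread; the recognizer now asks media::AudioSystem and receives the answer later through a callback (OnDeviceInfo), with a new PREPARING state bridging the gap. Below is a minimal, standalone C++ sketch of that sync-to-async shape; AudioParameters and the two functions are simplified stand-ins for illustration, not the real media:: interfaces.

#include <functional>
#include <iostream>
#include <string>

// Simplified stand-in for media::AudioParameters.
struct AudioParameters {
  int sample_rate;
  bool IsValid() const { return sample_rate > 0; }
};

// Old shape (synchronous): the IO thread blocks until the answer is known.
AudioParameters GetInputStreamParametersSync(const std::string& device_id) {
  return {16000};
}

// New shape (asynchronous): the caller passes a callback and returns at once;
// in the patch the reply is delivered back on the IO thread.
void GetInputStreamParametersAsync(
    const std::string& device_id,
    std::function<void(const AudioParameters&)> reply) {
  reply(AudioParameters{16000});
}

int main() {
  // Old: the result is available immediately, but the calling thread waited.
  AudioParameters params = GetInputStreamParametersSync("default");
  std::cout << "sync: " << params.sample_rate << " Hz\n";

  // New: the recognizer enters a PREPARING-like state and resumes work in the
  // reply (the patch dispatches EVENT_START from OnDeviceInfo).
  GetInputStreamParametersAsync("default", [](const AudioParameters& p) {
    std::cout << "async reply: " << p.sample_rate << " Hz\n";
  });
  return 0;
}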
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "content/browser/speech/speech_recognizer_impl.h" 5 #include "content/browser/speech/speech_recognizer_impl.h"
6 6
7 #include <stdint.h> 7 #include <stdint.h>
8 8
9 #include <algorithm> 9 #include <algorithm>
10 10
11 #include "base/bind.h" 11 #include "base/bind.h"
12 #include "base/macros.h" 12 #include "base/macros.h"
13 #include "base/time/time.h" 13 #include "base/time/time.h"
14 #include "build/build_config.h" 14 #include "build/build_config.h"
15 #include "content/browser/browser_main_loop.h" 15 #include "content/browser/browser_main_loop.h"
16 #include "content/browser/media/media_internals.h" 16 #include "content/browser/media/media_internals.h"
17 #include "content/browser/speech/audio_buffer.h" 17 #include "content/browser/speech/audio_buffer.h"
18 #include "content/public/browser/speech_recognition_event_listener.h" 18 #include "content/public/browser/speech_recognition_event_listener.h"
19 #include "media/audio/audio_file_writer.h" 19 #include "media/audio/audio_file_writer.h"
20 #include "media/audio/audio_manager.h"
21 #include "media/audio/audio_system.h"
20 #include "media/base/audio_converter.h" 22 #include "media/base/audio_converter.h"
21 23
22 #if defined(OS_WIN) 24 #if defined(OS_WIN)
23 #include "media/audio/win/core_audio_util_win.h" 25 #include "media/audio/win/core_audio_util_win.h"
24 #endif 26 #endif
25 27
26 using media::AudioBus; 28 using media::AudioBus;
27 using media::AudioConverter; 29 using media::AudioConverter;
28 using media::AudioInputController; 30 using media::AudioInputController;
29 using media::AudioManager; 31 using media::AudioManager;
(...skipping 75 matching lines...)
105 } 107 }
106 108
107 } // namespace 109 } // namespace
108 110
109 const int SpeechRecognizerImpl::kAudioSampleRate = 16000; 111 const int SpeechRecognizerImpl::kAudioSampleRate = 16000;
110 const ChannelLayout SpeechRecognizerImpl::kChannelLayout = 112 const ChannelLayout SpeechRecognizerImpl::kChannelLayout =
111 media::CHANNEL_LAYOUT_MONO; 113 media::CHANNEL_LAYOUT_MONO;
112 const int SpeechRecognizerImpl::kNumBitsPerAudioSample = 16; 114 const int SpeechRecognizerImpl::kNumBitsPerAudioSample = 16;
113 const int SpeechRecognizerImpl::kNoSpeechTimeoutMs = 8000; 115 const int SpeechRecognizerImpl::kNoSpeechTimeoutMs = 8000;
114 const int SpeechRecognizerImpl::kEndpointerEstimationTimeMs = 300; 116 const int SpeechRecognizerImpl::kEndpointerEstimationTimeMs = 300;
115 media::AudioManager* SpeechRecognizerImpl::audio_manager_for_tests_ = NULL; 117 media::AudioSystem* SpeechRecognizerImpl::audio_system_for_tests_ = nullptr;
116 118
117 static_assert(SpeechRecognizerImpl::kNumBitsPerAudioSample % 8 == 0, 119 static_assert(SpeechRecognizerImpl::kNumBitsPerAudioSample % 8 == 0,
118 "kNumBitsPerAudioSample must be a multiple of 8"); 120 "kNumBitsPerAudioSample must be a multiple of 8");
119 121
120 // SpeechRecognizerImpl::OnDataConverter implementation 122 // SpeechRecognizerImpl::OnDataConverter implementation
121 123
122 SpeechRecognizerImpl::OnDataConverter::OnDataConverter( 124 SpeechRecognizerImpl::OnDataConverter::OnDataConverter(
123 const AudioParameters& input_params, 125 const AudioParameters& input_params,
124 const AudioParameters& output_params) 126 const AudioParameters& output_params)
125 : audio_converter_(input_params, output_params, false), 127 : audio_converter_(input_params, output_params, false),
(...skipping 43 matching lines...)
169 input_bus_->CopyTo(dest); 171 input_bus_->CopyTo(dest);
170 // Indicate that the recorded audio has in fact been used by the converter. 172 // Indicate that the recorded audio has in fact been used by the converter.
171 data_was_converted_ = true; 173 data_was_converted_ = true;
172 return 1; 174 return 1;
173 } 175 }
174 176
175 // SpeechRecognizerImpl implementation 177 // SpeechRecognizerImpl implementation
176 178
177 SpeechRecognizerImpl::SpeechRecognizerImpl( 179 SpeechRecognizerImpl::SpeechRecognizerImpl(
178 SpeechRecognitionEventListener* listener, 180 SpeechRecognitionEventListener* listener,
181 media::AudioSystem* audio_system,
179 int session_id, 182 int session_id,
180 bool continuous, 183 bool continuous,
181 bool provisional_results, 184 bool provisional_results,
182 SpeechRecognitionEngine* engine) 185 SpeechRecognitionEngine* engine)
183 : SpeechRecognizer(listener, session_id), 186 : SpeechRecognizer(listener, session_id),
187 audio_system_(audio_system),
184 recognition_engine_(engine), 188 recognition_engine_(engine),
185 endpointer_(kAudioSampleRate), 189 endpointer_(kAudioSampleRate),
186 audio_log_(MediaInternals::GetInstance()->CreateAudioLog( 190 audio_log_(MediaInternals::GetInstance()->CreateAudioLog(
187 media::AudioLogFactory::AUDIO_INPUT_CONTROLLER)), 191 media::AudioLogFactory::AUDIO_INPUT_CONTROLLER)),
188 is_dispatching_event_(false), 192 is_dispatching_event_(false),
189 provisional_results_(provisional_results), 193 provisional_results_(provisional_results),
190 end_of_utterance_(false), 194 end_of_utterance_(false),
191 state_(STATE_IDLE) { 195 state_(STATE_IDLE),
192 DCHECK(recognition_engine_ != NULL); 196 weak_ptr_factory_(this) {
197 DCHECK(recognition_engine_ != nullptr);
198 DCHECK(audio_system_ != nullptr);
193 if (!continuous) { 199 if (!continuous) {
194 // In single shot (non-continuous) recognition, 200 // In single shot (non-continuous) recognition,
195 // the session is automatically ended after: 201 // the session is automatically ended after:
196 // - 0.5 seconds of silence if time < 3 seconds 202 // - 0.5 seconds of silence if time < 3 seconds
197 // - 1 second of silence if time >= 3 seconds 203 // - 1 second of silence if time >= 3 seconds
198 endpointer_.set_speech_input_complete_silence_length( 204 endpointer_.set_speech_input_complete_silence_length(
199 base::Time::kMicrosecondsPerSecond / 2); 205 base::Time::kMicrosecondsPerSecond / 2);
200 endpointer_.set_long_speech_input_complete_silence_length( 206 endpointer_.set_long_speech_input_complete_silence_length(
201 base::Time::kMicrosecondsPerSecond); 207 base::Time::kMicrosecondsPerSecond);
202 endpointer_.set_long_speech_length(3 * base::Time::kMicrosecondsPerSecond); 208 endpointer_.set_long_speech_length(3 * base::Time::kMicrosecondsPerSecond);
(...skipping 13 matching lines...)
216 // NOTE: all the external events and requests should be enqueued (PostTask), even 222 // NOTE: all the external events and requests should be enqueued (PostTask), even
217 // if they come from the same (IO) thread, in order to preserve the relationship 223 // if they come from the same (IO) thread, in order to preserve the relationship
218 // of causality between events and avoid interleaved event processing due to 224 // of causality between events and avoid interleaved event processing due to
219 // synchronous callbacks. 225 // synchronous callbacks.
220 226
221 void SpeechRecognizerImpl::StartRecognition(const std::string& device_id) { 227 void SpeechRecognizerImpl::StartRecognition(const std::string& device_id) {
222 DCHECK(!device_id.empty()); 228 DCHECK(!device_id.empty());
223 device_id_ = device_id; 229 device_id_ = device_id;
224 230
225 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, 231 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
226 base::Bind(&SpeechRecognizerImpl::DispatchEvent, 232 base::Bind(&SpeechRecognizerImpl::DispatchEvent, this,
227 this, FSMEventArgs(EVENT_START))); 233 FSMEventArgs(EVENT_PREPARE)));
228 } 234 }
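The NOTE above is the serialization rule behind StartRecognition() and the other entry points: every external event is enqueued with PostTask, even when the caller is already on the IO thread, so DispatchEvent never runs re-entrantly and events are handled strictly in posting order. A small standalone sketch of the idea (std-only; EventLoop is a stand-in, not Chromium's BrowserThread/task runner):

#include <functional>
#include <iostream>
#include <queue>

class EventLoop {
 public:
  void Post(std::function<void()> task) { queue_.push(std::move(task)); }
  void Run() {
    while (!queue_.empty()) {
      std::function<void()> task = std::move(queue_.front());
      queue_.pop();
      task();  // Each handler finishes before the next one starts.
    }
  }

 private:
  std::queue<std::function<void()>> queue_;
};

int main() {
  EventLoop loop;
  loop.Post([&loop] {
    std::cout << "handle EVENT_START\n";
    // A callback arriving while this handler runs is enqueued, not invoked
    // inline, so it cannot observe the state machine mid-transition.
    loop.Post([] { std::cout << "handle EVENT_AUDIO_DATA\n"; });
    std::cout << "EVENT_START handler done\n";
  });
  loop.Run();
  return 0;
}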
229 235
230 void SpeechRecognizerImpl::AbortRecognition() { 236 void SpeechRecognizerImpl::AbortRecognition() {
231 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, 237 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
232 base::Bind(&SpeechRecognizerImpl::DispatchEvent, 238 base::Bind(&SpeechRecognizerImpl::DispatchEvent,
233 this, FSMEventArgs(EVENT_ABORT))); 239 this, FSMEventArgs(EVENT_ABORT)));
234 } 240 }
235 241
236 void SpeechRecognizerImpl::StopAudioCapture() { 242 void SpeechRecognizerImpl::StopAudioCapture() {
237 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, 243 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
(...skipping 131 matching lines...)
369 SpeechRecognizerImpl::ExecuteTransitionAndGetNextState( 375 SpeechRecognizerImpl::ExecuteTransitionAndGetNextState(
370 const FSMEventArgs& event_args) { 376 const FSMEventArgs& event_args) {
371 const FSMEvent event = event_args.event; 377 const FSMEvent event = event_args.event;
372 switch (state_) { 378 switch (state_) {
373 case STATE_IDLE: 379 case STATE_IDLE:
374 switch (event) { 380 switch (event) {
375 // TODO(primiano): restore UNREACHABLE_CONDITION on EVENT_ABORT and 381 // TODO(primiano): restore UNREACHABLE_CONDITION on EVENT_ABORT and
376 // EVENT_STOP_CAPTURE below once speech input extensions are fixed. 382 // EVENT_STOP_CAPTURE below once speech input extensions are fixed.
377 case EVENT_ABORT: 383 case EVENT_ABORT:
378 return AbortSilently(event_args); 384 return AbortSilently(event_args);
385 case EVENT_PREPARE:
386 return PrepareRecognition(event_args);
387 case EVENT_START:
388 return NotFeasible(event_args);
389 case EVENT_STOP_CAPTURE:
390 return AbortSilently(event_args);
391 case EVENT_AUDIO_DATA: // Corner cases related to queued messages
392 case EVENT_ENGINE_RESULT: // being dispatched late.
393 case EVENT_ENGINE_ERROR:
394 case EVENT_AUDIO_ERROR:
395 return DoNothing(event_args);
396 }
397 break;
398 case STATE_PREPARING:
399 switch (event) {
400 case EVENT_ABORT:
401 return AbortSilently(event_args);
402 case EVENT_PREPARE:
403 return NotFeasible(event_args);
379 case EVENT_START: 404 case EVENT_START:
380 return StartRecording(event_args); 405 return StartRecording(event_args);
381 case EVENT_STOP_CAPTURE: 406 case EVENT_STOP_CAPTURE:
382 return AbortSilently(event_args); 407 return AbortSilently(event_args);
383 case EVENT_AUDIO_DATA: // Corner cases related to queued messages 408 case EVENT_AUDIO_DATA: // Corner cases related to queued messages
384 case EVENT_ENGINE_RESULT: // being dispatched late. 409 case EVENT_ENGINE_RESULT: // being dispatched late.
385 case EVENT_ENGINE_ERROR: 410 case EVENT_ENGINE_ERROR:
386 case EVENT_AUDIO_ERROR: 411 case EVENT_AUDIO_ERROR:
387 return DoNothing(event_args); 412 return DoNothing(event_args);
388 } 413 }
389 break; 414 break;
390 case STATE_STARTING: 415 case STATE_STARTING:
391 switch (event) { 416 switch (event) {
392 case EVENT_ABORT: 417 case EVENT_ABORT:
393 return AbortWithError(event_args); 418 return AbortWithError(event_args);
419 case EVENT_PREPARE:
420 return NotFeasible(event_args);
394 case EVENT_START: 421 case EVENT_START:
395 return NotFeasible(event_args); 422 return NotFeasible(event_args);
396 case EVENT_STOP_CAPTURE: 423 case EVENT_STOP_CAPTURE:
397 return AbortSilently(event_args); 424 return AbortSilently(event_args);
398 case EVENT_AUDIO_DATA: 425 case EVENT_AUDIO_DATA:
399 return StartRecognitionEngine(event_args); 426 return StartRecognitionEngine(event_args);
400 case EVENT_ENGINE_RESULT: 427 case EVENT_ENGINE_RESULT:
401 return NotFeasible(event_args); 428 return NotFeasible(event_args);
402 case EVENT_ENGINE_ERROR: 429 case EVENT_ENGINE_ERROR:
403 case EVENT_AUDIO_ERROR: 430 case EVENT_AUDIO_ERROR:
404 return AbortWithError(event_args); 431 return AbortWithError(event_args);
405 } 432 }
406 break; 433 break;
407 case STATE_ESTIMATING_ENVIRONMENT: 434 case STATE_ESTIMATING_ENVIRONMENT:
408 switch (event) { 435 switch (event) {
409 case EVENT_ABORT: 436 case EVENT_ABORT:
410 return AbortWithError(event_args); 437 return AbortWithError(event_args);
438 case EVENT_PREPARE:
439 return NotFeasible(event_args);
411 case EVENT_START: 440 case EVENT_START:
412 return NotFeasible(event_args); 441 return NotFeasible(event_args);
413 case EVENT_STOP_CAPTURE: 442 case EVENT_STOP_CAPTURE:
414 return StopCaptureAndWaitForResult(event_args); 443 return StopCaptureAndWaitForResult(event_args);
415 case EVENT_AUDIO_DATA: 444 case EVENT_AUDIO_DATA:
416 return WaitEnvironmentEstimationCompletion(event_args); 445 return WaitEnvironmentEstimationCompletion(event_args);
417 case EVENT_ENGINE_RESULT: 446 case EVENT_ENGINE_RESULT:
418 return ProcessIntermediateResult(event_args); 447 return ProcessIntermediateResult(event_args);
419 case EVENT_ENGINE_ERROR: 448 case EVENT_ENGINE_ERROR:
420 case EVENT_AUDIO_ERROR: 449 case EVENT_AUDIO_ERROR:
421 return AbortWithError(event_args); 450 return AbortWithError(event_args);
422 } 451 }
423 break; 452 break;
424 case STATE_WAITING_FOR_SPEECH: 453 case STATE_WAITING_FOR_SPEECH:
425 switch (event) { 454 switch (event) {
426 case EVENT_ABORT: 455 case EVENT_ABORT:
427 return AbortWithError(event_args); 456 return AbortWithError(event_args);
457 case EVENT_PREPARE:
458 return NotFeasible(event_args);
428 case EVENT_START: 459 case EVENT_START:
429 return NotFeasible(event_args); 460 return NotFeasible(event_args);
430 case EVENT_STOP_CAPTURE: 461 case EVENT_STOP_CAPTURE:
431 return StopCaptureAndWaitForResult(event_args); 462 return StopCaptureAndWaitForResult(event_args);
432 case EVENT_AUDIO_DATA: 463 case EVENT_AUDIO_DATA:
433 return DetectUserSpeechOrTimeout(event_args); 464 return DetectUserSpeechOrTimeout(event_args);
434 case EVENT_ENGINE_RESULT: 465 case EVENT_ENGINE_RESULT:
435 return ProcessIntermediateResult(event_args); 466 return ProcessIntermediateResult(event_args);
436 case EVENT_ENGINE_ERROR: 467 case EVENT_ENGINE_ERROR:
437 case EVENT_AUDIO_ERROR: 468 case EVENT_AUDIO_ERROR:
438 return AbortWithError(event_args); 469 return AbortWithError(event_args);
439 } 470 }
440 break; 471 break;
441 case STATE_RECOGNIZING: 472 case STATE_RECOGNIZING:
442 switch (event) { 473 switch (event) {
443 case EVENT_ABORT: 474 case EVENT_ABORT:
444 return AbortWithError(event_args); 475 return AbortWithError(event_args);
476 case EVENT_PREPARE:
477 return NotFeasible(event_args);
445 case EVENT_START: 478 case EVENT_START:
446 return NotFeasible(event_args); 479 return NotFeasible(event_args);
447 case EVENT_STOP_CAPTURE: 480 case EVENT_STOP_CAPTURE:
448 return StopCaptureAndWaitForResult(event_args); 481 return StopCaptureAndWaitForResult(event_args);
449 case EVENT_AUDIO_DATA: 482 case EVENT_AUDIO_DATA:
450 return DetectEndOfSpeech(event_args); 483 return DetectEndOfSpeech(event_args);
451 case EVENT_ENGINE_RESULT: 484 case EVENT_ENGINE_RESULT:
452 return ProcessIntermediateResult(event_args); 485 return ProcessIntermediateResult(event_args);
453 case EVENT_ENGINE_ERROR: 486 case EVENT_ENGINE_ERROR:
454 case EVENT_AUDIO_ERROR: 487 case EVENT_AUDIO_ERROR:
455 return AbortWithError(event_args); 488 return AbortWithError(event_args);
456 } 489 }
457 break; 490 break;
458 case STATE_WAITING_FINAL_RESULT: 491 case STATE_WAITING_FINAL_RESULT:
459 switch (event) { 492 switch (event) {
460 case EVENT_ABORT: 493 case EVENT_ABORT:
461 return AbortWithError(event_args); 494 return AbortWithError(event_args);
495 case EVENT_PREPARE:
496 return NotFeasible(event_args);
462 case EVENT_START: 497 case EVENT_START:
463 return NotFeasible(event_args); 498 return NotFeasible(event_args);
464 case EVENT_STOP_CAPTURE: 499 case EVENT_STOP_CAPTURE:
465 case EVENT_AUDIO_DATA: 500 case EVENT_AUDIO_DATA:
466 return DoNothing(event_args); 501 return DoNothing(event_args);
467 case EVENT_ENGINE_RESULT: 502 case EVENT_ENGINE_RESULT:
468 return ProcessFinalResult(event_args); 503 return ProcessFinalResult(event_args);
469 case EVENT_ENGINE_ERROR: 504 case EVENT_ENGINE_ERROR:
470 case EVENT_AUDIO_ERROR: 505 case EVENT_AUDIO_ERROR:
471 return AbortWithError(event_args); 506 return AbortWithError(event_args);
(...skipping 36 matching lines...)
508 if (route_to_vumeter) { 543 if (route_to_vumeter) {
509 DCHECK(route_to_endpointer); // Depends on endpointer due to |rms|. 544 DCHECK(route_to_endpointer); // Depends on endpointer due to |rms|.
510 UpdateSignalAndNoiseLevels(rms, clip_detected); 545 UpdateSignalAndNoiseLevels(rms, clip_detected);
511 } 546 }
512 if (route_to_sr_engine) { 547 if (route_to_sr_engine) {
513 DCHECK(recognition_engine_.get() != NULL); 548 DCHECK(recognition_engine_.get() != NULL);
514 recognition_engine_->TakeAudioChunk(raw_audio); 549 recognition_engine_->TakeAudioChunk(raw_audio);
515 } 550 }
516 } 551 }
517 552
553 void SpeechRecognizerImpl::OnDeviceInfo(const media::AudioParameters& params) {
554 DCHECK_CURRENTLY_ON(BrowserThread::IO);
555 device_params_ = params;
556 DVLOG(1) << "Device parameters: " << device_params_.AsHumanReadableString();
557 DispatchEvent(FSMEventArgs(EVENT_START));
558 }
559
560 SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::PrepareRecognition(
561 const FSMEventArgs&) {
562 DCHECK(state_ == STATE_IDLE);
563 DCHECK(recognition_engine_.get() != NULL);
564 DCHECK(!IsCapturingAudio());
565 GetAudioSystem()->GetInputStreamParameters(
566 device_id_, base::Bind(&SpeechRecognizerImpl::OnDeviceInfo,
567 weak_ptr_factory_.GetWeakPtr()));
568
569 listener()->OnRecognitionStart(session_id());
570 return STATE_PREPARING;
571 }
572
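PrepareRecognition() binds the AudioSystem reply to a weak pointer, and Abort() (further down) invalidates the factory's weak pointers while in STATE_PREPARING, so a device-info reply that arrives after an abort is silently dropped instead of resurrecting the session. A rough standalone analogue of that cancellation pattern, using std::weak_ptr in place of base::WeakPtrFactory:

#include <functional>
#include <iostream>
#include <memory>

struct Recognizer {
  void OnDeviceInfo(int sample_rate) {
    std::cout << "got device parameters: " << sample_rate << " Hz\n";
  }
};

int main() {
  auto recognizer = std::make_shared<Recognizer>();
  std::weak_ptr<Recognizer> weak = recognizer;

  // The pending reply captures only a weak reference to the recognizer.
  std::function<void(int)> reply = [weak](int sample_rate) {
    if (auto self = weak.lock())
      self->OnDeviceInfo(sample_rate);  // Runs only if still valid.
  };

  recognizer.reset();  // Analogue of InvalidateWeakPtrs() during abort.
  reply(16000);        // Late reply arrives: becomes a no-op.
  return 0;
}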
518 SpeechRecognizerImpl::FSMState 573 SpeechRecognizerImpl::FSMState
519 SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) { 574 SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) {
520 DCHECK(state_ == STATE_IDLE); 575 DCHECK(state_ == STATE_PREPARING);
521 DCHECK(recognition_engine_.get() != NULL); 576 DCHECK(recognition_engine_.get() != NULL);
522 DCHECK(!IsCapturingAudio()); 577 DCHECK(!IsCapturingAudio());
523 const bool unit_test_is_active = (audio_manager_for_tests_ != NULL);
524 AudioManager* audio_manager = unit_test_is_active ?
525 audio_manager_for_tests_ :
526 AudioManager::Get();
527 DCHECK(audio_manager != NULL);
528 578
529 DVLOG(1) << "SpeechRecognizerImpl starting audio capture."; 579 DVLOG(1) << "SpeechRecognizerImpl starting audio capture.";
530 num_samples_recorded_ = 0; 580 num_samples_recorded_ = 0;
531 audio_level_ = 0; 581 audio_level_ = 0;
532 end_of_utterance_ = false; 582 end_of_utterance_ = false;
533 listener()->OnRecognitionStart(session_id());
534 583
535 // TODO(xians): Check if the OS has the device with |device_id_|, return 584 int chunk_duration_ms = recognition_engine_->GetDesiredAudioChunkDurationMs();
536 // |SPEECH_AUDIO_ERROR_DETAILS_NO_MIC| if the target device does not exist. 585
537 if (!audio_manager->HasAudioInputDevices()) { 586 if (!device_params_.IsValid()) {
587 DLOG(ERROR) << "Audio input device not found";
538 return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE, 588 return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE,
539 SPEECH_AUDIO_ERROR_DETAILS_NO_MIC)); 589 SPEECH_AUDIO_ERROR_DETAILS_NO_MIC));
540 } 590 }
541 591
542 int chunk_duration_ms = recognition_engine_->GetDesiredAudioChunkDurationMs();
543
544 AudioParameters in_params = audio_manager->GetInputStreamParameters(
545 device_id_);
546 if (!in_params.IsValid() && !unit_test_is_active) {
547 DLOG(ERROR) << "Invalid native audio input parameters";
548 return Abort(
549 SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE));
550 }
551
552 // Audio converter shall provide audio based on these parameters as output. 592 // Audio converter shall provide audio based on these parameters as output.
553 // Hard coded, WebSpeech specific parameters are utilized here. 593 // Hard coded, WebSpeech specific parameters are utilized here.
554 int frames_per_buffer = (kAudioSampleRate * chunk_duration_ms) / 1000; 594 int frames_per_buffer = (kAudioSampleRate * chunk_duration_ms) / 1000;
555 AudioParameters output_parameters = AudioParameters( 595 AudioParameters output_parameters = AudioParameters(
556 AudioParameters::AUDIO_PCM_LOW_LATENCY, kChannelLayout, kAudioSampleRate, 596 AudioParameters::AUDIO_PCM_LOW_LATENCY, kChannelLayout, kAudioSampleRate,
557 kNumBitsPerAudioSample, frames_per_buffer); 597 kNumBitsPerAudioSample, frames_per_buffer);
558 DVLOG(1) << "SRI::output_parameters: " 598 DVLOG(1) << "SRI::output_parameters: "
559 << output_parameters.AsHumanReadableString(); 599 << output_parameters.AsHumanReadableString();
560 600
561 // Audio converter will receive audio based on these parameters as input. 601 // Audio converter will receive audio based on these parameters as input.
562 // On Windows we start by verifying that Core Audio is supported. If not, 602 // On Windows we start by verifying that Core Audio is supported. If not,
563 // the WaveIn API is used and we might as well avoid all audio conversions 603 // the WaveIn API is used and we might as well avoid all audio conversions
564 // since WaveIn does the conversion for us. 604 // since WaveIn does the conversion for us.
565 // TODO(henrika): this code should be moved to platform dependent audio 605 // TODO(henrika): this code should be moved to platform dependent audio
566 // managers. 606 // managers.
567 bool use_native_audio_params = true; 607 bool use_native_audio_params = true;
568 #if defined(OS_WIN) 608 #if defined(OS_WIN)
569 use_native_audio_params = media::CoreAudioUtil::IsSupported(); 609 use_native_audio_params = media::CoreAudioUtil::IsSupported();
570 DVLOG_IF(1, !use_native_audio_params) << "Reverting to WaveIn for WebSpeech"; 610 DVLOG_IF(1, !use_native_audio_params) << "Reverting to WaveIn for WebSpeech";
571 #endif 611 #endif
572 612
573 AudioParameters input_parameters = output_parameters; 613 AudioParameters input_parameters = output_parameters;
574 if (use_native_audio_params && !unit_test_is_active) { 614
615 // AUDIO_FAKE means we are running a test.
616 if (use_native_audio_params &&
617 device_params_.format() != media::AudioParameters::AUDIO_FAKE) {
575 // Use native audio parameters but avoid opening up at the native buffer 618 // Use native audio parameters but avoid opening up at the native buffer
576 // size. Instead use same frame size (in milliseconds) as WebSpeech uses. 619 // size. Instead use same frame size (in milliseconds) as WebSpeech uses.
577 // We rely on internal buffers in the audio back-end to fulfill this request 620 // We rely on internal buffers in the audio back-end to fulfill this request
578 // and the idea is to simplify the audio conversion since each Convert() 621 // and the idea is to simplify the audio conversion since each Convert()
579 // call will then render exactly one ProvideInput() call. 622 // call will then render exactly one ProvideInput() call.
580 // in_params.sample_rate() 623 input_parameters = device_params_;
581 input_parameters = in_params;
582 frames_per_buffer = 624 frames_per_buffer =
583 ((in_params.sample_rate() * chunk_duration_ms) / 1000.0) + 0.5; 625 ((input_parameters.sample_rate() * chunk_duration_ms) / 1000.0) + 0.5;
584 input_parameters.set_frames_per_buffer(frames_per_buffer); 626 input_parameters.set_frames_per_buffer(frames_per_buffer);
585 DVLOG(1) << "SRI::input_parameters: " 627 DVLOG(1) << "SRI::input_parameters: "
586 << input_parameters.AsHumanReadableString(); 628 << input_parameters.AsHumanReadableString();
587 } 629 }
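For reference, the buffer sizing above: the output side always uses the fixed 16 kHz WebSpeech rate, while the input side is recomputed from the device's native rate and rounded to the nearest frame, so both buffers cover the same chunk duration and each Convert() maps to one ProvideInput(). A worked example, assuming a 100 ms chunk (the real value comes from GetDesiredAudioChunkDurationMs()) and a 44.1 kHz device:

#include <iostream>

int main() {
  const int chunk_duration_ms = 100;  // assumed for illustration only
  const int output_rate = 16000;      // kAudioSampleRate
  const int native_rate = 44100;      // e.g. a common input device rate

  // Output side (integer math): (16000 * 100) / 1000 = 1600 frames.
  std::cout << (output_rate * chunk_duration_ms) / 1000 << " output frames\n";

  // Input side (rounded): (44100 * 100) / 1000.0 + 0.5 = 4410.5 -> 4410 frames,
  // i.e. the same 100 ms of audio at the device's native rate.
  int input_frames =
      static_cast<int>(((native_rate * chunk_duration_ms) / 1000.0) + 0.5);
  std::cout << input_frames << " input frames\n";
  return 0;
}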
588 630
589 // Create an audio converter which converts data between native input format 631 // Create an audio converter which converts data between native input format
590 // and WebSpeech specific output format. 632 // and WebSpeech specific output format.
591 audio_converter_.reset( 633 audio_converter_.reset(
592 new OnDataConverter(input_parameters, output_parameters)); 634 new OnDataConverter(input_parameters, output_parameters));
593 635
594 audio_controller_ = AudioInputController::Create( 636 audio_controller_ = AudioInputController::Create(
595 audio_manager, this, this, nullptr, nullptr, input_parameters, device_id_, 637 GetAudioSystem()->GetAudioManager(), this, this, nullptr, nullptr,
638 input_parameters, device_id_,
596 /*agc_is_enabled*/ false); 639 /*agc_is_enabled*/ false);
597 640
598 if (!audio_controller_.get()) { 641 if (!audio_controller_.get()) {
599 return Abort( 642 return Abort(
600 SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE)); 643 SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE));
601 } 644 }
602 645
603 audio_log_->OnCreated(0, input_parameters, device_id_); 646 audio_log_->OnCreated(0, input_parameters, device_id_);
604 647
605 // The endpointer needs to estimate the environment/background noise before 648 // The endpointer needs to estimate the environment/background noise before
(...skipping 79 matching lines...)
685 return Abort( 728 return Abort(
686 SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE)); 729 SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE));
687 } else if (event_args.event == EVENT_ENGINE_ERROR) { 730 } else if (event_args.event == EVENT_ENGINE_ERROR) {
688 return Abort(event_args.engine_error); 731 return Abort(event_args.engine_error);
689 } 732 }
690 return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_ABORTED)); 733 return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_ABORTED));
691 } 734 }
692 735
693 SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::Abort( 736 SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::Abort(
694 const SpeechRecognitionError& error) { 737 const SpeechRecognitionError& error) {
738 DCHECK_CURRENTLY_ON(BrowserThread::IO);
739
695 if (IsCapturingAudio()) 740 if (IsCapturingAudio())
696 CloseAudioControllerAsynchronously(); 741 CloseAudioControllerAsynchronously();
697 742
698 DVLOG(1) << "SpeechRecognizerImpl canceling recognition. "; 743 DVLOG(1) << "SpeechRecognizerImpl canceling recognition. ";
699 744
745 if (state_ == STATE_PREPARING) {
746 // Cancel an outstanding reply from AudioSystem.
747 weak_ptr_factory_.InvalidateWeakPtrs();
748 }
749
700 // The recognition engine is initialized only after STATE_STARTING. 750 // The recognition engine is initialized only after STATE_STARTING.
701 if (state_ > STATE_STARTING) { 751 if (state_ > STATE_STARTING) {
702 DCHECK(recognition_engine_.get() != NULL); 752 DCHECK(recognition_engine_.get() != NULL);
703 recognition_engine_->EndRecognition(); 753 recognition_engine_->EndRecognition();
704 } 754 }
705 755
706 if (state_ > STATE_WAITING_FOR_SPEECH && state_ < STATE_WAITING_FINAL_RESULT) 756 if (state_ > STATE_WAITING_FOR_SPEECH && state_ < STATE_WAITING_FINAL_RESULT)
707 listener()->OnSoundEnd(session_id()); 757 listener()->OnSoundEnd(session_id());
708 758
709 if (state_ > STATE_STARTING && state_ < STATE_WAITING_FINAL_RESULT) 759 if (state_ > STATE_STARTING && state_ < STATE_WAITING_FINAL_RESULT)
(...skipping 116 matching lines...)
826 876
827 float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) / 877 float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) /
828 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); 878 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped);
829 noise_level = std::min(std::max(0.0f, noise_level), 879 noise_level = std::min(std::max(0.0f, noise_level),
830 kAudioMeterRangeMaxUnclipped); 880 kAudioMeterRangeMaxUnclipped);
831 881
832 listener()->OnAudioLevelsChange( 882 listener()->OnAudioLevelsChange(
833 session_id(), clip_detected ? 1.0f : audio_level_, noise_level); 883 session_id(), clip_detected ? 1.0f : audio_level_, noise_level);
834 } 884 }
835 885
836 void SpeechRecognizerImpl::SetAudioManagerForTesting( 886 void SpeechRecognizerImpl::SetAudioSystemForTesting(
837 AudioManager* audio_manager) { 887 media::AudioSystem* audio_system) {
838 audio_manager_for_tests_ = audio_manager; 888 audio_system_for_tests_ = audio_system;
889 }
890
891 media::AudioSystem* SpeechRecognizerImpl::GetAudioSystem() {
892 return audio_system_for_tests_ ? audio_system_for_tests_ : audio_system_;
839 } 893 }
840 894
841 SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value) 895 SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value)
842 : event(event_value), 896 : event(event_value),
843 audio_data(NULL), 897 audio_data(NULL),
844 engine_error(SPEECH_RECOGNITION_ERROR_NONE) { 898 engine_error(SPEECH_RECOGNITION_ERROR_NONE) {
845 } 899 }
846 900
847 SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(const FSMEventArgs& other) = 901 SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(const FSMEventArgs& other) =
848 default; 902 default;
849 903
850 SpeechRecognizerImpl::FSMEventArgs::~FSMEventArgs() { 904 SpeechRecognizerImpl::FSMEventArgs::~FSMEventArgs() {
851 } 905 }
852 906
853 } // namespace content 907 } // namespace content