Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(62)

Side by Side Diff: content/browser/speech/speech_recognizer_impl.cc

Issue 15907012: Implement SpeechRecognizerImplAndroid (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Convert error codes in Java, refactor *{,JNI} methods into single methods, nits Created 7 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "content/browser/speech/speech_recognizer_impl.h" 5 #include "content/browser/speech/speech_recognizer_impl.h"
6 6
7 #include "base/basictypes.h" 7 #include "base/basictypes.h"
8 #include "base/bind.h" 8 #include "base/bind.h"
9 #include "base/time.h" 9 #include "base/time.h"
10 #include "content/browser/browser_main_loop.h" 10 #include "content/browser/browser_main_loop.h"
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after
74 SpeechRecognitionEventListener* listener, 74 SpeechRecognitionEventListener* listener,
75 int session_id, 75 int session_id,
76 bool is_single_shot, 76 bool is_single_shot,
77 SpeechRecognitionEngine* engine) 77 SpeechRecognitionEngine* engine)
78 : SpeechRecognizer(listener, session_id), 78 : SpeechRecognizer(listener, session_id),
79 recognition_engine_(engine), 79 recognition_engine_(engine),
80 endpointer_(kAudioSampleRate), 80 endpointer_(kAudioSampleRate),
81 is_dispatching_event_(false), 81 is_dispatching_event_(false),
82 is_single_shot_(is_single_shot), 82 is_single_shot_(is_single_shot),
83 state_(STATE_IDLE) { 83 state_(STATE_IDLE) {
84 DCHECK(listener_ != NULL); 84 DCHECK(this->listener() != NULL);
bulach 2013/06/11 07:24:48 nit: move the DCHECK to the base class, and there
janx 2013/06/12 14:47:14 Moved DCHECK to base class.
85 DCHECK(recognition_engine_ != NULL); 85 DCHECK(recognition_engine_ != NULL);
86 if (is_single_shot) { 86 if (is_single_shot) {
87 // In single shot recognition, the session is automatically ended after: 87 // In single shot recognition, the session is automatically ended after:
88 // - 0.5 seconds of silence if time < 3 seconds 88 // - 0.5 seconds of silence if time < 3 seconds
89 // - 1 second of silence if time >= 3 seconds 89 // - 1 second of silence if time >= 3 seconds
90 endpointer_.set_speech_input_complete_silence_length( 90 endpointer_.set_speech_input_complete_silence_length(
91 base::Time::kMicrosecondsPerSecond / 2); 91 base::Time::kMicrosecondsPerSecond / 2);
92 endpointer_.set_long_speech_input_complete_silence_length( 92 endpointer_.set_long_speech_input_complete_silence_length(
93 base::Time::kMicrosecondsPerSecond); 93 base::Time::kMicrosecondsPerSecond);
94 endpointer_.set_long_speech_length(3 * base::Time::kMicrosecondsPerSecond); 94 endpointer_.set_long_speech_length(3 * base::Time::kMicrosecondsPerSecond);
(...skipping 294 matching lines...) Expand 10 before | Expand all | Expand 10 after
389 DCHECK(recognition_engine_.get() != NULL); 389 DCHECK(recognition_engine_.get() != NULL);
390 DCHECK(!IsCapturingAudio()); 390 DCHECK(!IsCapturingAudio());
391 AudioManager* audio_manager = (audio_manager_for_tests_ != NULL) ? 391 AudioManager* audio_manager = (audio_manager_for_tests_ != NULL) ?
392 audio_manager_for_tests_ : 392 audio_manager_for_tests_ :
393 BrowserMainLoop::GetAudioManager(); 393 BrowserMainLoop::GetAudioManager();
394 DCHECK(audio_manager != NULL); 394 DCHECK(audio_manager != NULL);
395 395
396 DVLOG(1) << "SpeechRecognizerImpl starting audio capture."; 396 DVLOG(1) << "SpeechRecognizerImpl starting audio capture.";
397 num_samples_recorded_ = 0; 397 num_samples_recorded_ = 0;
398 audio_level_ = 0; 398 audio_level_ = 0;
399 listener_->OnRecognitionStart(session_id_); 399 listener()->OnRecognitionStart(session_id());
400 400
401 if (!audio_manager->HasAudioInputDevices()) { 401 if (!audio_manager->HasAudioInputDevices()) {
402 return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO, 402 return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO,
403 SPEECH_AUDIO_ERROR_DETAILS_NO_MIC)); 403 SPEECH_AUDIO_ERROR_DETAILS_NO_MIC));
404 } 404 }
405 405
406 const int samples_per_packet = (kAudioSampleRate * 406 const int samples_per_packet = (kAudioSampleRate *
407 recognition_engine_->GetDesiredAudioChunkDurationMs()) / 1000; 407 recognition_engine_->GetDesiredAudioChunkDurationMs()) / 1000;
408 AudioParameters params(AudioParameters::AUDIO_PCM_LINEAR, kChannelLayout, 408 AudioParameters params(AudioParameters::AUDIO_PCM_LINEAR, kChannelLayout,
409 kAudioSampleRate, kNumBitsPerAudioSample, 409 kAudioSampleRate, kNumBitsPerAudioSample,
(...skipping 12 matching lines...) Expand all
422 audio_controller_->Record(); 422 audio_controller_->Record();
423 return STATE_STARTING; 423 return STATE_STARTING;
424 } 424 }
425 425
426 SpeechRecognizerImpl::FSMState 426 SpeechRecognizerImpl::FSMState
427 SpeechRecognizerImpl::StartRecognitionEngine(const FSMEventArgs& event_args) { 427 SpeechRecognizerImpl::StartRecognitionEngine(const FSMEventArgs& event_args) {
428 // This is the first audio packet captured, so the recognition engine is 428 // This is the first audio packet captured, so the recognition engine is
429 // started and the delegate notified about the event. 429 // started and the delegate notified about the event.
430 DCHECK(recognition_engine_.get() != NULL); 430 DCHECK(recognition_engine_.get() != NULL);
431 recognition_engine_->StartRecognition(); 431 recognition_engine_->StartRecognition();
432 listener_->OnAudioStart(session_id_); 432 listener()->OnAudioStart(session_id());
433 433
434 // This is a little hack, since TakeAudioChunk() is already called by 434 // This is a little hack, since TakeAudioChunk() is already called by
435 // ProcessAudioPipeline(). It is the best tradeoff, unless we allow dropping 435 // ProcessAudioPipeline(). It is the best tradeoff, unless we allow dropping
436 // the first audio chunk captured after opening the audio device. 436 // the first audio chunk captured after opening the audio device.
437 recognition_engine_->TakeAudioChunk(*(event_args.audio_data.get())); 437 recognition_engine_->TakeAudioChunk(*(event_args.audio_data.get()));
438 return STATE_ESTIMATING_ENVIRONMENT; 438 return STATE_ESTIMATING_ENVIRONMENT;
439 } 439 }
440 440
441 SpeechRecognizerImpl::FSMState 441 SpeechRecognizerImpl::FSMState
442 SpeechRecognizerImpl::WaitEnvironmentEstimationCompletion(const FSMEventArgs&) { 442 SpeechRecognizerImpl::WaitEnvironmentEstimationCompletion(const FSMEventArgs&) {
443 DCHECK(endpointer_.IsEstimatingEnvironment()); 443 DCHECK(endpointer_.IsEstimatingEnvironment());
444 if (GetElapsedTimeMs() >= kEndpointerEstimationTimeMs) { 444 if (GetElapsedTimeMs() >= kEndpointerEstimationTimeMs) {
445 endpointer_.SetUserInputMode(); 445 endpointer_.SetUserInputMode();
446 listener_->OnEnvironmentEstimationComplete(session_id_); 446 listener()->OnEnvironmentEstimationComplete(session_id());
447 return STATE_WAITING_FOR_SPEECH; 447 return STATE_WAITING_FOR_SPEECH;
448 } else { 448 } else {
449 return STATE_ESTIMATING_ENVIRONMENT; 449 return STATE_ESTIMATING_ENVIRONMENT;
450 } 450 }
451 } 451 }
452 452
453 SpeechRecognizerImpl::FSMState 453 SpeechRecognizerImpl::FSMState
454 SpeechRecognizerImpl::DetectUserSpeechOrTimeout(const FSMEventArgs&) { 454 SpeechRecognizerImpl::DetectUserSpeechOrTimeout(const FSMEventArgs&) {
455 if (endpointer_.DidStartReceivingSpeech()) { 455 if (endpointer_.DidStartReceivingSpeech()) {
456 listener_->OnSoundStart(session_id_); 456 listener()->OnSoundStart(session_id());
457 return STATE_RECOGNIZING; 457 return STATE_RECOGNIZING;
458 } else if (GetElapsedTimeMs() >= kNoSpeechTimeoutMs) { 458 } else if (GetElapsedTimeMs() >= kNoSpeechTimeoutMs) {
459 return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_NO_SPEECH)); 459 return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_NO_SPEECH));
460 } 460 }
461 return STATE_WAITING_FOR_SPEECH; 461 return STATE_WAITING_FOR_SPEECH;
462 } 462 }
463 463
464 SpeechRecognizerImpl::FSMState 464 SpeechRecognizerImpl::FSMState
465 SpeechRecognizerImpl::DetectEndOfSpeech(const FSMEventArgs& event_args) { 465 SpeechRecognizerImpl::DetectEndOfSpeech(const FSMEventArgs& event_args) {
466 if (endpointer_.speech_input_complete()) 466 if (endpointer_.speech_input_complete())
467 return StopCaptureAndWaitForResult(event_args); 467 return StopCaptureAndWaitForResult(event_args);
468 return STATE_RECOGNIZING; 468 return STATE_RECOGNIZING;
469 } 469 }
470 470
471 SpeechRecognizerImpl::FSMState 471 SpeechRecognizerImpl::FSMState
472 SpeechRecognizerImpl::StopCaptureAndWaitForResult(const FSMEventArgs&) { 472 SpeechRecognizerImpl::StopCaptureAndWaitForResult(const FSMEventArgs&) {
473 DCHECK(state_ >= STATE_ESTIMATING_ENVIRONMENT && state_ <= STATE_RECOGNIZING); 473 DCHECK(state_ >= STATE_ESTIMATING_ENVIRONMENT && state_ <= STATE_RECOGNIZING);
474 474
475 DVLOG(1) << "Concluding recognition"; 475 DVLOG(1) << "Concluding recognition";
476 CloseAudioControllerAsynchronously(); 476 CloseAudioControllerAsynchronously();
477 recognition_engine_->AudioChunksEnded(); 477 recognition_engine_->AudioChunksEnded();
478 478
479 if (state_ > STATE_WAITING_FOR_SPEECH) 479 if (state_ > STATE_WAITING_FOR_SPEECH)
480 listener_->OnSoundEnd(session_id_); 480 listener()->OnSoundEnd(session_id());
481 481
482 listener_->OnAudioEnd(session_id_); 482 listener()->OnAudioEnd(session_id());
483 return STATE_WAITING_FINAL_RESULT; 483 return STATE_WAITING_FINAL_RESULT;
484 } 484 }
485 485
486 SpeechRecognizerImpl::FSMState 486 SpeechRecognizerImpl::FSMState
487 SpeechRecognizerImpl::AbortSilently(const FSMEventArgs& event_args) { 487 SpeechRecognizerImpl::AbortSilently(const FSMEventArgs& event_args) {
488 DCHECK_NE(event_args.event, EVENT_AUDIO_ERROR); 488 DCHECK_NE(event_args.event, EVENT_AUDIO_ERROR);
489 DCHECK_NE(event_args.event, EVENT_ENGINE_ERROR); 489 DCHECK_NE(event_args.event, EVENT_ENGINE_ERROR);
490 return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_NONE)); 490 return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_NONE));
491 } 491 }
492 492
(...skipping 14 matching lines...) Expand all
507 507
508 DVLOG(1) << "SpeechRecognizerImpl canceling recognition. "; 508 DVLOG(1) << "SpeechRecognizerImpl canceling recognition. ";
509 509
510 // The recognition engine is initialized only after STATE_STARTING. 510 // The recognition engine is initialized only after STATE_STARTING.
511 if (state_ > STATE_STARTING) { 511 if (state_ > STATE_STARTING) {
512 DCHECK(recognition_engine_.get() != NULL); 512 DCHECK(recognition_engine_.get() != NULL);
513 recognition_engine_->EndRecognition(); 513 recognition_engine_->EndRecognition();
514 } 514 }
515 515
516 if (state_ > STATE_WAITING_FOR_SPEECH && state_ < STATE_WAITING_FINAL_RESULT) 516 if (state_ > STATE_WAITING_FOR_SPEECH && state_ < STATE_WAITING_FINAL_RESULT)
517 listener_->OnSoundEnd(session_id_); 517 listener()->OnSoundEnd(session_id());
518 518
519 if (state_ > STATE_STARTING && state_ < STATE_WAITING_FINAL_RESULT) 519 if (state_ > STATE_STARTING && state_ < STATE_WAITING_FINAL_RESULT)
520 listener_->OnAudioEnd(session_id_); 520 listener()->OnAudioEnd(session_id());
521 521
522 if (error.code != SPEECH_RECOGNITION_ERROR_NONE) 522 if (error.code != SPEECH_RECOGNITION_ERROR_NONE)
523 listener_->OnRecognitionError(session_id_, error); 523 listener()->OnRecognitionError(session_id(), error);
524 524
525 listener_->OnRecognitionEnd(session_id_); 525 listener()->OnRecognitionEnd(session_id());
526 526
527 return STATE_ENDED; 527 return STATE_ENDED;
528 } 528 }
529 529
530 SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::ProcessIntermediateResult( 530 SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::ProcessIntermediateResult(
531 const FSMEventArgs& event_args) { 531 const FSMEventArgs& event_args) {
532 // Provisional results can occur only during continuous (non one-shot) mode. 532 // Provisional results can occur only during continuous (non one-shot) mode.
533 // If this check is reached it means that a continuous speech recognition 533 // If this check is reached it means that a continuous speech recognition
534 // engine is being used for a one shot recognition. 534 // engine is being used for a one shot recognition.
535 DCHECK_EQ(false, is_single_shot_); 535 DCHECK_EQ(false, is_single_shot_);
536 536
537 // In continuous recognition, intermediate results can occur even when we are 537 // In continuous recognition, intermediate results can occur even when we are
538 // in the ESTIMATING_ENVIRONMENT or WAITING_FOR_SPEECH states (if the 538 // in the ESTIMATING_ENVIRONMENT or WAITING_FOR_SPEECH states (if the
539 // recognition engine is "faster" than our endpointer). In these cases we 539 // recognition engine is "faster" than our endpointer). In these cases we
540 // skip the endpointer and fast-forward to the RECOGNIZING state, preserving 540 // skip the endpointer and fast-forward to the RECOGNIZING state, preserving
541 // the order in which events are triggered. 541 // the order in which events are triggered.
542 if (state_ == STATE_ESTIMATING_ENVIRONMENT) { 542 if (state_ == STATE_ESTIMATING_ENVIRONMENT) {
543 DCHECK(endpointer_.IsEstimatingEnvironment()); 543 DCHECK(endpointer_.IsEstimatingEnvironment());
544 endpointer_.SetUserInputMode(); 544 endpointer_.SetUserInputMode();
545 listener_->OnEnvironmentEstimationComplete(session_id_); 545 listener()->OnEnvironmentEstimationComplete(session_id());
546 } else if (state_ == STATE_WAITING_FOR_SPEECH) { 546 } else if (state_ == STATE_WAITING_FOR_SPEECH) {
547 listener_->OnSoundStart(session_id_); 547 listener()->OnSoundStart(session_id());
548 } else { 548 } else {
549 DCHECK_EQ(STATE_RECOGNIZING, state_); 549 DCHECK_EQ(STATE_RECOGNIZING, state_);
550 } 550 }
551 551
552 listener_->OnRecognitionResults(session_id_, event_args.engine_results); 552 listener()->OnRecognitionResults(session_id(), event_args.engine_results);
553 return STATE_RECOGNIZING; 553 return STATE_RECOGNIZING;
554 } 554 }
555 555
556 SpeechRecognizerImpl::FSMState 556 SpeechRecognizerImpl::FSMState
557 SpeechRecognizerImpl::ProcessFinalResult(const FSMEventArgs& event_args) { 557 SpeechRecognizerImpl::ProcessFinalResult(const FSMEventArgs& event_args) {
558 const SpeechRecognitionResults& results = event_args.engine_results; 558 const SpeechRecognitionResults& results = event_args.engine_results;
559 SpeechRecognitionResults::const_iterator i = results.begin(); 559 SpeechRecognitionResults::const_iterator i = results.begin();
560 bool provisional_results_pending = false; 560 bool provisional_results_pending = false;
561 bool results_are_empty = true; 561 bool results_are_empty = true;
562 for (; i != results.end(); ++i) { 562 for (; i != results.end(); ++i) {
563 const SpeechRecognitionResult& result = *i; 563 const SpeechRecognitionResult& result = *i;
564 if (result.is_provisional) { 564 if (result.is_provisional) {
565 provisional_results_pending = true; 565 provisional_results_pending = true;
566 DCHECK(!is_single_shot_); 566 DCHECK(!is_single_shot_);
567 } else if (results_are_empty) { 567 } else if (results_are_empty) {
568 results_are_empty = result.hypotheses.empty(); 568 results_are_empty = result.hypotheses.empty();
569 } 569 }
570 } 570 }
571 571
572 if (provisional_results_pending) { 572 if (provisional_results_pending) {
573 listener_->OnRecognitionResults(session_id_, results); 573 listener()->OnRecognitionResults(session_id(), results);
574 // We don't end the recognition if a provisional result is received in 574 // We don't end the recognition if a provisional result is received in
575 // STATE_WAITING_FINAL_RESULT. A definitive result will come next and will 575 // STATE_WAITING_FINAL_RESULT. A definitive result will come next and will
576 // end the recognition. 576 // end the recognition.
577 return state_; 577 return state_;
578 } 578 }
579 579
580 recognition_engine_->EndRecognition(); 580 recognition_engine_->EndRecognition();
581 581
582 if (!results_are_empty) { 582 if (!results_are_empty) {
583 // We could receive an empty result (which we won't propagate further) 583 // We could receive an empty result (which we won't propagate further)
584 // in the following (continuous) scenario: 584 // in the following (continuous) scenario:
585 // 1. The caller starts pushing audio and receives some results; 585 // 1. The caller starts pushing audio and receives some results;
586 // 2. A |StopAudioCapture| is issued later; 586 // 2. A |StopAudioCapture| is issued later;
587 // 3. The final audio frames captured in the interval ]1,2] do not lead to 587 // 3. The final audio frames captured in the interval ]1,2] do not lead to
588 // any result (nor any error); 588 // any result (nor any error);
589 // 4. The speech recognition engine, therefore, emits an empty result to 589 // 4. The speech recognition engine, therefore, emits an empty result to
590 // notify that the recognition has ended with no error, but without any 590 // notify that the recognition has ended with no error, but without any
591 // further result. 591 // further result.
592 listener_->OnRecognitionResults(session_id_, results); 592 listener()->OnRecognitionResults(session_id(), results);
593 } 593 }
594 594
595 listener_->OnRecognitionEnd(session_id_); 595 listener()->OnRecognitionEnd(session_id());
596 return STATE_ENDED; 596 return STATE_ENDED;
597 } 597 }
598 598
599 SpeechRecognizerImpl::FSMState 599 SpeechRecognizerImpl::FSMState
600 SpeechRecognizerImpl::DoNothing(const FSMEventArgs&) const { 600 SpeechRecognizerImpl::DoNothing(const FSMEventArgs&) const {
601 return state_; // Just keep the current state. 601 return state_; // Just keep the current state.
602 } 602 }
603 603
604 SpeechRecognizerImpl::FSMState 604 SpeechRecognizerImpl::FSMState
605 SpeechRecognizerImpl::NotFeasible(const FSMEventArgs& event_args) { 605 SpeechRecognizerImpl::NotFeasible(const FSMEventArgs& event_args) {
(...skipping 29 matching lines...) Expand all
635 level = std::min(std::max(0.0f, level), kAudioMeterRangeMaxUnclipped); 635 level = std::min(std::max(0.0f, level), kAudioMeterRangeMaxUnclipped);
636 const float smoothing_factor = (level > audio_level_) ? kUpSmoothingFactor : 636 const float smoothing_factor = (level > audio_level_) ? kUpSmoothingFactor :
637 kDownSmoothingFactor; 637 kDownSmoothingFactor;
638 audio_level_ += (level - audio_level_) * smoothing_factor; 638 audio_level_ += (level - audio_level_) * smoothing_factor;
639 639
640 float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) / 640 float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) /
641 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); 641 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped);
642 noise_level = std::min(std::max(0.0f, noise_level), 642 noise_level = std::min(std::max(0.0f, noise_level),
643 kAudioMeterRangeMaxUnclipped); 643 kAudioMeterRangeMaxUnclipped);
644 644
645 listener_->OnAudioLevelsChange( 645 listener()->OnAudioLevelsChange(
646 session_id_, clip_detected ? 1.0f : audio_level_, noise_level); 646 session_id(), clip_detected ? 1.0f : audio_level_, noise_level);
647 } 647 }
648 648
649 void SpeechRecognizerImpl::SetAudioManagerForTests( 649 void SpeechRecognizerImpl::SetAudioManagerForTests(
650 AudioManager* audio_manager) { 650 AudioManager* audio_manager) {
651 audio_manager_for_tests_ = audio_manager; 651 audio_manager_for_tests_ = audio_manager;
652 } 652 }
653 653
654 SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value) 654 SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value)
655 : event(event_value), 655 : event(event_value),
656 audio_data(NULL), 656 audio_data(NULL),
657 engine_error(SPEECH_RECOGNITION_ERROR_NONE) { 657 engine_error(SPEECH_RECOGNITION_ERROR_NONE) {
658 } 658 }
659 659
660 SpeechRecognizerImpl::FSMEventArgs::~FSMEventArgs() { 660 SpeechRecognizerImpl::FSMEventArgs::~FSMEventArgs() {
661 } 661 }
662 662
663 } // namespace content 663 } // namespace content
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698