| Index: content/browser/speech/speech_recognizer_impl.cc
|
| diff --git a/content/browser/speech/speech_recognizer_impl.cc b/content/browser/speech/speech_recognizer_impl.cc
|
| index 8a57992e749c026611e93f4186859bb86e50f020..acec2671135c3e8322e5e6e487096def18d06683 100644
|
| --- a/content/browser/speech/speech_recognizer_impl.cc
|
| +++ b/content/browser/speech/speech_recognizer_impl.cc
|
| @@ -17,6 +17,8 @@
|
| #include "content/browser/speech/audio_buffer.h"
|
| #include "content/public/browser/speech_recognition_event_listener.h"
|
| #include "media/audio/audio_file_writer.h"
|
| +#include "media/audio/audio_manager.h"
|
| +#include "media/audio/audio_system.h"
|
| #include "media/base/audio_converter.h"
|
|
|
| #if defined(OS_WIN)
|
| @@ -112,7 +114,7 @@ const ChannelLayout SpeechRecognizerImpl::kChannelLayout =
|
| const int SpeechRecognizerImpl::kNumBitsPerAudioSample = 16;
|
| const int SpeechRecognizerImpl::kNoSpeechTimeoutMs = 8000;
|
| const int SpeechRecognizerImpl::kEndpointerEstimationTimeMs = 300;
|
| -media::AudioManager* SpeechRecognizerImpl::audio_manager_for_tests_ = NULL;
|
| +media::AudioSystem* SpeechRecognizerImpl::audio_system_for_tests_ = nullptr;
|
|
|
| static_assert(SpeechRecognizerImpl::kNumBitsPerAudioSample % 8 == 0,
|
| "kNumBitsPerAudioSample must be a multiple of 8");
|
| @@ -176,11 +178,13 @@ double SpeechRecognizerImpl::OnDataConverter::ProvideInput(
|
|
|
| SpeechRecognizerImpl::SpeechRecognizerImpl(
|
| SpeechRecognitionEventListener* listener,
|
| + media::AudioSystem* audio_system,
|
| int session_id,
|
| bool continuous,
|
| bool provisional_results,
|
| SpeechRecognitionEngine* engine)
|
| : SpeechRecognizer(listener, session_id),
|
| + audio_system_(audio_system),
|
| recognition_engine_(engine),
|
| endpointer_(kAudioSampleRate),
|
| audio_log_(MediaInternals::GetInstance()->CreateAudioLog(
|
| @@ -188,8 +192,10 @@ SpeechRecognizerImpl::SpeechRecognizerImpl(
|
| is_dispatching_event_(false),
|
| provisional_results_(provisional_results),
|
| end_of_utterance_(false),
|
| - state_(STATE_IDLE) {
|
| - DCHECK(recognition_engine_ != NULL);
|
| + state_(STATE_IDLE),
|
| + weak_ptr_factory_(this) {
|
| + DCHECK(recognition_engine_ != nullptr);
|
| + DCHECK(audio_system_ != nullptr);
|
| if (!continuous) {
|
| // In single shot (non-continous) recognition,
|
| // the session is automatically ended after:
|
| @@ -223,8 +229,8 @@ void SpeechRecognizerImpl::StartRecognition(const std::string& device_id) {
|
| device_id_ = device_id;
|
|
|
| BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
|
| - base::Bind(&SpeechRecognizerImpl::DispatchEvent,
|
| - this, FSMEventArgs(EVENT_START)));
|
| + base::Bind(&SpeechRecognizerImpl::DispatchEvent, this,
|
| + FSMEventArgs(EVENT_PREPARE)));
|
| }
|
|
|
| void SpeechRecognizerImpl::AbortRecognition() {
|
| @@ -376,6 +382,25 @@ SpeechRecognizerImpl::ExecuteTransitionAndGetNextState(
|
| // EVENT_STOP_CAPTURE below once speech input extensions are fixed.
|
| case EVENT_ABORT:
|
| return AbortSilently(event_args);
|
| + case EVENT_PREPARE:
|
| + return PrepareRecognition(event_args);
|
| + case EVENT_START:
|
| + return NotFeasible(event_args);
|
| + case EVENT_STOP_CAPTURE:
|
| + return AbortSilently(event_args);
|
| + case EVENT_AUDIO_DATA: // Corner cases related to queued messages
|
| + case EVENT_ENGINE_RESULT: // being dispatched late.
|
| + case EVENT_ENGINE_ERROR:
|
| + case EVENT_AUDIO_ERROR:
|
| + return DoNothing(event_args);
|
| + }
|
| + break;
|
| + case STATE_PREPARING:
|
| + switch (event) {
|
| + case EVENT_ABORT:
|
| + return AbortSilently(event_args);
|
| + case EVENT_PREPARE:
|
| + return NotFeasible(event_args);
|
| case EVENT_START:
|
| return StartRecording(event_args);
|
| case EVENT_STOP_CAPTURE:
|
| @@ -391,6 +416,8 @@ SpeechRecognizerImpl::ExecuteTransitionAndGetNextState(
|
| switch (event) {
|
| case EVENT_ABORT:
|
| return AbortWithError(event_args);
|
| + case EVENT_PREPARE:
|
| + return NotFeasible(event_args);
|
| case EVENT_START:
|
| return NotFeasible(event_args);
|
| case EVENT_STOP_CAPTURE:
|
| @@ -408,6 +435,8 @@ SpeechRecognizerImpl::ExecuteTransitionAndGetNextState(
|
| switch (event) {
|
| case EVENT_ABORT:
|
| return AbortWithError(event_args);
|
| + case EVENT_PREPARE:
|
| + return NotFeasible(event_args);
|
| case EVENT_START:
|
| return NotFeasible(event_args);
|
| case EVENT_STOP_CAPTURE:
|
| @@ -425,6 +454,8 @@ SpeechRecognizerImpl::ExecuteTransitionAndGetNextState(
|
| switch (event) {
|
| case EVENT_ABORT:
|
| return AbortWithError(event_args);
|
| + case EVENT_PREPARE:
|
| + return NotFeasible(event_args);
|
| case EVENT_START:
|
| return NotFeasible(event_args);
|
| case EVENT_STOP_CAPTURE:
|
| @@ -442,6 +473,8 @@ SpeechRecognizerImpl::ExecuteTransitionAndGetNextState(
|
| switch (event) {
|
| case EVENT_ABORT:
|
| return AbortWithError(event_args);
|
| + case EVENT_PREPARE:
|
| + return NotFeasible(event_args);
|
| case EVENT_START:
|
| return NotFeasible(event_args);
|
| case EVENT_STOP_CAPTURE:
|
| @@ -459,6 +492,8 @@ SpeechRecognizerImpl::ExecuteTransitionAndGetNextState(
|
| switch (event) {
|
| case EVENT_ABORT:
|
| return AbortWithError(event_args);
|
| + case EVENT_PREPARE:
|
| + return NotFeasible(event_args);
|
| case EVENT_START:
|
| return NotFeasible(event_args);
|
| case EVENT_STOP_CAPTURE:
|
| @@ -515,38 +550,43 @@ void SpeechRecognizerImpl::ProcessAudioPipeline(const AudioChunk& raw_audio) {
|
| }
|
| }
|
|
|
| +void SpeechRecognizerImpl::OnDeviceInfo(const media::AudioParameters& params) {
|
| + DCHECK_CURRENTLY_ON(BrowserThread::IO);
|
| + device_params_ = params;
|
| + DVLOG(1) << "Device parameters: " << device_params_.AsHumanReadableString();
|
| + DispatchEvent(FSMEventArgs(EVENT_START));
|
| +}
|
| +
|
| +SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::PrepareRecognition(
|
| + const FSMEventArgs&) {
|
| + DCHECK(state_ == STATE_IDLE);
|
| + DCHECK(recognition_engine_.get() != NULL);
|
| + DCHECK(!IsCapturingAudio());
|
| + GetAudioSystem()->GetInputStreamParameters(
|
| + device_id_, base::Bind(&SpeechRecognizerImpl::OnDeviceInfo,
|
| + weak_ptr_factory_.GetWeakPtr()));
|
| +
|
| + listener()->OnRecognitionStart(session_id());
|
| + return STATE_PREPARING;
|
| +}
|
| +
|
| SpeechRecognizerImpl::FSMState
|
| SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) {
|
| - DCHECK(state_ == STATE_IDLE);
|
| + DCHECK(state_ == STATE_PREPARING);
|
| DCHECK(recognition_engine_.get() != NULL);
|
| DCHECK(!IsCapturingAudio());
|
| - const bool unit_test_is_active = (audio_manager_for_tests_ != NULL);
|
| - AudioManager* audio_manager = unit_test_is_active ?
|
| - audio_manager_for_tests_ :
|
| - AudioManager::Get();
|
| - DCHECK(audio_manager != NULL);
|
|
|
| DVLOG(1) << "SpeechRecognizerImpl starting audio capture.";
|
| num_samples_recorded_ = 0;
|
| audio_level_ = 0;
|
| end_of_utterance_ = false;
|
| - listener()->OnRecognitionStart(session_id());
|
| -
|
| - // TODO(xians): Check if the OS has the device with |device_id_|, return
|
| - // |SPEECH_AUDIO_ERROR_DETAILS_NO_MIC| if the target device does not exist.
|
| - if (!audio_manager->HasAudioInputDevices()) {
|
| - return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE,
|
| - SPEECH_AUDIO_ERROR_DETAILS_NO_MIC));
|
| - }
|
|
|
| int chunk_duration_ms = recognition_engine_->GetDesiredAudioChunkDurationMs();
|
|
|
| - AudioParameters in_params = audio_manager->GetInputStreamParameters(
|
| - device_id_);
|
| - if (!in_params.IsValid() && !unit_test_is_active) {
|
| - DLOG(ERROR) << "Invalid native audio input parameters";
|
| - return Abort(
|
| - SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE));
|
| + if (!device_params_.IsValid()) {
|
| + DLOG(ERROR) << "Audio input device not found";
|
| + return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE,
|
| + SPEECH_AUDIO_ERROR_DETAILS_NO_MIC));
|
| }
|
|
|
| // Audio converter shall provide audio based on these parameters as output.
|
| @@ -571,16 +611,18 @@ SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) {
|
| #endif
|
|
|
| AudioParameters input_parameters = output_parameters;
|
| - if (use_native_audio_params && !unit_test_is_active) {
|
| +
|
| + // AUDIO_FAKE means we are running a test.
|
| + if (use_native_audio_params &&
|
| + device_params_.format() != media::AudioParameters::AUDIO_FAKE) {
|
| // Use native audio parameters but avoid opening up at the native buffer
|
| // size. Instead use same frame size (in milliseconds) as WebSpeech uses.
|
| // We rely on internal buffers in the audio back-end to fulfill this request
|
| // and the idea is to simplify the audio conversion since each Convert()
|
| // call will then render exactly one ProvideInput() call.
|
| - // in_params.sample_rate()
|
| - input_parameters = in_params;
|
| + input_parameters = device_params_;
|
| frames_per_buffer =
|
| - ((in_params.sample_rate() * chunk_duration_ms) / 1000.0) + 0.5;
|
| + ((input_parameters.sample_rate() * chunk_duration_ms) / 1000.0) + 0.5;
|
| input_parameters.set_frames_per_buffer(frames_per_buffer);
|
| DVLOG(1) << "SRI::input_parameters: "
|
| << input_parameters.AsHumanReadableString();
|
| @@ -592,7 +634,8 @@ SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) {
|
| new OnDataConverter(input_parameters, output_parameters));
|
|
|
| audio_controller_ = AudioInputController::Create(
|
| - audio_manager, this, this, nullptr, nullptr, input_parameters, device_id_,
|
| + GetAudioSystem()->GetAudioManager(), this, this, nullptr, nullptr,
|
| + input_parameters, device_id_,
|
| /*agc_is_enabled*/ false);
|
|
|
| if (!audio_controller_.get()) {
|
| @@ -692,11 +735,18 @@ SpeechRecognizerImpl::AbortWithError(const FSMEventArgs& event_args) {
|
|
|
| SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::Abort(
|
| const SpeechRecognitionError& error) {
|
| + DCHECK_CURRENTLY_ON(BrowserThread::IO);
|
| +
|
| if (IsCapturingAudio())
|
| CloseAudioControllerAsynchronously();
|
|
|
| DVLOG(1) << "SpeechRecognizerImpl canceling recognition. ";
|
|
|
| + if (state_ == STATE_PREPARING) {
|
| + // Cancel an outstanding reply from AudioSystem.
|
| + weak_ptr_factory_.InvalidateWeakPtrs();
|
| + }
|
| +
|
| // The recognition engine is initialized only after STATE_STARTING.
|
| if (state_ > STATE_STARTING) {
|
| DCHECK(recognition_engine_.get() != NULL);
|
| @@ -833,9 +883,13 @@ void SpeechRecognizerImpl::UpdateSignalAndNoiseLevels(const float& rms,
|
| session_id(), clip_detected ? 1.0f : audio_level_, noise_level);
|
| }
|
|
|
| -void SpeechRecognizerImpl::SetAudioManagerForTesting(
|
| - AudioManager* audio_manager) {
|
| - audio_manager_for_tests_ = audio_manager;
|
| +void SpeechRecognizerImpl::SetAudioSystemForTesting(
|
| + media::AudioSystem* audio_system) {
|
| + audio_system_for_tests_ = audio_system;
|
| +}
|
| +
|
| +media::AudioSystem* SpeechRecognizerImpl::GetAudioSystem() {
|
| + return audio_system_for_tests_ ? audio_system_for_tests_ : audio_system_;
|
| }
|
|
|
| SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value)
|
|
|