Chromium Code Reviews| Index: content/browser/speech/speech_recognizer_impl.cc |
| diff --git a/content/browser/speech/speech_recognizer_impl.cc b/content/browser/speech/speech_recognizer_impl.cc |
| index 8a57992e749c026611e93f4186859bb86e50f020..acec2671135c3e8322e5e6e487096def18d06683 100644 |
| --- a/content/browser/speech/speech_recognizer_impl.cc |
| +++ b/content/browser/speech/speech_recognizer_impl.cc |
| @@ -17,6 +17,8 @@ |
| #include "content/browser/speech/audio_buffer.h" |
| #include "content/public/browser/speech_recognition_event_listener.h" |
| #include "media/audio/audio_file_writer.h" |
| +#include "media/audio/audio_manager.h" |
| +#include "media/audio/audio_system.h" |
| #include "media/base/audio_converter.h" |
| #if defined(OS_WIN) |
| @@ -112,7 +114,7 @@ const ChannelLayout SpeechRecognizerImpl::kChannelLayout = |
| const int SpeechRecognizerImpl::kNumBitsPerAudioSample = 16; |
| const int SpeechRecognizerImpl::kNoSpeechTimeoutMs = 8000; |
| const int SpeechRecognizerImpl::kEndpointerEstimationTimeMs = 300; |
| -media::AudioManager* SpeechRecognizerImpl::audio_manager_for_tests_ = NULL; |
| +media::AudioSystem* SpeechRecognizerImpl::audio_system_for_tests_ = nullptr; |
|
tommi (sloooow) - chröme
2017/02/05 20:14:54
Just checking - Is it still necessary for this to
o1ka
2017/02/06 12:06:08
Yes, it's used in speech_recognition_browsertest.cc
|
| static_assert(SpeechRecognizerImpl::kNumBitsPerAudioSample % 8 == 0, |
| "kNumBitsPerAudioSample must be a multiple of 8"); |
| @@ -176,11 +178,13 @@ double SpeechRecognizerImpl::OnDataConverter::ProvideInput( |
| SpeechRecognizerImpl::SpeechRecognizerImpl( |
| SpeechRecognitionEventListener* listener, |
| + media::AudioSystem* audio_system, |
| int session_id, |
| bool continuous, |
| bool provisional_results, |
| SpeechRecognitionEngine* engine) |
| : SpeechRecognizer(listener, session_id), |
| + audio_system_(audio_system), |
| recognition_engine_(engine), |
| endpointer_(kAudioSampleRate), |
| audio_log_(MediaInternals::GetInstance()->CreateAudioLog( |
| @@ -188,8 +192,10 @@ SpeechRecognizerImpl::SpeechRecognizerImpl( |
| is_dispatching_event_(false), |
| provisional_results_(provisional_results), |
| end_of_utterance_(false), |
| - state_(STATE_IDLE) { |
| - DCHECK(recognition_engine_ != NULL); |
| + state_(STATE_IDLE), |
| + weak_ptr_factory_(this) { |
| + DCHECK(recognition_engine_ != nullptr); |
| + DCHECK(audio_system_ != nullptr); |
| if (!continuous) { |
| // In single shot (non-continous) recognition, |
| // the session is automatically ended after: |
| @@ -223,8 +229,8 @@ void SpeechRecognizerImpl::StartRecognition(const std::string& device_id) { |
| device_id_ = device_id; |
| BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, |
| - base::Bind(&SpeechRecognizerImpl::DispatchEvent, |
| - this, FSMEventArgs(EVENT_START))); |
| + base::Bind(&SpeechRecognizerImpl::DispatchEvent, this, |
| + FSMEventArgs(EVENT_PREPARE))); |
| } |
| void SpeechRecognizerImpl::AbortRecognition() { |
| @@ -376,6 +382,25 @@ SpeechRecognizerImpl::ExecuteTransitionAndGetNextState( |
| // EVENT_STOP_CAPTURE below once speech input extensions are fixed. |
| case EVENT_ABORT: |
| return AbortSilently(event_args); |
| + case EVENT_PREPARE: |
| + return PrepareRecognition(event_args); |
| + case EVENT_START: |
| + return NotFeasible(event_args); |
| + case EVENT_STOP_CAPTURE: |
| + return AbortSilently(event_args); |
| + case EVENT_AUDIO_DATA: // Corner cases related to queued messages |
| + case EVENT_ENGINE_RESULT: // being lately dispatched. |
| + case EVENT_ENGINE_ERROR: |
| + case EVENT_AUDIO_ERROR: |
| + return DoNothing(event_args); |
| + } |
| + break; |
| + case STATE_PREPARING: |
| + switch (event) { |
| + case EVENT_ABORT: |
| + return AbortSilently(event_args); |
| + case EVENT_PREPARE: |
| + return NotFeasible(event_args); |
| case EVENT_START: |
| return StartRecording(event_args); |
| case EVENT_STOP_CAPTURE: |
| @@ -391,6 +416,8 @@ SpeechRecognizerImpl::ExecuteTransitionAndGetNextState( |
| switch (event) { |
| case EVENT_ABORT: |
| return AbortWithError(event_args); |
| + case EVENT_PREPARE: |
| + return NotFeasible(event_args); |
| case EVENT_START: |
| return NotFeasible(event_args); |
| case EVENT_STOP_CAPTURE: |
| @@ -408,6 +435,8 @@ SpeechRecognizerImpl::ExecuteTransitionAndGetNextState( |
| switch (event) { |
| case EVENT_ABORT: |
| return AbortWithError(event_args); |
| + case EVENT_PREPARE: |
| + return NotFeasible(event_args); |
| case EVENT_START: |
| return NotFeasible(event_args); |
| case EVENT_STOP_CAPTURE: |
| @@ -425,6 +454,8 @@ SpeechRecognizerImpl::ExecuteTransitionAndGetNextState( |
| switch (event) { |
| case EVENT_ABORT: |
| return AbortWithError(event_args); |
| + case EVENT_PREPARE: |
| + return NotFeasible(event_args); |
| case EVENT_START: |
| return NotFeasible(event_args); |
| case EVENT_STOP_CAPTURE: |
| @@ -442,6 +473,8 @@ SpeechRecognizerImpl::ExecuteTransitionAndGetNextState( |
| switch (event) { |
| case EVENT_ABORT: |
| return AbortWithError(event_args); |
| + case EVENT_PREPARE: |
| + return NotFeasible(event_args); |
| case EVENT_START: |
| return NotFeasible(event_args); |
| case EVENT_STOP_CAPTURE: |
| @@ -459,6 +492,8 @@ SpeechRecognizerImpl::ExecuteTransitionAndGetNextState( |
| switch (event) { |
| case EVENT_ABORT: |
| return AbortWithError(event_args); |
| + case EVENT_PREPARE: |
| + return NotFeasible(event_args); |
| case EVENT_START: |
| return NotFeasible(event_args); |
| case EVENT_STOP_CAPTURE: |
| @@ -515,38 +550,43 @@ void SpeechRecognizerImpl::ProcessAudioPipeline(const AudioChunk& raw_audio) { |
| } |
| } |
| +void SpeechRecognizerImpl::OnDeviceInfo(const media::AudioParameters& params) { |
| + DCHECK_CURRENTLY_ON(BrowserThread::IO); |
| + device_params_ = params; |
| + DVLOG(1) << "Device parameters: " << device_params_.AsHumanReadableString(); |
| + DispatchEvent(FSMEventArgs(EVENT_START)); |
| +} |
| + |
| +SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::PrepareRecognition( |
| + const FSMEventArgs&) { |
| + DCHECK(state_ == STATE_IDLE); |
| + DCHECK(recognition_engine_.get() != NULL); |
| + DCHECK(!IsCapturingAudio()); |
| + GetAudioSystem()->GetInputStreamParameters( |
| + device_id_, base::Bind(&SpeechRecognizerImpl::OnDeviceInfo, |
| + weak_ptr_factory_.GetWeakPtr())); |
| + |
| + listener()->OnRecognitionStart(session_id()); |
| + return STATE_PREPARING; |
| +} |
| + |
| SpeechRecognizerImpl::FSMState |
| SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) { |
| - DCHECK(state_ == STATE_IDLE); |
| + DCHECK(state_ == STATE_PREPARING); |
| DCHECK(recognition_engine_.get() != NULL); |
| DCHECK(!IsCapturingAudio()); |
| - const bool unit_test_is_active = (audio_manager_for_tests_ != NULL); |
| - AudioManager* audio_manager = unit_test_is_active ? |
| - audio_manager_for_tests_ : |
| - AudioManager::Get(); |
| - DCHECK(audio_manager != NULL); |
| DVLOG(1) << "SpeechRecognizerImpl starting audio capture."; |
| num_samples_recorded_ = 0; |
| audio_level_ = 0; |
| end_of_utterance_ = false; |
| - listener()->OnRecognitionStart(session_id()); |
| - |
| - // TODO(xians): Check if the OS has the device with |device_id_|, return |
| - // |SPEECH_AUDIO_ERROR_DETAILS_NO_MIC| if the target device does not exist. |
| - if (!audio_manager->HasAudioInputDevices()) { |
| - return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE, |
| - SPEECH_AUDIO_ERROR_DETAILS_NO_MIC)); |
| - } |
| int chunk_duration_ms = recognition_engine_->GetDesiredAudioChunkDurationMs(); |
| - AudioParameters in_params = audio_manager->GetInputStreamParameters( |
| - device_id_); |
| - if (!in_params.IsValid() && !unit_test_is_active) { |
| - DLOG(ERROR) << "Invalid native audio input parameters"; |
| - return Abort( |
| - SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE)); |
| + if (!device_params_.IsValid()) { |
| + DLOG(ERROR) << "Audio input device not found"; |
| + return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE, |
| + SPEECH_AUDIO_ERROR_DETAILS_NO_MIC)); |
| } |
| // Audio converter shall provide audio based on these parameters as output. |
| @@ -571,16 +611,18 @@ SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) { |
| #endif |
| AudioParameters input_parameters = output_parameters; |
| - if (use_native_audio_params && !unit_test_is_active) { |
| + |
| + // AUDIO_FAKE means we are running a test. |
| + if (use_native_audio_params && |
| + device_params_.format() != media::AudioParameters::AUDIO_FAKE) { |
| // Use native audio parameters but avoid opening up at the native buffer |
| // size. Instead use same frame size (in milliseconds) as WebSpeech uses. |
| // We rely on internal buffers in the audio back-end to fulfill this request |
| // and the idea is to simplify the audio conversion since each Convert() |
| // call will then render exactly one ProvideInput() call. |
| - // in_params.sample_rate() |
| - input_parameters = in_params; |
| + input_parameters = device_params_; |
| frames_per_buffer = |
| - ((in_params.sample_rate() * chunk_duration_ms) / 1000.0) + 0.5; |
| + ((input_parameters.sample_rate() * chunk_duration_ms) / 1000.0) + 0.5; |
| input_parameters.set_frames_per_buffer(frames_per_buffer); |
| DVLOG(1) << "SRI::input_parameters: " |
| << input_parameters.AsHumanReadableString(); |
| @@ -592,7 +634,8 @@ SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) { |
| new OnDataConverter(input_parameters, output_parameters)); |
| audio_controller_ = AudioInputController::Create( |
| - audio_manager, this, this, nullptr, nullptr, input_parameters, device_id_, |
| + GetAudioSystem()->GetAudioManager(), this, this, nullptr, nullptr, |
| + input_parameters, device_id_, |
| /*agc_is_enabled*/ false); |
| if (!audio_controller_.get()) { |
| @@ -692,11 +735,18 @@ SpeechRecognizerImpl::AbortWithError(const FSMEventArgs& event_args) { |
| SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::Abort( |
| const SpeechRecognitionError& error) { |
| + DCHECK_CURRENTLY_ON(BrowserThread::IO); |
| + |
| if (IsCapturingAudio()) |
| CloseAudioControllerAsynchronously(); |
| DVLOG(1) << "SpeechRecognizerImpl canceling recognition. "; |
| + if (state_ == STATE_PREPARING) { |
| + // Cancel an outstanding reply from AudioSystem. |
| + weak_ptr_factory_.InvalidateWeakPtrs(); |
| + } |
| + |
| // The recognition engine is initialized only after STATE_STARTING. |
| if (state_ > STATE_STARTING) { |
| DCHECK(recognition_engine_.get() != NULL); |
| @@ -833,9 +883,13 @@ void SpeechRecognizerImpl::UpdateSignalAndNoiseLevels(const float& rms, |
| session_id(), clip_detected ? 1.0f : audio_level_, noise_level); |
| } |
| -void SpeechRecognizerImpl::SetAudioManagerForTesting( |
| - AudioManager* audio_manager) { |
| - audio_manager_for_tests_ = audio_manager; |
| +void SpeechRecognizerImpl::SetAudioSystemForTesting( |
| + media::AudioSystem* audio_system) { |
| + audio_system_for_tests_ = audio_system; |
| +} |
| + |
| +media::AudioSystem* SpeechRecognizerImpl::GetAudioSystem() { |
| + return audio_system_for_tests_ ? audio_system_for_tests_ : audio_system_; |
| } |
| SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value) |