Index: content/browser/speech/speech_recognizer_impl.cc
diff --git a/content/browser/speech/speech_recognizer_impl.cc b/content/browser/speech/speech_recognizer_impl.cc
index 8a57992e749c026611e93f4186859bb86e50f020..acec2671135c3e8322e5e6e487096def18d06683 100644
--- a/content/browser/speech/speech_recognizer_impl.cc
+++ b/content/browser/speech/speech_recognizer_impl.cc
@@ -17,6 +17,8 @@
#include "content/browser/speech/audio_buffer.h"
#include "content/public/browser/speech_recognition_event_listener.h"
#include "media/audio/audio_file_writer.h"
+#include "media/audio/audio_manager.h"
+#include "media/audio/audio_system.h"
#include "media/base/audio_converter.h"
#if defined(OS_WIN)
@@ -112,7 +114,7 @@ const ChannelLayout SpeechRecognizerImpl::kChannelLayout =
const int SpeechRecognizerImpl::kNumBitsPerAudioSample = 16;
const int SpeechRecognizerImpl::kNoSpeechTimeoutMs = 8000;
const int SpeechRecognizerImpl::kEndpointerEstimationTimeMs = 300;
-media::AudioManager* SpeechRecognizerImpl::audio_manager_for_tests_ = NULL;
+media::AudioSystem* SpeechRecognizerImpl::audio_system_for_tests_ = nullptr;
static_assert(SpeechRecognizerImpl::kNumBitsPerAudioSample % 8 == 0,
"kNumBitsPerAudioSample must be a multiple of 8");
@@ -176,11 +178,13 @@ double SpeechRecognizerImpl::OnDataConverter::ProvideInput(
SpeechRecognizerImpl::SpeechRecognizerImpl(
SpeechRecognitionEventListener* listener,
+ media::AudioSystem* audio_system,
int session_id,
bool continuous,
bool provisional_results,
SpeechRecognitionEngine* engine)
: SpeechRecognizer(listener, session_id),
+ audio_system_(audio_system),
recognition_engine_(engine),
endpointer_(kAudioSampleRate),
audio_log_(MediaInternals::GetInstance()->CreateAudioLog(
@@ -188,8 +192,10 @@ SpeechRecognizerImpl::SpeechRecognizerImpl(
is_dispatching_event_(false),
provisional_results_(provisional_results),
end_of_utterance_(false),
- state_(STATE_IDLE) {
- DCHECK(recognition_engine_ != NULL);
+ state_(STATE_IDLE),
+ weak_ptr_factory_(this) {
+ DCHECK(recognition_engine_ != nullptr);
+ DCHECK(audio_system_ != nullptr);
if (!continuous) {
// In single shot (non-continuous) recognition,
// the session is automatically ended after:
@@ -223,8 +229,8 @@ void SpeechRecognizerImpl::StartRecognition(const std::string& device_id) {
device_id_ = device_id;
BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
- base::Bind(&SpeechRecognizerImpl::DispatchEvent,
- this, FSMEventArgs(EVENT_START)));
+ base::Bind(&SpeechRecognizerImpl::DispatchEvent, this,
+ FSMEventArgs(EVENT_PREPARE)));
}
void SpeechRecognizerImpl::AbortRecognition() {
@@ -376,6 +382,25 @@ SpeechRecognizerImpl::ExecuteTransitionAndGetNextState(
// EVENT_STOP_CAPTURE below once speech input extensions are fixed.
case EVENT_ABORT:
return AbortSilently(event_args);
+ case EVENT_PREPARE:
+ return PrepareRecognition(event_args);
+ case EVENT_START:
+ return NotFeasible(event_args);
+ case EVENT_STOP_CAPTURE:
+ return AbortSilently(event_args);
+ case EVENT_AUDIO_DATA: // Corner cases related to queued messages
+ case EVENT_ENGINE_RESULT: // being lately dispatched.
+ case EVENT_ENGINE_ERROR:
+ case EVENT_AUDIO_ERROR:
+ return DoNothing(event_args);
+ }
+ break;
+ case STATE_PREPARING:
+ switch (event) {
+ case EVENT_ABORT:
+ return AbortSilently(event_args);
+ case EVENT_PREPARE:
+ return NotFeasible(event_args);
case EVENT_START:
return StartRecording(event_args);
case EVENT_STOP_CAPTURE:
@@ -391,6 +416,8 @@ SpeechRecognizerImpl::ExecuteTransitionAndGetNextState(
switch (event) {
case EVENT_ABORT:
return AbortWithError(event_args);
+ case EVENT_PREPARE:
+ return NotFeasible(event_args);
case EVENT_START:
return NotFeasible(event_args);
case EVENT_STOP_CAPTURE:
@@ -408,6 +435,8 @@ SpeechRecognizerImpl::ExecuteTransitionAndGetNextState(
switch (event) {
case EVENT_ABORT:
return AbortWithError(event_args);
+ case EVENT_PREPARE:
+ return NotFeasible(event_args);
case EVENT_START:
return NotFeasible(event_args);
case EVENT_STOP_CAPTURE:
@@ -425,6 +454,8 @@ SpeechRecognizerImpl::ExecuteTransitionAndGetNextState(
switch (event) {
case EVENT_ABORT:
return AbortWithError(event_args);
+ case EVENT_PREPARE:
+ return NotFeasible(event_args);
case EVENT_START:
return NotFeasible(event_args);
case EVENT_STOP_CAPTURE:
@@ -442,6 +473,8 @@ SpeechRecognizerImpl::ExecuteTransitionAndGetNextState(
switch (event) {
case EVENT_ABORT:
return AbortWithError(event_args);
+ case EVENT_PREPARE:
+ return NotFeasible(event_args);
case EVENT_START:
return NotFeasible(event_args);
case EVENT_STOP_CAPTURE:
@@ -459,6 +492,8 @@ SpeechRecognizerImpl::ExecuteTransitionAndGetNextState(
switch (event) {
case EVENT_ABORT:
return AbortWithError(event_args);
+ case EVENT_PREPARE:
+ return NotFeasible(event_args);
case EVENT_START:
return NotFeasible(event_args);
case EVENT_STOP_CAPTURE:
@@ -515,38 +550,43 @@ void SpeechRecognizerImpl::ProcessAudioPipeline(const AudioChunk& raw_audio) {
}
}
+void SpeechRecognizerImpl::OnDeviceInfo(const media::AudioParameters& params) {
+ DCHECK_CURRENTLY_ON(BrowserThread::IO);
+ device_params_ = params;
+ DVLOG(1) << "Device parameters: " << device_params_.AsHumanReadableString();
+ DispatchEvent(FSMEventArgs(EVENT_START));
+}
+
+SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::PrepareRecognition(
+ const FSMEventArgs&) {
+ DCHECK(state_ == STATE_IDLE);
+ DCHECK(recognition_engine_.get() != NULL);
+ DCHECK(!IsCapturingAudio());
+ GetAudioSystem()->GetInputStreamParameters(
+ device_id_, base::Bind(&SpeechRecognizerImpl::OnDeviceInfo,
+ weak_ptr_factory_.GetWeakPtr()));
+
+ listener()->OnRecognitionStart(session_id());
+ return STATE_PREPARING;
+}
+
SpeechRecognizerImpl::FSMState
SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) {
- DCHECK(state_ == STATE_IDLE);
+ DCHECK(state_ == STATE_PREPARING);
DCHECK(recognition_engine_.get() != NULL);
DCHECK(!IsCapturingAudio());
- const bool unit_test_is_active = (audio_manager_for_tests_ != NULL);
- AudioManager* audio_manager = unit_test_is_active ?
- audio_manager_for_tests_ :
- AudioManager::Get();
- DCHECK(audio_manager != NULL);
DVLOG(1) << "SpeechRecognizerImpl starting audio capture.";
num_samples_recorded_ = 0;
audio_level_ = 0;
end_of_utterance_ = false;
- listener()->OnRecognitionStart(session_id());
-
- // TODO(xians): Check if the OS has the device with |device_id_|, return
- // |SPEECH_AUDIO_ERROR_DETAILS_NO_MIC| if the target device does not exist.
- if (!audio_manager->HasAudioInputDevices()) {
- return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE,
- SPEECH_AUDIO_ERROR_DETAILS_NO_MIC));
- }
int chunk_duration_ms = recognition_engine_->GetDesiredAudioChunkDurationMs();
- AudioParameters in_params = audio_manager->GetInputStreamParameters(
- device_id_);
- if (!in_params.IsValid() && !unit_test_is_active) {
- DLOG(ERROR) << "Invalid native audio input parameters";
- return Abort(
- SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE));
+ if (!device_params_.IsValid()) {
+ DLOG(ERROR) << "Audio input device not found";
+ return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE,
+ SPEECH_AUDIO_ERROR_DETAILS_NO_MIC));
}
// Audio converter shall provide audio based on these parameters as output.
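
The hunk above replaces the synchronous AudioManager query with an asynchronous round trip: StartRecognition() now posts EVENT_PREPARE, PrepareRecognition() asks the AudioSystem for the input stream parameters, and the reply (OnDeviceInfo) re-enters the FSM with EVENT_START. The toy program below is a rough, standalone sketch of that prepare/start handshake in plain C++; it is not Chromium code (Chromium binds OnDeviceInfo through a WeakPtrFactory so a reply that arrives after Abort() is dropped), and every name and value in it is illustrative only.

#include <functional>
#include <iostream>
#include <string>

struct AudioParams {
  int sample_rate = 0;
  bool IsValid() const { return sample_rate > 0; }
};

enum class State { kIdle, kPreparing, kStarting, kError };

class ToyRecognizer {
 public:
  // Analogue of PrepareRecognition(): kick off the asynchronous parameter
  // query and move to the new PREPARING state.
  void Prepare(const std::string& device_id) {
    state_ = State::kPreparing;
    QueryInputParams(device_id,
                     [this](const AudioParams& params) { OnDeviceInfo(params); });
  }

 private:
  // Stand-in for AudioSystem::GetInputStreamParameters(); a real
  // implementation would post the reply back to the IO thread.
  void QueryInputParams(const std::string& device_id,
                        std::function<void(const AudioParams&)> reply) {
    reply(device_id == "default" ? AudioParams{16000} : AudioParams{});
  }

  // Analogue of OnDeviceInfo() followed by StartRecording(): an invalid
  // parameter set now means "no microphone" rather than a separate
  // HasAudioInputDevices() check.
  void OnDeviceInfo(const AudioParams& params) {
    if (!params.IsValid()) {
      state_ = State::kError;
      std::cout << "audio capture error: no mic\n";
      return;
    }
    state_ = State::kStarting;
    std::cout << "starting capture at " << params.sample_rate << " Hz\n";
  }

  State state_ = State::kIdle;
};

int main() {
  ToyRecognizer recognizer;
  recognizer.Prepare("default");  // prints "starting capture at 16000 Hz"
}
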
@@ -571,16 +611,18 @@ SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) {
#endif
AudioParameters input_parameters = output_parameters;
- if (use_native_audio_params && !unit_test_is_active) {
+
+ // AUDIO_FAKE means we are running a test.
+ if (use_native_audio_params &&
+ device_params_.format() != media::AudioParameters::AUDIO_FAKE) {
// Use native audio parameters but avoid opening up at the native buffer
// size. Instead use same frame size (in milliseconds) as WebSpeech uses.
// We rely on internal buffers in the audio back-end to fulfill this request
// and the idea is to simplify the audio conversion since each Convert()
// call will then render exactly one ProvideInput() call.
- // in_params.sample_rate()
- input_parameters = in_params;
+ input_parameters = device_params_;
frames_per_buffer =
- ((input_parameters.sample_rate() * chunk_duration_ms) / 1000.0) + 0.5;
input_parameters.set_frames_per_buffer(frames_per_buffer);
DVLOG(1) << "SRI::input_parameters: "
<< input_parameters.AsHumanReadableString();
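
A quick numeric check of the frames_per_buffer rounding above, with hypothetical values that are not taken from the patch (an 11025 Hz input device and a 100 ms desired chunk): the expression produces 1103 frames, i.e. it rounds to the nearest whole frame before the implicit conversion to int.

  // Hypothetical values: 11025 Hz device, 100 ms desired chunk duration.
  int sample_rate = 11025;
  int chunk_duration_ms = 100;
  int frames_per_buffer =
      ((sample_rate * chunk_duration_ms) / 1000.0) + 0.5;  // 1102.5 + 0.5 -> 1103
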
@@ -592,7 +634,8 @@ SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) {
new OnDataConverter(input_parameters, output_parameters));
audio_controller_ = AudioInputController::Create(
- audio_manager, this, this, nullptr, nullptr, input_parameters, device_id_,
+ GetAudioSystem()->GetAudioManager(), this, this, nullptr, nullptr,
+ input_parameters, device_id_,
/*agc_is_enabled*/ false);
if (!audio_controller_.get()) {
@@ -692,11 +735,18 @@ SpeechRecognizerImpl::AbortWithError(const FSMEventArgs& event_args) {
SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::Abort(
const SpeechRecognitionError& error) {
+ DCHECK_CURRENTLY_ON(BrowserThread::IO);
+
if (IsCapturingAudio())
CloseAudioControllerAsynchronously();
DVLOG(1) << "SpeechRecognizerImpl canceling recognition. ";
+ if (state_ == STATE_PREPARING) {
+ // Cancel an outstanding reply from AudioSystem.
+ weak_ptr_factory_.InvalidateWeakPtrs();
+ }
+
// The recognition engine is initialized only after STATE_STARTING.
if (state_ > STATE_STARTING) {
DCHECK(recognition_engine_.get() != NULL);
@@ -833,9 +883,13 @@ void SpeechRecognizerImpl::UpdateSignalAndNoiseLevels(const float& rms,
session_id(), clip_detected ? 1.0f : audio_level_, noise_level);
}
-void SpeechRecognizerImpl::SetAudioManagerForTesting(
- AudioManager* audio_manager) {
- audio_manager_for_tests_ = audio_manager;
+void SpeechRecognizerImpl::SetAudioSystemForTesting(
+ media::AudioSystem* audio_system) {
+ audio_system_for_tests_ = audio_system;
+}
+
+media::AudioSystem* SpeechRecognizerImpl::GetAudioSystem() {
+ return audio_system_for_tests_ ? audio_system_for_tests_ : audio_system_;
}
SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value)