Chromium Code Reviews

Unified Diff: content/browser/speech/speech_recognizer_impl.cc

Issue 2675713002: Switch Speech Recognition to asynchronous callback-based AudioManager interactions. (Closed)
Patch Set: review comments addressed (created 3 years, 10 months ago)
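
The substance of the change, as the hunks below show: SpeechRecognizerImpl no longer queries input-device parameters synchronously through media::AudioManager on the IO thread. It now takes a media::AudioSystem, issues the query with a callback bound to a base::WeakPtr (PrepareRecognition), and continues to StartRecording() only after OnDeviceInfo() delivers the parameters; the new STATE_PREPARING / EVENT_PREPARE cover the wait, and Abort() invalidates the weak pointers to drop a reply that is still in flight. The following is a standalone sketch of that callback-plus-weak-pointer pattern using std::function and std::weak_ptr in place of base::Bind and base::WeakPtrFactory; FakeAudioSystem and Recognizer are illustrative stand-ins, not Chromium types.

#include <functional>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

// Stand-in for media::AudioParameters (the real type carries much more).
struct AudioParameters { int sample_rate = 0; bool valid = false; };

// Stand-in for media::AudioSystem: the answer arrives via callback instead of
// being returned synchronously.
class FakeAudioSystem {
 public:
  using ParamsCallback = std::function<void(const AudioParameters&)>;
  void GetInputStreamParameters(const std::string& device_id,
                                ParamsCallback cb) {
    pending_.push_back(std::move(cb));  // the real code hops threads here
  }
  void DeliverReplies() {               // simulates the asynchronous reply
    for (auto& cb : pending_) cb({44100, true});
    pending_.clear();
  }
 private:
  std::vector<ParamsCallback> pending_;
};

class Recognizer : public std::enable_shared_from_this<Recognizer> {
 public:
  void Prepare(FakeAudioSystem* audio_system, const std::string& device_id) {
    std::weak_ptr<Recognizer> weak = weak_from_this();
    audio_system->GetInputStreamParameters(
        device_id, [weak](const AudioParameters& params) {
          if (auto self = weak.lock())   // dropped if the recognizer is gone,
            self->OnDeviceInfo(params);  // mirroring InvalidateWeakPtrs()
        });
  }
  void OnDeviceInfo(const AudioParameters& params) {
    std::cout << "got device params, rate=" << params.sample_rate << "\n";
    // ...the real code dispatches EVENT_START here and opens the stream...
  }
};

int main() {
  FakeAudioSystem audio_system;
  auto recognizer = std::make_shared<Recognizer>();
  recognizer->Prepare(&audio_system, "default");
  audio_system.DeliverReplies();  // prints the parameters
}

In the real patch the AudioSystem delivers the reply on the IO thread, which is why OnDeviceInfo() can DCHECK the IO thread and dispatch EVENT_START directly.
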
Index: content/browser/speech/speech_recognizer_impl.cc
diff --git a/content/browser/speech/speech_recognizer_impl.cc b/content/browser/speech/speech_recognizer_impl.cc
index 8a57992e749c026611e93f4186859bb86e50f020..acec2671135c3e8322e5e6e487096def18d06683 100644
--- a/content/browser/speech/speech_recognizer_impl.cc
+++ b/content/browser/speech/speech_recognizer_impl.cc
@@ -17,6 +17,8 @@
#include "content/browser/speech/audio_buffer.h"
#include "content/public/browser/speech_recognition_event_listener.h"
#include "media/audio/audio_file_writer.h"
+#include "media/audio/audio_manager.h"
+#include "media/audio/audio_system.h"
#include "media/base/audio_converter.h"
#if defined(OS_WIN)
@@ -112,7 +114,7 @@ const ChannelLayout SpeechRecognizerImpl::kChannelLayout =
const int SpeechRecognizerImpl::kNumBitsPerAudioSample = 16;
const int SpeechRecognizerImpl::kNoSpeechTimeoutMs = 8000;
const int SpeechRecognizerImpl::kEndpointerEstimationTimeMs = 300;
-media::AudioManager* SpeechRecognizerImpl::audio_manager_for_tests_ = NULL;
+media::AudioSystem* SpeechRecognizerImpl::audio_system_for_tests_ = nullptr;
static_assert(SpeechRecognizerImpl::kNumBitsPerAudioSample % 8 == 0,
"kNumBitsPerAudioSample must be a multiple of 8");
@@ -176,11 +178,13 @@ double SpeechRecognizerImpl::OnDataConverter::ProvideInput(
SpeechRecognizerImpl::SpeechRecognizerImpl(
SpeechRecognitionEventListener* listener,
+ media::AudioSystem* audio_system,
int session_id,
bool continuous,
bool provisional_results,
SpeechRecognitionEngine* engine)
: SpeechRecognizer(listener, session_id),
+ audio_system_(audio_system),
recognition_engine_(engine),
endpointer_(kAudioSampleRate),
audio_log_(MediaInternals::GetInstance()->CreateAudioLog(
@@ -188,8 +192,10 @@ SpeechRecognizerImpl::SpeechRecognizerImpl(
is_dispatching_event_(false),
provisional_results_(provisional_results),
end_of_utterance_(false),
- state_(STATE_IDLE) {
- DCHECK(recognition_engine_ != NULL);
+ state_(STATE_IDLE),
+ weak_ptr_factory_(this) {
+ DCHECK(recognition_engine_ != nullptr);
+ DCHECK(audio_system_ != nullptr);
if (!continuous) {
// In single shot (non-continuous) recognition,
// the session is automatically ended after:
@@ -223,8 +229,8 @@ void SpeechRecognizerImpl::StartRecognition(const std::string& device_id) {
device_id_ = device_id;
BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
- base::Bind(&SpeechRecognizerImpl::DispatchEvent,
- this, FSMEventArgs(EVENT_START)));
+ base::Bind(&SpeechRecognizerImpl::DispatchEvent, this,
+ FSMEventArgs(EVENT_PREPARE)));
}
void SpeechRecognizerImpl::AbortRecognition() {
@@ -376,6 +382,25 @@ SpeechRecognizerImpl::ExecuteTransitionAndGetNextState(
// EVENT_STOP_CAPTURE below once speech input extensions are fixed.
case EVENT_ABORT:
return AbortSilently(event_args);
+ case EVENT_PREPARE:
+ return PrepareRecognition(event_args);
+ case EVENT_START:
+ return NotFeasible(event_args);
+ case EVENT_STOP_CAPTURE:
+ return AbortSilently(event_args);
+ case EVENT_AUDIO_DATA: // Corner cases related to queued messages
+ case EVENT_ENGINE_RESULT: // being lately dispatched.
+ case EVENT_ENGINE_ERROR:
+ case EVENT_AUDIO_ERROR:
+ return DoNothing(event_args);
+ }
+ break;
+ case STATE_PREPARING:
+ switch (event) {
+ case EVENT_ABORT:
+ return AbortSilently(event_args);
+ case EVENT_PREPARE:
+ return NotFeasible(event_args);
case EVENT_START:
return StartRecording(event_args);
case EVENT_STOP_CAPTURE:
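
For orientation, the PREPARE-related transitions introduced in this hunk and the ones below condense to the sketch that follows. It is a simplified summary with stand-in enums, not the real ExecuteTransitionAndGetNextState(); the other states and events are omitted.

// Condensed sketch of the transitions added in this patch (names mirror the
// diff; this is a summary, not the real state machine).
enum FSMState { STATE_IDLE, STATE_PREPARING, STATE_STARTING };
enum FSMEvent { EVENT_PREPARE, EVENT_START, EVENT_ABORT };

FSMState NextState(FSMState state, FSMEvent event) {
  switch (state) {
    case STATE_IDLE:
      // EVENT_PREPARE kicks off the asynchronous device-parameter query.
      if (event == EVENT_PREPARE) return STATE_PREPARING;
      break;
    case STATE_PREPARING:
      // EVENT_START is dispatched from OnDeviceInfo() once the reply arrives.
      if (event == EVENT_START) return STATE_STARTING;
      // EVENT_ABORT maps to AbortSilently() in the diff.
      if (event == EVENT_ABORT) return STATE_IDLE;
      break;
    default:
      // EVENT_PREPARE in any other state maps to NotFeasible() in the diff.
      break;
  }
  return state;
}
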
@@ -391,6 +416,8 @@ SpeechRecognizerImpl::ExecuteTransitionAndGetNextState(
switch (event) {
case EVENT_ABORT:
return AbortWithError(event_args);
+ case EVENT_PREPARE:
+ return NotFeasible(event_args);
case EVENT_START:
return NotFeasible(event_args);
case EVENT_STOP_CAPTURE:
@@ -408,6 +435,8 @@ SpeechRecognizerImpl::ExecuteTransitionAndGetNextState(
switch (event) {
case EVENT_ABORT:
return AbortWithError(event_args);
+ case EVENT_PREPARE:
+ return NotFeasible(event_args);
case EVENT_START:
return NotFeasible(event_args);
case EVENT_STOP_CAPTURE:
@@ -425,6 +454,8 @@ SpeechRecognizerImpl::ExecuteTransitionAndGetNextState(
switch (event) {
case EVENT_ABORT:
return AbortWithError(event_args);
+ case EVENT_PREPARE:
+ return NotFeasible(event_args);
case EVENT_START:
return NotFeasible(event_args);
case EVENT_STOP_CAPTURE:
@@ -442,6 +473,8 @@ SpeechRecognizerImpl::ExecuteTransitionAndGetNextState(
switch (event) {
case EVENT_ABORT:
return AbortWithError(event_args);
+ case EVENT_PREPARE:
+ return NotFeasible(event_args);
case EVENT_START:
return NotFeasible(event_args);
case EVENT_STOP_CAPTURE:
@@ -459,6 +492,8 @@ SpeechRecognizerImpl::ExecuteTransitionAndGetNextState(
switch (event) {
case EVENT_ABORT:
return AbortWithError(event_args);
+ case EVENT_PREPARE:
+ return NotFeasible(event_args);
case EVENT_START:
return NotFeasible(event_args);
case EVENT_STOP_CAPTURE:
@@ -515,38 +550,43 @@ void SpeechRecognizerImpl::ProcessAudioPipeline(const AudioChunk& raw_audio) {
}
}
+void SpeechRecognizerImpl::OnDeviceInfo(const media::AudioParameters& params) {
+ DCHECK_CURRENTLY_ON(BrowserThread::IO);
+ device_params_ = params;
+ DVLOG(1) << "Device parameters: " << device_params_.AsHumanReadableString();
+ DispatchEvent(FSMEventArgs(EVENT_START));
+}
+
+SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::PrepareRecognition(
+ const FSMEventArgs&) {
+ DCHECK(state_ == STATE_IDLE);
+ DCHECK(recognition_engine_.get() != NULL);
+ DCHECK(!IsCapturingAudio());
+ GetAudioSystem()->GetInputStreamParameters(
+ device_id_, base::Bind(&SpeechRecognizerImpl::OnDeviceInfo,
+ weak_ptr_factory_.GetWeakPtr()));
+
+ listener()->OnRecognitionStart(session_id());
+ return STATE_PREPARING;
+}
+
SpeechRecognizerImpl::FSMState
SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) {
- DCHECK(state_ == STATE_IDLE);
+ DCHECK(state_ == STATE_PREPARING);
DCHECK(recognition_engine_.get() != NULL);
DCHECK(!IsCapturingAudio());
- const bool unit_test_is_active = (audio_manager_for_tests_ != NULL);
- AudioManager* audio_manager = unit_test_is_active ?
- audio_manager_for_tests_ :
- AudioManager::Get();
- DCHECK(audio_manager != NULL);
DVLOG(1) << "SpeechRecognizerImpl starting audio capture.";
num_samples_recorded_ = 0;
audio_level_ = 0;
end_of_utterance_ = false;
- listener()->OnRecognitionStart(session_id());
-
- // TODO(xians): Check if the OS has the device with |device_id_|, return
- // |SPEECH_AUDIO_ERROR_DETAILS_NO_MIC| if the target device does not exist.
- if (!audio_manager->HasAudioInputDevices()) {
- return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE,
- SPEECH_AUDIO_ERROR_DETAILS_NO_MIC));
- }
int chunk_duration_ms = recognition_engine_->GetDesiredAudioChunkDurationMs();
- AudioParameters in_params = audio_manager->GetInputStreamParameters(
- device_id_);
- if (!in_params.IsValid() && !unit_test_is_active) {
- DLOG(ERROR) << "Invalid native audio input parameters";
- return Abort(
- SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE));
+ if (!device_params_.IsValid()) {
+ DLOG(ERROR) << "Audio input device not found";
+ return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE,
+ SPEECH_AUDIO_ERROR_DETAILS_NO_MIC));
}
// Audio converter shall provide audio based on these parameters as output.
@@ -571,16 +611,18 @@ SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) {
#endif
AudioParameters input_parameters = output_parameters;
- if (use_native_audio_params && !unit_test_is_active) {
+
+ // AUDIO_FAKE means we are running a test.
+ if (use_native_audio_params &&
+ device_params_.format() != media::AudioParameters::AUDIO_FAKE) {
// Use native audio parameters but avoid opening up at the native buffer
// size. Instead use same frame size (in milliseconds) as WebSpeech uses.
// We rely on internal buffers in the audio back-end to fulfill this request
// and the idea is to simplify the audio conversion since each Convert()
// call will then render exactly one ProvideInput() call.
- // in_params.sample_rate()
- input_parameters = in_params;
+ input_parameters = device_params_;
frames_per_buffer =
- ((in_params.sample_rate() * chunk_duration_ms) / 1000.0) + 0.5;
+ ((input_parameters.sample_rate() * chunk_duration_ms) / 1000.0) + 0.5;
input_parameters.set_frames_per_buffer(frames_per_buffer);
DVLOG(1) << "SRI::input_parameters: "
<< input_parameters.AsHumanReadableString();
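
The frames_per_buffer computation above converts the engine's desired chunk duration into a frame count at the native sample rate; adding 0.5 before the assignment to int rounds to the nearest frame. A worked example with assumed values (the real rate comes from the device and the chunk duration from GetDesiredAudioChunkDurationMs()):

// Illustration only; 48000 Hz and 100 ms are assumed values.
int sample_rate = 48000;      // native device rate (assumed)
int chunk_duration_ms = 100;  // engine's desired chunk duration (assumed)
int frames_per_buffer =
    static_cast<int>((sample_rate * chunk_duration_ms) / 1000.0 + 0.5);
// frames_per_buffer == 4800, i.e. one Convert() call per 100 ms chunk.
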
@@ -592,7 +634,8 @@ SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) {
new OnDataConverter(input_parameters, output_parameters));
audio_controller_ = AudioInputController::Create(
- audio_manager, this, this, nullptr, nullptr, input_parameters, device_id_,
+ GetAudioSystem()->GetAudioManager(), this, this, nullptr, nullptr,
+ input_parameters, device_id_,
/*agc_is_enabled*/ false);
if (!audio_controller_.get()) {
@@ -692,11 +735,18 @@ SpeechRecognizerImpl::AbortWithError(const FSMEventArgs& event_args) {
SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::Abort(
const SpeechRecognitionError& error) {
+ DCHECK_CURRENTLY_ON(BrowserThread::IO);
+
if (IsCapturingAudio())
CloseAudioControllerAsynchronously();
DVLOG(1) << "SpeechRecognizerImpl canceling recognition. ";
+ if (state_ == STATE_PREPARING) {
+ // Cancel an outstanding reply from AudioSystem.
+ weak_ptr_factory_.InvalidateWeakPtrs();
+ }
+
// The recognition engine is initialized only after STATE_STARTING.
if (state_ > STATE_STARTING) {
DCHECK(recognition_engine_.get() != NULL);
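
The STATE_PREPARING branch above is the cancellation half of the new asynchronous flow: a base::Callback bound to an invalidated base::WeakPtr is silently dropped when run, so a device-parameter reply still in flight never reaches OnDeviceInfo() on an aborted recognizer. A minimal sketch of that idiom follows; the class is hypothetical, builds only inside the Chromium tree, and exists solely to show the WeakPtrFactory usage.

#include <string>

#include "base/bind.h"
#include "base/memory/weak_ptr.h"
#include "media/audio/audio_system.h"

// Hypothetical class illustrating the bind-to-WeakPtr / InvalidateWeakPtrs()
// idiom used by SpeechRecognizerImpl; not part of this patch.
class DeviceParamsClient {
 public:
  DeviceParamsClient() : weak_ptr_factory_(this) {}

  void Query(media::AudioSystem* audio_system, const std::string& device_id) {
    audio_system->GetInputStreamParameters(
        device_id, base::Bind(&DeviceParamsClient::OnParams,
                              weak_ptr_factory_.GetWeakPtr()));
  }

  void Cancel() {
    // Any reply still in flight is dropped: the bound WeakPtr no longer
    // resolves, so running the callback becomes a no-op.
    weak_ptr_factory_.InvalidateWeakPtrs();
  }

 private:
  void OnParams(const media::AudioParameters& params) { /* use |params| */ }

  base::WeakPtrFactory<DeviceParamsClient> weak_ptr_factory_;
};
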
@@ -833,9 +883,13 @@ void SpeechRecognizerImpl::UpdateSignalAndNoiseLevels(const float& rms,
session_id(), clip_detected ? 1.0f : audio_level_, noise_level);
}
-void SpeechRecognizerImpl::SetAudioManagerForTesting(
- AudioManager* audio_manager) {
- audio_manager_for_tests_ = audio_manager;
+void SpeechRecognizerImpl::SetAudioSystemForTesting(
+ media::AudioSystem* audio_system) {
+ audio_system_for_tests_ = audio_system;
+}
+
+media::AudioSystem* SpeechRecognizerImpl::GetAudioSystem() {
+ return audio_system_for_tests_ ? audio_system_for_tests_ : audio_system_;
}
SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value)
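
SetAudioSystemForTesting() keeps the old test seam in the new form: a static override that GetAudioSystem() prefers over the constructor-injected instance. A hypothetical use in a test fixture is sketched below; the fixture and the test double are assumptions for illustration, not something this patch adds.

// Hypothetical test scaffolding; |fake_audio_system_| stands for whatever
// media::AudioSystem test double the speech recognition tests provide.
class SpeechRecognizerImplTestSketch : public testing::Test {
 protected:
  void SetUp() override {
    SpeechRecognizerImpl::SetAudioSystemForTesting(&fake_audio_system_);
  }
  void TearDown() override {
    // Clear the static override so later tests see the injected AudioSystem.
    SpeechRecognizerImpl::SetAudioSystemForTesting(nullptr);
  }
  FakeAudioSystem fake_audio_system_;  // hypothetical test double
};
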
