Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(219)

Unified Diff: content/browser/speech/speech_recognizer_impl.cc

Issue 9663066: Refactoring of chrome speech recognition architecture (CL1.3) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Minor nit on speech_recognition_engine comments. Created 8 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: content/browser/speech/speech_recognizer_impl.cc
diff --git a/content/browser/speech/speech_recognizer_impl.cc b/content/browser/speech/speech_recognizer_impl.cc
index 84f46a5e69f593735d128a27b7c288ccaa381cff..226dff90b5931dac48ca668521104fc808015c99 100644
--- a/content/browser/speech/speech_recognizer_impl.cc
+++ b/content/browser/speech/speech_recognizer_impl.cc
@@ -8,17 +8,21 @@
#include "base/time.h"
#include "content/browser/browser_main_loop.h"
#include "content/browser/speech/audio_buffer.h"
-#include "content/public/browser/speech_recognition_event_listener.h"
+#include "content/browser/speech/google_one_shot_remote_engine.h"
#include "content/public/browser/browser_thread.h"
+#include "content/public/browser/speech_recognition_event_listener.h"
+#include "content/public/browser/speech_recognizer.h"
#include "content/public/common/speech_recognition_result.h"
#include "net/url_request/url_request_context_getter.h"
+#include "chrome/browser/speech/chrome_speech_recognition_preferences.h"
using content::BrowserMainLoop;
using content::BrowserThread;
+using content::SpeechRecognitionError;
using content::SpeechRecognitionEventListener;
+using content::SpeechRecognitionResult;
using content::SpeechRecognizer;
using media::AudioInputController;
-using std::string;
namespace {
@@ -55,6 +59,7 @@ bool DetectClipping(const speech::AudioChunk& chunk) {
} // namespace
+// TODO(primiano) transitional, see description in speech_recognizer.h.
Satish 2012/03/22 12:03:28 this doesn't seem useful, could remove
Primiano Tucci (use gerrit) 2012/03/22 12:39:29 Done.
SpeechRecognizer* SpeechRecognizer::Create(
SpeechRecognitionEventListener* listener,
int caller_id,
@@ -64,18 +69,37 @@ SpeechRecognizer* SpeechRecognizer::Create(
bool filter_profanities,
const std::string& hardware_info,
const std::string& origin_url) {
- return new speech::SpeechRecognizerImpl(
- listener, caller_id, language, grammar, context_getter,
- filter_profanities, hardware_info, origin_url);
+ speech::GoogleOneShotRemoteEngineConfig google_sr_config;
+ google_sr_config.language = language;
+ google_sr_config.grammar = grammar;
+ google_sr_config.audio_sample_rate =
+ speech::SpeechRecognizerImpl::kAudioSampleRate;
+ google_sr_config.audio_num_bits_per_sample =
+ speech::SpeechRecognizerImpl::kNumBitsPerAudioSample;
+ google_sr_config.filter_profanities = filter_profanities;
+ google_sr_config.hardware_info = hardware_info;
+ google_sr_config.origin_url = origin_url;
+
+ speech::GoogleOneShotRemoteEngine* google_sr_engine =
+ new speech::GoogleOneShotRemoteEngine(context_getter);
Satish 2012/03/22 12:03:28 is this code to create google_sr_engine necessary?
Primiano Tucci (use gerrit) 2012/03/22 12:39:29 Oops. It was a "remainder" of the old code. Remove
+ google_sr_engine->SetConfig(google_sr_config);
+
+ return new speech::SpeechRecognizerImpl(listener,
+ caller_id,
+ language,
+ grammar,
+ context_getter,
+ filter_profanities,
+ hardware_info,
+ origin_url);
}
namespace speech {
const int SpeechRecognizerImpl::kAudioSampleRate = 16000;
-const int SpeechRecognizerImpl::kAudioPacketIntervalMs = 100;
const ChannelLayout SpeechRecognizerImpl::kChannelLayout = CHANNEL_LAYOUT_MONO;
const int SpeechRecognizerImpl::kNumBitsPerAudioSample = 16;
-const int SpeechRecognizerImpl::kNoSpeechTimeoutSec = 8;
+const int SpeechRecognizerImpl::kNoSpeechTimeoutMs = 8000;
const int SpeechRecognizerImpl::kEndpointerEstimationTimeMs = 300;
SpeechRecognizerImpl::SpeechRecognizerImpl(
@@ -88,19 +112,17 @@ SpeechRecognizerImpl::SpeechRecognizerImpl(
const std::string& hardware_info,
const std::string& origin_url)
: listener_(listener),
+ testing_audio_manager_(NULL),
+ recognition_engine_(NULL),
Satish 2012/03/22 12:03:28 this isn't required as it is a scoped_ptr
Primiano Tucci (use gerrit) 2012/03/22 12:39:29 Done.
+ endpointer_(kAudioSampleRate),
+ context_getter_(context_getter),
caller_id_(caller_id),
language_(language),
grammar_(grammar),
filter_profanities_(filter_profanities),
hardware_info_(hardware_info),
- origin_url_(origin_url),
- context_getter_(context_getter),
- codec_(AudioEncoder::CODEC_FLAC),
- encoder_(NULL),
- endpointer_(kAudioSampleRate),
- num_samples_recorded_(0),
- audio_level_(0.0f),
- audio_manager_(NULL) {
+ origin_url_(origin_url) {
+ DCHECK(listener_ != NULL);
Satish 2012/03/22 12:03:28 is this DCHECK required? There are other pointer p
Primiano Tucci (use gerrit) 2012/03/22 12:39:29 The only othher pointer I see is context_getter, t
endpointer_.set_speech_input_complete_silence_length(
base::Time::kMicrosecondsPerSecond / 2);
endpointer_.set_long_speech_input_complete_silence_length(
@@ -113,42 +135,40 @@ SpeechRecognizerImpl::~SpeechRecognizerImpl() {
// Recording should have stopped earlier due to the endpointer or
// |StopRecording| being called.
DCHECK(!audio_controller_.get());
- DCHECK(!request_.get() || !request_->HasPendingRequest());
- DCHECK(!encoder_.get());
+ DCHECK(!recognition_engine_.get() ||
+ !recognition_engine_->IsRecognitionPending());
endpointer_.EndSession();
}
-bool SpeechRecognizerImpl::StartRecognition() {
+void SpeechRecognizerImpl::StartRecognition() {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
DCHECK(!audio_controller_.get());
- DCHECK(!request_.get() || !request_->HasPendingRequest());
- DCHECK(!encoder_.get());
+ DCHECK(!recognition_engine_.get() ||
+ !recognition_engine_->IsRecognitionPending());
// The endpointer needs to estimate the environment/background noise before
// starting to treat the audio as user input. In |HandleOnData| we wait until
// such time has passed before switching to user input mode.
endpointer_.SetEnvironmentEstimationMode();
- encoder_.reset(AudioEncoder::Create(codec_, kAudioSampleRate,
- kNumBitsPerAudioSample));
- int samples_per_packet = (kAudioSampleRate * kAudioPacketIntervalMs) / 1000;
+ AudioManager* audio_manager = (testing_audio_manager_ != NULL) ?
+ testing_audio_manager_ :
+ BrowserMainLoop::GetAudioManager();
+ const int samples_per_packet = kAudioSampleRate *
+ GoogleOneShotRemoteEngine::kAudioPacketIntervalMs / 1000;
Satish 2012/03/22 12:03:28 this doesn't seem right, having sampling rate as p
Primiano Tucci (use gerrit) 2012/03/22 12:39:29 Hmm this is transitional code, I know it is not cl
AudioParameters params(AudioParameters::AUDIO_PCM_LINEAR, kChannelLayout,
kAudioSampleRate, kNumBitsPerAudioSample,
samples_per_packet);
- audio_controller_ = AudioInputController::Create(
- audio_manager_ ? audio_manager_ : BrowserMainLoop::GetAudioManager(),
- this, params);
+ audio_controller_ = AudioInputController::Create(audio_manager, this, params);
DCHECK(audio_controller_.get());
VLOG(1) << "SpeechRecognizer starting record.";
num_samples_recorded_ = 0;
audio_controller_->Record();
-
- return true;
}
void SpeechRecognizerImpl::AbortRecognition() {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
- DCHECK(audio_controller_.get() || request_.get());
+ DCHECK(audio_controller_.get() || recognition_engine_.get());
// Stop recording if required.
if (audio_controller_.get()) {
@@ -156,8 +176,7 @@ void SpeechRecognizerImpl::AbortRecognition() {
}
VLOG(1) << "SpeechRecognizer canceling recognition.";
- encoder_.reset();
- request_.reset();
+ recognition_engine_.reset();
Satish 2012/03/22 12:03:28 is it required to call EndRecognition here or does
Primiano Tucci (use gerrit) 2012/03/22 12:39:29 I didn't make a lot of changes in this CL as this
}
void SpeechRecognizerImpl::StopAudioCapture() {
@@ -170,29 +189,15 @@ void SpeechRecognizerImpl::StopAudioCapture() {
CloseAudioControllerSynchronously();
- listener_->OnSoundEnd(caller_id_);
Satish 2012/03/22 12:03:28 I don't see OnSoundEnd called from this class anym
Primiano Tucci (use gerrit) 2012/03/22 12:39:29 Done.
listener_->OnAudioEnd(caller_id_);
- // UploadAudioChunk requires a non-empty final buffer. So we encode a packet
- // of silence in case encoder had no data already.
- std::vector<short> samples((kAudioSampleRate * kAudioPacketIntervalMs) /
- 1000);
- AudioChunk dummy_chunk(reinterpret_cast<uint8*>(&samples[0]),
- samples.size() * sizeof(short),
- encoder_->bits_per_sample() / 8);
- encoder_->Encode(dummy_chunk);
- encoder_->Flush();
- scoped_ptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear());
- DCHECK(!encoded_data->IsEmpty());
- encoder_.reset();
-
// If we haven't got any audio yet end the recognition sequence here.
- if (request_ == NULL) {
+ if (recognition_engine_ == NULL) {
// Guard against the listener freeing us until we finish our job.
scoped_refptr<SpeechRecognizerImpl> me(this);
listener_->OnRecognitionEnd(caller_id_);
} else {
- request_->UploadAudioChunk(*encoded_data, true /* is_last_chunk */);
+ recognition_engine_->AudioChunksEnded();
}
}
@@ -237,24 +242,32 @@ void SpeechRecognizerImpl::HandleOnData(AudioChunk* raw_audio) {
bool speech_was_heard_before_packet = endpointer_.DidStartReceivingSpeech();
- encoder_->Encode(*raw_audio);
float rms;
endpointer_.ProcessAudio(*raw_audio, &rms);
bool did_clip = DetectClipping(*raw_audio);
num_samples_recorded_ += raw_audio->NumSamples();
- if (request_ == NULL) {
+ if (recognition_engine_ == NULL) {
// This was the first audio packet recorded, so start a request to the
// server to send the data and inform the listener.
listener_->OnAudioStart(caller_id_);
- request_.reset(new SpeechRecognitionRequest(context_getter_.get(), this));
- request_->Start(language_, grammar_, filter_profanities_,
- hardware_info_, origin_url_, encoder_->mime_type());
+ GoogleOneShotRemoteEngineConfig google_sr_config;
Satish 2012/03/22 12:03:28 suggest moving all this to a separate helper funct
Primiano Tucci (use gerrit) 2012/03/22 12:39:29 Same as above, this code is temporary and the next
+ google_sr_config.language = language_;
+ google_sr_config.grammar = grammar_;
+ google_sr_config.audio_sample_rate = kAudioSampleRate;
+ google_sr_config.audio_num_bits_per_sample = kNumBitsPerAudioSample;
+ google_sr_config.filter_profanities = filter_profanities_;
+ google_sr_config.hardware_info = hardware_info_;
+ google_sr_config.origin_url = origin_url_;
+ GoogleOneShotRemoteEngine* google_sr_engine =
+ new GoogleOneShotRemoteEngine(context_getter_.get());
+ google_sr_engine->SetConfig(google_sr_config);
+ recognition_engine_.reset(google_sr_engine);
+ recognition_engine_->set_delegate(this);
+ recognition_engine_->StartRecognition();
}
- scoped_ptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear());
- DCHECK(!encoded_data->IsEmpty());
- request_->UploadAudioChunk(*encoded_data, false /* is_last_chunk */);
+ recognition_engine_->TakeAudioChunk(*raw_audio);
if (endpointer_.IsEstimatingEnvironment()) {
// Check if we have gathered enough audio for the endpointer to do
@@ -270,7 +283,7 @@ void SpeechRecognizerImpl::HandleOnData(AudioChunk* raw_audio) {
// Check if we have waited too long without hearing any speech.
bool speech_was_heard_after_packet = endpointer_.DidStartReceivingSpeech();
if (!speech_was_heard_after_packet &&
- num_samples_recorded_ >= kNoSpeechTimeoutSec * kAudioSampleRate) {
+ num_samples_recorded_ >= (kNoSpeechTimeoutMs / 1000) * kAudioSampleRate) {
InformErrorAndAbortRecognition(
content::SPEECH_RECOGNITION_ERROR_NO_SPEECH);
return;
@@ -302,7 +315,7 @@ void SpeechRecognizerImpl::HandleOnData(AudioChunk* raw_audio) {
StopAudioCapture();
}
-void SpeechRecognizerImpl::SetRecognitionResult(
+void SpeechRecognizerImpl::OnSpeechRecognitionEngineResult(
const content::SpeechRecognitionResult& result) {
if (result.error != content::SPEECH_RECOGNITION_ERROR_NONE) {
InformErrorAndAbortRecognition(result.error);
@@ -315,6 +328,11 @@ void SpeechRecognizerImpl::SetRecognitionResult(
listener_->OnRecognitionEnd(caller_id_);
}
+void SpeechRecognizerImpl::OnSpeechRecognitionEngineError(
+ const content::SpeechRecognitionError& error) {
+ InformErrorAndAbortRecognition(error.code);
+}
+
void SpeechRecognizerImpl::InformErrorAndAbortRecognition(
content::SpeechRecognitionErrorCode error) {
DCHECK_NE(error, content::SPEECH_RECOGNITION_ERROR_NONE);
@@ -338,17 +356,23 @@ void SpeechRecognizerImpl::CloseAudioControllerSynchronously() {
audio_controller_ = NULL; // Releases the ref ptr.
}
-void SpeechRecognizerImpl::SetAudioManagerForTesting(
- AudioManager* audio_manager) {
- audio_manager_ = audio_manager;
-}
-
bool SpeechRecognizerImpl::IsActive() const {
- return (request_.get() != NULL);
+ return (recognition_engine_.get() != NULL);
}
bool SpeechRecognizerImpl::IsCapturingAudio() const {
return (audio_controller_.get() != NULL);
}
+const SpeechRecognitionEngine&
+ SpeechRecognizerImpl::recognition_engine() const {
+ return *(recognition_engine_.get());
+}
+
+void SpeechRecognizerImpl::SetAudioManagerForTesting(
+ AudioManager* audio_manager) {
+ testing_audio_manager_ = audio_manager;
+}
+
+
} // namespace speech

Powered by Google App Engine
This is Rietveld 408576698