Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(251)

Unified Diff: content/browser/speech/speech_recognizer_impl.cc

Issue 9663066: Refactoring of chrome speech recognition architecture (CL1.3) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Fixed compilation issues on Windows. Created 8 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: content/browser/speech/speech_recognizer_impl.cc
diff --git a/content/browser/speech/speech_recognizer_impl.cc b/content/browser/speech/speech_recognizer_impl.cc
index 84f46a5e69f593735d128a27b7c288ccaa381cff..007f3ee45a9941007714b1c5e4b25bf571fdcd1a 100644
--- a/content/browser/speech/speech_recognizer_impl.cc
+++ b/content/browser/speech/speech_recognizer_impl.cc
@@ -8,17 +8,21 @@
#include "base/time.h"
#include "content/browser/browser_main_loop.h"
#include "content/browser/speech/audio_buffer.h"
-#include "content/public/browser/speech_recognition_event_listener.h"
+#include "content/browser/speech/google_one_shot_remote_engine.h"
#include "content/public/browser/browser_thread.h"
+#include "content/public/browser/speech_recognition_event_listener.h"
+#include "content/public/browser/speech_recognizer.h"
+#include "content/public/common/speech_recognition_error.h"
#include "content/public/common/speech_recognition_result.h"
#include "net/url_request/url_request_context_getter.h"
using content::BrowserMainLoop;
using content::BrowserThread;
+using content::SpeechRecognitionError;
using content::SpeechRecognitionEventListener;
+using content::SpeechRecognitionResult;
using content::SpeechRecognizer;
using media::AudioInputController;
-using std::string;
namespace {
@@ -64,18 +68,22 @@ SpeechRecognizer* SpeechRecognizer::Create(
bool filter_profanities,
const std::string& hardware_info,
const std::string& origin_url) {
- return new speech::SpeechRecognizerImpl(
- listener, caller_id, language, grammar, context_getter,
- filter_profanities, hardware_info, origin_url);
+ return new speech::SpeechRecognizerImpl(listener,
+ caller_id,
+ language,
+ grammar,
+ context_getter,
+ filter_profanities,
+ hardware_info,
+ origin_url);
}
namespace speech {
const int SpeechRecognizerImpl::kAudioSampleRate = 16000;
-const int SpeechRecognizerImpl::kAudioPacketIntervalMs = 100;
const ChannelLayout SpeechRecognizerImpl::kChannelLayout = CHANNEL_LAYOUT_MONO;
const int SpeechRecognizerImpl::kNumBitsPerAudioSample = 16;
-const int SpeechRecognizerImpl::kNoSpeechTimeoutSec = 8;
+const int SpeechRecognizerImpl::kNoSpeechTimeoutMs = 8000;
const int SpeechRecognizerImpl::kEndpointerEstimationTimeMs = 300;
SpeechRecognizerImpl::SpeechRecognizerImpl(
@@ -88,19 +96,18 @@ SpeechRecognizerImpl::SpeechRecognizerImpl(
const std::string& hardware_info,
const std::string& origin_url)
: listener_(listener),
+ testing_audio_manager_(NULL),
+ endpointer_(kAudioSampleRate),
+ context_getter_(context_getter),
caller_id_(caller_id),
language_(language),
grammar_(grammar),
filter_profanities_(filter_profanities),
hardware_info_(hardware_info),
origin_url_(origin_url),
- context_getter_(context_getter),
- codec_(AudioEncoder::CODEC_FLAC),
- encoder_(NULL),
- endpointer_(kAudioSampleRate),
num_samples_recorded_(0),
- audio_level_(0.0f),
- audio_manager_(NULL) {
+ audio_level_(0.0f) {
+ DCHECK(listener_ != NULL);
endpointer_.set_speech_input_complete_silence_length(
base::Time::kMicrosecondsPerSecond / 2);
endpointer_.set_long_speech_input_complete_silence_length(
@@ -113,42 +120,40 @@ SpeechRecognizerImpl::~SpeechRecognizerImpl() {
// Recording should have stopped earlier due to the endpointer or
// |StopRecording| being called.
DCHECK(!audio_controller_.get());
- DCHECK(!request_.get() || !request_->HasPendingRequest());
- DCHECK(!encoder_.get());
+ DCHECK(!recognition_engine_.get() ||
+ !recognition_engine_->IsRecognitionPending());
endpointer_.EndSession();
}
-bool SpeechRecognizerImpl::StartRecognition() {
+void SpeechRecognizerImpl::StartRecognition() {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
DCHECK(!audio_controller_.get());
- DCHECK(!request_.get() || !request_->HasPendingRequest());
- DCHECK(!encoder_.get());
+ DCHECK(!recognition_engine_.get() ||
+ !recognition_engine_->IsRecognitionPending());
// The endpointer needs to estimate the environment/background noise before
// starting to treat the audio as user input. In |HandleOnData| we wait until
// such time has passed before switching to user input mode.
endpointer_.SetEnvironmentEstimationMode();
- encoder_.reset(AudioEncoder::Create(codec_, kAudioSampleRate,
- kNumBitsPerAudioSample));
- int samples_per_packet = (kAudioSampleRate * kAudioPacketIntervalMs) / 1000;
+ AudioManager* audio_manager = (testing_audio_manager_ != NULL) ?
+ testing_audio_manager_ :
+ BrowserMainLoop::GetAudioManager();
+ const int samples_per_packet = kAudioSampleRate *
+ GoogleOneShotRemoteEngine::kAudioPacketIntervalMs / 1000;
AudioParameters params(AudioParameters::AUDIO_PCM_LINEAR, kChannelLayout,
kAudioSampleRate, kNumBitsPerAudioSample,
samples_per_packet);
- audio_controller_ = AudioInputController::Create(
- audio_manager_ ? audio_manager_ : BrowserMainLoop::GetAudioManager(),
- this, params);
+ audio_controller_ = AudioInputController::Create(audio_manager, this, params);
DCHECK(audio_controller_.get());
VLOG(1) << "SpeechRecognizer starting record.";
num_samples_recorded_ = 0;
audio_controller_->Record();
-
- return true;
}
void SpeechRecognizerImpl::AbortRecognition() {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
- DCHECK(audio_controller_.get() || request_.get());
+ DCHECK(audio_controller_.get() || recognition_engine_.get());
// Stop recording if required.
if (audio_controller_.get()) {
@@ -156,8 +161,7 @@ void SpeechRecognizerImpl::AbortRecognition() {
}
VLOG(1) << "SpeechRecognizer canceling recognition.";
- encoder_.reset();
- request_.reset();
+ recognition_engine_.reset();
}
void SpeechRecognizerImpl::StopAudioCapture() {
@@ -169,30 +173,16 @@ void SpeechRecognizerImpl::StopAudioCapture() {
return;
CloseAudioControllerSynchronously();
-
listener_->OnSoundEnd(caller_id_);
listener_->OnAudioEnd(caller_id_);
- // UploadAudioChunk requires a non-empty final buffer. So we encode a packet
- // of silence in case encoder had no data already.
- std::vector<short> samples((kAudioSampleRate * kAudioPacketIntervalMs) /
- 1000);
- AudioChunk dummy_chunk(reinterpret_cast<uint8*>(&samples[0]),
- samples.size() * sizeof(short),
- encoder_->bits_per_sample() / 8);
- encoder_->Encode(dummy_chunk);
- encoder_->Flush();
- scoped_ptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear());
- DCHECK(!encoded_data->IsEmpty());
- encoder_.reset();
-
// If we haven't got any audio yet end the recognition sequence here.
- if (request_ == NULL) {
+ if (recognition_engine_ == NULL) {
// Guard against the listener freeing us until we finish our job.
scoped_refptr<SpeechRecognizerImpl> me(this);
listener_->OnRecognitionEnd(caller_id_);
} else {
- request_->UploadAudioChunk(*encoded_data, true /* is_last_chunk */);
+ recognition_engine_->AudioChunksEnded();
}
}
@@ -237,24 +227,32 @@ void SpeechRecognizerImpl::HandleOnData(AudioChunk* raw_audio) {
bool speech_was_heard_before_packet = endpointer_.DidStartReceivingSpeech();
- encoder_->Encode(*raw_audio);
float rms;
endpointer_.ProcessAudio(*raw_audio, &rms);
bool did_clip = DetectClipping(*raw_audio);
num_samples_recorded_ += raw_audio->NumSamples();
- if (request_ == NULL) {
+ if (recognition_engine_ == NULL) {
// This was the first audio packet recorded, so start a request to the
// server to send the data and inform the listener.
listener_->OnAudioStart(caller_id_);
- request_.reset(new SpeechRecognitionRequest(context_getter_.get(), this));
- request_->Start(language_, grammar_, filter_profanities_,
- hardware_info_, origin_url_, encoder_->mime_type());
+ GoogleOneShotRemoteEngineConfig google_sr_config;
+ google_sr_config.language = language_;
+ google_sr_config.grammar = grammar_;
+ google_sr_config.audio_sample_rate = kAudioSampleRate;
+ google_sr_config.audio_num_bits_per_sample = kNumBitsPerAudioSample;
+ google_sr_config.filter_profanities = filter_profanities_;
+ google_sr_config.hardware_info = hardware_info_;
+ google_sr_config.origin_url = origin_url_;
+ GoogleOneShotRemoteEngine* google_sr_engine =
+ new GoogleOneShotRemoteEngine(context_getter_.get());
+ google_sr_engine->SetConfig(google_sr_config);
+ recognition_engine_.reset(google_sr_engine);
+ recognition_engine_->set_delegate(this);
+ recognition_engine_->StartRecognition();
}
- scoped_ptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear());
- DCHECK(!encoded_data->IsEmpty());
- request_->UploadAudioChunk(*encoded_data, false /* is_last_chunk */);
+ recognition_engine_->TakeAudioChunk(*raw_audio);
if (endpointer_.IsEstimatingEnvironment()) {
// Check if we have gathered enough audio for the endpointer to do
@@ -270,7 +268,7 @@ void SpeechRecognizerImpl::HandleOnData(AudioChunk* raw_audio) {
// Check if we have waited too long without hearing any speech.
bool speech_was_heard_after_packet = endpointer_.DidStartReceivingSpeech();
if (!speech_was_heard_after_packet &&
- num_samples_recorded_ >= kNoSpeechTimeoutSec * kAudioSampleRate) {
+ num_samples_recorded_ >= (kNoSpeechTimeoutMs / 1000) * kAudioSampleRate) {
InformErrorAndAbortRecognition(
content::SPEECH_RECOGNITION_ERROR_NO_SPEECH);
return;
@@ -302,19 +300,19 @@ void SpeechRecognizerImpl::HandleOnData(AudioChunk* raw_audio) {
StopAudioCapture();
}
-void SpeechRecognizerImpl::SetRecognitionResult(
+void SpeechRecognizerImpl::OnSpeechRecognitionEngineResult(
const content::SpeechRecognitionResult& result) {
- if (result.error != content::SPEECH_RECOGNITION_ERROR_NONE) {
- InformErrorAndAbortRecognition(result.error);
- return;
- }
-
// Guard against the listener freeing us until we finish our job.
scoped_refptr<SpeechRecognizerImpl> me(this);
listener_->OnRecognitionResult(caller_id_, result);
listener_->OnRecognitionEnd(caller_id_);
}
+void SpeechRecognizerImpl::OnSpeechRecognitionEngineError(
+ const content::SpeechRecognitionError& error) {
+ InformErrorAndAbortRecognition(error.code);
+}
+
void SpeechRecognizerImpl::InformErrorAndAbortRecognition(
content::SpeechRecognitionErrorCode error) {
DCHECK_NE(error, content::SPEECH_RECOGNITION_ERROR_NONE);
@@ -338,17 +336,23 @@ void SpeechRecognizerImpl::CloseAudioControllerSynchronously() {
audio_controller_ = NULL; // Releases the ref ptr.
}
-void SpeechRecognizerImpl::SetAudioManagerForTesting(
- AudioManager* audio_manager) {
- audio_manager_ = audio_manager;
-}
-
bool SpeechRecognizerImpl::IsActive() const {
- return (request_.get() != NULL);
+ return (recognition_engine_.get() != NULL);
}
bool SpeechRecognizerImpl::IsCapturingAudio() const {
return (audio_controller_.get() != NULL);
}
+const SpeechRecognitionEngine&
+ SpeechRecognizerImpl::recognition_engine() const {
+ return *(recognition_engine_.get());
+}
+
+void SpeechRecognizerImpl::SetAudioManagerForTesting(
+ AudioManager* audio_manager) {
+ testing_audio_manager_ = audio_manager;
+}
+
+
} // namespace speech
« no previous file with comments | « content/browser/speech/speech_recognizer_impl.h ('k') | content/browser/speech/speech_recognizer_impl_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698