Index: content/browser/speech/speech_recognizer_impl.cc |
diff --git a/content/browser/speech/speech_recognizer_impl.cc b/content/browser/speech/speech_recognizer_impl.cc |
index 84f46a5e69f593735d128a27b7c288ccaa381cff..007f3ee45a9941007714b1c5e4b25bf571fdcd1a 100644 |
--- a/content/browser/speech/speech_recognizer_impl.cc |
+++ b/content/browser/speech/speech_recognizer_impl.cc |
@@ -8,17 +8,21 @@ |
#include "base/time.h" |
#include "content/browser/browser_main_loop.h" |
#include "content/browser/speech/audio_buffer.h" |
-#include "content/public/browser/speech_recognition_event_listener.h" |
+#include "content/browser/speech/google_one_shot_remote_engine.h" |
#include "content/public/browser/browser_thread.h" |
+#include "content/public/browser/speech_recognition_event_listener.h" |
+#include "content/public/browser/speech_recognizer.h" |
+#include "content/public/common/speech_recognition_error.h" |
#include "content/public/common/speech_recognition_result.h" |
#include "net/url_request/url_request_context_getter.h" |
using content::BrowserMainLoop; |
using content::BrowserThread; |
+using content::SpeechRecognitionError; |
using content::SpeechRecognitionEventListener; |
+using content::SpeechRecognitionResult; |
using content::SpeechRecognizer; |
using media::AudioInputController; |
-using std::string; |
namespace { |
@@ -64,18 +68,22 @@ SpeechRecognizer* SpeechRecognizer::Create( |
bool filter_profanities, |
const std::string& hardware_info, |
const std::string& origin_url) { |
- return new speech::SpeechRecognizerImpl( |
- listener, caller_id, language, grammar, context_getter, |
- filter_profanities, hardware_info, origin_url); |
+ return new speech::SpeechRecognizerImpl(listener, |
+ caller_id, |
+ language, |
+ grammar, |
+ context_getter, |
+ filter_profanities, |
+ hardware_info, |
+ origin_url); |
} |
namespace speech { |
const int SpeechRecognizerImpl::kAudioSampleRate = 16000; |
-const int SpeechRecognizerImpl::kAudioPacketIntervalMs = 100; |
const ChannelLayout SpeechRecognizerImpl::kChannelLayout = CHANNEL_LAYOUT_MONO; |
const int SpeechRecognizerImpl::kNumBitsPerAudioSample = 16; |
-const int SpeechRecognizerImpl::kNoSpeechTimeoutSec = 8; |
+const int SpeechRecognizerImpl::kNoSpeechTimeoutMs = 8000; |
const int SpeechRecognizerImpl::kEndpointerEstimationTimeMs = 300; |
SpeechRecognizerImpl::SpeechRecognizerImpl( |
@@ -88,19 +96,18 @@ SpeechRecognizerImpl::SpeechRecognizerImpl( |
const std::string& hardware_info, |
const std::string& origin_url) |
: listener_(listener), |
+ testing_audio_manager_(NULL), |
+ endpointer_(kAudioSampleRate), |
+ context_getter_(context_getter), |
caller_id_(caller_id), |
language_(language), |
grammar_(grammar), |
filter_profanities_(filter_profanities), |
hardware_info_(hardware_info), |
origin_url_(origin_url), |
- context_getter_(context_getter), |
- codec_(AudioEncoder::CODEC_FLAC), |
- encoder_(NULL), |
- endpointer_(kAudioSampleRate), |
num_samples_recorded_(0), |
- audio_level_(0.0f), |
- audio_manager_(NULL) { |
+ audio_level_(0.0f) { |
+ DCHECK(listener_ != NULL); |
endpointer_.set_speech_input_complete_silence_length( |
base::Time::kMicrosecondsPerSecond / 2); |
endpointer_.set_long_speech_input_complete_silence_length( |
@@ -113,42 +120,40 @@ SpeechRecognizerImpl::~SpeechRecognizerImpl() { |
// Recording should have stopped earlier due to the endpointer or |
// |StopRecording| being called. |
DCHECK(!audio_controller_.get()); |
- DCHECK(!request_.get() || !request_->HasPendingRequest()); |
- DCHECK(!encoder_.get()); |
+ DCHECK(!recognition_engine_.get() || |
+ !recognition_engine_->IsRecognitionPending()); |
endpointer_.EndSession(); |
} |
-bool SpeechRecognizerImpl::StartRecognition() { |
+void SpeechRecognizerImpl::StartRecognition() { |
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
DCHECK(!audio_controller_.get()); |
- DCHECK(!request_.get() || !request_->HasPendingRequest()); |
- DCHECK(!encoder_.get()); |
+ DCHECK(!recognition_engine_.get() || |
+ !recognition_engine_->IsRecognitionPending()); |
// The endpointer needs to estimate the environment/background noise before |
// starting to treat the audio as user input. In |HandleOnData| we wait until |
// such time has passed before switching to user input mode. |
endpointer_.SetEnvironmentEstimationMode(); |
- encoder_.reset(AudioEncoder::Create(codec_, kAudioSampleRate, |
- kNumBitsPerAudioSample)); |
- int samples_per_packet = (kAudioSampleRate * kAudioPacketIntervalMs) / 1000; |
+ AudioManager* audio_manager = (testing_audio_manager_ != NULL) ? |
+ testing_audio_manager_ : |
+ BrowserMainLoop::GetAudioManager(); |
+ const int samples_per_packet = kAudioSampleRate * |
+ GoogleOneShotRemoteEngine::kAudioPacketIntervalMs / 1000; |
AudioParameters params(AudioParameters::AUDIO_PCM_LINEAR, kChannelLayout, |
kAudioSampleRate, kNumBitsPerAudioSample, |
samples_per_packet); |
- audio_controller_ = AudioInputController::Create( |
- audio_manager_ ? audio_manager_ : BrowserMainLoop::GetAudioManager(), |
- this, params); |
+ audio_controller_ = AudioInputController::Create(audio_manager, this, params); |
DCHECK(audio_controller_.get()); |
VLOG(1) << "SpeechRecognizer starting record."; |
num_samples_recorded_ = 0; |
audio_controller_->Record(); |
- |
- return true; |
} |
void SpeechRecognizerImpl::AbortRecognition() { |
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
- DCHECK(audio_controller_.get() || request_.get()); |
+ DCHECK(audio_controller_.get() || recognition_engine_.get()); |
// Stop recording if required. |
if (audio_controller_.get()) { |
@@ -156,8 +161,7 @@ void SpeechRecognizerImpl::AbortRecognition() { |
} |
VLOG(1) << "SpeechRecognizer canceling recognition."; |
- encoder_.reset(); |
- request_.reset(); |
+ recognition_engine_.reset(); |
} |
void SpeechRecognizerImpl::StopAudioCapture() { |
@@ -169,30 +173,16 @@ void SpeechRecognizerImpl::StopAudioCapture() { |
return; |
CloseAudioControllerSynchronously(); |
- |
listener_->OnSoundEnd(caller_id_); |
listener_->OnAudioEnd(caller_id_); |
- // UploadAudioChunk requires a non-empty final buffer. So we encode a packet |
- // of silence in case encoder had no data already. |
- std::vector<short> samples((kAudioSampleRate * kAudioPacketIntervalMs) / |
- 1000); |
- AudioChunk dummy_chunk(reinterpret_cast<uint8*>(&samples[0]), |
- samples.size() * sizeof(short), |
- encoder_->bits_per_sample() / 8); |
- encoder_->Encode(dummy_chunk); |
- encoder_->Flush(); |
- scoped_ptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear()); |
- DCHECK(!encoded_data->IsEmpty()); |
- encoder_.reset(); |
- |
// If we haven't got any audio yet end the recognition sequence here. |
- if (request_ == NULL) { |
+ if (recognition_engine_ == NULL) { |
// Guard against the listener freeing us until we finish our job. |
scoped_refptr<SpeechRecognizerImpl> me(this); |
listener_->OnRecognitionEnd(caller_id_); |
} else { |
- request_->UploadAudioChunk(*encoded_data, true /* is_last_chunk */); |
+ recognition_engine_->AudioChunksEnded(); |
} |
} |
@@ -237,24 +227,32 @@ void SpeechRecognizerImpl::HandleOnData(AudioChunk* raw_audio) { |
bool speech_was_heard_before_packet = endpointer_.DidStartReceivingSpeech(); |
- encoder_->Encode(*raw_audio); |
float rms; |
endpointer_.ProcessAudio(*raw_audio, &rms); |
bool did_clip = DetectClipping(*raw_audio); |
num_samples_recorded_ += raw_audio->NumSamples(); |
- if (request_ == NULL) { |
+ if (recognition_engine_ == NULL) { |
// This was the first audio packet recorded, so start a request to the |
// server to send the data and inform the listener. |
listener_->OnAudioStart(caller_id_); |
- request_.reset(new SpeechRecognitionRequest(context_getter_.get(), this)); |
- request_->Start(language_, grammar_, filter_profanities_, |
- hardware_info_, origin_url_, encoder_->mime_type()); |
+ GoogleOneShotRemoteEngineConfig google_sr_config; |
+ google_sr_config.language = language_; |
+ google_sr_config.grammar = grammar_; |
+ google_sr_config.audio_sample_rate = kAudioSampleRate; |
+ google_sr_config.audio_num_bits_per_sample = kNumBitsPerAudioSample; |
+ google_sr_config.filter_profanities = filter_profanities_; |
+ google_sr_config.hardware_info = hardware_info_; |
+ google_sr_config.origin_url = origin_url_; |
+ GoogleOneShotRemoteEngine* google_sr_engine = |
+ new GoogleOneShotRemoteEngine(context_getter_.get()); |
+ google_sr_engine->SetConfig(google_sr_config); |
+ recognition_engine_.reset(google_sr_engine); |
+ recognition_engine_->set_delegate(this); |
+ recognition_engine_->StartRecognition(); |
} |
- scoped_ptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear()); |
- DCHECK(!encoded_data->IsEmpty()); |
- request_->UploadAudioChunk(*encoded_data, false /* is_last_chunk */); |
+ recognition_engine_->TakeAudioChunk(*raw_audio); |
if (endpointer_.IsEstimatingEnvironment()) { |
// Check if we have gathered enough audio for the endpointer to do |
@@ -270,7 +268,7 @@ void SpeechRecognizerImpl::HandleOnData(AudioChunk* raw_audio) { |
// Check if we have waited too long without hearing any speech. |
bool speech_was_heard_after_packet = endpointer_.DidStartReceivingSpeech(); |
if (!speech_was_heard_after_packet && |
- num_samples_recorded_ >= kNoSpeechTimeoutSec * kAudioSampleRate) { |
+ num_samples_recorded_ >= (kNoSpeechTimeoutMs / 1000) * kAudioSampleRate) { |
InformErrorAndAbortRecognition( |
content::SPEECH_RECOGNITION_ERROR_NO_SPEECH); |
return; |
@@ -302,19 +300,19 @@ void SpeechRecognizerImpl::HandleOnData(AudioChunk* raw_audio) { |
StopAudioCapture(); |
} |
-void SpeechRecognizerImpl::SetRecognitionResult( |
+void SpeechRecognizerImpl::OnSpeechRecognitionEngineResult( |
const content::SpeechRecognitionResult& result) { |
- if (result.error != content::SPEECH_RECOGNITION_ERROR_NONE) { |
- InformErrorAndAbortRecognition(result.error); |
- return; |
- } |
- |
// Guard against the listener freeing us until we finish our job. |
scoped_refptr<SpeechRecognizerImpl> me(this); |
listener_->OnRecognitionResult(caller_id_, result); |
listener_->OnRecognitionEnd(caller_id_); |
} |
+void SpeechRecognizerImpl::OnSpeechRecognitionEngineError( |
+ const content::SpeechRecognitionError& error) { |
+ InformErrorAndAbortRecognition(error.code); |
+} |
+ |
void SpeechRecognizerImpl::InformErrorAndAbortRecognition( |
content::SpeechRecognitionErrorCode error) { |
DCHECK_NE(error, content::SPEECH_RECOGNITION_ERROR_NONE); |
@@ -338,17 +336,23 @@ void SpeechRecognizerImpl::CloseAudioControllerSynchronously() { |
audio_controller_ = NULL; // Releases the ref ptr. |
} |
-void SpeechRecognizerImpl::SetAudioManagerForTesting( |
- AudioManager* audio_manager) { |
- audio_manager_ = audio_manager; |
-} |
- |
bool SpeechRecognizerImpl::IsActive() const { |
- return (request_.get() != NULL); |
+ return (recognition_engine_.get() != NULL); |
} |
bool SpeechRecognizerImpl::IsCapturingAudio() const { |
return (audio_controller_.get() != NULL); |
} |
+const SpeechRecognitionEngine& |
+ SpeechRecognizerImpl::recognition_engine() const { |
+ return *(recognition_engine_.get()); |
+} |
+ |
+void SpeechRecognizerImpl::SetAudioManagerForTesting( |
+ AudioManager* audio_manager) { |
+ testing_audio_manager_ = audio_manager; |
+} |
+ |
+ |
} // namespace speech |