Index: content/browser/speech/speech_recognizer_impl.cc |
diff --git a/content/browser/speech/speech_recognizer_impl.cc b/content/browser/speech/speech_recognizer_impl.cc |
index 84f46a5e69f593735d128a27b7c288ccaa381cff..226dff90b5931dac48ca668521104fc808015c99 100644 |
--- a/content/browser/speech/speech_recognizer_impl.cc |
+++ b/content/browser/speech/speech_recognizer_impl.cc |
@@ -8,17 +8,21 @@ |
#include "base/time.h" |
#include "content/browser/browser_main_loop.h" |
#include "content/browser/speech/audio_buffer.h" |
-#include "content/public/browser/speech_recognition_event_listener.h" |
+#include "content/browser/speech/google_one_shot_remote_engine.h" |
#include "content/public/browser/browser_thread.h" |
+#include "content/public/browser/speech_recognition_event_listener.h" |
+#include "content/public/browser/speech_recognizer.h" |
#include "content/public/common/speech_recognition_result.h" |
#include "net/url_request/url_request_context_getter.h" |
+#include "chrome/browser/speech/chrome_speech_recognition_preferences.h" |
using content::BrowserMainLoop; |
using content::BrowserThread; |
+using content::SpeechRecognitionError; |
using content::SpeechRecognitionEventListener; |
+using content::SpeechRecognitionResult; |
using content::SpeechRecognizer; |
using media::AudioInputController; |
-using std::string; |
namespace { |
@@ -55,6 +59,7 @@ bool DetectClipping(const speech::AudioChunk& chunk) { |
} // namespace |
+// TODO(primiano) transitional, see description in speech_recognizer.h. |
Satish
2012/03/22 12:03:28
this doesn't seem useful, could remove
Primiano Tucci (use gerrit)
2012/03/22 12:39:29
Done.
|
SpeechRecognizer* SpeechRecognizer::Create( |
SpeechRecognitionEventListener* listener, |
int caller_id, |
@@ -64,18 +69,37 @@ SpeechRecognizer* SpeechRecognizer::Create( |
bool filter_profanities, |
const std::string& hardware_info, |
const std::string& origin_url) { |
- return new speech::SpeechRecognizerImpl( |
- listener, caller_id, language, grammar, context_getter, |
- filter_profanities, hardware_info, origin_url); |
+ speech::GoogleOneShotRemoteEngineConfig google_sr_config; |
+ google_sr_config.language = language; |
+ google_sr_config.grammar = grammar; |
+ google_sr_config.audio_sample_rate = |
+ speech::SpeechRecognizerImpl::kAudioSampleRate; |
+ google_sr_config.audio_num_bits_per_sample = |
+ speech::SpeechRecognizerImpl::kNumBitsPerAudioSample; |
+ google_sr_config.filter_profanities = filter_profanities; |
+ google_sr_config.hardware_info = hardware_info; |
+ google_sr_config.origin_url = origin_url; |
+ |
+ speech::GoogleOneShotRemoteEngine* google_sr_engine = |
+ new speech::GoogleOneShotRemoteEngine(context_getter); |
Satish
2012/03/22 12:03:28
is this code to create google_sr_engine necessary?
Primiano Tucci (use gerrit)
2012/03/22 12:39:29
Oops. It was a "remainder" of the old code. Remove
|
+ google_sr_engine->SetConfig(google_sr_config); |
+ |
+ return new speech::SpeechRecognizerImpl(listener, |
+ caller_id, |
+ language, |
+ grammar, |
+ context_getter, |
+ filter_profanities, |
+ hardware_info, |
+ origin_url); |
} |
namespace speech { |
const int SpeechRecognizerImpl::kAudioSampleRate = 16000; |
-const int SpeechRecognizerImpl::kAudioPacketIntervalMs = 100; |
const ChannelLayout SpeechRecognizerImpl::kChannelLayout = CHANNEL_LAYOUT_MONO; |
const int SpeechRecognizerImpl::kNumBitsPerAudioSample = 16; |
-const int SpeechRecognizerImpl::kNoSpeechTimeoutSec = 8; |
+const int SpeechRecognizerImpl::kNoSpeechTimeoutMs = 8000; |
const int SpeechRecognizerImpl::kEndpointerEstimationTimeMs = 300; |
SpeechRecognizerImpl::SpeechRecognizerImpl( |
@@ -88,19 +112,17 @@ SpeechRecognizerImpl::SpeechRecognizerImpl( |
const std::string& hardware_info, |
const std::string& origin_url) |
: listener_(listener), |
+ testing_audio_manager_(NULL), |
+ recognition_engine_(NULL), |
Satish
2012/03/22 12:03:28
this isn't required as it is a scoped_ptr
Primiano Tucci (use gerrit)
2012/03/22 12:39:29
Done.
|
+ endpointer_(kAudioSampleRate), |
+ context_getter_(context_getter), |
caller_id_(caller_id), |
language_(language), |
grammar_(grammar), |
filter_profanities_(filter_profanities), |
hardware_info_(hardware_info), |
- origin_url_(origin_url), |
- context_getter_(context_getter), |
- codec_(AudioEncoder::CODEC_FLAC), |
- encoder_(NULL), |
- endpointer_(kAudioSampleRate), |
- num_samples_recorded_(0), |
- audio_level_(0.0f), |
- audio_manager_(NULL) { |
+ origin_url_(origin_url) { |
+ DCHECK(listener_ != NULL); |
Satish
2012/03/22 12:03:28
is this DCHECK required? There are other pointer p
Primiano Tucci (use gerrit)
2012/03/22 12:39:29
The only other pointer I see is context_getter, t |
|
endpointer_.set_speech_input_complete_silence_length( |
base::Time::kMicrosecondsPerSecond / 2); |
endpointer_.set_long_speech_input_complete_silence_length( |
@@ -113,42 +135,40 @@ SpeechRecognizerImpl::~SpeechRecognizerImpl() { |
// Recording should have stopped earlier due to the endpointer or |
// |StopRecording| being called. |
DCHECK(!audio_controller_.get()); |
- DCHECK(!request_.get() || !request_->HasPendingRequest()); |
- DCHECK(!encoder_.get()); |
+ DCHECK(!recognition_engine_.get() || |
+ !recognition_engine_->IsRecognitionPending()); |
endpointer_.EndSession(); |
} |
-bool SpeechRecognizerImpl::StartRecognition() { |
+void SpeechRecognizerImpl::StartRecognition() { |
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
DCHECK(!audio_controller_.get()); |
- DCHECK(!request_.get() || !request_->HasPendingRequest()); |
- DCHECK(!encoder_.get()); |
+ DCHECK(!recognition_engine_.get() || |
+ !recognition_engine_->IsRecognitionPending()); |
// The endpointer needs to estimate the environment/background noise before |
// starting to treat the audio as user input. In |HandleOnData| we wait until |
// such time has passed before switching to user input mode. |
endpointer_.SetEnvironmentEstimationMode(); |
- encoder_.reset(AudioEncoder::Create(codec_, kAudioSampleRate, |
- kNumBitsPerAudioSample)); |
- int samples_per_packet = (kAudioSampleRate * kAudioPacketIntervalMs) / 1000; |
+ AudioManager* audio_manager = (testing_audio_manager_ != NULL) ? |
+ testing_audio_manager_ : |
+ BrowserMainLoop::GetAudioManager(); |
+ const int samples_per_packet = kAudioSampleRate * |
+ GoogleOneShotRemoteEngine::kAudioPacketIntervalMs / 1000; |
Satish
2012/03/22 12:03:28
this doesn't seem right, having sampling rate as p
Primiano Tucci (use gerrit)
2012/03/22 12:39:29
Hmm this is transitional code, I know it is not cl
|
AudioParameters params(AudioParameters::AUDIO_PCM_LINEAR, kChannelLayout, |
kAudioSampleRate, kNumBitsPerAudioSample, |
samples_per_packet); |
- audio_controller_ = AudioInputController::Create( |
- audio_manager_ ? audio_manager_ : BrowserMainLoop::GetAudioManager(), |
- this, params); |
+ audio_controller_ = AudioInputController::Create(audio_manager, this, params); |
DCHECK(audio_controller_.get()); |
VLOG(1) << "SpeechRecognizer starting record."; |
num_samples_recorded_ = 0; |
audio_controller_->Record(); |
- |
- return true; |
} |
void SpeechRecognizerImpl::AbortRecognition() { |
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
- DCHECK(audio_controller_.get() || request_.get()); |
+ DCHECK(audio_controller_.get() || recognition_engine_.get()); |
// Stop recording if required. |
if (audio_controller_.get()) { |
@@ -156,8 +176,7 @@ void SpeechRecognizerImpl::AbortRecognition() { |
} |
VLOG(1) << "SpeechRecognizer canceling recognition."; |
- encoder_.reset(); |
- request_.reset(); |
+ recognition_engine_.reset(); |
Satish
2012/03/22 12:03:28
is it required to call EndRecognition here or does
Primiano Tucci (use gerrit)
2012/03/22 12:39:29
I didn't make a lot of changes in this CL as this
|
} |
void SpeechRecognizerImpl::StopAudioCapture() { |
@@ -170,29 +189,15 @@ void SpeechRecognizerImpl::StopAudioCapture() { |
CloseAudioControllerSynchronously(); |
- listener_->OnSoundEnd(caller_id_); |
Satish
2012/03/22 12:03:28
I don't see OnSoundEnd called from this class anym
Primiano Tucci (use gerrit)
2012/03/22 12:39:29
Done.
|
listener_->OnAudioEnd(caller_id_); |
- // UploadAudioChunk requires a non-empty final buffer. So we encode a packet |
- // of silence in case encoder had no data already. |
- std::vector<short> samples((kAudioSampleRate * kAudioPacketIntervalMs) / |
- 1000); |
- AudioChunk dummy_chunk(reinterpret_cast<uint8*>(&samples[0]), |
- samples.size() * sizeof(short), |
- encoder_->bits_per_sample() / 8); |
- encoder_->Encode(dummy_chunk); |
- encoder_->Flush(); |
- scoped_ptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear()); |
- DCHECK(!encoded_data->IsEmpty()); |
- encoder_.reset(); |
- |
// If we haven't got any audio yet end the recognition sequence here. |
- if (request_ == NULL) { |
+ if (recognition_engine_ == NULL) { |
// Guard against the listener freeing us until we finish our job. |
scoped_refptr<SpeechRecognizerImpl> me(this); |
listener_->OnRecognitionEnd(caller_id_); |
} else { |
- request_->UploadAudioChunk(*encoded_data, true /* is_last_chunk */); |
+ recognition_engine_->AudioChunksEnded(); |
} |
} |
@@ -237,24 +242,32 @@ void SpeechRecognizerImpl::HandleOnData(AudioChunk* raw_audio) { |
bool speech_was_heard_before_packet = endpointer_.DidStartReceivingSpeech(); |
- encoder_->Encode(*raw_audio); |
float rms; |
endpointer_.ProcessAudio(*raw_audio, &rms); |
bool did_clip = DetectClipping(*raw_audio); |
num_samples_recorded_ += raw_audio->NumSamples(); |
- if (request_ == NULL) { |
+ if (recognition_engine_ == NULL) { |
// This was the first audio packet recorded, so start a request to the |
// server to send the data and inform the listener. |
listener_->OnAudioStart(caller_id_); |
- request_.reset(new SpeechRecognitionRequest(context_getter_.get(), this)); |
- request_->Start(language_, grammar_, filter_profanities_, |
- hardware_info_, origin_url_, encoder_->mime_type()); |
+ GoogleOneShotRemoteEngineConfig google_sr_config; |
Satish
2012/03/22 12:03:28
suggest moving all this to a separate helper funct
Primiano Tucci (use gerrit)
2012/03/22 12:39:29
Same as above, this code is temporary and the next
|
+ google_sr_config.language = language_; |
+ google_sr_config.grammar = grammar_; |
+ google_sr_config.audio_sample_rate = kAudioSampleRate; |
+ google_sr_config.audio_num_bits_per_sample = kNumBitsPerAudioSample; |
+ google_sr_config.filter_profanities = filter_profanities_; |
+ google_sr_config.hardware_info = hardware_info_; |
+ google_sr_config.origin_url = origin_url_; |
+ GoogleOneShotRemoteEngine* google_sr_engine = |
+ new GoogleOneShotRemoteEngine(context_getter_.get()); |
+ google_sr_engine->SetConfig(google_sr_config); |
+ recognition_engine_.reset(google_sr_engine); |
+ recognition_engine_->set_delegate(this); |
+ recognition_engine_->StartRecognition(); |
} |
- scoped_ptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear()); |
- DCHECK(!encoded_data->IsEmpty()); |
- request_->UploadAudioChunk(*encoded_data, false /* is_last_chunk */); |
+ recognition_engine_->TakeAudioChunk(*raw_audio); |
if (endpointer_.IsEstimatingEnvironment()) { |
// Check if we have gathered enough audio for the endpointer to do |
@@ -270,7 +283,7 @@ void SpeechRecognizerImpl::HandleOnData(AudioChunk* raw_audio) { |
// Check if we have waited too long without hearing any speech. |
bool speech_was_heard_after_packet = endpointer_.DidStartReceivingSpeech(); |
if (!speech_was_heard_after_packet && |
- num_samples_recorded_ >= kNoSpeechTimeoutSec * kAudioSampleRate) { |
+ num_samples_recorded_ >= (kNoSpeechTimeoutMs / 1000) * kAudioSampleRate) { |
InformErrorAndAbortRecognition( |
content::SPEECH_RECOGNITION_ERROR_NO_SPEECH); |
return; |
@@ -302,7 +315,7 @@ void SpeechRecognizerImpl::HandleOnData(AudioChunk* raw_audio) { |
StopAudioCapture(); |
} |
-void SpeechRecognizerImpl::SetRecognitionResult( |
+void SpeechRecognizerImpl::OnSpeechRecognitionEngineResult( |
const content::SpeechRecognitionResult& result) { |
if (result.error != content::SPEECH_RECOGNITION_ERROR_NONE) { |
InformErrorAndAbortRecognition(result.error); |
@@ -315,6 +328,11 @@ void SpeechRecognizerImpl::SetRecognitionResult( |
listener_->OnRecognitionEnd(caller_id_); |
} |
+void SpeechRecognizerImpl::OnSpeechRecognitionEngineError( |
+ const content::SpeechRecognitionError& error) { |
+ InformErrorAndAbortRecognition(error.code); |
+} |
+ |
void SpeechRecognizerImpl::InformErrorAndAbortRecognition( |
content::SpeechRecognitionErrorCode error) { |
DCHECK_NE(error, content::SPEECH_RECOGNITION_ERROR_NONE); |
@@ -338,17 +356,23 @@ void SpeechRecognizerImpl::CloseAudioControllerSynchronously() { |
audio_controller_ = NULL; // Releases the ref ptr. |
} |
-void SpeechRecognizerImpl::SetAudioManagerForTesting( |
- AudioManager* audio_manager) { |
- audio_manager_ = audio_manager; |
-} |
- |
bool SpeechRecognizerImpl::IsActive() const { |
- return (request_.get() != NULL); |
+ return (recognition_engine_.get() != NULL); |
} |
bool SpeechRecognizerImpl::IsCapturingAudio() const { |
return (audio_controller_.get() != NULL); |
} |
+const SpeechRecognitionEngine& |
+ SpeechRecognizerImpl::recognition_engine() const { |
+ return *(recognition_engine_.get()); |
+} |
+ |
+void SpeechRecognizerImpl::SetAudioManagerForTesting( |
+ AudioManager* audio_manager) { |
+ testing_audio_manager_ = audio_manager; |
+} |
+ |
+ |
} // namespace speech |