Chromium Code Reviews| Index: content/browser/speech/speech_recognizer_impl.cc |
| diff --git a/content/browser/speech/speech_recognizer_impl.cc b/content/browser/speech/speech_recognizer_impl.cc |
| index 84f46a5e69f593735d128a27b7c288ccaa381cff..226dff90b5931dac48ca668521104fc808015c99 100644 |
| --- a/content/browser/speech/speech_recognizer_impl.cc |
| +++ b/content/browser/speech/speech_recognizer_impl.cc |
| @@ -8,17 +8,21 @@ |
| #include "base/time.h" |
| #include "content/browser/browser_main_loop.h" |
| #include "content/browser/speech/audio_buffer.h" |
| -#include "content/public/browser/speech_recognition_event_listener.h" |
| +#include "content/browser/speech/google_one_shot_remote_engine.h" |
| #include "content/public/browser/browser_thread.h" |
| +#include "content/public/browser/speech_recognition_event_listener.h" |
| +#include "content/public/browser/speech_recognizer.h" |
| #include "content/public/common/speech_recognition_result.h" |
| #include "net/url_request/url_request_context_getter.h" |
| +#include "chrome/browser/speech/chrome_speech_recognition_preferences.h" |
| using content::BrowserMainLoop; |
| using content::BrowserThread; |
| +using content::SpeechRecognitionError; |
| using content::SpeechRecognitionEventListener; |
| +using content::SpeechRecognitionResult; |
| using content::SpeechRecognizer; |
| using media::AudioInputController; |
| -using std::string; |
| namespace { |
| @@ -55,6 +59,7 @@ bool DetectClipping(const speech::AudioChunk& chunk) { |
| } // namespace |
| +// TODO(primiano) transitional, see description in speech_recognizer.h. |
|
Satish
2012/03/22 12:03:28
this doesn't seem useful, could remove
Primiano Tucci (use gerrit)
2012/03/22 12:39:29
Done.
|
| SpeechRecognizer* SpeechRecognizer::Create( |
| SpeechRecognitionEventListener* listener, |
| int caller_id, |
| @@ -64,18 +69,37 @@ SpeechRecognizer* SpeechRecognizer::Create( |
| bool filter_profanities, |
| const std::string& hardware_info, |
| const std::string& origin_url) { |
| - return new speech::SpeechRecognizerImpl( |
| - listener, caller_id, language, grammar, context_getter, |
| - filter_profanities, hardware_info, origin_url); |
| + speech::GoogleOneShotRemoteEngineConfig google_sr_config; |
| + google_sr_config.language = language; |
| + google_sr_config.grammar = grammar; |
| + google_sr_config.audio_sample_rate = |
| + speech::SpeechRecognizerImpl::kAudioSampleRate; |
| + google_sr_config.audio_num_bits_per_sample = |
| + speech::SpeechRecognizerImpl::kNumBitsPerAudioSample; |
| + google_sr_config.filter_profanities = filter_profanities; |
| + google_sr_config.hardware_info = hardware_info; |
| + google_sr_config.origin_url = origin_url; |
| + |
| + speech::GoogleOneShotRemoteEngine* google_sr_engine = |
| + new speech::GoogleOneShotRemoteEngine(context_getter); |
|
Satish
2012/03/22 12:03:28
is this code to create google_sr_engine necessary?
Primiano Tucci (use gerrit)
2012/03/22 12:39:29
Oops. It was a "remnant" of the old code. Remove
|
| + google_sr_engine->SetConfig(google_sr_config); |
| + |
| + return new speech::SpeechRecognizerImpl(listener, |
| + caller_id, |
| + language, |
| + grammar, |
| + context_getter, |
| + filter_profanities, |
| + hardware_info, |
| + origin_url); |
| } |
| namespace speech { |
| const int SpeechRecognizerImpl::kAudioSampleRate = 16000; |
| -const int SpeechRecognizerImpl::kAudioPacketIntervalMs = 100; |
| const ChannelLayout SpeechRecognizerImpl::kChannelLayout = CHANNEL_LAYOUT_MONO; |
| const int SpeechRecognizerImpl::kNumBitsPerAudioSample = 16; |
| -const int SpeechRecognizerImpl::kNoSpeechTimeoutSec = 8; |
| +const int SpeechRecognizerImpl::kNoSpeechTimeoutMs = 8000; |
| const int SpeechRecognizerImpl::kEndpointerEstimationTimeMs = 300; |
| SpeechRecognizerImpl::SpeechRecognizerImpl( |
| @@ -88,19 +112,17 @@ SpeechRecognizerImpl::SpeechRecognizerImpl( |
| const std::string& hardware_info, |
| const std::string& origin_url) |
| : listener_(listener), |
| + testing_audio_manager_(NULL), |
| + recognition_engine_(NULL), |
|
Satish
2012/03/22 12:03:28
this isn't required as it is a scoped_ptr
Primiano Tucci (use gerrit)
2012/03/22 12:39:29
Done.
|
| + endpointer_(kAudioSampleRate), |
| + context_getter_(context_getter), |
| caller_id_(caller_id), |
| language_(language), |
| grammar_(grammar), |
| filter_profanities_(filter_profanities), |
| hardware_info_(hardware_info), |
| - origin_url_(origin_url), |
| - context_getter_(context_getter), |
| - codec_(AudioEncoder::CODEC_FLAC), |
| - encoder_(NULL), |
| - endpointer_(kAudioSampleRate), |
| - num_samples_recorded_(0), |
| - audio_level_(0.0f), |
| - audio_manager_(NULL) { |
| + origin_url_(origin_url) { |
| + DCHECK(listener_ != NULL); |
|
Satish
2012/03/22 12:03:28
is this DCHECK required? There are other pointer p
Primiano Tucci (use gerrit)
2012/03/22 12:39:29
The only other pointer I see is context_getter, t
|
| endpointer_.set_speech_input_complete_silence_length( |
| base::Time::kMicrosecondsPerSecond / 2); |
| endpointer_.set_long_speech_input_complete_silence_length( |
| @@ -113,42 +135,40 @@ SpeechRecognizerImpl::~SpeechRecognizerImpl() { |
| // Recording should have stopped earlier due to the endpointer or |
| // |StopRecording| being called. |
| DCHECK(!audio_controller_.get()); |
| - DCHECK(!request_.get() || !request_->HasPendingRequest()); |
| - DCHECK(!encoder_.get()); |
| + DCHECK(!recognition_engine_.get() || |
| + !recognition_engine_->IsRecognitionPending()); |
| endpointer_.EndSession(); |
| } |
| -bool SpeechRecognizerImpl::StartRecognition() { |
| +void SpeechRecognizerImpl::StartRecognition() { |
| DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
| DCHECK(!audio_controller_.get()); |
| - DCHECK(!request_.get() || !request_->HasPendingRequest()); |
| - DCHECK(!encoder_.get()); |
| + DCHECK(!recognition_engine_.get() || |
| + !recognition_engine_->IsRecognitionPending()); |
| // The endpointer needs to estimate the environment/background noise before |
| // starting to treat the audio as user input. In |HandleOnData| we wait until |
| // such time has passed before switching to user input mode. |
| endpointer_.SetEnvironmentEstimationMode(); |
| - encoder_.reset(AudioEncoder::Create(codec_, kAudioSampleRate, |
| - kNumBitsPerAudioSample)); |
| - int samples_per_packet = (kAudioSampleRate * kAudioPacketIntervalMs) / 1000; |
| + AudioManager* audio_manager = (testing_audio_manager_ != NULL) ? |
| + testing_audio_manager_ : |
| + BrowserMainLoop::GetAudioManager(); |
| + const int samples_per_packet = kAudioSampleRate * |
| + GoogleOneShotRemoteEngine::kAudioPacketIntervalMs / 1000; |
|
Satish
2012/03/22 12:03:28
this doesn't seem right, having sampling rate as p
Primiano Tucci (use gerrit)
2012/03/22 12:39:29
Hmm this is transitional code, I know it is not cl
|
| AudioParameters params(AudioParameters::AUDIO_PCM_LINEAR, kChannelLayout, |
| kAudioSampleRate, kNumBitsPerAudioSample, |
| samples_per_packet); |
| - audio_controller_ = AudioInputController::Create( |
| - audio_manager_ ? audio_manager_ : BrowserMainLoop::GetAudioManager(), |
| - this, params); |
| + audio_controller_ = AudioInputController::Create(audio_manager, this, params); |
| DCHECK(audio_controller_.get()); |
| VLOG(1) << "SpeechRecognizer starting record."; |
| num_samples_recorded_ = 0; |
| audio_controller_->Record(); |
| - |
| - return true; |
| } |
| void SpeechRecognizerImpl::AbortRecognition() { |
| DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
| - DCHECK(audio_controller_.get() || request_.get()); |
| + DCHECK(audio_controller_.get() || recognition_engine_.get()); |
| // Stop recording if required. |
| if (audio_controller_.get()) { |
| @@ -156,8 +176,7 @@ void SpeechRecognizerImpl::AbortRecognition() { |
| } |
| VLOG(1) << "SpeechRecognizer canceling recognition."; |
| - encoder_.reset(); |
| - request_.reset(); |
| + recognition_engine_.reset(); |
|
Satish
2012/03/22 12:03:28
is it required to call EndRecognition here or does
Primiano Tucci (use gerrit)
2012/03/22 12:39:29
I didn't make a lot of changes in this CL as this
|
| } |
| void SpeechRecognizerImpl::StopAudioCapture() { |
| @@ -170,29 +189,15 @@ void SpeechRecognizerImpl::StopAudioCapture() { |
| CloseAudioControllerSynchronously(); |
| - listener_->OnSoundEnd(caller_id_); |
|
Satish
2012/03/22 12:03:28
I don't see OnSoundEnd called from this class anymore
Primiano Tucci (use gerrit)
2012/03/22 12:39:29
Done.
|
| listener_->OnAudioEnd(caller_id_); |
| - // UploadAudioChunk requires a non-empty final buffer. So we encode a packet |
| - // of silence in case encoder had no data already. |
| - std::vector<short> samples((kAudioSampleRate * kAudioPacketIntervalMs) / |
| - 1000); |
| - AudioChunk dummy_chunk(reinterpret_cast<uint8*>(&samples[0]), |
| - samples.size() * sizeof(short), |
| - encoder_->bits_per_sample() / 8); |
| - encoder_->Encode(dummy_chunk); |
| - encoder_->Flush(); |
| - scoped_ptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear()); |
| - DCHECK(!encoded_data->IsEmpty()); |
| - encoder_.reset(); |
| - |
| // If we haven't got any audio yet end the recognition sequence here. |
| - if (request_ == NULL) { |
| + if (recognition_engine_ == NULL) { |
| // Guard against the listener freeing us until we finish our job. |
| scoped_refptr<SpeechRecognizerImpl> me(this); |
| listener_->OnRecognitionEnd(caller_id_); |
| } else { |
| - request_->UploadAudioChunk(*encoded_data, true /* is_last_chunk */); |
| + recognition_engine_->AudioChunksEnded(); |
| } |
| } |
| @@ -237,24 +242,32 @@ void SpeechRecognizerImpl::HandleOnData(AudioChunk* raw_audio) { |
| bool speech_was_heard_before_packet = endpointer_.DidStartReceivingSpeech(); |
| - encoder_->Encode(*raw_audio); |
| float rms; |
| endpointer_.ProcessAudio(*raw_audio, &rms); |
| bool did_clip = DetectClipping(*raw_audio); |
| num_samples_recorded_ += raw_audio->NumSamples(); |
| - if (request_ == NULL) { |
| + if (recognition_engine_ == NULL) { |
| // This was the first audio packet recorded, so start a request to the |
| // server to send the data and inform the listener. |
| listener_->OnAudioStart(caller_id_); |
| - request_.reset(new SpeechRecognitionRequest(context_getter_.get(), this)); |
| - request_->Start(language_, grammar_, filter_profanities_, |
| - hardware_info_, origin_url_, encoder_->mime_type()); |
| + GoogleOneShotRemoteEngineConfig google_sr_config; |
|
Satish
2012/03/22 12:03:28
suggest moving all this to a separate helper function
Primiano Tucci (use gerrit)
2012/03/22 12:39:29
Same as above, this code is temporary and the next
|
| + google_sr_config.language = language_; |
| + google_sr_config.grammar = grammar_; |
| + google_sr_config.audio_sample_rate = kAudioSampleRate; |
| + google_sr_config.audio_num_bits_per_sample = kNumBitsPerAudioSample; |
| + google_sr_config.filter_profanities = filter_profanities_; |
| + google_sr_config.hardware_info = hardware_info_; |
| + google_sr_config.origin_url = origin_url_; |
| + GoogleOneShotRemoteEngine* google_sr_engine = |
| + new GoogleOneShotRemoteEngine(context_getter_.get()); |
| + google_sr_engine->SetConfig(google_sr_config); |
| + recognition_engine_.reset(google_sr_engine); |
| + recognition_engine_->set_delegate(this); |
| + recognition_engine_->StartRecognition(); |
| } |
| - scoped_ptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear()); |
| - DCHECK(!encoded_data->IsEmpty()); |
| - request_->UploadAudioChunk(*encoded_data, false /* is_last_chunk */); |
| + recognition_engine_->TakeAudioChunk(*raw_audio); |
| if (endpointer_.IsEstimatingEnvironment()) { |
| // Check if we have gathered enough audio for the endpointer to do |
| @@ -270,7 +283,7 @@ void SpeechRecognizerImpl::HandleOnData(AudioChunk* raw_audio) { |
| // Check if we have waited too long without hearing any speech. |
| bool speech_was_heard_after_packet = endpointer_.DidStartReceivingSpeech(); |
| if (!speech_was_heard_after_packet && |
| - num_samples_recorded_ >= kNoSpeechTimeoutSec * kAudioSampleRate) { |
| + num_samples_recorded_ >= (kNoSpeechTimeoutMs / 1000) * kAudioSampleRate) { |
| InformErrorAndAbortRecognition( |
| content::SPEECH_RECOGNITION_ERROR_NO_SPEECH); |
| return; |
| @@ -302,7 +315,7 @@ void SpeechRecognizerImpl::HandleOnData(AudioChunk* raw_audio) { |
| StopAudioCapture(); |
| } |
| -void SpeechRecognizerImpl::SetRecognitionResult( |
| +void SpeechRecognizerImpl::OnSpeechRecognitionEngineResult( |
| const content::SpeechRecognitionResult& result) { |
| if (result.error != content::SPEECH_RECOGNITION_ERROR_NONE) { |
| InformErrorAndAbortRecognition(result.error); |
| @@ -315,6 +328,11 @@ void SpeechRecognizerImpl::SetRecognitionResult( |
| listener_->OnRecognitionEnd(caller_id_); |
| } |
| +void SpeechRecognizerImpl::OnSpeechRecognitionEngineError( |
| + const content::SpeechRecognitionError& error) { |
| + InformErrorAndAbortRecognition(error.code); |
| +} |
| + |
| void SpeechRecognizerImpl::InformErrorAndAbortRecognition( |
| content::SpeechRecognitionErrorCode error) { |
| DCHECK_NE(error, content::SPEECH_RECOGNITION_ERROR_NONE); |
| @@ -338,17 +356,23 @@ void SpeechRecognizerImpl::CloseAudioControllerSynchronously() { |
| audio_controller_ = NULL; // Releases the ref ptr. |
| } |
| -void SpeechRecognizerImpl::SetAudioManagerForTesting( |
| - AudioManager* audio_manager) { |
| - audio_manager_ = audio_manager; |
| -} |
| - |
| bool SpeechRecognizerImpl::IsActive() const { |
| - return (request_.get() != NULL); |
| + return (recognition_engine_.get() != NULL); |
| } |
| bool SpeechRecognizerImpl::IsCapturingAudio() const { |
| return (audio_controller_.get() != NULL); |
| } |
| +const SpeechRecognitionEngine& |
| + SpeechRecognizerImpl::recognition_engine() const { |
| + return *(recognition_engine_.get()); |
| +} |
| + |
| +void SpeechRecognizerImpl::SetAudioManagerForTesting( |
| + AudioManager* audio_manager) { |
| + testing_audio_manager_ = audio_manager; |
| +} |
| + |
| + |
| } // namespace speech |