content/browser/speech/speech_recognizer.cc - Issue 6615020: Stream speech audio to server as it gets recorded, instead of waiting until end of recording.

Unified Diff: content/browser/speech/speech_recognizer.cc

Issue 6615020: Stream speech audio to server as it gets recorded, instead of waiting until end of recording. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Created 9 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: content/browser/speech/speech_recognizer.cc

diff --git a/content/browser/speech/speech_recognizer.cc b/content/browser/speech/speech_recognizer.cc

index edd34195b2ef5600b05ecd612b2d16280d3e79b6..786a6f2e973266b61311e3b05fa9d3aa53dec39e 100644

--- a/content/browser/speech/speech_recognizer.cc

+++ b/content/browser/speech/speech_recognizer.cc

@@ -110,6 +110,7 @@ bool SpeechRecognizer::StartRecording() {

VLOG(1) << "SpeechRecognizer starting record.";

num_samples_recorded_ = 0;

audio_controller_->Record();

+ previous_audio_chunk_.clear();

return true;

}

@@ -141,34 +142,33 @@ void SpeechRecognizer::StopRecording() {

VLOG(1) << "SpeechRecognizer stopping record.";

audio_controller_->Close();

audio_controller_ = NULL; // Releases the ref ptr.

- encoder_->Flush();

delegate_->DidCompleteRecording(caller_id_);

- // Since the http request takes a single string as POST data, allocate

- // one and copy over bytes from the audio buffers to the string.

- // And If we haven't got any audio yet end the recognition sequence here.

- string mime_type = encoder_->mime_type();

- string data;

- encoder_->GetEncodedData(&data);

+ // Get any last bits of encoded data left.

+ encoder_->Flush();

+ string encoded_data;

+ encoder_->GetEncodedDataAndClear(&encoded_data);

encoder_.reset();

- if (data.empty()) {

+ // If we haven't got any audio yet end the recognition sequence here.

+ if (request_ == NULL) {

// Guard against the delegate freeing us until we finish our job.

scoped_refptr<SpeechRecognizer> me(this);

delegate_->DidCompleteRecognition(caller_id_);

} else {

- DCHECK(!request_.get());

- request_.reset(new SpeechRecognitionRequest(

- Profile::GetDefaultRequestContext(), this));

- request_->Send(language_, grammar_, hardware_info_, origin_url_,

- mime_type, data);

+ // UploadAudioChunk requires a non-empty buffer. So we check if there was

+ // any data available since last time we sent and if not try to send the

+ // last chunk again (i.e. repeat the last 100ms of audio). If nothing was

+ // recorded yet, we just send a whitespace string.

bulach 2011/03/03 20:05:00: hmm, I haven't followed the other CL this one depe… [comment preview truncated]

+ if (encoded_data.empty()) {

+ encoded_data = !previous_audio_chunk_.empty() ?

+ previous_audio_chunk_ : " ";

+ }

+ request_->UploadAudioChunk(encoded_data, true);

}

-void SpeechRecognizer::ReleaseAudioBuffers() {

// Invoked in the audio thread.

void SpeechRecognizer::OnError(AudioInputController* controller,

int error_code) {

@@ -214,7 +214,6 @@ void SpeechRecognizer::HandleOnData(string* data) {

const short* samples = reinterpret_cast<const short*>(data->data());

DCHECK((data->length() % sizeof(short)) == 0);

int num_samples = data->length() / sizeof(short);

encoder_->Encode(samples, num_samples);

float rms;

endpointer_.ProcessAudio(samples, num_samples, &rms);

@@ -222,6 +221,21 @@ void SpeechRecognizer::HandleOnData(string* data) {

delete data;

num_samples_recorded_ += num_samples;

+ if (request_ == NULL) {

+ // This was the first audio packet recorded, so start a request to the

+ // server to send the data.

+ request_.reset(new SpeechRecognitionRequest(

+ Profile::GetDefaultRequestContext(), this));

+ request_->Start(language_, grammar_, hardware_info_, origin_url_,

+ encoder_->mime_type());

+ }

+ string encoded_data;

+ encoder_->GetEncodedDataAndClear(&encoded_data);

+ DCHECK(!encoded_data.empty());

+ request_->UploadAudioChunk(encoded_data, false);

+ previous_audio_chunk_ = encoded_data;

if (endpointer_.IsEstimatingEnvironment()) {

// Check if we have gathered enough audio for the endpointer to do

// environment estimation and should move on to detect speech/end of speech.

« no previous file with comments | « content/browser/speech/speech_recognizer.h ('k') | no next file » | no next file with comments »