content/browser/speech/speech_recognizer.cc - Issue 6615020: Stream speech audio to server as it gets recorded, instead of waiting until end of recording.

Side by Side Diff: content/browser/speech/speech_recognizer.cc

Issue 6615020: Stream speech audio to server as it gets recorded, instead of waiting until end of recording. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Created 9 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "content/browser/speech/speech_recognizer.h"	5 #include "content/browser/speech/speech_recognizer.h"

6	6

7 #include "base/time.h"	7 #include "base/time.h"

8 #include "chrome/browser/profiles/profile.h"	8 #include "chrome/browser/profiles/profile.h"

9 #include "chrome/common/net/url_request_context_getter.h"	9 #include "chrome/common/net/url_request_context_getter.h"

10 #include "content/browser/browser_thread.h"	10 #include "content/browser/browser_thread.h"

(...skipping 92 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
103 kNumBitsPerAudioSample));	103 kNumBitsPerAudioSample));

104 int samples_per_packet = (kAudioSampleRate * kAudioPacketIntervalMs) / 1000;	104 int samples_per_packet = (kAudioSampleRate * kAudioPacketIntervalMs) / 1000;

105 AudioParameters params(AudioParameters::AUDIO_PCM_LINEAR, kNumAudioChannels,	105 AudioParameters params(AudioParameters::AUDIO_PCM_LINEAR, kNumAudioChannels,

106 kAudioSampleRate, kNumBitsPerAudioSample,	106 kAudioSampleRate, kNumBitsPerAudioSample,

107 samples_per_packet);	107 samples_per_packet);

108 audio_controller_ = AudioInputController::Create(this, params);	108 audio_controller_ = AudioInputController::Create(this, params);

109 DCHECK(audio_controller_.get());	109 DCHECK(audio_controller_.get());

110 VLOG(1) << "SpeechRecognizer starting record.";	110 VLOG(1) << "SpeechRecognizer starting record.";

111 num_samples_recorded_ = 0;	111 num_samples_recorded_ = 0;

112 audio_controller_->Record();	112 audio_controller_->Record();

	113 previous_audio_chunk_.clear();

113	114

114 return true;	115 return true;

115 }	116 }

116	117

117 void SpeechRecognizer::CancelRecognition() {	118 void SpeechRecognizer::CancelRecognition() {

118 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));	119 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));

119 DCHECK(audio_controller_.get() \|\| request_.get());	120 DCHECK(audio_controller_.get() \|\| request_.get());

120	121

121 // Stop recording if required.	122 // Stop recording if required.

122 if (audio_controller_.get()) {	123 if (audio_controller_.get()) {

(...skipping 11 matching lines...) Expand all Loading...
134 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));	135 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));

135	136

136 // If audio recording has already stopped and we are in recognition phase,	137 // If audio recording has already stopped and we are in recognition phase,

137 // silently ignore any more calls to stop recording.	138 // silently ignore any more calls to stop recording.

138 if (!audio_controller_.get())	139 if (!audio_controller_.get())

139 return;	140 return;

140	141

141 VLOG(1) << "SpeechRecognizer stopping record.";	142 VLOG(1) << "SpeechRecognizer stopping record.";

142 audio_controller_->Close();	143 audio_controller_->Close();

143 audio_controller_ = NULL; // Releases the ref ptr.	144 audio_controller_ = NULL; // Releases the ref ptr.

144 encoder_->Flush();

145	145

146 delegate_->DidCompleteRecording(caller_id_);	146 delegate_->DidCompleteRecording(caller_id_);

147	147

148 // Since the http request takes a single string as POST data, allocate	148 // Get any last bits of encoded data left.

149 // one and copy over bytes from the audio buffers to the string.	149 encoder_->Flush();

150 // And If we haven't got any audio yet end the recognition sequence here.	150 string encoded_data;

151 string mime_type = encoder_->mime_type();	151 encoder_->GetEncodedDataAndClear(&encoded_data);

152 string data;

153 encoder_->GetEncodedData(&data);

154 encoder_.reset();	152 encoder_.reset();

155	153

156 if (data.empty()) {	154 // If we haven't got any audio yet end the recognition sequence here.

	155 if (request_ == NULL) {

157 // Guard against the delegate freeing us until we finish our job.	156 // Guard against the delegate freeing us until we finish our job.

158 scoped_refptr<SpeechRecognizer> me(this);	157 scoped_refptr<SpeechRecognizer> me(this);

159 delegate_->DidCompleteRecognition(caller_id_);	158 delegate_->DidCompleteRecognition(caller_id_);

160 } else {	159 } else {

161 DCHECK(!request_.get());	160 // UploadAudioChunk requires a non-empty buffer. So we check if there was

162 request_.reset(new SpeechRecognitionRequest(	161 // any data available since last time we sent and if not try to send the

163 Profile::GetDefaultRequestContext(), this));	162 // last chunk again (i.e. repeat the last 100ms of audio). If nothing was

164 request_->Send(language_, grammar_, hardware_info_, origin_url_,	163 // recorded yet, we just send a whitespace string.
	bulach 2011/03/03 20:05:00 Hmm, I haven't followed the other CL this one depends on, but wouldn't it be simpler to have just a "FinishChunk" (or something similar)? Worst case, it may be simpler to move the "emptiness" test down into UploadAudioChunk rather than doing it here. Also, why is it necessary to send previous_audio_chunk_ again? If we can't work around both of these limitations, please also clarify in the code comment why the last chunk is repeated.
165 mime_type, data);	164 if (encoded_data.empty()) {

	165 encoded_data = !previous_audio_chunk_.empty() ?

	166 previous_audio_chunk_ : " ";

	167 }

	168 request_->UploadAudioChunk(encoded_data, true);

166 }	169 }

167 }	170 }

168	171

169 void SpeechRecognizer::ReleaseAudioBuffers() {

170 }

171

172 // Invoked in the audio thread.	172 // Invoked in the audio thread.

173 void SpeechRecognizer::OnError(AudioInputController* controller,	173 void SpeechRecognizer::OnError(AudioInputController* controller,

174 int error_code) {	174 int error_code) {

175 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,	175 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,

176 NewRunnableMethod(this,	176 NewRunnableMethod(this,

177 &SpeechRecognizer::HandleOnError,	177 &SpeechRecognizer::HandleOnError,

178 error_code));	178 error_code));

179 }	179 }

180	180

181 void SpeechRecognizer::HandleOnError(int error_code) {	181 void SpeechRecognizer::HandleOnError(int error_code) {

(...skipping 25 matching lines...) Expand all Loading...
207 // recording might have been stopped after this buffer was posted to the queue	207 // recording might have been stopped after this buffer was posted to the queue

208 // by \|OnData\|.	208 // by \|OnData\|.

209 if (!audio_controller_.get()) {	209 if (!audio_controller_.get()) {

210 delete data;	210 delete data;

211 return;	211 return;

212 }	212 }

213	213

214 const short* samples = reinterpret_cast<const short*>(data->data());	214 const short* samples = reinterpret_cast<const short*>(data->data());

215 DCHECK((data->length() % sizeof(short)) == 0);	215 DCHECK((data->length() % sizeof(short)) == 0);

216 int num_samples = data->length() / sizeof(short);	216 int num_samples = data->length() / sizeof(short);

217

218 encoder_->Encode(samples, num_samples);	217 encoder_->Encode(samples, num_samples);

219 float rms;	218 float rms;

220 endpointer_.ProcessAudio(samples, num_samples, &rms);	219 endpointer_.ProcessAudio(samples, num_samples, &rms);

221 bool did_clip = Clipping(samples, num_samples);	220 bool did_clip = Clipping(samples, num_samples);

222 delete data;	221 delete data;

223 num_samples_recorded_ += num_samples;	222 num_samples_recorded_ += num_samples;

224	223

	224 if (request_ == NULL) {

	225 // This was the first audio packet recorded, so start a request to the

	226 // server to send the data.

	227 request_.reset(new SpeechRecognitionRequest(

	228 Profile::GetDefaultRequestContext(), this));

	229 request_->Start(language_, grammar_, hardware_info_, origin_url_,

	230 encoder_->mime_type());

	231 }

	232

	233 string encoded_data;

	234 encoder_->GetEncodedDataAndClear(&encoded_data);

	235 DCHECK(!encoded_data.empty());

	236 request_->UploadAudioChunk(encoded_data, false);

	237 previous_audio_chunk_ = encoded_data;

	238

225 if (endpointer_.IsEstimatingEnvironment()) {	239 if (endpointer_.IsEstimatingEnvironment()) {

226 // Check if we have gathered enough audio for the endpointer to do	240 // Check if we have gathered enough audio for the endpointer to do

227 // environment estimation and should move on to detect speech/end of speech.	241 // environment estimation and should move on to detect speech/end of speech.

228 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs *	242 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs *

229 kAudioSampleRate) / 1000) {	243 kAudioSampleRate) / 1000) {

230 endpointer_.SetUserInputMode();	244 endpointer_.SetUserInputMode();

231 delegate_->DidCompleteEnvironmentEstimation(caller_id_);	245 delegate_->DidCompleteEnvironmentEstimation(caller_id_);

232 }	246 }

233 return; // No more processing since we are still estimating environment.	247 return; // No more processing since we are still estimating environment.

234 }	248 }

(...skipping 48 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
283	297

284 void SpeechRecognizer::InformErrorAndCancelRecognition(ErrorCode error) {	298 void SpeechRecognizer::InformErrorAndCancelRecognition(ErrorCode error) {

285 CancelRecognition();	299 CancelRecognition();

286	300

287 // Guard against the delegate freeing us until we finish our job.	301 // Guard against the delegate freeing us until we finish our job.

288 scoped_refptr<SpeechRecognizer> me(this);	302 scoped_refptr<SpeechRecognizer> me(this);

289 delegate_->OnRecognizerError(caller_id_, error);	303 delegate_->OnRecognizerError(caller_id_, error);

290 }	304 }

291	305

292 } // namespace speech_input	306 } // namespace speech_input

OLD	NEW

« no previous file with comments | « content/browser/speech/speech_recognizer.h ('k') | no next file » | no next file with comments »