content/browser/speech/speech_recognizer.cc - Issue 6615020: Stream speech audio to server as it gets recorded, instead of waiting until end of recording.

Side by Side Diff: content/browser/speech/speech_recognizer.cc

Issue 6615020: Stream speech audio to server as it gets recorded, instead of waiting until end of recording. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: . Created 9 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« chrome/common/net/url_fetcher.h ('K') | « content/browser/speech/speech_recognizer.h ('k') | content/browser/speech/speech_recognizer_unittest.cc » ('j') | content/browser/speech/speech_recognizer_unittest.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "content/browser/speech/speech_recognizer.h"	5 #include "content/browser/speech/speech_recognizer.h"

6	6

7 #include "base/time.h"	7 #include "base/time.h"

8 #include "chrome/browser/profiles/profile.h"	8 #include "chrome/browser/profiles/profile.h"

9 #include "chrome/common/net/url_request_context_getter.h"	9 #include "chrome/common/net/url_request_context_getter.h"

10 #include "content/browser/browser_thread.h"	10 #include "content/browser/browser_thread.h"

(...skipping 123 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
134 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));	134 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));

135	135

136 // If audio recording has already stopped and we are in recognition phase,	136 // If audio recording has already stopped and we are in recognition phase,

137 // silently ignore any more calls to stop recording.	137 // silently ignore any more calls to stop recording.

138 if (!audio_controller_.get())	138 if (!audio_controller_.get())

139 return;	139 return;

140	140

141 VLOG(1) << "SpeechRecognizer stopping record.";	141 VLOG(1) << "SpeechRecognizer stopping record.";

142 audio_controller_->Close();	142 audio_controller_->Close();

143 audio_controller_ = NULL; // Releases the ref ptr.	143 audio_controller_ = NULL; // Releases the ref ptr.

144 encoder_->Flush();

145	144

146 delegate_->DidCompleteRecording(caller_id_);	145 delegate_->DidCompleteRecording(caller_id_);

147	146

148 // Since the http request takes a single string as POST data, allocate	147 // UploadAudioChunk requires a non-empty buffer. So we encode a packet of
	bulach 2011/03/04 12:41:08 maybe: non-empty final buffer
149 // one and copy over bytes from the audio buffers to the string.	148 // silence in case encoder had no data already.

150 // And If we haven't got any audio yet end the recognition sequence here.	149 int num_samples = (kAudioSampleRate * kAudioPacketIntervalMs) / 1000;

151 string mime_type = encoder_->mime_type();	150 scoped_ptr<short> samples(new short[num_samples]);

152 string data;	151 memset(samples.get(), 0, sizeof(short) * num_samples);
	bulach 2011/03/04 12:41:08 would this work? vector<short> sample((kAudioSampleRate * kAudioPacketIntervalMs) / 1000); encoder_->Encode(sample.data(), sample.size());
153 encoder_->GetEncodedData(&data);	152 encoder_->Encode(samples.get(), num_samples);

	153 encoder_->Flush();

	154 string encoded_data;

	155 encoder_->GetEncodedDataAndClear(&encoded_data);

	156 DCHECK(!encoded_data.empty());

154 encoder_.reset();	157 encoder_.reset();

155	158

156 if (data.empty()) {	159 // If we haven't got any audio yet end the recognition sequence here.

	160 if (request_ == NULL) {

157 // Guard against the delegate freeing us until we finish our job.	161 // Guard against the delegate freeing us until we finish our job.

158 scoped_refptr<SpeechRecognizer> me(this);	162 scoped_refptr<SpeechRecognizer> me(this);

159 delegate_->DidCompleteRecognition(caller_id_);	163 delegate_->DidCompleteRecognition(caller_id_);

160 } else {	164 } else {

161 DCHECK(!request_.get());	165 request_->UploadAudioChunk(encoded_data, true);
	bulach 2011/03/04 12:41:08 nit: true /* final */
162 request_.reset(new SpeechRecognitionRequest(

163 Profile::GetDefaultRequestContext(), this));

164 request_->Send(language_, grammar_, hardware_info_, origin_url_,

165 mime_type, data);

166 }	166 }

167 }	167 }

168	168

169 void SpeechRecognizer::ReleaseAudioBuffers() {

170 }

171

172 // Invoked in the audio thread.	169 // Invoked in the audio thread.

173 void SpeechRecognizer::OnError(AudioInputController* controller,	170 void SpeechRecognizer::OnError(AudioInputController* controller,

174 int error_code) {	171 int error_code) {

175 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,	172 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,

176 NewRunnableMethod(this,	173 NewRunnableMethod(this,

177 &SpeechRecognizer::HandleOnError,	174 &SpeechRecognizer::HandleOnError,

178 error_code));	175 error_code));

179 }	176 }

180	177

181 void SpeechRecognizer::HandleOnError(int error_code) {	178 void SpeechRecognizer::HandleOnError(int error_code) {

(...skipping 25 matching lines...) Expand all Loading...
207 // recording might have been stopped after this buffer was posted to the queue	204 // recording might have been stopped after this buffer was posted to the queue

208 // by |OnData|.	205 // by |OnData|.

209 if (!audio_controller_.get()) {	206 if (!audio_controller_.get()) {

210 delete data;	207 delete data;

211 return;	208 return;

212 }	209 }

213	210

214 const short* samples = reinterpret_cast<const short*>(data->data());	211 const short* samples = reinterpret_cast<const short*>(data->data());

215 DCHECK((data->length() % sizeof(short)) == 0);	212 DCHECK((data->length() % sizeof(short)) == 0);

216 int num_samples = data->length() / sizeof(short);	213 int num_samples = data->length() / sizeof(short);

217

218 encoder_->Encode(samples, num_samples);	214 encoder_->Encode(samples, num_samples);

219 float rms;	215 float rms;

220 endpointer_.ProcessAudio(samples, num_samples, &rms);	216 endpointer_.ProcessAudio(samples, num_samples, &rms);

221 bool did_clip = Clipping(samples, num_samples);	217 bool did_clip = Clipping(samples, num_samples);

222 delete data;	218 delete data;

223 num_samples_recorded_ += num_samples;	219 num_samples_recorded_ += num_samples;

224	220

	221 if (request_ == NULL) {

	222 // This was the first audio packet recorded, so start a request to the

	223 // server to send the data.

	224 request_.reset(new SpeechRecognitionRequest(

	225 Profile::GetDefaultRequestContext(), this));

	226 request_->Start(language_, grammar_, hardware_info_, origin_url_,

	227 encoder_->mime_type());

	228 }

	229

	230 string encoded_data;

	231 encoder_->GetEncodedDataAndClear(&encoded_data);

	232 DCHECK(!encoded_data.empty());

	233 request_->UploadAudioChunk(encoded_data, false);
	bulach 2011/03/04 12:41:08 nit: false /* final */
	234

225 if (endpointer_.IsEstimatingEnvironment()) {	235 if (endpointer_.IsEstimatingEnvironment()) {

226 // Check if we have gathered enough audio for the endpointer to do	236 // Check if we have gathered enough audio for the endpointer to do

227 // environment estimation and should move on to detect speech/end of speech.	237 // environment estimation and should move on to detect speech/end of speech.

228 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs *	238 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs *

229 kAudioSampleRate) / 1000) {	239 kAudioSampleRate) / 1000) {

230 endpointer_.SetUserInputMode();	240 endpointer_.SetUserInputMode();

231 delegate_->DidCompleteEnvironmentEstimation(caller_id_);	241 delegate_->DidCompleteEnvironmentEstimation(caller_id_);

232 }	242 }

233 return; // No more processing since we are still estimating environment.	243 return; // No more processing since we are still estimating environment.

234 }	244 }

(...skipping 48 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
283	293

284 void SpeechRecognizer::InformErrorAndCancelRecognition(ErrorCode error) {	294 void SpeechRecognizer::InformErrorAndCancelRecognition(ErrorCode error) {

285 CancelRecognition();	295 CancelRecognition();

286	296

287 // Guard against the delegate freeing us until we finish our job.	297 // Guard against the delegate freeing us until we finish our job.

288 scoped_refptr<SpeechRecognizer> me(this);	298 scoped_refptr<SpeechRecognizer> me(this);

289 delegate_->OnRecognizerError(caller_id_, error);	299 delegate_->OnRecognizerError(caller_id_, error);

290 }	300 }

291	301

292 } // namespace speech_input	302 } // namespace speech_input

OLD	NEW