content/browser/speech/speech_recognizer.cc - Issue 6615020: Stream speech audio to server as it gets recorded, instead of waiting until end of recording.

Side by Side Diff: content/browser/speech/speech_recognizer.cc

Issue 6615020: Stream speech audio to server as it gets recorded, instead of waiting until end of recording. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: . Created 9 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "content/browser/speech/speech_recognizer.h"	5 #include "content/browser/speech/speech_recognizer.h"

6	6

7 #include "base/time.h"	7 #include "base/time.h"

8 #include "chrome/browser/profiles/profile.h"	8 #include "chrome/browser/profiles/profile.h"

9 #include "chrome/common/net/url_request_context_getter.h"	9 #include "chrome/common/net/url_request_context_getter.h"

10 #include "content/browser/browser_thread.h"	10 #include "content/browser/browser_thread.h"

(...skipping 123 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
134 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));	134 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));

135	135

136 // If audio recording has already stopped and we are in recognition phase,	136 // If audio recording has already stopped and we are in recognition phase,

137 // silently ignore any more calls to stop recording.	137 // silently ignore any more calls to stop recording.

138 if (!audio_controller_.get())	138 if (!audio_controller_.get())

139 return;	139 return;

140	140

141 VLOG(1) << "SpeechRecognizer stopping record.";	141 VLOG(1) << "SpeechRecognizer stopping record.";

142 audio_controller_->Close();	142 audio_controller_->Close();

143 audio_controller_ = NULL; // Releases the ref ptr.	143 audio_controller_ = NULL; // Releases the ref ptr.

144 encoder_->Flush();

145	144

146 delegate_->DidCompleteRecording(caller_id_);	145 delegate_->DidCompleteRecording(caller_id_);

147	146

148 // Since the http request takes a single string as POST data, allocate	147 // UploadAudioChunk requires a non-empty final buffer. So we encode a packet

149 // one and copy over bytes from the audio buffers to the string.	148 // of silence in case encoder had no data already.

150 // And If we haven't got any audio yet end the recognition sequence here.	149 std::vector<short> samples((kAudioSampleRate * kAudioPacketIntervalMs) /

151 string mime_type = encoder_->mime_type();	150 1000);

152 string data;	151 encoder_->Encode(&samples[0], samples.size());

153 encoder_->GetEncodedData(&data);	152 encoder_->Flush();

	153 string encoded_data;

	154 encoder_->GetEncodedDataAndClear(&encoded_data);

	155 DCHECK(!encoded_data.empty());

154 encoder_.reset();	156 encoder_.reset();

155	157

156 if (data.empty()) {	158 // If we haven't got any audio yet end the recognition sequence here.

	159 if (request_ == NULL) {

157 // Guard against the delegate freeing us until we finish our job.	160 // Guard against the delegate freeing us until we finish our job.

158 scoped_refptr<SpeechRecognizer> me(this);	161 scoped_refptr<SpeechRecognizer> me(this);

159 delegate_->DidCompleteRecognition(caller_id_);	162 delegate_->DidCompleteRecognition(caller_id_);

160 } else {	163 } else {

161 DCHECK(!request_.get());	164 request_->UploadAudioChunk(encoded_data, true /* is_last_chunk */);

162 request_.reset(new SpeechRecognitionRequest(

163 Profile::GetDefaultRequestContext(), this));

164 request_->Send(language_, grammar_, hardware_info_, origin_url_,

165 mime_type, data);

166 }	165 }

167 }	166 }

168	167

169 void SpeechRecognizer::ReleaseAudioBuffers() {

170 }

171

172 // Invoked in the audio thread.	168 // Invoked in the audio thread.

173 void SpeechRecognizer::OnError(AudioInputController* controller,	169 void SpeechRecognizer::OnError(AudioInputController* controller,

174 int error_code) {	170 int error_code) {

175 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,	171 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,

176 NewRunnableMethod(this,	172 NewRunnableMethod(this,

177 &SpeechRecognizer::HandleOnError,	173 &SpeechRecognizer::HandleOnError,

178 error_code));	174 error_code));

179 }	175 }

180	176

181 void SpeechRecognizer::HandleOnError(int error_code) {	177 void SpeechRecognizer::HandleOnError(int error_code) {

(...skipping 25 matching lines...) Expand all Loading...
207 // recording might have been stopped after this buffer was posted to the queue	203 // recording might have been stopped after this buffer was posted to the queue

208 // by |OnData|.	204 // by |OnData|.

209 if (!audio_controller_.get()) {	205 if (!audio_controller_.get()) {

210 delete data;	206 delete data;

211 return;	207 return;

212 }	208 }

213	209

214 const short* samples = reinterpret_cast<const short*>(data->data());	210 const short* samples = reinterpret_cast<const short*>(data->data());

215 DCHECK((data->length() % sizeof(short)) == 0);	211 DCHECK((data->length() % sizeof(short)) == 0);

216 int num_samples = data->length() / sizeof(short);	212 int num_samples = data->length() / sizeof(short);

217

218 encoder_->Encode(samples, num_samples);	213 encoder_->Encode(samples, num_samples);

219 float rms;	214 float rms;

220 endpointer_.ProcessAudio(samples, num_samples, &rms);	215 endpointer_.ProcessAudio(samples, num_samples, &rms);

221 bool did_clip = Clipping(samples, num_samples);	216 bool did_clip = Clipping(samples, num_samples);

222 delete data;	217 delete data;

223 num_samples_recorded_ += num_samples;	218 num_samples_recorded_ += num_samples;

224	219

	220 if (request_ == NULL) {

	221 // This was the first audio packet recorded, so start a request to the

	222 // server to send the data.

	223 request_.reset(new SpeechRecognitionRequest(

	224 Profile::GetDefaultRequestContext(), this));

	225 request_->Start(language_, grammar_, hardware_info_, origin_url_,

	226 encoder_->mime_type());

	227 }

	228

	229 string encoded_data;

	230 encoder_->GetEncodedDataAndClear(&encoded_data);

	231 DCHECK(!encoded_data.empty());

	232 request_->UploadAudioChunk(encoded_data, false /* is_last_chunk */);

	233

225 if (endpointer_.IsEstimatingEnvironment()) {	234 if (endpointer_.IsEstimatingEnvironment()) {

226 // Check if we have gathered enough audio for the endpointer to do	235 // Check if we have gathered enough audio for the endpointer to do

227 // environment estimation and should move on to detect speech/end of speech.	236 // environment estimation and should move on to detect speech/end of speech.

228 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs *	237 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs *

229 kAudioSampleRate) / 1000) {	238 kAudioSampleRate) / 1000) {

230 endpointer_.SetUserInputMode();	239 endpointer_.SetUserInputMode();

231 delegate_->DidCompleteEnvironmentEstimation(caller_id_);	240 delegate_->DidCompleteEnvironmentEstimation(caller_id_);

232 }	241 }

233 return; // No more processing since we are still estimating environment.	242 return; // No more processing since we are still estimating environment.

234 }	243 }

(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
283	292

284 void SpeechRecognizer::InformErrorAndCancelRecognition(ErrorCode error) {	293 void SpeechRecognizer::InformErrorAndCancelRecognition(ErrorCode error) {

285 CancelRecognition();	294 CancelRecognition();

286	295

287 // Guard against the delegate freeing us until we finish our job.	296 // Guard against the delegate freeing us until we finish our job.

288 scoped_refptr<SpeechRecognizer> me(this);	297 scoped_refptr<SpeechRecognizer> me(this);

289 delegate_->OnRecognizerError(caller_id_, error);	298 delegate_->OnRecognizerError(caller_id_, error);

290 }	299 }

291	300

292 } // namespace speech_input	301 } // namespace speech_input

OLD	NEW

« no previous file with comments | « content/browser/speech/speech_recognizer.h ('k') | content/browser/speech/speech_recognizer_unittest.cc » ('j') | no next file with comments »