OLD | NEW |
---|---|
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "content/browser/speech/speech_recognizer.h" | 5 #include "content/browser/speech/speech_recognizer.h" |
6 | 6 |
7 #include "base/time.h" | 7 #include "base/time.h" |
8 #include "content/browser/browser_thread.h" | 8 #include "content/browser/browser_thread.h" |
9 #include "net/url_request/url_request_context_getter.h" | 9 #include "net/url_request/url_request_context_getter.h" |
10 | 10 |
(...skipping 127 matching lines...) | |
138 | 138 |
139 // If audio recording has already stopped and we are in recognition phase, | 139 // If audio recording has already stopped and we are in recognition phase, |
140 // silently ignore any more calls to stop recording. | 140 // silently ignore any more calls to stop recording. |
141 if (!audio_controller_.get()) | 141 if (!audio_controller_.get()) |
142 return; | 142 return; |
143 | 143 |
144 VLOG(1) << "SpeechRecognizer stopping record."; | 144 VLOG(1) << "SpeechRecognizer stopping record."; |
145 audio_controller_->Close(); | 145 audio_controller_->Close(); |
146 audio_controller_ = NULL; // Releases the ref ptr. | 146 audio_controller_ = NULL; // Releases the ref ptr. |
147 | 147 |
148 delegate_->DidStopReceivingSpeech(caller_id_); | |
148 delegate_->DidCompleteRecording(caller_id_); | 149 delegate_->DidCompleteRecording(caller_id_); |
149 | 150 |
150 // UploadAudioChunk requires a non-empty final buffer. So we encode a packet | 151 // UploadAudioChunk requires a non-empty final buffer. So we encode a packet |
151 // of silence in case encoder had no data already. | 152 // of silence in case encoder had no data already. |
152 std::vector<short> samples((kAudioSampleRate * kAudioPacketIntervalMs) / | 153 std::vector<short> samples((kAudioSampleRate * kAudioPacketIntervalMs) / |
153 1000); | 154 1000); |
154 encoder_->Encode(&samples[0], samples.size()); | 155 encoder_->Encode(&samples[0], samples.size()); |
155 encoder_->Flush(); | 156 encoder_->Flush(); |
156 string encoded_data; | 157 string encoded_data; |
157 encoder_->GetEncodedDataAndClear(&encoded_data); | 158 encoder_->GetEncodedDataAndClear(&encoded_data); |
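For context on the silence padding above: one packet of zero-valued samples is enough to guarantee the encoder produces a non-empty final chunk before Flush(). A standalone sketch of that buffer construction; the constant values here are illustrative stand-ins, not necessarily the ones defined in this file:

    #include <cstdio>
    #include <vector>

    // Illustrative values; the real kAudioSampleRate / kAudioPacketIntervalMs
    // are defined elsewhere in speech_recognizer.cc.
    const int kAudioSampleRate = 16000;      // samples per second
    const int kAudioPacketIntervalMs = 100;  // duration of one audio packet

    int main() {
      // (16000 * 100) / 1000 = 1600 samples; std::vector value-initializes
      // each short to 0, so this is one packet of pure silence.
      std::vector<short> samples((kAudioSampleRate * kAudioPacketIntervalMs) / 1000);
      std::printf("silence packet: %zu samples\n", samples.size());
      // Encoding this buffer before Flush() ensures the final encoded chunk
      // handed to the upload is never empty.
      return 0;
    }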
(...skipping 21 matching lines...) | |
179 | 180 |
180 void SpeechRecognizer::HandleOnError(int error_code) { | 181 void SpeechRecognizer::HandleOnError(int error_code) { |
181 LOG(WARNING) << "SpeechRecognizer::HandleOnError, code=" << error_code; | 182 LOG(WARNING) << "SpeechRecognizer::HandleOnError, code=" << error_code; |
182 | 183 |
183 // Check if we are still recording before canceling recognition, as | 184 // Check if we are still recording before canceling recognition, as |
184 // recording might have been stopped after this error was posted to the queue | 185 // recording might have been stopped after this error was posted to the queue |
185 // by |OnError|. | 186 // by |OnError|. |
186 if (!audio_controller_.get()) | 187 if (!audio_controller_.get()) |
187 return; | 188 return; |
188 | 189 |
189 InformErrorAndCancelRecognition(RECOGNIZER_ERROR_CAPTURE); | 190 InformErrorAndCancelRecognition(kErrorAudio); |
190 } | 191 } |
191 | 192 |
192 void SpeechRecognizer::OnData(AudioInputController* controller, | 193 void SpeechRecognizer::OnData(AudioInputController* controller, |
193 const uint8* data, uint32 size) { | 194 const uint8* data, uint32 size) { |
194 if (size == 0) // This could happen when recording stops and is normal. | 195 if (size == 0) // This could happen when recording stops and is normal. |
195 return; | 196 return; |
196 | 197 |
197 string* str_data = new string(reinterpret_cast<const char*>(data), size); | 198 string* str_data = new string(reinterpret_cast<const char*>(data), size); |
198 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | 199 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, |
199 NewRunnableMethod(this, | 200 NewRunnableMethod(this, |
200 &SpeechRecognizer::HandleOnData, | 201 &SpeechRecognizer::HandleOnData, |
201 str_data)); | 202 str_data)); |
202 } | 203 } |
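OnData() runs on the audio thread, so the device bytes are copied into a heap-allocated string whose ownership passes to the task posted to the IO thread; HandleOnData() is then responsible for deleting it. A rough standard-C++ analogy for that hand-off (this is only a sketch of the ownership transfer, not the Chromium task-posting API):

    #include <cstdio>
    #include <memory>
    #include <string>
    #include <thread>

    // Stand-in for HandleOnData(): takes ownership of the buffer and frees it
    // when done, mirroring the explicit "delete data" in the real code.
    void HandleOnData(std::unique_ptr<std::string> data) {
      std::printf("processing %zu bytes on the worker thread\n", data->size());
    }  // |data| destroyed here.

    int main() {
      // Stand-in for OnData(): copy the device buffer, then hand it off.
      const char device_buffer[4] = {1, 0, 2, 0};
      auto copy = std::make_unique<std::string>(device_buffer, sizeof(device_buffer));
      std::thread io_thread(HandleOnData, std::move(copy));
      io_thread.join();
      return 0;
    }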
203 | 204 |
204 void SpeechRecognizer::HandleOnData(string* data) { | 205 void SpeechRecognizer::HandleOnData(string* data) { |
205 // Check if we are still recording and if not discard this buffer, as | 206 // Check if we are still recording and if not discard this buffer, as |
206 // recording might have been stopped after this buffer was posted to the queue | 207 // recording might have been stopped after this buffer was posted to the queue |
207 // by |OnData|. | 208 // by |OnData|. |
208 if (!audio_controller_.get()) { | 209 if (!audio_controller_.get()) { |
209 delete data; | 210 delete data; |
210 return; | 211 return; |
211 } | 212 } |
212 | 213 |
214 bool speech_was_heard_before_packet = endpointer_.DidStartReceivingSpeech(); | |
215 | |
213 const short* samples = reinterpret_cast<const short*>(data->data()); | 216 const short* samples = reinterpret_cast<const short*>(data->data()); |
214 DCHECK((data->length() % sizeof(short)) == 0); | 217 DCHECK((data->length() % sizeof(short)) == 0); |
215 int num_samples = data->length() / sizeof(short); | 218 int num_samples = data->length() / sizeof(short); |
216 encoder_->Encode(samples, num_samples); | 219 encoder_->Encode(samples, num_samples); |
217 float rms; | 220 float rms; |
218 endpointer_.ProcessAudio(samples, num_samples, &rms); | 221 endpointer_.ProcessAudio(samples, num_samples, &rms); |
219 bool did_clip = Clipping(samples, num_samples); | 222 bool did_clip = Clipping(samples, num_samples); |
220 delete data; | 223 delete data; |
221 num_samples_recorded_ += num_samples; | 224 num_samples_recorded_ += num_samples; |
222 | 225 |
(...skipping 16 matching lines...) | |
239 // environment estimation and should move on to detect speech/end of speech. | 242 // environment estimation and should move on to detect speech/end of speech. |
240 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs * | 243 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs * |
241 kAudioSampleRate) / 1000) { | 244 kAudioSampleRate) / 1000) { |
242 endpointer_.SetUserInputMode(); | 245 endpointer_.SetUserInputMode(); |
243 delegate_->DidCompleteEnvironmentEstimation(caller_id_); | 246 delegate_->DidCompleteEnvironmentEstimation(caller_id_); |
244 } | 247 } |
245 return; // No more processing since we are still estimating environment. | 248 return; // No more processing since we are still estimating environment. |
246 } | 249 } |
247 | 250 |
248 // Check if we have waited too long without hearing any speech. | 251 // Check if we have waited too long without hearing any speech. |
249 if (!endpointer_.DidStartReceivingSpeech() && | 252 bool speech_was_heard_after_packet = endpointer_.DidStartReceivingSpeech(); |
253 if (!speech_was_heard_after_packet && | |
250 num_samples_recorded_ >= kNoSpeechTimeoutSec * kAudioSampleRate) { | 254 num_samples_recorded_ >= kNoSpeechTimeoutSec * kAudioSampleRate) { |
251 InformErrorAndCancelRecognition(RECOGNIZER_ERROR_NO_SPEECH); | 255 InformErrorAndCancelRecognition(kErrorNoSpeech); |
252 return; | 256 return; |
253 } | 257 } |
254 | 258 |
259 if (!speech_was_heard_before_packet && speech_was_heard_after_packet) | |
260 delegate_->DidStartReceivingSpeech(caller_id_); | |
261 | |
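The DidStartReceivingSpeech notification added here is edge-triggered: the endpointer state is sampled before and after the packet is processed, and the delegate is told only on the false-to-true transition, so it fires at most once per recording. A minimal sketch of that pattern; the Endpointer here is a hypothetical stand-in, not the real class:

    #include <cstdio>

    // Hypothetical endpointer: reports true once speech has been detected.
    struct Endpointer {
      bool heard = false;
      void ProcessAudio(bool packet_has_speech) { heard |= packet_has_speech; }
      bool DidStartReceivingSpeech() const { return heard; }
    };

    int main() {
      Endpointer endpointer;
      const bool packets[] = {false, false, true, true};
      for (bool has_speech : packets) {
        bool before = endpointer.DidStartReceivingSpeech();
        endpointer.ProcessAudio(has_speech);
        bool after = endpointer.DidStartReceivingSpeech();
        if (!before && after)
          std::puts("DidStartReceivingSpeech fired");  // only on the 3rd packet
      }
      return 0;
    }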
255 // Calculate the input volume to display in the UI, smoothing towards the | 262 // Calculate the input volume to display in the UI, smoothing towards the |
256 // new level. | 263 // new level. |
257 float level = (rms - kAudioMeterMinDb) / | 264 float level = (rms - kAudioMeterMinDb) / |
258 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); | 265 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); |
259 level = std::min(std::max(0.0f, level), kAudioMeterRangeMaxUnclipped); | 266 level = std::min(std::max(0.0f, level), kAudioMeterRangeMaxUnclipped); |
260 if (level > audio_level_) { | 267 if (level > audio_level_) { |
261 audio_level_ += (level - audio_level_) * kUpSmoothingFactor; | 268 audio_level_ += (level - audio_level_) * kUpSmoothingFactor; |
262 } else { | 269 } else { |
263 audio_level_ += (level - audio_level_) * kDownSmoothingFactor; | 270 audio_level_ += (level - audio_level_) * kDownSmoothingFactor; |
264 } | 271 } |
265 | 272 |
266 float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) / | 273 float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) / |
267 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); | 274 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); |
268 noise_level = std::min(std::max(0.0f, noise_level), | 275 noise_level = std::min(std::max(0.0f, noise_level), |
269 kAudioMeterRangeMaxUnclipped); | 276 kAudioMeterRangeMaxUnclipped); |
270 | 277 |
271 delegate_->SetInputVolume(caller_id_, did_clip ? 1.0f : audio_level_, | 278 delegate_->SetInputVolume(caller_id_, did_clip ? 1.0f : audio_level_, |
272 noise_level); | 279 noise_level); |
273 | 280 |
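The meter math above maps the RMS value in dB onto a 0..1 UI range and then smooths asymmetrically: a larger factor when the level rises than when it falls, so the meter reacts quickly to speech but decays gently. A small standalone sketch with made-up constants (the real kAudioMeter* values and smoothing factors live elsewhere in this file):

    #include <algorithm>
    #include <cstdio>

    // Made-up values purely for illustration.
    const float kAudioMeterMinDb = 10.0f;
    const float kAudioMeterDbRange = 80.0f;
    const float kAudioMeterRangeMaxUnclipped = 47.0f / 48.0f;
    const float kUpSmoothingFactor = 1.0f;    // jump up immediately
    const float kDownSmoothingFactor = 0.7f;  // decay more slowly

    int main() {
      float audio_level = 0.0f;
      const float rms_samples_db[] = {60.0f, 20.0f, 20.0f};
      for (float rms : rms_samples_db) {
        // Map dB onto [0, kAudioMeterRangeMaxUnclipped].
        float level = (rms - kAudioMeterMinDb) /
                      (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped);
        level = std::min(std::max(0.0f, level), kAudioMeterRangeMaxUnclipped);
        // Asymmetric smoothing toward the new level.
        audio_level += (level - audio_level) *
                       (level > audio_level ? kUpSmoothingFactor : kDownSmoothingFactor);
        std::printf("rms=%.0f dB -> displayed level %.3f\n", rms, audio_level);
      }
      return 0;
    }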
274 if (endpointer_.speech_input_complete()) { | 281 if (endpointer_.speech_input_complete()) |
275 StopRecording(); | 282 StopRecording(); |
276 } | |
277 | 283 |
 278 // TODO(satish): Once we have streaming POST, start sending the data received | 284 // TODO(satish): Once we have streaming POST, start sending the data received |
Satish (2011/10/06 09:09:06): This todo looks obsolete as we are uploading audio
Leandro Graciá Gil (2011/10/06 18:26:25): Done.
279 // here as POST chunks. | 285 // here as POST chunks. |
280 } | 286 } |
281 | 287 |
282 void SpeechRecognizer::SetRecognitionResult( | 288 void SpeechRecognizer::SetRecognitionResult( |
283 bool error, const SpeechInputResultArray& result) { | 289 bool error, const SpeechInputResult& result) { |
284 if (error || result.empty()) { | 290 if (error) { |
285 InformErrorAndCancelRecognition(error ? RECOGNIZER_ERROR_NETWORK : | 291 // Request failed or received an invalid response that couldn't be parsed. |
286 RECOGNIZER_ERROR_NO_RESULTS); | 292 InformErrorAndCancelRecognition(kErrorNetwork); |
287 return; | 293 return; |
288 } | 294 } |
289 | 295 |
290 delegate_->SetRecognitionResult(caller_id_, error, result); | 296 if (result.error != kErrorNone) { |
297 InformErrorAndCancelRecognition(result.error); | |
298 return; | |
299 } | |
291 | 300 |
292 // Guard against the delegate freeing us until we finish our job. | 301 // Guard against the delegate freeing us until we finish our job. |
293 scoped_refptr<SpeechRecognizer> me(this); | 302 scoped_refptr<SpeechRecognizer> me(this); |
303 delegate_->SetRecognitionResult(caller_id_, error, result); | |
294 delegate_->DidCompleteRecognition(caller_id_); | 304 delegate_->DidCompleteRecognition(caller_id_); |
295 } | 305 } |
296 | 306 |
297 void SpeechRecognizer::InformErrorAndCancelRecognition(ErrorCode error) { | 307 void SpeechRecognizer::InformErrorAndCancelRecognition( |
308 SpeechInputError error) { | |
309 DCHECK_NE(error, kErrorNone); | |
298 CancelRecognition(); | 310 CancelRecognition(); |
299 | 311 |
300 // Guard against the delegate freeing us until we finish our job. | 312 // Guard against the delegate freeing us until we finish our job. |
301 scoped_refptr<SpeechRecognizer> me(this); | 313 scoped_refptr<SpeechRecognizer> me(this); |
302 delegate_->OnRecognizerError(caller_id_, error); | 314 delegate_->OnRecognizerError(caller_id_, error); |
303 } | 315 } |
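Both here and in SetRecognitionResult() the code takes a scoped_refptr to itself before calling into the delegate, because the delegate is allowed to drop its reference mid-call and would otherwise destroy the recognizer while one of its member functions is still on the stack. The same idea expressed in standard C++ with shared_ptr, as a rough analogy rather than the actual Chromium ref-counting types:

    #include <cstdio>
    #include <memory>

    struct Delegate;

    struct Recognizer : std::enable_shared_from_this<Recognizer> {
      Delegate* delegate = nullptr;
      void Fail();
    };

    struct Delegate {
      std::shared_ptr<Recognizer> recognizer;
      void OnError() {
        // The delegate drops its (possibly last) reference while a Recognizer
        // member function is still executing.
        recognizer.reset();
      }
    };

    void Recognizer::Fail() {
      // Equivalent of "scoped_refptr<SpeechRecognizer> me(this)": keep ourselves
      // alive until this function returns, even if the delegate lets go.
      std::shared_ptr<Recognizer> me = shared_from_this();
      delegate->OnError();
      std::puts("still alive here thanks to |me|");
    }  // |me| released; if it held the last reference, the object dies now.

    int main() {
      Delegate d;
      d.recognizer = std::make_shared<Recognizer>();
      d.recognizer->delegate = &d;
      d.recognizer->Fail();
      return 0;
    }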
304 | 316 |
305 } // namespace speech_input | 317 } // namespace speech_input |