content/browser/speech/speech_recognizer.cc - Issue 8137005: Applying changes to the existing speech input code to support the extension API.

Side by Side Diff: content/browser/speech/speech_recognizer.cc

Issue 8137005: Applying changes to the existing speech input code to support the extension API. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: mac bot fix. Created 9 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "content/browser/speech/speech_recognizer.h"	5 #include "content/browser/speech/speech_recognizer.h"

6	6

7 #include "base/time.h"	7 #include "base/time.h"

8 #include "content/browser/browser_thread.h"	8 #include "content/browser/browser_thread.h"

9 #include "net/url_request/url_request_context_getter.h"	9 #include "net/url_request/url_request_context_getter.h"

10	10

(...skipping 127 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
138	138

139 // If audio recording has already stopped and we are in recognition phase,	139 // If audio recording has already stopped and we are in recognition phase,

140 // silently ignore any more calls to stop recording.	140 // silently ignore any more calls to stop recording.

141 if (!audio_controller_.get())	141 if (!audio_controller_.get())

142 return;	142 return;

143	143

144 VLOG(1) << "SpeechRecognizer stopping record.";	144 VLOG(1) << "SpeechRecognizer stopping record.";

145 audio_controller_->Close();	145 audio_controller_->Close();

146 audio_controller_ = NULL; // Releases the ref ptr.	146 audio_controller_ = NULL; // Releases the ref ptr.

147	147

	148 delegate_->DidStopReceivingSpeech(caller_id_);

148 delegate_->DidCompleteRecording(caller_id_);	149 delegate_->DidCompleteRecording(caller_id_);

149	150

150 // UploadAudioChunk requires a non-empty final buffer. So we encode a packet	151 // UploadAudioChunk requires a non-empty final buffer. So we encode a packet

151 // of silence in case encoder had no data already.	152 // of silence in case encoder had no data already.

152 std::vector<short> samples((kAudioSampleRate * kAudioPacketIntervalMs) /	153 std::vector<short> samples((kAudioSampleRate * kAudioPacketIntervalMs) /

153 1000);	154 1000);

154 encoder_->Encode(&samples[0], samples.size());	155 encoder_->Encode(&samples[0], samples.size());

155 encoder_->Flush();	156 encoder_->Flush();

156 string encoded_data;	157 string encoded_data;

157 encoder_->GetEncodedDataAndClear(&encoded_data);	158 encoder_->GetEncodedDataAndClear(&encoded_data);

(...skipping 21 matching lines...) Expand all Loading...
179	180

180 void SpeechRecognizer::HandleOnError(int error_code) {	181 void SpeechRecognizer::HandleOnError(int error_code) {

181 LOG(WARNING) << "SpeechRecognizer::HandleOnError, code=" << error_code;	182 LOG(WARNING) << "SpeechRecognizer::HandleOnError, code=" << error_code;

182	183

183 // Check if we are still recording before canceling recognition, as	184 // Check if we are still recording before canceling recognition, as

184 // recording might have been stopped after this error was posted to the queue	185 // recording might have been stopped after this error was posted to the queue

185 // by |OnError|.	186 // by |OnError|.

186 if (!audio_controller_.get())	187 if (!audio_controller_.get())

187 return;	188 return;

188	189

189 InformErrorAndCancelRecognition(RECOGNIZER_ERROR_CAPTURE);	190 InformErrorAndCancelRecognition(kErrorAudio);

190 }	191 }

191	192

192 void SpeechRecognizer::OnData(AudioInputController* controller,	193 void SpeechRecognizer::OnData(AudioInputController* controller,

193 const uint8* data, uint32 size) {	194 const uint8* data, uint32 size) {

194 if (size == 0) // This could happen when recording stops and is normal.	195 if (size == 0) // This could happen when recording stops and is normal.

195 return;	196 return;

196	197

197 string* str_data = new string(reinterpret_cast<const char*>(data), size);	198 string* str_data = new string(reinterpret_cast<const char*>(data), size);

198 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,	199 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,

199 NewRunnableMethod(this,	200 NewRunnableMethod(this,

200 &SpeechRecognizer::HandleOnData,	201 &SpeechRecognizer::HandleOnData,

201 str_data));	202 str_data));

202 }	203 }

203	204

204 void SpeechRecognizer::HandleOnData(string* data) {	205 void SpeechRecognizer::HandleOnData(string* data) {

205 // Check if we are still recording and if not discard this buffer, as	206 // Check if we are still recording and if not discard this buffer, as

206 // recording might have been stopped after this buffer was posted to the queue	207 // recording might have been stopped after this buffer was posted to the queue

207 // by |OnData|.	208 // by |OnData|.

208 if (!audio_controller_.get()) {	209 if (!audio_controller_.get()) {

209 delete data;	210 delete data;

210 return;	211 return;

211 }	212 }

212	213

	214 bool speech_was_heard_before_packet = endpointer_.DidStartReceivingSpeech();

	215

213 const short* samples = reinterpret_cast<const short*>(data->data());	216 const short* samples = reinterpret_cast<const short*>(data->data());

214 DCHECK((data->length() % sizeof(short)) == 0);	217 DCHECK((data->length() % sizeof(short)) == 0);

215 int num_samples = data->length() / sizeof(short);	218 int num_samples = data->length() / sizeof(short);

216 encoder_->Encode(samples, num_samples);	219 encoder_->Encode(samples, num_samples);

217 float rms;	220 float rms;

218 endpointer_.ProcessAudio(samples, num_samples, &rms);	221 endpointer_.ProcessAudio(samples, num_samples, &rms);

219 bool did_clip = Clipping(samples, num_samples);	222 bool did_clip = Clipping(samples, num_samples);

220 delete data;	223 delete data;

221 num_samples_recorded_ += num_samples;	224 num_samples_recorded_ += num_samples;

222	225

(...skipping 16 matching lines...) Expand all Loading...
239 // environment estimation and should move on to detect speech/end of speech.	242 // environment estimation and should move on to detect speech/end of speech.

240 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs *	243 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs *

241 kAudioSampleRate) / 1000) {	244 kAudioSampleRate) / 1000) {

242 endpointer_.SetUserInputMode();	245 endpointer_.SetUserInputMode();

243 delegate_->DidCompleteEnvironmentEstimation(caller_id_);	246 delegate_->DidCompleteEnvironmentEstimation(caller_id_);

244 }	247 }

245 return; // No more processing since we are still estimating environment.	248 return; // No more processing since we are still estimating environment.

246 }	249 }

247	250

248 // Check if we have waited too long without hearing any speech.	251 // Check if we have waited too long without hearing any speech.

249 if (!endpointer_.DidStartReceivingSpeech() &&	252 bool speech_was_heard_after_packet = endpointer_.DidStartReceivingSpeech();

	253 if (!speech_was_heard_after_packet &&

250 num_samples_recorded_ >= kNoSpeechTimeoutSec * kAudioSampleRate) {	254 num_samples_recorded_ >= kNoSpeechTimeoutSec * kAudioSampleRate) {

251 InformErrorAndCancelRecognition(RECOGNIZER_ERROR_NO_SPEECH);	255 InformErrorAndCancelRecognition(kErrorNoSpeech);

252 return;	256 return;

253 }	257 }

254	258

	259 if (!speech_was_heard_before_packet && speech_was_heard_after_packet)

	260 delegate_->DidStartReceivingSpeech(caller_id_);

	261

255 // Calculate the input volume to display in the UI, smoothing towards the	262 // Calculate the input volume to display in the UI, smoothing towards the

256 // new level.	263 // new level.

257 float level = (rms - kAudioMeterMinDb) /	264 float level = (rms - kAudioMeterMinDb) /

258 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped);	265 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped);

259 level = std::min(std::max(0.0f, level), kAudioMeterRangeMaxUnclipped);	266 level = std::min(std::max(0.0f, level), kAudioMeterRangeMaxUnclipped);

260 if (level > audio_level_) {	267 if (level > audio_level_) {

261 audio_level_ += (level - audio_level_) * kUpSmoothingFactor;	268 audio_level_ += (level - audio_level_) * kUpSmoothingFactor;

262 } else {	269 } else {

263 audio_level_ += (level - audio_level_) * kDownSmoothingFactor;	270 audio_level_ += (level - audio_level_) * kDownSmoothingFactor;

264 }	271 }

265	272

266 float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) /	273 float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) /

267 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped);	274 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped);

268 noise_level = std::min(std::max(0.0f, noise_level),	275 noise_level = std::min(std::max(0.0f, noise_level),

269 kAudioMeterRangeMaxUnclipped);	276 kAudioMeterRangeMaxUnclipped);

270	277

271 delegate_->SetInputVolume(caller_id_, did_clip ? 1.0f : audio_level_,	278 delegate_->SetInputVolume(caller_id_, did_clip ? 1.0f : audio_level_,

272 noise_level);	279 noise_level);

273	280

274 if (endpointer_.speech_input_complete()) {	281 if (endpointer_.speech_input_complete())

275 StopRecording();	282 StopRecording();

276 }

277

278 // TODO(satish): Once we have streaming POST, start sending the data received

279 // here as POST chunks.

280 }	283 }

281	284

282 void SpeechRecognizer::SetRecognitionResult(	285 void SpeechRecognizer::SetRecognitionResult(

283 bool error, const SpeechInputResultArray& result) {	286 const SpeechInputResult& result) {

284 if (error || result.empty()) {	287 if (result.error != kErrorNone) {

285 InformErrorAndCancelRecognition(error ? RECOGNIZER_ERROR_NETWORK :	288 InformErrorAndCancelRecognition(result.error);

286 RECOGNIZER_ERROR_NO_RESULTS);

287 return;	289 return;

288 }	290 }

289	291

290 delegate_->SetRecognitionResult(caller_id_, error, result);

291

292 // Guard against the delegate freeing us until we finish our job.	292 // Guard against the delegate freeing us until we finish our job.

293 scoped_refptr<SpeechRecognizer> me(this);	293 scoped_refptr<SpeechRecognizer> me(this);

	294 delegate_->SetRecognitionResult(caller_id_, result);

294 delegate_->DidCompleteRecognition(caller_id_);	295 delegate_->DidCompleteRecognition(caller_id_);

295 }	296 }

296	297

297 void SpeechRecognizer::InformErrorAndCancelRecognition(ErrorCode error) {	298 void SpeechRecognizer::InformErrorAndCancelRecognition(

	299 SpeechInputError error) {

	300 DCHECK_NE(error, kErrorNone);

298 CancelRecognition();	301 CancelRecognition();

299	302

300 // Guard against the delegate freeing us until we finish our job.	303 // Guard against the delegate freeing us until we finish our job.

301 scoped_refptr<SpeechRecognizer> me(this);	304 scoped_refptr<SpeechRecognizer> me(this);

302 delegate_->OnRecognizerError(caller_id_, error);	305 delegate_->OnRecognizerError(caller_id_, error);

303 }	306 }

304	307

305 } // namespace speech_input	308 } // namespace speech_input

OLD	NEW

« no previous file with comments | « content/browser/speech/speech_recognizer.h ('k') | content/browser/speech/speech_recognizer_unittest.cc » ('j') | no next file with comments »