Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(127)

Side by Side Diff: content/browser/speech/speech_recognizer.cc

Issue 8137005: Applying changes to the existing speech input code to support the extension API. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: review and unit test fixes. Created 9 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "content/browser/speech/speech_recognizer.h" 5 #include "content/browser/speech/speech_recognizer.h"
6 6
7 #include "base/time.h" 7 #include "base/time.h"
8 #include "content/browser/browser_thread.h" 8 #include "content/browser/browser_thread.h"
9 #include "net/url_request/url_request_context_getter.h" 9 #include "net/url_request/url_request_context_getter.h"
10 10
(...skipping 127 matching lines...) Expand 10 before | Expand all | Expand 10 after
138 138
139 // If audio recording has already stopped and we are in recognition phase, 139 // If audio recording has already stopped and we are in recognition phase,
140 // silently ignore any more calls to stop recording. 140 // silently ignore any more calls to stop recording.
141 if (!audio_controller_.get()) 141 if (!audio_controller_.get())
142 return; 142 return;
143 143
144 VLOG(1) << "SpeechRecognizer stopping record."; 144 VLOG(1) << "SpeechRecognizer stopping record.";
145 audio_controller_->Close(); 145 audio_controller_->Close();
146 audio_controller_ = NULL; // Releases the ref ptr. 146 audio_controller_ = NULL; // Releases the ref ptr.
147 147
148 delegate_->DidStopReceivingSpeech(caller_id_);
148 delegate_->DidCompleteRecording(caller_id_); 149 delegate_->DidCompleteRecording(caller_id_);
149 150
150 // UploadAudioChunk requires a non-empty final buffer. So we encode a packet 151 // UploadAudioChunk requires a non-empty final buffer. So we encode a packet
151 // of silence in case encoder had no data already. 152 // of silence in case encoder had no data already.
152 std::vector<short> samples((kAudioSampleRate * kAudioPacketIntervalMs) / 153 std::vector<short> samples((kAudioSampleRate * kAudioPacketIntervalMs) /
153 1000); 154 1000);
154 encoder_->Encode(&samples[0], samples.size()); 155 encoder_->Encode(&samples[0], samples.size());
155 encoder_->Flush(); 156 encoder_->Flush();
156 string encoded_data; 157 string encoded_data;
157 encoder_->GetEncodedDataAndClear(&encoded_data); 158 encoder_->GetEncodedDataAndClear(&encoded_data);
(...skipping 21 matching lines...) Expand all
179 180
180 void SpeechRecognizer::HandleOnError(int error_code) { 181 void SpeechRecognizer::HandleOnError(int error_code) {
181 LOG(WARNING) << "SpeechRecognizer::HandleOnError, code=" << error_code; 182 LOG(WARNING) << "SpeechRecognizer::HandleOnError, code=" << error_code;
182 183
183 // Check if we are still recording before canceling recognition, as 184 // Check if we are still recording before canceling recognition, as
184 // recording might have been stopped after this error was posted to the queue 185 // recording might have been stopped after this error was posted to the queue
185 // by |OnError|. 186 // by |OnError|.
186 if (!audio_controller_.get()) 187 if (!audio_controller_.get())
187 return; 188 return;
188 189
189 InformErrorAndCancelRecognition(RECOGNIZER_ERROR_CAPTURE); 190 InformErrorAndCancelRecognition(kErrorAudio);
190 } 191 }
191 192
192 void SpeechRecognizer::OnData(AudioInputController* controller, 193 void SpeechRecognizer::OnData(AudioInputController* controller,
193 const uint8* data, uint32 size) { 194 const uint8* data, uint32 size) {
194 if (size == 0) // This could happen when recording stops and is normal. 195 if (size == 0) // This could happen when recording stops and is normal.
195 return; 196 return;
196 197
197 string* str_data = new string(reinterpret_cast<const char*>(data), size); 198 string* str_data = new string(reinterpret_cast<const char*>(data), size);
198 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, 199 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
199 NewRunnableMethod(this, 200 NewRunnableMethod(this,
200 &SpeechRecognizer::HandleOnData, 201 &SpeechRecognizer::HandleOnData,
201 str_data)); 202 str_data));
202 } 203 }
203 204
204 void SpeechRecognizer::HandleOnData(string* data) { 205 void SpeechRecognizer::HandleOnData(string* data) {
205 // Check if we are still recording and if not discard this buffer, as 206 // Check if we are still recording and if not discard this buffer, as
206 // recording might have been stopped after this buffer was posted to the queue 207 // recording might have been stopped after this buffer was posted to the queue
207 // by |OnData|. 208 // by |OnData|.
208 if (!audio_controller_.get()) { 209 if (!audio_controller_.get()) {
209 delete data; 210 delete data;
210 return; 211 return;
211 } 212 }
212 213
214 bool speech_was_heard_before_packet = endpointer_.DidStartReceivingSpeech();
215
213 const short* samples = reinterpret_cast<const short*>(data->data()); 216 const short* samples = reinterpret_cast<const short*>(data->data());
214 DCHECK((data->length() % sizeof(short)) == 0); 217 DCHECK((data->length() % sizeof(short)) == 0);
215 int num_samples = data->length() / sizeof(short); 218 int num_samples = data->length() / sizeof(short);
216 encoder_->Encode(samples, num_samples); 219 encoder_->Encode(samples, num_samples);
217 float rms; 220 float rms;
218 endpointer_.ProcessAudio(samples, num_samples, &rms); 221 endpointer_.ProcessAudio(samples, num_samples, &rms);
219 bool did_clip = Clipping(samples, num_samples); 222 bool did_clip = Clipping(samples, num_samples);
220 delete data; 223 delete data;
221 num_samples_recorded_ += num_samples; 224 num_samples_recorded_ += num_samples;
222 225
(...skipping 16 matching lines...) Expand all
239 // environment estimation and should move on to detect speech/end of speech. 242 // environment estimation and should move on to detect speech/end of speech.
240 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs * 243 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs *
241 kAudioSampleRate) / 1000) { 244 kAudioSampleRate) / 1000) {
242 endpointer_.SetUserInputMode(); 245 endpointer_.SetUserInputMode();
243 delegate_->DidCompleteEnvironmentEstimation(caller_id_); 246 delegate_->DidCompleteEnvironmentEstimation(caller_id_);
244 } 247 }
245 return; // No more processing since we are still estimating environment. 248 return; // No more processing since we are still estimating environment.
246 } 249 }
247 250
248 // Check if we have waited too long without hearing any speech. 251 // Check if we have waited too long without hearing any speech.
249 if (!endpointer_.DidStartReceivingSpeech() && 252 bool speech_was_heard_after_packet = endpointer_.DidStartReceivingSpeech();
253 if (!speech_was_heard_after_packet &&
250 num_samples_recorded_ >= kNoSpeechTimeoutSec * kAudioSampleRate) { 254 num_samples_recorded_ >= kNoSpeechTimeoutSec * kAudioSampleRate) {
251 InformErrorAndCancelRecognition(RECOGNIZER_ERROR_NO_SPEECH); 255 InformErrorAndCancelRecognition(kErrorNoSpeech);
252 return; 256 return;
253 } 257 }
254 258
259 if (!speech_was_heard_before_packet && speech_was_heard_after_packet)
260 delegate_->DidStartReceivingSpeech(caller_id_);
261
255 // Calculate the input volume to display in the UI, smoothing towards the 262 // Calculate the input volume to display in the UI, smoothing towards the
256 // new level. 263 // new level.
257 float level = (rms - kAudioMeterMinDb) / 264 float level = (rms - kAudioMeterMinDb) /
258 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); 265 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped);
259 level = std::min(std::max(0.0f, level), kAudioMeterRangeMaxUnclipped); 266 level = std::min(std::max(0.0f, level), kAudioMeterRangeMaxUnclipped);
260 if (level > audio_level_) { 267 if (level > audio_level_) {
261 audio_level_ += (level - audio_level_) * kUpSmoothingFactor; 268 audio_level_ += (level - audio_level_) * kUpSmoothingFactor;
262 } else { 269 } else {
263 audio_level_ += (level - audio_level_) * kDownSmoothingFactor; 270 audio_level_ += (level - audio_level_) * kDownSmoothingFactor;
264 } 271 }
265 272
266 float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) / 273 float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) /
267 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); 274 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped);
268 noise_level = std::min(std::max(0.0f, noise_level), 275 noise_level = std::min(std::max(0.0f, noise_level),
269 kAudioMeterRangeMaxUnclipped); 276 kAudioMeterRangeMaxUnclipped);
270 277
271 delegate_->SetInputVolume(caller_id_, did_clip ? 1.0f : audio_level_, 278 delegate_->SetInputVolume(caller_id_, did_clip ? 1.0f : audio_level_,
272 noise_level); 279 noise_level);
273 280
274 if (endpointer_.speech_input_complete()) { 281 if (endpointer_.speech_input_complete())
275 StopRecording(); 282 StopRecording();
276 }
277 283
278 // TODO(satish): Once we have streaming POST, start sending the data received 284 // TODO(satish): Once we have streaming POST, start sending the data received
Satish 2011/10/06 09:09:06 This todo looks obsolete as we are uploading audio
Leandro Graciá Gil 2011/10/06 18:26:25 Done.
279 // here as POST chunks. 285 // here as POST chunks.
280 } 286 }
281 287
282 void SpeechRecognizer::SetRecognitionResult( 288 void SpeechRecognizer::SetRecognitionResult(
283 bool error, const SpeechInputResultArray& result) { 289 bool error, const SpeechInputResult& result) {
284 if (error || result.empty()) { 290 if (error) {
285 InformErrorAndCancelRecognition(error ? RECOGNIZER_ERROR_NETWORK : 291 // Request failed or received an invalid response that couldn't be parsed.
286 RECOGNIZER_ERROR_NO_RESULTS); 292 InformErrorAndCancelRecognition(kErrorNetwork);
287 return; 293 return;
288 } 294 }
289 295
290 delegate_->SetRecognitionResult(caller_id_, error, result); 296 if (result.error != kErrorNone) {
297 InformErrorAndCancelRecognition(result.error);
298 return;
299 }
291 300
292 // Guard against the delegate freeing us until we finish our job. 301 // Guard against the delegate freeing us until we finish our job.
293 scoped_refptr<SpeechRecognizer> me(this); 302 scoped_refptr<SpeechRecognizer> me(this);
303 delegate_->SetRecognitionResult(caller_id_, error, result);
294 delegate_->DidCompleteRecognition(caller_id_); 304 delegate_->DidCompleteRecognition(caller_id_);
295 } 305 }
296 306
297 void SpeechRecognizer::InformErrorAndCancelRecognition(ErrorCode error) { 307 void SpeechRecognizer::InformErrorAndCancelRecognition(
308 SpeechInputError error) {
309 DCHECK_NE(error, kErrorNone);
298 CancelRecognition(); 310 CancelRecognition();
299 311
300 // Guard against the delegate freeing us until we finish our job. 312 // Guard against the delegate freeing us until we finish our job.
301 scoped_refptr<SpeechRecognizer> me(this); 313 scoped_refptr<SpeechRecognizer> me(this);
302 delegate_->OnRecognizerError(caller_id_, error); 314 delegate_->OnRecognizerError(caller_id_, error);
303 } 315 }
304 316
305 } // namespace speech_input 317 } // namespace speech_input
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698