OLD | NEW |
---|---|
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "content/browser/speech/speech_recognizer.h" | 5 #include "content/browser/speech/speech_recognizer.h" |
6 | 6 |
7 #include "base/time.h" | 7 #include "base/time.h" |
8 #include "content/browser/browser_thread.h" | 8 #include "content/browser/browser_thread.h" |
9 #include "net/url_request/url_request_context_getter.h" | 9 #include "net/url_request/url_request_context_getter.h" |
10 | 10 |
(...skipping 127 matching lines...) | |
138 | 138 |
139 // If audio recording has already stopped and we are in recognition phase, | 139 // If audio recording has already stopped and we are in recognition phase, |
140 // silently ignore any more calls to stop recording. | 140 // silently ignore any more calls to stop recording. |
141 if (!audio_controller_.get()) | 141 if (!audio_controller_.get()) |
142 return; | 142 return; |
143 | 143 |
144 VLOG(1) << "SpeechRecognizer stopping record."; | 144 VLOG(1) << "SpeechRecognizer stopping record."; |
145 audio_controller_->Close(); | 145 audio_controller_->Close(); |
146 audio_controller_ = NULL; // Releases the ref ptr. | 146 audio_controller_ = NULL; // Releases the ref ptr. |
147 | 147 |
148 delegate_->DidStopReceivingSpeech(caller_id_); | |
148 delegate_->DidCompleteRecording(caller_id_); | 149 delegate_->DidCompleteRecording(caller_id_); |
149 | 150 |
150 // UploadAudioChunk requires a non-empty final buffer. So we encode a packet | 151 // UploadAudioChunk requires a non-empty final buffer. So we encode a packet |
151 // of silence in case encoder had no data already. | 152 // of silence in case encoder had no data already. |
152 std::vector<short> samples((kAudioSampleRate * kAudioPacketIntervalMs) / | 153 std::vector<short> samples((kAudioSampleRate * kAudioPacketIntervalMs) / |
153 1000); | 154 1000); |
154 encoder_->Encode(&samples[0], samples.size()); | 155 encoder_->Encode(&samples[0], samples.size()); |
155 encoder_->Flush(); | 156 encoder_->Flush(); |
156 string encoded_data; | 157 string encoded_data; |
157 encoder_->GetEncodedDataAndClear(&encoded_data); | 158 encoder_->GetEncodedDataAndClear(&encoded_data); |
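For context on the silence padding above: one packet of zero-valued samples is enough to guarantee the encoder produces a non-empty final chunk before Flush(). A standalone sketch of that buffer construction; the constant values here are illustrative stand-ins, not necessarily the ones defined in this file:

    #include <cstdio>
    #include <vector>

    // Illustrative values; the real kAudioSampleRate / kAudioPacketIntervalMs
    // are defined elsewhere in speech_recognizer.cc.
    const int kAudioSampleRate = 16000;      // samples per second
    const int kAudioPacketIntervalMs = 100;  // duration of one audio packet

    int main() {
      // (16000 * 100) / 1000 = 1600 samples; std::vector value-initializes
      // each short to 0, so this is one packet of pure silence.
      std::vector<short> samples((kAudioSampleRate * kAudioPacketIntervalMs) / 1000);
      std::printf("silence packet: %zu samples\n", samples.size());
      // Encoding this buffer before Flush() ensures the final encoded chunk
      // handed to the upload is never empty.
      return 0;
    }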
(...skipping 21 matching lines...) | |
179 | 180 |
180 void SpeechRecognizer::HandleOnError(int error_code) { | 181 void SpeechRecognizer::HandleOnError(int error_code) { |
181 LOG(WARNING) << "SpeechRecognizer::HandleOnError, code=" << error_code; | 182 LOG(WARNING) << "SpeechRecognizer::HandleOnError, code=" << error_code; |
182 | 183 |
183 // Check if we are still recording before canceling recognition, as | 184 // Check if we are still recording before canceling recognition, as |
184 // recording might have been stopped after this error was posted to the queue | 185 // recording might have been stopped after this error was posted to the queue |
185 // by |OnError|. | 186 // by |OnError|. |
186 if (!audio_controller_.get()) | 187 if (!audio_controller_.get()) |
187 return; | 188 return; |
188 | 189 |
189 InformErrorAndCancelRecognition(RECOGNIZER_ERROR_CAPTURE); | 190 InformErrorAndCancelRecognition(kErrorAudio); |
190 } | 191 } |
191 | 192 |
192 void SpeechRecognizer::OnData(AudioInputController* controller, | 193 void SpeechRecognizer::OnData(AudioInputController* controller, |
193 const uint8* data, uint32 size) { | 194 const uint8* data, uint32 size) { |
194 if (size == 0) // This could happen when recording stops and is normal. | 195 if (size == 0) // This could happen when recording stops and is normal. |
195 return; | 196 return; |
196 | 197 |
197 string* str_data = new string(reinterpret_cast<const char*>(data), size); | 198 string* str_data = new string(reinterpret_cast<const char*>(data), size); |
198 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | 199 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, |
199 NewRunnableMethod(this, | 200 NewRunnableMethod(this, |
200 &SpeechRecognizer::HandleOnData, | 201 &SpeechRecognizer::HandleOnData, |
201 str_data)); | 202 str_data)); |
202 } | 203 } |
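OnData() runs on the audio thread, so the device bytes are copied into a heap-allocated string whose ownership passes to the task posted to the IO thread; HandleOnData() is then responsible for deleting it. A rough standard-C++ analogy for that hand-off (this is only a sketch of the ownership transfer, not the Chromium task-posting API):

    #include <cstdio>
    #include <memory>
    #include <string>
    #include <thread>

    // Stand-in for HandleOnData(): takes ownership of the buffer and frees it
    // when done, mirroring the explicit "delete data" in the real code.
    void HandleOnData(std::unique_ptr<std::string> data) {
      std::printf("processing %zu bytes on the worker thread\n", data->size());
    }  // |data| destroyed here.

    int main() {
      // Stand-in for OnData(): copy the device buffer, then hand it off.
      const char device_buffer[4] = {1, 0, 2, 0};
      auto copy = std::make_unique<std::string>(device_buffer, sizeof(device_buffer));
      std::thread io_thread(HandleOnData, std::move(copy));
      io_thread.join();
      return 0;
    }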
203 | 204 |
204 void SpeechRecognizer::HandleOnData(string* data) { | 205 void SpeechRecognizer::HandleOnData(string* data) { |
205 // Check if we are still recording and if not discard this buffer, as | 206 // Check if we are still recording and if not discard this buffer, as |
206 // recording might have been stopped after this buffer was posted to the queue | 207 // recording might have been stopped after this buffer was posted to the queue |
207 // by |OnData|. | 208 // by |OnData|. |
208 if (!audio_controller_.get()) { | 209 if (!audio_controller_.get()) { |
209 delete data; | 210 delete data; |
210 return; | 211 return; |
211 } | 212 } |
212 | 213 |
214 bool speech_was_heard_before_packet = endpointer_.DidStartReceivingSpeech(); | |
215 | |
213 const short* samples = reinterpret_cast<const short*>(data->data()); | 216 const short* samples = reinterpret_cast<const short*>(data->data()); |
214 DCHECK((data->length() % sizeof(short)) == 0); | 217 DCHECK((data->length() % sizeof(short)) == 0); |
215 int num_samples = data->length() / sizeof(short); | 218 int num_samples = data->length() / sizeof(short); |
216 encoder_->Encode(samples, num_samples); | 219 encoder_->Encode(samples, num_samples); |
217 float rms; | 220 float rms; |
218 endpointer_.ProcessAudio(samples, num_samples, &rms); | 221 endpointer_.ProcessAudio(samples, num_samples, &rms); |
219 bool did_clip = Clipping(samples, num_samples); | 222 bool did_clip = Clipping(samples, num_samples); |
220 delete data; | 223 delete data; |
221 num_samples_recorded_ += num_samples; | 224 num_samples_recorded_ += num_samples; |
222 | 225 |
(...skipping 16 matching lines...) | |
239 // environment estimation and should move on to detect speech/end of speech. | 242 // environment estimation and should move on to detect speech/end of speech. |
240 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs * | 243 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs * |
241 kAudioSampleRate) / 1000) { | 244 kAudioSampleRate) / 1000) { |
242 endpointer_.SetUserInputMode(); | 245 endpointer_.SetUserInputMode(); |
243 delegate_->DidCompleteEnvironmentEstimation(caller_id_); | 246 delegate_->DidCompleteEnvironmentEstimation(caller_id_); |
244 } | 247 } |
245 return; // No more processing since we are still estimating environment. | 248 return; // No more processing since we are still estimating environment. |
246 } | 249 } |
247 | 250 |
248 // Check if we have waited too long without hearing any speech. | 251 // Check if we have waited too long without hearing any speech. |
249 if (!endpointer_.DidStartReceivingSpeech() && | 252 bool speech_was_heard_after_packet = endpointer_.DidStartReceivingSpeech(); |
253 if (!speech_was_heard_after_packet && | |
250 num_samples_recorded_ >= kNoSpeechTimeoutSec * kAudioSampleRate) { | 254 num_samples_recorded_ >= kNoSpeechTimeoutSec * kAudioSampleRate) { |
251 InformErrorAndCancelRecognition(RECOGNIZER_ERROR_NO_SPEECH); | 255 InformErrorAndCancelRecognition(kErrorNoSpeech); |
252 return; | 256 return; |
253 } | 257 } |
254 | 258 |
259 if (!speech_was_heard_before_packet && speech_was_heard_after_packet) | |
260 delegate_->DidStartReceivingSpeech(caller_id_); | |
261 | |
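The DidStartReceivingSpeech notification added here is edge-triggered: the endpointer state is sampled before and after the packet is processed, and the delegate is told only on the false-to-true transition, so it fires at most once per recording. A minimal sketch of that pattern; the Endpointer here is a hypothetical stand-in, not the real class:

    #include <cstdio>

    // Hypothetical endpointer: reports true once speech has been detected.
    struct Endpointer {
      bool heard = false;
      void ProcessAudio(bool packet_has_speech) { heard |= packet_has_speech; }
      bool DidStartReceivingSpeech() const { return heard; }
    };

    int main() {
      Endpointer endpointer;
      const bool packets[] = {false, false, true, true};
      for (bool has_speech : packets) {
        bool before = endpointer.DidStartReceivingSpeech();
        endpointer.ProcessAudio(has_speech);
        bool after = endpointer.DidStartReceivingSpeech();
        if (!before && after)
          std::puts("DidStartReceivingSpeech fired");  // only on the 3rd packet
      }
      return 0;
    }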
255 // Calculate the input volume to display in the UI, smoothing towards the | 262 // Calculate the input volume to display in the UI, smoothing towards the |
256 // new level. | 263 // new level. |
257 float level = (rms - kAudioMeterMinDb) / | 264 float level = (rms - kAudioMeterMinDb) / |
258 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); | 265 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); |
259 level = std::min(std::max(0.0f, level), kAudioMeterRangeMaxUnclipped); | 266 level = std::min(std::max(0.0f, level), kAudioMeterRangeMaxUnclipped); |
260 if (level > audio_level_) { | 267 if (level > audio_level_) { |
261 audio_level_ += (level - audio_level_) * kUpSmoothingFactor; | 268 audio_level_ += (level - audio_level_) * kUpSmoothingFactor; |
262 } else { | 269 } else { |
263 audio_level_ += (level - audio_level_) * kDownSmoothingFactor; | 270 audio_level_ += (level - audio_level_) * kDownSmoothingFactor; |
264 } | 271 } |
265 | 272 |
266 float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) / | 273 float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) / |
267 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); | 274 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); |
268 noise_level = std::min(std::max(0.0f, noise_level), | 275 noise_level = std::min(std::max(0.0f, noise_level), |
269 kAudioMeterRangeMaxUnclipped); | 276 kAudioMeterRangeMaxUnclipped); |
270 | 277 |
271 delegate_->SetInputVolume(caller_id_, did_clip ? 1.0f : audio_level_, | 278 delegate_->SetInputVolume(caller_id_, did_clip ? 1.0f : audio_level_, |
272 noise_level); | 279 noise_level); |
273 | 280 |
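The meter math above maps the RMS value in dB onto a 0..1 UI range and then smooths asymmetrically: a larger factor when the level rises than when it falls, so the meter reacts quickly to speech but decays gently. A small standalone sketch with made-up constants (the real kAudioMeter* values and smoothing factors live elsewhere in this file):

    #include <algorithm>
    #include <cstdio>

    // Made-up values purely for illustration.
    const float kAudioMeterMinDb = 10.0f;
    const float kAudioMeterDbRange = 80.0f;
    const float kAudioMeterRangeMaxUnclipped = 47.0f / 48.0f;
    const float kUpSmoothingFactor = 1.0f;    // jump up immediately
    const float kDownSmoothingFactor = 0.7f;  // decay more slowly

    int main() {
      float audio_level = 0.0f;
      const float rms_samples_db[] = {60.0f, 20.0f, 20.0f};
      for (float rms : rms_samples_db) {
        // Map dB onto [0, kAudioMeterRangeMaxUnclipped].
        float level = (rms - kAudioMeterMinDb) /
                      (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped);
        level = std::min(std::max(0.0f, level), kAudioMeterRangeMaxUnclipped);
        // Asymmetric smoothing toward the new level.
        audio_level += (level - audio_level) *
                       (level > audio_level ? kUpSmoothingFactor : kDownSmoothingFactor);
        std::printf("rms=%.0f dB -> displayed level %.3f\n", rms, audio_level);
      }
      return 0;
    }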
274 if (endpointer_.speech_input_complete()) { | 281 if (endpointer_.speech_input_complete()) |
275 StopRecording(); | 282 StopRecording(); |
276 } | |
277 | 283 |
 278 // TODO(satish): Once we have streaming POST, start sending the data received | 284 // TODO(satish): Once we have streaming POST, start sending the data received |
Satish (2011/10/06 09:09:06): This todo looks obsolete as we are uploading audio
Leandro Graciá Gil (2011/10/06 18:26:25): Done.
279 // here as POST chunks. | 285 // here as POST chunks. |
280 } | 286 } |
281 | 287 |
282 void SpeechRecognizer::SetRecognitionResult( | 288 void SpeechRecognizer::SetRecognitionResult( |
283 bool error, const SpeechInputResultArray& result) { | 289 bool error, const SpeechInputResult& result) { |
284 if (error || result.empty()) { | 290 if (error) { |
285 InformErrorAndCancelRecognition(error ? RECOGNIZER_ERROR_NETWORK : | 291 // Request failed or received an invalid response that couldn't be parsed. |
286 RECOGNIZER_ERROR_NO_RESULTS); | 292 InformErrorAndCancelRecognition(kErrorNetwork); |
287 return; | 293 return; |
288 } | 294 } |
289 | 295 |
290 delegate_->SetRecognitionResult(caller_id_, error, result); | 296 if (result.error != kErrorNone) { |
297 InformErrorAndCancelRecognition(result.error); | |
298 return; | |
299 } | |
291 | 300 |
292 // Guard against the delegate freeing us until we finish our job. | 301 // Guard against the delegate freeing us until we finish our job. |
293 scoped_refptr<SpeechRecognizer> me(this); | 302 scoped_refptr<SpeechRecognizer> me(this); |
303 delegate_->SetRecognitionResult(caller_id_, error, result); | |
294 delegate_->DidCompleteRecognition(caller_id_); | 304 delegate_->DidCompleteRecognition(caller_id_); |
295 } | 305 } |
296 | 306 |
297 void SpeechRecognizer::InformErrorAndCancelRecognition(ErrorCode error) { | 307 void SpeechRecognizer::InformErrorAndCancelRecognition( |
308 SpeechInputError error) { | |
309 DCHECK_NE(error, kErrorNone); | |
298 CancelRecognition(); | 310 CancelRecognition(); |
299 | 311 |
300 // Guard against the delegate freeing us until we finish our job. | 312 // Guard against the delegate freeing us until we finish our job. |
301 scoped_refptr<SpeechRecognizer> me(this); | 313 scoped_refptr<SpeechRecognizer> me(this); |
302 delegate_->OnRecognizerError(caller_id_, error); | 314 delegate_->OnRecognizerError(caller_id_, error); |
303 } | 315 } |
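Both here and in SetRecognitionResult() the code takes a scoped_refptr to itself before calling into the delegate, because the delegate is allowed to drop its reference mid-call and would otherwise destroy the recognizer while one of its member functions is still on the stack. The same idea expressed in standard C++ with shared_ptr, as a rough analogy rather than the actual Chromium ref-counting types:

    #include <cstdio>
    #include <memory>

    struct Delegate;

    struct Recognizer : std::enable_shared_from_this<Recognizer> {
      Delegate* delegate = nullptr;
      void Fail();
    };

    struct Delegate {
      std::shared_ptr<Recognizer> recognizer;
      void OnError() {
        // The delegate drops its (possibly last) reference while a Recognizer
        // member function is still executing.
        recognizer.reset();
      }
    };

    void Recognizer::Fail() {
      // Equivalent of "scoped_refptr<SpeechRecognizer> me(this)": keep ourselves
      // alive until this function returns, even if the delegate lets go.
      std::shared_ptr<Recognizer> me = shared_from_this();
      delegate->OnError();
      std::puts("still alive here thanks to |me|");
    }  // |me| released; if it held the last reference, the object dies now.

    int main() {
      Delegate d;
      d.recognizer = std::make_shared<Recognizer>();
      d.recognizer->delegate = &d;
      d.recognizer->Fail();
      return 0;
    }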
304 | 316 |
305 } // namespace speech_input | 317 } // namespace speech_input |