| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "content/browser/speech/speech_recognizer.h" | 5 #include "content/browser/speech/speech_recognizer.h" |
| 6 | 6 |
| 7 #include "base/time.h" | 7 #include "base/time.h" |
| 8 #include "content/browser/browser_thread.h" | 8 #include "content/browser/browser_thread.h" |
| 9 #include "net/url_request/url_request_context_getter.h" | 9 #include "net/url_request/url_request_context_getter.h" |
| 10 | 10 |
| (...skipping 127 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 138 | 138 |
| 139 // If audio recording has already stopped and we are in recognition phase, | 139 // If audio recording has already stopped and we are in recognition phase, |
| 140 // silently ignore any more calls to stop recording. | 140 // silently ignore any more calls to stop recording. |
| 141 if (!audio_controller_.get()) | 141 if (!audio_controller_.get()) |
| 142 return; | 142 return; |
| 143 | 143 |
| 144 VLOG(1) << "SpeechRecognizer stopping record."; | 144 VLOG(1) << "SpeechRecognizer stopping record."; |
| 145 audio_controller_->Close(); | 145 audio_controller_->Close(); |
| 146 audio_controller_ = NULL; // Releases the ref ptr. | 146 audio_controller_ = NULL; // Releases the ref ptr. |
| 147 | 147 |
| 148 delegate_->DidStopReceivingSpeech(caller_id_); |
| 148 delegate_->DidCompleteRecording(caller_id_); | 149 delegate_->DidCompleteRecording(caller_id_); |
| 149 | 150 |
| 150 // UploadAudioChunk requires a non-empty final buffer. So we encode a packet | 151 // UploadAudioChunk requires a non-empty final buffer. So we encode a packet |
| 151 // of silence in case encoder had no data already. | 152 // of silence in case encoder had no data already. |
| 152 std::vector<short> samples((kAudioSampleRate * kAudioPacketIntervalMs) / | 153 std::vector<short> samples((kAudioSampleRate * kAudioPacketIntervalMs) / |
| 153 1000); | 154 1000); |
| 154 encoder_->Encode(&samples[0], samples.size()); | 155 encoder_->Encode(&samples[0], samples.size()); |
| 155 encoder_->Flush(); | 156 encoder_->Flush(); |
| 156 string encoded_data; | 157 string encoded_data; |
| 157 encoder_->GetEncodedDataAndClear(&encoded_data); | 158 encoder_->GetEncodedDataAndClear(&encoded_data); |
| (...skipping 21 matching lines...) Expand all Loading... |
| 179 | 180 |
| 180 void SpeechRecognizer::HandleOnError(int error_code) { | 181 void SpeechRecognizer::HandleOnError(int error_code) { |
| 181 LOG(WARNING) << "SpeechRecognizer::HandleOnError, code=" << error_code; | 182 LOG(WARNING) << "SpeechRecognizer::HandleOnError, code=" << error_code; |
| 182 | 183 |
| 183 // Check if we are still recording before canceling recognition, as | 184 // Check if we are still recording before canceling recognition, as |
| 184 // recording might have been stopped after this error was posted to the queue | 185 // recording might have been stopped after this error was posted to the queue |
| 185 // by |OnError|. | 186 // by |OnError|. |
| 186 if (!audio_controller_.get()) | 187 if (!audio_controller_.get()) |
| 187 return; | 188 return; |
| 188 | 189 |
| 189 InformErrorAndCancelRecognition(RECOGNIZER_ERROR_CAPTURE); | 190 InformErrorAndCancelRecognition(kErrorAudio); |
| 190 } | 191 } |
| 191 | 192 |
| 192 void SpeechRecognizer::OnData(AudioInputController* controller, | 193 void SpeechRecognizer::OnData(AudioInputController* controller, |
| 193 const uint8* data, uint32 size) { | 194 const uint8* data, uint32 size) { |
| 194 if (size == 0) // This could happen when recording stops and is normal. | 195 if (size == 0) // This could happen when recording stops and is normal. |
| 195 return; | 196 return; |
| 196 | 197 |
| 197 string* str_data = new string(reinterpret_cast<const char*>(data), size); | 198 string* str_data = new string(reinterpret_cast<const char*>(data), size); |
| 198 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, | 199 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, |
| 199 NewRunnableMethod(this, | 200 NewRunnableMethod(this, |
| 200 &SpeechRecognizer::HandleOnData, | 201 &SpeechRecognizer::HandleOnData, |
| 201 str_data)); | 202 str_data)); |
| 202 } | 203 } |
| 203 | 204 |
| 204 void SpeechRecognizer::HandleOnData(string* data) { | 205 void SpeechRecognizer::HandleOnData(string* data) { |
| 205 // Check if we are still recording and if not discard this buffer, as | 206 // Check if we are still recording and if not discard this buffer, as |
| 206 // recording might have been stopped after this buffer was posted to the queue | 207 // recording might have been stopped after this buffer was posted to the queue |
| 207 // by |OnData|. | 208 // by |OnData|. |
| 208 if (!audio_controller_.get()) { | 209 if (!audio_controller_.get()) { |
| 209 delete data; | 210 delete data; |
| 210 return; | 211 return; |
| 211 } | 212 } |
| 212 | 213 |
| 214 bool speech_was_heard_before_packet = endpointer_.DidStartReceivingSpeech(); |
| 215 |
| 213 const short* samples = reinterpret_cast<const short*>(data->data()); | 216 const short* samples = reinterpret_cast<const short*>(data->data()); |
| 214 DCHECK((data->length() % sizeof(short)) == 0); | 217 DCHECK((data->length() % sizeof(short)) == 0); |
| 215 int num_samples = data->length() / sizeof(short); | 218 int num_samples = data->length() / sizeof(short); |
| 216 encoder_->Encode(samples, num_samples); | 219 encoder_->Encode(samples, num_samples); |
| 217 float rms; | 220 float rms; |
| 218 endpointer_.ProcessAudio(samples, num_samples, &rms); | 221 endpointer_.ProcessAudio(samples, num_samples, &rms); |
| 219 bool did_clip = Clipping(samples, num_samples); | 222 bool did_clip = Clipping(samples, num_samples); |
| 220 delete data; | 223 delete data; |
| 221 num_samples_recorded_ += num_samples; | 224 num_samples_recorded_ += num_samples; |
| 222 | 225 |
| (...skipping 16 matching lines...) Expand all Loading... |
| 239 // environment estimation and should move on to detect speech/end of speech. | 242 // environment estimation and should move on to detect speech/end of speech. |
| 240 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs * | 243 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs * |
| 241 kAudioSampleRate) / 1000) { | 244 kAudioSampleRate) / 1000) { |
| 242 endpointer_.SetUserInputMode(); | 245 endpointer_.SetUserInputMode(); |
| 243 delegate_->DidCompleteEnvironmentEstimation(caller_id_); | 246 delegate_->DidCompleteEnvironmentEstimation(caller_id_); |
| 244 } | 247 } |
| 245 return; // No more processing since we are still estimating environment. | 248 return; // No more processing since we are still estimating environment. |
| 246 } | 249 } |
| 247 | 250 |
| 248 // Check if we have waited too long without hearing any speech. | 251 // Check if we have waited too long without hearing any speech. |
| 249 if (!endpointer_.DidStartReceivingSpeech() && | 252 bool speech_was_heard_after_packet = endpointer_.DidStartReceivingSpeech(); |
| 253 if (!speech_was_heard_after_packet && |
| 250 num_samples_recorded_ >= kNoSpeechTimeoutSec * kAudioSampleRate) { | 254 num_samples_recorded_ >= kNoSpeechTimeoutSec * kAudioSampleRate) { |
| 251 InformErrorAndCancelRecognition(RECOGNIZER_ERROR_NO_SPEECH); | 255 InformErrorAndCancelRecognition(kErrorNoSpeech); |
| 252 return; | 256 return; |
| 253 } | 257 } |
| 254 | 258 |
| 259 if (!speech_was_heard_before_packet && speech_was_heard_after_packet) |
| 260 delegate_->DidStartReceivingSpeech(caller_id_); |
| 261 |
| 255 // Calculate the input volume to display in the UI, smoothing towards the | 262 // Calculate the input volume to display in the UI, smoothing towards the |
| 256 // new level. | 263 // new level. |
| 257 float level = (rms - kAudioMeterMinDb) / | 264 float level = (rms - kAudioMeterMinDb) / |
| 258 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); | 265 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); |
| 259 level = std::min(std::max(0.0f, level), kAudioMeterRangeMaxUnclipped); | 266 level = std::min(std::max(0.0f, level), kAudioMeterRangeMaxUnclipped); |
| 260 if (level > audio_level_) { | 267 if (level > audio_level_) { |
| 261 audio_level_ += (level - audio_level_) * kUpSmoothingFactor; | 268 audio_level_ += (level - audio_level_) * kUpSmoothingFactor; |
| 262 } else { | 269 } else { |
| 263 audio_level_ += (level - audio_level_) * kDownSmoothingFactor; | 270 audio_level_ += (level - audio_level_) * kDownSmoothingFactor; |
| 264 } | 271 } |
| 265 | 272 |
| 266 float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) / | 273 float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) / |
| 267 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); | 274 (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped); |
| 268 noise_level = std::min(std::max(0.0f, noise_level), | 275 noise_level = std::min(std::max(0.0f, noise_level), |
| 269 kAudioMeterRangeMaxUnclipped); | 276 kAudioMeterRangeMaxUnclipped); |
| 270 | 277 |
| 271 delegate_->SetInputVolume(caller_id_, did_clip ? 1.0f : audio_level_, | 278 delegate_->SetInputVolume(caller_id_, did_clip ? 1.0f : audio_level_, |
| 272 noise_level); | 279 noise_level); |
| 273 | 280 |
| 274 if (endpointer_.speech_input_complete()) { | 281 if (endpointer_.speech_input_complete()) |
| 275 StopRecording(); | 282 StopRecording(); |
| 276 } | |
| 277 | |
| 278 // TODO(satish): Once we have streaming POST, start sending the data received | |
| 279 // here as POST chunks. | |
| 280 } | 283 } |
| 281 | 284 |
| 282 void SpeechRecognizer::SetRecognitionResult( | 285 void SpeechRecognizer::SetRecognitionResult( |
| 283 bool error, const SpeechInputResultArray& result) { | 286 const SpeechInputResult& result) { |
| 284 if (error || result.empty()) { | 287 if (result.error != kErrorNone) { |
| 285 InformErrorAndCancelRecognition(error ? RECOGNIZER_ERROR_NETWORK : | 288 InformErrorAndCancelRecognition(result.error); |
| 286 RECOGNIZER_ERROR_NO_RESULTS); | |
| 287 return; | 289 return; |
| 288 } | 290 } |
| 289 | 291 |
| 290 delegate_->SetRecognitionResult(caller_id_, error, result); | |
| 291 | |
| 292 // Guard against the delegate freeing us until we finish our job. | 292 // Guard against the delegate freeing us until we finish our job. |
| 293 scoped_refptr<SpeechRecognizer> me(this); | 293 scoped_refptr<SpeechRecognizer> me(this); |
| 294 delegate_->SetRecognitionResult(caller_id_, result); |
| 294 delegate_->DidCompleteRecognition(caller_id_); | 295 delegate_->DidCompleteRecognition(caller_id_); |
| 295 } | 296 } |
| 296 | 297 |
| 297 void SpeechRecognizer::InformErrorAndCancelRecognition(ErrorCode error) { | 298 void SpeechRecognizer::InformErrorAndCancelRecognition( |
| 299 SpeechInputError error) { |
| 300 DCHECK_NE(error, kErrorNone); |
| 298 CancelRecognition(); | 301 CancelRecognition(); |
| 299 | 302 |
| 300 // Guard against the delegate freeing us until we finish our job. | 303 // Guard against the delegate freeing us until we finish our job. |
| 301 scoped_refptr<SpeechRecognizer> me(this); | 304 scoped_refptr<SpeechRecognizer> me(this); |
| 302 delegate_->OnRecognizerError(caller_id_, error); | 305 delegate_->OnRecognizerError(caller_id_, error); |
| 303 } | 306 } |
| 304 | 307 |
| 305 } // namespace speech_input | 308 } // namespace speech_input |
| OLD | NEW |