OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "content/browser/speech/speech_recognizer.h" | 5 #include "content/browser/speech/speech_recognizer.h" |
6 | 6 |
7 #include "base/bind.h" | 7 #include "base/bind.h" |
8 #include "base/time.h" | 8 #include "base/time.h" |
9 #include "content/public/browser/browser_thread.h" | 9 #include "content/public/browser/browser_thread.h" |
10 #include "content/public/common/speech_input_result.h" | 10 #include "content/public/common/speech_input_result.h" |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
54 const ChannelLayout SpeechRecognizer::kChannelLayout = CHANNEL_LAYOUT_MONO; | 54 const ChannelLayout SpeechRecognizer::kChannelLayout = CHANNEL_LAYOUT_MONO; |
55 const int SpeechRecognizer::kNumBitsPerAudioSample = 16; | 55 const int SpeechRecognizer::kNumBitsPerAudioSample = 16; |
56 const int SpeechRecognizer::kNoSpeechTimeoutSec = 8; | 56 const int SpeechRecognizer::kNoSpeechTimeoutSec = 8; |
57 const int SpeechRecognizer::kEndpointerEstimationTimeMs = 300; | 57 const int SpeechRecognizer::kEndpointerEstimationTimeMs = 300; |
58 | 58 |
59 SpeechRecognizer::SpeechRecognizer(Delegate* delegate, | 59 SpeechRecognizer::SpeechRecognizer(Delegate* delegate, |
60 int caller_id, | 60 int caller_id, |
61 const std::string& language, | 61 const std::string& language, |
62 const std::string& grammar, | 62 const std::string& grammar, |
63 net::URLRequestContextGetter* context_getter, | 63 net::URLRequestContextGetter* context_getter, |
| 64 AudioManager* audio_manager, |
64 bool filter_profanities, | 65 bool filter_profanities, |
65 const std::string& hardware_info, | 66 const std::string& hardware_info, |
66 const std::string& origin_url) | 67 const std::string& origin_url) |
67 : delegate_(delegate), | 68 : delegate_(delegate), |
68 caller_id_(caller_id), | 69 caller_id_(caller_id), |
69 language_(language), | 70 language_(language), |
70 grammar_(grammar), | 71 grammar_(grammar), |
71 filter_profanities_(filter_profanities), | 72 filter_profanities_(filter_profanities), |
72 hardware_info_(hardware_info), | 73 hardware_info_(hardware_info), |
73 origin_url_(origin_url), | 74 origin_url_(origin_url), |
74 context_getter_(context_getter), | 75 context_getter_(context_getter), |
| 76 audio_manager_(audio_manager), |
75 codec_(AudioEncoder::CODEC_FLAC), | 77 codec_(AudioEncoder::CODEC_FLAC), |
76 encoder_(NULL), | 78 encoder_(NULL), |
77 endpointer_(kAudioSampleRate), | 79 endpointer_(kAudioSampleRate), |
78 num_samples_recorded_(0), | 80 num_samples_recorded_(0), |
79 audio_level_(0.0f) { | 81 audio_level_(0.0f) { |
80 endpointer_.set_speech_input_complete_silence_length( | 82 endpointer_.set_speech_input_complete_silence_length( |
81 base::Time::kMicrosecondsPerSecond / 2); | 83 base::Time::kMicrosecondsPerSecond / 2); |
82 endpointer_.set_long_speech_input_complete_silence_length( | 84 endpointer_.set_long_speech_input_complete_silence_length( |
83 base::Time::kMicrosecondsPerSecond); | 85 base::Time::kMicrosecondsPerSecond); |
84 endpointer_.set_long_speech_length(3 * base::Time::kMicrosecondsPerSecond); | 86 endpointer_.set_long_speech_length(3 * base::Time::kMicrosecondsPerSecond); |
(...skipping 19 matching lines...) Expand all Loading... |
104 // starting to treat the audio as user input. In |HandleOnData| we wait until | 106 // starting to treat the audio as user input. In |HandleOnData| we wait until |
105 // such time has passed before switching to user input mode. | 107 // such time has passed before switching to user input mode. |
106 endpointer_.SetEnvironmentEstimationMode(); | 108 endpointer_.SetEnvironmentEstimationMode(); |
107 | 109 |
108 encoder_.reset(AudioEncoder::Create(codec_, kAudioSampleRate, | 110 encoder_.reset(AudioEncoder::Create(codec_, kAudioSampleRate, |
109 kNumBitsPerAudioSample)); | 111 kNumBitsPerAudioSample)); |
110 int samples_per_packet = (kAudioSampleRate * kAudioPacketIntervalMs) / 1000; | 112 int samples_per_packet = (kAudioSampleRate * kAudioPacketIntervalMs) / 1000; |
111 AudioParameters params(AudioParameters::AUDIO_PCM_LINEAR, kChannelLayout, | 113 AudioParameters params(AudioParameters::AUDIO_PCM_LINEAR, kChannelLayout, |
112 kAudioSampleRate, kNumBitsPerAudioSample, | 114 kAudioSampleRate, kNumBitsPerAudioSample, |
113 samples_per_packet); | 115 samples_per_packet); |
114 audio_controller_ = AudioInputController::Create(this, params); | 116 audio_controller_ = AudioInputController::Create(audio_manager_, this, |
| 117 params); |
115 DCHECK(audio_controller_.get()); | 118 DCHECK(audio_controller_.get()); |
116 VLOG(1) << "SpeechRecognizer starting record."; | 119 VLOG(1) << "SpeechRecognizer starting record."; |
117 num_samples_recorded_ = 0; | 120 num_samples_recorded_ = 0; |
118 audio_controller_->Record(); | 121 audio_controller_->Record(); |
119 | 122 |
120 return true; | 123 return true; |
121 } | 124 } |
122 | 125 |
123 void SpeechRecognizer::CancelRecognition() { | 126 void SpeechRecognizer::CancelRecognition() { |
124 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); | 127 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); |
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
208 // recording might have been stopped after this buffer was posted to the queue | 211 // recording might have been stopped after this buffer was posted to the queue |
209 // by |OnData|. | 212 // by |OnData|. |
210 if (!audio_controller_.get()) { | 213 if (!audio_controller_.get()) { |
211 delete data; | 214 delete data; |
212 return; | 215 return; |
213 } | 216 } |
214 | 217 |
215 bool speech_was_heard_before_packet = endpointer_.DidStartReceivingSpeech(); | 218 bool speech_was_heard_before_packet = endpointer_.DidStartReceivingSpeech(); |
216 | 219 |
217 const short* samples = reinterpret_cast<const short*>(data->data()); | 220 const short* samples = reinterpret_cast<const short*>(data->data()); |
218 DCHECK((data->length() % sizeof(short)) == 0); | 221 DCHECK_EQ((data->length() % sizeof(short)), 0U); |
219 int num_samples = data->length() / sizeof(short); | 222 int num_samples = data->length() / sizeof(short); |
220 encoder_->Encode(samples, num_samples); | 223 encoder_->Encode(samples, num_samples); |
221 float rms; | 224 float rms; |
222 endpointer_.ProcessAudio(samples, num_samples, &rms); | 225 endpointer_.ProcessAudio(samples, num_samples, &rms); |
223 bool did_clip = Clipping(samples, num_samples); | 226 bool did_clip = Clipping(samples, num_samples); |
224 delete data; | 227 delete data; |
225 num_samples_recorded_ += num_samples; | 228 num_samples_recorded_ += num_samples; |
226 | 229 |
227 if (request_ == NULL) { | 230 if (request_ == NULL) { |
228 // This was the first audio packet recorded, so start a request to the | 231 // This was the first audio packet recorded, so start a request to the |
(...skipping 71 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
300 content::SpeechInputError error) { | 303 content::SpeechInputError error) { |
301 DCHECK_NE(error, content::SPEECH_INPUT_ERROR_NONE); | 304 DCHECK_NE(error, content::SPEECH_INPUT_ERROR_NONE); |
302 CancelRecognition(); | 305 CancelRecognition(); |
303 | 306 |
304 // Guard against the delegate freeing us until we finish our job. | 307 // Guard against the delegate freeing us until we finish our job. |
305 scoped_refptr<SpeechRecognizer> me(this); | 308 scoped_refptr<SpeechRecognizer> me(this); |
306 delegate_->OnRecognizerError(caller_id_, error); | 309 delegate_->OnRecognizerError(caller_id_, error); |
307 } | 310 } |
308 | 311 |
309 } // namespace speech_input | 312 } // namespace speech_input |
OLD | NEW |