Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(176)

Side by Side Diff: content/browser/speech/speech_recognizer.cc

Issue 6615020: Stream speech audio to server as it gets recorded, instead of waiting until end of recording. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 9 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « content/browser/speech/speech_recognizer.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "content/browser/speech/speech_recognizer.h" 5 #include "content/browser/speech/speech_recognizer.h"
6 6
7 #include "base/time.h" 7 #include "base/time.h"
8 #include "chrome/browser/profiles/profile.h" 8 #include "chrome/browser/profiles/profile.h"
9 #include "chrome/common/net/url_request_context_getter.h" 9 #include "chrome/common/net/url_request_context_getter.h"
10 #include "content/browser/browser_thread.h" 10 #include "content/browser/browser_thread.h"
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after
103 kNumBitsPerAudioSample)); 103 kNumBitsPerAudioSample));
104 int samples_per_packet = (kAudioSampleRate * kAudioPacketIntervalMs) / 1000; 104 int samples_per_packet = (kAudioSampleRate * kAudioPacketIntervalMs) / 1000;
105 AudioParameters params(AudioParameters::AUDIO_PCM_LINEAR, kNumAudioChannels, 105 AudioParameters params(AudioParameters::AUDIO_PCM_LINEAR, kNumAudioChannels,
106 kAudioSampleRate, kNumBitsPerAudioSample, 106 kAudioSampleRate, kNumBitsPerAudioSample,
107 samples_per_packet); 107 samples_per_packet);
108 audio_controller_ = AudioInputController::Create(this, params); 108 audio_controller_ = AudioInputController::Create(this, params);
109 DCHECK(audio_controller_.get()); 109 DCHECK(audio_controller_.get());
110 VLOG(1) << "SpeechRecognizer starting record."; 110 VLOG(1) << "SpeechRecognizer starting record.";
111 num_samples_recorded_ = 0; 111 num_samples_recorded_ = 0;
112 audio_controller_->Record(); 112 audio_controller_->Record();
113 previous_audio_chunk_.clear();
113 114
114 return true; 115 return true;
115 } 116 }
116 117
117 void SpeechRecognizer::CancelRecognition() { 118 void SpeechRecognizer::CancelRecognition() {
118 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 119 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
119 DCHECK(audio_controller_.get() || request_.get()); 120 DCHECK(audio_controller_.get() || request_.get());
120 121
121 // Stop recording if required. 122 // Stop recording if required.
122 if (audio_controller_.get()) { 123 if (audio_controller_.get()) {
(...skipping 11 matching lines...) Expand all
134 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 135 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
135 136
136 // If audio recording has already stopped and we are in recognition phase, 137 // If audio recording has already stopped and we are in recognition phase,
137 // silently ignore any more calls to stop recording. 138 // silently ignore any more calls to stop recording.
138 if (!audio_controller_.get()) 139 if (!audio_controller_.get())
139 return; 140 return;
140 141
141 VLOG(1) << "SpeechRecognizer stopping record."; 142 VLOG(1) << "SpeechRecognizer stopping record.";
142 audio_controller_->Close(); 143 audio_controller_->Close();
143 audio_controller_ = NULL; // Releases the ref ptr. 144 audio_controller_ = NULL; // Releases the ref ptr.
144 encoder_->Flush();
145 145
146 delegate_->DidCompleteRecording(caller_id_); 146 delegate_->DidCompleteRecording(caller_id_);
147 147
148 // Since the http request takes a single string as POST data, allocate 148 // Get any last bits of encoded data left.
149 // one and copy over bytes from the audio buffers to the string. 149 encoder_->Flush();
150 // And if we haven't got any audio yet, end the recognition sequence here. 150 string encoded_data;
151 string mime_type = encoder_->mime_type(); 151 encoder_->GetEncodedDataAndClear(&encoded_data);
152 string data;
153 encoder_->GetEncodedData(&data);
154 encoder_.reset(); 152 encoder_.reset();
155 153
156 if (data.empty()) { 154 // If we haven't got any audio yet, end the recognition sequence here.
155 if (request_ == NULL) {
157 // Guard against the delegate freeing us until we finish our job. 156 // Guard against the delegate freeing us until we finish our job.
158 scoped_refptr<SpeechRecognizer> me(this); 157 scoped_refptr<SpeechRecognizer> me(this);
159 delegate_->DidCompleteRecognition(caller_id_); 158 delegate_->DidCompleteRecognition(caller_id_);
160 } else { 159 } else {
161 DCHECK(!request_.get()); 160 // UploadAudioChunk requires a non-empty buffer. So we check if there was
162 request_.reset(new SpeechRecognitionRequest( 161 // any data available since last time we sent and if not try to send the
163 Profile::GetDefaultRequestContext(), this)); 162 // last chunk again (i.e. repeat the last 100ms of audio). If nothing was
164 request_->Send(language_, grammar_, hardware_info_, origin_url_, 163 // recorded yet, we just send a whitespace string.
bulach 2011/03/03 20:05:00 hmm, I haven't followed the other CL this one depe
165 mime_type, data); 164 if (encoded_data.empty()) {
165 encoded_data = !previous_audio_chunk_.empty() ?
166 previous_audio_chunk_ : " ";
167 }
168 request_->UploadAudioChunk(encoded_data, true);
166 } 169 }
167 } 170 }
168 171
169 void SpeechRecognizer::ReleaseAudioBuffers() {
170 }
171
172 // Invoked in the audio thread. 172 // Invoked in the audio thread.
173 void SpeechRecognizer::OnError(AudioInputController* controller, 173 void SpeechRecognizer::OnError(AudioInputController* controller,
174 int error_code) { 174 int error_code) {
175 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, 175 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
176 NewRunnableMethod(this, 176 NewRunnableMethod(this,
177 &SpeechRecognizer::HandleOnError, 177 &SpeechRecognizer::HandleOnError,
178 error_code)); 178 error_code));
179 } 179 }
180 180
181 void SpeechRecognizer::HandleOnError(int error_code) { 181 void SpeechRecognizer::HandleOnError(int error_code) {
(...skipping 25 matching lines...) Expand all
207 // recording might have been stopped after this buffer was posted to the queue 207 // recording might have been stopped after this buffer was posted to the queue
208 // by |OnData|. 208 // by |OnData|.
209 if (!audio_controller_.get()) { 209 if (!audio_controller_.get()) {
210 delete data; 210 delete data;
211 return; 211 return;
212 } 212 }
213 213
214 const short* samples = reinterpret_cast<const short*>(data->data()); 214 const short* samples = reinterpret_cast<const short*>(data->data());
215 DCHECK((data->length() % sizeof(short)) == 0); 215 DCHECK((data->length() % sizeof(short)) == 0);
216 int num_samples = data->length() / sizeof(short); 216 int num_samples = data->length() / sizeof(short);
217
218 encoder_->Encode(samples, num_samples); 217 encoder_->Encode(samples, num_samples);
219 float rms; 218 float rms;
220 endpointer_.ProcessAudio(samples, num_samples, &rms); 219 endpointer_.ProcessAudio(samples, num_samples, &rms);
221 bool did_clip = Clipping(samples, num_samples); 220 bool did_clip = Clipping(samples, num_samples);
222 delete data; 221 delete data;
223 num_samples_recorded_ += num_samples; 222 num_samples_recorded_ += num_samples;
224 223
224 if (request_ == NULL) {
225 // This was the first audio packet recorded, so start a request to the
226 // server to send the data.
227 request_.reset(new SpeechRecognitionRequest(
228 Profile::GetDefaultRequestContext(), this));
229 request_->Start(language_, grammar_, hardware_info_, origin_url_,
230 encoder_->mime_type());
231 }
232
233 string encoded_data;
234 encoder_->GetEncodedDataAndClear(&encoded_data);
235 DCHECK(!encoded_data.empty());
236 request_->UploadAudioChunk(encoded_data, false);
237 previous_audio_chunk_ = encoded_data;
238
225 if (endpointer_.IsEstimatingEnvironment()) { 239 if (endpointer_.IsEstimatingEnvironment()) {
226 // Check if we have gathered enough audio for the endpointer to do 240 // Check if we have gathered enough audio for the endpointer to do
227 // environment estimation and should move on to detect speech/end of speech. 241 // environment estimation and should move on to detect speech/end of speech.
228 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs * 242 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs *
229 kAudioSampleRate) / 1000) { 243 kAudioSampleRate) / 1000) {
230 endpointer_.SetUserInputMode(); 244 endpointer_.SetUserInputMode();
231 delegate_->DidCompleteEnvironmentEstimation(caller_id_); 245 delegate_->DidCompleteEnvironmentEstimation(caller_id_);
232 } 246 }
233 return; // No more processing since we are still estimating environment. 247 return; // No more processing since we are still estimating environment.
234 } 248 }
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
283 297
284 void SpeechRecognizer::InformErrorAndCancelRecognition(ErrorCode error) { 298 void SpeechRecognizer::InformErrorAndCancelRecognition(ErrorCode error) {
285 CancelRecognition(); 299 CancelRecognition();
286 300
287 // Guard against the delegate freeing us until we finish our job. 301 // Guard against the delegate freeing us until we finish our job.
288 scoped_refptr<SpeechRecognizer> me(this); 302 scoped_refptr<SpeechRecognizer> me(this);
289 delegate_->OnRecognizerError(caller_id_, error); 303 delegate_->OnRecognizerError(caller_id_, error);
290 } 304 }
291 305
292 } // namespace speech_input 306 } // namespace speech_input
OLDNEW
« no previous file with comments | « content/browser/speech/speech_recognizer.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698