Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(187)

Side by Side Diff: content/browser/speech/speech_recognizer.cc

Issue 6615020: Stream speech audio to server as it gets recorded, instead of waiting until end of recording. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: . Created 9 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLD | NEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "content/browser/speech/speech_recognizer.h" 5 #include "content/browser/speech/speech_recognizer.h"
6 6
7 #include "base/time.h" 7 #include "base/time.h"
8 #include "chrome/browser/profiles/profile.h" 8 #include "chrome/browser/profiles/profile.h"
9 #include "chrome/common/net/url_request_context_getter.h" 9 #include "chrome/common/net/url_request_context_getter.h"
10 #include "content/browser/browser_thread.h" 10 #include "content/browser/browser_thread.h"
(...skipping 123 matching lines...) Expand 10 before | Expand all | Expand 10 after
134 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 134 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
135 135
136 // If audio recording has already stopped and we are in recognition phase, 136 // If audio recording has already stopped and we are in recognition phase,
137 // silently ignore any more calls to stop recording. 137 // silently ignore any more calls to stop recording.
138 if (!audio_controller_.get()) 138 if (!audio_controller_.get())
139 return; 139 return;
140 140
141 VLOG(1) << "SpeechRecognizer stopping record."; 141 VLOG(1) << "SpeechRecognizer stopping record.";
142 audio_controller_->Close(); 142 audio_controller_->Close();
143 audio_controller_ = NULL; // Releases the ref ptr. 143 audio_controller_ = NULL; // Releases the ref ptr.
144 encoder_->Flush();
145 144
146 delegate_->DidCompleteRecording(caller_id_); 145 delegate_->DidCompleteRecording(caller_id_);
147 146
148 // Since the http request takes a single string as POST data, allocate 147 // UploadAudioChunk requires a non-empty buffer. So we encode a packet of
bulach 2011/03/04 12:41:08 maybe: non-empty final buffer
149 // one and copy over bytes from the audio buffers to the string. 148 // silence in case encoder had no data already.
150 // And If we haven't got any audio yet end the recognition sequence here. 149 int num_samples = (kAudioSampleRate * kAudioPacketIntervalMs) / 1000;
151 string mime_type = encoder_->mime_type(); 150 scoped_ptr<short> samples(new short[num_samples]);
152 string data; 151 memset(samples.get(), 0, sizeof(short) * num_samples);
bulach 2011/03/04 12:41:08 would this work? vector<short> sample((kAudioSamp… [comment preview truncated]
153 encoder_->GetEncodedData(&data); 152 encoder_->Encode(samples.get(), num_samples);
153 encoder_->Flush();
154 string encoded_data;
155 encoder_->GetEncodedDataAndClear(&encoded_data);
156 DCHECK(!encoded_data.empty());
154 encoder_.reset(); 157 encoder_.reset();
155 158
156 if (data.empty()) { 159 // If we haven't got any audio yet end the recognition sequence here.
160 if (request_ == NULL) {
157 // Guard against the delegate freeing us until we finish our job. 161 // Guard against the delegate freeing us until we finish our job.
158 scoped_refptr<SpeechRecognizer> me(this); 162 scoped_refptr<SpeechRecognizer> me(this);
159 delegate_->DidCompleteRecognition(caller_id_); 163 delegate_->DidCompleteRecognition(caller_id_);
160 } else { 164 } else {
161 DCHECK(!request_.get()); 165 request_->UploadAudioChunk(encoded_data, true);
bulach 2011/03/04 12:41:08 nit: true /* final */
162 request_.reset(new SpeechRecognitionRequest(
163 Profile::GetDefaultRequestContext(), this));
164 request_->Send(language_, grammar_, hardware_info_, origin_url_,
165 mime_type, data);
166 } 166 }
167 } 167 }
168 168
169 void SpeechRecognizer::ReleaseAudioBuffers() {
170 }
171
172 // Invoked in the audio thread. 169 // Invoked in the audio thread.
173 void SpeechRecognizer::OnError(AudioInputController* controller, 170 void SpeechRecognizer::OnError(AudioInputController* controller,
174 int error_code) { 171 int error_code) {
175 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, 172 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
176 NewRunnableMethod(this, 173 NewRunnableMethod(this,
177 &SpeechRecognizer::HandleOnError, 174 &SpeechRecognizer::HandleOnError,
178 error_code)); 175 error_code));
179 } 176 }
180 177
181 void SpeechRecognizer::HandleOnError(int error_code) { 178 void SpeechRecognizer::HandleOnError(int error_code) {
(...skipping 25 matching lines...) Expand all
207 // recording might have been stopped after this buffer was posted to the queue 204 // recording might have been stopped after this buffer was posted to the queue
208 // by |OnData|. 205 // by |OnData|.
209 if (!audio_controller_.get()) { 206 if (!audio_controller_.get()) {
210 delete data; 207 delete data;
211 return; 208 return;
212 } 209 }
213 210
214 const short* samples = reinterpret_cast<const short*>(data->data()); 211 const short* samples = reinterpret_cast<const short*>(data->data());
215 DCHECK((data->length() % sizeof(short)) == 0); 212 DCHECK((data->length() % sizeof(short)) == 0);
216 int num_samples = data->length() / sizeof(short); 213 int num_samples = data->length() / sizeof(short);
217
218 encoder_->Encode(samples, num_samples); 214 encoder_->Encode(samples, num_samples);
219 float rms; 215 float rms;
220 endpointer_.ProcessAudio(samples, num_samples, &rms); 216 endpointer_.ProcessAudio(samples, num_samples, &rms);
221 bool did_clip = Clipping(samples, num_samples); 217 bool did_clip = Clipping(samples, num_samples);
222 delete data; 218 delete data;
223 num_samples_recorded_ += num_samples; 219 num_samples_recorded_ += num_samples;
224 220
221 if (request_ == NULL) {
222 // This was the first audio packet recorded, so start a request to the
223 // server to send the data.
224 request_.reset(new SpeechRecognitionRequest(
225 Profile::GetDefaultRequestContext(), this));
226 request_->Start(language_, grammar_, hardware_info_, origin_url_,
227 encoder_->mime_type());
228 }
229
230 string encoded_data;
231 encoder_->GetEncodedDataAndClear(&encoded_data);
232 DCHECK(!encoded_data.empty());
233 request_->UploadAudioChunk(encoded_data, false);
bulach 2011/03/04 12:41:08 nit: false /* final */
234
225 if (endpointer_.IsEstimatingEnvironment()) { 235 if (endpointer_.IsEstimatingEnvironment()) {
226 // Check if we have gathered enough audio for the endpointer to do 236 // Check if we have gathered enough audio for the endpointer to do
227 // environment estimation and should move on to detect speech/end of speech. 237 // environment estimation and should move on to detect speech/end of speech.
228 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs * 238 if (num_samples_recorded_ >= (kEndpointerEstimationTimeMs *
229 kAudioSampleRate) / 1000) { 239 kAudioSampleRate) / 1000) {
230 endpointer_.SetUserInputMode(); 240 endpointer_.SetUserInputMode();
231 delegate_->DidCompleteEnvironmentEstimation(caller_id_); 241 delegate_->DidCompleteEnvironmentEstimation(caller_id_);
232 } 242 }
233 return; // No more processing since we are still estimating environment. 243 return; // No more processing since we are still estimating environment.
234 } 244 }
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
283 293
284 void SpeechRecognizer::InformErrorAndCancelRecognition(ErrorCode error) { 294 void SpeechRecognizer::InformErrorAndCancelRecognition(ErrorCode error) {
285 CancelRecognition(); 295 CancelRecognition();
286 296
287 // Guard against the delegate freeing us until we finish our job. 297 // Guard against the delegate freeing us until we finish our job.
288 scoped_refptr<SpeechRecognizer> me(this); 298 scoped_refptr<SpeechRecognizer> me(this);
289 delegate_->OnRecognizerError(caller_id_, error); 299 delegate_->OnRecognizerError(caller_id_, error);
290 } 300 }
291 301
292 } // namespace speech_input 302 } // namespace speech_input
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698