Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(107)

Side by Side Diff: content/browser/speech/speech_recognition_engine.cc

Issue 1891543002: Devirtualize SpeechRecognitionEngine (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@kill_one_shot_engine
Patch Set: drop an include Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "content/browser/speech/speech_recognition_engine.h" 5 #include "content/browser/speech/speech_recognition_engine.h"
6 6
7 #include <algorithm>
8 #include <vector>
9
10 #include "base/big_endian.h"
11 #include "base/bind.h"
12 #include "base/rand_util.h"
13 #include "base/strings/string_number_conversions.h"
14 #include "base/strings/string_util.h"
15 #include "base/strings/utf_string_conversions.h"
16 #include "base/time/time.h"
17 #include "content/browser/speech/audio_buffer.h"
18 #include "content/browser/speech/proto/google_streaming_api.pb.h"
19 #include "content/public/common/speech_recognition_error.h"
20 #include "content/public/common/speech_recognition_result.h"
21 #include "google_apis/google_api_keys.h"
22 #include "net/base/escape.h"
23 #include "net/base/load_flags.h"
24 #include "net/url_request/http_user_agent_settings.h"
25 #include "net/url_request/url_fetcher.h"
26 #include "net/url_request/url_request_context.h"
27 #include "net/url_request/url_request_context_getter.h"
28 #include "net/url_request/url_request_status.h"
29
30 using net::URLFetcher;
31
32 namespace content {
7 namespace { 33 namespace {
34
// Base address of the full-duplex speech web service. The two request URLs
// are formed by appending one of the path suffixes below (downstream for the
// GET that receives results, upstream for the POST that carries audio) and
// then the '&'-joined query arguments.
const char kWebServiceBaseUrl[] =
    "https://www.google.com/speech-api/full-duplex/v1";
const char kDownstreamUrl[] = "/down?";
const char kUpstreamUrl[] = "/up?";

// Cap applied to the requested number of hypotheses; it mirrors the largest
// maxAlternatives value the server supports.
const uint32_t kMaxMaxAlternatives = 30;
42
43 // TODO(hans): Remove this and other logging when we don't need it anymore.
44 void DumpResponse(const std::string& response) {
45 DVLOG(1) << "------------";
46 proto::SpeechRecognitionEvent event;
47 if (!event.ParseFromString(response)) {
48 DVLOG(1) << "Parse failed!";
49 return;
50 }
51 if (event.has_status())
52 DVLOG(1) << "STATUS\t" << event.status();
53 if (event.has_endpoint())
54 DVLOG(1) << "ENDPOINT\t" << event.endpoint();
55 for (int i = 0; i < event.result_size(); ++i) {
56 DVLOG(1) << "RESULT #" << i << ":";
57 const proto::SpeechRecognitionResult& res = event.result(i);
58 if (res.has_final())
59 DVLOG(1) << " final:\t" << res.final();
60 if (res.has_stability())
61 DVLOG(1) << " STABILITY:\t" << res.stability();
62 for (int j = 0; j < res.alternative_size(); ++j) {
63 const proto::SpeechRecognitionAlternative& alt =
64 res.alternative(j);
65 if (alt.has_confidence())
66 DVLOG(1) << " CONFIDENCE:\t" << alt.confidence();
67 if (alt.has_transcript())
68 DVLOG(1) << " TRANSCRIPT:\t" << alt.transcript();
69 }
70 }
71 }
72
// Fallback values that the SpeechRecognitionEngine::Config constructor
// assigns to its audio sample rate, bits per sample and maximum number of
// hypotheses.
const int kDefaultConfigSampleRate = 8000;
const int kDefaultConfigBitsPerSample = 16;
const uint32_t kDefaultMaxHypotheses = 1;
76
11 } // namespace 77 } // namespace
12 78
13 namespace content {
14
15 SpeechRecognitionEngine::Config::Config() 79 SpeechRecognitionEngine::Config::Config()
16 : filter_profanities(false), 80 : filter_profanities(false),
17 continuous(true), 81 continuous(true),
18 interim_results(true), 82 interim_results(true),
19 max_hypotheses(kDefaultMaxHypotheses), 83 max_hypotheses(kDefaultMaxHypotheses),
20 audio_sample_rate(kDefaultConfigSampleRate), 84 audio_sample_rate(kDefaultConfigSampleRate),
21 audio_num_bits_per_sample(kDefaultConfigBitsPerSample) { 85 audio_num_bits_per_sample(kDefaultConfigBitsPerSample) {}
22 } 86
23 87 SpeechRecognitionEngine::Config::~Config() {}
24 SpeechRecognitionEngine::Config::~Config() { 88
89 const int SpeechRecognitionEngine::kAudioPacketIntervalMs = 100;
90 const int SpeechRecognitionEngine::kUpstreamUrlFetcherIdForTesting = 0;
91 const int SpeechRecognitionEngine::kDownstreamUrlFetcherIdForTesting = 1;
92 const int SpeechRecognitionEngine::kWebserviceStatusNoError = 0;
93 const int SpeechRecognitionEngine::kWebserviceStatusErrorNoMatch = 5;
94
95 SpeechRecognitionEngine::SpeechRecognitionEngine(
96 net::URLRequestContextGetter* context)
97 : url_context_(context),
98 previous_response_length_(0),
99 got_last_definitive_result_(false),
100 is_dispatching_event_(false),
101 use_framed_post_data_(false),
102 state_(STATE_IDLE) {}
103
104 SpeechRecognitionEngine::~SpeechRecognitionEngine() {}
105
106 void SpeechRecognitionEngine::SetConfig(const Config& config) {
107 config_ = config;
108 }
109
110 void SpeechRecognitionEngine::StartRecognition() {
111 FSMEventArgs event_args(EVENT_START_RECOGNITION);
112 DispatchEvent(event_args);
113 }
114
115 void SpeechRecognitionEngine::EndRecognition() {
116 FSMEventArgs event_args(EVENT_END_RECOGNITION);
117 DispatchEvent(event_args);
118 }
119
120 void SpeechRecognitionEngine::TakeAudioChunk(const AudioChunk& data) {
121 FSMEventArgs event_args(EVENT_AUDIO_CHUNK);
122 event_args.audio_data = &data;
123 DispatchEvent(event_args);
124 }
125
126 void SpeechRecognitionEngine::AudioChunksEnded() {
127 FSMEventArgs event_args(EVENT_AUDIO_CHUNKS_ENDED);
128 DispatchEvent(event_args);
129 }
130
131 void SpeechRecognitionEngine::OnURLFetchComplete(const URLFetcher* source) {
132 const bool kResponseComplete = true;
133 DispatchHTTPResponse(source, kResponseComplete);
134 }
135
136 void SpeechRecognitionEngine::OnURLFetchDownloadProgress(
137 const URLFetcher* source,
138 int64_t current,
139 int64_t total) {
140 const bool kPartialResponse = false;
141 DispatchHTTPResponse(source, kPartialResponse);
142 }
143
144 void SpeechRecognitionEngine::DispatchHTTPResponse(const URLFetcher* source,
145 bool end_of_response) {
146 DCHECK(CalledOnValidThread());
147 DCHECK(source);
148 const bool response_is_good = source->GetStatus().is_success() &&
149 source->GetResponseCode() == 200;
150 std::string response;
151 if (response_is_good)
152 source->GetResponseAsString(&response);
153 const size_t current_response_length = response.size();
154
155 DVLOG(1) << (source == downstream_fetcher_.get() ? "Downstream" : "Upstream")
156 << "HTTP, code: " << source->GetResponseCode()
157 << " length: " << current_response_length
158 << " eor: " << end_of_response;
159
160 // URLFetcher provides always the entire response buffer, but we are only
161 // interested in the fresh data introduced by the last chunk. Therefore, we
162 // drop the previous content we have already processed.
163 if (current_response_length != 0) {
164 DCHECK_GE(current_response_length, previous_response_length_);
165 response.erase(0, previous_response_length_);
166 previous_response_length_ = current_response_length;
167 }
168
169 if (!response_is_good && source == downstream_fetcher_.get()) {
170 DVLOG(1) << "Downstream error " << source->GetResponseCode();
171 FSMEventArgs event_args(EVENT_DOWNSTREAM_ERROR);
172 DispatchEvent(event_args);
173 return;
174 }
175 if (!response_is_good && source == upstream_fetcher_.get()) {
176 DVLOG(1) << "Upstream error " << source->GetResponseCode()
177 << " EOR " << end_of_response;
178 FSMEventArgs event_args(EVENT_UPSTREAM_ERROR);
179 DispatchEvent(event_args);
180 return;
181 }
182
183 // Ignore incoming data on the upstream connection.
184 if (source == upstream_fetcher_.get())
185 return;
186
187 DCHECK(response_is_good && source == downstream_fetcher_.get());
188
189 // The downstream response is organized in chunks, whose size is determined
190 // by a 4 bytes prefix, transparently handled by the ChunkedByteBuffer class.
191 // Such chunks are sent by the speech recognition webservice over the HTTP
192 // downstream channel using HTTP chunked transfer (unrelated to our chunks).
193 // This function is called every time an HTTP chunk is received by the
194 // url fetcher. However there isn't any particular matching beween our
195 // protocol chunks and HTTP chunks, in the sense that a single HTTP chunk can
196 // contain a portion of one chunk or even more chunks together.
197 chunked_byte_buffer_.Append(response);
198
199 // A single HTTP chunk can contain more than one data chunk, thus the while.
200 while (chunked_byte_buffer_.HasChunks()) {
201 FSMEventArgs event_args(EVENT_DOWNSTREAM_RESPONSE);
202 event_args.response = chunked_byte_buffer_.PopChunk();
203 DCHECK(event_args.response.get());
204 DumpResponse(std::string(event_args.response->begin(),
205 event_args.response->end()));
206 DispatchEvent(event_args);
207 }
208 if (end_of_response) {
209 FSMEventArgs event_args(EVENT_DOWNSTREAM_CLOSED);
210 DispatchEvent(event_args);
211 }
212 }
213
214 bool SpeechRecognitionEngine::IsRecognitionPending() const {
215 DCHECK(CalledOnValidThread());
216 return state_ != STATE_IDLE;
217 }
218
219 int SpeechRecognitionEngine::GetDesiredAudioChunkDurationMs() const {
220 return kAudioPacketIntervalMs;
221 }
222
223 // ----------------------- Core FSM implementation ---------------------------
224
225 void SpeechRecognitionEngine::DispatchEvent(
226 const FSMEventArgs& event_args) {
227 DCHECK(CalledOnValidThread());
228 DCHECK_LE(event_args.event, EVENT_MAX_VALUE);
229 DCHECK_LE(state_, STATE_MAX_VALUE);
230
231 // Event dispatching must be sequential, otherwise it will break all the rules
232 // and the assumptions of the finite state automata model.
233 DCHECK(!is_dispatching_event_);
234 is_dispatching_event_ = true;
235
236 state_ = ExecuteTransitionAndGetNextState(event_args);
237
238 is_dispatching_event_ = false;
239 }
240
241 SpeechRecognitionEngine::FSMState
242 SpeechRecognitionEngine::ExecuteTransitionAndGetNextState(
243 const FSMEventArgs& event_args) {
244 const FSMEvent event = event_args.event;
245 switch (state_) {
246 case STATE_IDLE:
247 switch (event) {
248 case EVENT_START_RECOGNITION:
249 return ConnectBothStreams(event_args);
250 case EVENT_END_RECOGNITION:
251 // Note AUDIO_CHUNK and AUDIO_END events can remain enqueued in case of
252 // abort, so we just silently drop them here.
253 case EVENT_AUDIO_CHUNK:
254 case EVENT_AUDIO_CHUNKS_ENDED:
255 // DOWNSTREAM_CLOSED can be received if we end up here due to an error.
256 case EVENT_DOWNSTREAM_CLOSED:
257 return DoNothing(event_args);
258 case EVENT_UPSTREAM_ERROR:
259 case EVENT_DOWNSTREAM_ERROR:
260 case EVENT_DOWNSTREAM_RESPONSE:
261 return NotFeasible(event_args);
262 }
263 break;
264 case STATE_BOTH_STREAMS_CONNECTED:
265 switch (event) {
266 case EVENT_AUDIO_CHUNK:
267 return TransmitAudioUpstream(event_args);
268 case EVENT_DOWNSTREAM_RESPONSE:
269 return ProcessDownstreamResponse(event_args);
270 case EVENT_AUDIO_CHUNKS_ENDED:
271 return CloseUpstreamAndWaitForResults(event_args);
272 case EVENT_END_RECOGNITION:
273 return AbortSilently(event_args);
274 case EVENT_UPSTREAM_ERROR:
275 case EVENT_DOWNSTREAM_ERROR:
276 case EVENT_DOWNSTREAM_CLOSED:
277 return AbortWithError(event_args);
278 case EVENT_START_RECOGNITION:
279 return NotFeasible(event_args);
280 }
281 break;
282 case STATE_WAITING_DOWNSTREAM_RESULTS:
283 switch (event) {
284 case EVENT_DOWNSTREAM_RESPONSE:
285 return ProcessDownstreamResponse(event_args);
286 case EVENT_DOWNSTREAM_CLOSED:
287 return RaiseNoMatchErrorIfGotNoResults(event_args);
288 case EVENT_END_RECOGNITION:
289 return AbortSilently(event_args);
290 case EVENT_UPSTREAM_ERROR:
291 case EVENT_DOWNSTREAM_ERROR:
292 return AbortWithError(event_args);
293 case EVENT_START_RECOGNITION:
294 case EVENT_AUDIO_CHUNK:
295 case EVENT_AUDIO_CHUNKS_ENDED:
296 return NotFeasible(event_args);
297 }
298 break;
299 }
300 return NotFeasible(event_args);
301 }
302
303 // ----------- Contract for all the FSM evolution functions below -------------
304 // - Are guaranteed to be executed in the same thread (IO, except for tests);
305 // - Are guaranteed to be not reentrant (themselves and each other);
306 // - event_args members are guaranteed to be stable during the call;
307
308 SpeechRecognitionEngine::FSMState
309 SpeechRecognitionEngine::ConnectBothStreams(const FSMEventArgs&) {
310 DCHECK(!upstream_fetcher_.get());
311 DCHECK(!downstream_fetcher_.get());
312
313 encoder_.reset(new AudioEncoder(config_.audio_sample_rate,
314 config_.audio_num_bits_per_sample));
315 DCHECK(encoder_.get());
316 const std::string request_key = GenerateRequestKey();
317
318 // Only use the framed post data format when a preamble needs to be logged.
319 use_framed_post_data_ = (config_.preamble &&
320 !config_.preamble->sample_data.empty() &&
321 !config_.auth_token.empty() &&
322 !config_.auth_scope.empty());
323 if (use_framed_post_data_) {
324 preamble_encoder_.reset(new AudioEncoder(
325 config_.preamble->sample_rate,
326 config_.preamble->sample_depth * 8));
327 }
328
329 // Setup downstream fetcher.
330 std::vector<std::string> downstream_args;
331 downstream_args.push_back(
332 "key=" + net::EscapeQueryParamValue(google_apis::GetAPIKey(), true));
333 downstream_args.push_back("pair=" + request_key);
334 downstream_args.push_back("output=pb");
335 GURL downstream_url(std::string(kWebServiceBaseUrl) +
336 std::string(kDownstreamUrl) +
337 base::JoinString(downstream_args, "&"));
338
339 downstream_fetcher_ = URLFetcher::Create(
340 kDownstreamUrlFetcherIdForTesting, downstream_url, URLFetcher::GET, this);
341 downstream_fetcher_->SetRequestContext(url_context_.get());
342 downstream_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SAVE_COOKIES |
343 net::LOAD_DO_NOT_SEND_COOKIES |
344 net::LOAD_DO_NOT_SEND_AUTH_DATA);
345 downstream_fetcher_->Start();
346
347 // Setup upstream fetcher.
348 // TODO(hans): Support for user-selected grammars.
349 std::vector<std::string> upstream_args;
350 upstream_args.push_back("key=" +
351 net::EscapeQueryParamValue(google_apis::GetAPIKey(), true));
352 upstream_args.push_back("pair=" + request_key);
353 upstream_args.push_back("output=pb");
354 upstream_args.push_back(
355 "lang=" + net::EscapeQueryParamValue(GetAcceptedLanguages(), true));
356 upstream_args.push_back(
357 config_.filter_profanities ? "pFilter=2" : "pFilter=0");
358 if (config_.max_hypotheses > 0U) {
359 uint32_t max_alternatives =
360 std::min(kMaxMaxAlternatives, config_.max_hypotheses);
361 upstream_args.push_back("maxAlternatives=" +
362 base::UintToString(max_alternatives));
363 }
364 upstream_args.push_back("app=chromium");
365 if (!config_.hardware_info.empty()) {
366 upstream_args.push_back(
367 "xhw=" + net::EscapeQueryParamValue(config_.hardware_info, true));
368 }
369 for (const SpeechRecognitionGrammar& grammar : config_.grammars) {
370 std::string grammar_value(
371 base::DoubleToString(grammar.weight) + ":" + grammar.url);
372 upstream_args.push_back(
373 "grammar=" + net::EscapeQueryParamValue(grammar_value, true));
374 }
375 if (config_.continuous)
376 upstream_args.push_back("continuous");
377 else
378 upstream_args.push_back("endpoint=1");
379 if (config_.interim_results)
380 upstream_args.push_back("interim");
381 if (!config_.auth_token.empty() && !config_.auth_scope.empty()) {
382 upstream_args.push_back(
383 "authScope=" + net::EscapeQueryParamValue(config_.auth_scope, true));
384 upstream_args.push_back(
385 "authToken=" + net::EscapeQueryParamValue(config_.auth_token, true));
386 }
387 if (use_framed_post_data_) {
388 std::string audio_format;
389 if (preamble_encoder_)
390 audio_format = preamble_encoder_->GetMimeType() + ",";
391 audio_format += encoder_->GetMimeType();
392 upstream_args.push_back(
393 "audioFormat=" + net::EscapeQueryParamValue(audio_format, true));
394 }
395 GURL upstream_url(std::string(kWebServiceBaseUrl) +
396 std::string(kUpstreamUrl) +
397 base::JoinString(upstream_args, "&"));
398
399 upstream_fetcher_ = URLFetcher::Create(kUpstreamUrlFetcherIdForTesting,
400 upstream_url, URLFetcher::POST, this);
401 if (use_framed_post_data_)
402 upstream_fetcher_->SetChunkedUpload("application/octet-stream");
403 else
404 upstream_fetcher_->SetChunkedUpload(encoder_->GetMimeType());
405 upstream_fetcher_->SetRequestContext(url_context_.get());
406 upstream_fetcher_->SetReferrer(config_.origin_url);
407 upstream_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SAVE_COOKIES |
408 net::LOAD_DO_NOT_SEND_COOKIES |
409 net::LOAD_DO_NOT_SEND_AUTH_DATA);
410 upstream_fetcher_->Start();
411 previous_response_length_ = 0;
412
413 if (preamble_encoder_) {
414 // Encode and send preamble right away.
415 scoped_refptr<AudioChunk> chunk = new AudioChunk(
416 reinterpret_cast<const uint8_t*>(config_.preamble->sample_data.data()),
417 config_.preamble->sample_data.size(), config_.preamble->sample_depth);
418 preamble_encoder_->Encode(*chunk);
419 preamble_encoder_->Flush();
420 scoped_refptr<AudioChunk> encoded_data(
421 preamble_encoder_->GetEncodedDataAndClear());
422 UploadAudioChunk(encoded_data->AsString(), FRAME_PREAMBLE_AUDIO, false);
423 }
424 return STATE_BOTH_STREAMS_CONNECTED;
425 }
426
427 SpeechRecognitionEngine::FSMState
428 SpeechRecognitionEngine::TransmitAudioUpstream(
429 const FSMEventArgs& event_args) {
430 DCHECK(upstream_fetcher_.get());
431 DCHECK(event_args.audio_data.get());
432 const AudioChunk& audio = *(event_args.audio_data.get());
433
434 DCHECK_EQ(audio.bytes_per_sample(), config_.audio_num_bits_per_sample / 8);
435 encoder_->Encode(audio);
436 scoped_refptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear());
437 UploadAudioChunk(encoded_data->AsString(), FRAME_RECOGNITION_AUDIO, false);
438 return state_;
439 }
440
441 SpeechRecognitionEngine::FSMState
442 SpeechRecognitionEngine::ProcessDownstreamResponse(
443 const FSMEventArgs& event_args) {
444 DCHECK(event_args.response.get());
445
446 proto::SpeechRecognitionEvent ws_event;
447 if (!ws_event.ParseFromString(std::string(event_args.response->begin(),
448 event_args.response->end())))
449 return AbortWithError(event_args);
450
451 if (ws_event.has_status()) {
452 switch (ws_event.status()) {
453 case proto::SpeechRecognitionEvent::STATUS_SUCCESS:
454 break;
455 case proto::SpeechRecognitionEvent::STATUS_NO_SPEECH:
456 return Abort(SPEECH_RECOGNITION_ERROR_NO_SPEECH);
457 case proto::SpeechRecognitionEvent::STATUS_ABORTED:
458 return Abort(SPEECH_RECOGNITION_ERROR_ABORTED);
459 case proto::SpeechRecognitionEvent::STATUS_AUDIO_CAPTURE:
460 return Abort(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE);
461 case proto::SpeechRecognitionEvent::STATUS_NETWORK:
462 return Abort(SPEECH_RECOGNITION_ERROR_NETWORK);
463 case proto::SpeechRecognitionEvent::STATUS_NOT_ALLOWED:
464 return Abort(SPEECH_RECOGNITION_ERROR_NOT_ALLOWED);
465 case proto::SpeechRecognitionEvent::STATUS_SERVICE_NOT_ALLOWED:
466 return Abort(SPEECH_RECOGNITION_ERROR_SERVICE_NOT_ALLOWED);
467 case proto::SpeechRecognitionEvent::STATUS_BAD_GRAMMAR:
468 return Abort(SPEECH_RECOGNITION_ERROR_BAD_GRAMMAR);
469 case proto::SpeechRecognitionEvent::STATUS_LANGUAGE_NOT_SUPPORTED:
470 return Abort(SPEECH_RECOGNITION_ERROR_LANGUAGE_NOT_SUPPORTED);
471 }
472 }
473
474 if (!config_.continuous && ws_event.has_endpoint() &&
475 ws_event.endpoint() == proto::SpeechRecognitionEvent::END_OF_UTTERANCE) {
476 delegate_->OnSpeechRecognitionEngineEndOfUtterance();
477 }
478
479 SpeechRecognitionResults results;
480 for (int i = 0; i < ws_event.result_size(); ++i) {
481 const proto::SpeechRecognitionResult& ws_result = ws_event.result(i);
482 results.push_back(SpeechRecognitionResult());
483 SpeechRecognitionResult& result = results.back();
484 result.is_provisional = !(ws_result.has_final() && ws_result.final());
485
486 if (!result.is_provisional)
487 got_last_definitive_result_ = true;
488
489 for (int j = 0; j < ws_result.alternative_size(); ++j) {
490 const proto::SpeechRecognitionAlternative& ws_alternative =
491 ws_result.alternative(j);
492 SpeechRecognitionHypothesis hypothesis;
493 if (ws_alternative.has_confidence())
494 hypothesis.confidence = ws_alternative.confidence();
495 else if (ws_result.has_stability())
496 hypothesis.confidence = ws_result.stability();
497 DCHECK(ws_alternative.has_transcript());
498 // TODO(hans): Perhaps the transcript should be required in the proto?
499 if (ws_alternative.has_transcript())
500 hypothesis.utterance = base::UTF8ToUTF16(ws_alternative.transcript());
501
502 result.hypotheses.push_back(hypothesis);
503 }
504 }
505 if (results.size()) {
506 delegate_->OnSpeechRecognitionEngineResults(results);
507 }
508
509 return state_;
510 }
511
512 SpeechRecognitionEngine::FSMState
513 SpeechRecognitionEngine::RaiseNoMatchErrorIfGotNoResults(
514 const FSMEventArgs& event_args) {
515 if (!got_last_definitive_result_) {
516 // Provide an empty result to notify that recognition is ended with no
517 // errors, yet neither any further results.
518 delegate_->OnSpeechRecognitionEngineResults(SpeechRecognitionResults());
519 }
520 return AbortSilently(event_args);
521 }
522
523 SpeechRecognitionEngine::FSMState
524 SpeechRecognitionEngine::CloseUpstreamAndWaitForResults(
525 const FSMEventArgs&) {
526 DCHECK(upstream_fetcher_.get());
527 DCHECK(encoder_.get());
528
529 DVLOG(1) << "Closing upstream.";
530
531 // The encoder requires a non-empty final buffer. So we encode a packet
532 // of silence in case encoder had no data already.
533 size_t sample_count =
534 config_.audio_sample_rate * kAudioPacketIntervalMs / 1000;
535 scoped_refptr<AudioChunk> dummy_chunk = new AudioChunk(
536 sample_count * sizeof(int16_t), encoder_->GetBitsPerSample() / 8);
537 encoder_->Encode(*dummy_chunk.get());
538 encoder_->Flush();
539 scoped_refptr<AudioChunk> encoded_dummy_data =
540 encoder_->GetEncodedDataAndClear();
541 DCHECK(!encoded_dummy_data->IsEmpty());
542 encoder_.reset();
543
544 UploadAudioChunk(encoded_dummy_data->AsString(),
545 FRAME_RECOGNITION_AUDIO,
546 true);
547 got_last_definitive_result_ = false;
548 return STATE_WAITING_DOWNSTREAM_RESULTS;
549 }
550
551 SpeechRecognitionEngine::FSMState
552 SpeechRecognitionEngine::CloseDownstream(const FSMEventArgs&) {
553 DCHECK(!upstream_fetcher_.get());
554 DCHECK(downstream_fetcher_.get());
555
556 DVLOG(1) << "Closing downstream.";
557 downstream_fetcher_.reset();
558 return STATE_IDLE;
559 }
560
561 SpeechRecognitionEngine::FSMState
562 SpeechRecognitionEngine::AbortSilently(const FSMEventArgs&) {
563 return Abort(SPEECH_RECOGNITION_ERROR_NONE);
564 }
565
566 SpeechRecognitionEngine::FSMState
567 SpeechRecognitionEngine::AbortWithError(const FSMEventArgs&) {
568 return Abort(SPEECH_RECOGNITION_ERROR_NETWORK);
569 }
570
571 SpeechRecognitionEngine::FSMState SpeechRecognitionEngine::Abort(
572 SpeechRecognitionErrorCode error_code) {
573 DVLOG(1) << "Aborting with error " << error_code;
574
575 if (error_code != SPEECH_RECOGNITION_ERROR_NONE) {
576 delegate_->OnSpeechRecognitionEngineError(
577 SpeechRecognitionError(error_code));
578 }
579 downstream_fetcher_.reset();
580 upstream_fetcher_.reset();
581 encoder_.reset();
582 return STATE_IDLE;
583 }
584
585 SpeechRecognitionEngine::FSMState
586 SpeechRecognitionEngine::DoNothing(const FSMEventArgs&) {
587 return state_;
588 }
589
590 SpeechRecognitionEngine::FSMState
591 SpeechRecognitionEngine::NotFeasible(const FSMEventArgs& event_args) {
592 NOTREACHED() << "Unfeasible event " << event_args.event
593 << " in state " << state_;
594 return state_;
595 }
596
597 std::string SpeechRecognitionEngine::GetAcceptedLanguages() const {
598 std::string langs = config_.language;
599 if (langs.empty() && url_context_.get()) {
600 // If no language is provided then we use the first from the accepted
601 // language list. If this list is empty then it defaults to "en-US".
602 // Example of the contents of this list: "es,en-GB;q=0.8", ""
603 net::URLRequestContext* request_context =
604 url_context_->GetURLRequestContext();
605 DCHECK(request_context);
606 // TODO(pauljensen): SpeechRecognitionEngine should be constructed with
607 // a reference to the HttpUserAgentSettings rather than accessing the
608 // accept language through the URLRequestContext.
609 if (request_context->http_user_agent_settings()) {
610 std::string accepted_language_list =
611 request_context->http_user_agent_settings()->GetAcceptLanguage();
612 size_t separator = accepted_language_list.find_first_of(",;");
613 if (separator != std::string::npos)
614 langs = accepted_language_list.substr(0, separator);
615 }
616 }
617 if (langs.empty())
618 langs = "en-US";
619 return langs;
620 }
621
622 // TODO(primiano): Is there any utility in the codebase that already does this?
623 std::string SpeechRecognitionEngine::GenerateRequestKey() const {
624 const int64_t kKeepLowBytes = 0x00000000FFFFFFFFLL;
625 const int64_t kKeepHighBytes = 0xFFFFFFFF00000000LL;
626
627 // Just keep the least significant bits of timestamp, in order to reduce
628 // probability of collisions.
629 int64_t key = (base::Time::Now().ToInternalValue() & kKeepLowBytes) |
630 (base::RandUint64() & kKeepHighBytes);
631 return base::HexEncode(reinterpret_cast<void*>(&key), sizeof(key));
632 }
633
634 void SpeechRecognitionEngine::UploadAudioChunk(const std::string& data,
635 FrameType type,
636 bool is_final) {
637 if (use_framed_post_data_) {
638 std::string frame(data.size() + 8, 0);
639 base::WriteBigEndian(&frame[0], static_cast<uint32_t>(data.size()));
640 base::WriteBigEndian(&frame[4], static_cast<uint32_t>(type));
641 frame.replace(8, data.size(), data);
642 upstream_fetcher_->AppendChunkToUpload(frame, is_final);
643 } else {
644 upstream_fetcher_->AppendChunkToUpload(data, is_final);
645 }
646 }
647
648 SpeechRecognitionEngine::FSMEventArgs::FSMEventArgs(FSMEvent event_value)
649 : event(event_value) {
650 }
651
652 SpeechRecognitionEngine::FSMEventArgs::~FSMEventArgs() {
25 } 653 }
26 654
27 } // namespace content 655 } // namespace content
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698