| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "content/browser/speech/speech_recognition_engine.h" | 5 #include "content/browser/speech/speech_recognition_engine.h" |
| 6 | 6 |
| 7 #include <algorithm> |
| 8 #include <vector> |
| 9 |
| 10 #include "base/big_endian.h" |
| 11 #include "base/bind.h" |
| 12 #include "base/rand_util.h" |
| 13 #include "base/strings/string_number_conversions.h" |
| 14 #include "base/strings/string_util.h" |
| 15 #include "base/strings/utf_string_conversions.h" |
| 16 #include "base/time/time.h" |
| 17 #include "content/browser/speech/audio_buffer.h" |
| 18 #include "content/browser/speech/proto/google_streaming_api.pb.h" |
| 19 #include "content/public/common/speech_recognition_error.h" |
| 20 #include "content/public/common/speech_recognition_result.h" |
| 21 #include "google_apis/google_api_keys.h" |
| 22 #include "net/base/escape.h" |
| 23 #include "net/base/load_flags.h" |
| 24 #include "net/url_request/http_user_agent_settings.h" |
| 25 #include "net/url_request/url_fetcher.h" |
| 26 #include "net/url_request/url_request_context.h" |
| 27 #include "net/url_request/url_request_context_getter.h" |
| 28 #include "net/url_request/url_request_status.h" |
| 29 |
| 30 using net::URLFetcher; |
| 31 |
| 32 namespace content { |
| 7 namespace { | 33 namespace { |
| 34 |
| 35 const char kWebServiceBaseUrl[] = |
| 36 "https://www.google.com/speech-api/full-duplex/v1"; |
| 37 const char kDownstreamUrl[] = "/down?"; |
| 38 const char kUpstreamUrl[] = "/up?"; |
| 39 |
| 40 // This matches the maximum maxAlternatives value supported by the server. |
| 41 const uint32_t kMaxMaxAlternatives = 30; |
| 42 |
| 43 // TODO(hans): Remove this and other logging when we don't need it anymore. |
| 44 void DumpResponse(const std::string& response) { |
| 45 DVLOG(1) << "------------"; |
| 46 proto::SpeechRecognitionEvent event; |
| 47 if (!event.ParseFromString(response)) { |
| 48 DVLOG(1) << "Parse failed!"; |
| 49 return; |
| 50 } |
| 51 if (event.has_status()) |
| 52 DVLOG(1) << "STATUS\t" << event.status(); |
| 53 if (event.has_endpoint()) |
| 54 DVLOG(1) << "ENDPOINT\t" << event.endpoint(); |
| 55 for (int i = 0; i < event.result_size(); ++i) { |
| 56 DVLOG(1) << "RESULT #" << i << ":"; |
| 57 const proto::SpeechRecognitionResult& res = event.result(i); |
| 58 if (res.has_final()) |
| 59 DVLOG(1) << " final:\t" << res.final(); |
| 60 if (res.has_stability()) |
| 61 DVLOG(1) << " STABILITY:\t" << res.stability(); |
| 62 for (int j = 0; j < res.alternative_size(); ++j) { |
| 63 const proto::SpeechRecognitionAlternative& alt = |
| 64 res.alternative(j); |
| 65 if (alt.has_confidence()) |
| 66 DVLOG(1) << " CONFIDENCE:\t" << alt.confidence(); |
| 67 if (alt.has_transcript()) |
| 68 DVLOG(1) << " TRANSCRIPT:\t" << alt.transcript(); |
| 69 } |
| 70 } |
| 71 } |
| 72 |
| 8 const int kDefaultConfigSampleRate = 8000; | 73 const int kDefaultConfigSampleRate = 8000; |
| 9 const int kDefaultConfigBitsPerSample = 16; | 74 const int kDefaultConfigBitsPerSample = 16; |
| 10 const uint32_t kDefaultMaxHypotheses = 1; | 75 const uint32_t kDefaultMaxHypotheses = 1; |
| 76 |
| 11 } // namespace | 77 } // namespace |
| 12 | 78 |
| 13 namespace content { | |
| 14 | |
| 15 SpeechRecognitionEngine::Config::Config() | 79 SpeechRecognitionEngine::Config::Config() |
| 16 : filter_profanities(false), | 80 : filter_profanities(false), |
| 17 continuous(true), | 81 continuous(true), |
| 18 interim_results(true), | 82 interim_results(true), |
| 19 max_hypotheses(kDefaultMaxHypotheses), | 83 max_hypotheses(kDefaultMaxHypotheses), |
| 20 audio_sample_rate(kDefaultConfigSampleRate), | 84 audio_sample_rate(kDefaultConfigSampleRate), |
| 21 audio_num_bits_per_sample(kDefaultConfigBitsPerSample) { | 85 audio_num_bits_per_sample(kDefaultConfigBitsPerSample) {} |
| 22 } | 86 |
| 23 | 87 SpeechRecognitionEngine::Config::~Config() {} |
| 24 SpeechRecognitionEngine::Config::~Config() { | 88 |
| 89 const int SpeechRecognitionEngine::kAudioPacketIntervalMs = 100; |
| 90 const int SpeechRecognitionEngine::kUpstreamUrlFetcherIdForTesting = 0; |
| 91 const int SpeechRecognitionEngine::kDownstreamUrlFetcherIdForTesting = 1; |
| 92 const int SpeechRecognitionEngine::kWebserviceStatusNoError = 0; |
| 93 const int SpeechRecognitionEngine::kWebserviceStatusErrorNoMatch = 5; |
| 94 |
| 95 SpeechRecognitionEngine::SpeechRecognitionEngine( |
| 96 net::URLRequestContextGetter* context) |
| 97 : url_context_(context), |
| 98 previous_response_length_(0), |
| 99 got_last_definitive_result_(false), |
| 100 is_dispatching_event_(false), |
| 101 use_framed_post_data_(false), |
| 102 state_(STATE_IDLE) {} |
| 103 |
| 104 SpeechRecognitionEngine::~SpeechRecognitionEngine() {} |
| 105 |
| 106 void SpeechRecognitionEngine::SetConfig(const Config& config) { |
| 107 config_ = config; |
| 108 } |
| 109 |
| 110 void SpeechRecognitionEngine::StartRecognition() { |
| 111 FSMEventArgs event_args(EVENT_START_RECOGNITION); |
| 112 DispatchEvent(event_args); |
| 113 } |
| 114 |
| 115 void SpeechRecognitionEngine::EndRecognition() { |
| 116 FSMEventArgs event_args(EVENT_END_RECOGNITION); |
| 117 DispatchEvent(event_args); |
| 118 } |
| 119 |
| 120 void SpeechRecognitionEngine::TakeAudioChunk(const AudioChunk& data) { |
| 121 FSMEventArgs event_args(EVENT_AUDIO_CHUNK); |
| 122 event_args.audio_data = &data; |
| 123 DispatchEvent(event_args); |
| 124 } |
| 125 |
| 126 void SpeechRecognitionEngine::AudioChunksEnded() { |
| 127 FSMEventArgs event_args(EVENT_AUDIO_CHUNKS_ENDED); |
| 128 DispatchEvent(event_args); |
| 129 } |
| 130 |
| 131 void SpeechRecognitionEngine::OnURLFetchComplete(const URLFetcher* source) { |
| 132 const bool kResponseComplete = true; |
| 133 DispatchHTTPResponse(source, kResponseComplete); |
| 134 } |
| 135 |
| 136 void SpeechRecognitionEngine::OnURLFetchDownloadProgress( |
| 137 const URLFetcher* source, |
| 138 int64_t current, |
| 139 int64_t total) { |
| 140 const bool kPartialResponse = false; |
| 141 DispatchHTTPResponse(source, kPartialResponse); |
| 142 } |
| 143 |
| 144 void SpeechRecognitionEngine::DispatchHTTPResponse(const URLFetcher* source, |
| 145 bool end_of_response) { |
| 146 DCHECK(CalledOnValidThread()); |
| 147 DCHECK(source); |
| 148 const bool response_is_good = source->GetStatus().is_success() && |
| 149 source->GetResponseCode() == 200; |
| 150 std::string response; |
| 151 if (response_is_good) |
| 152 source->GetResponseAsString(&response); |
| 153 const size_t current_response_length = response.size(); |
| 154 |
| 155 DVLOG(1) << (source == downstream_fetcher_.get() ? "Downstream" : "Upstream") |
| 156 << "HTTP, code: " << source->GetResponseCode() |
| 157 << " length: " << current_response_length |
| 158 << " eor: " << end_of_response; |
| 159 |
| 160 // URLFetcher always provides the entire response buffer, but we are only |
| 161 // interested in the fresh data introduced by the last chunk. Therefore, we |
| 162 // drop the previous content we have already processed. |
| 163 if (current_response_length != 0) { |
| 164 DCHECK_GE(current_response_length, previous_response_length_); |
| 165 response.erase(0, previous_response_length_); |
| 166 previous_response_length_ = current_response_length; |
| 167 } |
| 168 |
| 169 if (!response_is_good && source == downstream_fetcher_.get()) { |
| 170 DVLOG(1) << "Downstream error " << source->GetResponseCode(); |
| 171 FSMEventArgs event_args(EVENT_DOWNSTREAM_ERROR); |
| 172 DispatchEvent(event_args); |
| 173 return; |
| 174 } |
| 175 if (!response_is_good && source == upstream_fetcher_.get()) { |
| 176 DVLOG(1) << "Upstream error " << source->GetResponseCode() |
| 177 << " EOR " << end_of_response; |
| 178 FSMEventArgs event_args(EVENT_UPSTREAM_ERROR); |
| 179 DispatchEvent(event_args); |
| 180 return; |
| 181 } |
| 182 |
| 183 // Ignore incoming data on the upstream connection. |
| 184 if (source == upstream_fetcher_.get()) |
| 185 return; |
| 186 |
| 187 DCHECK(response_is_good && source == downstream_fetcher_.get()); |
| 188 |
| 189 // The downstream response is organized in chunks, whose size is determined |
| 190 // by a 4-byte prefix, transparently handled by the ChunkedByteBuffer class. |
| 191 // Such chunks are sent by the speech recognition webservice over the HTTP |
| 192 // downstream channel using HTTP chunked transfer (unrelated to our chunks). |
| 193 // This function is called every time an HTTP chunk is received by the |
| 194 // URL fetcher. However, there is no particular correspondence between our |
| 195 // protocol chunks and HTTP chunks: a single HTTP chunk can contain a |
| 196 // portion of one protocol chunk, or several whole chunks together. |
| 197 chunked_byte_buffer_.Append(response); |
| 198 |
| 199 // A single HTTP chunk can carry more than one protocol chunk, hence the loop. |
| 200 while (chunked_byte_buffer_.HasChunks()) { |
| 201 FSMEventArgs event_args(EVENT_DOWNSTREAM_RESPONSE); |
| 202 event_args.response = chunked_byte_buffer_.PopChunk(); |
| 203 DCHECK(event_args.response.get()); |
| 204 DumpResponse(std::string(event_args.response->begin(), |
| 205 event_args.response->end())); |
| 206 DispatchEvent(event_args); |
| 207 } |
| 208 if (end_of_response) { |
| 209 FSMEventArgs event_args(EVENT_DOWNSTREAM_CLOSED); |
| 210 DispatchEvent(event_args); |
| 211 } |
| 212 } |
| 213 |
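
For reference, a minimal standalone sketch of the 4-byte length-prefixed framing described in the comment above. PopProtocolChunk is an illustrative helper, not the real ChunkedByteBuffer API, and the big-endian prefix is an assumption here:

    #include <cstdint>
    #include <string>

    // Pops one complete protocol chunk from |buffer| into |payload|, erasing
    // the consumed bytes; returns false if more data is still needed.
    bool PopProtocolChunk(std::string* buffer, std::string* payload) {
      if (buffer->size() < 4)
        return false;  // The 4-byte length prefix is not complete yet.
      const uint8_t* p = reinterpret_cast<const uint8_t*>(buffer->data());
      const uint32_t length = (static_cast<uint32_t>(p[0]) << 24) |
                              (static_cast<uint32_t>(p[1]) << 16) |
                              (static_cast<uint32_t>(p[2]) << 8) |
                              static_cast<uint32_t>(p[3]);
      if (buffer->size() - 4 < length)
        return false;  // The payload spans a future HTTP chunk.
      payload->assign(*buffer, 4, length);
      buffer->erase(0, 4 + length);
      return true;
    }
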
| 214 bool SpeechRecognitionEngine::IsRecognitionPending() const { |
| 215 DCHECK(CalledOnValidThread()); |
| 216 return state_ != STATE_IDLE; |
| 217 } |
| 218 |
| 219 int SpeechRecognitionEngine::GetDesiredAudioChunkDurationMs() const { |
| 220 return kAudioPacketIntervalMs; |
| 221 } |
| 222 |
| 223 // ----------------------- Core FSM implementation --------------------------- |
| 224 |
| 225 void SpeechRecognitionEngine::DispatchEvent( |
| 226 const FSMEventArgs& event_args) { |
| 227 DCHECK(CalledOnValidThread()); |
| 228 DCHECK_LE(event_args.event, EVENT_MAX_VALUE); |
| 229 DCHECK_LE(state_, STATE_MAX_VALUE); |
| 230 |
| 231 // Event dispatching must be sequential; otherwise it would break the rules |
| 232 // and assumptions of the finite-state automaton model. |
| 233 DCHECK(!is_dispatching_event_); |
| 234 is_dispatching_event_ = true; |
| 235 |
| 236 state_ = ExecuteTransitionAndGetNextState(event_args); |
| 237 |
| 238 is_dispatching_event_ = false; |
| 239 } |
| 240 |
| 241 SpeechRecognitionEngine::FSMState |
| 242 SpeechRecognitionEngine::ExecuteTransitionAndGetNextState( |
| 243 const FSMEventArgs& event_args) { |
| 244 const FSMEvent event = event_args.event; |
| 245 switch (state_) { |
| 246 case STATE_IDLE: |
| 247 switch (event) { |
| 248 case EVENT_START_RECOGNITION: |
| 249 return ConnectBothStreams(event_args); |
| 250 case EVENT_END_RECOGNITION: |
| 251 // Note AUDIO_CHUNK and AUDIO_END events can remain enqueued in case of |
| 252 // abort, so we just silently drop them here. |
| 253 case EVENT_AUDIO_CHUNK: |
| 254 case EVENT_AUDIO_CHUNKS_ENDED: |
| 255 // DOWNSTREAM_CLOSED can be received if we end up here due to an error. |
| 256 case EVENT_DOWNSTREAM_CLOSED: |
| 257 return DoNothing(event_args); |
| 258 case EVENT_UPSTREAM_ERROR: |
| 259 case EVENT_DOWNSTREAM_ERROR: |
| 260 case EVENT_DOWNSTREAM_RESPONSE: |
| 261 return NotFeasible(event_args); |
| 262 } |
| 263 break; |
| 264 case STATE_BOTH_STREAMS_CONNECTED: |
| 265 switch (event) { |
| 266 case EVENT_AUDIO_CHUNK: |
| 267 return TransmitAudioUpstream(event_args); |
| 268 case EVENT_DOWNSTREAM_RESPONSE: |
| 269 return ProcessDownstreamResponse(event_args); |
| 270 case EVENT_AUDIO_CHUNKS_ENDED: |
| 271 return CloseUpstreamAndWaitForResults(event_args); |
| 272 case EVENT_END_RECOGNITION: |
| 273 return AbortSilently(event_args); |
| 274 case EVENT_UPSTREAM_ERROR: |
| 275 case EVENT_DOWNSTREAM_ERROR: |
| 276 case EVENT_DOWNSTREAM_CLOSED: |
| 277 return AbortWithError(event_args); |
| 278 case EVENT_START_RECOGNITION: |
| 279 return NotFeasible(event_args); |
| 280 } |
| 281 break; |
| 282 case STATE_WAITING_DOWNSTREAM_RESULTS: |
| 283 switch (event) { |
| 284 case EVENT_DOWNSTREAM_RESPONSE: |
| 285 return ProcessDownstreamResponse(event_args); |
| 286 case EVENT_DOWNSTREAM_CLOSED: |
| 287 return RaiseNoMatchErrorIfGotNoResults(event_args); |
| 288 case EVENT_END_RECOGNITION: |
| 289 return AbortSilently(event_args); |
| 290 case EVENT_UPSTREAM_ERROR: |
| 291 case EVENT_DOWNSTREAM_ERROR: |
| 292 return AbortWithError(event_args); |
| 293 case EVENT_START_RECOGNITION: |
| 294 case EVENT_AUDIO_CHUNK: |
| 295 case EVENT_AUDIO_CHUNKS_ENDED: |
| 296 return NotFeasible(event_args); |
| 297 } |
| 298 break; |
| 299 } |
| 300 return NotFeasible(event_args); |
| 301 } |
| 302 |
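
For readability, the transition table implemented by the switch above, in compact form (columns are states, rows are events):

    Event \ State        IDLE                BOTH_STREAMS_CONNECTED          WAITING_DOWNSTREAM_RESULTS
    START_RECOGNITION    ConnectBothStreams  NotFeasible                     NotFeasible
    AUDIO_CHUNK          DoNothing           TransmitAudioUpstream           NotFeasible
    AUDIO_CHUNKS_ENDED   DoNothing           CloseUpstreamAndWaitForResults  NotFeasible
    END_RECOGNITION      DoNothing           AbortSilently                   AbortSilently
    UPSTREAM_ERROR       NotFeasible         AbortWithError                  AbortWithError
    DOWNSTREAM_ERROR     NotFeasible         AbortWithError                  AbortWithError
    DOWNSTREAM_RESPONSE  NotFeasible         ProcessDownstreamResponse       ProcessDownstreamResponse
    DOWNSTREAM_CLOSED    DoNothing           AbortWithError                  RaiseNoMatchErrorIfGotNoResults
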
| 303 // ----------- Contract for all the FSM evolution functions below ------------- |
| 304 // - They are guaranteed to be executed on the same thread (IO, except in tests); |
| 305 // - They are guaranteed not to be reentrant (with themselves or each other); |
| 306 // - event_args members are guaranteed to be stable for the duration of the call. |
| 307 |
| 308 SpeechRecognitionEngine::FSMState |
| 309 SpeechRecognitionEngine::ConnectBothStreams(const FSMEventArgs&) { |
| 310 DCHECK(!upstream_fetcher_.get()); |
| 311 DCHECK(!downstream_fetcher_.get()); |
| 312 |
| 313 encoder_.reset(new AudioEncoder(config_.audio_sample_rate, |
| 314 config_.audio_num_bits_per_sample)); |
| 315 DCHECK(encoder_.get()); |
| 316 const std::string request_key = GenerateRequestKey(); |
| 317 |
| 318 // Only use the framed post data format when a preamble needs to be logged. |
| 319 use_framed_post_data_ = (config_.preamble && |
| 320 !config_.preamble->sample_data.empty() && |
| 321 !config_.auth_token.empty() && |
| 322 !config_.auth_scope.empty()); |
| 323 if (use_framed_post_data_) { |
| 324 preamble_encoder_.reset(new AudioEncoder( |
| 325 config_.preamble->sample_rate, |
| 326 config_.preamble->sample_depth * 8)); |
| 327 } |
| 328 |
| 329 // Setup downstream fetcher. |
| 330 std::vector<std::string> downstream_args; |
| 331 downstream_args.push_back( |
| 332 "key=" + net::EscapeQueryParamValue(google_apis::GetAPIKey(), true)); |
| 333 downstream_args.push_back("pair=" + request_key); |
| 334 downstream_args.push_back("output=pb"); |
| 335 GURL downstream_url(std::string(kWebServiceBaseUrl) + |
| 336 std::string(kDownstreamUrl) + |
| 337 base::JoinString(downstream_args, "&")); |
| 338 |
| 339 downstream_fetcher_ = URLFetcher::Create( |
| 340 kDownstreamUrlFetcherIdForTesting, downstream_url, URLFetcher::GET, this); |
| 341 downstream_fetcher_->SetRequestContext(url_context_.get()); |
| 342 downstream_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SAVE_COOKIES | |
| 343 net::LOAD_DO_NOT_SEND_COOKIES | |
| 344 net::LOAD_DO_NOT_SEND_AUTH_DATA); |
| 345 downstream_fetcher_->Start(); |
| 346 |
| 347 // Setup upstream fetcher. |
| 348 // TODO(hans): Support for user-selected grammars. |
| 349 std::vector<std::string> upstream_args; |
| 350 upstream_args.push_back("key=" + |
| 351 net::EscapeQueryParamValue(google_apis::GetAPIKey(), true)); |
| 352 upstream_args.push_back("pair=" + request_key); |
| 353 upstream_args.push_back("output=pb"); |
| 354 upstream_args.push_back( |
| 355 "lang=" + net::EscapeQueryParamValue(GetAcceptedLanguages(), true)); |
| 356 upstream_args.push_back( |
| 357 config_.filter_profanities ? "pFilter=2" : "pFilter=0"); |
| 358 if (config_.max_hypotheses > 0U) { |
| 359 uint32_t max_alternatives = |
| 360 std::min(kMaxMaxAlternatives, config_.max_hypotheses); |
| 361 upstream_args.push_back("maxAlternatives=" + |
| 362 base::UintToString(max_alternatives)); |
| 363 } |
| 364 upstream_args.push_back("app=chromium"); |
| 365 if (!config_.hardware_info.empty()) { |
| 366 upstream_args.push_back( |
| 367 "xhw=" + net::EscapeQueryParamValue(config_.hardware_info, true)); |
| 368 } |
| 369 for (const SpeechRecognitionGrammar& grammar : config_.grammars) { |
| 370 std::string grammar_value( |
| 371 base::DoubleToString(grammar.weight) + ":" + grammar.url); |
| 372 upstream_args.push_back( |
| 373 "grammar=" + net::EscapeQueryParamValue(grammar_value, true)); |
| 374 } |
| 375 if (config_.continuous) |
| 376 upstream_args.push_back("continuous"); |
| 377 else |
| 378 upstream_args.push_back("endpoint=1"); |
| 379 if (config_.interim_results) |
| 380 upstream_args.push_back("interim"); |
| 381 if (!config_.auth_token.empty() && !config_.auth_scope.empty()) { |
| 382 upstream_args.push_back( |
| 383 "authScope=" + net::EscapeQueryParamValue(config_.auth_scope, true)); |
| 384 upstream_args.push_back( |
| 385 "authToken=" + net::EscapeQueryParamValue(config_.auth_token, true)); |
| 386 } |
| 387 if (use_framed_post_data_) { |
| 388 std::string audio_format; |
| 389 if (preamble_encoder_) |
| 390 audio_format = preamble_encoder_->GetMimeType() + ","; |
| 391 audio_format += encoder_->GetMimeType(); |
| 392 upstream_args.push_back( |
| 393 "audioFormat=" + net::EscapeQueryParamValue(audio_format, true)); |
| 394 } |
| 395 GURL upstream_url(std::string(kWebServiceBaseUrl) + |
| 396 std::string(kUpstreamUrl) + |
| 397 base::JoinString(upstream_args, "&")); |
| 398 |
| 399 upstream_fetcher_ = URLFetcher::Create(kUpstreamUrlFetcherIdForTesting, |
| 400 upstream_url, URLFetcher::POST, this); |
| 401 if (use_framed_post_data_) |
| 402 upstream_fetcher_->SetChunkedUpload("application/octet-stream"); |
| 403 else |
| 404 upstream_fetcher_->SetChunkedUpload(encoder_->GetMimeType()); |
| 405 upstream_fetcher_->SetRequestContext(url_context_.get()); |
| 406 upstream_fetcher_->SetReferrer(config_.origin_url); |
| 407 upstream_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SAVE_COOKIES | |
| 408 net::LOAD_DO_NOT_SEND_COOKIES | |
| 409 net::LOAD_DO_NOT_SEND_AUTH_DATA); |
| 410 upstream_fetcher_->Start(); |
| 411 previous_response_length_ = 0; |
| 412 |
| 413 if (preamble_encoder_) { |
| 414 // Encode and send the preamble right away. |
| 415 scoped_refptr<AudioChunk> chunk = new AudioChunk( |
| 416 reinterpret_cast<const uint8_t*>(config_.preamble->sample_data.data()), |
| 417 config_.preamble->sample_data.size(), config_.preamble->sample_depth); |
| 418 preamble_encoder_->Encode(*chunk); |
| 419 preamble_encoder_->Flush(); |
| 420 scoped_refptr<AudioChunk> encoded_data( |
| 421 preamble_encoder_->GetEncodedDataAndClear()); |
| 422 UploadAudioChunk(encoded_data->AsString(), FRAME_PREAMBLE_AUDIO, false); |
| 423 } |
| 424 return STATE_BOTH_STREAMS_CONNECTED; |
| 425 } |
| 426 |
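
For illustration, with the default Config (no grammars, hardware info, or auth credentials) and assuming GetAcceptedLanguages() falls back to "en-US", the upstream URL assembled above would look roughly like this (key and pair values are placeholders):

    https://www.google.com/speech-api/full-duplex/v1/up?key=<api-key>&pair=<hex-key>&output=pb&lang=en-US&pFilter=0&maxAlternatives=1&app=chromium&continuous&interim
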
| 427 SpeechRecognitionEngine::FSMState |
| 428 SpeechRecognitionEngine::TransmitAudioUpstream( |
| 429 const FSMEventArgs& event_args) { |
| 430 DCHECK(upstream_fetcher_.get()); |
| 431 DCHECK(event_args.audio_data.get()); |
| 432 const AudioChunk& audio = *(event_args.audio_data.get()); |
| 433 |
| 434 DCHECK_EQ(audio.bytes_per_sample(), config_.audio_num_bits_per_sample / 8); |
| 435 encoder_->Encode(audio); |
| 436 scoped_refptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear()); |
| 437 UploadAudioChunk(encoded_data->AsString(), FRAME_RECOGNITION_AUDIO, false); |
| 438 return state_; |
| 439 } |
| 440 |
| 441 SpeechRecognitionEngine::FSMState |
| 442 SpeechRecognitionEngine::ProcessDownstreamResponse( |
| 443 const FSMEventArgs& event_args) { |
| 444 DCHECK(event_args.response.get()); |
| 445 |
| 446 proto::SpeechRecognitionEvent ws_event; |
| 447 if (!ws_event.ParseFromString(std::string(event_args.response->begin(), |
| 448 event_args.response->end()))) |
| 449 return AbortWithError(event_args); |
| 450 |
| 451 if (ws_event.has_status()) { |
| 452 switch (ws_event.status()) { |
| 453 case proto::SpeechRecognitionEvent::STATUS_SUCCESS: |
| 454 break; |
| 455 case proto::SpeechRecognitionEvent::STATUS_NO_SPEECH: |
| 456 return Abort(SPEECH_RECOGNITION_ERROR_NO_SPEECH); |
| 457 case proto::SpeechRecognitionEvent::STATUS_ABORTED: |
| 458 return Abort(SPEECH_RECOGNITION_ERROR_ABORTED); |
| 459 case proto::SpeechRecognitionEvent::STATUS_AUDIO_CAPTURE: |
| 460 return Abort(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE); |
| 461 case proto::SpeechRecognitionEvent::STATUS_NETWORK: |
| 462 return Abort(SPEECH_RECOGNITION_ERROR_NETWORK); |
| 463 case proto::SpeechRecognitionEvent::STATUS_NOT_ALLOWED: |
| 464 return Abort(SPEECH_RECOGNITION_ERROR_NOT_ALLOWED); |
| 465 case proto::SpeechRecognitionEvent::STATUS_SERVICE_NOT_ALLOWED: |
| 466 return Abort(SPEECH_RECOGNITION_ERROR_SERVICE_NOT_ALLOWED); |
| 467 case proto::SpeechRecognitionEvent::STATUS_BAD_GRAMMAR: |
| 468 return Abort(SPEECH_RECOGNITION_ERROR_BAD_GRAMMAR); |
| 469 case proto::SpeechRecognitionEvent::STATUS_LANGUAGE_NOT_SUPPORTED: |
| 470 return Abort(SPEECH_RECOGNITION_ERROR_LANGUAGE_NOT_SUPPORTED); |
| 471 } |
| 472 } |
| 473 |
| 474 if (!config_.continuous && ws_event.has_endpoint() && |
| 475 ws_event.endpoint() == proto::SpeechRecognitionEvent::END_OF_UTTERANCE) { |
| 476 delegate_->OnSpeechRecognitionEngineEndOfUtterance(); |
| 477 } |
| 478 |
| 479 SpeechRecognitionResults results; |
| 480 for (int i = 0; i < ws_event.result_size(); ++i) { |
| 481 const proto::SpeechRecognitionResult& ws_result = ws_event.result(i); |
| 482 results.push_back(SpeechRecognitionResult()); |
| 483 SpeechRecognitionResult& result = results.back(); |
| 484 result.is_provisional = !(ws_result.has_final() && ws_result.final()); |
| 485 |
| 486 if (!result.is_provisional) |
| 487 got_last_definitive_result_ = true; |
| 488 |
| 489 for (int j = 0; j < ws_result.alternative_size(); ++j) { |
| 490 const proto::SpeechRecognitionAlternative& ws_alternative = |
| 491 ws_result.alternative(j); |
| 492 SpeechRecognitionHypothesis hypothesis; |
| 493 if (ws_alternative.has_confidence()) |
| 494 hypothesis.confidence = ws_alternative.confidence(); |
| 495 else if (ws_result.has_stability()) |
| 496 hypothesis.confidence = ws_result.stability(); |
| 497 DCHECK(ws_alternative.has_transcript()); |
| 498 // TODO(hans): Perhaps the transcript should be required in the proto? |
| 499 if (ws_alternative.has_transcript()) |
| 500 hypothesis.utterance = base::UTF8ToUTF16(ws_alternative.transcript()); |
| 501 |
| 502 result.hypotheses.push_back(hypothesis); |
| 503 } |
| 504 } |
| 505 if (results.size()) { |
| 506 delegate_->OnSpeechRecognitionEngineResults(results); |
| 507 } |
| 508 |
| 509 return state_; |
| 510 } |
| 511 |
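
For context, a sketch of the kind of event this function consumes, built with the generated protobuf setters (it assumes the content namespace, like the code above; all values are made up):

    #include <string>

    #include "content/browser/speech/proto/google_streaming_api.pb.h"

    // Builds a minimal definitive-result event, serialized the way the
    // downstream protocol chunks carry it.
    std::string BuildSampleEvent() {
      proto::SpeechRecognitionEvent event;
      event.set_status(proto::SpeechRecognitionEvent::STATUS_SUCCESS);
      proto::SpeechRecognitionResult* result = event.add_result();
      result->set_final(true);
      proto::SpeechRecognitionAlternative* alternative =
          result->add_alternative();
      alternative->set_confidence(0.87f);
      alternative->set_transcript("hello world");
      std::string serialized;
      event.SerializeToString(&serialized);
      return serialized;
    }
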
| 512 SpeechRecognitionEngine::FSMState |
| 513 SpeechRecognitionEngine::RaiseNoMatchErrorIfGotNoResults( |
| 514 const FSMEventArgs& event_args) { |
| 515 if (!got_last_definitive_result_) { |
| 516 // Provide an empty result to notify that recognition ended without |
| 517 // errors, but also without producing any further results. |
| 518 delegate_->OnSpeechRecognitionEngineResults(SpeechRecognitionResults()); |
| 519 } |
| 520 return AbortSilently(event_args); |
| 521 } |
| 522 |
| 523 SpeechRecognitionEngine::FSMState |
| 524 SpeechRecognitionEngine::CloseUpstreamAndWaitForResults( |
| 525 const FSMEventArgs&) { |
| 526 DCHECK(upstream_fetcher_.get()); |
| 527 DCHECK(encoder_.get()); |
| 528 |
| 529 DVLOG(1) << "Closing upstream."; |
| 530 |
| 531 // The encoder requires a non-empty final buffer, so we encode a packet |
| 532 // of silence in case the encoder had no data already. |
| 533 size_t sample_count = |
| 534 config_.audio_sample_rate * kAudioPacketIntervalMs / 1000; |
| 535 scoped_refptr<AudioChunk> dummy_chunk = new AudioChunk( |
| 536 sample_count * sizeof(int16_t), encoder_->GetBitsPerSample() / 8); |
| 537 encoder_->Encode(*dummy_chunk.get()); |
| 538 encoder_->Flush(); |
| 539 scoped_refptr<AudioChunk> encoded_dummy_data = |
| 540 encoder_->GetEncodedDataAndClear(); |
| 541 DCHECK(!encoded_dummy_data->IsEmpty()); |
| 542 encoder_.reset(); |
| 543 |
| 544 UploadAudioChunk(encoded_dummy_data->AsString(), |
| 545 FRAME_RECOGNITION_AUDIO, |
| 546 true); |
| 547 got_last_definitive_result_ = false; |
| 548 return STATE_WAITING_DOWNSTREAM_RESULTS; |
| 549 } |
| 550 |
| 551 SpeechRecognitionEngine::FSMState |
| 552 SpeechRecognitionEngine::CloseDownstream(const FSMEventArgs&) { |
| 553 DCHECK(!upstream_fetcher_.get()); |
| 554 DCHECK(downstream_fetcher_.get()); |
| 555 |
| 556 DVLOG(1) << "Closing downstream."; |
| 557 downstream_fetcher_.reset(); |
| 558 return STATE_IDLE; |
| 559 } |
| 560 |
| 561 SpeechRecognitionEngine::FSMState |
| 562 SpeechRecognitionEngine::AbortSilently(const FSMEventArgs&) { |
| 563 return Abort(SPEECH_RECOGNITION_ERROR_NONE); |
| 564 } |
| 565 |
| 566 SpeechRecognitionEngine::FSMState |
| 567 SpeechRecognitionEngine::AbortWithError(const FSMEventArgs&) { |
| 568 return Abort(SPEECH_RECOGNITION_ERROR_NETWORK); |
| 569 } |
| 570 |
| 571 SpeechRecognitionEngine::FSMState SpeechRecognitionEngine::Abort( |
| 572 SpeechRecognitionErrorCode error_code) { |
| 573 DVLOG(1) << "Aborting with error " << error_code; |
| 574 |
| 575 if (error_code != SPEECH_RECOGNITION_ERROR_NONE) { |
| 576 delegate_->OnSpeechRecognitionEngineError( |
| 577 SpeechRecognitionError(error_code)); |
| 578 } |
| 579 downstream_fetcher_.reset(); |
| 580 upstream_fetcher_.reset(); |
| 581 encoder_.reset(); |
| 582 return STATE_IDLE; |
| 583 } |
| 584 |
| 585 SpeechRecognitionEngine::FSMState |
| 586 SpeechRecognitionEngine::DoNothing(const FSMEventArgs&) { |
| 587 return state_; |
| 588 } |
| 589 |
| 590 SpeechRecognitionEngine::FSMState |
| 591 SpeechRecognitionEngine::NotFeasible(const FSMEventArgs& event_args) { |
| 592 NOTREACHED() << "Unfeasible event " << event_args.event |
| 593 << " in state " << state_; |
| 594 return state_; |
| 595 } |
| 596 |
| 597 std::string SpeechRecognitionEngine::GetAcceptedLanguages() const { |
| 598 std::string langs = config_.language; |
| 599 if (langs.empty() && url_context_.get()) { |
| 600 // If no language is provided, we use the first entry of the accept- |
| 601 // language list; if that list is empty, we default to "en-US". |
| 602 // Example contents of this list: "es,en-GB;q=0.8", "". |
| 603 net::URLRequestContext* request_context = |
| 604 url_context_->GetURLRequestContext(); |
| 605 DCHECK(request_context); |
| 606 // TODO(pauljensen): SpeechRecognitionEngine should be constructed with |
| 607 // a reference to the HttpUserAgentSettings rather than accessing the |
| 608 // accept language through the URLRequestContext. |
| 609 if (request_context->http_user_agent_settings()) { |
| 610 std::string accepted_language_list = |
| 611 request_context->http_user_agent_settings()->GetAcceptLanguage(); |
| 612 size_t separator = accepted_language_list.find_first_of(",;"); |
| 613 if (separator != std::string::npos) |
| 614 langs = accepted_language_list.substr(0, separator); |
| 615 } |
| 616 } |
| 617 if (langs.empty()) |
| 618 langs = "en-US"; |
| 619 return langs; |
| 620 } |
| 621 |
| 622 // TODO(primiano): Is there any utility in the codebase that already does this? |
| 623 std::string SpeechRecognitionEngine::GenerateRequestKey() const { |
| 624 const int64_t kKeepLowBytes = 0x00000000FFFFFFFFLL; |
| 625 const int64_t kKeepHighBytes = 0xFFFFFFFF00000000LL; |
| 626 |
| 627 // Keep only the least significant bits of the timestamp, in order to |
| 628 // reduce the probability of collisions. |
| 629 int64_t key = (base::Time::Now().ToInternalValue() & kKeepLowBytes) | |
| 630 (base::RandUint64() & kKeepHighBytes); |
| 631 return base::HexEncode(reinterpret_cast<void*>(&key), sizeof(key)); |
| 632 } |
| 633 |
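
A standalone sketch of the key construction above, with made-up inputs (note the real code hex-encodes the in-memory bytes via base::HexEncode, so the printed digit order below matches only on a big-endian machine):

    #include <cstdint>
    #include <cstdio>

    int main() {
      const uint64_t timestamp = 1336060800000000ULL;  // Hypothetical Time value.
      const uint64_t random = 0x9E3779B97F4A7C15ULL;   // Hypothetical RandUint64().
      // Low 32 bits from the fast-moving timestamp, high 32 bits random.
      const uint64_t key = (timestamp & 0x00000000FFFFFFFFULL) |
                           (random & 0xFFFFFFFF00000000ULL);
      std::printf("pair=%016llX\n", static_cast<unsigned long long>(key));
      return 0;
    }
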
| 634 void SpeechRecognitionEngine::UploadAudioChunk(const std::string& data, |
| 635 FrameType type, |
| 636 bool is_final) { |
| 637 if (use_framed_post_data_) { |
| 638 std::string frame(data.size() + 8, 0); |
| 639 base::WriteBigEndian(&frame[0], static_cast<uint32_t>(data.size())); |
| 640 base::WriteBigEndian(&frame[4], static_cast<uint32_t>(type)); |
| 641 frame.replace(8, data.size(), data); |
| 642 upstream_fetcher_->AppendChunkToUpload(frame, is_final); |
| 643 } else { |
| 644 upstream_fetcher_->AppendChunkToUpload(data, is_final); |
| 645 } |
| 646 } |
| 647 |
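
A worked layout of the framed format produced above, for a hypothetical 5-byte payload (both header fields are written big-endian via base::WriteBigEndian; TT stands for the numeric FrameType value):

    offset 0..3  : 00 00 00 05    payload length = 5
    offset 4..7  : 00 00 00 TT    frame type
    offset 8..12 : payload bytes
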
| 648 SpeechRecognitionEngine::FSMEventArgs::FSMEventArgs(FSMEvent event_value) |
| 649 : event(event_value) { |
| 650 } |
| 651 |
| 652 SpeechRecognitionEngine::FSMEventArgs::~FSMEventArgs() { |
| 25 } | 653 } |
| 26 | 654 |
| 27 } // namespace content | 655 } // namespace content |