Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1492)

Unified Diff: content/browser/speech/speech_recognition_engine.cc

Issue 1891543002: Devirtualize SpeechRecognitionEngine (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@kill_one_shot_engine
Patch Set: drop an include Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: content/browser/speech/speech_recognition_engine.cc
diff --git a/content/browser/speech/speech_recognition_engine.cc b/content/browser/speech/speech_recognition_engine.cc
index e3b236b79760d3c27632462189b7b97b8ed8aa47..b0fac7db62feb1ea375d7c2772dd53e7ec85d8c3 100644
--- a/content/browser/speech/speech_recognition_engine.cc
+++ b/content/browser/speech/speech_recognition_engine.cc
@@ -4,13 +4,77 @@
#include "content/browser/speech/speech_recognition_engine.h"
+#include <algorithm>
+#include <vector>
+
+#include "base/big_endian.h"
+#include "base/bind.h"
+#include "base/rand_util.h"
+#include "base/strings/string_number_conversions.h"
+#include "base/strings/string_util.h"
+#include "base/strings/utf_string_conversions.h"
+#include "base/time/time.h"
+#include "content/browser/speech/audio_buffer.h"
+#include "content/browser/speech/proto/google_streaming_api.pb.h"
+#include "content/public/common/speech_recognition_error.h"
+#include "content/public/common/speech_recognition_result.h"
+#include "google_apis/google_api_keys.h"
+#include "net/base/escape.h"
+#include "net/base/load_flags.h"
+#include "net/url_request/http_user_agent_settings.h"
+#include "net/url_request/url_fetcher.h"
+#include "net/url_request/url_request_context.h"
+#include "net/url_request/url_request_context_getter.h"
+#include "net/url_request/url_request_status.h"
+
+using net::URLFetcher;
+
+namespace content {
namespace {
+
+const char kWebServiceBaseUrl[] =
+ "https://www.google.com/speech-api/full-duplex/v1";
+const char kDownstreamUrl[] = "/down?";
+const char kUpstreamUrl[] = "/up?";
+
+// This matches the maximum maxAlternatives value supported by the server.
+const uint32_t kMaxMaxAlternatives = 30;
+
+// TODO(hans): Remove this and other logging when we don't need it anymore.
+// Debug-only helper: parses |response| as a proto::SpeechRecognitionEvent
+// and dumps its status, endpoint and every result/alternative via DVLOG(1).
+// Logs "Parse failed!" and returns early if |response| is not a valid proto.
+void DumpResponse(const std::string& response) {
+ DVLOG(1) << "------------";
+ proto::SpeechRecognitionEvent event;
+ if (!event.ParseFromString(response)) {
+ DVLOG(1) << "Parse failed!";
+ return;
+ }
+ if (event.has_status())
+ DVLOG(1) << "STATUS\t" << event.status();
+ if (event.has_endpoint())
+ DVLOG(1) << "ENDPOINT\t" << event.endpoint();
+ for (int i = 0; i < event.result_size(); ++i) {
+ DVLOG(1) << "RESULT #" << i << ":";
+ const proto::SpeechRecognitionResult& res = event.result(i);
+ if (res.has_final())
+ DVLOG(1) << " final:\t" << res.final();
+ if (res.has_stability())
+ DVLOG(1) << " STABILITY:\t" << res.stability();
+ for (int j = 0; j < res.alternative_size(); ++j) {
+ const proto::SpeechRecognitionAlternative& alt =
+ res.alternative(j);
+ if (alt.has_confidence())
+ DVLOG(1) << " CONFIDENCE:\t" << alt.confidence();
+ if (alt.has_transcript())
+ DVLOG(1) << " TRANSCRIPT:\t" << alt.transcript();
+ }
+ }
+}
+
const int kDefaultConfigSampleRate = 8000;
const int kDefaultConfigBitsPerSample = 16;
const uint32_t kDefaultMaxHypotheses = 1;
-} // namespace
-namespace content {
+} // namespace
SpeechRecognitionEngine::Config::Config()
: filter_profanities(false),
@@ -18,10 +82,574 @@ SpeechRecognitionEngine::Config::Config()
interim_results(true),
max_hypotheses(kDefaultMaxHypotheses),
audio_sample_rate(kDefaultConfigSampleRate),
- audio_num_bits_per_sample(kDefaultConfigBitsPerSample) {
+ audio_num_bits_per_sample(kDefaultConfigBitsPerSample) {}
+
+SpeechRecognitionEngine::Config::~Config() {}
+
+// Interval between audio packets; also used to size the dummy silence
+// packet in CloseUpstreamAndWaitForResults().
+const int SpeechRecognitionEngine::kAudioPacketIntervalMs = 100;
+// Fixed URLFetcher ids so tests can tell the two fetchers apart.
+const int SpeechRecognitionEngine::kUpstreamUrlFetcherIdForTesting = 0;
+const int SpeechRecognitionEngine::kDownstreamUrlFetcherIdForTesting = 1;
+// Status codes reported by the webservice in SpeechRecognitionEvent.
+const int SpeechRecognitionEngine::kWebserviceStatusNoError = 0;
+const int SpeechRecognitionEngine::kWebserviceStatusErrorNoMatch = 5;
+
+// The engine starts in STATE_IDLE; all subsequent state changes go through
+// DispatchEvent(). |context| is kept to create the URL fetchers and to read
+// the accept-language list (see GetAcceptedLanguages()).
+SpeechRecognitionEngine::SpeechRecognitionEngine(
+ net::URLRequestContextGetter* context)
+ : url_context_(context),
+ previous_response_length_(0),
+ got_last_definitive_result_(false),
+ is_dispatching_event_(false),
+ use_framed_post_data_(false),
+ state_(STATE_IDLE) {}
+
+SpeechRecognitionEngine::~SpeechRecognitionEngine() {}
+
+// Stores the parameters used by the next StartRecognition() call.
+void SpeechRecognitionEngine::SetConfig(const Config& config) {
+ config_ = config;
+}
+
+// The public entry points below just wrap each call into an FSM event and
+// hand it to DispatchEvent(); the real work happens in the transition
+// functions further down in this file.
+
+void SpeechRecognitionEngine::StartRecognition() {
+ FSMEventArgs event_args(EVENT_START_RECOGNITION);
+ DispatchEvent(event_args);
+}
+
+void SpeechRecognitionEngine::EndRecognition() {
+ FSMEventArgs event_args(EVENT_END_RECOGNITION);
+ DispatchEvent(event_args);
+}
+
+// |data| is only borrowed for the duration of the dispatch (event_args
+// members are stable during the synchronous DispatchEvent call).
+void SpeechRecognitionEngine::TakeAudioChunk(const AudioChunk& data) {
+ FSMEventArgs event_args(EVENT_AUDIO_CHUNK);
+ event_args.audio_data = &data;
+ DispatchEvent(event_args);
+}
+
+void SpeechRecognitionEngine::AudioChunksEnded() {
+ FSMEventArgs event_args(EVENT_AUDIO_CHUNKS_ENDED);
+ DispatchEvent(event_args);
+}
+
+// net::URLFetcherDelegate implementation: a completed fetch (either stream)
+// is forwarded as an end-of-response notification.
+void SpeechRecognitionEngine::OnURLFetchComplete(const URLFetcher* source) {
+ const bool kResponseComplete = true;
+ DispatchHTTPResponse(source, kResponseComplete);
+}
+
+// net::URLFetcherDelegate implementation: download progress on either
+// stream is treated as a partial (not yet complete) response.
+void SpeechRecognitionEngine::OnURLFetchDownloadProgress(
+ const URLFetcher* source,
+ int64_t current,
+ int64_t total) {
+ const bool kPartialResponse = false;
+ DispatchHTTPResponse(source, kPartialResponse);
+}
+
+// Common handler for both fetchers. Extracts the fresh bytes received since
+// the last call, maps HTTP failures to UPSTREAM/DOWNSTREAM error events, and
+// splits downstream data into protocol chunks, dispatching one
+// EVENT_DOWNSTREAM_RESPONSE per chunk (plus EVENT_DOWNSTREAM_CLOSED when
+// |end_of_response| is true).
+void SpeechRecognitionEngine::DispatchHTTPResponse(const URLFetcher* source,
+ bool end_of_response) {
+ DCHECK(CalledOnValidThread());
+ DCHECK(source);
+ const bool response_is_good = source->GetStatus().is_success() &&
+ source->GetResponseCode() == 200;
+ std::string response;
+ if (response_is_good)
+ source->GetResponseAsString(&response);
+ const size_t current_response_length = response.size();
+
+ DVLOG(1) << (source == downstream_fetcher_.get() ? "Downstream" : "Upstream")
+ << "HTTP, code: " << source->GetResponseCode()
+ << " length: " << current_response_length
+ << " eor: " << end_of_response;
+
+ // URLFetcher always provides the entire response buffer, but we are only
+ // interested in the fresh data introduced by the last chunk. Therefore, we
+ // drop the previous content we have already processed.
+ if (current_response_length != 0) {
+ DCHECK_GE(current_response_length, previous_response_length_);
+ response.erase(0, previous_response_length_);
+ previous_response_length_ = current_response_length;
+ }
+
+ if (!response_is_good && source == downstream_fetcher_.get()) {
+ DVLOG(1) << "Downstream error " << source->GetResponseCode();
+ FSMEventArgs event_args(EVENT_DOWNSTREAM_ERROR);
+ DispatchEvent(event_args);
+ return;
+ }
+ if (!response_is_good && source == upstream_fetcher_.get()) {
+ DVLOG(1) << "Upstream error " << source->GetResponseCode()
+ << " EOR " << end_of_response;
+ FSMEventArgs event_args(EVENT_UPSTREAM_ERROR);
+ DispatchEvent(event_args);
+ return;
+ }
+
+ // Ignore incoming data on the upstream connection.
+ if (source == upstream_fetcher_.get())
+ return;
+
+ DCHECK(response_is_good && source == downstream_fetcher_.get());
+
+ // The downstream response is organized in chunks, whose size is determined
+ // by a 4 bytes prefix, transparently handled by the ChunkedByteBuffer class.
+ // Such chunks are sent by the speech recognition webservice over the HTTP
+ // downstream channel using HTTP chunked transfer (unrelated to our chunks).
+ // This function is called every time an HTTP chunk is received by the
+ // url fetcher. However there isn't any particular matching between our
+ // protocol chunks and HTTP chunks, in the sense that a single HTTP chunk can
+ // contain a portion of one chunk or even more chunks together.
+ chunked_byte_buffer_.Append(response);
+
+ // A single HTTP chunk can contain more than one data chunk, thus the while.
+ while (chunked_byte_buffer_.HasChunks()) {
+ FSMEventArgs event_args(EVENT_DOWNSTREAM_RESPONSE);
+ event_args.response = chunked_byte_buffer_.PopChunk();
+ DCHECK(event_args.response.get());
+ DumpResponse(std::string(event_args.response->begin(),
+ event_args.response->end()));
+ DispatchEvent(event_args);
+ }
+ if (end_of_response) {
+ FSMEventArgs event_args(EVENT_DOWNSTREAM_CLOSED);
+ DispatchEvent(event_args);
+ }
+}
+
+// A recognition is pending whenever the FSM has left STATE_IDLE.
+bool SpeechRecognitionEngine::IsRecognitionPending() const {
+ DCHECK(CalledOnValidThread());
+ return state_ != STATE_IDLE;
+}
+
+// Tells the audio capture layer how long each chunk passed to
+// TakeAudioChunk() should be.
+int SpeechRecognitionEngine::GetDesiredAudioChunkDurationMs() const {
+ return kAudioPacketIntervalMs;
+}
+
+// ----------------------- Core FSM implementation ---------------------------
+
+// Synchronously runs one FSM transition. Guards against reentrancy with
+// |is_dispatching_event_|: transitions themselves must never dispatch.
+void SpeechRecognitionEngine::DispatchEvent(
+ const FSMEventArgs& event_args) {
+ DCHECK(CalledOnValidThread());
+ DCHECK_LE(event_args.event, EVENT_MAX_VALUE);
+ DCHECK_LE(state_, STATE_MAX_VALUE);
+
+ // Event dispatching must be sequential, otherwise it will break all the rules
+ // and the assumptions of the finite state automata model.
+ DCHECK(!is_dispatching_event_);
+ is_dispatching_event_ = true;
+
+ state_ = ExecuteTransitionAndGetNextState(event_args);
+
+ is_dispatching_event_ = false;
+}
+
+// Transition table of the FSM: maps the current (state, event) pair to the
+// action function to run, and returns the resulting state. Any pair not
+// handled below is a programming error and falls through to NotFeasible().
+SpeechRecognitionEngine::FSMState
+SpeechRecognitionEngine::ExecuteTransitionAndGetNextState(
+ const FSMEventArgs& event_args) {
+ const FSMEvent event = event_args.event;
+ switch (state_) {
+ case STATE_IDLE:
+ switch (event) {
+ case EVENT_START_RECOGNITION:
+ return ConnectBothStreams(event_args);
+ case EVENT_END_RECOGNITION:
+ // Note AUDIO_CHUNK and AUDIO_END events can remain enqueued in case of
+ // abort, so we just silently drop them here.
+ case EVENT_AUDIO_CHUNK:
+ case EVENT_AUDIO_CHUNKS_ENDED:
+ // DOWNSTREAM_CLOSED can be received if we end up here due to an error.
+ case EVENT_DOWNSTREAM_CLOSED:
+ return DoNothing(event_args);
+ case EVENT_UPSTREAM_ERROR:
+ case EVENT_DOWNSTREAM_ERROR:
+ case EVENT_DOWNSTREAM_RESPONSE:
+ return NotFeasible(event_args);
+ }
+ break;
+ case STATE_BOTH_STREAMS_CONNECTED:
+ switch (event) {
+ case EVENT_AUDIO_CHUNK:
+ return TransmitAudioUpstream(event_args);
+ case EVENT_DOWNSTREAM_RESPONSE:
+ return ProcessDownstreamResponse(event_args);
+ case EVENT_AUDIO_CHUNKS_ENDED:
+ return CloseUpstreamAndWaitForResults(event_args);
+ case EVENT_END_RECOGNITION:
+ return AbortSilently(event_args);
+ case EVENT_UPSTREAM_ERROR:
+ case EVENT_DOWNSTREAM_ERROR:
+ case EVENT_DOWNSTREAM_CLOSED:
+ return AbortWithError(event_args);
+ case EVENT_START_RECOGNITION:
+ return NotFeasible(event_args);
+ }
+ break;
+ case STATE_WAITING_DOWNSTREAM_RESULTS:
+ switch (event) {
+ case EVENT_DOWNSTREAM_RESPONSE:
+ return ProcessDownstreamResponse(event_args);
+ case EVENT_DOWNSTREAM_CLOSED:
+ return RaiseNoMatchErrorIfGotNoResults(event_args);
+ case EVENT_END_RECOGNITION:
+ return AbortSilently(event_args);
+ case EVENT_UPSTREAM_ERROR:
+ case EVENT_DOWNSTREAM_ERROR:
+ return AbortWithError(event_args);
+ case EVENT_START_RECOGNITION:
+ case EVENT_AUDIO_CHUNK:
+ case EVENT_AUDIO_CHUNKS_ENDED:
+ return NotFeasible(event_args);
+ }
+ break;
+ }
+ return NotFeasible(event_args);
+}
+
+// ----------- Contract for all the FSM evolution functions below -------------
+// - Are guaranteed to be executed in the same thread (IO, except for tests);
+// - Are guaranteed to be not reentrant (themselves and each other);
+// - event_args members are guaranteed to be stable during the call;
+
+// EVENT_START_RECOGNITION handler: creates the audio encoder, builds the
+// query strings from |config_| and starts both the downstream (GET, results)
+// and upstream (POST, chunked audio) fetchers, paired server-side via the
+// shared request key. If a preamble is configured it is encoded and uploaded
+// immediately. Returns STATE_BOTH_STREAMS_CONNECTED.
+SpeechRecognitionEngine::FSMState
+SpeechRecognitionEngine::ConnectBothStreams(const FSMEventArgs&) {
+ DCHECK(!upstream_fetcher_.get());
+ DCHECK(!downstream_fetcher_.get());
+
+ encoder_.reset(new AudioEncoder(config_.audio_sample_rate,
+ config_.audio_num_bits_per_sample));
+ DCHECK(encoder_.get());
+ const std::string request_key = GenerateRequestKey();
+
+ // Only use the framed post data format when a preamble needs to be logged.
+ use_framed_post_data_ = (config_.preamble &&
+ !config_.preamble->sample_data.empty() &&
+ !config_.auth_token.empty() &&
+ !config_.auth_scope.empty());
+ if (use_framed_post_data_) {
+ preamble_encoder_.reset(new AudioEncoder(
+ config_.preamble->sample_rate,
+ config_.preamble->sample_depth * 8));
+ }
+
+ // Set up the downstream fetcher (protobuf results channel).
+ std::vector<std::string> downstream_args;
+ downstream_args.push_back(
+ "key=" + net::EscapeQueryParamValue(google_apis::GetAPIKey(), true));
+ downstream_args.push_back("pair=" + request_key);
+ downstream_args.push_back("output=pb");
+ GURL downstream_url(std::string(kWebServiceBaseUrl) +
+ std::string(kDownstreamUrl) +
+ base::JoinString(downstream_args, "&"));
+
+ downstream_fetcher_ = URLFetcher::Create(
+ kDownstreamUrlFetcherIdForTesting, downstream_url, URLFetcher::GET, this);
+ downstream_fetcher_->SetRequestContext(url_context_.get());
+ downstream_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SAVE_COOKIES |
+ net::LOAD_DO_NOT_SEND_COOKIES |
+ net::LOAD_DO_NOT_SEND_AUTH_DATA);
+ downstream_fetcher_->Start();
+
+ // Set up the upstream fetcher (chunked audio upload).
+ // TODO(hans): Support for user-selected grammars.
+ std::vector<std::string> upstream_args;
+ upstream_args.push_back("key=" +
+ net::EscapeQueryParamValue(google_apis::GetAPIKey(), true));
+ upstream_args.push_back("pair=" + request_key);
+ upstream_args.push_back("output=pb");
+ upstream_args.push_back(
+ "lang=" + net::EscapeQueryParamValue(GetAcceptedLanguages(), true));
+ upstream_args.push_back(
+ config_.filter_profanities ? "pFilter=2" : "pFilter=0");
+ if (config_.max_hypotheses > 0U) {
+ // Clamp to the maximum number of alternatives the server supports.
+ uint32_t max_alternatives =
+ std::min(kMaxMaxAlternatives, config_.max_hypotheses);
+ upstream_args.push_back("maxAlternatives=" +
+ base::UintToString(max_alternatives));
+ }
+ upstream_args.push_back("app=chromium");
+ if (!config_.hardware_info.empty()) {
+ upstream_args.push_back(
+ "xhw=" + net::EscapeQueryParamValue(config_.hardware_info, true));
+ }
+ for (const SpeechRecognitionGrammar& grammar : config_.grammars) {
+ std::string grammar_value(
+ base::DoubleToString(grammar.weight) + ":" + grammar.url);
+ upstream_args.push_back(
+ "grammar=" + net::EscapeQueryParamValue(grammar_value, true));
+ }
+ if (config_.continuous)
+ upstream_args.push_back("continuous");
+ else
+ upstream_args.push_back("endpoint=1");
+ if (config_.interim_results)
+ upstream_args.push_back("interim");
+ if (!config_.auth_token.empty() && !config_.auth_scope.empty()) {
+ upstream_args.push_back(
+ "authScope=" + net::EscapeQueryParamValue(config_.auth_scope, true));
+ upstream_args.push_back(
+ "authToken=" + net::EscapeQueryParamValue(config_.auth_token, true));
+ }
+ if (use_framed_post_data_) {
+ // In framed mode the MIME types travel in the query string instead of
+ // the Content-Type header (which is application/octet-stream below).
+ std::string audio_format;
+ if (preamble_encoder_)
+ audio_format = preamble_encoder_->GetMimeType() + ",";
+ audio_format += encoder_->GetMimeType();
+ upstream_args.push_back(
+ "audioFormat=" + net::EscapeQueryParamValue(audio_format, true));
+ }
+ GURL upstream_url(std::string(kWebServiceBaseUrl) +
+ std::string(kUpstreamUrl) +
+ base::JoinString(upstream_args, "&"));
+
+ upstream_fetcher_ = URLFetcher::Create(kUpstreamUrlFetcherIdForTesting,
+ upstream_url, URLFetcher::POST, this);
+ if (use_framed_post_data_)
+ upstream_fetcher_->SetChunkedUpload("application/octet-stream");
+ else
+ upstream_fetcher_->SetChunkedUpload(encoder_->GetMimeType());
+ upstream_fetcher_->SetRequestContext(url_context_.get());
+ upstream_fetcher_->SetReferrer(config_.origin_url);
+ upstream_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SAVE_COOKIES |
+ net::LOAD_DO_NOT_SEND_COOKIES |
+ net::LOAD_DO_NOT_SEND_AUTH_DATA);
+ upstream_fetcher_->Start();
+ previous_response_length_ = 0;
+
+ if (preamble_encoder_) {
+ // Encode and send preamble right away.
+ scoped_refptr<AudioChunk> chunk = new AudioChunk(
+ reinterpret_cast<const uint8_t*>(config_.preamble->sample_data.data()),
+ config_.preamble->sample_data.size(), config_.preamble->sample_depth);
+ preamble_encoder_->Encode(*chunk);
+ preamble_encoder_->Flush();
+ scoped_refptr<AudioChunk> encoded_data(
+ preamble_encoder_->GetEncodedDataAndClear());
+ UploadAudioChunk(encoded_data->AsString(), FRAME_PREAMBLE_AUDIO, false);
+ }
+ return STATE_BOTH_STREAMS_CONNECTED;
+}
+
+// EVENT_AUDIO_CHUNK handler: encodes the captured audio and appends it to
+// the chunked upstream upload. Keeps the current state.
+SpeechRecognitionEngine::FSMState
+SpeechRecognitionEngine::TransmitAudioUpstream(
+ const FSMEventArgs& event_args) {
+ DCHECK(upstream_fetcher_.get());
+ DCHECK(event_args.audio_data.get());
+ const AudioChunk& audio = *(event_args.audio_data.get());
+
+ DCHECK_EQ(audio.bytes_per_sample(), config_.audio_num_bits_per_sample / 8);
+ encoder_->Encode(audio);
+ scoped_refptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear());
+ UploadAudioChunk(encoded_data->AsString(), FRAME_RECOGNITION_AUDIO, false);
+ return state_;
+}
+
+// EVENT_DOWNSTREAM_RESPONSE handler: parses one protocol chunk as a
+// proto::SpeechRecognitionEvent and converts it into delegate callbacks.
+// Webservice error statuses map to Abort() with the matching
+// SpeechRecognitionErrorCode; an unparsable chunk aborts with a network
+// error. Otherwise keeps the current state.
+SpeechRecognitionEngine::FSMState
+SpeechRecognitionEngine::ProcessDownstreamResponse(
+ const FSMEventArgs& event_args) {
+ DCHECK(event_args.response.get());
+
+ proto::SpeechRecognitionEvent ws_event;
+ if (!ws_event.ParseFromString(std::string(event_args.response->begin(),
+ event_args.response->end())))
+ return AbortWithError(event_args);
+
+ if (ws_event.has_status()) {
+ switch (ws_event.status()) {
+ case proto::SpeechRecognitionEvent::STATUS_SUCCESS:
+ break;
+ case proto::SpeechRecognitionEvent::STATUS_NO_SPEECH:
+ return Abort(SPEECH_RECOGNITION_ERROR_NO_SPEECH);
+ case proto::SpeechRecognitionEvent::STATUS_ABORTED:
+ return Abort(SPEECH_RECOGNITION_ERROR_ABORTED);
+ case proto::SpeechRecognitionEvent::STATUS_AUDIO_CAPTURE:
+ return Abort(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE);
+ case proto::SpeechRecognitionEvent::STATUS_NETWORK:
+ return Abort(SPEECH_RECOGNITION_ERROR_NETWORK);
+ case proto::SpeechRecognitionEvent::STATUS_NOT_ALLOWED:
+ return Abort(SPEECH_RECOGNITION_ERROR_NOT_ALLOWED);
+ case proto::SpeechRecognitionEvent::STATUS_SERVICE_NOT_ALLOWED:
+ return Abort(SPEECH_RECOGNITION_ERROR_SERVICE_NOT_ALLOWED);
+ case proto::SpeechRecognitionEvent::STATUS_BAD_GRAMMAR:
+ return Abort(SPEECH_RECOGNITION_ERROR_BAD_GRAMMAR);
+ case proto::SpeechRecognitionEvent::STATUS_LANGUAGE_NOT_SUPPORTED:
+ return Abort(SPEECH_RECOGNITION_ERROR_LANGUAGE_NOT_SUPPORTED);
+ }
+ }
+
+ // End-of-utterance is only meaningful for one-shot (non-continuous)
+ // recognition sessions.
+ if (!config_.continuous && ws_event.has_endpoint() &&
+ ws_event.endpoint() == proto::SpeechRecognitionEvent::END_OF_UTTERANCE) {
+ delegate_->OnSpeechRecognitionEngineEndOfUtterance();
+ }
+
+ SpeechRecognitionResults results;
+ for (int i = 0; i < ws_event.result_size(); ++i) {
+ const proto::SpeechRecognitionResult& ws_result = ws_event.result(i);
+ results.push_back(SpeechRecognitionResult());
+ SpeechRecognitionResult& result = results.back();
+ result.is_provisional = !(ws_result.has_final() && ws_result.final());
+
+ if (!result.is_provisional)
+ got_last_definitive_result_ = true;
+
+ for (int j = 0; j < ws_result.alternative_size(); ++j) {
+ const proto::SpeechRecognitionAlternative& ws_alternative =
+ ws_result.alternative(j);
+ SpeechRecognitionHypothesis hypothesis;
+ // Fall back on the result-level stability when the alternative carries
+ // no per-alternative confidence.
+ if (ws_alternative.has_confidence())
+ hypothesis.confidence = ws_alternative.confidence();
+ else if (ws_result.has_stability())
+ hypothesis.confidence = ws_result.stability();
+ DCHECK(ws_alternative.has_transcript());
+ // TODO(hans): Perhaps the transcript should be required in the proto?
+ if (ws_alternative.has_transcript())
+ hypothesis.utterance = base::UTF8ToUTF16(ws_alternative.transcript());
+
+ result.hypotheses.push_back(hypothesis);
+ }
+ }
+ if (results.size()) {
+ delegate_->OnSpeechRecognitionEngineResults(results);
+ }
+
+ return state_;
+}
+
+// EVENT_DOWNSTREAM_CLOSED handler (while waiting for results): if no final
+// result was ever delivered, emits an empty result set so the client knows
+// the session ended cleanly, then aborts silently.
+SpeechRecognitionEngine::FSMState
+SpeechRecognitionEngine::RaiseNoMatchErrorIfGotNoResults(
+ const FSMEventArgs& event_args) {
+ if (!got_last_definitive_result_) {
+ // Provide an empty result to notify that recognition is ended with no
+ // errors, yet neither any further results.
+ delegate_->OnSpeechRecognitionEngineResults(SpeechRecognitionResults());
+ }
+ return AbortSilently(event_args);
+}
+
+// EVENT_AUDIO_CHUNKS_ENDED handler: flushes the encoder (padding with one
+// packet of silence, since the encoder requires a non-empty final buffer),
+// uploads the final frame and moves to STATE_WAITING_DOWNSTREAM_RESULTS.
+SpeechRecognitionEngine::FSMState
+SpeechRecognitionEngine::CloseUpstreamAndWaitForResults(
+ const FSMEventArgs&) {
+ DCHECK(upstream_fetcher_.get());
+ DCHECK(encoder_.get());
+
+ DVLOG(1) << "Closing upstream.";
+
+ // The encoder requires a non-empty final buffer. So we encode a packet
+ // of silence in case encoder had no data already.
+ size_t sample_count =
+ config_.audio_sample_rate * kAudioPacketIntervalMs / 1000;
+ scoped_refptr<AudioChunk> dummy_chunk = new AudioChunk(
+ sample_count * sizeof(int16_t), encoder_->GetBitsPerSample() / 8);
+ encoder_->Encode(*dummy_chunk.get());
+ encoder_->Flush();
+ scoped_refptr<AudioChunk> encoded_dummy_data =
+ encoder_->GetEncodedDataAndClear();
+ DCHECK(!encoded_dummy_data->IsEmpty());
+ encoder_.reset();
+
+ // is_final=true closes the chunked upload.
+ UploadAudioChunk(encoded_dummy_data->AsString(),
+ FRAME_RECOGNITION_AUDIO,
+ true);
+ got_last_definitive_result_ = false;
+ return STATE_WAITING_DOWNSTREAM_RESULTS;
+}
+
+// Tears down the downstream connection (the upstream one must already be
+// gone) and returns to idle.
+SpeechRecognitionEngine::FSMState
+SpeechRecognitionEngine::CloseDownstream(const FSMEventArgs&) {
+ DCHECK(!upstream_fetcher_.get());
+ DCHECK(downstream_fetcher_.get());
+
+ DVLOG(1) << "Closing downstream.";
+ downstream_fetcher_.reset();
+ return STATE_IDLE;
+}
+
+// Abort without reporting an error to the delegate.
+SpeechRecognitionEngine::FSMState
+SpeechRecognitionEngine::AbortSilently(const FSMEventArgs&) {
+ return Abort(SPEECH_RECOGNITION_ERROR_NONE);
+}
+
+// Abort reporting a network error to the delegate.
+SpeechRecognitionEngine::FSMState
+SpeechRecognitionEngine::AbortWithError(const FSMEventArgs&) {
+ return Abort(SPEECH_RECOGNITION_ERROR_NETWORK);
+}
+
+// Common abort path: notifies the delegate (unless |error_code| is NONE),
+// drops both fetchers and the encoder, and returns to idle.
+SpeechRecognitionEngine::FSMState SpeechRecognitionEngine::Abort(
+ SpeechRecognitionErrorCode error_code) {
+ DVLOG(1) << "Aborting with error " << error_code;
+
+ if (error_code != SPEECH_RECOGNITION_ERROR_NONE) {
+ delegate_->OnSpeechRecognitionEngineError(
+ SpeechRecognitionError(error_code));
+ }
+ downstream_fetcher_.reset();
+ upstream_fetcher_.reset();
+ encoder_.reset();
+ return STATE_IDLE;
+}
+
+// No-op transition: keeps the current state.
+SpeechRecognitionEngine::FSMState
+SpeechRecognitionEngine::DoNothing(const FSMEventArgs&) {
+ return state_;
+}
+
+// Transition for (state, event) pairs that should never occur; fatal in
+// debug builds via NOTREACHED().
+SpeechRecognitionEngine::FSMState
+SpeechRecognitionEngine::NotFeasible(const FSMEventArgs& event_args) {
+ NOTREACHED() << "Unfeasible event " << event_args.event
+ << " in state " << state_;
+ return state_;
+}
+
+// Returns the recognition language: the configured one if set, otherwise
+// the first entry of the request context's accept-language list, falling
+// back to "en-US" when neither is available.
+std::string SpeechRecognitionEngine::GetAcceptedLanguages() const {
+ std::string langs = config_.language;
+ if (langs.empty() && url_context_.get()) {
+ // If no language is provided then we use the first from the accepted
+ // language list. If this list is empty then it defaults to "en-US".
+ // Example of the contents of this list: "es,en-GB;q=0.8", ""
+ net::URLRequestContext* request_context =
+ url_context_->GetURLRequestContext();
+ DCHECK(request_context);
+ // TODO(pauljensen): SpeechRecognitionEngine should be constructed with
+ // a reference to the HttpUserAgentSettings rather than accessing the
+ // accept language through the URLRequestContext.
+ if (request_context->http_user_agent_settings()) {
+ std::string accepted_language_list =
+ request_context->http_user_agent_settings()->GetAcceptLanguage();
+ // Keep only the first language tag (strip quality values etc.).
+ size_t separator = accepted_language_list.find_first_of(",;");
+ if (separator != std::string::npos)
+ langs = accepted_language_list.substr(0, separator);
+ }
+ }
+ if (langs.empty())
+ langs = "en-US";
+ return langs;
+}
+
+// TODO(primiano): Is there any utility in the codebase that already does this?
+// Builds the hex-encoded key used to pair the up- and downstream requests:
+// low 32 bits from the current timestamp, high 32 bits random, to keep
+// collisions unlikely.
+std::string SpeechRecognitionEngine::GenerateRequestKey() const {
+ const int64_t kKeepLowBytes = 0x00000000FFFFFFFFLL;
+ const int64_t kKeepHighBytes = 0xFFFFFFFF00000000LL;
+
+ // Just keep the least significant bits of timestamp, in order to reduce
+ // probability of collisions.
+ int64_t key = (base::Time::Now().ToInternalValue() & kKeepLowBytes) |
+ (base::RandUint64() & kKeepHighBytes);
+ return base::HexEncode(reinterpret_cast<void*>(&key), sizeof(key));
+}
+
+// Appends |data| to the chunked upstream upload. In framed mode each chunk
+// is wrapped in an 8-byte big-endian header: 4 bytes payload size followed
+// by 4 bytes frame |type|. |is_final| closes the upload.
+void SpeechRecognitionEngine::UploadAudioChunk(const std::string& data,
+ FrameType type,
+ bool is_final) {
+ if (use_framed_post_data_) {
+ std::string frame(data.size() + 8, 0);
+ base::WriteBigEndian(&frame[0], static_cast<uint32_t>(data.size()));
+ base::WriteBigEndian(&frame[4], static_cast<uint32_t>(type));
+ frame.replace(8, data.size(), data);
+ upstream_fetcher_->AppendChunkToUpload(frame, is_final);
+ } else {
+ upstream_fetcher_->AppendChunkToUpload(data, is_final);
+ }
+}
+
+SpeechRecognitionEngine::FSMEventArgs::FSMEventArgs(FSMEvent event_value)
+ : event(event_value) {
}
-SpeechRecognitionEngine::Config::~Config() {
+SpeechRecognitionEngine::FSMEventArgs::~FSMEventArgs() {
}
} // namespace content

Powered by Google App Engine
This is Rietveld 408576698