Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1492)

Unified Diff: content/browser/speech/speech_recognition_engine.cc

Issue 1891543002: Devirtualize SpeechRecognitionEngine (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@kill_one_shot_engine
Patch Set: drop an include Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: content/browser/speech/speech_recognition_engine.cc
diff --git a/content/browser/speech/speech_recognition_engine.cc b/content/browser/speech/speech_recognition_engine.cc
index e3b236b79760d3c27632462189b7b97b8ed8aa47..b0fac7db62feb1ea375d7c2772dd53e7ec85d8c3 100644
--- a/content/browser/speech/speech_recognition_engine.cc
+++ b/content/browser/speech/speech_recognition_engine.cc
@@ -4,13 +4,77 @@
#include "content/browser/speech/speech_recognition_engine.h"
+#include <algorithm>
+#include <vector>
+
+#include "base/big_endian.h"
+#include "base/bind.h"
+#include "base/rand_util.h"
+#include "base/strings/string_number_conversions.h"
+#include "base/strings/string_util.h"
+#include "base/strings/utf_string_conversions.h"
+#include "base/time/time.h"
+#include "content/browser/speech/audio_buffer.h"
+#include "content/browser/speech/proto/google_streaming_api.pb.h"
+#include "content/public/common/speech_recognition_error.h"
+#include "content/public/common/speech_recognition_result.h"
+#include "google_apis/google_api_keys.h"
+#include "net/base/escape.h"
+#include "net/base/load_flags.h"
+#include "net/url_request/http_user_agent_settings.h"
+#include "net/url_request/url_fetcher.h"
+#include "net/url_request/url_request_context.h"
+#include "net/url_request/url_request_context_getter.h"
+#include "net/url_request/url_request_status.h"
+
+using net::URLFetcher;
+
+namespace content {
namespace {
+
+const char kWebServiceBaseUrl[] =
+ "https://www.google.com/speech-api/full-duplex/v1";
+const char kDownstreamUrl[] = "/down?";
+const char kUpstreamUrl[] = "/up?";
+
+// This matches the maximum maxAlternatives value supported by the server.
+const uint32_t kMaxMaxAlternatives = 30;
+
+// TODO(hans): Remove this and other logging when we don't need it anymore.
+// Debug-only helper: parses |response| as a proto::SpeechRecognitionEvent
+// and dumps its status, endpoint and every result/alternative via DVLOG(1).
+// Logs "Parse failed!" and returns early if |response| is not a valid proto.
+void DumpResponse(const std::string& response) {
+ DVLOG(1) << "------------";
+ proto::SpeechRecognitionEvent event;
+ if (!event.ParseFromString(response)) {
+ DVLOG(1) << "Parse failed!";
+ return;
+ }
+ if (event.has_status())
+ DVLOG(1) << "STATUS\t" << event.status();
+ if (event.has_endpoint())
+ DVLOG(1) << "ENDPOINT\t" << event.endpoint();
+ for (int i = 0; i < event.result_size(); ++i) {
+ DVLOG(1) << "RESULT #" << i << ":";
+ const proto::SpeechRecognitionResult& res = event.result(i);
+ if (res.has_final())
+ DVLOG(1) << " final:\t" << res.final();
+ if (res.has_stability())
+ DVLOG(1) << " STABILITY:\t" << res.stability();
+ for (int j = 0; j < res.alternative_size(); ++j) {
+ const proto::SpeechRecognitionAlternative& alt =
+ res.alternative(j);
+ if (alt.has_confidence())
+ DVLOG(1) << " CONFIDENCE:\t" << alt.confidence();
+ if (alt.has_transcript())
+ DVLOG(1) << " TRANSCRIPT:\t" << alt.transcript();
+ }
+ }
+}
+
const int kDefaultConfigSampleRate = 8000;
const int kDefaultConfigBitsPerSample = 16;
const uint32_t kDefaultMaxHypotheses = 1;
-} // namespace
-namespace content {
+} // namespace
SpeechRecognitionEngine::Config::Config()
: filter_profanities(false),
@@ -18,10 +82,574 @@ SpeechRecognitionEngine::Config::Config()
interim_results(true),
max_hypotheses(kDefaultMaxHypotheses),
audio_sample_rate(kDefaultConfigSampleRate),
- audio_num_bits_per_sample(kDefaultConfigBitsPerSample) {
+ audio_num_bits_per_sample(kDefaultConfigBitsPerSample) {}
+
+SpeechRecognitionEngine::Config::~Config() {}
+
+// Interval between audio packets; also used to size the dummy silence
+// packet in CloseUpstreamAndWaitForResults().
+const int SpeechRecognitionEngine::kAudioPacketIntervalMs = 100;
+// Fixed URLFetcher ids so tests can tell the two fetchers apart.
+const int SpeechRecognitionEngine::kUpstreamUrlFetcherIdForTesting = 0;
+const int SpeechRecognitionEngine::kDownstreamUrlFetcherIdForTesting = 1;
+// Status codes reported by the webservice in SpeechRecognitionEvent.
+const int SpeechRecognitionEngine::kWebserviceStatusNoError = 0;
+const int SpeechRecognitionEngine::kWebserviceStatusErrorNoMatch = 5;
+
+// The engine starts in STATE_IDLE; all subsequent state changes go through
+// DispatchEvent(). |context| is kept to create the URL fetchers and to read
+// the accept-language list (see GetAcceptedLanguages()).
+SpeechRecognitionEngine::SpeechRecognitionEngine(
+ net::URLRequestContextGetter* context)
+ : url_context_(context),
+ previous_response_length_(0),
+ got_last_definitive_result_(false),
+ is_dispatching_event_(false),
+ use_framed_post_data_(false),
+ state_(STATE_IDLE) {}
+
+SpeechRecognitionEngine::~SpeechRecognitionEngine() {}
+
+// Stores the parameters used by the next StartRecognition() call.
+void SpeechRecognitionEngine::SetConfig(const Config& config) {
+ config_ = config;
+}
+
+// The public entry points below just wrap each call into an FSM event and
+// hand it to DispatchEvent(); the real work happens in the transition
+// functions further down in this file.
+
+void SpeechRecognitionEngine::StartRecognition() {
+ FSMEventArgs event_args(EVENT_START_RECOGNITION);
+ DispatchEvent(event_args);
+}
+
+void SpeechRecognitionEngine::EndRecognition() {
+ FSMEventArgs event_args(EVENT_END_RECOGNITION);
+ DispatchEvent(event_args);
+}
+
+// |data| is only borrowed for the duration of the dispatch (event_args
+// members are stable during the synchronous DispatchEvent call).
+void SpeechRecognitionEngine::TakeAudioChunk(const AudioChunk& data) {
+ FSMEventArgs event_args(EVENT_AUDIO_CHUNK);
+ event_args.audio_data = &data;
+ DispatchEvent(event_args);
+}
+
+void SpeechRecognitionEngine::AudioChunksEnded() {
+ FSMEventArgs event_args(EVENT_AUDIO_CHUNKS_ENDED);
+ DispatchEvent(event_args);
+}
+
+// net::URLFetcherDelegate implementation: a completed fetch (either stream)
+// is forwarded as an end-of-response notification.
+void SpeechRecognitionEngine::OnURLFetchComplete(const URLFetcher* source) {
+ const bool kResponseComplete = true;
+ DispatchHTTPResponse(source, kResponseComplete);
+}
+
+// net::URLFetcherDelegate implementation: download progress on either
+// stream is treated as a partial (not yet complete) response.
+void SpeechRecognitionEngine::OnURLFetchDownloadProgress(
+ const URLFetcher* source,
+ int64_t current,
+ int64_t total) {
+ const bool kPartialResponse = false;
+ DispatchHTTPResponse(source, kPartialResponse);
+}
+
+// Common handler for both fetchers. Extracts the fresh bytes received since
+// the last call, maps HTTP failures to UPSTREAM/DOWNSTREAM error events, and
+// splits downstream data into protocol chunks, dispatching one
+// EVENT_DOWNSTREAM_RESPONSE per chunk (plus EVENT_DOWNSTREAM_CLOSED when
+// |end_of_response| is true).
+void SpeechRecognitionEngine::DispatchHTTPResponse(const URLFetcher* source,
+ bool end_of_response) {
+ DCHECK(CalledOnValidThread());
+ DCHECK(source);
+ const bool response_is_good = source->GetStatus().is_success() &&
+ source->GetResponseCode() == 200;
+ std::string response;
+ if (response_is_good)
+ source->GetResponseAsString(&response);
+ const size_t current_response_length = response.size();
+
+ DVLOG(1) << (source == downstream_fetcher_.get() ? "Downstream" : "Upstream")
+ << "HTTP, code: " << source->GetResponseCode()
+ << " length: " << current_response_length
+ << " eor: " << end_of_response;
+
+ // URLFetcher always provides the entire response buffer, but we are only
+ // interested in the fresh data introduced by the last chunk. Therefore, we
+ // drop the previous content we have already processed.
+ if (current_response_length != 0) {
+ DCHECK_GE(current_response_length, previous_response_length_);
+ response.erase(0, previous_response_length_);
+ previous_response_length_ = current_response_length;
+ }
+
+ if (!response_is_good && source == downstream_fetcher_.get()) {
+ DVLOG(1) << "Downstream error " << source->GetResponseCode();
+ FSMEventArgs event_args(EVENT_DOWNSTREAM_ERROR);
+ DispatchEvent(event_args);
+ return;
+ }
+ if (!response_is_good && source == upstream_fetcher_.get()) {
+ DVLOG(1) << "Upstream error " << source->GetResponseCode()
+ << " EOR " << end_of_response;
+ FSMEventArgs event_args(EVENT_UPSTREAM_ERROR);
+ DispatchEvent(event_args);
+ return;
+ }
+
+ // Ignore incoming data on the upstream connection.
+ if (source == upstream_fetcher_.get())
+ return;
+
+ DCHECK(response_is_good && source == downstream_fetcher_.get());
+
+ // The downstream response is organized in chunks, whose size is determined
+ // by a 4 bytes prefix, transparently handled by the ChunkedByteBuffer class.
+ // Such chunks are sent by the speech recognition webservice over the HTTP
+ // downstream channel using HTTP chunked transfer (unrelated to our chunks).
+ // This function is called every time an HTTP chunk is received by the
+ // url fetcher. However there isn't any particular matching between our
+ // protocol chunks and HTTP chunks, in the sense that a single HTTP chunk can
+ // contain a portion of one chunk or even more chunks together.
+ chunked_byte_buffer_.Append(response);
+
+ // A single HTTP chunk can contain more than one data chunk, thus the while.
+ while (chunked_byte_buffer_.HasChunks()) {
+ FSMEventArgs event_args(EVENT_DOWNSTREAM_RESPONSE);
+ event_args.response = chunked_byte_buffer_.PopChunk();
+ DCHECK(event_args.response.get());
+ DumpResponse(std::string(event_args.response->begin(),
+ event_args.response->end()));
+ DispatchEvent(event_args);
+ }
+ if (end_of_response) {
+ FSMEventArgs event_args(EVENT_DOWNSTREAM_CLOSED);
+ DispatchEvent(event_args);
+ }
+}
+
+// A recognition is pending whenever the FSM has left STATE_IDLE.
+bool SpeechRecognitionEngine::IsRecognitionPending() const {
+ DCHECK(CalledOnValidThread());
+ return state_ != STATE_IDLE;
+}
+
+// Tells the audio capture layer how long each chunk passed to
+// TakeAudioChunk() should be.
+int SpeechRecognitionEngine::GetDesiredAudioChunkDurationMs() const {
+ return kAudioPacketIntervalMs;
+}
+
+// ----------------------- Core FSM implementation ---------------------------
+
+// Synchronously runs one FSM transition. Guards against reentrancy with
+// |is_dispatching_event_|: transitions themselves must never dispatch.
+void SpeechRecognitionEngine::DispatchEvent(
+ const FSMEventArgs& event_args) {
+ DCHECK(CalledOnValidThread());
+ DCHECK_LE(event_args.event, EVENT_MAX_VALUE);
+ DCHECK_LE(state_, STATE_MAX_VALUE);
+
+ // Event dispatching must be sequential, otherwise it will break all the rules
+ // and the assumptions of the finite state automata model.
+ DCHECK(!is_dispatching_event_);
+ is_dispatching_event_ = true;
+
+ state_ = ExecuteTransitionAndGetNextState(event_args);
+
+ is_dispatching_event_ = false;
+}
+
+// Transition table of the FSM: maps the current (state, event) pair to the
+// action function to run, and returns the resulting state. Any pair not
+// handled below is a programming error and falls through to NotFeasible().
+SpeechRecognitionEngine::FSMState
+SpeechRecognitionEngine::ExecuteTransitionAndGetNextState(
+ const FSMEventArgs& event_args) {
+ const FSMEvent event = event_args.event;
+ switch (state_) {
+ case STATE_IDLE:
+ switch (event) {
+ case EVENT_START_RECOGNITION:
+ return ConnectBothStreams(event_args);
+ case EVENT_END_RECOGNITION:
+ // Note AUDIO_CHUNK and AUDIO_END events can remain enqueued in case of
+ // abort, so we just silently drop them here.
+ case EVENT_AUDIO_CHUNK:
+ case EVENT_AUDIO_CHUNKS_ENDED:
+ // DOWNSTREAM_CLOSED can be received if we end up here due to an error.
+ case EVENT_DOWNSTREAM_CLOSED:
+ return DoNothing(event_args);
+ case EVENT_UPSTREAM_ERROR:
+ case EVENT_DOWNSTREAM_ERROR:
+ case EVENT_DOWNSTREAM_RESPONSE:
+ return NotFeasible(event_args);
+ }
+ break;
+ case STATE_BOTH_STREAMS_CONNECTED:
+ switch (event) {
+ case EVENT_AUDIO_CHUNK:
+ return TransmitAudioUpstream(event_args);
+ case EVENT_DOWNSTREAM_RESPONSE:
+ return ProcessDownstreamResponse(event_args);
+ case EVENT_AUDIO_CHUNKS_ENDED:
+ return CloseUpstreamAndWaitForResults(event_args);
+ case EVENT_END_RECOGNITION:
+ return AbortSilently(event_args);
+ case EVENT_UPSTREAM_ERROR:
+ case EVENT_DOWNSTREAM_ERROR:
+ case EVENT_DOWNSTREAM_CLOSED:
+ return AbortWithError(event_args);
+ case EVENT_START_RECOGNITION:
+ return NotFeasible(event_args);
+ }
+ break;
+ case STATE_WAITING_DOWNSTREAM_RESULTS:
+ switch (event) {
+ case EVENT_DOWNSTREAM_RESPONSE:
+ return ProcessDownstreamResponse(event_args);
+ case EVENT_DOWNSTREAM_CLOSED:
+ return RaiseNoMatchErrorIfGotNoResults(event_args);
+ case EVENT_END_RECOGNITION:
+ return AbortSilently(event_args);
+ case EVENT_UPSTREAM_ERROR:
+ case EVENT_DOWNSTREAM_ERROR:
+ return AbortWithError(event_args);
+ case EVENT_START_RECOGNITION:
+ case EVENT_AUDIO_CHUNK:
+ case EVENT_AUDIO_CHUNKS_ENDED:
+ return NotFeasible(event_args);
+ }
+ break;
+ }
+ return NotFeasible(event_args);
+}
+
+// ----------- Contract for all the FSM evolution functions below -------------
+// - Are guaranteed to be executed in the same thread (IO, except for tests);
+// - Are guaranteed to be not reentrant (themselves and each other);
+// - event_args members are guaranteed to be stable during the call;
+
+// EVENT_START_RECOGNITION handler: creates the audio encoder, builds the
+// query strings from |config_| and starts both the downstream (GET, results)
+// and upstream (POST, chunked audio) fetchers, paired server-side via the
+// shared request key. If a preamble is configured it is encoded and uploaded
+// immediately. Returns STATE_BOTH_STREAMS_CONNECTED.
+SpeechRecognitionEngine::FSMState
+SpeechRecognitionEngine::ConnectBothStreams(const FSMEventArgs&) {
+ DCHECK(!upstream_fetcher_.get());
+ DCHECK(!downstream_fetcher_.get());
+
+ encoder_.reset(new AudioEncoder(config_.audio_sample_rate,
+ config_.audio_num_bits_per_sample));
+ DCHECK(encoder_.get());
+ const std::string request_key = GenerateRequestKey();
+
+ // Only use the framed post data format when a preamble needs to be logged.
+ use_framed_post_data_ = (config_.preamble &&
+ !config_.preamble->sample_data.empty() &&
+ !config_.auth_token.empty() &&
+ !config_.auth_scope.empty());
+ if (use_framed_post_data_) {
+ preamble_encoder_.reset(new AudioEncoder(
+ config_.preamble->sample_rate,
+ config_.preamble->sample_depth * 8));
+ }
+
+ // Set up the downstream fetcher (protobuf results channel).
+ std::vector<std::string> downstream_args;
+ downstream_args.push_back(
+ "key=" + net::EscapeQueryParamValue(google_apis::GetAPIKey(), true));
+ downstream_args.push_back("pair=" + request_key);
+ downstream_args.push_back("output=pb");
+ GURL downstream_url(std::string(kWebServiceBaseUrl) +
+ std::string(kDownstreamUrl) +
+ base::JoinString(downstream_args, "&"));
+
+ downstream_fetcher_ = URLFetcher::Create(
+ kDownstreamUrlFetcherIdForTesting, downstream_url, URLFetcher::GET, this);
+ downstream_fetcher_->SetRequestContext(url_context_.get());
+ downstream_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SAVE_COOKIES |
+ net::LOAD_DO_NOT_SEND_COOKIES |
+ net::LOAD_DO_NOT_SEND_AUTH_DATA);
+ downstream_fetcher_->Start();
+
+ // Set up the upstream fetcher (chunked audio upload).
+ // TODO(hans): Support for user-selected grammars.
+ std::vector<std::string> upstream_args;
+ upstream_args.push_back("key=" +
+ net::EscapeQueryParamValue(google_apis::GetAPIKey(), true));
+ upstream_args.push_back("pair=" + request_key);
+ upstream_args.push_back("output=pb");
+ upstream_args.push_back(
+ "lang=" + net::EscapeQueryParamValue(GetAcceptedLanguages(), true));
+ upstream_args.push_back(
+ config_.filter_profanities ? "pFilter=2" : "pFilter=0");
+ if (config_.max_hypotheses > 0U) {
+ // Clamp to the maximum number of alternatives the server supports.
+ uint32_t max_alternatives =
+ std::min(kMaxMaxAlternatives, config_.max_hypotheses);
+ upstream_args.push_back("maxAlternatives=" +
+ base::UintToString(max_alternatives));
+ }
+ upstream_args.push_back("app=chromium");
+ if (!config_.hardware_info.empty()) {
+ upstream_args.push_back(
+ "xhw=" + net::EscapeQueryParamValue(config_.hardware_info, true));
+ }
+ for (const SpeechRecognitionGrammar& grammar : config_.grammars) {
+ std::string grammar_value(
+ base::DoubleToString(grammar.weight) + ":" + grammar.url);
+ upstream_args.push_back(
+ "grammar=" + net::EscapeQueryParamValue(grammar_value, true));
+ }
+ if (config_.continuous)
+ upstream_args.push_back("continuous");
+ else
+ upstream_args.push_back("endpoint=1");
+ if (config_.interim_results)
+ upstream_args.push_back("interim");
+ if (!config_.auth_token.empty() && !config_.auth_scope.empty()) {
+ upstream_args.push_back(
+ "authScope=" + net::EscapeQueryParamValue(config_.auth_scope, true));
+ upstream_args.push_back(
+ "authToken=" + net::EscapeQueryParamValue(config_.auth_token, true));
+ }
+ if (use_framed_post_data_) {
+ // In framed mode the MIME types travel in the query string instead of
+ // the Content-Type header (which is application/octet-stream below).
+ std::string audio_format;
+ if (preamble_encoder_)
+ audio_format = preamble_encoder_->GetMimeType() + ",";
+ audio_format += encoder_->GetMimeType();
+ upstream_args.push_back(
+ "audioFormat=" + net::EscapeQueryParamValue(audio_format, true));
+ }
+ GURL upstream_url(std::string(kWebServiceBaseUrl) +
+ std::string(kUpstreamUrl) +
+ base::JoinString(upstream_args, "&"));
+
+ upstream_fetcher_ = URLFetcher::Create(kUpstreamUrlFetcherIdForTesting,
+ upstream_url, URLFetcher::POST, this);
+ if (use_framed_post_data_)
+ upstream_fetcher_->SetChunkedUpload("application/octet-stream");
+ else
+ upstream_fetcher_->SetChunkedUpload(encoder_->GetMimeType());
+ upstream_fetcher_->SetRequestContext(url_context_.get());
+ upstream_fetcher_->SetReferrer(config_.origin_url);
+ upstream_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SAVE_COOKIES |
+ net::LOAD_DO_NOT_SEND_COOKIES |
+ net::LOAD_DO_NOT_SEND_AUTH_DATA);
+ upstream_fetcher_->Start();
+ previous_response_length_ = 0;
+
+ if (preamble_encoder_) {
+ // Encode and send preamble right away.
+ scoped_refptr<AudioChunk> chunk = new AudioChunk(
+ reinterpret_cast<const uint8_t*>(config_.preamble->sample_data.data()),
+ config_.preamble->sample_data.size(), config_.preamble->sample_depth);
+ preamble_encoder_->Encode(*chunk);
+ preamble_encoder_->Flush();
+ scoped_refptr<AudioChunk> encoded_data(
+ preamble_encoder_->GetEncodedDataAndClear());
+ UploadAudioChunk(encoded_data->AsString(), FRAME_PREAMBLE_AUDIO, false);
+ }
+ return STATE_BOTH_STREAMS_CONNECTED;
+}
+
+// EVENT_AUDIO_CHUNK handler: encodes the captured audio and appends it to
+// the chunked upstream upload. Keeps the current state.
+SpeechRecognitionEngine::FSMState
+SpeechRecognitionEngine::TransmitAudioUpstream(
+ const FSMEventArgs& event_args) {
+ DCHECK(upstream_fetcher_.get());
+ DCHECK(event_args.audio_data.get());
+ const AudioChunk& audio = *(event_args.audio_data.get());
+
+ DCHECK_EQ(audio.bytes_per_sample(), config_.audio_num_bits_per_sample / 8);
+ encoder_->Encode(audio);
+ scoped_refptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear());
+ UploadAudioChunk(encoded_data->AsString(), FRAME_RECOGNITION_AUDIO, false);
+ return state_;
+}
+
+// EVENT_DOWNSTREAM_RESPONSE handler: parses one protocol chunk as a
+// proto::SpeechRecognitionEvent and converts it into delegate callbacks.
+// Webservice error statuses map to Abort() with the matching
+// SpeechRecognitionErrorCode; an unparsable chunk aborts with a network
+// error. Otherwise keeps the current state.
+SpeechRecognitionEngine::FSMState
+SpeechRecognitionEngine::ProcessDownstreamResponse(
+ const FSMEventArgs& event_args) {
+ DCHECK(event_args.response.get());
+
+ proto::SpeechRecognitionEvent ws_event;
+ if (!ws_event.ParseFromString(std::string(event_args.response->begin(),
+ event_args.response->end())))
+ return AbortWithError(event_args);
+
+ if (ws_event.has_status()) {
+ switch (ws_event.status()) {
+ case proto::SpeechRecognitionEvent::STATUS_SUCCESS:
+ break;
+ case proto::SpeechRecognitionEvent::STATUS_NO_SPEECH:
+ return Abort(SPEECH_RECOGNITION_ERROR_NO_SPEECH);
+ case proto::SpeechRecognitionEvent::STATUS_ABORTED:
+ return Abort(SPEECH_RECOGNITION_ERROR_ABORTED);
+ case proto::SpeechRecognitionEvent::STATUS_AUDIO_CAPTURE:
+ return Abort(SPEECH_RECOGNITION_ERROR_AUDIO_CAPTURE);
+ case proto::SpeechRecognitionEvent::STATUS_NETWORK:
+ return Abort(SPEECH_RECOGNITION_ERROR_NETWORK);
+ case proto::SpeechRecognitionEvent::STATUS_NOT_ALLOWED:
+ return Abort(SPEECH_RECOGNITION_ERROR_NOT_ALLOWED);
+ case proto::SpeechRecognitionEvent::STATUS_SERVICE_NOT_ALLOWED:
+ return Abort(SPEECH_RECOGNITION_ERROR_SERVICE_NOT_ALLOWED);
+ case proto::SpeechRecognitionEvent::STATUS_BAD_GRAMMAR:
+ return Abort(SPEECH_RECOGNITION_ERROR_BAD_GRAMMAR);
+ case proto::SpeechRecognitionEvent::STATUS_LANGUAGE_NOT_SUPPORTED:
+ return Abort(SPEECH_RECOGNITION_ERROR_LANGUAGE_NOT_SUPPORTED);
+ }
+ }
+
+ // End-of-utterance is only meaningful for one-shot (non-continuous)
+ // recognition sessions.
+ if (!config_.continuous && ws_event.has_endpoint() &&
+ ws_event.endpoint() == proto::SpeechRecognitionEvent::END_OF_UTTERANCE) {
+ delegate_->OnSpeechRecognitionEngineEndOfUtterance();
+ }
+
+ SpeechRecognitionResults results;
+ for (int i = 0; i < ws_event.result_size(); ++i) {
+ const proto::SpeechRecognitionResult& ws_result = ws_event.result(i);
+ results.push_back(SpeechRecognitionResult());
+ SpeechRecognitionResult& result = results.back();
+ result.is_provisional = !(ws_result.has_final() && ws_result.final());
+
+ if (!result.is_provisional)
+ got_last_definitive_result_ = true;
+
+ for (int j = 0; j < ws_result.alternative_size(); ++j) {
+ const proto::SpeechRecognitionAlternative& ws_alternative =
+ ws_result.alternative(j);
+ SpeechRecognitionHypothesis hypothesis;
+ // Fall back on the result-level stability when the alternative carries
+ // no per-alternative confidence.
+ if (ws_alternative.has_confidence())
+ hypothesis.confidence = ws_alternative.confidence();
+ else if (ws_result.has_stability())
+ hypothesis.confidence = ws_result.stability();
+ DCHECK(ws_alternative.has_transcript());
+ // TODO(hans): Perhaps the transcript should be required in the proto?
+ if (ws_alternative.has_transcript())
+ hypothesis.utterance = base::UTF8ToUTF16(ws_alternative.transcript());
+
+ result.hypotheses.push_back(hypothesis);
+ }
+ }
+ if (results.size()) {
+ delegate_->OnSpeechRecognitionEngineResults(results);
+ }
+
+ return state_;
+}
+
+// EVENT_DOWNSTREAM_CLOSED handler (while waiting for results): if no final
+// result was ever delivered, emits an empty result set so the client knows
+// the session ended cleanly, then aborts silently.
+SpeechRecognitionEngine::FSMState
+SpeechRecognitionEngine::RaiseNoMatchErrorIfGotNoResults(
+ const FSMEventArgs& event_args) {
+ if (!got_last_definitive_result_) {
+ // Provide an empty result to notify that recognition is ended with no
+ // errors, yet neither any further results.
+ delegate_->OnSpeechRecognitionEngineResults(SpeechRecognitionResults());
+ }
+ return AbortSilently(event_args);
+}
+
+// EVENT_AUDIO_CHUNKS_ENDED handler: flushes the encoder (padding with one
+// packet of silence, since the encoder requires a non-empty final buffer),
+// uploads the final frame and moves to STATE_WAITING_DOWNSTREAM_RESULTS.
+SpeechRecognitionEngine::FSMState
+SpeechRecognitionEngine::CloseUpstreamAndWaitForResults(
+ const FSMEventArgs&) {
+ DCHECK(upstream_fetcher_.get());
+ DCHECK(encoder_.get());
+
+ DVLOG(1) << "Closing upstream.";
+
+ // The encoder requires a non-empty final buffer. So we encode a packet
+ // of silence in case encoder had no data already.
+ size_t sample_count =
+ config_.audio_sample_rate * kAudioPacketIntervalMs / 1000;
+ scoped_refptr<AudioChunk> dummy_chunk = new AudioChunk(
+ sample_count * sizeof(int16_t), encoder_->GetBitsPerSample() / 8);
+ encoder_->Encode(*dummy_chunk.get());
+ encoder_->Flush();
+ scoped_refptr<AudioChunk> encoded_dummy_data =
+ encoder_->GetEncodedDataAndClear();
+ DCHECK(!encoded_dummy_data->IsEmpty());
+ encoder_.reset();
+
+ // is_final=true closes the chunked upload.
+ UploadAudioChunk(encoded_dummy_data->AsString(),
+ FRAME_RECOGNITION_AUDIO,
+ true);
+ got_last_definitive_result_ = false;
+ return STATE_WAITING_DOWNSTREAM_RESULTS;
+}
+
+// Tears down the downstream connection (the upstream one must already be
+// gone) and returns to idle.
+SpeechRecognitionEngine::FSMState
+SpeechRecognitionEngine::CloseDownstream(const FSMEventArgs&) {
+ DCHECK(!upstream_fetcher_.get());
+ DCHECK(downstream_fetcher_.get());
+
+ DVLOG(1) << "Closing downstream.";
+ downstream_fetcher_.reset();
+ return STATE_IDLE;
+}
+
+// Abort without reporting an error to the delegate.
+SpeechRecognitionEngine::FSMState
+SpeechRecognitionEngine::AbortSilently(const FSMEventArgs&) {
+ return Abort(SPEECH_RECOGNITION_ERROR_NONE);
+}
+
+// Abort reporting a network error to the delegate.
+SpeechRecognitionEngine::FSMState
+SpeechRecognitionEngine::AbortWithError(const FSMEventArgs&) {
+ return Abort(SPEECH_RECOGNITION_ERROR_NETWORK);
+}
+
+// Common abort path: notifies the delegate (unless |error_code| is NONE),
+// drops both fetchers and the encoder, and returns to idle.
+SpeechRecognitionEngine::FSMState SpeechRecognitionEngine::Abort(
+ SpeechRecognitionErrorCode error_code) {
+ DVLOG(1) << "Aborting with error " << error_code;
+
+ if (error_code != SPEECH_RECOGNITION_ERROR_NONE) {
+ delegate_->OnSpeechRecognitionEngineError(
+ SpeechRecognitionError(error_code));
+ }
+ downstream_fetcher_.reset();
+ upstream_fetcher_.reset();
+ encoder_.reset();
+ return STATE_IDLE;
+}
+
+// No-op transition: keeps the current state.
+SpeechRecognitionEngine::FSMState
+SpeechRecognitionEngine::DoNothing(const FSMEventArgs&) {
+ return state_;
+}
+
+// Transition for (state, event) pairs that should never occur; fatal in
+// debug builds via NOTREACHED().
+SpeechRecognitionEngine::FSMState
+SpeechRecognitionEngine::NotFeasible(const FSMEventArgs& event_args) {
+ NOTREACHED() << "Unfeasible event " << event_args.event
+ << " in state " << state_;
+ return state_;
+}
+
+// Returns the recognition language: the configured one if set, otherwise
+// the first entry of the request context's accept-language list, falling
+// back to "en-US" when neither is available.
+std::string SpeechRecognitionEngine::GetAcceptedLanguages() const {
+ std::string langs = config_.language;
+ if (langs.empty() && url_context_.get()) {
+ // If no language is provided then we use the first from the accepted
+ // language list. If this list is empty then it defaults to "en-US".
+ // Example of the contents of this list: "es,en-GB;q=0.8", ""
+ net::URLRequestContext* request_context =
+ url_context_->GetURLRequestContext();
+ DCHECK(request_context);
+ // TODO(pauljensen): SpeechRecognitionEngine should be constructed with
+ // a reference to the HttpUserAgentSettings rather than accessing the
+ // accept language through the URLRequestContext.
+ if (request_context->http_user_agent_settings()) {
+ std::string accepted_language_list =
+ request_context->http_user_agent_settings()->GetAcceptLanguage();
+ // Keep only the first language tag (strip quality values etc.).
+ size_t separator = accepted_language_list.find_first_of(",;");
+ if (separator != std::string::npos)
+ langs = accepted_language_list.substr(0, separator);
+ }
+ }
+ if (langs.empty())
+ langs = "en-US";
+ return langs;
+}
+
+// TODO(primiano): Is there any utility in the codebase that already does this?
+// Builds the hex-encoded key used to pair the up- and downstream requests:
+// low 32 bits from the current timestamp, high 32 bits random, to keep
+// collisions unlikely.
+std::string SpeechRecognitionEngine::GenerateRequestKey() const {
+ const int64_t kKeepLowBytes = 0x00000000FFFFFFFFLL;
+ const int64_t kKeepHighBytes = 0xFFFFFFFF00000000LL;
+
+ // Just keep the least significant bits of timestamp, in order to reduce
+ // probability of collisions.
+ int64_t key = (base::Time::Now().ToInternalValue() & kKeepLowBytes) |
+ (base::RandUint64() & kKeepHighBytes);
+ return base::HexEncode(reinterpret_cast<void*>(&key), sizeof(key));
+}
+
+// Appends |data| to the chunked upstream upload. In framed mode each chunk
+// is wrapped in an 8-byte big-endian header: 4 bytes payload size followed
+// by 4 bytes frame |type|. |is_final| closes the upload.
+void SpeechRecognitionEngine::UploadAudioChunk(const std::string& data,
+ FrameType type,
+ bool is_final) {
+ if (use_framed_post_data_) {
+ std::string frame(data.size() + 8, 0);
+ base::WriteBigEndian(&frame[0], static_cast<uint32_t>(data.size()));
+ base::WriteBigEndian(&frame[4], static_cast<uint32_t>(type));
+ frame.replace(8, data.size(), data);
+ upstream_fetcher_->AppendChunkToUpload(frame, is_final);
+ } else {
+ upstream_fetcher_->AppendChunkToUpload(data, is_final);
+ }
+}
+
+SpeechRecognitionEngine::FSMEventArgs::FSMEventArgs(FSMEvent event_value)
+ : event(event_value) {
}
-SpeechRecognitionEngine::Config::~Config() {
+SpeechRecognitionEngine::FSMEventArgs::~FSMEventArgs() {
}
} // namespace content

Powered by Google App Engine
This is Rietveld 408576698