Index: content/browser/speech/google_one_shot_remote_engine.cc
diff --git a/content/browser/speech/google_one_shot_remote_engine.cc b/content/browser/speech/google_one_shot_remote_engine.cc
deleted file mode 100644
index 0dd5bf37b9adbd67648d7a5b025e1ce7f1a3e6d3..0000000000000000000000000000000000000000
--- a/content/browser/speech/google_one_shot_remote_engine.cc
+++ /dev/null
@@ -1,298 +0,0 @@
-// Copyright (c) 2012 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "content/browser/speech/google_one_shot_remote_engine.h"
-
-#include <stddef.h>
-#include <stdint.h>
-
-#include <vector>
-
-#include "base/json/json_reader.h"
-#include "base/strings/string_number_conversions.h"
-#include "base/strings/string_util.h"
-#include "base/values.h"
-#include "content/browser/speech/audio_buffer.h"
-#include "content/public/common/speech_recognition_error.h"
-#include "content/public/common/speech_recognition_result.h"
-#include "google_apis/google_api_keys.h"
-#include "net/base/escape.h"
-#include "net/base/load_flags.h"
-#include "net/url_request/http_user_agent_settings.h"
-#include "net/url_request/url_fetcher.h"
-#include "net/url_request/url_request_context.h"
-#include "net/url_request/url_request_context_getter.h"
-#include "net/url_request/url_request_status.h"
-
-namespace content {
-namespace {
-
-const char* const kDefaultSpeechRecognitionUrl =
-    "https://www.google.com/speech-api/v1/recognize?xjerr=1&client=chromium&";
-const char* const kStatusString = "status";
-const char* const kHypothesesString = "hypotheses";
-const char* const kUtteranceString = "utterance";
-const char* const kConfidenceString = "confidence";
-const int kWebServiceStatusNoError = 0;
-const int kWebServiceStatusNoSpeech = 4;
-const int kWebServiceStatusNoMatch = 5;
-
-bool ParseServerResponse(const std::string& response_body,
-                         SpeechRecognitionResult* result,
-                         SpeechRecognitionError* error) {
-  if (response_body.empty()) {
-    LOG(WARNING) << "ParseServerResponse: Response was empty.";
-    return false;
-  }
-  DVLOG(1) << "ParseServerResponse: Parsing response " << response_body;
-
-  // Parse the response, ignoring comments.
-  std::string error_msg;
-  std::unique_ptr<base::Value> response_value =
-      base::JSONReader::ReadAndReturnError(response_body, base::JSON_PARSE_RFC,
-                                           NULL, &error_msg);
-  if (response_value == NULL) {
-    LOG(WARNING) << "ParseServerResponse: JSONReader failed : " << error_msg;
-    return false;
-  }
-
-  if (!response_value->IsType(base::Value::TYPE_DICTIONARY)) {
-    DVLOG(1) << "ParseServerResponse: Unexpected response type "
-             << response_value->GetType();
-    return false;
-  }
-  const base::DictionaryValue* response_object =
-      static_cast<const base::DictionaryValue*>(response_value.get());
-
-  // Get the status.
-  int status;
-  if (!response_object->GetInteger(kStatusString, &status)) {
-    DVLOG(1) << "ParseServerResponse: " << kStatusString
-             << " is not a valid integer value.";
-    return false;
-  }
-
-  // Process the status.
-  switch (status) {
-    case kWebServiceStatusNoError:
-      break;
-    case kWebServiceStatusNoSpeech:
-      error->code = SPEECH_RECOGNITION_ERROR_NO_SPEECH;
-      return false;
-    case kWebServiceStatusNoMatch:
-      error->code = SPEECH_RECOGNITION_ERROR_NO_MATCH;
-      return false;
-    default:
-      error->code = SPEECH_RECOGNITION_ERROR_NETWORK;
-      // Other status codes should not be returned by the server.
-      DVLOG(1) << "ParseServerResponse: unexpected status code " << status;
-      return false;
-  }
-
-  // Get the hypotheses.
-  const base::Value* hypotheses_value = NULL;
-  if (!response_object->Get(kHypothesesString, &hypotheses_value)) {
-    DVLOG(1) << "ParseServerResponse: Missing hypotheses attribute.";
-    return false;
-  }
-
-  DCHECK(hypotheses_value);
-  if (!hypotheses_value->IsType(base::Value::TYPE_LIST)) {
-    DVLOG(1) << "ParseServerResponse: Unexpected hypotheses type "
-             << hypotheses_value->GetType();
-    return false;
-  }
-
-  const base::ListValue* hypotheses_list =
-      static_cast<const base::ListValue*>(hypotheses_value);
-
-  // For now we support only single shot recognition, so we are giving only a
-  // final result, consisting of one fragment (with one or more hypotheses).
-  size_t index = 0;
-  for (; index < hypotheses_list->GetSize(); ++index) {
-    const base::Value* hypothesis = NULL;
-    if (!hypotheses_list->Get(index, &hypothesis)) {
-      LOG(WARNING) << "ParseServerResponse: Unable to read hypothesis value.";
-      break;
-    }
-    DCHECK(hypothesis);
-    if (!hypothesis->IsType(base::Value::TYPE_DICTIONARY)) {
-      LOG(WARNING) << "ParseServerResponse: Unexpected value type "
-                   << hypothesis->GetType();
-      break;
-    }
-
-    const base::DictionaryValue* hypothesis_value =
-        static_cast<const base::DictionaryValue*>(hypothesis);
-    base::string16 utterance;
-
-    if (!hypothesis_value->GetString(kUtteranceString, &utterance)) {
-      LOG(WARNING) << "ParseServerResponse: Missing utterance value.";
-      break;
-    }
-
-    // It is not an error if the 'confidence' field is missing.
-    double confidence = 0.0;
-    hypothesis_value->GetDouble(kConfidenceString, &confidence);
-    result->hypotheses.push_back(SpeechRecognitionHypothesis(utterance,
-                                                             confidence));
-  }
-
-  if (index < hypotheses_list->GetSize()) {
-    result->hypotheses.clear();
-    return false;
-  }
-  return true;
-}
-
-}  // namespace
-
-const int GoogleOneShotRemoteEngine::kAudioPacketIntervalMs = 100;
-int GoogleOneShotRemoteEngine::url_fetcher_id_for_tests = 0;
-
-GoogleOneShotRemoteEngine::GoogleOneShotRemoteEngine(
-    net::URLRequestContextGetter* context)
-    : url_context_(context) {
-}
-
-GoogleOneShotRemoteEngine::~GoogleOneShotRemoteEngine() {}
-
-void GoogleOneShotRemoteEngine::SetConfig(
-    const SpeechRecognitionEngineConfig& config) {
-  config_ = config;
-}
-
-void GoogleOneShotRemoteEngine::StartRecognition() {
-  DCHECK(delegate());
-  DCHECK(!url_fetcher_.get());
-  std::string lang_param = config_.language;
-
-  if (lang_param.empty() && url_context_.get()) {
-    // If no language is provided then we use the first from the accepted
-    // language list. If this list is empty then it defaults to "en-US".
-    // Example of the contents of this list: "es,en-GB;q=0.8", ""
-    net::URLRequestContext* request_context =
-        url_context_->GetURLRequestContext();
-    DCHECK(request_context);
-    // TODO(pauljensen): GoogleOneShotRemoteEngine should be constructed with
-    // a reference to the HttpUserAgentSettings rather than accessing the
-    // accept language through the URLRequestContext.
-    if (request_context->http_user_agent_settings()) {
-      std::string accepted_language_list =
-          request_context->http_user_agent_settings()->GetAcceptLanguage();
-      size_t separator = accepted_language_list.find_first_of(",;");
-      lang_param = accepted_language_list.substr(0, separator);
-    }
-  }
-
-  if (lang_param.empty())
-    lang_param = "en-US";
-
-  std::vector<std::string> parts;
-  parts.push_back("lang=" + net::EscapeQueryParamValue(lang_param, true));
-
-  if (!config_.grammars.empty()) {
-    DCHECK_EQ(config_.grammars.size(), 1U);
-    parts.push_back("lm=" + net::EscapeQueryParamValue(config_.grammars[0].url,
-                                                       true));
-  }
-
-  if (!config_.hardware_info.empty())
-    parts.push_back("xhw=" + net::EscapeQueryParamValue(config_.hardware_info,
-                                                        true));
-  parts.push_back("maxresults=" + base::UintToString(config_.max_hypotheses));
-  parts.push_back(config_.filter_profanities ? "pfilter=2" : "pfilter=0");
-
-  std::string api_key = google_apis::GetAPIKey();
-  parts.push_back("key=" + net::EscapeQueryParamValue(api_key, true));
-
-  GURL url(std::string(kDefaultSpeechRecognitionUrl) +
-           base::JoinString(parts, "&"));
-
-  encoder_.reset(new AudioEncoder(config_.audio_sample_rate,
-                                  config_.audio_num_bits_per_sample));
-  DCHECK(encoder_.get());
-  url_fetcher_ = net::URLFetcher::Create(url_fetcher_id_for_tests, url,
-                                         net::URLFetcher::POST, this);
-  url_fetcher_->SetChunkedUpload(encoder_->GetMimeType());
-  url_fetcher_->SetRequestContext(url_context_.get());
-  url_fetcher_->SetReferrer(config_.origin_url);
-
-  // The speech recognition API does not require user identification as part
-  // of requests, so we don't send cookies or auth data for these requests to
-  // prevent any accidental connection between users who are logged into the
-  // domain for other services (e.g. bookmark sync) with the speech requests.
-  url_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SAVE_COOKIES |
-                             net::LOAD_DO_NOT_SEND_COOKIES |
-                             net::LOAD_DO_NOT_SEND_AUTH_DATA);
-  url_fetcher_->Start();
-}
-
-void GoogleOneShotRemoteEngine::EndRecognition() {
-  url_fetcher_.reset();
-}
-
-void GoogleOneShotRemoteEngine::TakeAudioChunk(const AudioChunk& data) {
-  DCHECK(url_fetcher_.get());
-  DCHECK(encoder_.get());
-  DCHECK_EQ(data.bytes_per_sample(), config_.audio_num_bits_per_sample / 8);
-  encoder_->Encode(data);
-  scoped_refptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear());
-  url_fetcher_->AppendChunkToUpload(encoded_data->AsString(), false);
-}
-
-void GoogleOneShotRemoteEngine::AudioChunksEnded() {
-  DCHECK(url_fetcher_.get());
-  DCHECK(encoder_.get());
-
-  // UploadAudioChunk requires a non-empty final buffer. So we encode a packet
-  // of silence in case encoder had no data already.
-  size_t sample_count =
-      config_.audio_sample_rate * kAudioPacketIntervalMs / 1000;
-  scoped_refptr<AudioChunk> dummy_chunk(new AudioChunk(
-      sample_count * sizeof(int16_t), encoder_->GetBitsPerSample() / 8));
-  encoder_->Encode(*dummy_chunk.get());
-  encoder_->Flush();
-  scoped_refptr<AudioChunk> encoded_dummy_data(
-      encoder_->GetEncodedDataAndClear());
-  DCHECK(!encoded_dummy_data->IsEmpty());
-  encoder_.reset();
-
-  url_fetcher_->AppendChunkToUpload(encoded_dummy_data->AsString(), true);
-}
-
-void GoogleOneShotRemoteEngine::OnURLFetchComplete(
-    const net::URLFetcher* source) {
-  DCHECK_EQ(url_fetcher_.get(), source);
-  SpeechRecognitionResults results;
-  results.push_back(SpeechRecognitionResult());
-  SpeechRecognitionResult& result = results.back();
-  SpeechRecognitionError error(SPEECH_RECOGNITION_ERROR_NETWORK);
-  std::string data;
-
-  // The default error code in case of parse errors is NETWORK_FAILURE, however
-  // ParseServerResponse can change the error to a more appropriate one.
-  bool error_occurred = (!source->GetStatus().is_success() ||
-                         source->GetResponseCode() != 200 ||
-                         !source->GetResponseAsString(&data) ||
-                         !ParseServerResponse(data, &result, &error));
-  url_fetcher_.reset();
-  if (error_occurred) {
-    DVLOG(1) << "GoogleOneShotRemoteEngine: Network Error " << error.code;
-    delegate()->OnSpeechRecognitionEngineError(error);
-  } else {
-    DVLOG(1) << "GoogleOneShotRemoteEngine: Invoking delegate with result.";
-    delegate()->OnSpeechRecognitionEngineResults(results);
-  }
-}
-
-bool GoogleOneShotRemoteEngine::IsRecognitionPending() const {
-  return url_fetcher_ != NULL;
-}
-
-int GoogleOneShotRemoteEngine::GetDesiredAudioChunkDurationMs() const {
-  return kAudioPacketIntervalMs;
-}
-
-}  // namespace content
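
For context on the request side: StartRecognition() above assembles the query
string on kDefaultSpeechRecognitionUrl and then streams the encoded audio as a
chunked POST body. An illustrative final URL (assumed example values; the real
maxresults and pfilter values depend on the caller's
SpeechRecognitionEngineConfig, and the key comes from google_apis::GetAPIKey()):

  https://www.google.com/speech-api/v1/recognize?xjerr=1&client=chromium&lang=en-US&maxresults=1&pfilter=0&key=<elided>

The optional lm= and xhw= parameters are appended only when grammars or
hardware_info are set.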
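On the response side, ParseServerResponse() expects a JSON object of roughly
this shape (an illustrative reconstruction from the keys and status codes the
parser reads, not a captured server response):

  {
    "status": 0,
    "hypotheses": [
      {"utterance": "hello world", "confidence": 0.9},
      {"utterance": "hello word", "confidence": 0.1}
    ]
  }

A "status" of 0 yields results, 4 and 5 map to
SPEECH_RECOGNITION_ERROR_NO_SPEECH and SPEECH_RECOGNITION_ERROR_NO_MATCH, any
other value is surfaced as SPEECH_RECOGNITION_ERROR_NETWORK, and a missing
"confidence" defaults to 0.0.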
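Finally, a minimal sketch of how a caller drove this engine (hypothetical code,
not part of this patch: |my_delegate|, |context_getter| and |chunk| are assumed
to exist, and set_delegate() is assumed from the SpeechRecognitionEngine base
class):

  // Construct the engine and wire up the delegate that receives
  // OnSpeechRecognitionEngineResults() / OnSpeechRecognitionEngineError().
  GoogleOneShotRemoteEngine engine(context_getter.get());
  engine.set_delegate(&my_delegate);

  SpeechRecognitionEngineConfig config;
  config.language = "en-US";         // Otherwise derived from Accept-Language.
  config.audio_sample_rate = 16000;  // Example rate: 100 ms => 1600 samples.
  config.audio_num_bits_per_sample = 16;
  engine.SetConfig(config);

  engine.StartRecognition();      // Builds the URL and opens the chunked POST.
  engine.TakeAudioChunk(*chunk);  // One call per 100 ms of captured audio
                                  // (GetDesiredAudioChunkDurationMs()).
  engine.AudioChunksEnded();      // Pads with silence and closes the upload.
  // The delegate is invoked asynchronously from OnURLFetchComplete().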