Chromium Code Reviews
| Index: content/browser/speech/google_ssfe_remote_engine.cc |
| diff --git a/content/browser/speech/speech_recognition_request.cc b/content/browser/speech/google_ssfe_remote_engine.cc |
| similarity index 53% |
| rename from content/browser/speech/speech_recognition_request.cc |
| rename to content/browser/speech/google_ssfe_remote_engine.cc |
| index a14369976d45ff0c267ef0af7221f4cac547fc32..98573af95e4c2941362c03ababeb3a5c0d1df9d0 100644 |
| --- a/content/browser/speech/speech_recognition_request.cc |
| +++ b/content/browser/speech/google_ssfe_remote_engine.cc |
| @@ -2,11 +2,12 @@ |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| -#include "content/browser/speech/speech_recognition_request.h" |
| +#include "content/browser/speech/google_ssfe_remote_engine.h" |
| #include <vector> |
| #include "base/json/json_reader.h" |
| +#include "base/memory/scoped_ptr.h" |
| #include "base/string_number_conversions.h" |
| #include "base/string_util.h" |
| #include "base/values.h" |
| @@ -19,6 +20,10 @@ |
| #include "net/url_request/url_request_context_getter.h" |
| #include "net/url_request/url_request_status.h" |
| +using content::SpeechRecognitionError; |
| +using content::SpeechRecognitionHypothesis; |
| +using content::SpeechRecognitionResult; |
| + |
| namespace { |
| const char* const kDefaultSpeechRecognitionUrl = |
| @@ -32,8 +37,13 @@ const char* const kConfidenceString = "confidence"; |
| // set this via an attribute. |
| const int kMaxResults = 6; |
| +const int SPEECH_API_STATUS_NO_ERROR = 0; |
|
Satish
2012/03/16 17:00:35
add comment here on what these are (i.e. codes returned by the server)
Primiano Tucci (use gerrit)
2012/03/20 13:14:50
Done.
|
| +const int SPEECH_API_STATUS_NO_SPEECH = 4; |
| +const int SPEECH_API_STATUS_NO_MATCH = 5; |
| + |
| bool ParseServerResponse(const std::string& response_body, |
| - content::SpeechRecognitionResult* result) { |
| + SpeechRecognitionResult* result, |
| + SpeechRecognitionError* error) { |
| if (response_body.empty()) { |
| LOG(WARNING) << "ParseServerResponse: Response was empty."; |
| return false; |
| @@ -67,19 +77,21 @@ bool ParseServerResponse(const std::string& response_body, |
| // Process the status. |
| switch (status) { |
| - case content::SPEECH_RECOGNITION_ERROR_NONE: |
| - case content::SPEECH_RECOGNITION_ERROR_NO_SPEECH: |
| - case content::SPEECH_RECOGNITION_ERROR_NO_MATCH: |
| - break; |
| - |
| - default: |
| - // Other status codes should not be returned by the server. |
| - VLOG(1) << "ParseServerResponse: unexpected status code " << status; |
| - return false; |
| + case SPEECH_API_STATUS_NO_ERROR: |
|
Satish
2012/03/16 17:00:35
should we set the error code to none in this case?
Primiano Tucci (use gerrit)
2012/03/20 13:14:50
Done.
|
| + break; |
| + case SPEECH_API_STATUS_NO_SPEECH: |
| + error->code = content::SPEECH_RECOGNITION_ERROR_NO_SPEECH; |
| + return false; |
| + case SPEECH_API_STATUS_NO_MATCH: |
| + error->code = content::SPEECH_RECOGNITION_ERROR_NO_MATCH; |
| + return false; |
| + default: |
| + error->code = content::SPEECH_RECOGNITION_ERROR_NETWORK; |
| + // Other status codes should not be returned by the server. |
| + VLOG(1) << "ParseServerResponse: unexpected status code " << status; |
| + return false; |
| } |
| - result->error = static_cast<content::SpeechRecognitionErrorCode>(status); |
| - |
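For context, the status handling above reduces to the following standalone sketch. The enum values and constant names are illustrative stand-ins for content::SpeechRecognitionErrorCode and the file-level constants, and the NO_ERROR case sets the code to NONE, as requested in the review.

#include <iostream>

// Illustrative stand-in for content::SpeechRecognitionErrorCode.
enum SpeechRecognitionErrorCode {
  SPEECH_RECOGNITION_ERROR_NONE,
  SPEECH_RECOGNITION_ERROR_NO_SPEECH,
  SPEECH_RECOGNITION_ERROR_NO_MATCH,
  SPEECH_RECOGNITION_ERROR_NETWORK
};

// Returns true when the response is worth parsing further; on false the caller
// reports |*error|, which falls back to a network failure for unknown codes.
bool MapApiStatus(int status, SpeechRecognitionErrorCode* error) {
  const int kStatusNoError = 0;   // Codes returned by the remote speech API.
  const int kStatusNoSpeech = 4;
  const int kStatusNoMatch = 5;
  switch (status) {
    case kStatusNoError:
      *error = SPEECH_RECOGNITION_ERROR_NONE;
      return true;
    case kStatusNoSpeech:
      *error = SPEECH_RECOGNITION_ERROR_NO_SPEECH;
      return false;
    case kStatusNoMatch:
      *error = SPEECH_RECOGNITION_ERROR_NO_MATCH;
      return false;
    default:
      // Other status codes should not be returned by the server.
      *error = SPEECH_RECOGNITION_ERROR_NETWORK;
      return false;
  }
}

int main() {
  SpeechRecognitionErrorCode error;
  std::cout << MapApiStatus(4, &error) << " " << error << "\n";  // Prints: 0 1
}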
| // Get the hypotheses. |
| Value* hypotheses_value = NULL; |
| if (!response_object->Get(kHypothesesString, &hypotheses_value)) { |
| @@ -95,7 +107,8 @@ bool ParseServerResponse(const std::string& response_body, |
| } |
| const ListValue* hypotheses_list = static_cast<ListValue*>(hypotheses_value); |
| - |
| + // For now we support only single shot recognition, so we are giving only a |
| + // final result, consisting of one fragment (with one or more hypotheses). |
| size_t index = 0; |
| for (; index < hypotheses_list->GetSize(); ++index) { |
| Value* hypothesis = NULL; |
| @@ -113,6 +126,7 @@ bool ParseServerResponse(const std::string& response_body, |
| const DictionaryValue* hypothesis_value = |
| static_cast<DictionaryValue*>(hypothesis); |
| string16 utterance; |
| + |
| if (!hypothesis_value->GetString(kUtteranceString, &utterance)) { |
| LOG(WARNING) << "ParseServerResponse: Missing utterance value."; |
| break; |
| @@ -121,45 +135,56 @@ bool ParseServerResponse(const std::string& response_body, |
| // It is not an error if the 'confidence' field is missing. |
| double confidence = 0.0; |
| hypothesis_value->GetDouble(kConfidenceString, &confidence); |
| - |
| - result->hypotheses.push_back(content::SpeechRecognitionHypothesis( |
| - utterance, confidence)); |
| + result->hypotheses.push_back(SpeechRecognitionHypothesis(utterance, |
| + confidence)); |
| } |
| if (index < hypotheses_list->GetSize()) { |
| result->hypotheses.clear(); |
| return false; |
| } |
| - |
| return true; |
| } |
| } // namespace |
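The hypotheses loop above follows an all-or-nothing pattern: if any entry in the list is malformed, the partially filled result is discarded. A minimal standalone sketch of that pattern, with RawEntry standing in for the DictionaryValue lookups:

#include <iostream>
#include <string>
#include <vector>

struct Hypothesis {
  std::string utterance;
  double confidence;
};

// Stand-in for one hypothesis dictionary; has_utterance models whether
// GetString(kUtteranceString, ...) would succeed.
struct RawEntry {
  bool has_utterance;
  std::string utterance;
  double confidence;  // A missing confidence simply stays at 0.0.
};

bool CollectHypotheses(const std::vector<RawEntry>& entries,
                       std::vector<Hypothesis>* hypotheses) {
  size_t index = 0;
  for (; index < entries.size(); ++index) {
    if (!entries[index].has_utterance)
      break;  // Malformed entry: stop and reject the whole response.
    Hypothesis hypothesis;
    hypothesis.utterance = entries[index].utterance;
    hypothesis.confidence = entries[index].confidence;
    hypotheses->push_back(hypothesis);
  }
  if (index < entries.size()) {
    hypotheses->clear();  // Discard partial results.
    return false;
  }
  return true;
}

int main() {
  RawEntry good = { true, "hello world", 0.9 };
  RawEntry bad = { false, "", 0.0 };
  std::vector<RawEntry> entries;
  entries.push_back(good);
  entries.push_back(bad);
  std::vector<Hypothesis> hypotheses;
  std::cout << CollectHypotheses(entries, &hypotheses) << " "
            << hypotheses.size() << "\n";  // Prints: 0 0
}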
| + |
|
Satish
2012/03/16 17:00:35
remove extra newline
Primiano Tucci (use gerrit)
2012/03/20 13:14:50
Done.
|
| namespace speech { |
| -int SpeechRecognitionRequest::url_fetcher_id_for_tests = 0; |
| +const int GoogleSSFERemoteEngine::kAudioPacketIntervalMs = 100; |
| +int GoogleSSFERemoteEngine::url_fetcher_id_for_tests = 0; |
| + |
| +GoogleSSFERemoteEngineConfig::GoogleSSFERemoteEngineConfig() |
| + : filter_profanities(false), |
|
Satish
2012/03/16 17:00:35
4 spaces before :
Primiano Tucci (use gerrit)
2012/03/20 13:14:50
Done.
|
| + audio_sample_rate(8000), |
|
Satish
2012/03/16 17:00:35
these two values should probably be constants at the top of the file
Primiano Tucci (use gerrit)
2012/03/20 13:14:50
Done.
|
| + audio_num_bits_per_sample(16) { |
| +} |
| + |
| +GoogleSSFERemoteEngineConfig::~GoogleSSFERemoteEngineConfig() {} |
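To illustrate the suggestion above, a minimal sketch of the config defaults with the two magic numbers hoisted into named constants; the constant names and the *Sketch struct name are hypothetical, not the ones used in the patch.

#include <iostream>

namespace {
// Hypothetical names; the patch's follow-up revision may use different ones.
const int kDefaultConfigSampleRate = 8000;
const int kDefaultConfigBitsPerSample = 16;
}  // namespace

struct GoogleSSFERemoteEngineConfigSketch {
  bool filter_profanities;
  int audio_sample_rate;
  int audio_num_bits_per_sample;
  GoogleSSFERemoteEngineConfigSketch()
      : filter_profanities(false),
        audio_sample_rate(kDefaultConfigSampleRate),
        audio_num_bits_per_sample(kDefaultConfigBitsPerSample) {}
};

int main() {
  GoogleSSFERemoteEngineConfigSketch config;
  std::cout << config.audio_sample_rate << " Hz, "
            << config.audio_num_bits_per_sample << " bits\n";  // 8000 Hz, 16 bits
}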
| -SpeechRecognitionRequest::SpeechRecognitionRequest( |
| - net::URLRequestContextGetter* context, Delegate* delegate) |
| +GoogleSSFERemoteEngine::GoogleSSFERemoteEngine( |
| + net::URLRequestContextGetter* context) |
| : url_context_(context), |
| - delegate_(delegate) { |
| - DCHECK(delegate); |
| + codec_(AudioEncoder::CODEC_FLAC), |
| + encoder_(NULL) { |
|
Satish
2012/03/16 17:00:35
this should get autoinitialized?
Primiano Tucci (use gerrit)
2012/03/20 13:14:50
Done.
|
| } |
| -SpeechRecognitionRequest::~SpeechRecognitionRequest() {} |
| +GoogleSSFERemoteEngine::~GoogleSSFERemoteEngine() {} |
| -void SpeechRecognitionRequest::Start(const std::string& language, |
| - const std::string& grammar, |
| - bool filter_profanities, |
| - const std::string& hardware_info, |
| - const std::string& origin_url, |
| - const std::string& content_type) { |
| - DCHECK(!url_fetcher_.get()); |
| +void GoogleSSFERemoteEngine::SetConfiguration( |
| + const GoogleSSFERemoteEngineConfig& config) { |
| + config_ = config; |
| +} |
| +void GoogleSSFERemoteEngine::SpeechRecognitionBegins() { |
| + DCHECK(delegate()); |
| + DCHECK(!url_fetcher_.get()); |
| std::vector<std::string> parts; |
|
Satish
2012/03/16 17:00:35
move this to line 203 where it is getting used for the first time
Primiano Tucci (use gerrit)
2012/03/20 13:14:50
Done.
|
| + encoder_.reset(AudioEncoder::Create(codec_, config_.audio_sample_rate, |
| + config_.audio_num_bits_per_sample)); |
| + DCHECK(encoder_.get()); |
| + std::string lang_param = config_.language; |
| - std::string lang_param = language; |
| if (lang_param.empty() && url_context_) { |
| // If no language is provided then we use the first from the accepted |
| // language list. If this list is empty then it defaults to "en-US". |
| @@ -171,16 +196,20 @@ void SpeechRecognitionRequest::Start(const std::string& language, |
| size_t separator = accepted_language_list.find_first_of(",;"); |
| lang_param = accepted_language_list.substr(0, separator); |
| } |
| + |
| if (lang_param.empty()) |
| lang_param = "en-US"; |
| + |
| parts.push_back("lang=" + net::EscapeQueryParamValue(lang_param, true)); |
| - if (!grammar.empty()) |
| - parts.push_back("lm=" + net::EscapeQueryParamValue(grammar, true)); |
| - if (!hardware_info.empty()) |
| - parts.push_back("xhw=" + net::EscapeQueryParamValue(hardware_info, true)); |
| + if (!config_.grammar.empty()) |
| + parts.push_back("lm=" + net::EscapeQueryParamValue(config_.grammar, true)); |
| + |
| + if (!config_.hardware_info.empty()) |
| + parts.push_back("xhw=" + net::EscapeQueryParamValue(config_.hardware_info, |
| + true)); |
| parts.push_back("maxresults=" + base::IntToString(kMaxResults)); |
| - parts.push_back(filter_profanities ? "pfilter=2" : "pfilter=0"); |
| + parts.push_back(config_.filter_profanities ? "pfilter=2" : "pfilter=0"); |
| GURL url(std::string(kDefaultSpeechRecognitionUrl) + JoinString(parts, '&')); |
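As a side note, the parameter assembly above can be read as the following standalone sketch. EscapeQueryParam is a pass-through stand-in for net::EscapeQueryParamValue, and the real code additionally derives the fallback language from the request context's accept-language list before defaulting to "en-US".

#include <iostream>
#include <string>
#include <vector>

// Placeholder for net::EscapeQueryParamValue; a real implementation would
// percent-escape the value.
std::string EscapeQueryParam(const std::string& value) {
  return value;
}

std::string BuildRequestQuery(std::string lang, const std::string& grammar,
                              const std::string& hardware_info,
                              bool filter_profanities, int max_results) {
  std::vector<std::string> parts;
  if (lang.empty())
    lang = "en-US";  // Fallback when no language could be determined.
  parts.push_back("lang=" + EscapeQueryParam(lang));
  if (!grammar.empty())
    parts.push_back("lm=" + EscapeQueryParam(grammar));
  if (!hardware_info.empty())
    parts.push_back("xhw=" + EscapeQueryParam(hardware_info));
  parts.push_back("maxresults=" + std::to_string(max_results));
  parts.push_back(filter_profanities ? "pfilter=2" : "pfilter=0");

  std::string query;  // Equivalent of JoinString(parts, '&').
  for (size_t i = 0; i < parts.size(); ++i) {
    if (i > 0)
      query += "&";
    query += parts[i];
  }
  return query;
}

int main() {
  // "some-grammar" is an arbitrary placeholder value.
  std::cout << BuildRequestQuery("", "some-grammar", "", true, 6) << "\n";
  // Prints: lang=en-US&lm=some-grammar&maxresults=6&pfilter=2
}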
| @@ -188,41 +217,79 @@ void SpeechRecognitionRequest::Start(const std::string& language, |
| url, |
| URLFetcherImpl::POST, |
| this)); |
| - url_fetcher_->SetChunkedUpload(content_type); |
| + url_fetcher_->SetChunkedUpload(encoder_->mime_type()); |
| url_fetcher_->SetRequestContext(url_context_); |
| - url_fetcher_->SetReferrer(origin_url); |
| + url_fetcher_->SetReferrer(config_.origin_url); |
| // The speech recognition API does not require user identification as part |
| // of requests, so we don't send cookies or auth data for these requests to |
| // prevent any accidental connection between users who are logged into the |
| // domain for other services (e.g. bookmark sync) with the speech requests. |
| - url_fetcher_->SetLoadFlags( |
| - net::LOAD_DO_NOT_SAVE_COOKIES | net::LOAD_DO_NOT_SEND_COOKIES | |
| - net::LOAD_DO_NOT_SEND_AUTH_DATA); |
| + url_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SAVE_COOKIES | |
| + net::LOAD_DO_NOT_SEND_COOKIES | |
| + net::LOAD_DO_NOT_SEND_AUTH_DATA); |
| url_fetcher_->Start(); |
| } |
| -void SpeechRecognitionRequest::UploadAudioChunk(const AudioChunk& audio_chunk, |
| - bool is_last_chunk) { |
| +// Called only after the results have been retrieved. |
| +void GoogleSSFERemoteEngine::SpeechRecognitionEnds() { |
| + url_fetcher_.reset(); |
|
Satish
2012/03/16 17:00:35
do you expect callers to reuse this object across recognition sessions?
Primiano Tucci (use gerrit)
2012/03/20 13:14:50
On some occasions it can be reused (e.g., the user
|
| +} |
| + |
| +void GoogleSSFERemoteEngine::PushSpeechAudio(const AudioChunk& data) { |
| DCHECK(url_fetcher_.get()); |
|
Satish
2012/03/16 17:00:35
also check for encoder_ as done below
Primiano Tucci (use gerrit)
2012/03/20 13:14:50
Done.
|
| - url_fetcher_->AppendChunkToUpload(audio_chunk.AsString(), is_last_chunk); |
| + DCHECK_EQ(data.bytes_per_sample(), config_.audio_num_bits_per_sample / 8); |
| + encoder_->Encode(data); |
| + scoped_ptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear()); |
| + url_fetcher_->AppendChunkToUpload(encoded_data->AsString(), false); |
| } |
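The per-chunk path above (encode, drain the encoder, append a non-final upload chunk) can be sketched as follows; Encoder and Uploader are illustrative stubs, not the Chromium AudioEncoder and URLFetcher classes.

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

class Encoder {
 public:
  // Pretend-encodes by buffering the raw bytes; a real encoder would compress.
  void Encode(const std::vector<int16_t>& samples) {
    if (samples.empty())
      return;
    buffer_.append(reinterpret_cast<const char*>(&samples[0]),
                   samples.size() * sizeof(int16_t));
  }
  // Drains the internal buffer, like GetEncodedDataAndClear() in the patch.
  std::string TakeEncodedData() {
    std::string out;
    out.swap(buffer_);
    return out;
  }

 private:
  std::string buffer_;
};

class Uploader {
 public:
  void AppendChunk(const std::string& data, bool is_last) {
    std::cout << "chunk of " << data.size() << " bytes"
              << (is_last ? " (last)" : "") << "\n";
  }
};

// Called once per captured audio packet; the final chunk is appended
// separately when the audio stream completes.
void PushAudio(Encoder* encoder, Uploader* uploader,
               const std::vector<int16_t>& samples) {
  encoder->Encode(samples);
  uploader->AppendChunk(encoder->TakeEncodedData(), false /* is_last */);
}

int main() {
  Encoder encoder;
  Uploader uploader;
  std::vector<int16_t> packet(800, 0);  // One 100 ms packet of 8 kHz audio.
  PushAudio(&encoder, &uploader, packet);  // Prints: chunk of 1600 bytes
}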
| -void SpeechRecognitionRequest::OnURLFetchComplete( |
| +void GoogleSSFERemoteEngine::SpeechAudioStreamComplete() { |
| + DCHECK(url_fetcher_.get()); |
| + DCHECK(encoder_.get()); |
| + // UploadAudioChunk requires a non-empty final buffer. So we encode a packet |
| + // of silence in case encoder had no data already. |
| + std::vector<short> samples( |
|
Satish
2012/03/16 17:00:35
short -> int16 ?
Primiano Tucci (use gerrit)
2012/03/20 13:14:50
Done.
|
| + config_.audio_sample_rate * kAudioPacketIntervalMs / 1000); |
| + AudioChunk dummy_chunk(reinterpret_cast<uint8*>(&samples[0]), |
| + samples.size() * sizeof(short), |
|
Satish
2012/03/16 17:00:35
ditto
Primiano Tucci (use gerrit)
2012/03/20 13:14:50
Done.
|
| + encoder_->bits_per_sample() / 8); |
| + encoder_->Encode(dummy_chunk); |
| + encoder_->Flush(); |
| + scoped_ptr<AudioChunk> encoded_dummy_data(encoder_->GetEncodedDataAndClear()); |
| + DCHECK(!encoded_dummy_data->IsEmpty()); |
| + encoder_.reset(); |
| + |
| + url_fetcher_->AppendChunkToUpload(encoded_dummy_data->AsString(), true); |
| +} |
| + |
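For reference, a minimal standalone sketch of how the trailing silence packet above is sized, using int16 samples as the reviewer suggested; the constants mirror the 100 ms packet interval and the 8000 Hz default sample rate from the config.

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  const int kAudioPacketIntervalMs = 100;  // One packet per 100 ms of audio.
  const int kSampleRate = 8000;            // Default sample rate in the config.

  // Zero-valued samples covering exactly one packet interval; this is the
  // non-empty final buffer the encoder flushes before the last upload chunk.
  std::vector<int16_t> silence(kSampleRate * kAudioPacketIntervalMs / 1000, 0);

  std::cout << silence.size() << " samples, "
            << silence.size() * sizeof(int16_t) << " bytes\n";
  // Prints: 800 samples, 1600 bytes
}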
| +void GoogleSSFERemoteEngine::OnURLFetchComplete( |
| const content::URLFetcher* source) { |
| DCHECK_EQ(url_fetcher_.get(), source); |
| - |
| - content::SpeechRecognitionResult result; |
| + SpeechRecognitionResult result; |
| + SpeechRecognitionError error(content::SPEECH_RECOGNITION_ERROR_NETWORK); |
| std::string data; |
| - if (!source->GetStatus().is_success() || source->GetResponseCode() != 200 || |
| + // The default error code in case of parse errors is NETWORK_FAILURE, however |
|
Satish
2012/03/16 17:00:35
add a newline before full line comments such as this one
Primiano Tucci (use gerrit)
2012/03/20 13:14:50
Done.
|
| + // ParseServerResponse can change the error to a more appropriate one. |
| + if (!source->GetStatus().is_success() || |
|
Satish
2012/03/16 17:00:35
no need to align the ||, could just leave 1 space
Primiano Tucci (use gerrit)
2012/03/20 13:14:50
Done.
|
| + source->GetResponseCode() != 200 || |
| !source->GetResponseAsString(&data) || |
| - !ParseServerResponse(data, &result)) { |
| - result.error = content::SPEECH_RECOGNITION_ERROR_NETWORK; |
| + !ParseServerResponse(data, &result, &error)) { |
| + DVLOG(1) << "GoogleSSFERemoteEngine: Network Error " << error.code; |
| + delegate()->OnSpeechEngineError(error); |
| + } else { |
| + DVLOG(1) << "GoogleSSFERemoteEngine: Invoking delegate with result."; |
| + delegate()->OnSpeechEngineResult(result); |
| } |
| - |
| - DVLOG(1) << "SpeechRecognitionRequest: Invoking delegate with result."; |
| url_fetcher_.reset(); |
| - delegate_->SetRecognitionResult(result); |
| +} |
| + |
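The completion handling above boils down to: the error defaults to a network failure, parsing may refine it, and exactly one of the two delegate callbacks fires. A minimal sketch with illustrative names rather than the real delegate API:

#include <iostream>
#include <string>

enum ErrorCode { ERROR_NETWORK, ERROR_NO_SPEECH, ERROR_NO_MATCH };

// Stand-in for ParseServerResponse: may refine |*error| before failing.
bool ParseBody(const std::string& body, std::string* result, ErrorCode* error) {
  if (body.empty())
    return false;  // Keeps the caller's default ERROR_NETWORK.
  if (body == "no-speech") {
    *error = ERROR_NO_SPEECH;
    return false;
  }
  *result = body;
  return true;
}

void HandleFetchComplete(bool http_ok, const std::string& body) {
  std::string result;
  ErrorCode error = ERROR_NETWORK;  // Default until parsing refines it.
  if (!http_ok || !ParseBody(body, &result, &error))
    std::cout << "error callback, code " << error << "\n";
  else
    std::cout << "result callback: " << result << "\n";
}

int main() {
  HandleFetchComplete(false, "hello");     // error callback, code 0
  HandleFetchComplete(true, "no-speech");  // error callback, code 1
  HandleFetchComplete(true, "hello");      // result callback: hello
}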
| +bool GoogleSSFERemoteEngine::IsRecognitionPending() const { |
| + return url_fetcher_ != NULL; |
| +} |
| + |
| +int GoogleSSFERemoteEngine::DesiredAudioChunkDurationMs() const { |
| + return kAudioPacketIntervalMs; |
| } |
| } // namespace speech |