Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1083)

Unified Diff: content/browser/speech/google_one_shot_remote_engine.cc

Issue 9663066: Refactoring of chrome speech recognition architecture (CL1.3) (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Fixed according to (partial) Satish review. Created 8 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: content/browser/speech/google_one_shot_remote_engine.cc
diff --git a/content/browser/speech/speech_recognition_request.cc b/content/browser/speech/google_one_shot_remote_engine.cc
similarity index 51%
rename from content/browser/speech/speech_recognition_request.cc
rename to content/browser/speech/google_one_shot_remote_engine.cc
index a14369976d45ff0c267ef0af7221f4cac547fc32..7cc7739f60d3f1ba9c6da793c2b30c07c291de52 100644
--- a/content/browser/speech/speech_recognition_request.cc
+++ b/content/browser/speech/google_one_shot_remote_engine.cc
@@ -2,11 +2,12 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "content/browser/speech/speech_recognition_request.h"
+#include "content/browser/speech/google_one_shot_remote_engine.h"
#include <vector>
#include "base/json/json_reader.h"
+#include "base/memory/scoped_ptr.h"
#include "base/string_number_conversions.h"
#include "base/string_util.h"
#include "base/values.h"
@@ -19,6 +20,10 @@
#include "net/url_request/url_request_context_getter.h"
#include "net/url_request/url_request_status.h"
+using content::SpeechRecognitionError;
+using content::SpeechRecognitionHypothesis;
+using content::SpeechRecognitionResult;
+
namespace {
const char* const kDefaultSpeechRecognitionUrl =
@@ -27,13 +32,19 @@ const char* const kStatusString = "status";
const char* const kHypothesesString = "hypotheses";
const char* const kUtteranceString = "utterance";
const char* const kConfidenceString = "confidence";
-
+const int kGoogleSSFEStatusNoError = 0;
Satish 2012/03/21 13:29:48 SSFE to something else?
Primiano Tucci (use gerrit) 2012/03/22 11:20:41 Done.
+const int kGoogleSSFEStatusNoSpeech = 4;
+const int kGoogleSSFEStatusNoMatch = 5;
+const int kDefaultConfigSampleRate = 8000;
+const int kDefaultConfigBPS = 16;
Satish 2012/03/21 13:29:48 BPS -> BitsPerSample
Primiano Tucci (use gerrit) 2012/03/22 11:20:41 Done.
+const speech::AudioEncoder::Codec kCodec = speech::AudioEncoder::CODEC_FLAC;
Satish 2012/03/21 13:29:48 kCodec > kAudioCodec or kDefaultAudioCodec
Primiano Tucci (use gerrit) 2012/03/22 11:20:41 Done.
// TODO(satish): Remove this hardcoded value once the page is allowed to
// set this via an attribute.
const int kMaxResults = 6;
bool ParseServerResponse(const std::string& response_body,
- content::SpeechRecognitionResult* result) {
+ SpeechRecognitionResult* result,
+ SpeechRecognitionError* error) {
if (response_body.empty()) {
LOG(WARNING) << "ParseServerResponse: Response was empty.";
return false;
@@ -67,19 +78,22 @@ bool ParseServerResponse(const std::string& response_body,
// Process the status.
switch (status) {
- case content::SPEECH_RECOGNITION_ERROR_NONE:
- case content::SPEECH_RECOGNITION_ERROR_NO_SPEECH:
- case content::SPEECH_RECOGNITION_ERROR_NO_MATCH:
- break;
-
- default:
- // Other status codes should not be returned by the server.
- VLOG(1) << "ParseServerResponse: unexpected status code " << status;
- return false;
+ case kGoogleSSFEStatusNoError:
+ error->code = content::SPEECH_RECOGNITION_ERROR_NONE;
+ break;
+ case kGoogleSSFEStatusNoSpeech:
+ error->code = content::SPEECH_RECOGNITION_ERROR_NO_SPEECH;
+ return false;
+ case kGoogleSSFEStatusNoMatch:
+ error->code = content::SPEECH_RECOGNITION_ERROR_NO_MATCH;
+ return false;
+ default:
+ error->code = content::SPEECH_RECOGNITION_ERROR_NETWORK;
+ // Other status codes should not be returned by the server.
+ VLOG(1) << "ParseServerResponse: unexpected status code " << status;
+ return false;
}
- result->error = static_cast<content::SpeechRecognitionErrorCode>(status);
-
// Get the hypotheses.
Value* hypotheses_value = NULL;
if (!response_object->Get(kHypothesesString, &hypotheses_value)) {
@@ -95,7 +109,8 @@ bool ParseServerResponse(const std::string& response_body,
}
const ListValue* hypotheses_list = static_cast<ListValue*>(hypotheses_value);
-
+ // For now we support only single shot recognition, so we are giving only a
+ // final result, consisting of one fragment (with one or more hypotheses).
size_t index = 0;
for (; index < hypotheses_list->GetSize(); ++index) {
Value* hypothesis = NULL;
@@ -113,6 +128,7 @@ bool ParseServerResponse(const std::string& response_body,
const DictionaryValue* hypothesis_value =
static_cast<DictionaryValue*>(hypothesis);
string16 utterance;
+
if (!hypothesis_value->GetString(kUtteranceString, &utterance)) {
LOG(WARNING) << "ParseServerResponse: Missing utterance value.";
break;
@@ -121,16 +137,14 @@ bool ParseServerResponse(const std::string& response_body,
// It is not an error if the 'confidence' field is missing.
double confidence = 0.0;
hypothesis_value->GetDouble(kConfidenceString, &confidence);
-
- result->hypotheses.push_back(content::SpeechRecognitionHypothesis(
- utterance, confidence));
+ result->hypotheses.push_back(SpeechRecognitionHypothesis(utterance,
+ confidence));
}
if (index < hypotheses_list->GetSize()) {
result->hypotheses.clear();
return false;
}
-
return true;
}
@@ -138,28 +152,35 @@ bool ParseServerResponse(const std::string& response_body,
namespace speech {
-int SpeechRecognitionRequest::url_fetcher_id_for_tests = 0;
+const int GoogleOneShotRemoteEngine::kAudioPacketIntervalMs = 100;
+int GoogleOneShotRemoteEngine::url_fetcher_id_for_tests = 0;
-SpeechRecognitionRequest::SpeechRecognitionRequest(
- net::URLRequestContextGetter* context, Delegate* delegate)
- : url_context_(context),
- delegate_(delegate) {
- DCHECK(delegate);
+GoogleOneShotRemoteEngineConfig::GoogleOneShotRemoteEngineConfig()
+ : filter_profanities(false),
+ audio_sample_rate(kDefaultConfigSampleRate),
+ audio_num_bits_per_sample(kDefaultConfigBPS) {
}
-SpeechRecognitionRequest::~SpeechRecognitionRequest() {}
+GoogleOneShotRemoteEngineConfig::~GoogleOneShotRemoteEngineConfig() {}
-void SpeechRecognitionRequest::Start(const std::string& language,
- const std::string& grammar,
- bool filter_profanities,
- const std::string& hardware_info,
- const std::string& origin_url,
- const std::string& content_type) {
- DCHECK(!url_fetcher_.get());
+GoogleOneShotRemoteEngine::GoogleOneShotRemoteEngine(
+ net::URLRequestContextGetter* context)
+ : url_context_(context) {
+}
+
+GoogleOneShotRemoteEngine::~GoogleOneShotRemoteEngine() {}
+void GoogleOneShotRemoteEngine::SetConfiguration(
Satish 2012/03/21 13:29:48 since the type name is Config how about renaming t
Primiano Tucci (use gerrit) 2012/03/22 11:20:41 Done.
+ const GoogleOneShotRemoteEngineConfig& config) {
+ config_ = config;
+}
+
+void GoogleOneShotRemoteEngine::Initialize() {
Satish 2012/03/21 13:29:48 Reading this I got confused a bit why Initialize w
Primiano Tucci (use gerrit) 2012/03/22 11:20:41 Done.
+ DCHECK(delegate());
+ DCHECK(!url_fetcher_.get());
std::vector<std::string> parts;
Satish 2012/03/21 13:29:48 move this to line 199 where it is first used
Primiano Tucci (use gerrit) 2012/03/22 11:20:41 Done.
+ std::string lang_param = config_.language;
- std::string lang_param = language;
if (lang_param.empty() && url_context_) {
// If no language is provided then we use the first from the accepted
// language list. If this list is empty then it defaults to "en-US".
@@ -171,58 +192,105 @@ void SpeechRecognitionRequest::Start(const std::string& language,
size_t separator = accepted_language_list.find_first_of(",;");
lang_param = accepted_language_list.substr(0, separator);
}
+
if (lang_param.empty())
lang_param = "en-US";
+
parts.push_back("lang=" + net::EscapeQueryParamValue(lang_param, true));
- if (!grammar.empty())
- parts.push_back("lm=" + net::EscapeQueryParamValue(grammar, true));
- if (!hardware_info.empty())
- parts.push_back("xhw=" + net::EscapeQueryParamValue(hardware_info, true));
+ if (!config_.grammar.empty())
+ parts.push_back("lm=" + net::EscapeQueryParamValue(config_.grammar, true));
+
+ if (!config_.hardware_info.empty())
+ parts.push_back("xhw=" + net::EscapeQueryParamValue(config_.hardware_info,
+ true));
parts.push_back("maxresults=" + base::IntToString(kMaxResults));
- parts.push_back(filter_profanities ? "pfilter=2" : "pfilter=0");
+ parts.push_back(config_.filter_profanities ? "pfilter=2" : "pfilter=0");
GURL url(std::string(kDefaultSpeechRecognitionUrl) + JoinString(parts, '&'));
+ encoder_.reset(AudioEncoder::Create(kCodec, config_.audio_sample_rate,
+ config_.audio_num_bits_per_sample));
+ DCHECK(encoder_.get());
url_fetcher_.reset(URLFetcherImpl::Create(url_fetcher_id_for_tests,
url,
URLFetcherImpl::POST,
this));
- url_fetcher_->SetChunkedUpload(content_type);
+ url_fetcher_->SetChunkedUpload(encoder_->mime_type());
url_fetcher_->SetRequestContext(url_context_);
- url_fetcher_->SetReferrer(origin_url);
+ url_fetcher_->SetReferrer(config_.origin_url);
// The speech recognition API does not require user identification as part
// of requests, so we don't send cookies or auth data for these requests to
// prevent any accidental connection between users who are logged into the
// domain for other services (e.g. bookmark sync) with the speech requests.
- url_fetcher_->SetLoadFlags(
- net::LOAD_DO_NOT_SAVE_COOKIES | net::LOAD_DO_NOT_SEND_COOKIES |
- net::LOAD_DO_NOT_SEND_AUTH_DATA);
+ url_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SAVE_COOKIES |
+ net::LOAD_DO_NOT_SEND_COOKIES |
+ net::LOAD_DO_NOT_SEND_AUTH_DATA);
url_fetcher_->Start();
}
-void SpeechRecognitionRequest::UploadAudioChunk(const AudioChunk& audio_chunk,
- bool is_last_chunk) {
+// Destroys the URL fetcher; IsRecognitionPending() returns false afterwards.
Satish 2012/03/21 13:29:48 this comment isn't correct as this could be called
Primiano Tucci (use gerrit) 2012/03/22 11:20:41 Done.
+void GoogleOneShotRemoteEngine::Cleanup() {
+ url_fetcher_.reset();
+}
+
+void GoogleOneShotRemoteEngine::TakeAudioChunk(const AudioChunk& data) {
+ DCHECK(url_fetcher_.get());
+ DCHECK(encoder_.get());
+ DCHECK_EQ(data.bytes_per_sample(), config_.audio_num_bits_per_sample / 8);
+ encoder_->Encode(data);
+ scoped_ptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear());
+ url_fetcher_->AppendChunkToUpload(encoded_data->AsString(), false);
+}
+
+void GoogleOneShotRemoteEngine::AudioChunksEnded() {
DCHECK(url_fetcher_.get());
- url_fetcher_->AppendChunkToUpload(audio_chunk.AsString(), is_last_chunk);
+ DCHECK(encoder_.get());
+ // The final upload chunk must be non-empty (DCHECKed below). So we encode a packet
Satish 2012/03/21 13:29:48 suggest leaving a newline above full line comments
Primiano Tucci (use gerrit) 2012/03/22 11:20:41 Done.
+ // of silence in case the encoder had no data already.
+ std::vector<int16> samples(
+ config_.audio_sample_rate * kAudioPacketIntervalMs / 1000);
+ AudioChunk dummy_chunk(reinterpret_cast<uint8*>(&samples[0]),
+ samples.size() * sizeof(int16),
+ encoder_->bits_per_sample() / 8);
+ encoder_->Encode(dummy_chunk);
+ encoder_->Flush();
+ scoped_ptr<AudioChunk> encoded_dummy_data(encoder_->GetEncodedDataAndClear());
+ DCHECK(!encoded_dummy_data->IsEmpty());
+ encoder_.reset();
+
+ url_fetcher_->AppendChunkToUpload(encoded_dummy_data->AsString(), true);
}
-void SpeechRecognitionRequest::OnURLFetchComplete(
+void GoogleOneShotRemoteEngine::OnURLFetchComplete(
const content::URLFetcher* source) {
DCHECK_EQ(url_fetcher_.get(), source);
-
- content::SpeechRecognitionResult result;
+ SpeechRecognitionResult result;
+ SpeechRecognitionError error(content::SPEECH_RECOGNITION_ERROR_NETWORK);
std::string data;
- if (!source->GetStatus().is_success() || source->GetResponseCode() != 200 ||
+ // Parse errors default to SPEECH_RECOGNITION_ERROR_NETWORK, however
Satish 2012/03/21 13:29:48 ditto
Primiano Tucci (use gerrit) 2012/03/22 11:20:41 Done.
+ // ParseServerResponse can change the error to a more appropriate one.
+
+ if (!source->GetStatus().is_success() ||
+ source->GetResponseCode() != 200 ||
!source->GetResponseAsString(&data) ||
- !ParseServerResponse(data, &result)) {
- result.error = content::SPEECH_RECOGNITION_ERROR_NETWORK;
+ !ParseServerResponse(data, &result, &error)) {
+ DVLOG(1) << "GoogleOneShotRemoteEngine: Network Error " << error.code;
+ delegate()->OnSpeechEngineError(error);
+ } else {
+ DVLOG(1) << "GoogleOneShotRemoteEngine: Invoking delegate with result.";
+ delegate()->OnSpeechEngineResult(result);
}
-
- DVLOG(1) << "SpeechRecognitionRequest: Invoking delegate with result.";
url_fetcher_.reset();
- delegate_->SetRecognitionResult(result);
+}
+
+bool GoogleOneShotRemoteEngine::IsRecognitionPending() const {
+ return url_fetcher_ != NULL;
+}
+
+int GoogleOneShotRemoteEngine::GetDesiredAudioChunkDurationMs() const {
+ return kAudioPacketIntervalMs;
}
} // namespace speech

Powered by Google App Engine
This is Rietveld 408576698