Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(548)

Side by Side Diff: content/browser/speech/google_one_shot_remote_engine.cc

Issue 1886813002: Speech: remove GoogleOneShotRemoteEngine (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@drop_is_legacy_api
Patch Set: Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "content/browser/speech/google_one_shot_remote_engine.h"
6
7 #include <stddef.h>
8 #include <stdint.h>
9
10 #include <vector>
11
12 #include "base/json/json_reader.h"
13 #include "base/strings/string_number_conversions.h"
14 #include "base/strings/string_util.h"
15 #include "base/values.h"
16 #include "content/browser/speech/audio_buffer.h"
17 #include "content/public/common/speech_recognition_error.h"
18 #include "content/public/common/speech_recognition_result.h"
19 #include "google_apis/google_api_keys.h"
20 #include "net/base/escape.h"
21 #include "net/base/load_flags.h"
22 #include "net/url_request/http_user_agent_settings.h"
23 #include "net/url_request/url_fetcher.h"
24 #include "net/url_request/url_request_context.h"
25 #include "net/url_request/url_request_context_getter.h"
26 #include "net/url_request/url_request_status.h"
27
28 namespace content {
29 namespace {
30
// Endpoint of the (legacy) Google one-shot speech recognition web service.
// Ends with '&' so that additional query parameters can be appended directly.
const char* const kDefaultSpeechRecognitionUrl =
    "https://www.google.com/speech-api/v1/recognize?xjerr=1&client=chromium&";
// Keys of the JSON fields in the web service response.
const char* const kStatusString = "status";
const char* const kHypothesesString = "hypotheses";
const char* const kUtteranceString = "utterance";
const char* const kConfidenceString = "confidence";
// Values of the "status" field returned by the web service.
const int kWebServiceStatusNoError = 0;
const int kWebServiceStatusNoSpeech = 4;
const int kWebServiceStatusNoMatch = 5;
40
41 bool ParseServerResponse(const std::string& response_body,
42 SpeechRecognitionResult* result,
43 SpeechRecognitionError* error) {
44 if (response_body.empty()) {
45 LOG(WARNING) << "ParseServerResponse: Response was empty.";
46 return false;
47 }
48 DVLOG(1) << "ParseServerResponse: Parsing response " << response_body;
49
50 // Parse the response, ignoring comments.
51 std::string error_msg;
52 std::unique_ptr<base::Value> response_value =
53 base::JSONReader::ReadAndReturnError(response_body, base::JSON_PARSE_RFC,
54 NULL, &error_msg);
55 if (response_value == NULL) {
56 LOG(WARNING) << "ParseServerResponse: JSONReader failed : " << error_msg;
57 return false;
58 }
59
60 if (!response_value->IsType(base::Value::TYPE_DICTIONARY)) {
61 DVLOG(1) << "ParseServerResponse: Unexpected response type "
62 << response_value->GetType();
63 return false;
64 }
65 const base::DictionaryValue* response_object =
66 static_cast<const base::DictionaryValue*>(response_value.get());
67
68 // Get the status.
69 int status;
70 if (!response_object->GetInteger(kStatusString, &status)) {
71 DVLOG(1) << "ParseServerResponse: " << kStatusString
72 << " is not a valid integer value.";
73 return false;
74 }
75
76 // Process the status.
77 switch (status) {
78 case kWebServiceStatusNoError:
79 break;
80 case kWebServiceStatusNoSpeech:
81 error->code = SPEECH_RECOGNITION_ERROR_NO_SPEECH;
82 return false;
83 case kWebServiceStatusNoMatch:
84 error->code = SPEECH_RECOGNITION_ERROR_NO_MATCH;
85 return false;
86 default:
87 error->code = SPEECH_RECOGNITION_ERROR_NETWORK;
88 // Other status codes should not be returned by the server.
89 DVLOG(1) << "ParseServerResponse: unexpected status code " << status;
90 return false;
91 }
92
93 // Get the hypotheses.
94 const base::Value* hypotheses_value = NULL;
95 if (!response_object->Get(kHypothesesString, &hypotheses_value)) {
96 DVLOG(1) << "ParseServerResponse: Missing hypotheses attribute.";
97 return false;
98 }
99
100 DCHECK(hypotheses_value);
101 if (!hypotheses_value->IsType(base::Value::TYPE_LIST)) {
102 DVLOG(1) << "ParseServerResponse: Unexpected hypotheses type "
103 << hypotheses_value->GetType();
104 return false;
105 }
106
107 const base::ListValue* hypotheses_list =
108 static_cast<const base::ListValue*>(hypotheses_value);
109
110 // For now we support only single shot recognition, so we are giving only a
111 // final result, consisting of one fragment (with one or more hypotheses).
112 size_t index = 0;
113 for (; index < hypotheses_list->GetSize(); ++index) {
114 const base::Value* hypothesis = NULL;
115 if (!hypotheses_list->Get(index, &hypothesis)) {
116 LOG(WARNING) << "ParseServerResponse: Unable to read hypothesis value.";
117 break;
118 }
119 DCHECK(hypothesis);
120 if (!hypothesis->IsType(base::Value::TYPE_DICTIONARY)) {
121 LOG(WARNING) << "ParseServerResponse: Unexpected value type "
122 << hypothesis->GetType();
123 break;
124 }
125
126 const base::DictionaryValue* hypothesis_value =
127 static_cast<const base::DictionaryValue*>(hypothesis);
128 base::string16 utterance;
129
130 if (!hypothesis_value->GetString(kUtteranceString, &utterance)) {
131 LOG(WARNING) << "ParseServerResponse: Missing utterance value.";
132 break;
133 }
134
135 // It is not an error if the 'confidence' field is missing.
136 double confidence = 0.0;
137 hypothesis_value->GetDouble(kConfidenceString, &confidence);
138 result->hypotheses.push_back(SpeechRecognitionHypothesis(utterance,
139 confidence));
140 }
141
142 if (index < hypotheses_list->GetSize()) {
143 result->hypotheses.clear();
144 return false;
145 }
146 return true;
147 }
148
149 } // namespace
150
// Duration of each audio packet sent to the encoder/uploader, in ms.
const int GoogleOneShotRemoteEngine::kAudioPacketIntervalMs = 100;
// Fetcher id used so tests can identify and intercept our URLFetcher.
int GoogleOneShotRemoteEngine::url_fetcher_id_for_tests = 0;
153
// |context| supplies the URLRequestContext used for the recognition request;
// it is retained (scoped_refptr member) for the lifetime of the engine.
GoogleOneShotRemoteEngine::GoogleOneShotRemoteEngine(
    net::URLRequestContextGetter* context)
    : url_context_(context) {
}

GoogleOneShotRemoteEngine::~GoogleOneShotRemoteEngine() {}
160
// Stores the recognition configuration (language, grammars, sample rate,
// etc.) used by the next StartRecognition() call.
void GoogleOneShotRemoteEngine::SetConfig(
    const SpeechRecognitionEngineConfig& config) {
  config_ = config;
}
165
// Builds the recognition request URL from |config_|, creates the audio
// encoder and starts a chunked POST to the speech web service. Must not be
// called while a previous request is still in flight (DCHECKed).
void GoogleOneShotRemoteEngine::StartRecognition() {
  DCHECK(delegate());
  DCHECK(!url_fetcher_.get());
  std::string lang_param = config_.language;

  if (lang_param.empty() && url_context_.get()) {
    // If no language is provided then we use the first from the accepted
    // language list. If this list is empty then it defaults to "en-US".
    // Example of the contents of this list: "es,en-GB;q=0.8", ""
    net::URLRequestContext* request_context =
        url_context_->GetURLRequestContext();
    DCHECK(request_context);
    // TODO(pauljensen): GoogleOneShotRemoteEngine should be constructed with
    // a reference to the HttpUserAgentSettings rather than accessing the
    // accept language through the URLRequestContext.
    if (request_context->http_user_agent_settings()) {
      std::string accepted_language_list =
          request_context->http_user_agent_settings()->GetAcceptLanguage();
      // Keep only the first language tag: cut at the first ',' (next entry)
      // or ';' (quality value).
      size_t separator = accepted_language_list.find_first_of(",;");
      lang_param = accepted_language_list.substr(0, separator);
    }
  }

  if (lang_param.empty())
    lang_param = "en-US";

  // Assemble the query parameters; they are joined with '&' below and
  // appended to kDefaultSpeechRecognitionUrl (which ends with '&').
  std::vector<std::string> parts;
  parts.push_back("lang=" + net::EscapeQueryParamValue(lang_param, true));

  if (!config_.grammars.empty()) {
    // Only a single grammar is supported by this one-shot engine.
    DCHECK_EQ(config_.grammars.size(), 1U);
    parts.push_back("lm=" + net::EscapeQueryParamValue(config_.grammars[0].url,
                                                       true));
  }

  if (!config_.hardware_info.empty())
    parts.push_back("xhw=" + net::EscapeQueryParamValue(config_.hardware_info,
                                                        true));
  parts.push_back("maxresults=" + base::UintToString(config_.max_hypotheses));
  // pfilter=2 enables profanity filtering; pfilter=0 disables it.
  parts.push_back(config_.filter_profanities ? "pfilter=2" : "pfilter=0");

  std::string api_key = google_apis::GetAPIKey();
  parts.push_back("key=" + net::EscapeQueryParamValue(api_key, true));

  GURL url(std::string(kDefaultSpeechRecognitionUrl) +
           base::JoinString(parts, "&"));

  encoder_.reset(new AudioEncoder(config_.audio_sample_rate,
                                  config_.audio_num_bits_per_sample));
  DCHECK(encoder_.get());
  // Audio is streamed to the server as a chunked upload; chunks are appended
  // by TakeAudioChunk() and finalized by AudioChunksEnded().
  url_fetcher_ = net::URLFetcher::Create(url_fetcher_id_for_tests, url,
                                         net::URLFetcher::POST, this);
  url_fetcher_->SetChunkedUpload(encoder_->GetMimeType());
  url_fetcher_->SetRequestContext(url_context_.get());
  url_fetcher_->SetReferrer(config_.origin_url);

  // The speech recognition API does not require user identification as part
  // of requests, so we don't send cookies or auth data for these requests to
  // prevent any accidental connection between users who are logged into the
  // domain for other services (e.g. bookmark sync) with the speech requests.
  url_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SAVE_COOKIES |
                             net::LOAD_DO_NOT_SEND_COOKIES |
                             net::LOAD_DO_NOT_SEND_AUTH_DATA);
  url_fetcher_->Start();
}
231
// Aborts any in-flight request; destroying the fetcher cancels the upload.
void GoogleOneShotRemoteEngine::EndRecognition() {
  url_fetcher_.reset();
}
235
236 void GoogleOneShotRemoteEngine::TakeAudioChunk(const AudioChunk& data) {
237 DCHECK(url_fetcher_.get());
238 DCHECK(encoder_.get());
239 DCHECK_EQ(data.bytes_per_sample(), config_.audio_num_bits_per_sample / 8);
240 encoder_->Encode(data);
241 scoped_refptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear());
242 url_fetcher_->AppendChunkToUpload(encoded_data->AsString(), false);
243 }
244
245 void GoogleOneShotRemoteEngine::AudioChunksEnded() {
246 DCHECK(url_fetcher_.get());
247 DCHECK(encoder_.get());
248
249 // UploadAudioChunk requires a non-empty final buffer. So we encode a packet
250 // of silence in case encoder had no data already.
251 size_t sample_count =
252 config_.audio_sample_rate * kAudioPacketIntervalMs / 1000;
253 scoped_refptr<AudioChunk> dummy_chunk(new AudioChunk(
254 sample_count * sizeof(int16_t), encoder_->GetBitsPerSample() / 8));
255 encoder_->Encode(*dummy_chunk.get());
256 encoder_->Flush();
257 scoped_refptr<AudioChunk> encoded_dummy_data(
258 encoder_->GetEncodedDataAndClear());
259 DCHECK(!encoded_dummy_data->IsEmpty());
260 encoder_.reset();
261
262 url_fetcher_->AppendChunkToUpload(encoded_dummy_data->AsString(), true);
263 }
264
265 void GoogleOneShotRemoteEngine::OnURLFetchComplete(
266 const net::URLFetcher* source) {
267 DCHECK_EQ(url_fetcher_.get(), source);
268 SpeechRecognitionResults results;
269 results.push_back(SpeechRecognitionResult());
270 SpeechRecognitionResult& result = results.back();
271 SpeechRecognitionError error(SPEECH_RECOGNITION_ERROR_NETWORK);
272 std::string data;
273
274 // The default error code in case of parse errors is NETWORK_FAILURE, however
275 // ParseServerResponse can change the error to a more appropriate one.
276 bool error_occurred = (!source->GetStatus().is_success() ||
277 source->GetResponseCode() != 200 ||
278 !source->GetResponseAsString(&data) ||
279 !ParseServerResponse(data, &result, &error));
280 url_fetcher_.reset();
281 if (error_occurred) {
282 DVLOG(1) << "GoogleOneShotRemoteEngine: Network Error " << error.code;
283 delegate()->OnSpeechRecognitionEngineError(error);
284 } else {
285 DVLOG(1) << "GoogleOneShotRemoteEngine: Invoking delegate with result.";
286 delegate()->OnSpeechRecognitionEngineResults(results);
287 }
288 }
289
290 bool GoogleOneShotRemoteEngine::IsRecognitionPending() const {
291 return url_fetcher_ != NULL;
292 }
293
// Tells the caller how much audio (in ms) to deliver per TakeAudioChunk().
int GoogleOneShotRemoteEngine::GetDesiredAudioChunkDurationMs() const {
  return kAudioPacketIntervalMs;
}
297
298 } // namespace content
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698