| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "content/browser/speech/google_one_shot_remote_engine.h" | |
| 6 | |
| 7 #include <stddef.h> | |
| 8 #include <stdint.h> | |
| 9 | |
| 10 #include <vector> | |
| 11 | |
| 12 #include "base/json/json_reader.h" | |
| 13 #include "base/strings/string_number_conversions.h" | |
| 14 #include "base/strings/string_util.h" | |
| 15 #include "base/values.h" | |
| 16 #include "content/browser/speech/audio_buffer.h" | |
| 17 #include "content/public/common/speech_recognition_error.h" | |
| 18 #include "content/public/common/speech_recognition_result.h" | |
| 19 #include "google_apis/google_api_keys.h" | |
| 20 #include "net/base/escape.h" | |
| 21 #include "net/base/load_flags.h" | |
| 22 #include "net/url_request/http_user_agent_settings.h" | |
| 23 #include "net/url_request/url_fetcher.h" | |
| 24 #include "net/url_request/url_request_context.h" | |
| 25 #include "net/url_request/url_request_context_getter.h" | |
| 26 #include "net/url_request/url_request_status.h" | |
| 27 | |
| 28 namespace content { | |
| 29 namespace { | |
| 30 | |
// Endpoint of the legacy Google one-shot speech recognition web service.
// Query parameters (lang, lm, xhw, maxresults, pfilter, key) are appended
// in StartRecognition().
const char kDefaultSpeechRecognitionUrl[] =
    "https://www.google.com/speech-api/v1/recognize?xjerr=1&client=chromium&";

// JSON attribute names in the web service response.
const char kStatusString[] = "status";
const char kHypothesesString[] = "hypotheses";
const char kUtteranceString[] = "utterance";
const char kConfidenceString[] = "confidence";

// "status" codes returned by the web service.
const int kWebServiceStatusNoError = 0;
const int kWebServiceStatusNoSpeech = 4;
const int kWebServiceStatusNoMatch = 5;
| 40 | |
| 41 bool ParseServerResponse(const std::string& response_body, | |
| 42 SpeechRecognitionResult* result, | |
| 43 SpeechRecognitionError* error) { | |
| 44 if (response_body.empty()) { | |
| 45 LOG(WARNING) << "ParseServerResponse: Response was empty."; | |
| 46 return false; | |
| 47 } | |
| 48 DVLOG(1) << "ParseServerResponse: Parsing response " << response_body; | |
| 49 | |
| 50 // Parse the response, ignoring comments. | |
| 51 std::string error_msg; | |
| 52 std::unique_ptr<base::Value> response_value = | |
| 53 base::JSONReader::ReadAndReturnError(response_body, base::JSON_PARSE_RFC, | |
| 54 NULL, &error_msg); | |
| 55 if (response_value == NULL) { | |
| 56 LOG(WARNING) << "ParseServerResponse: JSONReader failed : " << error_msg; | |
| 57 return false; | |
| 58 } | |
| 59 | |
| 60 if (!response_value->IsType(base::Value::TYPE_DICTIONARY)) { | |
| 61 DVLOG(1) << "ParseServerResponse: Unexpected response type " | |
| 62 << response_value->GetType(); | |
| 63 return false; | |
| 64 } | |
| 65 const base::DictionaryValue* response_object = | |
| 66 static_cast<const base::DictionaryValue*>(response_value.get()); | |
| 67 | |
| 68 // Get the status. | |
| 69 int status; | |
| 70 if (!response_object->GetInteger(kStatusString, &status)) { | |
| 71 DVLOG(1) << "ParseServerResponse: " << kStatusString | |
| 72 << " is not a valid integer value."; | |
| 73 return false; | |
| 74 } | |
| 75 | |
| 76 // Process the status. | |
| 77 switch (status) { | |
| 78 case kWebServiceStatusNoError: | |
| 79 break; | |
| 80 case kWebServiceStatusNoSpeech: | |
| 81 error->code = SPEECH_RECOGNITION_ERROR_NO_SPEECH; | |
| 82 return false; | |
| 83 case kWebServiceStatusNoMatch: | |
| 84 error->code = SPEECH_RECOGNITION_ERROR_NO_MATCH; | |
| 85 return false; | |
| 86 default: | |
| 87 error->code = SPEECH_RECOGNITION_ERROR_NETWORK; | |
| 88 // Other status codes should not be returned by the server. | |
| 89 DVLOG(1) << "ParseServerResponse: unexpected status code " << status; | |
| 90 return false; | |
| 91 } | |
| 92 | |
| 93 // Get the hypotheses. | |
| 94 const base::Value* hypotheses_value = NULL; | |
| 95 if (!response_object->Get(kHypothesesString, &hypotheses_value)) { | |
| 96 DVLOG(1) << "ParseServerResponse: Missing hypotheses attribute."; | |
| 97 return false; | |
| 98 } | |
| 99 | |
| 100 DCHECK(hypotheses_value); | |
| 101 if (!hypotheses_value->IsType(base::Value::TYPE_LIST)) { | |
| 102 DVLOG(1) << "ParseServerResponse: Unexpected hypotheses type " | |
| 103 << hypotheses_value->GetType(); | |
| 104 return false; | |
| 105 } | |
| 106 | |
| 107 const base::ListValue* hypotheses_list = | |
| 108 static_cast<const base::ListValue*>(hypotheses_value); | |
| 109 | |
| 110 // For now we support only single shot recognition, so we are giving only a | |
| 111 // final result, consisting of one fragment (with one or more hypotheses). | |
| 112 size_t index = 0; | |
| 113 for (; index < hypotheses_list->GetSize(); ++index) { | |
| 114 const base::Value* hypothesis = NULL; | |
| 115 if (!hypotheses_list->Get(index, &hypothesis)) { | |
| 116 LOG(WARNING) << "ParseServerResponse: Unable to read hypothesis value."; | |
| 117 break; | |
| 118 } | |
| 119 DCHECK(hypothesis); | |
| 120 if (!hypothesis->IsType(base::Value::TYPE_DICTIONARY)) { | |
| 121 LOG(WARNING) << "ParseServerResponse: Unexpected value type " | |
| 122 << hypothesis->GetType(); | |
| 123 break; | |
| 124 } | |
| 125 | |
| 126 const base::DictionaryValue* hypothesis_value = | |
| 127 static_cast<const base::DictionaryValue*>(hypothesis); | |
| 128 base::string16 utterance; | |
| 129 | |
| 130 if (!hypothesis_value->GetString(kUtteranceString, &utterance)) { | |
| 131 LOG(WARNING) << "ParseServerResponse: Missing utterance value."; | |
| 132 break; | |
| 133 } | |
| 134 | |
| 135 // It is not an error if the 'confidence' field is missing. | |
| 136 double confidence = 0.0; | |
| 137 hypothesis_value->GetDouble(kConfidenceString, &confidence); | |
| 138 result->hypotheses.push_back(SpeechRecognitionHypothesis(utterance, | |
| 139 confidence)); | |
| 140 } | |
| 141 | |
| 142 if (index < hypotheses_list->GetSize()) { | |
| 143 result->hypotheses.clear(); | |
| 144 return false; | |
| 145 } | |
| 146 return true; | |
| 147 } | |
| 148 | |
| 149 } // namespace | |
| 150 | |
// Duration of each audio packet callers should deliver; also used to size the
// trailing silence packet in AudioChunksEnded().
const int GoogleOneShotRemoteEngine::kAudioPacketIntervalMs = 100;
// URLFetcher id assigned to requests, overridable so tests can identify them.
int GoogleOneShotRemoteEngine::url_fetcher_id_for_tests = 0;
| 153 | |
// |context| provides the URLRequestContext used for the recognition request
// and, when language is unset, the accept-language list (see
// StartRecognition()); it is only dereferenced when non-null.
GoogleOneShotRemoteEngine::GoogleOneShotRemoteEngine(
    net::URLRequestContextGetter* context)
    : url_context_(context) {
}
| 158 | |
| 159 GoogleOneShotRemoteEngine::~GoogleOneShotRemoteEngine() {} | |
| 160 | |
// Stores the recognition configuration; takes effect on the next
// StartRecognition() call.
void GoogleOneShotRemoteEngine::SetConfig(
    const SpeechRecognitionEngineConfig& config) {
  config_ = config;
}
| 165 | |
| 166 void GoogleOneShotRemoteEngine::StartRecognition() { | |
| 167 DCHECK(delegate()); | |
| 168 DCHECK(!url_fetcher_.get()); | |
| 169 std::string lang_param = config_.language; | |
| 170 | |
| 171 if (lang_param.empty() && url_context_.get()) { | |
| 172 // If no language is provided then we use the first from the accepted | |
| 173 // language list. If this list is empty then it defaults to "en-US". | |
| 174 // Example of the contents of this list: "es,en-GB;q=0.8", "" | |
| 175 net::URLRequestContext* request_context = | |
| 176 url_context_->GetURLRequestContext(); | |
| 177 DCHECK(request_context); | |
| 178 // TODO(pauljensen): GoogleOneShotRemoteEngine should be constructed with | |
| 179 // a reference to the HttpUserAgentSettings rather than accessing the | |
| 180 // accept language through the URLRequestContext. | |
| 181 if (request_context->http_user_agent_settings()) { | |
| 182 std::string accepted_language_list = | |
| 183 request_context->http_user_agent_settings()->GetAcceptLanguage(); | |
| 184 size_t separator = accepted_language_list.find_first_of(",;"); | |
| 185 lang_param = accepted_language_list.substr(0, separator); | |
| 186 } | |
| 187 } | |
| 188 | |
| 189 if (lang_param.empty()) | |
| 190 lang_param = "en-US"; | |
| 191 | |
| 192 std::vector<std::string> parts; | |
| 193 parts.push_back("lang=" + net::EscapeQueryParamValue(lang_param, true)); | |
| 194 | |
| 195 if (!config_.grammars.empty()) { | |
| 196 DCHECK_EQ(config_.grammars.size(), 1U); | |
| 197 parts.push_back("lm=" + net::EscapeQueryParamValue(config_.grammars[0].url, | |
| 198 true)); | |
| 199 } | |
| 200 | |
| 201 if (!config_.hardware_info.empty()) | |
| 202 parts.push_back("xhw=" + net::EscapeQueryParamValue(config_.hardware_info, | |
| 203 true)); | |
| 204 parts.push_back("maxresults=" + base::UintToString(config_.max_hypotheses)); | |
| 205 parts.push_back(config_.filter_profanities ? "pfilter=2" : "pfilter=0"); | |
| 206 | |
| 207 std::string api_key = google_apis::GetAPIKey(); | |
| 208 parts.push_back("key=" + net::EscapeQueryParamValue(api_key, true)); | |
| 209 | |
| 210 GURL url(std::string(kDefaultSpeechRecognitionUrl) + | |
| 211 base::JoinString(parts, "&")); | |
| 212 | |
| 213 encoder_.reset(new AudioEncoder(config_.audio_sample_rate, | |
| 214 config_.audio_num_bits_per_sample)); | |
| 215 DCHECK(encoder_.get()); | |
| 216 url_fetcher_ = net::URLFetcher::Create(url_fetcher_id_for_tests, url, | |
| 217 net::URLFetcher::POST, this); | |
| 218 url_fetcher_->SetChunkedUpload(encoder_->GetMimeType()); | |
| 219 url_fetcher_->SetRequestContext(url_context_.get()); | |
| 220 url_fetcher_->SetReferrer(config_.origin_url); | |
| 221 | |
| 222 // The speech recognition API does not require user identification as part | |
| 223 // of requests, so we don't send cookies or auth data for these requests to | |
| 224 // prevent any accidental connection between users who are logged into the | |
| 225 // domain for other services (e.g. bookmark sync) with the speech requests. | |
| 226 url_fetcher_->SetLoadFlags(net::LOAD_DO_NOT_SAVE_COOKIES | | |
| 227 net::LOAD_DO_NOT_SEND_COOKIES | | |
| 228 net::LOAD_DO_NOT_SEND_AUTH_DATA); | |
| 229 url_fetcher_->Start(); | |
| 230 } | |
| 231 | |
// Aborts any in-flight request; destroying the URLFetcher cancels the
// transfer, so no completion callback will fire afterwards.
void GoogleOneShotRemoteEngine::EndRecognition() {
  url_fetcher_.reset();
}
| 235 | |
| 236 void GoogleOneShotRemoteEngine::TakeAudioChunk(const AudioChunk& data) { | |
| 237 DCHECK(url_fetcher_.get()); | |
| 238 DCHECK(encoder_.get()); | |
| 239 DCHECK_EQ(data.bytes_per_sample(), config_.audio_num_bits_per_sample / 8); | |
| 240 encoder_->Encode(data); | |
| 241 scoped_refptr<AudioChunk> encoded_data(encoder_->GetEncodedDataAndClear()); | |
| 242 url_fetcher_->AppendChunkToUpload(encoded_data->AsString(), false); | |
| 243 } | |
| 244 | |
| 245 void GoogleOneShotRemoteEngine::AudioChunksEnded() { | |
| 246 DCHECK(url_fetcher_.get()); | |
| 247 DCHECK(encoder_.get()); | |
| 248 | |
| 249 // UploadAudioChunk requires a non-empty final buffer. So we encode a packet | |
| 250 // of silence in case encoder had no data already. | |
| 251 size_t sample_count = | |
| 252 config_.audio_sample_rate * kAudioPacketIntervalMs / 1000; | |
| 253 scoped_refptr<AudioChunk> dummy_chunk(new AudioChunk( | |
| 254 sample_count * sizeof(int16_t), encoder_->GetBitsPerSample() / 8)); | |
| 255 encoder_->Encode(*dummy_chunk.get()); | |
| 256 encoder_->Flush(); | |
| 257 scoped_refptr<AudioChunk> encoded_dummy_data( | |
| 258 encoder_->GetEncodedDataAndClear()); | |
| 259 DCHECK(!encoded_dummy_data->IsEmpty()); | |
| 260 encoder_.reset(); | |
| 261 | |
| 262 url_fetcher_->AppendChunkToUpload(encoded_dummy_data->AsString(), true); | |
| 263 } | |
| 264 | |
| 265 void GoogleOneShotRemoteEngine::OnURLFetchComplete( | |
| 266 const net::URLFetcher* source) { | |
| 267 DCHECK_EQ(url_fetcher_.get(), source); | |
| 268 SpeechRecognitionResults results; | |
| 269 results.push_back(SpeechRecognitionResult()); | |
| 270 SpeechRecognitionResult& result = results.back(); | |
| 271 SpeechRecognitionError error(SPEECH_RECOGNITION_ERROR_NETWORK); | |
| 272 std::string data; | |
| 273 | |
| 274 // The default error code in case of parse errors is NETWORK_FAILURE, however | |
| 275 // ParseServerResponse can change the error to a more appropriate one. | |
| 276 bool error_occurred = (!source->GetStatus().is_success() || | |
| 277 source->GetResponseCode() != 200 || | |
| 278 !source->GetResponseAsString(&data) || | |
| 279 !ParseServerResponse(data, &result, &error)); | |
| 280 url_fetcher_.reset(); | |
| 281 if (error_occurred) { | |
| 282 DVLOG(1) << "GoogleOneShotRemoteEngine: Network Error " << error.code; | |
| 283 delegate()->OnSpeechRecognitionEngineError(error); | |
| 284 } else { | |
| 285 DVLOG(1) << "GoogleOneShotRemoteEngine: Invoking delegate with result."; | |
| 286 delegate()->OnSpeechRecognitionEngineResults(results); | |
| 287 } | |
| 288 } | |
| 289 | |
| 290 bool GoogleOneShotRemoteEngine::IsRecognitionPending() const { | |
| 291 return url_fetcher_ != NULL; | |
| 292 } | |
| 293 | |
// Callers should feed TakeAudioChunk() with packets of this duration.
int GoogleOneShotRemoteEngine::GetDesiredAudioChunkDurationMs() const {
  return kAudioPacketIntervalMs;
}
| 297 | |
| 298 } // namespace content | |
| OLD | NEW |