OLD | NEW |
| (Empty) |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "content/browser/speech/speech_recognition_request.h" | |
6 | |
7 #include <vector> | |
8 | |
9 #include "base/json/json_reader.h" | |
10 #include "base/string_number_conversions.h" | |
11 #include "base/string_util.h" | |
12 #include "base/values.h" | |
13 #include "content/browser/speech/audio_buffer.h" | |
14 #include "content/common/net/url_fetcher_impl.h" | |
15 #include "content/public/common/speech_recognition_result.h" | |
16 #include "net/base/escape.h" | |
17 #include "net/base/load_flags.h" | |
18 #include "net/url_request/url_request_context.h" | |
19 #include "net/url_request/url_request_context_getter.h" | |
20 #include "net/url_request/url_request_status.h" | |
21 | |
22 namespace { | |
23 | |
// Endpoint of the Google speech recognition web service. Query parameters
// (lang, lm, xhw, maxresults, pfilter) are appended to this base URL in
// SpeechRecognitionRequest::Start().
const char* const kDefaultSpeechRecognitionUrl =
    "https://www.google.com/speech-api/v1/recognize?xjerr=1&client=chromium&";
// JSON keys of the server response consumed by ParseServerResponse().
const char* const kStatusString = "status";
const char* const kHypothesesString = "hypotheses";
const char* const kUtteranceString = "utterance";
const char* const kConfidenceString = "confidence";

// Maximum number of hypotheses requested from the server ("maxresults=").
// TODO(satish): Remove this hardcoded value once the page is allowed to
// set this via an attribute.
const int kMaxResults = 6;
34 | |
35 bool ParseServerResponse(const std::string& response_body, | |
36 content::SpeechRecognitionResult* result) { | |
37 if (response_body.empty()) { | |
38 LOG(WARNING) << "ParseServerResponse: Response was empty."; | |
39 return false; | |
40 } | |
41 DVLOG(1) << "ParseServerResponse: Parsing response " << response_body; | |
42 | |
43 // Parse the response, ignoring comments. | |
44 std::string error_msg; | |
45 scoped_ptr<Value> response_value(base::JSONReader::ReadAndReturnError( | |
46 response_body, false, NULL, &error_msg)); | |
47 if (response_value == NULL) { | |
48 LOG(WARNING) << "ParseServerResponse: JSONReader failed : " << error_msg; | |
49 return false; | |
50 } | |
51 | |
52 if (!response_value->IsType(Value::TYPE_DICTIONARY)) { | |
53 VLOG(1) << "ParseServerResponse: Unexpected response type " | |
54 << response_value->GetType(); | |
55 return false; | |
56 } | |
57 const DictionaryValue* response_object = | |
58 static_cast<DictionaryValue*>(response_value.get()); | |
59 | |
60 // Get the status. | |
61 int status; | |
62 if (!response_object->GetInteger(kStatusString, &status)) { | |
63 VLOG(1) << "ParseServerResponse: " << kStatusString | |
64 << " is not a valid integer value."; | |
65 return false; | |
66 } | |
67 | |
68 // Process the status. | |
69 switch (status) { | |
70 case content::SPEECH_RECOGNITION_ERROR_NONE: | |
71 case content::SPEECH_RECOGNITION_ERROR_NO_SPEECH: | |
72 case content::SPEECH_RECOGNITION_ERROR_NO_MATCH: | |
73 break; | |
74 | |
75 default: | |
76 // Other status codes should not be returned by the server. | |
77 VLOG(1) << "ParseServerResponse: unexpected status code " << status; | |
78 return false; | |
79 } | |
80 | |
81 result->error = static_cast<content::SpeechRecognitionErrorCode>(status); | |
82 | |
83 // Get the hypotheses. | |
84 Value* hypotheses_value = NULL; | |
85 if (!response_object->Get(kHypothesesString, &hypotheses_value)) { | |
86 VLOG(1) << "ParseServerResponse: Missing hypotheses attribute."; | |
87 return false; | |
88 } | |
89 | |
90 DCHECK(hypotheses_value); | |
91 if (!hypotheses_value->IsType(Value::TYPE_LIST)) { | |
92 VLOG(1) << "ParseServerResponse: Unexpected hypotheses type " | |
93 << hypotheses_value->GetType(); | |
94 return false; | |
95 } | |
96 | |
97 const ListValue* hypotheses_list = static_cast<ListValue*>(hypotheses_value); | |
98 | |
99 size_t index = 0; | |
100 for (; index < hypotheses_list->GetSize(); ++index) { | |
101 Value* hypothesis = NULL; | |
102 if (!hypotheses_list->Get(index, &hypothesis)) { | |
103 LOG(WARNING) << "ParseServerResponse: Unable to read hypothesis value."; | |
104 break; | |
105 } | |
106 DCHECK(hypothesis); | |
107 if (!hypothesis->IsType(Value::TYPE_DICTIONARY)) { | |
108 LOG(WARNING) << "ParseServerResponse: Unexpected value type " | |
109 << hypothesis->GetType(); | |
110 break; | |
111 } | |
112 | |
113 const DictionaryValue* hypothesis_value = | |
114 static_cast<DictionaryValue*>(hypothesis); | |
115 string16 utterance; | |
116 if (!hypothesis_value->GetString(kUtteranceString, &utterance)) { | |
117 LOG(WARNING) << "ParseServerResponse: Missing utterance value."; | |
118 break; | |
119 } | |
120 | |
121 // It is not an error if the 'confidence' field is missing. | |
122 double confidence = 0.0; | |
123 hypothesis_value->GetDouble(kConfidenceString, &confidence); | |
124 | |
125 result->hypotheses.push_back(content::SpeechRecognitionHypothesis( | |
126 utterance, confidence)); | |
127 } | |
128 | |
129 if (index < hypotheses_list->GetSize()) { | |
130 result->hypotheses.clear(); | |
131 return false; | |
132 } | |
133 | |
134 return true; | |
135 } | |
136 | |
137 } // namespace | |
138 | |
139 namespace speech { | |
140 | |
141 int SpeechRecognitionRequest::url_fetcher_id_for_tests = 0; | |
142 | |
// Constructs a request that issues fetches through |context| and reports
// results to |delegate|. |delegate| must be non-NULL (DCHECKed). |context|
// may be NULL; Start() only dereferences it after checking, falling back to
// the default language. Both are assumed to outlive this object — TODO
// confirm against callers.
SpeechRecognitionRequest::SpeechRecognitionRequest(
    net::URLRequestContextGetter* context, Delegate* delegate)
    : url_context_(context),
      delegate_(delegate) {
  DCHECK(delegate);
}
149 | |
150 SpeechRecognitionRequest::~SpeechRecognitionRequest() {} | |
151 | |
152 void SpeechRecognitionRequest::Start(const std::string& language, | |
153 const std::string& grammar, | |
154 bool filter_profanities, | |
155 const std::string& hardware_info, | |
156 const std::string& origin_url, | |
157 const std::string& content_type) { | |
158 DCHECK(!url_fetcher_.get()); | |
159 | |
160 std::vector<std::string> parts; | |
161 | |
162 std::string lang_param = language; | |
163 if (lang_param.empty() && url_context_) { | |
164 // If no language is provided then we use the first from the accepted | |
165 // language list. If this list is empty then it defaults to "en-US". | |
166 // Example of the contents of this list: "es,en-GB;q=0.8", "" | |
167 net::URLRequestContext* request_context = | |
168 url_context_->GetURLRequestContext(); | |
169 DCHECK(request_context); | |
170 std::string accepted_language_list = request_context->accept_language(); | |
171 size_t separator = accepted_language_list.find_first_of(",;"); | |
172 lang_param = accepted_language_list.substr(0, separator); | |
173 } | |
174 if (lang_param.empty()) | |
175 lang_param = "en-US"; | |
176 parts.push_back("lang=" + net::EscapeQueryParamValue(lang_param, true)); | |
177 | |
178 if (!grammar.empty()) | |
179 parts.push_back("lm=" + net::EscapeQueryParamValue(grammar, true)); | |
180 if (!hardware_info.empty()) | |
181 parts.push_back("xhw=" + net::EscapeQueryParamValue(hardware_info, true)); | |
182 parts.push_back("maxresults=" + base::IntToString(kMaxResults)); | |
183 parts.push_back(filter_profanities ? "pfilter=2" : "pfilter=0"); | |
184 | |
185 GURL url(std::string(kDefaultSpeechRecognitionUrl) + JoinString(parts, '&')); | |
186 | |
187 url_fetcher_.reset(URLFetcherImpl::Create(url_fetcher_id_for_tests, | |
188 url, | |
189 URLFetcherImpl::POST, | |
190 this)); | |
191 url_fetcher_->SetChunkedUpload(content_type); | |
192 url_fetcher_->SetRequestContext(url_context_); | |
193 url_fetcher_->SetReferrer(origin_url); | |
194 | |
195 // The speech recognition API does not require user identification as part | |
196 // of requests, so we don't send cookies or auth data for these requests to | |
197 // prevent any accidental connection between users who are logged into the | |
198 // domain for other services (e.g. bookmark sync) with the speech requests. | |
199 url_fetcher_->SetLoadFlags( | |
200 net::LOAD_DO_NOT_SAVE_COOKIES | net::LOAD_DO_NOT_SEND_COOKIES | | |
201 net::LOAD_DO_NOT_SEND_AUTH_DATA); | |
202 url_fetcher_->Start(); | |
203 } | |
204 | |
// Streams one chunk of captured audio to the server as part of the chunked
// POST started in Start() (which must have been called first — DCHECKed).
// |is_last_chunk| marks the end of the upload.
void SpeechRecognitionRequest::UploadAudioChunk(const AudioChunk& audio_chunk,
                                                bool is_last_chunk) {
  DCHECK(url_fetcher_.get());
  url_fetcher_->AppendChunkToUpload(audio_chunk.AsString(), is_last_chunk);
}
210 | |
211 void SpeechRecognitionRequest::OnURLFetchComplete( | |
212 const content::URLFetcher* source) { | |
213 DCHECK_EQ(url_fetcher_.get(), source); | |
214 | |
215 content::SpeechRecognitionResult result; | |
216 std::string data; | |
217 if (!source->GetStatus().is_success() || source->GetResponseCode() != 200 || | |
218 !source->GetResponseAsString(&data) || | |
219 !ParseServerResponse(data, &result)) { | |
220 result.error = content::SPEECH_RECOGNITION_ERROR_NETWORK; | |
221 } | |
222 | |
223 DVLOG(1) << "SpeechRecognitionRequest: Invoking delegate with result."; | |
224 url_fetcher_.reset(); | |
225 delegate_->SetRecognitionResult(result); | |
226 } | |
227 | |
228 } // namespace speech | |
OLD | NEW |