OLD | NEW |
| (Empty) |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "components/autocomplete/search_suggestion_parser.h" | |
6 | |
7 #include "base/i18n/icu_string_conversions.h" | |
8 #include "base/json/json_string_value_serializer.h" | |
9 #include "base/json/json_writer.h" | |
10 #include "base/logging.h" | |
11 #include "base/strings/string_util.h" | |
12 #include "base/strings/utf_string_conversions.h" | |
13 #include "base/values.h" | |
14 #include "components/autocomplete/autocomplete_input.h" | |
15 #include "components/autocomplete/url_prefix.h" | |
16 #include "components/url_fixer/url_fixer.h" | |
17 #include "net/base/net_util.h" | |
18 #include "net/http/http_response_headers.h" | |
19 #include "net/url_request/url_fetcher.h" | |
20 | |
21 namespace { | |
22 | |
23 AutocompleteMatchType::Type GetAutocompleteMatchType(const std::string& type) { | |
24 if (type == "ENTITY") | |
25 return AutocompleteMatchType::SEARCH_SUGGEST_ENTITY; | |
26 if (type == "INFINITE") | |
27 return AutocompleteMatchType::SEARCH_SUGGEST_INFINITE; | |
28 if (type == "PERSONALIZED_QUERY") | |
29 return AutocompleteMatchType::SEARCH_SUGGEST_PERSONALIZED; | |
30 if (type == "PROFILE") | |
31 return AutocompleteMatchType::SEARCH_SUGGEST_PROFILE; | |
32 if (type == "NAVIGATION") | |
33 return AutocompleteMatchType::NAVSUGGEST; | |
34 if (type == "PERSONALIZED_NAVIGATION") | |
35 return AutocompleteMatchType::NAVSUGGEST_PERSONALIZED; | |
36 return AutocompleteMatchType::SEARCH_SUGGEST; | |
37 } | |
38 | |
39 } // namespace | |
40 | |
41 // SearchSuggestionParser::Result ---------------------------------------------- | |
42 | |
43 SearchSuggestionParser::Result::Result(bool from_keyword_provider, | |
44 int relevance, | |
45 bool relevance_from_server, | |
46 AutocompleteMatchType::Type type, | |
47 const std::string& deletion_url) | |
48 : from_keyword_provider_(from_keyword_provider), | |
49 type_(type), | |
50 relevance_(relevance), | |
51 relevance_from_server_(relevance_from_server), | |
52 deletion_url_(deletion_url) {} | |
53 | |
54 SearchSuggestionParser::Result::~Result() {} | |
55 | |
56 // SearchSuggestionParser::SuggestResult --------------------------------------- | |
57 | |
58 SearchSuggestionParser::SuggestResult::SuggestResult( | |
59 const base::string16& suggestion, | |
60 AutocompleteMatchType::Type type, | |
61 const base::string16& match_contents, | |
62 const base::string16& match_contents_prefix, | |
63 const base::string16& annotation, | |
64 const base::string16& answer_contents, | |
65 const base::string16& answer_type, | |
66 const std::string& suggest_query_params, | |
67 const std::string& deletion_url, | |
68 bool from_keyword_provider, | |
69 int relevance, | |
70 bool relevance_from_server, | |
71 bool should_prefetch, | |
72 const base::string16& input_text) | |
73 : Result(from_keyword_provider, | |
74 relevance, | |
75 relevance_from_server, | |
76 type, | |
77 deletion_url), | |
78 suggestion_(suggestion), | |
79 match_contents_prefix_(match_contents_prefix), | |
80 annotation_(annotation), | |
81 suggest_query_params_(suggest_query_params), | |
82 answer_contents_(answer_contents), | |
83 answer_type_(answer_type), | |
84 should_prefetch_(should_prefetch) { | |
85 match_contents_ = match_contents; | |
86 DCHECK(!match_contents_.empty()); | |
87 ClassifyMatchContents(true, input_text); | |
88 } | |
89 | |
90 SearchSuggestionParser::SuggestResult::~SuggestResult() {} | |
91 | |
92 void SearchSuggestionParser::SuggestResult::ClassifyMatchContents( | |
93 const bool allow_bolding_all, | |
94 const base::string16& input_text) { | |
95 if (input_text.empty()) { | |
96 // In case of zero-suggest results, do not highlight matches. | |
97 match_contents_class_.push_back( | |
98 ACMatchClassification(0, ACMatchClassification::NONE)); | |
99 return; | |
100 } | |
101 | |
102 base::string16 lookup_text = input_text; | |
103 if (type_ == AutocompleteMatchType::SEARCH_SUGGEST_INFINITE) { | |
104 const size_t contents_index = | |
105 suggestion_.length() - match_contents_.length(); | |
106 // Ensure the query starts with the input text, and ends with the match | |
107 // contents, and the input text has an overlap with contents. | |
108 if (StartsWith(suggestion_, input_text, true) && | |
109 EndsWith(suggestion_, match_contents_, true) && | |
110 (input_text.length() > contents_index)) { | |
111 lookup_text = input_text.substr(contents_index); | |
112 } | |
113 } | |
114 size_t lookup_position = match_contents_.find(lookup_text); | |
115 if (!allow_bolding_all && (lookup_position == base::string16::npos)) { | |
116 // Bail if the code below to update the bolding would bold the whole | |
117 // string. Note that the string may already be entirely bolded; if | |
118 // so, leave it as is. | |
119 return; | |
120 } | |
121 match_contents_class_.clear(); | |
122 // We do intra-string highlighting for suggestions - the suggested segment | |
123 // will be highlighted, e.g. for input_text = "you" the suggestion may be | |
124 // "youtube", so we'll bold the "tube" section: you*tube*. | |
125 if (input_text != match_contents_) { | |
126 if (lookup_position == base::string16::npos) { | |
127 // The input text is not a substring of the query string, e.g. input | |
128 // text is "slasdot" and the query string is "slashdot", so we bold the | |
129 // whole thing. | |
130 match_contents_class_.push_back( | |
131 ACMatchClassification(0, ACMatchClassification::MATCH)); | |
132 } else { | |
133 // We don't iterate over the string here annotating all matches because | |
134 // it looks odd to have every occurrence of a substring that may be as | |
135 // short as a single character highlighted in a query suggestion result, | |
136 // e.g. for input text "s" and query string "southwest airlines", it | |
137 // looks odd if both the first and last s are highlighted. | |
138 if (lookup_position != 0) { | |
139 match_contents_class_.push_back( | |
140 ACMatchClassification(0, ACMatchClassification::MATCH)); | |
141 } | |
142 match_contents_class_.push_back( | |
143 ACMatchClassification(lookup_position, ACMatchClassification::NONE)); | |
144 size_t next_fragment_position = lookup_position + lookup_text.length(); | |
145 if (next_fragment_position < match_contents_.length()) { | |
146 match_contents_class_.push_back(ACMatchClassification( | |
147 next_fragment_position, ACMatchClassification::MATCH)); | |
148 } | |
149 } | |
150 } else { | |
151 // Otherwise, match_contents_ is a verbatim (what-you-typed) match, either | |
152 // for the default provider or a keyword search provider. | |
153 match_contents_class_.push_back( | |
154 ACMatchClassification(0, ACMatchClassification::NONE)); | |
155 } | |
156 } | |
157 | |
158 int SearchSuggestionParser::SuggestResult::CalculateRelevance( | |
159 const AutocompleteInput& input, | |
160 bool keyword_provider_requested) const { | |
161 if (!from_keyword_provider_ && keyword_provider_requested) | |
162 return 100; | |
163 return ((input.type() == metrics::OmniboxInputType::URL) ? 300 : 600); | |
164 } | |
165 | |
166 // SearchSuggestionParser::NavigationResult ------------------------------------ | |
167 | |
168 SearchSuggestionParser::NavigationResult::NavigationResult( | |
169 const AutocompleteSchemeClassifier& scheme_classifier, | |
170 const GURL& url, | |
171 AutocompleteMatchType::Type type, | |
172 const base::string16& description, | |
173 const std::string& deletion_url, | |
174 bool from_keyword_provider, | |
175 int relevance, | |
176 bool relevance_from_server, | |
177 const base::string16& input_text, | |
178 const std::string& languages) | |
179 : Result(from_keyword_provider, relevance, relevance_from_server, type, | |
180 deletion_url), | |
181 url_(url), | |
182 formatted_url_(AutocompleteInput::FormattedStringWithEquivalentMeaning( | |
183 url, net::FormatUrl(url, languages, | |
184 net::kFormatUrlOmitAll & ~net::kFormatUrlOmitHTTP, | |
185 net::UnescapeRule::SPACES, NULL, NULL, NULL), | |
186 scheme_classifier)), | |
187 description_(description) { | |
188 DCHECK(url_.is_valid()); | |
189 CalculateAndClassifyMatchContents(true, input_text, languages); | |
190 } | |
191 | |
192 SearchSuggestionParser::NavigationResult::~NavigationResult() {} | |
193 | |
194 void | |
195 SearchSuggestionParser::NavigationResult::CalculateAndClassifyMatchContents( | |
196 const bool allow_bolding_nothing, | |
197 const base::string16& input_text, | |
198 const std::string& languages) { | |
199 if (input_text.empty()) { | |
200 // In case of zero-suggest results, do not highlight matches. | |
201 match_contents_class_.push_back( | |
202 ACMatchClassification(0, ACMatchClassification::NONE)); | |
203 return; | |
204 } | |
205 | |
206 // First look for the user's input inside the formatted url as it would be | |
207 // without trimming the scheme, so we can find matches at the beginning of the | |
208 // scheme. | |
209 const URLPrefix* prefix = | |
210 URLPrefix::BestURLPrefix(formatted_url_, input_text); | |
211 size_t match_start = (prefix == NULL) ? | |
212 formatted_url_.find(input_text) : prefix->prefix.length(); | |
213 bool trim_http = !AutocompleteInput::HasHTTPScheme(input_text) && | |
214 (!prefix || (match_start != 0)); | |
215 const net::FormatUrlTypes format_types = | |
216 net::kFormatUrlOmitAll & ~(trim_http ? 0 : net::kFormatUrlOmitHTTP); | |
217 | |
218 base::string16 match_contents = net::FormatUrl(url_, languages, format_types, | |
219 net::UnescapeRule::SPACES, NULL, NULL, &match_start); | |
220 // If the first match in the untrimmed string was inside a scheme that we | |
221 // trimmed, look for a subsequent match. | |
222 if (match_start == base::string16::npos) | |
223 match_start = match_contents.find(input_text); | |
224 // Update |match_contents_| and |match_contents_class_| if it's allowed. | |
225 if (allow_bolding_nothing || (match_start != base::string16::npos)) { | |
226 match_contents_ = match_contents; | |
227 // Safe if |match_start| is npos; also safe if the input is longer than the | |
228 // remaining contents after |match_start|. | |
229 AutocompleteMatch::ClassifyLocationInString(match_start, | |
230 input_text.length(), match_contents_.length(), | |
231 ACMatchClassification::URL, &match_contents_class_); | |
232 } | |
233 } | |
234 | |
235 int SearchSuggestionParser::NavigationResult::CalculateRelevance( | |
236 const AutocompleteInput& input, | |
237 bool keyword_provider_requested) const { | |
238 return (from_keyword_provider_ || !keyword_provider_requested) ? 800 : 150; | |
239 } | |
240 | |
241 // SearchSuggestionParser::Results --------------------------------------------- | |
242 | |
243 SearchSuggestionParser::Results::Results() | |
244 : verbatim_relevance(-1), | |
245 field_trial_triggered(false), | |
246 relevances_from_server(false) {} | |
247 | |
248 SearchSuggestionParser::Results::~Results() {} | |
249 | |
250 void SearchSuggestionParser::Results::Clear() { | |
251 suggest_results.clear(); | |
252 navigation_results.clear(); | |
253 verbatim_relevance = -1; | |
254 metadata.clear(); | |
255 } | |
256 | |
257 bool SearchSuggestionParser::Results::HasServerProvidedScores() const { | |
258 if (verbatim_relevance >= 0) | |
259 return true; | |
260 | |
261 // Right now either all results of one type will be server-scored or they will | |
262 // all be locally scored, but in case we change this later, we'll just check | |
263 // them all. | |
264 for (SuggestResults::const_iterator i(suggest_results.begin()); | |
265 i != suggest_results.end(); ++i) { | |
266 if (i->relevance_from_server()) | |
267 return true; | |
268 } | |
269 for (NavigationResults::const_iterator i(navigation_results.begin()); | |
270 i != navigation_results.end(); ++i) { | |
271 if (i->relevance_from_server()) | |
272 return true; | |
273 } | |
274 | |
275 return false; | |
276 } | |
277 | |
278 // SearchSuggestionParser ------------------------------------------------------ | |
279 | |
280 // static | |
281 std::string SearchSuggestionParser::ExtractJsonData( | |
282 const net::URLFetcher* source) { | |
283 const net::HttpResponseHeaders* const response_headers = | |
284 source->GetResponseHeaders(); | |
285 std::string json_data; | |
286 source->GetResponseAsString(&json_data); | |
287 | |
288 // JSON is supposed to be UTF-8, but some suggest service providers send | |
289 // JSON files in non-UTF-8 encodings. The actual encoding is usually | |
290 // specified in the Content-Type header field. | |
291 if (response_headers) { | |
292 std::string charset; | |
293 if (response_headers->GetCharset(&charset)) { | |
294 base::string16 data_16; | |
295 // TODO(jungshik): Switch to CodePageToUTF8 after it's added. | |
296 if (base::CodepageToUTF16(json_data, charset.c_str(), | |
297 base::OnStringConversionError::FAIL, | |
298 &data_16)) | |
299 json_data = base::UTF16ToUTF8(data_16); | |
300 } | |
301 } | |
302 return json_data; | |
303 } | |
304 | |
305 // static | |
306 scoped_ptr<base::Value> SearchSuggestionParser::DeserializeJsonData( | |
307 std::string json_data) { | |
308 // The JSON response should be an array. | |
309 for (size_t response_start_index = json_data.find("["), i = 0; | |
310 response_start_index != std::string::npos && i < 5; | |
311 response_start_index = json_data.find("[", 1), i++) { | |
312 // Remove any XSSI guards to allow for JSON parsing. | |
313 if (response_start_index > 0) | |
314 json_data.erase(0, response_start_index); | |
315 | |
316 JSONStringValueSerializer deserializer(json_data); | |
317 deserializer.set_allow_trailing_comma(true); | |
318 int error_code = 0; | |
319 scoped_ptr<base::Value> data(deserializer.Deserialize(&error_code, NULL)); | |
320 if (error_code == 0) | |
321 return data.Pass(); | |
322 } | |
323 return scoped_ptr<base::Value>(); | |
324 } | |
325 | |
326 // static | |
327 bool SearchSuggestionParser::ParseSuggestResults( | |
328 const base::Value& root_val, | |
329 const AutocompleteInput& input, | |
330 const AutocompleteSchemeClassifier& scheme_classifier, | |
331 int default_result_relevance, | |
332 const std::string& languages, | |
333 bool is_keyword_result, | |
334 Results* results) { | |
335 base::string16 query; | |
336 const base::ListValue* root_list = NULL; | |
337 const base::ListValue* results_list = NULL; | |
338 | |
339 if (!root_val.GetAsList(&root_list) || !root_list->GetString(0, &query) || | |
340 query != input.text() || !root_list->GetList(1, &results_list)) | |
341 return false; | |
342 | |
343 // 3rd element: Description list. | |
344 const base::ListValue* descriptions = NULL; | |
345 root_list->GetList(2, &descriptions); | |
346 | |
347 // 4th element: Disregard the query URL list for now. | |
348 | |
349 // Reset suggested relevance information. | |
350 results->verbatim_relevance = -1; | |
351 | |
352 // 5th element: Optional key-value pairs from the Suggest server. | |
353 const base::ListValue* types = NULL; | |
354 const base::ListValue* relevances = NULL; | |
355 const base::ListValue* suggestion_details = NULL; | |
356 const base::DictionaryValue* extras = NULL; | |
357 int prefetch_index = -1; | |
358 if (root_list->GetDictionary(4, &extras)) { | |
359 extras->GetList("google:suggesttype", &types); | |
360 | |
361 // Discard this list if its size does not match that of the suggestions. | |
362 if (extras->GetList("google:suggestrelevance", &relevances) && | |
363 (relevances->GetSize() != results_list->GetSize())) | |
364 relevances = NULL; | |
365 extras->GetInteger("google:verbatimrelevance", | |
366 &results->verbatim_relevance); | |
367 | |
368 // Check if the active suggest field trial (if any) has triggered either | |
369 // for the default provider or keyword provider. | |
370 results->field_trial_triggered = false; | |
371 extras->GetBoolean("google:fieldtrialtriggered", | |
372 &results->field_trial_triggered); | |
373 | |
374 const base::DictionaryValue* client_data = NULL; | |
375 if (extras->GetDictionary("google:clientdata", &client_data) && client_data) | |
376 client_data->GetInteger("phi", &prefetch_index); | |
377 | |
378 if (extras->GetList("google:suggestdetail", &suggestion_details) && | |
379 suggestion_details->GetSize() != results_list->GetSize()) | |
380 suggestion_details = NULL; | |
381 | |
382 // Store the metadata that came with the response in case we need to pass it | |
383 // along with the prefetch query to Instant. | |
384 JSONStringValueSerializer json_serializer(&results->metadata); | |
385 json_serializer.Serialize(*extras); | |
386 } | |
387 | |
388 // Clear the previous results now that new results are available. | |
389 results->suggest_results.clear(); | |
390 results->navigation_results.clear(); | |
391 results->answers_image_urls.clear(); | |
392 | |
393 base::string16 suggestion; | |
394 std::string type; | |
395 int relevance = default_result_relevance; | |
396 // Prohibit navsuggest in FORCED_QUERY mode. Users wants queries, not URLs. | |
397 const bool allow_navsuggest = | |
398 input.type() != metrics::OmniboxInputType::FORCED_QUERY; | |
399 const base::string16& trimmed_input = | |
400 base::CollapseWhitespace(input.text(), false); | |
401 for (size_t index = 0; results_list->GetString(index, &suggestion); ++index) { | |
402 // Google search may return empty suggestions for weird input characters, | |
403 // they make no sense at all and can cause problems in our code. | |
404 if (suggestion.empty()) | |
405 continue; | |
406 | |
407 // Apply valid suggested relevance scores; discard invalid lists. | |
408 if (relevances != NULL && !relevances->GetInteger(index, &relevance)) | |
409 relevances = NULL; | |
410 AutocompleteMatchType::Type match_type = | |
411 AutocompleteMatchType::SEARCH_SUGGEST; | |
412 if (types && types->GetString(index, &type)) | |
413 match_type = GetAutocompleteMatchType(type); | |
414 const base::DictionaryValue* suggestion_detail = NULL; | |
415 std::string deletion_url; | |
416 | |
417 if (suggestion_details && | |
418 suggestion_details->GetDictionary(index, &suggestion_detail)) | |
419 suggestion_detail->GetString("du", &deletion_url); | |
420 | |
421 if ((match_type == AutocompleteMatchType::NAVSUGGEST) || | |
422 (match_type == AutocompleteMatchType::NAVSUGGEST_PERSONALIZED)) { | |
423 // Do not blindly trust the URL coming from the server to be valid. | |
424 GURL url( | |
425 url_fixer::FixupURL(base::UTF16ToUTF8(suggestion), std::string())); | |
426 if (url.is_valid() && allow_navsuggest) { | |
427 base::string16 title; | |
428 if (descriptions != NULL) | |
429 descriptions->GetString(index, &title); | |
430 results->navigation_results.push_back(NavigationResult( | |
431 scheme_classifier, url, match_type, title, deletion_url, | |
432 is_keyword_result, relevance, relevances != NULL, input.text(), | |
433 languages)); | |
434 } | |
435 } else { | |
436 base::string16 match_contents = suggestion; | |
437 base::string16 match_contents_prefix; | |
438 base::string16 annotation; | |
439 base::string16 answer_contents; | |
440 base::string16 answer_type; | |
441 std::string suggest_query_params; | |
442 | |
443 if (suggestion_details) { | |
444 suggestion_details->GetDictionary(index, &suggestion_detail); | |
445 if (suggestion_detail) { | |
446 suggestion_detail->GetString("t", &match_contents); | |
447 suggestion_detail->GetString("mp", &match_contents_prefix); | |
448 // Error correction for bad data from server. | |
449 if (match_contents.empty()) | |
450 match_contents = suggestion; | |
451 suggestion_detail->GetString("a", &annotation); | |
452 suggestion_detail->GetString("q", &suggest_query_params); | |
453 | |
454 // Extract Answers, if provided. | |
455 const base::DictionaryValue* answer_json = NULL; | |
456 if (suggestion_detail->GetDictionary("ansa", &answer_json)) { | |
457 match_type = AutocompleteMatchType::SEARCH_SUGGEST_ANSWER; | |
458 GetAnswersImageURLs(answer_json, &results->answers_image_urls); | |
459 std::string contents; | |
460 base::JSONWriter::Write(answer_json, &contents); | |
461 answer_contents = base::UTF8ToUTF16(contents); | |
462 suggestion_detail->GetString("ansb", &answer_type); | |
463 } | |
464 } | |
465 } | |
466 | |
467 bool should_prefetch = static_cast<int>(index) == prefetch_index; | |
468 // TODO(kochi): Improve calculator suggestion presentation. | |
469 results->suggest_results.push_back(SuggestResult( | |
470 base::CollapseWhitespace(suggestion, false), match_type, | |
471 base::CollapseWhitespace(match_contents, false), | |
472 match_contents_prefix, annotation, answer_contents, answer_type, | |
473 suggest_query_params, deletion_url, is_keyword_result, relevance, | |
474 relevances != NULL, should_prefetch, trimmed_input)); | |
475 } | |
476 } | |
477 results->relevances_from_server = relevances != NULL; | |
478 return true; | |
479 } | |
480 | |
481 // static | |
482 void SearchSuggestionParser::GetAnswersImageURLs( | |
483 const base::DictionaryValue* answer_json, | |
484 std::vector<GURL>* urls) { | |
485 DCHECK(answer_json); | |
486 const base::ListValue* lines = NULL; | |
487 answer_json->GetList("l", &lines); | |
488 if (!lines || lines->GetSize() == 0) | |
489 return; | |
490 | |
491 for (size_t line = 0; line < lines->GetSize(); ++line) { | |
492 const base::DictionaryValue* imageLine = NULL; | |
493 lines->GetDictionary(line, &imageLine); | |
494 if (!imageLine) | |
495 continue; | |
496 const base::DictionaryValue* imageData = NULL; | |
497 imageLine->GetDictionary("i", &imageData); | |
498 if (!imageData) | |
499 continue; | |
500 std::string imageUrl; | |
501 imageData->GetString("d", &imageUrl); | |
502 urls->push_back(GURL(imageUrl)); | |
503 } | |
504 } | |
OLD | NEW |