Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(346)

Side by Side Diff: components/autocomplete/search_suggestion_parser.cc

Issue 443043003: Rename components/autocomplete to components/omnibox (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: rebase Created 6 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/autocomplete/search_suggestion_parser.h"
6
7 #include "base/i18n/icu_string_conversions.h"
8 #include "base/json/json_string_value_serializer.h"
9 #include "base/json/json_writer.h"
10 #include "base/logging.h"
11 #include "base/strings/string_util.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "base/values.h"
14 #include "components/autocomplete/autocomplete_input.h"
15 #include "components/autocomplete/url_prefix.h"
16 #include "components/url_fixer/url_fixer.h"
17 #include "net/base/net_util.h"
18 #include "net/http/http_response_headers.h"
19 #include "net/url_request/url_fetcher.h"
20
21 namespace {
22
23 AutocompleteMatchType::Type GetAutocompleteMatchType(const std::string& type) {
24 if (type == "ENTITY")
25 return AutocompleteMatchType::SEARCH_SUGGEST_ENTITY;
26 if (type == "INFINITE")
27 return AutocompleteMatchType::SEARCH_SUGGEST_INFINITE;
28 if (type == "PERSONALIZED_QUERY")
29 return AutocompleteMatchType::SEARCH_SUGGEST_PERSONALIZED;
30 if (type == "PROFILE")
31 return AutocompleteMatchType::SEARCH_SUGGEST_PROFILE;
32 if (type == "NAVIGATION")
33 return AutocompleteMatchType::NAVSUGGEST;
34 if (type == "PERSONALIZED_NAVIGATION")
35 return AutocompleteMatchType::NAVSUGGEST_PERSONALIZED;
36 return AutocompleteMatchType::SEARCH_SUGGEST;
37 }
38
39 } // namespace
40
41 // SearchSuggestionParser::Result ----------------------------------------------
42
43 SearchSuggestionParser::Result::Result(bool from_keyword_provider,
44 int relevance,
45 bool relevance_from_server,
46 AutocompleteMatchType::Type type,
47 const std::string& deletion_url)
48 : from_keyword_provider_(from_keyword_provider),
49 type_(type),
50 relevance_(relevance),
51 relevance_from_server_(relevance_from_server),
52 deletion_url_(deletion_url) {}
53
54 SearchSuggestionParser::Result::~Result() {}
55
56 // SearchSuggestionParser::SuggestResult ---------------------------------------
57
58 SearchSuggestionParser::SuggestResult::SuggestResult(
59 const base::string16& suggestion,
60 AutocompleteMatchType::Type type,
61 const base::string16& match_contents,
62 const base::string16& match_contents_prefix,
63 const base::string16& annotation,
64 const base::string16& answer_contents,
65 const base::string16& answer_type,
66 const std::string& suggest_query_params,
67 const std::string& deletion_url,
68 bool from_keyword_provider,
69 int relevance,
70 bool relevance_from_server,
71 bool should_prefetch,
72 const base::string16& input_text)
73 : Result(from_keyword_provider,
74 relevance,
75 relevance_from_server,
76 type,
77 deletion_url),
78 suggestion_(suggestion),
79 match_contents_prefix_(match_contents_prefix),
80 annotation_(annotation),
81 suggest_query_params_(suggest_query_params),
82 answer_contents_(answer_contents),
83 answer_type_(answer_type),
84 should_prefetch_(should_prefetch) {
85 match_contents_ = match_contents;
86 DCHECK(!match_contents_.empty());
87 ClassifyMatchContents(true, input_text);
88 }
89
90 SearchSuggestionParser::SuggestResult::~SuggestResult() {}
91
92 void SearchSuggestionParser::SuggestResult::ClassifyMatchContents(
93 const bool allow_bolding_all,
94 const base::string16& input_text) {
95 if (input_text.empty()) {
96 // In case of zero-suggest results, do not highlight matches.
97 match_contents_class_.push_back(
98 ACMatchClassification(0, ACMatchClassification::NONE));
99 return;
100 }
101
102 base::string16 lookup_text = input_text;
103 if (type_ == AutocompleteMatchType::SEARCH_SUGGEST_INFINITE) {
104 const size_t contents_index =
105 suggestion_.length() - match_contents_.length();
106 // Ensure the query starts with the input text, and ends with the match
107 // contents, and the input text has an overlap with contents.
108 if (StartsWith(suggestion_, input_text, true) &&
109 EndsWith(suggestion_, match_contents_, true) &&
110 (input_text.length() > contents_index)) {
111 lookup_text = input_text.substr(contents_index);
112 }
113 }
114 size_t lookup_position = match_contents_.find(lookup_text);
115 if (!allow_bolding_all && (lookup_position == base::string16::npos)) {
116 // Bail if the code below to update the bolding would bold the whole
117 // string. Note that the string may already be entirely bolded; if
118 // so, leave it as is.
119 return;
120 }
121 match_contents_class_.clear();
122 // We do intra-string highlighting for suggestions - the suggested segment
123 // will be highlighted, e.g. for input_text = "you" the suggestion may be
124 // "youtube", so we'll bold the "tube" section: you*tube*.
125 if (input_text != match_contents_) {
126 if (lookup_position == base::string16::npos) {
127 // The input text is not a substring of the query string, e.g. input
128 // text is "slasdot" and the query string is "slashdot", so we bold the
129 // whole thing.
130 match_contents_class_.push_back(
131 ACMatchClassification(0, ACMatchClassification::MATCH));
132 } else {
133 // We don't iterate over the string here annotating all matches because
134 // it looks odd to have every occurrence of a substring that may be as
135 // short as a single character highlighted in a query suggestion result,
136 // e.g. for input text "s" and query string "southwest airlines", it
137 // looks odd if both the first and last s are highlighted.
138 if (lookup_position != 0) {
139 match_contents_class_.push_back(
140 ACMatchClassification(0, ACMatchClassification::MATCH));
141 }
142 match_contents_class_.push_back(
143 ACMatchClassification(lookup_position, ACMatchClassification::NONE));
144 size_t next_fragment_position = lookup_position + lookup_text.length();
145 if (next_fragment_position < match_contents_.length()) {
146 match_contents_class_.push_back(ACMatchClassification(
147 next_fragment_position, ACMatchClassification::MATCH));
148 }
149 }
150 } else {
151 // Otherwise, match_contents_ is a verbatim (what-you-typed) match, either
152 // for the default provider or a keyword search provider.
153 match_contents_class_.push_back(
154 ACMatchClassification(0, ACMatchClassification::NONE));
155 }
156 }
157
158 int SearchSuggestionParser::SuggestResult::CalculateRelevance(
159 const AutocompleteInput& input,
160 bool keyword_provider_requested) const {
161 if (!from_keyword_provider_ && keyword_provider_requested)
162 return 100;
163 return ((input.type() == metrics::OmniboxInputType::URL) ? 300 : 600);
164 }
165
166 // SearchSuggestionParser::NavigationResult ------------------------------------
167
168 SearchSuggestionParser::NavigationResult::NavigationResult(
169 const AutocompleteSchemeClassifier& scheme_classifier,
170 const GURL& url,
171 AutocompleteMatchType::Type type,
172 const base::string16& description,
173 const std::string& deletion_url,
174 bool from_keyword_provider,
175 int relevance,
176 bool relevance_from_server,
177 const base::string16& input_text,
178 const std::string& languages)
179 : Result(from_keyword_provider, relevance, relevance_from_server, type,
180 deletion_url),
181 url_(url),
182 formatted_url_(AutocompleteInput::FormattedStringWithEquivalentMeaning(
183 url, net::FormatUrl(url, languages,
184 net::kFormatUrlOmitAll & ~net::kFormatUrlOmitHTTP,
185 net::UnescapeRule::SPACES, NULL, NULL, NULL),
186 scheme_classifier)),
187 description_(description) {
188 DCHECK(url_.is_valid());
189 CalculateAndClassifyMatchContents(true, input_text, languages);
190 }
191
192 SearchSuggestionParser::NavigationResult::~NavigationResult() {}
193
194 void
195 SearchSuggestionParser::NavigationResult::CalculateAndClassifyMatchContents(
196 const bool allow_bolding_nothing,
197 const base::string16& input_text,
198 const std::string& languages) {
199 if (input_text.empty()) {
200 // In case of zero-suggest results, do not highlight matches.
201 match_contents_class_.push_back(
202 ACMatchClassification(0, ACMatchClassification::NONE));
203 return;
204 }
205
206 // First look for the user's input inside the formatted url as it would be
207 // without trimming the scheme, so we can find matches at the beginning of the
208 // scheme.
209 const URLPrefix* prefix =
210 URLPrefix::BestURLPrefix(formatted_url_, input_text);
211 size_t match_start = (prefix == NULL) ?
212 formatted_url_.find(input_text) : prefix->prefix.length();
213 bool trim_http = !AutocompleteInput::HasHTTPScheme(input_text) &&
214 (!prefix || (match_start != 0));
215 const net::FormatUrlTypes format_types =
216 net::kFormatUrlOmitAll & ~(trim_http ? 0 : net::kFormatUrlOmitHTTP);
217
218 base::string16 match_contents = net::FormatUrl(url_, languages, format_types,
219 net::UnescapeRule::SPACES, NULL, NULL, &match_start);
220 // If the first match in the untrimmed string was inside a scheme that we
221 // trimmed, look for a subsequent match.
222 if (match_start == base::string16::npos)
223 match_start = match_contents.find(input_text);
224 // Update |match_contents_| and |match_contents_class_| if it's allowed.
225 if (allow_bolding_nothing || (match_start != base::string16::npos)) {
226 match_contents_ = match_contents;
227 // Safe if |match_start| is npos; also safe if the input is longer than the
228 // remaining contents after |match_start|.
229 AutocompleteMatch::ClassifyLocationInString(match_start,
230 input_text.length(), match_contents_.length(),
231 ACMatchClassification::URL, &match_contents_class_);
232 }
233 }
234
235 int SearchSuggestionParser::NavigationResult::CalculateRelevance(
236 const AutocompleteInput& input,
237 bool keyword_provider_requested) const {
238 return (from_keyword_provider_ || !keyword_provider_requested) ? 800 : 150;
239 }
240
241 // SearchSuggestionParser::Results ---------------------------------------------
242
243 SearchSuggestionParser::Results::Results()
244 : verbatim_relevance(-1),
245 field_trial_triggered(false),
246 relevances_from_server(false) {}
247
248 SearchSuggestionParser::Results::~Results() {}
249
250 void SearchSuggestionParser::Results::Clear() {
251 suggest_results.clear();
252 navigation_results.clear();
253 verbatim_relevance = -1;
254 metadata.clear();
255 }
256
257 bool SearchSuggestionParser::Results::HasServerProvidedScores() const {
258 if (verbatim_relevance >= 0)
259 return true;
260
261 // Right now either all results of one type will be server-scored or they will
262 // all be locally scored, but in case we change this later, we'll just check
263 // them all.
264 for (SuggestResults::const_iterator i(suggest_results.begin());
265 i != suggest_results.end(); ++i) {
266 if (i->relevance_from_server())
267 return true;
268 }
269 for (NavigationResults::const_iterator i(navigation_results.begin());
270 i != navigation_results.end(); ++i) {
271 if (i->relevance_from_server())
272 return true;
273 }
274
275 return false;
276 }
277
278 // SearchSuggestionParser ------------------------------------------------------
279
280 // static
281 std::string SearchSuggestionParser::ExtractJsonData(
282 const net::URLFetcher* source) {
283 const net::HttpResponseHeaders* const response_headers =
284 source->GetResponseHeaders();
285 std::string json_data;
286 source->GetResponseAsString(&json_data);
287
288 // JSON is supposed to be UTF-8, but some suggest service providers send
289 // JSON files in non-UTF-8 encodings. The actual encoding is usually
290 // specified in the Content-Type header field.
291 if (response_headers) {
292 std::string charset;
293 if (response_headers->GetCharset(&charset)) {
294 base::string16 data_16;
295 // TODO(jungshik): Switch to CodePageToUTF8 after it's added.
296 if (base::CodepageToUTF16(json_data, charset.c_str(),
297 base::OnStringConversionError::FAIL,
298 &data_16))
299 json_data = base::UTF16ToUTF8(data_16);
300 }
301 }
302 return json_data;
303 }
304
305 // static
306 scoped_ptr<base::Value> SearchSuggestionParser::DeserializeJsonData(
307 std::string json_data) {
308 // The JSON response should be an array.
309 for (size_t response_start_index = json_data.find("["), i = 0;
310 response_start_index != std::string::npos && i < 5;
311 response_start_index = json_data.find("[", 1), i++) {
312 // Remove any XSSI guards to allow for JSON parsing.
313 if (response_start_index > 0)
314 json_data.erase(0, response_start_index);
315
316 JSONStringValueSerializer deserializer(json_data);
317 deserializer.set_allow_trailing_comma(true);
318 int error_code = 0;
319 scoped_ptr<base::Value> data(deserializer.Deserialize(&error_code, NULL));
320 if (error_code == 0)
321 return data.Pass();
322 }
323 return scoped_ptr<base::Value>();
324 }
325
326 // static
327 bool SearchSuggestionParser::ParseSuggestResults(
328 const base::Value& root_val,
329 const AutocompleteInput& input,
330 const AutocompleteSchemeClassifier& scheme_classifier,
331 int default_result_relevance,
332 const std::string& languages,
333 bool is_keyword_result,
334 Results* results) {
335 base::string16 query;
336 const base::ListValue* root_list = NULL;
337 const base::ListValue* results_list = NULL;
338
339 if (!root_val.GetAsList(&root_list) || !root_list->GetString(0, &query) ||
340 query != input.text() || !root_list->GetList(1, &results_list))
341 return false;
342
343 // 3rd element: Description list.
344 const base::ListValue* descriptions = NULL;
345 root_list->GetList(2, &descriptions);
346
347 // 4th element: Disregard the query URL list for now.
348
349 // Reset suggested relevance information.
350 results->verbatim_relevance = -1;
351
352 // 5th element: Optional key-value pairs from the Suggest server.
353 const base::ListValue* types = NULL;
354 const base::ListValue* relevances = NULL;
355 const base::ListValue* suggestion_details = NULL;
356 const base::DictionaryValue* extras = NULL;
357 int prefetch_index = -1;
358 if (root_list->GetDictionary(4, &extras)) {
359 extras->GetList("google:suggesttype", &types);
360
361 // Discard this list if its size does not match that of the suggestions.
362 if (extras->GetList("google:suggestrelevance", &relevances) &&
363 (relevances->GetSize() != results_list->GetSize()))
364 relevances = NULL;
365 extras->GetInteger("google:verbatimrelevance",
366 &results->verbatim_relevance);
367
368 // Check if the active suggest field trial (if any) has triggered either
369 // for the default provider or keyword provider.
370 results->field_trial_triggered = false;
371 extras->GetBoolean("google:fieldtrialtriggered",
372 &results->field_trial_triggered);
373
374 const base::DictionaryValue* client_data = NULL;
375 if (extras->GetDictionary("google:clientdata", &client_data) && client_data)
376 client_data->GetInteger("phi", &prefetch_index);
377
378 if (extras->GetList("google:suggestdetail", &suggestion_details) &&
379 suggestion_details->GetSize() != results_list->GetSize())
380 suggestion_details = NULL;
381
382 // Store the metadata that came with the response in case we need to pass it
383 // along with the prefetch query to Instant.
384 JSONStringValueSerializer json_serializer(&results->metadata);
385 json_serializer.Serialize(*extras);
386 }
387
388 // Clear the previous results now that new results are available.
389 results->suggest_results.clear();
390 results->navigation_results.clear();
391 results->answers_image_urls.clear();
392
393 base::string16 suggestion;
394 std::string type;
395 int relevance = default_result_relevance;
396 // Prohibit navsuggest in FORCED_QUERY mode. Users wants queries, not URLs.
397 const bool allow_navsuggest =
398 input.type() != metrics::OmniboxInputType::FORCED_QUERY;
399 const base::string16& trimmed_input =
400 base::CollapseWhitespace(input.text(), false);
401 for (size_t index = 0; results_list->GetString(index, &suggestion); ++index) {
402 // Google search may return empty suggestions for weird input characters,
403 // they make no sense at all and can cause problems in our code.
404 if (suggestion.empty())
405 continue;
406
407 // Apply valid suggested relevance scores; discard invalid lists.
408 if (relevances != NULL && !relevances->GetInteger(index, &relevance))
409 relevances = NULL;
410 AutocompleteMatchType::Type match_type =
411 AutocompleteMatchType::SEARCH_SUGGEST;
412 if (types && types->GetString(index, &type))
413 match_type = GetAutocompleteMatchType(type);
414 const base::DictionaryValue* suggestion_detail = NULL;
415 std::string deletion_url;
416
417 if (suggestion_details &&
418 suggestion_details->GetDictionary(index, &suggestion_detail))
419 suggestion_detail->GetString("du", &deletion_url);
420
421 if ((match_type == AutocompleteMatchType::NAVSUGGEST) ||
422 (match_type == AutocompleteMatchType::NAVSUGGEST_PERSONALIZED)) {
423 // Do not blindly trust the URL coming from the server to be valid.
424 GURL url(
425 url_fixer::FixupURL(base::UTF16ToUTF8(suggestion), std::string()));
426 if (url.is_valid() && allow_navsuggest) {
427 base::string16 title;
428 if (descriptions != NULL)
429 descriptions->GetString(index, &title);
430 results->navigation_results.push_back(NavigationResult(
431 scheme_classifier, url, match_type, title, deletion_url,
432 is_keyword_result, relevance, relevances != NULL, input.text(),
433 languages));
434 }
435 } else {
436 base::string16 match_contents = suggestion;
437 base::string16 match_contents_prefix;
438 base::string16 annotation;
439 base::string16 answer_contents;
440 base::string16 answer_type;
441 std::string suggest_query_params;
442
443 if (suggestion_details) {
444 suggestion_details->GetDictionary(index, &suggestion_detail);
445 if (suggestion_detail) {
446 suggestion_detail->GetString("t", &match_contents);
447 suggestion_detail->GetString("mp", &match_contents_prefix);
448 // Error correction for bad data from server.
449 if (match_contents.empty())
450 match_contents = suggestion;
451 suggestion_detail->GetString("a", &annotation);
452 suggestion_detail->GetString("q", &suggest_query_params);
453
454 // Extract Answers, if provided.
455 const base::DictionaryValue* answer_json = NULL;
456 if (suggestion_detail->GetDictionary("ansa", &answer_json)) {
457 match_type = AutocompleteMatchType::SEARCH_SUGGEST_ANSWER;
458 GetAnswersImageURLs(answer_json, &results->answers_image_urls);
459 std::string contents;
460 base::JSONWriter::Write(answer_json, &contents);
461 answer_contents = base::UTF8ToUTF16(contents);
462 suggestion_detail->GetString("ansb", &answer_type);
463 }
464 }
465 }
466
467 bool should_prefetch = static_cast<int>(index) == prefetch_index;
468 // TODO(kochi): Improve calculator suggestion presentation.
469 results->suggest_results.push_back(SuggestResult(
470 base::CollapseWhitespace(suggestion, false), match_type,
471 base::CollapseWhitespace(match_contents, false),
472 match_contents_prefix, annotation, answer_contents, answer_type,
473 suggest_query_params, deletion_url, is_keyword_result, relevance,
474 relevances != NULL, should_prefetch, trimmed_input));
475 }
476 }
477 results->relevances_from_server = relevances != NULL;
478 return true;
479 }
480
481 // static
482 void SearchSuggestionParser::GetAnswersImageURLs(
483 const base::DictionaryValue* answer_json,
484 std::vector<GURL>* urls) {
485 DCHECK(answer_json);
486 const base::ListValue* lines = NULL;
487 answer_json->GetList("l", &lines);
488 if (!lines || lines->GetSize() == 0)
489 return;
490
491 for (size_t line = 0; line < lines->GetSize(); ++line) {
492 const base::DictionaryValue* imageLine = NULL;
493 lines->GetDictionary(line, &imageLine);
494 if (!imageLine)
495 continue;
496 const base::DictionaryValue* imageData = NULL;
497 imageLine->GetDictionary("i", &imageData);
498 if (!imageData)
499 continue;
500 std::string imageUrl;
501 imageData->GetString("d", &imageUrl);
502 urls->push_back(GURL(imageUrl));
503 }
504 }
OLDNEW
« no previous file with comments | « components/autocomplete/search_suggestion_parser.h ('k') | components/autocomplete/test_scheme_classifier.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698