OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "components/omnibox/history_url_provider.h" |
| 6 |
| 7 #include <algorithm> |
| 8 |
| 9 #include "base/basictypes.h" |
| 10 #include "base/bind.h" |
| 11 #include "base/command_line.h" |
| 12 #include "base/location.h" |
| 13 #include "base/message_loop/message_loop.h" |
| 14 #include "base/metrics/histogram.h" |
| 15 #include "base/prefs/pref_service.h" |
| 16 #include "base/single_thread_task_runner.h" |
| 17 #include "base/strings/string_util.h" |
| 18 #include "base/strings/utf_string_conversions.h" |
| 19 #include "base/time/time.h" |
| 20 #include "components/bookmarks/browser/bookmark_utils.h" |
| 21 #include "components/history/core/browser/history_backend.h" |
| 22 #include "components/history/core/browser/history_database.h" |
| 23 #include "components/history/core/browser/history_service.h" |
| 24 #include "components/history/core/browser/history_types.h" |
| 25 #include "components/metrics/proto/omnibox_input_type.pb.h" |
| 26 #include "components/omnibox/autocomplete_match.h" |
| 27 #include "components/omnibox/autocomplete_provider_listener.h" |
| 28 #include "components/omnibox/autocomplete_result.h" |
| 29 #include "components/omnibox/in_memory_url_index_types.h" |
| 30 #include "components/omnibox/omnibox_field_trial.h" |
| 31 #include "components/omnibox/scored_history_match.h" |
| 32 #include "components/omnibox/url_prefix.h" |
| 33 #include "components/search_engines/search_terms_data.h" |
| 34 #include "components/search_engines/template_url_service.h" |
| 35 #include "components/url_fixer/url_fixer.h" |
| 36 #include "net/base/net_util.h" |
| 37 #include "net/base/registry_controlled_domains/registry_controlled_domain.h" |
| 38 #include "url/gurl.h" |
| 39 #include "url/third_party/mozilla/url_parse.h" |
| 40 #include "url/url_util.h" |
| 41 |
| 42 namespace { |
| 43 |
| 44 // Acts like the > operator for URLInfo classes. |
| 45 bool CompareHistoryMatch(const history::HistoryMatch& a, |
| 46 const history::HistoryMatch& b) { |
| 47 // A URL that has been typed at all is better than one that has never been |
| 48 // typed. (Note "!"s on each side) |
| 49 if (!a.url_info.typed_count() != !b.url_info.typed_count()) |
| 50 return a.url_info.typed_count() > b.url_info.typed_count(); |
| 51 |
| 52 // Innermost matches (matches after any scheme or "www.") are better than |
| 53 // non-innermost matches. |
| 54 if (a.innermost_match != b.innermost_match) |
| 55 return a.innermost_match; |
| 56 |
| 57 // URLs that have been typed more often are better. |
| 58 if (a.url_info.typed_count() != b.url_info.typed_count()) |
| 59 return a.url_info.typed_count() > b.url_info.typed_count(); |
| 60 |
| 61 // For URLs that have each been typed once, a host (alone) is better than a |
| 62 // page inside. |
| 63 if ((a.url_info.typed_count() == 1) && (a.IsHostOnly() != b.IsHostOnly())) |
| 64 return a.IsHostOnly(); |
| 65 |
| 66 // URLs that have been visited more often are better. |
| 67 if (a.url_info.visit_count() != b.url_info.visit_count()) |
| 68 return a.url_info.visit_count() > b.url_info.visit_count(); |
| 69 |
| 70 // URLs that have been visited more recently are better. |
| 71 return a.url_info.last_visit() > b.url_info.last_visit(); |
| 72 } |
| 73 |
| 74 // Sorts and dedups the given list of matches. |
| 75 void SortAndDedupMatches(history::HistoryMatches* matches) { |
| 76 // Sort by quality, best first. |
| 77 std::sort(matches->begin(), matches->end(), &CompareHistoryMatch); |
| 78 |
| 79 // Remove duplicate matches (caused by the search string appearing in one of |
| 80 // the prefixes as well as after it). Consider the following scenario: |
| 81 // |
| 82 // User has visited "http://http.com" once and "http://htaccess.com" twice. |
| 83 // User types "http". The autocomplete search with prefix "http://" returns |
| 84 // the first host, while the search with prefix "" returns both hosts. Now |
| 85 // we sort them into rank order: |
| 86 // http://http.com (innermost_match) |
| 87 // http://htaccess.com (!innermost_match, url_info.visit_count == 2) |
| 88 // http://http.com (!innermost_match, url_info.visit_count == 1) |
| 89 // |
| 90 // The above scenario tells us we can't use std::unique(), since our |
| 91 // duplicates are not always sequential. It also tells us we should remove |
| 92 // the lower-quality duplicate(s), since otherwise the returned results won't |
| 93 // be ordered correctly. This is easy to do: we just always remove the later |
| 94 // element of a duplicate pair. |
| 95 // Be careful! Because the vector contents may change as we remove elements, |
| 96 // we use an index instead of an iterator in the outer loop, and don't |
| 97 // precalculate the ending position. |
| 98 for (size_t i = 0; i < matches->size(); ++i) { |
| 99 for (history::HistoryMatches::iterator j(matches->begin() + i + 1); |
| 100 j != matches->end(); ) { |
| 101 if ((*matches)[i].url_info.url() == j->url_info.url()) |
| 102 j = matches->erase(j); |
| 103 else |
| 104 ++j; |
| 105 } |
| 106 } |
| 107 } |
| 108 |
| 109 // Calculates a new relevance score applying half-life time decaying to |count| |
| 110 // using |time_since_last_visit| and |score_buckets|. This function will never |
| 111 // return a score higher than |undecayed_relevance|; in other words, it can only |
| 112 // demote the old score. |
| 113 double CalculateRelevanceUsingScoreBuckets( |
| 114 const HUPScoringParams::ScoreBuckets& score_buckets, |
| 115 const base::TimeDelta& time_since_last_visit, |
| 116 int undecayed_relevance, |
| 117 int count) { |
| 118 // Back off if above relevance cap. |
| 119 if ((score_buckets.relevance_cap() != -1) && |
| 120 (undecayed_relevance >= score_buckets.relevance_cap())) |
| 121 return undecayed_relevance; |
| 122 |
| 123 // Time based decay using half-life time. |
| 124 double decayed_count = count; |
| 125 if (decayed_count > 0) |
| 126 decayed_count *= score_buckets.HalfLifeTimeDecay(time_since_last_visit); |
| 127 |
| 128 // Find a threshold where decayed_count >= bucket. |
| 129 const HUPScoringParams::ScoreBuckets::CountMaxRelevance* score_bucket = NULL; |
| 130 for (size_t i = 0; i < score_buckets.buckets().size(); ++i) { |
| 131 score_bucket = &score_buckets.buckets()[i]; |
| 132 if (decayed_count >= score_bucket->first) |
| 133 break; // Buckets are in descending order, so we can ignore the rest. |
| 134 } |
| 135 |
| 136 return (score_bucket && (undecayed_relevance > score_bucket->second)) ? |
| 137 score_bucket->second : undecayed_relevance; |
| 138 } |
| 139 |
| 140 // Returns a new relevance score for the given |match| based on the |
| 141 // |old_relevance| score and |scoring_params|. The new relevance score is |
| 142 // guaranteed to be less than or equal to |old_relevance|. In other words, this |
| 143 // function can only demote a score, never boost it. Returns |old_relevance| if |
| 144 // experimental scoring is disabled. |
| 145 int CalculateRelevanceScoreUsingScoringParams( |
| 146 const history::HistoryMatch& match, |
| 147 int old_relevance, |
| 148 const HUPScoringParams& scoring_params) { |
| 149 if (!scoring_params.experimental_scoring_enabled) |
| 150 return old_relevance; |
| 151 |
| 152 const base::TimeDelta time_since_last_visit = |
| 153 base::Time::Now() - match.url_info.last_visit(); |
| 154 |
| 155 int relevance = CalculateRelevanceUsingScoreBuckets( |
| 156 scoring_params.typed_count_buckets, time_since_last_visit, old_relevance, |
| 157 match.url_info.typed_count()); |
| 158 |
| 159 // Additional demotion (on top of typed_count demotion) of URLs that were |
| 160 // never typed. |
| 161 if (match.url_info.typed_count() == 0) { |
| 162 relevance = CalculateRelevanceUsingScoreBuckets( |
| 163 scoring_params.visited_count_buckets, time_since_last_visit, relevance, |
| 164 match.url_info.visit_count()); |
| 165 } |
| 166 |
| 167 DCHECK_LE(relevance, old_relevance); |
| 168 return relevance; |
| 169 } |
| 170 |
| 171 // Extracts typed_count, visit_count, and last_visited time from the URLRow and |
| 172 // puts them in the additional info field of the |match| for display in |
| 173 // about:omnibox. |
| 174 void RecordAdditionalInfoFromUrlRow(const history::URLRow& info, |
| 175 AutocompleteMatch* match) { |
| 176 match->RecordAdditionalInfo("typed count", info.typed_count()); |
| 177 match->RecordAdditionalInfo("visit count", info.visit_count()); |
| 178 match->RecordAdditionalInfo("last visit", info.last_visit()); |
| 179 } |
| 180 |
| 181 // If |create_if_necessary| is true, ensures that |matches| contains an entry |
| 182 // for |info|, creating a new such entry if necessary (using |input_location| |
| 183 // and |match_in_scheme|). |
| 184 // |
| 185 // If |promote| is true, this also ensures the entry is the first element in |
| 186 // |matches|, moving or adding it to the front as appropriate. When |promote| |
| 187 // is false, existing matches are left in place, and newly added matches are |
| 188 // placed at the back. |
| 189 // |
| 190 // It's OK to call this function with both |create_if_necessary| and |promote| |
| 191 // false, in which case we'll do nothing. |
| 192 // |
| 193 // Returns whether the match exists regardless if it was promoted/created. |
| 194 bool CreateOrPromoteMatch(const history::URLRow& info, |
| 195 size_t input_location, |
| 196 bool match_in_scheme, |
| 197 history::HistoryMatches* matches, |
| 198 bool create_if_necessary, |
| 199 bool promote) { |
| 200 // |matches| may already have an entry for this. |
| 201 for (history::HistoryMatches::iterator i(matches->begin()); |
| 202 i != matches->end(); ++i) { |
| 203 if (i->url_info.url() == info.url()) { |
| 204 // Rotate it to the front if the caller wishes. |
| 205 if (promote) |
| 206 std::rotate(matches->begin(), i, i + 1); |
| 207 return true; |
| 208 } |
| 209 } |
| 210 |
| 211 if (!create_if_necessary) |
| 212 return false; |
| 213 |
| 214 // No entry, so create one. |
| 215 history::HistoryMatch match(info, input_location, match_in_scheme, true); |
| 216 if (promote) |
| 217 matches->push_front(match); |
| 218 else |
| 219 matches->push_back(match); |
| 220 |
| 221 return true; |
| 222 } |
| 223 |
| 224 // Returns whether |match| is suitable for inline autocompletion. |
| 225 bool CanPromoteMatchForInlineAutocomplete(const history::HistoryMatch& match) { |
| 226 // We can promote this match if it's been typed at least n times, where n == 1 |
| 227 // for "simple" (host-only) URLs and n == 2 for others. We set a higher bar |
| 228 // for these long URLs because it's less likely that users will want to visit |
| 229 // them again. Even though we don't increment the typed_count for pasted-in |
| 230 // URLs, if the user manually edits the URL or types some long thing in by |
| 231 // hand, we wouldn't want to immediately start autocompleting it. |
| 232 return match.url_info.typed_count() && |
| 233 ((match.url_info.typed_count() > 1) || match.IsHostOnly()); |
| 234 } |
| 235 |
| 236 // Given the user's |input| and a |match| created from it, reduce the match's |
| 237 // URL to just a host. If this host still matches the user input, return it. |
| 238 // Returns the empty string on failure. |
| 239 GURL ConvertToHostOnly(const history::HistoryMatch& match, |
| 240 const base::string16& input) { |
| 241 // See if we should try to do host-only suggestions for this URL. Nonstandard |
| 242 // schemes means there's no authority section, so suggesting the host name |
| 243 // is useless. File URLs are standard, but host suggestion is not useful for |
| 244 // them either. |
| 245 const GURL& url = match.url_info.url(); |
| 246 if (!url.is_valid() || !url.IsStandard() || url.SchemeIsFile()) |
| 247 return GURL(); |
| 248 |
| 249 // Transform to a host-only match. Bail if the host no longer matches the |
| 250 // user input (e.g. because the user typed more than just a host). |
| 251 GURL host = url.GetWithEmptyPath(); |
| 252 if ((host.spec().length() < (match.input_location + input.length()))) |
| 253 return GURL(); // User typing is longer than this host suggestion. |
| 254 |
| 255 const base::string16 spec = base::UTF8ToUTF16(host.spec()); |
| 256 if (spec.compare(match.input_location, input.length(), input)) |
| 257 return GURL(); // User typing is no longer a prefix. |
| 258 |
| 259 return host; |
| 260 } |
| 261 |
| 262 } // namespace |
| 263 |
| 264 // ----------------------------------------------------------------- |
| 265 // SearchTermsDataSnapshot |
| 266 |
| 267 // Implementation of SearchTermsData that takes a snapshot of another |
| 268 // SearchTermsData by copying all the responses to the different getters into |
| 269 // member strings, then returning those strings when its own getters are called. |
| 270 // This will typically be constructed on the UI thread from |
| 271 // UIThreadSearchTermsData but is subsequently safe to use on any thread. |
| 272 class SearchTermsDataSnapshot : public SearchTermsData { |
| 273 public: |
| 274 explicit SearchTermsDataSnapshot(const SearchTermsData& search_terms_data); |
| 275 ~SearchTermsDataSnapshot() override; |
| 276 |
| 277 std::string GoogleBaseURLValue() const override; |
| 278 std::string GetApplicationLocale() const override; |
| 279 base::string16 GetRlzParameterValue(bool from_app_list) const override; |
| 280 std::string GetSearchClient() const override; |
| 281 bool EnableAnswersInSuggest() const override; |
| 282 bool IsShowingSearchTermsOnSearchResultsPages() const override; |
| 283 std::string InstantExtendedEnabledParam(bool for_search) const override; |
| 284 std::string ForceInstantResultsParam(bool for_prerender) const override; |
| 285 std::string NTPIsThemedParam() const override; |
| 286 std::string GoogleImageSearchSource() const override; |
| 287 |
| 288 private: |
| 289 std::string google_base_url_value_; |
| 290 std::string application_locale_; |
| 291 base::string16 rlz_parameter_value_; |
| 292 std::string search_client_; |
| 293 bool enable_answers_in_suggest_; |
| 294 bool is_showing_search_terms_on_search_results_pages_; |
| 295 std::string instant_extended_enabled_param_; |
| 296 std::string instant_extended_enabled_param_for_search_; |
| 297 std::string force_instant_results_param_; |
| 298 std::string force_instant_results_param_for_prerender_; |
| 299 std::string ntp_is_themed_param_; |
| 300 std::string google_image_search_source_; |
| 301 |
| 302 DISALLOW_COPY_AND_ASSIGN(SearchTermsDataSnapshot); |
| 303 }; |
| 304 |
| 305 SearchTermsDataSnapshot::SearchTermsDataSnapshot( |
| 306 const SearchTermsData& search_terms_data) |
| 307 : google_base_url_value_(search_terms_data.GoogleBaseURLValue()), |
| 308 application_locale_(search_terms_data.GetApplicationLocale()), |
| 309 rlz_parameter_value_(search_terms_data.GetRlzParameterValue(false)), |
| 310 search_client_(search_terms_data.GetSearchClient()), |
| 311 enable_answers_in_suggest_(search_terms_data.EnableAnswersInSuggest()), |
| 312 is_showing_search_terms_on_search_results_pages_( |
| 313 search_terms_data.IsShowingSearchTermsOnSearchResultsPages()), |
| 314 instant_extended_enabled_param_( |
| 315 search_terms_data.InstantExtendedEnabledParam(false)), |
| 316 instant_extended_enabled_param_for_search_( |
| 317 search_terms_data.InstantExtendedEnabledParam(true)), |
| 318 force_instant_results_param_( |
| 319 search_terms_data.ForceInstantResultsParam(false)), |
| 320 force_instant_results_param_for_prerender_( |
| 321 search_terms_data.ForceInstantResultsParam(true)), |
| 322 ntp_is_themed_param_(search_terms_data.NTPIsThemedParam()), |
| 323 google_image_search_source_(search_terms_data.GoogleImageSearchSource()) { |
| 324 } |
| 325 |
| 326 SearchTermsDataSnapshot::~SearchTermsDataSnapshot() { |
| 327 } |
| 328 |
| 329 std::string SearchTermsDataSnapshot::GoogleBaseURLValue() const { |
| 330 return google_base_url_value_; |
| 331 } |
| 332 |
| 333 std::string SearchTermsDataSnapshot::GetApplicationLocale() const { |
| 334 return application_locale_; |
| 335 } |
| 336 |
| 337 base::string16 SearchTermsDataSnapshot::GetRlzParameterValue( |
| 338 bool from_app_list) const { |
| 339 return rlz_parameter_value_; |
| 340 } |
| 341 |
| 342 std::string SearchTermsDataSnapshot::GetSearchClient() const { |
| 343 return search_client_; |
| 344 } |
| 345 |
| 346 bool SearchTermsDataSnapshot::EnableAnswersInSuggest() const { |
| 347 return enable_answers_in_suggest_; |
| 348 } |
| 349 |
| 350 bool SearchTermsDataSnapshot::IsShowingSearchTermsOnSearchResultsPages() const { |
| 351 return is_showing_search_terms_on_search_results_pages_; |
| 352 } |
| 353 |
| 354 std::string SearchTermsDataSnapshot::InstantExtendedEnabledParam( |
| 355 bool for_search) const { |
| 356 return for_search ? instant_extended_enabled_param_ : |
| 357 instant_extended_enabled_param_for_search_; |
| 358 } |
| 359 |
| 360 std::string SearchTermsDataSnapshot::ForceInstantResultsParam( |
| 361 bool for_prerender) const { |
| 362 return for_prerender ? force_instant_results_param_ : |
| 363 force_instant_results_param_for_prerender_; |
| 364 } |
| 365 |
| 366 std::string SearchTermsDataSnapshot::NTPIsThemedParam() const { |
| 367 return ntp_is_themed_param_; |
| 368 } |
| 369 |
| 370 std::string SearchTermsDataSnapshot::GoogleImageSearchSource() const { |
| 371 return google_image_search_source_; |
| 372 } |
| 373 |
| 374 // ----------------------------------------------------------------- |
| 375 // HistoryURLProvider |
| 376 |
| 377 // These ugly magic numbers will go away once we switch all scoring |
| 378 // behavior (including URL-what-you-typed) to HistoryQuick provider. |
| 379 const int HistoryURLProvider::kScoreForBestInlineableResult = 1413; |
| 380 const int HistoryURLProvider::kScoreForUnvisitedIntranetResult = 1403; |
| 381 const int HistoryURLProvider::kScoreForWhatYouTypedResult = 1203; |
| 382 const int HistoryURLProvider::kBaseScoreForNonInlineableResult = 900; |
| 383 |
| 384 // VisitClassifier is used to classify the type of visit to a particular url. |
| 385 class HistoryURLProvider::VisitClassifier { |
| 386 public: |
| 387 enum Type { |
| 388 INVALID, // Navigations to the URL are not allowed. |
| 389 UNVISITED_INTRANET, // A navigable URL for which we have no visit data but |
| 390 // which is known to refer to a visited intranet host. |
| 391 VISITED, // The site has been previously visited. |
| 392 }; |
| 393 |
| 394 VisitClassifier(HistoryURLProvider* provider, |
| 395 const AutocompleteInput& input, |
| 396 history::URLDatabase* db); |
| 397 |
| 398 // Returns the type of visit for the specified input. |
| 399 Type type() const { return type_; } |
| 400 |
| 401 // Returns the URLRow for the visit. |
| 402 const history::URLRow& url_row() const { return url_row_; } |
| 403 |
| 404 private: |
| 405 HistoryURLProvider* provider_; |
| 406 history::URLDatabase* db_; |
| 407 Type type_; |
| 408 history::URLRow url_row_; |
| 409 |
| 410 DISALLOW_COPY_AND_ASSIGN(VisitClassifier); |
| 411 }; |
| 412 |
| 413 HistoryURLProvider::VisitClassifier::VisitClassifier( |
| 414 HistoryURLProvider* provider, |
| 415 const AutocompleteInput& input, |
| 416 history::URLDatabase* db) |
| 417 : provider_(provider), |
| 418 db_(db), |
| 419 type_(INVALID) { |
| 420 const GURL& url = input.canonicalized_url(); |
| 421 // Detect email addresses. These cases will look like "http://user@site/", |
| 422 // and because the history backend strips auth creds, we'll get a bogus exact |
| 423 // match below if the user has visited "site". |
| 424 if (!url.is_valid() || |
| 425 ((input.type() == metrics::OmniboxInputType::UNKNOWN) && |
| 426 input.parts().username.is_nonempty() && |
| 427 !input.parts().password.is_nonempty() && |
| 428 !input.parts().path.is_nonempty())) |
| 429 return; |
| 430 |
| 431 if (db_->GetRowForURL(url, &url_row_)) { |
| 432 type_ = VISITED; |
| 433 return; |
| 434 } |
| 435 |
| 436 if (provider_->CanFindIntranetURL(db_, input)) { |
| 437 // The user typed an intranet hostname that they've visited (albeit with a |
| 438 // different port and/or path) before. |
| 439 url_row_ = history::URLRow(url); |
| 440 type_ = UNVISITED_INTRANET; |
| 441 } |
| 442 } |
| 443 |
| 444 HistoryURLProviderParams::HistoryURLProviderParams( |
| 445 const AutocompleteInput& input, |
| 446 bool trim_http, |
| 447 const AutocompleteMatch& what_you_typed_match, |
| 448 const std::string& languages, |
| 449 TemplateURL* default_search_provider, |
| 450 const SearchTermsData& search_terms_data) |
| 451 : message_loop(base::MessageLoop::current()), |
| 452 input(input), |
| 453 prevent_inline_autocomplete(input.prevent_inline_autocomplete()), |
| 454 trim_http(trim_http), |
| 455 what_you_typed_match(what_you_typed_match), |
| 456 failed(false), |
| 457 exact_suggestion_is_in_history(false), |
| 458 promote_type(NEITHER), |
| 459 languages(languages), |
| 460 default_search_provider(default_search_provider ? |
| 461 new TemplateURL(default_search_provider->data()) : NULL), |
| 462 search_terms_data(new SearchTermsDataSnapshot(search_terms_data)) { |
| 463 } |
| 464 |
| 465 HistoryURLProviderParams::~HistoryURLProviderParams() { |
| 466 } |
| 467 |
| 468 HistoryURLProvider::HistoryURLProvider(AutocompleteProviderClient* client, |
| 469 AutocompleteProviderListener* listener) |
| 470 : HistoryProvider(AutocompleteProvider::TYPE_HISTORY_URL, client), |
| 471 listener_(listener), |
| 472 params_(NULL) { |
| 473 // Initialize HUP scoring params based on the current experiment. |
| 474 OmniboxFieldTrial::GetExperimentalHUPScoringParams(&scoring_params_); |
| 475 } |
| 476 |
| 477 void HistoryURLProvider::Start(const AutocompleteInput& input, |
| 478 bool minimal_changes, |
| 479 bool called_due_to_focus) { |
| 480 // NOTE: We could try hard to do less work in the |minimal_changes| case |
| 481 // here; some clever caching would let us reuse the raw matches from the |
| 482 // history DB without re-querying. However, we'd still have to go back to |
| 483 // the history thread to mark these up properly, and if pass 2 is currently |
| 484 // running, we'd need to wait for it to return to the main thread before |
| 485 // doing this (we can't just write new data for it to read due to thread |
| 486 // safety issues). At that point it's just as fast, and easier, to simply |
| 487 // re-run the query from scratch and ignore |minimal_changes|. |
| 488 |
| 489 // Cancel any in-progress query. |
| 490 Stop(false, false); |
| 491 |
| 492 matches_.clear(); |
| 493 |
| 494 if (called_due_to_focus || |
| 495 (input.type() == metrics::OmniboxInputType::INVALID) || |
| 496 (input.type() == metrics::OmniboxInputType::FORCED_QUERY)) |
| 497 return; |
| 498 |
| 499 // Do some fixup on the user input before matching against it, so we provide |
| 500 // good results for local file paths, input with spaces, etc. |
| 501 const FixupReturn fixup_return(FixupUserInput(input)); |
| 502 if (!fixup_return.first) |
| 503 return; |
| 504 url::Parsed parts; |
| 505 url_fixer::SegmentURL(fixup_return.second, &parts); |
| 506 AutocompleteInput fixed_up_input(input); |
| 507 fixed_up_input.UpdateText(fixup_return.second, base::string16::npos, parts); |
| 508 |
| 509 // Create a match for what the user typed. |
| 510 const bool trim_http = !AutocompleteInput::HasHTTPScheme(input.text()); |
| 511 AutocompleteMatch what_you_typed_match(SuggestExactInput( |
| 512 fixed_up_input.text(), fixed_up_input.canonicalized_url(), trim_http)); |
| 513 what_you_typed_match.relevance = CalculateRelevance(WHAT_YOU_TYPED, 0); |
| 514 |
| 515 // Add the WYT match as a fallback in case we can't get the history service or |
| 516 // URL DB; otherwise, we'll replace this match lower down. Don't do this for |
| 517 // queries, though -- while we can sometimes mark up a match for them, it's |
| 518 // not what the user wants, and just adds noise. |
| 519 if (fixed_up_input.type() != metrics::OmniboxInputType::QUERY) |
| 520 matches_.push_back(what_you_typed_match); |
| 521 |
| 522 // We'll need the history service to run both passes, so try to obtain it. |
| 523 history::HistoryService* const history_service = client()->HistoryService(); |
| 524 if (!history_service) |
| 525 return; |
| 526 |
| 527 // Get the default search provider and search terms data now since we have to |
| 528 // retrieve these on the UI thread, and the second pass runs on the history |
| 529 // thread. |template_url_service| can be NULL when testing. |
| 530 TemplateURLService* template_url_service = client()->GetTemplateURLService(); |
| 531 TemplateURL* default_search_provider = template_url_service ? |
| 532 template_url_service->GetDefaultSearchProvider() : NULL; |
| 533 |
| 534 // Create the data structure for the autocomplete passes. We'll save this off |
| 535 // onto the |params_| member for later deletion below if we need to run pass |
| 536 // 2. |
| 537 scoped_ptr<HistoryURLProviderParams> params(new HistoryURLProviderParams( |
| 538 fixed_up_input, trim_http, what_you_typed_match, |
| 539 client()->AcceptLanguages(), default_search_provider, |
| 540 client()->GetSearchTermsData())); |
| 541 // Note that we use the non-fixed-up input here, since fixup may strip |
| 542 // trailing whitespace. |
| 543 params->prevent_inline_autocomplete = PreventInlineAutocomplete(input); |
| 544 |
| 545 // Pass 1: Get the in-memory URL database, and use it to find and promote |
| 546 // the inline autocomplete match, if any. |
| 547 history::URLDatabase* url_db = history_service->InMemoryDatabase(); |
| 548 // url_db can be NULL if it hasn't finished initializing (or failed to |
| 549 // initialize). In this case all we can do is fall back on the second |
| 550 // pass. |
| 551 // |
| 552 // TODO(pkasting): We should just block here until this loads. Any time |
| 553 // someone unloads the history backend, we'll get inconsistent inline |
| 554 // autocomplete behavior here. |
| 555 if (url_db) { |
| 556 DoAutocomplete(NULL, url_db, params.get()); |
| 557 matches_.clear(); |
| 558 PromoteMatchesIfNecessary(*params); |
| 559 // NOTE: We don't reset |params| here since at least the |promote_type| |
| 560 // field on it will be read by the second pass -- see comments in |
| 561 // DoAutocomplete(). |
| 562 } |
| 563 |
| 564 // Pass 2: Ask the history service to call us back on the history thread, |
| 565 // where we can read the full on-disk DB. |
| 566 if (input.want_asynchronous_matches()) { |
| 567 done_ = false; |
| 568 params_ = params.release(); // This object will be destroyed in |
| 569 // QueryComplete() once we're done with it. |
| 570 history_service->ScheduleAutocomplete( |
| 571 base::Bind(&HistoryURLProvider::ExecuteWithDB, this, params_)); |
| 572 } |
| 573 } |
| 574 |
| 575 void HistoryURLProvider::Stop(bool clear_cached_results, |
| 576 bool due_to_user_inactivity) { |
| 577 done_ = true; |
| 578 |
| 579 if (params_) |
| 580 params_->cancel_flag.Set(); |
| 581 } |
| 582 |
| 583 AutocompleteMatch HistoryURLProvider::SuggestExactInput( |
| 584 const base::string16& text, |
| 585 const GURL& destination_url, |
| 586 bool trim_http) { |
| 587 // The FormattedStringWithEquivalentMeaning() call below requires callers to |
| 588 // be on the main thread. |
| 589 DCHECK(thread_checker_.CalledOnValidThread()); |
| 590 |
| 591 AutocompleteMatch match(this, 0, false, |
| 592 AutocompleteMatchType::URL_WHAT_YOU_TYPED); |
| 593 |
| 594 if (destination_url.is_valid()) { |
| 595 match.destination_url = destination_url; |
| 596 |
| 597 // Trim off "http://" if the user didn't type it. |
| 598 DCHECK(!trim_http || !AutocompleteInput::HasHTTPScheme(text)); |
| 599 base::string16 display_string( |
| 600 net::FormatUrl(destination_url, std::string(), |
| 601 net::kFormatUrlOmitAll & ~net::kFormatUrlOmitHTTP, |
| 602 net::UnescapeRule::SPACES, NULL, NULL, NULL)); |
| 603 const size_t offset = trim_http ? TrimHttpPrefix(&display_string) : 0; |
| 604 match.fill_into_edit = |
| 605 AutocompleteInput::FormattedStringWithEquivalentMeaning( |
| 606 destination_url, display_string, client()->SchemeClassifier()); |
| 607 match.allowed_to_be_default_match = true; |
| 608 // NOTE: Don't set match.inline_autocompletion to something non-empty here; |
| 609 // it's surprising and annoying. |
| 610 |
| 611 // Try to highlight "innermost" match location. If we fix up "w" into |
| 612 // "www.w.com", we want to highlight the fifth character, not the first. |
| 613 // This relies on match.destination_url being the non-prefix-trimmed version |
| 614 // of match.contents. |
| 615 match.contents = display_string; |
| 616 const URLPrefix* best_prefix = URLPrefix::BestURLPrefix( |
| 617 base::UTF8ToUTF16(destination_url.spec()), text); |
| 618 // It's possible for match.destination_url to not contain the user's input |
| 619 // at all (so |best_prefix| is NULL), for example if the input is |
| 620 // "view-source:x" and |destination_url| has an inserted "http://" in the |
| 621 // middle. |
| 622 if (best_prefix == NULL) { |
| 623 AutocompleteMatch::ClassifyMatchInString(text, match.contents, |
| 624 ACMatchClassification::URL, |
| 625 &match.contents_class); |
| 626 } else { |
| 627 AutocompleteMatch::ClassifyLocationInString( |
| 628 best_prefix->prefix.length() - offset, text.length(), |
| 629 match.contents.length(), ACMatchClassification::URL, |
| 630 &match.contents_class); |
| 631 } |
| 632 } |
| 633 |
| 634 return match; |
| 635 } |
| 636 |
| 637 void HistoryURLProvider::ExecuteWithDB(HistoryURLProviderParams* params, |
| 638 history::HistoryBackend* backend, |
| 639 history::URLDatabase* db) { |
| 640 // We may get called with a NULL database if it couldn't be properly |
| 641 // initialized. |
| 642 if (!db) { |
| 643 params->failed = true; |
| 644 } else if (!params->cancel_flag.IsSet()) { |
| 645 base::TimeTicks beginning_time = base::TimeTicks::Now(); |
| 646 |
| 647 DoAutocomplete(backend, db, params); |
| 648 |
| 649 UMA_HISTOGRAM_TIMES("Autocomplete.HistoryAsyncQueryTime", |
| 650 base::TimeTicks::Now() - beginning_time); |
| 651 } |
| 652 |
| 653 // Return the results (if any) to the main thread. |
| 654 params->message_loop->task_runner()->PostTask( |
| 655 FROM_HERE, base::Bind(&HistoryURLProvider::QueryComplete, this, params)); |
| 656 } |
| 657 |
| 658 HistoryURLProvider::~HistoryURLProvider() { |
| 659 // Note: This object can get leaked on shutdown if there are pending |
| 660 // requests on the database (which hold a reference to us). Normally, these |
| 661 // messages get flushed for each thread. We do a round trip from main, to |
| 662 // history, back to main while holding a reference. If the main thread |
| 663 // completes before the history thread, the message to delegate back to the |
| 664 // main thread will not run and the reference will leak. Therefore, don't do |
| 665 // anything on destruction. |
| 666 } |
| 667 |
| 668 // static |
| 669 int HistoryURLProvider::CalculateRelevance(MatchType match_type, |
| 670 int match_number) { |
| 671 switch (match_type) { |
| 672 case INLINE_AUTOCOMPLETE: |
| 673 return kScoreForBestInlineableResult; |
| 674 |
| 675 case UNVISITED_INTRANET: |
| 676 return kScoreForUnvisitedIntranetResult; |
| 677 |
| 678 case WHAT_YOU_TYPED: |
| 679 return kScoreForWhatYouTypedResult; |
| 680 |
| 681 default: // NORMAL |
| 682 return kBaseScoreForNonInlineableResult + match_number; |
| 683 } |
| 684 } |
| 685 |
| 686 // static |
| 687 ACMatchClassifications HistoryURLProvider::ClassifyDescription( |
| 688 const base::string16& input_text, |
| 689 const base::string16& description) { |
| 690 base::string16 clean_description = |
| 691 bookmarks::CleanUpTitleForMatching(description); |
| 692 TermMatches description_matches(SortAndDeoverlapMatches( |
| 693 MatchTermInString(input_text, clean_description, 0))); |
| 694 WordStarts description_word_starts; |
| 695 String16VectorFromString16(clean_description, false, |
| 696 &description_word_starts); |
| 697 // If HistoryURL retrieves any matches (and hence we reach this code), we |
| 698 // are guaranteed that the beginning of input_text must be a word break. |
| 699 WordStarts offsets(1, 0u); |
| 700 description_matches = ScoredHistoryMatch::FilterTermMatchesByWordStarts( |
| 701 description_matches, offsets, description_word_starts, 0, |
| 702 std::string::npos); |
| 703 return SpansFromTermMatch( |
| 704 description_matches, clean_description.length(), false); |
| 705 } |
| 706 |
| 707 void HistoryURLProvider::DoAutocomplete(history::HistoryBackend* backend, |
| 708 history::URLDatabase* db, |
| 709 HistoryURLProviderParams* params) { |
| 710 // Get the matching URLs from the DB. |
| 711 params->matches.clear(); |
| 712 history::URLRows url_matches; |
| 713 const URLPrefixes& prefixes = URLPrefix::GetURLPrefixes(); |
| 714 for (URLPrefixes::const_iterator i(prefixes.begin()); i != prefixes.end(); |
| 715 ++i) { |
| 716 if (params->cancel_flag.IsSet()) |
| 717 return; // Canceled in the middle of a query, give up. |
| 718 |
| 719 // We only need kMaxMatches results in the end, but before we get there we |
| 720 // need to promote lower-quality matches that are prefixes of higher-quality |
| 721 // matches, and remove lower-quality redirects. So we ask for more results |
| 722 // than we need, of every prefix type, in hopes this will give us far more |
| 723 // than enough to work with. CullRedirects() will then reduce the list to |
| 724 // the best kMaxMatches results. |
| 725 db->AutocompleteForPrefix( |
| 726 base::UTF16ToUTF8(i->prefix + params->input.text()), kMaxMatches * 2, |
| 727 !backend, &url_matches); |
| 728 for (history::URLRows::const_iterator j(url_matches.begin()); |
| 729 j != url_matches.end(); ++j) { |
| 730 const URLPrefix* best_prefix = URLPrefix::BestURLPrefix( |
| 731 base::UTF8ToUTF16(j->url().spec()), base::string16()); |
| 732 DCHECK(best_prefix); |
| 733 params->matches.push_back(history::HistoryMatch( |
| 734 *j, i->prefix.length(), !i->num_components, |
| 735 i->num_components >= best_prefix->num_components)); |
| 736 } |
| 737 } |
| 738 |
| 739 // Create sorted list of suggestions. |
| 740 CullPoorMatches(params); |
| 741 SortAndDedupMatches(¶ms->matches); |
| 742 |
| 743 // Try to create a shorter suggestion from the best match. |
| 744 // We consider the what you typed match eligible for display when it's |
| 745 // navigable and there's a reasonable chance the user intended to do |
| 746 // something other than search. We use a variety of heuristics to determine |
| 747 // this, e.g. whether the user explicitly typed a scheme, or if omnibox |
| 748 // searching has been disabled by policy. In the cases where we've parsed as |
| 749 // UNKNOWN, we'll still show an accidental search infobar if need be. |
| 750 VisitClassifier classifier(this, params->input, db); |
| 751 params->have_what_you_typed_match = |
| 752 (params->input.type() != metrics::OmniboxInputType::QUERY) && |
| 753 ((params->input.type() != metrics::OmniboxInputType::UNKNOWN) || |
| 754 (classifier.type() == VisitClassifier::UNVISITED_INTRANET) || |
| 755 !params->trim_http || |
| 756 (AutocompleteInput::NumNonHostComponents(params->input.parts()) > 0) || |
| 757 !params->default_search_provider); |
| 758 const bool have_shorter_suggestion_suitable_for_inline_autocomplete = |
| 759 PromoteOrCreateShorterSuggestion(db, params); |
| 760 |
| 761 // Check whether what the user typed appears in history. |
| 762 const bool can_check_history_for_exact_match = |
| 763 // Checking what_you_typed_match.allowed_to_be_default_match tells us |
| 764 // whether SuggestExactInput() succeeded in constructing a valid match. |
| 765 params->what_you_typed_match.allowed_to_be_default_match && |
| 766 // Additionally, in the case where the user has typed "foo.com" and |
| 767 // visited (but not typed) "foo/", and the input is "foo", the first pass |
| 768 // will fall into the FRONT_HISTORY_MATCH case for "foo.com" but the |
| 769 // second pass can suggest the exact input as a better URL. Since we need |
| 770 // both passes to agree, and since during the first pass there's no way to |
| 771 // know about "foo/", ensure that if the promote type was set to |
| 772 // FRONT_HISTORY_MATCH during the first pass, the second pass will not |
| 773 // consider the exact suggestion to be in history and therefore will not |
| 774 // suggest the exact input as a better match. (Note that during the first |
| 775 // pass, this conditional will always succeed since |promote_type| is |
| 776 // initialized to NEITHER.) |
| 777 (params->promote_type != HistoryURLProviderParams::FRONT_HISTORY_MATCH); |
| 778 params->exact_suggestion_is_in_history = can_check_history_for_exact_match && |
| 779 FixupExactSuggestion(db, classifier, params); |
| 780 |
| 781 // If we succeeded in fixing up the exact match based on the user's history, |
| 782 // we should treat it as the best match regardless of input type. If not, |
| 783 // then we check whether there's an inline autocompletion we can create from |
| 784 // this input, so we can promote that as the best match. |
| 785 if (params->exact_suggestion_is_in_history) { |
| 786 params->promote_type = HistoryURLProviderParams::WHAT_YOU_TYPED_MATCH; |
| 787 } else if (!params->matches.empty() && |
| 788 (have_shorter_suggestion_suitable_for_inline_autocomplete || |
| 789 CanPromoteMatchForInlineAutocomplete(params->matches[0]))) { |
| 790 // Note that we promote this inline-autocompleted match even when |
| 791 // params->prevent_inline_autocomplete is true. This is safe because in |
| 792 // this case the match will be marked as "not allowed to be default", and |
| 793 // a non-inlined match that is "allowed to be default" will be reordered |
| 794 // above it by the controller/AutocompleteResult. We ensure there is such |
| 795 // a match in two ways: |
| 796 // * If params->have_what_you_typed_match is true, we force the |
| 797 // what-you-typed match to be added in this case. See comments in |
| 798 // PromoteMatchesIfNecessary(). |
| 799 // * Otherwise, we should have some sort of QUERY or UNKNOWN input that |
| 800 // the SearchProvider will provide a defaultable WYT match for. |
| 801 params->promote_type = HistoryURLProviderParams::FRONT_HISTORY_MATCH; |
| 802 } else { |
| 803 // Failed to promote any URLs. Use the What You Typed match, if we have it. |
| 804 params->promote_type = params->have_what_you_typed_match ? |
| 805 HistoryURLProviderParams::WHAT_YOU_TYPED_MATCH : |
| 806 HistoryURLProviderParams::NEITHER; |
| 807 } |
| 808 |
| 809 const size_t max_results = |
| 810 kMaxMatches + (params->exact_suggestion_is_in_history ? 1 : 0); |
| 811 if (backend) { |
| 812 // Remove redirects and trim list to size. We want to provide up to |
| 813 // kMaxMatches results plus the What You Typed result, if it was added to |
| 814 // params->matches above. |
| 815 CullRedirects(backend, ¶ms->matches, max_results); |
| 816 } else if (params->matches.size() > max_results) { |
| 817 // Simply trim the list to size. |
| 818 params->matches.resize(max_results); |
| 819 } |
| 820 } |
| 821 |
| 822 void HistoryURLProvider::PromoteMatchesIfNecessary( |
| 823 const HistoryURLProviderParams& params) { |
| 824 if (params.promote_type == HistoryURLProviderParams::NEITHER) |
| 825 return; |
| 826 if (params.promote_type == HistoryURLProviderParams::FRONT_HISTORY_MATCH) { |
| 827 matches_.push_back( |
| 828 HistoryMatchToACMatch(params, 0, INLINE_AUTOCOMPLETE, |
| 829 CalculateRelevance(INLINE_AUTOCOMPLETE, 0))); |
| 830 } |
| 831 // There are two cases where we need to add the what-you-typed-match: |
| 832 // * If params.promote_type is WHAT_YOU_TYPED_MATCH, we're being explicitly |
| 833 // directed to. |
| 834 // * If params.have_what_you_typed_match is true, then params.promote_type |
| 835 // can't be NEITHER (see code near the end of DoAutocomplete()), so if |
| 836 // it's not WHAT_YOU_TYPED_MATCH, it must be FRONT_HISTORY_MATCH, and |
| 837 // we'll have promoted the history match above. If |
| 838 // params.prevent_inline_autocomplete is also true, then this match |
| 839 // will be marked "not allowed to be default", and we need to add the |
| 840 // what-you-typed match to ensure there's a legal default match for the |
| 841 // controller/AutocompleteResult to promote. (If |
| 842 // params.have_what_you_typed_match is false, the SearchProvider should |
| 843 // take care of adding this defaultable match.) |
| 844 if ((params.promote_type == HistoryURLProviderParams::WHAT_YOU_TYPED_MATCH) || |
| 845 (params.prevent_inline_autocomplete && |
| 846 params.have_what_you_typed_match)) { |
| 847 matches_.push_back(params.what_you_typed_match); |
| 848 } |
| 849 } |
| 850 |
| 851 void HistoryURLProvider::QueryComplete( |
| 852 HistoryURLProviderParams* params_gets_deleted) { |
| 853 // Ensure |params_gets_deleted| gets deleted on exit. |
| 854 scoped_ptr<HistoryURLProviderParams> params(params_gets_deleted); |
| 855 |
| 856 // If the user hasn't already started another query, clear our member pointer |
| 857 // so we can't write into deleted memory. |
| 858 if (params_ == params_gets_deleted) |
| 859 params_ = NULL; |
| 860 |
| 861 // Don't send responses for queries that have been canceled. |
| 862 if (params->cancel_flag.IsSet()) |
| 863 return; // Already set done_ when we canceled, no need to set it again. |
| 864 |
| 865 // Don't modify |matches_| if the query failed, since it might have a default |
| 866 // match in it, whereas |params->matches| will be empty. |
| 867 if (!params->failed) { |
| 868 matches_.clear(); |
| 869 PromoteMatchesIfNecessary(*params); |
| 870 |
| 871 // Determine relevance of highest scoring match, if any. |
| 872 int relevance = matches_.empty() ? |
| 873 CalculateRelevance(NORMAL, |
| 874 static_cast<int>(params->matches.size() - 1)) : |
| 875 matches_[0].relevance; |
| 876 |
| 877 // Convert the history matches to autocomplete matches. If we promoted the |
| 878 // first match, skip over it. |
| 879 const size_t first_match = |
| 880 (params->exact_suggestion_is_in_history || |
| 881 (params->promote_type == |
| 882 HistoryURLProviderParams::FRONT_HISTORY_MATCH)) ? 1 : 0; |
| 883 for (size_t i = first_match; i < params->matches.size(); ++i) { |
| 884 // All matches score one less than the previous match. |
| 885 --relevance; |
| 886 // The experimental scoring must not change the top result's score. |
| 887 if (!matches_.empty()) { |
| 888 relevance = CalculateRelevanceScoreUsingScoringParams( |
| 889 params->matches[i], relevance, scoring_params_); |
| 890 } |
| 891 matches_.push_back(HistoryMatchToACMatch(*params, i, NORMAL, relevance)); |
| 892 } |
| 893 } |
| 894 |
| 895 done_ = true; |
| 896 listener_->OnProviderUpdate(true); |
| 897 } |
| 898 |
| 899 bool HistoryURLProvider::FixupExactSuggestion( |
| 900 history::URLDatabase* db, |
| 901 const VisitClassifier& classifier, |
| 902 HistoryURLProviderParams* params) const { |
| 903 MatchType type = INLINE_AUTOCOMPLETE; |
| 904 switch (classifier.type()) { |
| 905 case VisitClassifier::INVALID: |
| 906 return false; |
| 907 case VisitClassifier::UNVISITED_INTRANET: |
| 908 type = UNVISITED_INTRANET; |
| 909 break; |
| 910 default: |
| 911 DCHECK_EQ(VisitClassifier::VISITED, classifier.type()); |
| 912 // We have data for this match, use it. |
| 913 params->what_you_typed_match.deletable = true; |
| 914 params->what_you_typed_match.description = classifier.url_row().title(); |
| 915 RecordAdditionalInfoFromUrlRow(classifier.url_row(), |
| 916 ¶ms->what_you_typed_match); |
| 917 params->what_you_typed_match.description_class = ClassifyDescription( |
| 918 params->input.text(), params->what_you_typed_match.description); |
| 919 if (!classifier.url_row().typed_count()) { |
| 920 // If we reach here, we must be in the second pass, and we must not have |
| 921 // this row's data available during the first pass. That means we |
| 922 // either scored it as WHAT_YOU_TYPED or UNVISITED_INTRANET, and to |
| 923 // maintain the ordering between passes consistent, we need to score it |
| 924 // the same way here. |
| 925 type = CanFindIntranetURL(db, params->input) ? |
| 926 UNVISITED_INTRANET : WHAT_YOU_TYPED; |
| 927 } |
| 928 break; |
| 929 } |
| 930 |
| 931 params->what_you_typed_match.relevance = CalculateRelevance(type, 0); |
| 932 |
| 933 // If there are any other matches, then don't promote this match here, in |
| 934 // hopes the caller will be able to inline autocomplete a better suggestion. |
| 935 // DoAutocomplete() will fall back on this match if inline autocompletion |
| 936 // fails. This matches how we react to never-visited URL inputs in the non- |
| 937 // intranet case. |
| 938 if (type == UNVISITED_INTRANET && !params->matches.empty()) |
| 939 return false; |
| 940 |
| 941 // Put it on the front of the HistoryMatches for redirect culling. |
| 942 CreateOrPromoteMatch(classifier.url_row(), base::string16::npos, false, |
| 943 ¶ms->matches, true, true); |
| 944 return true; |
| 945 } |
| 946 |
| 947 bool HistoryURLProvider::CanFindIntranetURL( |
| 948 history::URLDatabase* db, |
| 949 const AutocompleteInput& input) const { |
| 950 // Normally passing the first two conditions below ought to guarantee the |
| 951 // third condition, but because FixupUserInput() can run and modify the |
| 952 // input's text and parts between Parse() and here, it seems better to be |
| 953 // paranoid and check. |
| 954 if ((input.type() != metrics::OmniboxInputType::UNKNOWN) || |
| 955 !base::LowerCaseEqualsASCII(input.scheme(), url::kHttpScheme) || |
| 956 !input.parts().host.is_nonempty()) |
| 957 return false; |
| 958 const std::string host(base::UTF16ToUTF8( |
| 959 input.text().substr(input.parts().host.begin, input.parts().host.len))); |
| 960 const size_t registry_length = |
| 961 net::registry_controlled_domains::GetRegistryLength( |
| 962 host, |
| 963 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, |
| 964 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); |
| 965 return registry_length == 0 && db->IsTypedHost(host); |
| 966 } |
| 967 |
| 968 bool HistoryURLProvider::PromoteOrCreateShorterSuggestion( |
| 969 history::URLDatabase* db, |
| 970 HistoryURLProviderParams* params) { |
| 971 if (params->matches.empty()) |
| 972 return false; // No matches, nothing to do. |
| 973 |
| 974 // Determine the base URL from which to search, and whether that URL could |
| 975 // itself be added as a match. We can add the base iff it's not "effectively |
| 976 // the same" as any "what you typed" match. |
| 977 const history::HistoryMatch& match = params->matches[0]; |
| 978 GURL search_base = ConvertToHostOnly(match, params->input.text()); |
| 979 bool can_add_search_base_to_matches = !params->have_what_you_typed_match; |
| 980 if (search_base.is_empty()) { |
| 981 // Search from what the user typed when we couldn't reduce the best match |
| 982 // to a host. Careful: use a substring of |match| here, rather than the |
| 983 // first match in |params|, because they might have different prefixes. If |
| 984 // the user typed "google.com", params->what_you_typed_match will hold |
| 985 // "http://google.com/", but |match| might begin with |
| 986 // "http://www.google.com/". |
| 987 // TODO: this should be cleaned up, and is probably incorrect for IDN. |
| 988 std::string new_match = match.url_info.url().possibly_invalid_spec(). |
| 989 substr(0, match.input_location + params->input.text().length()); |
| 990 search_base = GURL(new_match); |
| 991 if (search_base.is_empty()) |
| 992 return false; // Can't construct a URL from which to start a search. |
| 993 } else if (!can_add_search_base_to_matches) { |
| 994 can_add_search_base_to_matches = |
| 995 (search_base != params->what_you_typed_match.destination_url); |
| 996 } |
| 997 if (search_base == match.url_info.url()) |
| 998 return false; // Couldn't shorten |match|, so no URLs to search over. |
| 999 |
| 1000 // Search the DB for short URLs between our base and |match|. |
| 1001 history::URLRow info(search_base); |
| 1002 bool promote = true; |
| 1003 // A short URL is only worth suggesting if it's been visited at least a third |
| 1004 // as often as the longer URL. |
| 1005 const int min_visit_count = ((match.url_info.visit_count() - 1) / 3) + 1; |
| 1006 // For stability between the in-memory and on-disk autocomplete passes, when |
| 1007 // the long URL has been typed before, only suggest shorter URLs that have |
| 1008 // also been typed. Otherwise, the on-disk pass could suggest a shorter URL |
| 1009 // (which hasn't been typed) that the in-memory pass doesn't know about, |
| 1010 // thereby making the top match, and thus the behavior of inline |
| 1011 // autocomplete, unstable. |
| 1012 const int min_typed_count = match.url_info.typed_count() ? 1 : 0; |
| 1013 if (!db->FindShortestURLFromBase(search_base.possibly_invalid_spec(), |
| 1014 match.url_info.url().possibly_invalid_spec(), min_visit_count, |
| 1015 min_typed_count, can_add_search_base_to_matches, &info)) { |
| 1016 if (!can_add_search_base_to_matches) |
| 1017 return false; // Couldn't find anything and can't add the search base. |
| 1018 |
| 1019 // Try to get info on the search base itself. Promote it to the top if the |
| 1020 // original best match isn't good enough to autocomplete. |
| 1021 db->GetRowForURL(search_base, &info); |
| 1022 promote = match.url_info.typed_count() <= 1; |
| 1023 } |
| 1024 |
| 1025 // Promote or add the desired URL to the list of matches. |
| 1026 const bool ensure_can_inline = |
| 1027 promote && CanPromoteMatchForInlineAutocomplete(match); |
| 1028 return CreateOrPromoteMatch(info, match.input_location, match.match_in_scheme, |
| 1029 ¶ms->matches, true, promote) && |
| 1030 ensure_can_inline; |
| 1031 } |
| 1032 |
| 1033 void HistoryURLProvider::CullPoorMatches( |
| 1034 HistoryURLProviderParams* params) const { |
| 1035 const base::Time& threshold(history::AutocompleteAgeThreshold()); |
| 1036 for (history::HistoryMatches::iterator i(params->matches.begin()); |
| 1037 i != params->matches.end(); ) { |
| 1038 if (RowQualifiesAsSignificant(i->url_info, threshold) && |
| 1039 (!params->default_search_provider || |
| 1040 !params->default_search_provider->IsSearchURL( |
| 1041 i->url_info.url(), *params->search_terms_data))) { |
| 1042 ++i; |
| 1043 } else { |
| 1044 i = params->matches.erase(i); |
| 1045 } |
| 1046 } |
| 1047 } |
| 1048 |
| 1049 void HistoryURLProvider::CullRedirects(history::HistoryBackend* backend, |
| 1050 history::HistoryMatches* matches, |
| 1051 size_t max_results) const { |
| 1052 for (size_t source = 0; |
| 1053 (source < matches->size()) && (source < max_results); ) { |
| 1054 const GURL& url = (*matches)[source].url_info.url(); |
| 1055 // TODO(brettw) this should go away when everything uses GURL. |
| 1056 history::RedirectList redirects; |
| 1057 backend->QueryRedirectsFrom(url, &redirects); |
| 1058 if (!redirects.empty()) { |
| 1059 // Remove all but the first occurrence of any of these redirects in the |
| 1060 // search results. We also must add the URL we queried for, since it may |
| 1061 // not be the first match and we'd want to remove it. |
| 1062 // |
| 1063 // For example, when A redirects to B and our matches are [A, X, B], |
| 1064 // we'll get B as the redirects from, and we want to remove the second |
| 1065 // item of that pair, removing B. If A redirects to B and our matches are |
| 1066 // [B, X, A], we'll want to remove A instead. |
| 1067 redirects.push_back(url); |
| 1068 source = RemoveSubsequentMatchesOf(matches, source, redirects); |
| 1069 } else { |
| 1070 // Advance to next item. |
| 1071 source++; |
| 1072 } |
| 1073 } |
| 1074 |
| 1075 if (matches->size() > max_results) |
| 1076 matches->resize(max_results); |
| 1077 } |
| 1078 |
| 1079 size_t HistoryURLProvider::RemoveSubsequentMatchesOf( |
| 1080 history::HistoryMatches* matches, |
| 1081 size_t source_index, |
| 1082 const std::vector<GURL>& remove) const { |
| 1083 size_t next_index = source_index + 1; // return value = item after source |
| 1084 |
| 1085 // Find the first occurrence of any URL in the redirect chain. We want to |
| 1086 // keep this one since it is rated the highest. |
| 1087 history::HistoryMatches::iterator first(std::find_first_of( |
| 1088 matches->begin(), matches->end(), remove.begin(), remove.end(), |
| 1089 history::HistoryMatch::EqualsGURL)); |
| 1090 DCHECK(first != matches->end()) << "We should have always found at least the " |
| 1091 "original URL."; |
| 1092 |
| 1093 // Find any following occurrences of any URL in the redirect chain, these |
| 1094 // should be deleted. |
| 1095 for (history::HistoryMatches::iterator next(std::find_first_of(first + 1, |
| 1096 matches->end(), remove.begin(), remove.end(), |
| 1097 history::HistoryMatch::EqualsGURL)); |
| 1098 next != matches->end(); next = std::find_first_of(next, matches->end(), |
| 1099 remove.begin(), remove.end(), history::HistoryMatch::EqualsGURL)) { |
| 1100 // Remove this item. When we remove an item before the source index, we |
| 1101 // need to shift it to the right and remember that so we can return it. |
| 1102 next = matches->erase(next); |
| 1103 if (static_cast<size_t>(next - matches->begin()) < next_index) |
| 1104 --next_index; |
| 1105 } |
| 1106 return next_index; |
| 1107 } |
| 1108 |
| 1109 AutocompleteMatch HistoryURLProvider::HistoryMatchToACMatch( |
| 1110 const HistoryURLProviderParams& params, |
| 1111 size_t match_number, |
| 1112 MatchType match_type, |
| 1113 int relevance) { |
| 1114 // The FormattedStringWithEquivalentMeaning() call below requires callers to |
| 1115 // be on the main thread. |
| 1116 DCHECK(thread_checker_.CalledOnValidThread()); |
| 1117 |
| 1118 const history::HistoryMatch& history_match = params.matches[match_number]; |
| 1119 const history::URLRow& info = history_match.url_info; |
| 1120 AutocompleteMatch match(this, relevance, |
| 1121 !!info.visit_count(), AutocompleteMatchType::HISTORY_URL); |
| 1122 match.typed_count = info.typed_count(); |
| 1123 match.destination_url = info.url(); |
| 1124 DCHECK(match.destination_url.is_valid()); |
| 1125 size_t inline_autocomplete_offset = |
| 1126 history_match.input_location + params.input.text().length(); |
| 1127 std::string languages = (match_type == WHAT_YOU_TYPED) ? |
| 1128 std::string() : params.languages; |
| 1129 const net::FormatUrlTypes format_types = net::kFormatUrlOmitAll & |
| 1130 ~((params.trim_http && !history_match.match_in_scheme) ? |
| 1131 0 : net::kFormatUrlOmitHTTP); |
| 1132 match.fill_into_edit = |
| 1133 AutocompleteInput::FormattedStringWithEquivalentMeaning( |
| 1134 info.url(), net::FormatUrl(info.url(), languages, format_types, |
| 1135 net::UnescapeRule::SPACES, NULL, NULL, |
| 1136 &inline_autocomplete_offset), |
| 1137 client()->SchemeClassifier()); |
| 1138 if (!params.prevent_inline_autocomplete && |
| 1139 (inline_autocomplete_offset != base::string16::npos)) { |
| 1140 DCHECK(inline_autocomplete_offset <= match.fill_into_edit.length()); |
| 1141 match.inline_autocompletion = |
| 1142 match.fill_into_edit.substr(inline_autocomplete_offset); |
| 1143 } |
| 1144 // The latter part of the test effectively asks "is the inline completion |
| 1145 // empty?" (i.e., is this match effectively the what-you-typed match?). |
| 1146 match.allowed_to_be_default_match = !params.prevent_inline_autocomplete || |
| 1147 ((inline_autocomplete_offset != base::string16::npos) && |
| 1148 (inline_autocomplete_offset >= match.fill_into_edit.length())); |
| 1149 |
| 1150 size_t match_start = history_match.input_location; |
| 1151 match.contents = net::FormatUrl(info.url(), languages, |
| 1152 format_types, net::UnescapeRule::SPACES, NULL, NULL, &match_start); |
| 1153 if ((match_start != base::string16::npos) && |
| 1154 (inline_autocomplete_offset != base::string16::npos) && |
| 1155 (inline_autocomplete_offset != match_start)) { |
| 1156 DCHECK(inline_autocomplete_offset > match_start); |
| 1157 AutocompleteMatch::ClassifyLocationInString(match_start, |
| 1158 inline_autocomplete_offset - match_start, match.contents.length(), |
| 1159 ACMatchClassification::URL, &match.contents_class); |
| 1160 } else { |
| 1161 AutocompleteMatch::ClassifyLocationInString(base::string16::npos, 0, |
| 1162 match.contents.length(), ACMatchClassification::URL, |
| 1163 &match.contents_class); |
| 1164 } |
| 1165 match.description = info.title(); |
| 1166 match.description_class = |
| 1167 ClassifyDescription(params.input.text(), match.description); |
| 1168 RecordAdditionalInfoFromUrlRow(info, &match); |
| 1169 return match; |
| 1170 } |
OLD | NEW |