OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/autocomplete/history_url_provider.h" | 5 #include "chrome/browser/autocomplete/history_url_provider.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 | 8 |
9 #include "base/basictypes.h" | 9 #include "base/basictypes.h" |
10 #include "base/message_loop.h" | 10 #include "base/message_loop.h" |
11 #include "base/metrics/histogram.h" | 11 #include "base/metrics/histogram.h" |
12 #include "base/string_util.h" | 12 #include "base/string_util.h" |
13 #include "base/utf_string_conversions.h" | 13 #include "base/utf_string_conversions.h" |
14 #include "chrome/browser/autocomplete/autocomplete_match.h" | 14 #include "chrome/browser/autocomplete/autocomplete_match.h" |
15 #include "chrome/browser/history/history.h" | 15 #include "chrome/browser/history/history.h" |
16 #include "chrome/browser/history/history_backend.h" | 16 #include "chrome/browser/history/history_backend.h" |
17 #include "chrome/browser/history/history_database.h" | 17 #include "chrome/browser/history/history_database.h" |
18 #include "chrome/browser/history/history_types.h" | 18 #include "chrome/browser/history/history_types.h" |
19 #include "chrome/browser/net/url_fixer_upper.h" | 19 #include "chrome/browser/net/url_fixer_upper.h" |
20 #include "chrome/browser/prefs/pref_service.h" | 20 #include "chrome/browser/prefs/pref_service.h" |
21 #include "chrome/browser/profiles/profile.h" | 21 #include "chrome/browser/profiles/profile.h" |
22 #include "chrome/common/pref_names.h" | 22 #include "chrome/common/pref_names.h" |
23 #include "chrome/common/url_constants.h" | 23 #include "chrome/common/url_constants.h" |
24 #include "googleurl/src/gurl.h" | 24 #include "googleurl/src/gurl.h" |
25 #include "googleurl/src/url_parse.h" | 25 #include "googleurl/src/url_parse.h" |
26 #include "googleurl/src/url_util.h" | 26 #include "googleurl/src/url_util.h" |
27 #include "net/base/net_util.h" | 27 #include "net/base/net_util.h" |
28 | 28 |
29 using base::Time; | 29 namespace { |
30 using base::TimeDelta; | |
31 using base::TimeTicks; | |
32 using history::Prefix; | |
33 using history::Prefixes; | |
34 using history::HistoryMatch; | |
35 using history::HistoryMatches; | |
36 | 30 |
37 namespace history { | 31 // Ensures that |matches| contains an entry for |info|, which may mean adding a |
| 32 // new such entry (using |input_location| and |match_in_scheme|). |
| 33 // |
| 34 // If |promote| is true, this also ensures the entry is the first element in |
| 35 // |matches|, moving or adding it to the front as appropriate. When |promote| |
| 36 // is false, existing matches are left in place, and newly added matches are |
| 37 // placed at the back. |
| 38 void EnsureMatchPresent(const history::URLRow& info, |
| 39 size_t input_location, |
| 40 bool match_in_scheme, |
| 41 history::HistoryMatches* matches, |
| 42 bool promote) { |
| 43 // |matches| may already have an entry for this. |
| 44 for (history::HistoryMatches::iterator i(matches->begin()); |
| 45 i != matches->end(); ++i) { |
| 46 if (i->url_info.url() == info.url()) { |
| 47 // Rotate it to the front if the caller wishes. |
| 48 if (promote) |
| 49 std::rotate(matches->begin(), i, i + 1); |
| 50 return; |
| 51 } |
| 52 } |
38 | 53 |
39 // Returns true if |url| is just a host (e.g. "http://www.google.com/") and | 54 // No entry, so create one. |
40 // not some other subpage (e.g. "http://www.google.com/foo.html"). | 55 history::HistoryMatch match(info, input_location, match_in_scheme, true); |
| 56 if (promote) |
| 57 matches->push_front(match); |
| 58 else |
| 59 matches->push_back(match); |
| 60 } |
| 61 |
| 62 // Given the user's |input| and a |match| created from it, reduce the match's |
| 63 // URL to just a host. If this host still matches the user input, return it. |
| 64 // Returns the empty string on failure. |
| 65 GURL ConvertToHostOnly(const history::HistoryMatch& match, |
| 66 const string16& input) { |
| 67 // See if we should try to do host-only suggestions for this URL. Nonstandard |
| 68 // schemes means there's no authority section, so suggesting the host name |
| 69 // is useless. File URLs are standard, but host suggestion is not useful for |
| 70 // them either. |
| 71 const GURL& url = match.url_info.url(); |
| 72 if (!url.is_valid() || !url.IsStandard() || url.SchemeIsFile()) |
| 73 return GURL(); |
| 74 |
| 75 // Transform to a host-only match. Bail if the host no longer matches the |
| 76 // user input (e.g. because the user typed more than just a host). |
| 77 GURL host = url.GetWithEmptyPath(); |
| 78 if ((host.spec().length() < (match.input_location + input.length()))) |
| 79 return GURL(); // User typing is longer than this host suggestion. |
| 80 |
| 81 const string16 spec = UTF8ToUTF16(host.spec()); |
| 82 if (spec.compare(match.input_location, input.length(), input)) |
| 83 return GURL(); // User typing is no longer a prefix. |
| 84 |
| 85 return host; |
| 86 } |
| 87 |
| 88 // See if a shorter version of the best match should be created, and if so place |
| 89 // it at the front of |matches|. This can suggest history URLs that are |
| 90 // prefixes of the best match (if they've been visited enough, compared to the |
| 91 // best match), or create host-only suggestions even when they haven't been |
| 92 // visited before: if the user visited http://example.com/asdf once, we'll |
| 93 // suggest http://example.com/ even if they've never been to it. |
| 94 void PromoteOrCreateShorterSuggestion( |
| 95 history::URLDatabase* db, |
| 96 const HistoryURLProviderParams& params, |
| 97 bool have_what_you_typed_match, |
| 98 const AutocompleteMatch& what_you_typed_match, |
| 99 history::HistoryMatches* matches) { |
| 100 if (matches->empty()) |
| 101 return; // No matches, nothing to do. |
| 102 |
| 103 // Determine the base URL from which to search, and whether that URL could |
| 104 // itself be added as a match. We can add the base iff it's not "effectively |
| 105 // the same" as any "what you typed" match. |
| 106 const history::HistoryMatch& match = matches->front(); |
| 107 GURL search_base = ConvertToHostOnly(match, params.input.text()); |
| 108 bool can_add_search_base_to_matches = !have_what_you_typed_match; |
| 109 if (search_base.is_empty()) { |
| 110 // Search from what the user typed when we couldn't reduce the best match |
| 111 // to a host. Careful: use a substring of |match| here, rather than the |
| 112 // first match in |params|, because they might have different prefixes. If |
| 113 // the user typed "google.com", |what_you_typed_match| will hold |
| 114 // "http://google.com/", but |match| might begin with |
| 115 // "http://www.google.com/". |
| 116 // TODO: this should be cleaned up, and is probably incorrect for IDN. |
| 117 std::string new_match = match.url_info.url().possibly_invalid_spec(). |
| 118 substr(0, match.input_location + params.input.text().length()); |
| 119 search_base = GURL(new_match); |
| 120 // TODO(mrossetti): There is a degenerate case where the following may |
| 121 // cause a failure: http://www/~someword/fubar.html. Diagnose. |
| 122 // See: http://crbug.com/50101 |
| 123 if (search_base.is_empty()) |
| 124 return; // Can't construct a valid URL from which to start a search. |
| 125 } else if (!can_add_search_base_to_matches) { |
| 126 can_add_search_base_to_matches = |
| 127 (search_base != what_you_typed_match.destination_url); |
| 128 } |
| 129 if (search_base == match.url_info.url()) |
| 130 return; // Couldn't shorten |match|, so no range of URLs to search over. |
| 131 |
| 132 // Search the DB for short URLs between our base and |match|. |
| 133 history::URLRow info(search_base); |
| 134 bool promote = true; |
| 135 // A short URL is only worth suggesting if it's been visited at least a third |
| 136 // as often as the longer URL. |
| 137 const int min_visit_count = ((match.url_info.visit_count() - 1) / 3) + 1; |
| 138 // For stability between the in-memory and on-disk autocomplete passes, when |
| 139 // the long URL has been typed before, only suggest shorter URLs that have |
| 140 // also been typed. Otherwise, the on-disk pass could suggest a shorter URL |
| 141 // (which hasn't been typed) that the in-memory pass doesn't know about, |
| 142 // thereby making the top match, and thus the behavior of inline |
| 143 // autocomplete, unstable. |
| 144 const int min_typed_count = match.url_info.typed_count() ? 1 : 0; |
| 145 if (!db->FindShortestURLFromBase(search_base.possibly_invalid_spec(), |
| 146 match.url_info.url().possibly_invalid_spec(), min_visit_count, |
| 147 min_typed_count, can_add_search_base_to_matches, &info)) { |
| 148 if (!can_add_search_base_to_matches) |
| 149 return; // Couldn't find anything and can't add the search base, bail. |
| 150 |
| 151 // Try to get info on the search base itself. Promote it to the top if the |
| 152 // original best match isn't good enough to autocomplete. |
| 153 db->GetRowForURL(search_base, &info); |
| 154 promote = match.url_info.typed_count() <= 1; |
| 155 } |
| 156 |
| 157 // Promote or add the desired URL to the list of matches. |
| 158 EnsureMatchPresent(info, match.input_location, match.match_in_scheme, |
| 159 matches, promote); |
| 160 } |
| 161 |
| 162 // Returns true if |url| is just a host (e.g. "http://www.google.com/") and not |
| 163 // some other subpage (e.g. "http://www.google.com/foo.html"). |
41 bool IsHostOnly(const GURL& url) { | 164 bool IsHostOnly(const GURL& url) { |
42 DCHECK(url.is_valid()); | 165 DCHECK(url.is_valid()); |
43 return (!url.has_path() || (url.path() == "/")) && !url.has_query() && | 166 return (!url.has_path() || (url.path() == "/")) && !url.has_query() && |
44 !url.has_ref(); | 167 !url.has_ref(); |
45 } | 168 } |
46 | 169 |
47 // Acts like the > operator for URLInfo classes. | 170 // Acts like the > operator for URLInfo classes. |
48 bool CompareHistoryMatch(const HistoryMatch& a, const HistoryMatch& b) { | 171 bool CompareHistoryMatch(const history::HistoryMatch& a, |
| 172 const history::HistoryMatch& b) { |
49 // A URL that has been typed at all is better than one that has never been | 173 // A URL that has been typed at all is better than one that has never been |
50 // typed. (Note "!"s on each side) | 174 // typed. (Note "!"s on each side) |
51 if (!a.url_info.typed_count() != !b.url_info.typed_count()) | 175 if (!a.url_info.typed_count() != !b.url_info.typed_count()) |
52 return a.url_info.typed_count() > b.url_info.typed_count(); | 176 return a.url_info.typed_count() > b.url_info.typed_count(); |
53 | 177 |
54 // Innermost matches (matches after any scheme or "www.") are better than | 178 // Innermost matches (matches after any scheme or "www.") are better than |
55 // non-innermost matches. | 179 // non-innermost matches. |
56 if (a.innermost_match != b.innermost_match) | 180 if (a.innermost_match != b.innermost_match) |
57 return a.innermost_match; | 181 return a.innermost_match; |
58 | 182 |
59 // URLs that have been typed more often are better. | 183 // URLs that have been typed more often are better. |
60 if (a.url_info.typed_count() != b.url_info.typed_count()) | 184 if (a.url_info.typed_count() != b.url_info.typed_count()) |
61 return a.url_info.typed_count() > b.url_info.typed_count(); | 185 return a.url_info.typed_count() > b.url_info.typed_count(); |
62 | 186 |
63 // For URLs that have each been typed once, a host (alone) is better than a | 187 // For URLs that have each been typed once, a host (alone) is better than a |
64 // page inside. | 188 // page inside. |
65 if (a.url_info.typed_count() == 1) { | 189 if (a.url_info.typed_count() == 1) { |
66 const bool a_is_host_only = history::IsHostOnly(a.url_info.url()); | 190 const bool a_is_host_only = IsHostOnly(a.url_info.url()); |
67 if (a_is_host_only != history::IsHostOnly(b.url_info.url())) | 191 if (a_is_host_only != IsHostOnly(b.url_info.url())) |
68 return a_is_host_only; | 192 return a_is_host_only; |
69 } | 193 } |
70 | 194 |
71 // URLs that have been visited more often are better. | 195 // URLs that have been visited more often are better. |
72 if (a.url_info.visit_count() != b.url_info.visit_count()) | 196 if (a.url_info.visit_count() != b.url_info.visit_count()) |
73 return a.url_info.visit_count() > b.url_info.visit_count(); | 197 return a.url_info.visit_count() > b.url_info.visit_count(); |
74 | 198 |
75 // URLs that have been visited more recently are better. | 199 // URLs that have been visited more recently are better. |
76 return a.url_info.last_visit() > b.url_info.last_visit(); | 200 return a.url_info.last_visit() > b.url_info.last_visit(); |
77 } | 201 } |
78 | 202 |
79 // Given the user's |input| and a |match| created from it, reduce the | 203 // Determines the confidence for a |match| when compared to all the |matches|. |
80 // match's URL to just a host. If this host still matches the user input, | 204 // Returns a number in the range [0, 1]. |
81 // return it. Returns the empty string on failure. | 205 float CalculateConfidence(const history::HistoryMatch& match, |
82 GURL ConvertToHostOnly(const HistoryMatch& match, const string16& input) { | 206 const history::HistoryMatches& matches) { |
83 // See if we should try to do host-only suggestions for this URL. Nonstandard | 207 // TODO(dominich): Take into account bookmarked page? |
84 // schemes means there's no authority section, so suggesting the host name | 208 // TODO(dominich): See CompareHistoryMatch for more measures to include. |
85 // is useless. File URLs are standard, but host suggestion is not useful for | 209 // Using typed count in place of visit count as: |
86 // them either. | 210 // - It's a better indicator of what the user wants to open given that they |
87 const GURL& url = match.url_info.url(); | 211 // are typing in the address bar (users tend to open certain URLs by typing |
88 if (!url.is_valid() || !url.IsStandard() || url.SchemeIsFile()) | 212 // and others by e.g. bookmarks, so visit_count is a good indicator of |
89 return GURL(); | 213 // overall interest but a bad one for specifically omnibox interest). |
90 | 214 // - Since the DB query is sorted by typed_count, the results may be |
91 // Transform to a host-only match. Bail if the host no longer matches the | 215 // effectively a random selection as far as visit_counts are concerned |
92 // user input (e.g. because the user typed more than just a host). | 216 // (meaning many high-visit_count-URLs may be present in one query and |
93 GURL host = url.GetWithEmptyPath(); | 217 // absent in a similar one), leading to wild swings in confidence for the |
94 if ((host.spec().length() < (match.input_location + input.length()))) | 218 // same result across distinct queries. |
95 return GURL(); // User typing is longer than this host suggestion. | 219 float numerator = match.url_info.typed_count(); |
96 | 220 float denominator = 0.0f; |
97 const string16 spec = UTF8ToUTF16(host.spec()); | 221 for (history::HistoryMatches::const_iterator it = matches.begin(); |
98 if (spec.compare(match.input_location, input.length(), input)) | 222 it != matches.end(); ++it) { |
99 return GURL(); // User typing is no longer a prefix. | 223 denominator += it->url_info.typed_count(); |
100 | 224 } |
101 return host; | 225 if (denominator < 1) { |
| 226 numerator = match.url_info.visit_count(); |
| 227 for (history::HistoryMatches::const_iterator it = matches.begin(); |
| 228 it != matches.end(); ++it) { |
| 229 denominator += it->url_info.visit_count(); |
| 230 } |
| 231 } |
| 232 return (denominator > 0.0f ? numerator / denominator : 0); |
102 } | 233 } |
103 | 234 |
104 } // namespace history | 235 } // namespace history |
105 | 236 |
106 HistoryURLProviderParams::HistoryURLProviderParams( | 237 HistoryURLProviderParams::HistoryURLProviderParams( |
107 const AutocompleteInput& input, | 238 const AutocompleteInput& input, |
108 bool trim_http, | 239 bool trim_http, |
109 const std::string& languages) | 240 const std::string& languages) |
110 : message_loop(MessageLoop::current()), | 241 : message_loop(MessageLoop::current()), |
111 input(input), | 242 input(input), |
112 prevent_inline_autocomplete(input.prevent_inline_autocomplete()), | 243 prevent_inline_autocomplete(input.prevent_inline_autocomplete()), |
113 trim_http(trim_http), | 244 trim_http(trim_http), |
114 failed(false), | 245 failed(false), |
115 languages(languages), | 246 languages(languages), |
116 dont_suggest_exact_input(false) { | 247 dont_suggest_exact_input(false) { |
117 } | 248 } |
118 | 249 |
119 HistoryURLProviderParams::~HistoryURLProviderParams() {} | 250 HistoryURLProviderParams::~HistoryURLProviderParams() { |
| 251 } |
120 | 252 |
121 HistoryURLProvider::HistoryURLProvider(ACProviderListener* listener, | 253 HistoryURLProvider::HistoryURLProvider(ACProviderListener* listener, |
122 Profile* profile) | 254 Profile* profile) |
123 : HistoryProvider(listener, profile, "HistoryURL"), | 255 : HistoryProvider(listener, profile, "HistoryURL"), |
124 prefixes_(GetPrefixes()), | 256 prefixes_(GetPrefixes()), |
125 params_(NULL) { | 257 params_(NULL) { |
126 } | 258 } |
127 | 259 |
128 void HistoryURLProvider::Start(const AutocompleteInput& input, | 260 void HistoryURLProvider::Start(const AutocompleteInput& input, |
129 bool minimal_changes) { | 261 bool minimal_changes) { |
(...skipping 21 matching lines...) Expand all Loading... |
151 | 283 |
152 // Called on the history thread. | 284 // Called on the history thread. |
153 void HistoryURLProvider::ExecuteWithDB(history::HistoryBackend* backend, | 285 void HistoryURLProvider::ExecuteWithDB(history::HistoryBackend* backend, |
154 history::URLDatabase* db, | 286 history::URLDatabase* db, |
155 HistoryURLProviderParams* params) { | 287 HistoryURLProviderParams* params) { |
156 // We may get called with a NULL database if it couldn't be properly | 288 // We may get called with a NULL database if it couldn't be properly |
157 // initialized. | 289 // initialized. |
158 if (!db) { | 290 if (!db) { |
159 params->failed = true; | 291 params->failed = true; |
160 } else if (!params->cancel_flag.IsSet()) { | 292 } else if (!params->cancel_flag.IsSet()) { |
161 TimeTicks beginning_time = TimeTicks::Now(); | 293 base::TimeTicks beginning_time = base::TimeTicks::Now(); |
162 | 294 |
163 DoAutocomplete(backend, db, params); | 295 DoAutocomplete(backend, db, params); |
164 | 296 |
165 UMA_HISTOGRAM_TIMES("Autocomplete.HistoryAsyncQueryTime", | 297 UMA_HISTOGRAM_TIMES("Autocomplete.HistoryAsyncQueryTime", |
166 TimeTicks::Now() - beginning_time); | 298 base::TimeTicks::Now() - beginning_time); |
167 } | 299 } |
168 | 300 |
169 // Return the results (if any) to the main thread. | 301 // Return the results (if any) to the main thread. |
170 params->message_loop->PostTask(FROM_HERE, NewRunnableMethod( | 302 params->message_loop->PostTask(FROM_HERE, NewRunnableMethod( |
171 this, &HistoryURLProvider::QueryComplete, params)); | 303 this, &HistoryURLProvider::QueryComplete, params)); |
172 } | 304 } |
173 | 305 |
174 // Used by both autocomplete passes, and therefore called on multiple different | 306 // Used by both autocomplete passes, and therefore called on multiple different |
175 // threads (though not simultaneously). | 307 // threads (though not simultaneously). |
176 void HistoryURLProvider::DoAutocomplete(history::HistoryBackend* backend, | 308 void HistoryURLProvider::DoAutocomplete(history::HistoryBackend* backend, |
(...skipping 14 matching lines...) Expand all Loading... |
191 ((params->input.type() != AutocompleteInput::UNKNOWN) || | 323 ((params->input.type() != AutocompleteInput::UNKNOWN) || |
192 !params->trim_http || | 324 !params->trim_http || |
193 url_util::FindAndCompareScheme(UTF16ToUTF8(params->input.text()), | 325 url_util::FindAndCompareScheme(UTF16ToUTF8(params->input.text()), |
194 chrome::kHttpsScheme, NULL)); | 326 chrome::kHttpsScheme, NULL)); |
195 AutocompleteMatch what_you_typed_match(SuggestExactInput(params->input, | 327 AutocompleteMatch what_you_typed_match(SuggestExactInput(params->input, |
196 params->trim_http)); | 328 params->trim_http)); |
197 | 329 |
198 // Get the matching URLs from the DB | 330 // Get the matching URLs from the DB |
199 typedef std::vector<history::URLRow> URLRowVector; | 331 typedef std::vector<history::URLRow> URLRowVector; |
200 URLRowVector url_matches; | 332 URLRowVector url_matches; |
201 HistoryMatches history_matches; | 333 history::HistoryMatches history_matches; |
202 | 334 |
203 for (Prefixes::const_iterator i(prefixes_.begin()); i != prefixes_.end(); | 335 for (history::Prefixes::const_iterator i(prefixes_.begin()); |
204 ++i) { | 336 i != prefixes_.end(); ++i) { |
205 if (params->cancel_flag.IsSet()) | 337 if (params->cancel_flag.IsSet()) |
206 return; // Canceled in the middle of a query, give up. | 338 return; // Canceled in the middle of a query, give up. |
207 // We only need kMaxMatches results in the end, but before we get there we | 339 // We only need kMaxMatches results in the end, but before we get there we |
208 // need to promote lower-quality matches that are prefixes of | 340 // need to promote lower-quality matches that are prefixes of |
209 // higher-quality matches, and remove lower-quality redirects. So we ask | 341 // higher-quality matches, and remove lower-quality redirects. So we ask |
210 // for more results than we need, of every prefix type, in hopes this will | 342 // for more results than we need, of every prefix type, in hopes this will |
211 // give us far more than enough to work with. CullRedirects() will then | 343 // give us far more than enough to work with. CullRedirects() will then |
212 // reduce the list to the best kMaxMatches results. | 344 // reduce the list to the best kMaxMatches results. |
213 db->AutocompleteForPrefix(i->prefix + params->input.text(), | 345 db->AutocompleteForPrefix(i->prefix + params->input.text(), |
214 kMaxMatches * 2, (backend == NULL), &url_matches); | 346 kMaxMatches * 2, (backend == NULL), &url_matches); |
215 for (URLRowVector::const_iterator j(url_matches.begin()); | 347 for (URLRowVector::const_iterator j(url_matches.begin()); |
216 j != url_matches.end(); ++j) { | 348 j != url_matches.end(); ++j) { |
217 const Prefix* best_prefix = BestPrefix(j->url(), string16()); | 349 const history::Prefix* best_prefix = BestPrefix(j->url(), string16()); |
218 DCHECK(best_prefix != NULL); | 350 DCHECK(best_prefix != NULL); |
219 history_matches.push_back(HistoryMatch(*j, i->prefix.length(), | 351 history_matches.push_back(history::HistoryMatch(*j, i->prefix.length(), |
220 !i->num_components, | 352 !i->num_components, |
221 i->num_components >= best_prefix->num_components)); | 353 i->num_components >= best_prefix->num_components)); |
222 } | 354 } |
223 } | 355 } |
224 | 356 |
225 // Create sorted list of suggestions. | 357 // Create sorted list of suggestions. |
226 CullPoorMatches(&history_matches); | 358 CullPoorMatches(&history_matches); |
227 SortMatches(&history_matches); | 359 SortMatches(&history_matches); |
228 PromoteOrCreateShorterSuggestion(db, *params, have_what_you_typed_match, | 360 PromoteOrCreateShorterSuggestion(db, *params, have_what_you_typed_match, |
229 what_you_typed_match, &history_matches); | 361 what_you_typed_match, &history_matches); |
(...skipping 28 matching lines...) Expand all Loading... |
258 if (!backend) | 390 if (!backend) |
259 return; | 391 return; |
260 | 392 |
261 // Remove redirects and trim list to size. We want to provide up to | 393 // Remove redirects and trim list to size. We want to provide up to |
262 // kMaxMatches results plus the What You Typed result, if it was added to | 394 // kMaxMatches results plus the What You Typed result, if it was added to |
263 // |history_matches| above. | 395 // |history_matches| above. |
264 CullRedirects(backend, &history_matches, kMaxMatches + exact_suggestion); | 396 CullRedirects(backend, &history_matches, kMaxMatches + exact_suggestion); |
265 | 397 |
266 // Convert the history matches to autocomplete matches. | 398 // Convert the history matches to autocomplete matches. |
267 for (size_t i = first_match; i < history_matches.size(); ++i) { | 399 for (size_t i = first_match; i < history_matches.size(); ++i) { |
268 const HistoryMatch& match = history_matches[i]; | 400 const history::HistoryMatch& match = history_matches[i]; |
269 DCHECK(!have_what_you_typed_match || | 401 DCHECK(!have_what_you_typed_match || |
270 (match.url_info.url() != | 402 (match.url_info.url() != |
271 GURL(params->matches.front().destination_url))); | 403 GURL(params->matches.front().destination_url))); |
272 AutocompleteMatch ac_match = | 404 AutocompleteMatch ac_match = |
273 HistoryMatchToACMatch(params, match, history_matches, NORMAL, | 405 HistoryMatchToACMatch(params, match, history_matches, NORMAL, |
274 history_matches.size() - 1 - i); | 406 history_matches.size() - 1 - i); |
275 UMA_HISTOGRAM_COUNTS_100("Autocomplete.Confidence_HistoryUrl", | 407 UMA_HISTOGRAM_COUNTS_100("Autocomplete.Confidence_HistoryUrl", |
276 ac_match.confidence * 100); | 408 ac_match.confidence * 100); |
277 params->matches.push_back(ac_match); | 409 params->matches.push_back(ac_match); |
278 } | 410 } |
(...skipping 18 matching lines...) Expand all Loading... |
297 // match in it, whereas |params->matches| will be empty. | 429 // match in it, whereas |params->matches| will be empty. |
298 if (!params->failed) { | 430 if (!params->failed) { |
299 matches_.swap(params->matches); | 431 matches_.swap(params->matches); |
300 UpdateStarredStateOfMatches(); | 432 UpdateStarredStateOfMatches(); |
301 } | 433 } |
302 | 434 |
303 done_ = true; | 435 done_ = true; |
304 listener_->OnProviderUpdate(true); | 436 listener_->OnProviderUpdate(true); |
305 } | 437 } |
306 | 438 |
| 439 HistoryURLProvider::~HistoryURLProvider() { |
| 440 // Note: This object can get leaked on shutdown if there are pending |
| 441 // requests on the database (which hold a reference to us). Normally, these |
| 442 // messages get flushed for each thread. We do a round trip from main, to |
| 443 // history, back to main while holding a reference. If the main thread |
| 444 // completes before the history thread, the message to delegate back to the |
| 445 // main thread will not run and the reference will leak. Therefore, don't do |
| 446 // anything on destruction. |
| 447 } |
| 448 |
| 449 // static |
| 450 history::Prefixes HistoryURLProvider::GetPrefixes() { |
| 451 // We'll complete text following these prefixes. |
| 452 // NOTE: There's no requirement that these be in any particular order. |
| 453 history::Prefixes prefixes; |
| 454 prefixes.push_back(history::Prefix(ASCIIToUTF16("https://www."), 2)); |
| 455 prefixes.push_back(history::Prefix(ASCIIToUTF16("http://www."), 2)); |
| 456 prefixes.push_back(history::Prefix(ASCIIToUTF16("ftp://ftp."), 2)); |
| 457 prefixes.push_back(history::Prefix(ASCIIToUTF16("ftp://www."), 2)); |
| 458 prefixes.push_back(history::Prefix(ASCIIToUTF16("https://"), 1)); |
| 459 prefixes.push_back(history::Prefix(ASCIIToUTF16("http://"), 1)); |
| 460 prefixes.push_back(history::Prefix(ASCIIToUTF16("ftp://"), 1)); |
| 461 // Empty string catches within-scheme matches as well. |
| 462 prefixes.push_back(history::Prefix(string16(), 0)); |
| 463 return prefixes; |
| 464 } |
| 465 |
| 466 // static |
| 467 int HistoryURLProvider::CalculateRelevance(AutocompleteInput::Type input_type, |
| 468 MatchType match_type, |
| 469 size_t match_number) { |
| 470 switch (match_type) { |
| 471 case INLINE_AUTOCOMPLETE: |
| 472 return 1400; |
| 473 |
| 474 case WHAT_YOU_TYPED: |
| 475 return 1200; |
| 476 |
| 477 default: |
| 478 return 900 + static_cast<int>(match_number); |
| 479 } |
| 480 } |
| 481 |
| 482 void HistoryURLProvider::RunAutocompletePasses( |
| 483 const AutocompleteInput& input, |
| 484 bool fixup_input_and_run_pass_1) { |
| 485 matches_.clear(); |
| 486 |
| 487 if ((input.type() == AutocompleteInput::INVALID) || |
| 488 (input.type() == AutocompleteInput::FORCED_QUERY)) |
| 489 return; |
| 490 |
| 491 // Create a match for exactly what the user typed. This will only be used as |
| 492 // a fallback in case we can't get the history service or URL DB; otherwise, |
| 493 // we'll run this again in DoAutocomplete() and use that result instead. |
| 494 const bool trim_http = !HasHTTPScheme(input.text()); |
| 495 // Don't do this for queries -- while we can sometimes mark up a match for |
| 496 // this, it's not what the user wants, and just adds noise. |
| 497 if ((input.type() != AutocompleteInput::QUERY) && |
| 498 input.canonicalized_url().is_valid()) |
| 499 matches_.push_back(SuggestExactInput(input, trim_http)); |
| 500 |
| 501 // We'll need the history service to run both passes, so try to obtain it. |
| 502 if (!profile_) |
| 503 return; |
| 504 HistoryService* const history_service = |
| 505 profile_->GetHistoryService(Profile::EXPLICIT_ACCESS); |
| 506 if (!history_service) |
| 507 return; |
| 508 |
| 509 // Create the data structure for the autocomplete passes. We'll save this off |
| 510 // onto the |params_| member for later deletion below if we need to run pass |
| 511 // 2. |
| 512 std::string languages(languages_); |
| 513 if (languages.empty()) { |
| 514 languages = |
| 515 profile_->GetPrefs()->GetString(prefs::kAcceptLanguages); |
| 516 } |
| 517 scoped_ptr<HistoryURLProviderParams> params( |
| 518 new HistoryURLProviderParams(input, trim_http, languages)); |
| 519 |
| 520 params->prevent_inline_autocomplete = |
| 521 PreventInlineAutocomplete(input); |
| 522 |
| 523 if (fixup_input_and_run_pass_1) { |
| 524 // Do some fixup on the user input before matching against it, so we provide |
| 525 // good results for local file paths, input with spaces, etc. |
| 526 // NOTE: This purposefully doesn't take input.desired_tld() into account; if |
| 527 // it did, then holding "ctrl" would change all the results from the |
| 528 // HistoryURLProvider provider, not just the What You Typed Result. |
| 529 const string16 fixed_text(FixupUserInput(input)); |
| 530 if (fixed_text.empty()) { |
| 531 // Conceivably fixup could result in an empty string (although I don't |
| 532 // have cases where this happens offhand). We can't do anything with |
| 533 // empty input, so just bail; otherwise we'd crash later. |
| 534 return; |
| 535 } |
| 536 params->input.set_text(fixed_text); |
| 537 |
| 538 // Pass 1: Get the in-memory URL database, and use it to find and promote |
| 539 // the inline autocomplete match, if any. |
| 540 history::URLDatabase* url_db = history_service->InMemoryDatabase(); |
| 541 // url_db can be NULL if it hasn't finished initializing (or failed to |
| 542 // initialize). In this case all we can do is fall back on the second |
| 543 // pass. |
| 544 // |
| 545 // TODO(pkasting): We should just block here until this loads. Any time |
| 546 // someone unloads the history backend, we'll get inconsistent inline |
| 547 // autocomplete behavior here. |
| 548 if (url_db) { |
| 549 DoAutocomplete(NULL, url_db, params.get()); |
| 550 // params->matches now has the matches we should expose to the provider. |
| 551 // Pass 2 expects a "clean slate" set of matches. |
| 552 matches_.clear(); |
| 553 matches_.swap(params->matches); |
| 554 UpdateStarredStateOfMatches(); |
| 555 } |
| 556 } |
| 557 |
| 558 // Pass 2: Ask the history service to call us back on the history thread, |
| 559 // where we can read the full on-disk DB. |
| 560 if (input.matches_requested() == AutocompleteInput::ALL_MATCHES) { |
| 561 done_ = false; |
| 562 params_ = params.release(); // This object will be destroyed in |
| 563 // QueryComplete() once we're done with it. |
| 564 history_service->ScheduleAutocomplete(this, params_); |
| 565 } |
| 566 } |
| 567 |
| 568 const history::Prefix* HistoryURLProvider::BestPrefix( |
| 569 const GURL& url, |
| 570 const string16& prefix_suffix) const { |
| 571 const history::Prefix* best_prefix = NULL; |
| 572 const string16 text(UTF8ToUTF16(url.spec())); |
| 573 for (history::Prefixes::const_iterator i(prefixes_.begin()); |
| 574 i != prefixes_.end(); ++i) { |
| 575 if ((best_prefix == NULL) || |
| 576 (i->num_components > best_prefix->num_components)) { |
| 577 string16 prefix_with_suffix(i->prefix + prefix_suffix); |
| 578 if ((text.length() >= prefix_with_suffix.length()) && |
| 579 !text.compare(0, prefix_with_suffix.length(), prefix_with_suffix)) |
| 580 best_prefix = &(*i); |
| 581 } |
| 582 } |
| 583 return best_prefix; |
| 584 } |
| 585 |
307 AutocompleteMatch HistoryURLProvider::SuggestExactInput( | 586 AutocompleteMatch HistoryURLProvider::SuggestExactInput( |
308 const AutocompleteInput& input, | 587 const AutocompleteInput& input, |
309 bool trim_http) { | 588 bool trim_http) { |
310 // TODO(dominich): Find a confidence measure for this. | 589 // TODO(dominich): Find a confidence measure for this. |
311 AutocompleteMatch match(this, | 590 AutocompleteMatch match(this, |
312 CalculateRelevance(input.type(), WHAT_YOU_TYPED, 0), 0.0f, false, | 591 CalculateRelevance(input.type(), WHAT_YOU_TYPED, 0), 0.0f, false, |
313 AutocompleteMatch::URL_WHAT_YOU_TYPED); | 592 AutocompleteMatch::URL_WHAT_YOU_TYPED); |
314 UMA_HISTOGRAM_COUNTS_100("Autocomplete.Confidence_HistoryUrl", | 593 UMA_HISTOGRAM_COUNTS_100("Autocomplete.Confidence_HistoryUrl", |
315 match.confidence * 100); | 594 match.confidence * 100); |
316 | 595 |
(...skipping 13 matching lines...) Expand all Loading... |
330 AutocompleteInput::FormattedStringWithEquivalentMeaning(url, | 609 AutocompleteInput::FormattedStringWithEquivalentMeaning(url, |
331 display_string); | 610 display_string); |
332 // NOTE: Don't set match.input_location (to allow inline autocompletion) | 611 // NOTE: Don't set match.input_location (to allow inline autocompletion) |
333 // here, it's surprising and annoying. | 612 // here, it's surprising and annoying. |
334 | 613 |
335 // Try to highlight "innermost" match location. If we fix up "w" into | 614 // Try to highlight "innermost" match location. If we fix up "w" into |
336 // "www.w.com", we want to highlight the fifth character, not the first. | 615 // "www.w.com", we want to highlight the fifth character, not the first. |
337 // This relies on match.destination_url being the non-prefix-trimmed version | 616 // This relies on match.destination_url being the non-prefix-trimmed version |
338 // of match.contents. | 617 // of match.contents. |
339 match.contents = display_string; | 618 match.contents = display_string; |
340 const Prefix* best_prefix = BestPrefix(match.destination_url, input.text()); | 619 const history::Prefix* best_prefix = |
| 620 BestPrefix(match.destination_url, input.text()); |
341 // Because of the vagaries of GURL, it's possible for match.destination_url | 621 // Because of the vagaries of GURL, it's possible for match.destination_url |
342 // to not contain the user's input at all. In this case don't mark anything | 622 // to not contain the user's input at all. In this case don't mark anything |
343 // as a match. | 623 // as a match. |
344 const size_t match_location = (best_prefix == NULL) ? | 624 const size_t match_location = (best_prefix == NULL) ? |
345 string16::npos : best_prefix->prefix.length() - offset; | 625 string16::npos : best_prefix->prefix.length() - offset; |
346 AutocompleteMatch::ClassifyLocationInString(match_location, | 626 AutocompleteMatch::ClassifyLocationInString(match_location, |
347 input.text().length(), | 627 input.text().length(), |
348 match.contents.length(), | 628 match.contents.length(), |
349 ACMatchClassification::URL, | 629 ACMatchClassification::URL, |
350 &match.contents_class); | 630 &match.contents_class); |
351 | 631 |
352 match.is_history_what_you_typed_match = true; | 632 match.is_history_what_you_typed_match = true; |
353 } | 633 } |
354 | 634 |
355 return match; | 635 return match; |
356 } | 636 } |
357 | 637 |
358 bool HistoryURLProvider::FixupExactSuggestion(history::URLDatabase* db, | 638 bool HistoryURLProvider::FixupExactSuggestion( |
359 const AutocompleteInput& input, | 639 history::URLDatabase* db, |
360 AutocompleteMatch* match, | 640 const AutocompleteInput& input, |
361 HistoryMatches* matches) const { | 641 AutocompleteMatch* match, |
| 642 history::HistoryMatches* matches) const { |
362 DCHECK(match != NULL); | 643 DCHECK(match != NULL); |
363 DCHECK(matches != NULL); | 644 DCHECK(matches != NULL); |
364 | 645 |
365 // Tricky corner case: The user has visited intranet site "foo", but not | 646 // Tricky corner case: The user has visited intranet site "foo", but not |
366 // internet site "www.foo.com". He types in foo (getting an exact match), | 647 // internet site "www.foo.com". He types in foo (getting an exact match), |
367 // then tries to hit ctrl-enter. When pressing ctrl, the what-you-typed | 648 // then tries to hit ctrl-enter. When pressing ctrl, the what-you-typed |
368 // match ("www.foo.com") doesn't show up in history, and thus doesn't get a | 649 // match ("www.foo.com") doesn't show up in history, and thus doesn't get a |
369 // promoted relevance, but a different match from the input ("foo") does, and | 650 // promoted relevance, but a different match from the input ("foo") does, and |
370 // gets promoted for inline autocomplete. Thus instead of getting | 651 // gets promoted for inline autocomplete. Thus instead of getting |
371 // "www.foo.com", the user still gets "foo" (and, before hitting enter, | 652 // "www.foo.com", the user still gets "foo" (and, before hitting enter, |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
412 // Promote as an exact match. | 693 // Promote as an exact match. |
413 match->relevance = CalculateRelevance(input.type(), type, 0); | 694 match->relevance = CalculateRelevance(input.type(), type, 0); |
414 | 695 |
415 // Put it on the front of the HistoryMatches for redirect culling. | 696 // Put it on the front of the HistoryMatches for redirect culling. |
416 EnsureMatchPresent(info, string16::npos, false, matches, true); | 697 EnsureMatchPresent(info, string16::npos, false, matches, true); |
417 return true; | 698 return true; |
418 } | 699 } |
419 | 700 |
420 bool HistoryURLProvider::PromoteMatchForInlineAutocomplete( | 701 bool HistoryURLProvider::PromoteMatchForInlineAutocomplete( |
421 HistoryURLProviderParams* params, | 702 HistoryURLProviderParams* params, |
422 const HistoryMatch& match, | 703 const history::HistoryMatch& match, |
423 const HistoryMatches& matches) { | 704 const history::HistoryMatches& matches) { |
424 // Promote the first match if it's been typed at least n times, where n == 1 | 705 // Promote the first match if it's been typed at least n times, where n == 1 |
425 // for "simple" (host-only) URLs and n == 2 for others. We set a higher bar | 706 // for "simple" (host-only) URLs and n == 2 for others. We set a higher bar |
426 // for these long URLs because it's less likely that users will want to visit | 707 // for these long URLs because it's less likely that users will want to visit |
427 // them again. Even though we don't increment the typed_count for pasted-in | 708 // them again. Even though we don't increment the typed_count for pasted-in |
428 // URLs, if the user manually edits the URL or types some long thing in by | 709 // URLs, if the user manually edits the URL or types some long thing in by |
429 // hand, we wouldn't want to immediately start autocompleting it. | 710 // hand, we wouldn't want to immediately start autocompleting it. |
430 if (!match.url_info.typed_count() || | 711 if (!match.url_info.typed_count() || |
431 ((match.url_info.typed_count() == 1) && | 712 ((match.url_info.typed_count() == 1) && |
432 !history::IsHostOnly(match.url_info.url()))) | 713 !IsHostOnly(match.url_info.url()))) |
433 return false; | 714 return false; |
434 | 715 |
435 // In the case where the user has typed "foo.com" and visited (but not typed) | 716 // In the case where the user has typed "foo.com" and visited (but not typed) |
436 // "foo/", and the input is "foo", we can reach here for "foo.com" during the | 717 // "foo/", and the input is "foo", we can reach here for "foo.com" during the |
437 // first pass but have the second pass suggest the exact input as a better | 718 // first pass but have the second pass suggest the exact input as a better |
438 // URL. Since we need both passes to agree, and since during the first pass | 719 // URL. Since we need both passes to agree, and since during the first pass |
439 // there's no way to know about "foo/", make reaching this point prevent any | 720 // there's no way to know about "foo/", make reaching this point prevent any |
440 // future pass from suggesting the exact input as a better match. | 721 // future pass from suggesting the exact input as a better match. |
441 params->dont_suggest_exact_input = true; | 722 params->dont_suggest_exact_input = true; |
442 params->matches.push_back(HistoryMatchToACMatch(params, match, matches, | 723 params->matches.push_back(HistoryMatchToACMatch(params, match, matches, |
443 INLINE_AUTOCOMPLETE, 0)); | 724 INLINE_AUTOCOMPLETE, 0)); |
444 return true; | 725 return true; |
445 } | 726 } |
446 | 727 |
447 HistoryURLProvider::~HistoryURLProvider() {} | 728 void HistoryURLProvider::SortMatches(history::HistoryMatches* matches) const { |
448 | |
449 // static | |
450 history::Prefixes HistoryURLProvider::GetPrefixes() { | |
451 // We'll complete text following these prefixes. | |
452 // NOTE: There's no requirement that these be in any particular order. | |
453 Prefixes prefixes; | |
454 prefixes.push_back(Prefix(ASCIIToUTF16("https://www."), 2)); | |
455 prefixes.push_back(Prefix(ASCIIToUTF16("http://www."), 2)); | |
456 prefixes.push_back(Prefix(ASCIIToUTF16("ftp://ftp."), 2)); | |
457 prefixes.push_back(Prefix(ASCIIToUTF16("ftp://www."), 2)); | |
458 prefixes.push_back(Prefix(ASCIIToUTF16("https://"), 1)); | |
459 prefixes.push_back(Prefix(ASCIIToUTF16("http://"), 1)); | |
460 prefixes.push_back(Prefix(ASCIIToUTF16("ftp://"), 1)); | |
461 // Empty string catches within-scheme matches as well. | |
462 prefixes.push_back(Prefix(string16(), 0)); | |
463 return prefixes; | |
464 } | |
465 | |
466 // static | |
467 int HistoryURLProvider::CalculateRelevance(AutocompleteInput::Type input_type, | |
468 MatchType match_type, | |
469 size_t match_number) { | |
470 switch (match_type) { | |
471 case INLINE_AUTOCOMPLETE: | |
472 return 1400; | |
473 | |
474 case WHAT_YOU_TYPED: | |
475 return 1200; | |
476 | |
477 default: | |
478 return 900 + static_cast<int>(match_number); | |
479 } | |
480 } | |
481 | |
482 // static | |
483 float HistoryURLProvider::CalculateConfidence( | |
484 const history::HistoryMatch& match, | |
485 const history::HistoryMatches& matches) { | |
486 // TODO(dominich): Take into account bookmarked page? | |
487 // TODO(dominich): See CompareHistoryMatch for more measures to include. | |
488 // Using typed count in place of visit count as: | |
489 // - It's a better indicator of what the user wants to open given that they | |
490 // are typing in the address bar (users tend to open certain URLs by typing | |
491 // and others by e.g. bookmarks, so visit_count is a good indicator of | |
492 // overall interest but a bad one for specifically omnibox interest). | |
493 // - Since the DB query is sorted by typed_count, the results may be | |
494 // effectively a random selection as far as visit_counts are concerned | |
495 // (meaning many high-visit_count-URLs may be present in one query and | |
496 // absent in a similar one), leading to wild swings in confidence for the | |
497 // same result across distinct queries. | |
498 float numerator = match.url_info.typed_count(); | |
499 float denominator = 0.0f; | |
500 for (history::HistoryMatches::const_iterator it = matches.begin(); | |
501 it != matches.end(); ++it) { | |
502 denominator += it->url_info.typed_count(); | |
503 } | |
504 if (denominator < 1) { | |
505 numerator = match.url_info.visit_count(); | |
506 for (history::HistoryMatches::const_iterator it = matches.begin(); | |
507 it != matches.end(); ++it) { | |
508 denominator += it->url_info.visit_count(); | |
509 } | |
510 } | |
511 return (denominator > 0.0f ? numerator / denominator : 0); | |
512 } | |
513 | |
514 // static | |
515 void HistoryURLProvider::PromoteOrCreateShorterSuggestion( | |
516 history::URLDatabase* db, | |
517 const HistoryURLProviderParams& params, | |
518 bool have_what_you_typed_match, | |
519 const AutocompleteMatch& what_you_typed_match, | |
520 HistoryMatches* matches) { | |
521 if (matches->empty()) | |
522 return; // No matches, nothing to do. | |
523 | |
524 // Determine the base URL from which to search, and whether that URL could | |
525 // itself be added as a match. We can add the base iff it's not "effectively | |
526 // the same" as any "what you typed" match. | |
527 const HistoryMatch& match = matches->front(); | |
528 GURL search_base = history::ConvertToHostOnly(match, params.input.text()); | |
529 bool can_add_search_base_to_matches = !have_what_you_typed_match; | |
530 if (search_base.is_empty()) { | |
531 // Search from what the user typed when we couldn't reduce the best match | |
532 // to a host. Careful: use a substring of |match| here, rather than the | |
533 // first match in |params|, because they might have different prefixes. If | |
534 // the user typed "google.com", |what_you_typed_match| will hold | |
535 // "http://google.com/", but |match| might begin with | |
536 // "http://www.google.com/". | |
537 // TODO: this should be cleaned up, and is probably incorrect for IDN. | |
538 std::string new_match = match.url_info.url().possibly_invalid_spec(). | |
539 substr(0, match.input_location + params.input.text().length()); | |
540 search_base = GURL(new_match); | |
541 // TODO(mrossetti): There is a degenerate case where the following may | |
542 // cause a failure: http://www/~someword/fubar.html. Diagnose. | |
543 // See: http://crbug.com/50101 | |
544 if (search_base.is_empty()) | |
545 return; // Can't construct a valid URL from which to start a search. | |
546 } else if (!can_add_search_base_to_matches) { | |
547 can_add_search_base_to_matches = | |
548 (search_base != what_you_typed_match.destination_url); | |
549 } | |
550 if (search_base == match.url_info.url()) | |
551 return; // Couldn't shorten |match|, so no range of URLs to search over. | |
552 | |
553 // Search the DB for short URLs between our base and |match|. | |
554 history::URLRow info(search_base); | |
555 bool promote = true; | |
556 // A short URL is only worth suggesting if it's been visited at least a third | |
557 // as often as the longer URL. | |
558 const int min_visit_count = ((match.url_info.visit_count() - 1) / 3) + 1; | |
559 // For stability between the in-memory and on-disk autocomplete passes, when | |
560 // the long URL has been typed before, only suggest shorter URLs that have | |
561 // also been typed. Otherwise, the on-disk pass could suggest a shorter URL | |
562 // (which hasn't been typed) that the in-memory pass doesn't know about, | |
563 // thereby making the top match, and thus the behavior of inline | |
564 // autocomplete, unstable. | |
565 const int min_typed_count = match.url_info.typed_count() ? 1 : 0; | |
566 if (!db->FindShortestURLFromBase(search_base.possibly_invalid_spec(), | |
567 match.url_info.url().possibly_invalid_spec(), min_visit_count, | |
568 min_typed_count, can_add_search_base_to_matches, &info)) { | |
569 if (!can_add_search_base_to_matches) | |
570 return; // Couldn't find anything and can't add the search base, bail. | |
571 | |
572 // Try to get info on the search base itself. Promote it to the top if the | |
573 // original best match isn't good enough to autocomplete. | |
574 db->GetRowForURL(search_base, &info); | |
575 promote = match.url_info.typed_count() <= 1; | |
576 } | |
577 | |
578 // Promote or add the desired URL to the list of matches. | |
579 EnsureMatchPresent(info, match.input_location, match.match_in_scheme, | |
580 matches, promote); | |
581 } | |
582 | |
583 // static | |
584 void HistoryURLProvider::EnsureMatchPresent(const history::URLRow& info, | |
585 size_t input_location, | |
586 bool match_in_scheme, | |
587 HistoryMatches* matches, | |
588 bool promote) { | |
589 // |matches| may already have an entry for this. | |
590 for (HistoryMatches::iterator i(matches->begin()); i != matches->end(); | |
591 ++i) { | |
592 if (i->url_info.url() == info.url()) { | |
593 // Rotate it to the front if the caller wishes. | |
594 if (promote) | |
595 std::rotate(matches->begin(), i, i + 1); | |
596 return; | |
597 } | |
598 } | |
599 | |
600 // No entry, so create one. | |
601 HistoryMatch match(info, input_location, match_in_scheme, true); | |
602 if (promote) | |
603 matches->push_front(match); | |
604 else | |
605 matches->push_back(match); | |
606 } | |
607 | |
608 void HistoryURLProvider::RunAutocompletePasses( | |
609 const AutocompleteInput& input, | |
610 bool fixup_input_and_run_pass_1) { | |
611 matches_.clear(); | |
612 | |
613 if ((input.type() == AutocompleteInput::INVALID) || | |
614 (input.type() == AutocompleteInput::FORCED_QUERY)) | |
615 return; | |
616 | |
617 // Create a match for exactly what the user typed. This will only be used as | |
618 // a fallback in case we can't get the history service or URL DB; otherwise, | |
619 // we'll run this again in DoAutocomplete() and use that result instead. | |
620 const bool trim_http = !HasHTTPScheme(input.text()); | |
621 // Don't do this for queries -- while we can sometimes mark up a match for | |
622 // this, it's not what the user wants, and just adds noise. | |
623 if ((input.type() != AutocompleteInput::QUERY) && | |
624 input.canonicalized_url().is_valid()) | |
625 matches_.push_back(SuggestExactInput(input, trim_http)); | |
626 | |
627 // We'll need the history service to run both passes, so try to obtain it. | |
628 if (!profile_) | |
629 return; | |
630 HistoryService* const history_service = | |
631 profile_->GetHistoryService(Profile::EXPLICIT_ACCESS); | |
632 if (!history_service) | |
633 return; | |
634 | |
635 // Create the data structure for the autocomplete passes. We'll save this off | |
636 // onto the |params_| member for later deletion below if we need to run pass | |
637 // 2. | |
638 std::string languages(languages_); | |
639 if (languages.empty()) { | |
640 languages = | |
641 profile_->GetPrefs()->GetString(prefs::kAcceptLanguages); | |
642 } | |
643 scoped_ptr<HistoryURLProviderParams> params( | |
644 new HistoryURLProviderParams(input, trim_http, languages)); | |
645 | |
646 params->prevent_inline_autocomplete = | |
647 PreventInlineAutocomplete(input); | |
648 | |
649 if (fixup_input_and_run_pass_1) { | |
650 // Do some fixup on the user input before matching against it, so we provide | |
651 // good results for local file paths, input with spaces, etc. | |
652 // NOTE: This purposefully doesn't take input.desired_tld() into account; if | |
653 // it did, then holding "ctrl" would change all the results from the | |
654 // HistoryURLProvider provider, not just the What You Typed Result. | |
655 const string16 fixed_text(FixupUserInput(input)); | |
656 if (fixed_text.empty()) { | |
657 // Conceivably fixup could result in an empty string (although I don't | |
658 // have cases where this happens offhand). We can't do anything with | |
659 // empty input, so just bail; otherwise we'd crash later. | |
660 return; | |
661 } | |
662 params->input.set_text(fixed_text); | |
663 | |
664 // Pass 1: Get the in-memory URL database, and use it to find and promote | |
665 // the inline autocomplete match, if any. | |
666 history::URLDatabase* url_db = history_service->InMemoryDatabase(); | |
667 // url_db can be NULL if it hasn't finished initializing (or failed to | |
668 // initialize). In this case all we can do is fall back on the second | |
669 // pass. | |
670 // | |
671 // TODO(pkasting): We should just block here until this loads. Any time | |
672 // someone unloads the history backend, we'll get inconsistent inline | |
673 // autocomplete behavior here. | |
674 if (url_db) { | |
675 DoAutocomplete(NULL, url_db, params.get()); | |
676 // params->matches now has the matches we should expose to the provider. | |
677 // Pass 2 expects a "clean slate" set of matches. | |
678 matches_.clear(); | |
679 matches_.swap(params->matches); | |
680 UpdateStarredStateOfMatches(); | |
681 } | |
682 } | |
683 | |
684 // Pass 2: Ask the history service to call us back on the history thread, | |
685 // where we can read the full on-disk DB. | |
686 if (input.matches_requested() == AutocompleteInput::ALL_MATCHES) { | |
687 done_ = false; | |
688 params_ = params.release(); // This object will be destroyed in | |
689 // QueryComplete() once we're done with it. | |
690 history_service->ScheduleAutocomplete(this, params_); | |
691 } | |
692 } | |
693 | |
694 const history::Prefix* HistoryURLProvider::BestPrefix( | |
695 const GURL& url, | |
696 const string16& prefix_suffix) const { | |
697 const Prefix* best_prefix = NULL; | |
698 const string16 text(UTF8ToUTF16(url.spec())); | |
699 for (Prefixes::const_iterator i(prefixes_.begin()); i != prefixes_.end(); | |
700 ++i) { | |
701 if ((best_prefix == NULL) || | |
702 (i->num_components > best_prefix->num_components)) { | |
703 string16 prefix_with_suffix(i->prefix + prefix_suffix); | |
704 if ((text.length() >= prefix_with_suffix.length()) && | |
705 !text.compare(0, prefix_with_suffix.length(), prefix_with_suffix)) | |
706 best_prefix = &(*i); | |
707 } | |
708 } | |
709 return best_prefix; | |
710 } | |
711 | |
712 void HistoryURLProvider::SortMatches(HistoryMatches* matches) const { | |
713 // Sort by quality, best first. | 729 // Sort by quality, best first. |
714 std::sort(matches->begin(), matches->end(), &history::CompareHistoryMatch); | 730 std::sort(matches->begin(), matches->end(), &CompareHistoryMatch); |
715 | 731 |
716 // Remove duplicate matches (caused by the search string appearing in one of | 732 // Remove duplicate matches (caused by the search string appearing in one of |
717 // the prefixes as well as after it). Consider the following scenario: | 733 // the prefixes as well as after it). Consider the following scenario: |
718 // | 734 // |
719 // User has visited "http://http.com" once and "http://htaccess.com" twice. | 735 // User has visited "http://http.com" once and "http://htaccess.com" twice. |
720 // User types "http". The autocomplete search with prefix "http://" returns | 736 // User types "http". The autocomplete search with prefix "http://" returns |
721 // the first host, while the search with prefix "" returns both hosts. Now | 737 // the first host, while the search with prefix "" returns both hosts. Now |
722 // we sort them into rank order: | 738 // we sort them into rank order: |
723 // http://http.com (innermost_match) | 739 // http://http.com (innermost_match) |
724 // http://htaccess.com (!innermost_match, url_info.visit_count == 2) | 740 // http://htaccess.com (!innermost_match, url_info.visit_count == 2) |
725 // http://http.com (!innermost_match, url_info.visit_count == 1) | 741 // http://http.com (!innermost_match, url_info.visit_count == 1) |
726 // | 742 // |
727 // The above scenario tells us we can't use std::unique(), since our | 743 // The above scenario tells us we can't use std::unique(), since our |
728 // duplicates are not always sequential. It also tells us we should remove | 744 // duplicates are not always sequential. It also tells us we should remove |
729 // the lower-quality duplicate(s), since otherwise the returned results won't | 745 // the lower-quality duplicate(s), since otherwise the returned results won't |
730 // be ordered correctly. This is easy to do: we just always remove the later | 746 // be ordered correctly. This is easy to do: we just always remove the later |
731 // element of a duplicate pair. | 747 // element of a duplicate pair. |
732 // Be careful! Because the vector contents may change as we remove elements, | 748 // Be careful! Because the vector contents may change as we remove elements, |
733 // we use an index instead of an iterator in the outer loop, and don't | 749 // we use an index instead of an iterator in the outer loop, and don't |
734 // precalculate the ending position. | 750 // precalculate the ending position. |
735 for (size_t i = 0; i < matches->size(); ++i) { | 751 for (size_t i = 0; i < matches->size(); ++i) { |
736 HistoryMatches::iterator j(matches->begin() + i + 1); | 752 for (history::HistoryMatches::iterator j(matches->begin() + i + 1); |
737 while (j != matches->end()) { | 753 j != matches->end(); ) { |
738 if ((*matches)[i].url_info.url() == j->url_info.url()) | 754 if ((*matches)[i].url_info.url() == j->url_info.url()) |
739 j = matches->erase(j); | 755 j = matches->erase(j); |
740 else | 756 else |
741 ++j; | 757 ++j; |
742 } | 758 } |
743 } | 759 } |
744 } | 760 } |
745 | 761 |
746 void HistoryURLProvider::CullPoorMatches(HistoryMatches* matches) const { | 762 void HistoryURLProvider::CullPoorMatches( |
| 763 history::HistoryMatches* matches) const { |
747 const base::Time& threshold(history::AutocompleteAgeThreshold()); | 764 const base::Time& threshold(history::AutocompleteAgeThreshold()); |
748 for (HistoryMatches::iterator i(matches->begin()); i != matches->end();) { | 765 for (history::HistoryMatches::iterator i(matches->begin()); |
| 766 i != matches->end();) { |
749 if (RowQualifiesAsSignificant(i->url_info, threshold)) | 767 if (RowQualifiesAsSignificant(i->url_info, threshold)) |
750 ++i; | 768 ++i; |
751 else | 769 else |
752 i = matches->erase(i); | 770 i = matches->erase(i); |
753 } | 771 } |
754 } | 772 } |
755 | 773 |
756 void HistoryURLProvider::CullRedirects(history::HistoryBackend* backend, | 774 void HistoryURLProvider::CullRedirects(history::HistoryBackend* backend, |
757 HistoryMatches* matches, | 775 history::HistoryMatches* matches, |
758 size_t max_results) const { | 776 size_t max_results) const { |
759 for (size_t source = 0; | 777 for (size_t source = 0; |
760 (source < matches->size()) && (source < max_results); ) { | 778 (source < matches->size()) && (source < max_results); ) { |
761 const GURL& url = (*matches)[source].url_info.url(); | 779 const GURL& url = (*matches)[source].url_info.url(); |
762 // TODO(brettw) this should go away when everything uses GURL. | 780 // TODO(brettw) this should go away when everything uses GURL. |
763 history::RedirectList redirects; | 781 history::RedirectList redirects; |
764 backend->GetMostRecentRedirectsFrom(url, &redirects); | 782 backend->GetMostRecentRedirectsFrom(url, &redirects); |
765 if (!redirects.empty()) { | 783 if (!redirects.empty()) { |
766 // Remove all but the first occurrence of any of these redirects in the | 784 // Remove all but the first occurrence of any of these redirects in the |
767 // search results. We also must add the URL we queried for, since it may | 785 // search results. We also must add the URL we queried for, since it may |
768 // not be the first match and we'd want to remove it. | 786 // not be the first match and we'd want to remove it. |
769 // | 787 // |
770 // For example, when A redirects to B and our matches are [A, X, B], | 788 // For example, when A redirects to B and our matches are [A, X, B], |
771 // we'll get B as the redirects from, and we want to remove the second | 789 // we'll get B as the redirects from, and we want to remove the second |
772 // item of that pair, removing B. If A redirects to B and our matches are | 790 // item of that pair, removing B. If A redirects to B and our matches are |
773 // [B, X, A], we'll want to remove A instead. | 791 // [B, X, A], we'll want to remove A instead. |
774 redirects.push_back(url); | 792 redirects.push_back(url); |
775 source = RemoveSubsequentMatchesOf(matches, source, redirects); | 793 source = RemoveSubsequentMatchesOf(matches, source, redirects); |
776 } else { | 794 } else { |
777 // Advance to next item. | 795 // Advance to next item. |
778 source++; | 796 source++; |
779 } | 797 } |
780 } | 798 } |
781 | 799 |
782 if (matches->size() > max_results) | 800 if (matches->size() > max_results) |
783 matches->resize(max_results); | 801 matches->resize(max_results); |
784 } | 802 } |
785 | 803 |
786 size_t HistoryURLProvider::RemoveSubsequentMatchesOf( | 804 size_t HistoryURLProvider::RemoveSubsequentMatchesOf( |
787 HistoryMatches* matches, | 805 history::HistoryMatches* matches, |
788 size_t source_index, | 806 size_t source_index, |
789 const std::vector<GURL>& remove) const { | 807 const std::vector<GURL>& remove) const { |
790 size_t next_index = source_index + 1; // return value = item after source | 808 size_t next_index = source_index + 1; // return value = item after source |
791 | 809 |
792 // Find the first occurrence of any URL in the redirect chain. We want to | 810 // Find the first occurrence of any URL in the redirect chain. We want to |
793 // keep this one since it is rated the highest. | 811 // keep this one since it is rated the highest. |
794 HistoryMatches::iterator first(std::find_first_of( | 812 history::HistoryMatches::iterator first(std::find_first_of( |
795 matches->begin(), matches->end(), remove.begin(), remove.end())); | 813 matches->begin(), matches->end(), remove.begin(), remove.end())); |
796 DCHECK(first != matches->end()) << | 814 DCHECK(first != matches->end()) << "We should have always found at least the " |
797 "We should have always found at least the original URL."; | 815 "original URL."; |
798 | 816 |
799 // Find any following occurrences of any URL in the redirect chain, these | 817 // Find any following occurrences of any URL in the redirect chain, these |
800 // should be deleted. | 818 // should be deleted. |
801 HistoryMatches::iterator next(first); | 819 for (history::HistoryMatches::iterator next(std::find_first_of(first + 1, |
802 next++; // Start searching immediately after the one we found already. | 820 matches->end(), remove.begin(), remove.end())); |
803 while (next != matches->end() && | 821 next != matches->end(); next = std::find_first_of(next, matches->end(), |
804 (next = std::find_first_of(next, matches->end(), remove.begin(), | 822 remove.begin(), remove.end())) { |
805 remove.end())) != matches->end()) { | |
806 // Remove this item. When we remove an item before the source index, we | 823 // Remove this item. When we remove an item before the source index, we |
807 // need to shift it to the right and remember that so we can return it. | 824 // need to shift it to the right and remember that so we can return it. |
808 next = matches->erase(next); | 825 next = matches->erase(next); |
809 if (static_cast<size_t>(next - matches->begin()) < next_index) | 826 if (static_cast<size_t>(next - matches->begin()) < next_index) |
810 next_index--; | 827 --next_index; |
811 } | 828 } |
812 return next_index; | 829 return next_index; |
813 } | 830 } |
814 | 831 |
815 AutocompleteMatch HistoryURLProvider::HistoryMatchToACMatch( | 832 AutocompleteMatch HistoryURLProvider::HistoryMatchToACMatch( |
816 HistoryURLProviderParams* params, | 833 HistoryURLProviderParams* params, |
817 const HistoryMatch& history_match, | 834 const history::HistoryMatch& history_match, |
818 const HistoryMatches& history_matches, | 835 const history::HistoryMatches& history_matches, |
819 MatchType match_type, | 836 MatchType match_type, |
820 size_t match_number) { | 837 size_t match_number) { |
821 const history::URLRow& info = history_match.url_info; | 838 const history::URLRow& info = history_match.url_info; |
822 AutocompleteMatch match(this, | 839 AutocompleteMatch match(this, |
823 CalculateRelevance(params->input.type(), match_type, match_number), | 840 CalculateRelevance(params->input.type(), match_type, match_number), |
824 CalculateConfidence(history_match, history_matches), | 841 CalculateConfidence(history_match, history_matches), |
825 !!info.visit_count(), AutocompleteMatch::HISTORY_URL); | 842 !!info.visit_count(), AutocompleteMatch::HISTORY_URL); |
826 match.destination_url = info.url(); | 843 match.destination_url = info.url(); |
827 DCHECK(match.destination_url.is_valid()); | 844 DCHECK(match.destination_url.is_valid()); |
828 size_t inline_autocomplete_offset = | 845 size_t inline_autocomplete_offset = |
(...skipping 29 matching lines...) Expand all Loading... |
858 &match.contents_class); | 875 &match.contents_class); |
859 } | 876 } |
860 match.description = info.title(); | 877 match.description = info.title(); |
861 AutocompleteMatch::ClassifyMatchInString(params->input.text(), | 878 AutocompleteMatch::ClassifyMatchInString(params->input.text(), |
862 info.title(), | 879 info.title(), |
863 ACMatchClassification::NONE, | 880 ACMatchClassification::NONE, |
864 &match.description_class); | 881 &match.description_class); |
865 | 882 |
866 return match; | 883 return match; |
867 } | 884 } |
OLD | NEW |