Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(221)

Side by Side Diff: chrome/browser/autocomplete/autocomplete_input.cc

Issue 320713002: Revert of Omnibox: Combine Two Input Type Enums into One (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 6 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/autocomplete/autocomplete_input.h" 5 #include "chrome/browser/autocomplete/autocomplete_input.h"
6 6
7 #include "base/strings/string_util.h" 7 #include "base/strings/string_util.h"
8 #include "base/strings/utf_string_conversions.h" 8 #include "base/strings/utf_string_conversions.h"
9 #include "chrome/browser/external_protocol/external_protocol_handler.h" 9 #include "chrome/browser/external_protocol/external_protocol_handler.h"
10 #include "chrome/browser/profiles/profile_io_data.h" 10 #include "chrome/browser/profiles/profile_io_data.h"
(...skipping 14 matching lines...) Expand all
25 *cursor_position -= num_leading_chars_removed; 25 *cursor_position -= num_leading_chars_removed;
26 else 26 else
27 *cursor_position = 0; 27 *cursor_position = 0;
28 } 28 }
29 29
30 } // namespace 30 } // namespace
31 31
32 AutocompleteInput::AutocompleteInput() 32 AutocompleteInput::AutocompleteInput()
33 : cursor_position_(base::string16::npos), 33 : cursor_position_(base::string16::npos),
34 current_page_classification_(AutocompleteInput::INVALID_SPEC), 34 current_page_classification_(AutocompleteInput::INVALID_SPEC),
35 type_(metrics::OmniboxInputType::INVALID), 35 type_(INVALID),
36 prevent_inline_autocomplete_(false), 36 prevent_inline_autocomplete_(false),
37 prefer_keyword_(false), 37 prefer_keyword_(false),
38 allow_exact_keyword_match_(true), 38 allow_exact_keyword_match_(true),
39 want_asynchronous_matches_(true) { 39 want_asynchronous_matches_(true) {
40 } 40 }
41 41
42 AutocompleteInput::AutocompleteInput( 42 AutocompleteInput::AutocompleteInput(
43 const base::string16& text, 43 const base::string16& text,
44 size_t cursor_position, 44 size_t cursor_position,
45 const base::string16& desired_tld, 45 const base::string16& desired_tld,
(...skipping 16 matching lines...) Expand all
62 // None of the providers care about leading white space so we always trim it. 62 // None of the providers care about leading white space so we always trim it.
63 // Providers that care about trailing white space handle trimming themselves. 63 // Providers that care about trailing white space handle trimming themselves.
64 if ((base::TrimWhitespace(text, base::TRIM_LEADING, &text_) & 64 if ((base::TrimWhitespace(text, base::TRIM_LEADING, &text_) &
65 base::TRIM_LEADING) != 0) 65 base::TRIM_LEADING) != 0)
66 AdjustCursorPositionIfNecessary(text.length() - text_.length(), 66 AdjustCursorPositionIfNecessary(text.length() - text_.length(),
67 &cursor_position_); 67 &cursor_position_);
68 68
69 GURL canonicalized_url; 69 GURL canonicalized_url;
70 type_ = Parse(text_, desired_tld, &parts_, &scheme_, &canonicalized_url); 70 type_ = Parse(text_, desired_tld, &parts_, &scheme_, &canonicalized_url);
71 71
72 if (type_ == metrics::OmniboxInputType::INVALID) 72 if (type_ == INVALID)
73 return; 73 return;
74 74
75 if (((type_ == metrics::OmniboxInputType::UNKNOWN) || 75 if (((type_ == UNKNOWN) || (type_ == URL)) &&
76 (type_ == metrics::OmniboxInputType::URL)) &&
77 canonicalized_url.is_valid() && 76 canonicalized_url.is_valid() &&
78 (!canonicalized_url.IsStandard() || canonicalized_url.SchemeIsFile() || 77 (!canonicalized_url.IsStandard() || canonicalized_url.SchemeIsFile() ||
79 canonicalized_url.SchemeIsFileSystem() || 78 canonicalized_url.SchemeIsFileSystem() ||
80 !canonicalized_url.host().empty())) 79 !canonicalized_url.host().empty()))
81 canonicalized_url_ = canonicalized_url; 80 canonicalized_url_ = canonicalized_url;
82 81
83 size_t chars_removed = RemoveForcedQueryStringIfNecessary(type_, &text_); 82 size_t chars_removed = RemoveForcedQueryStringIfNecessary(type_, &text_);
84 AdjustCursorPositionIfNecessary(chars_removed, &cursor_position_); 83 AdjustCursorPositionIfNecessary(chars_removed, &cursor_position_);
85 if (chars_removed) { 84 if (chars_removed) {
86 // Remove spaces between opening question mark and first actual character. 85 // Remove spaces between opening question mark and first actual character.
87 base::string16 trimmed_text; 86 base::string16 trimmed_text;
88 if ((base::TrimWhitespace(text_, base::TRIM_LEADING, &trimmed_text) & 87 if ((base::TrimWhitespace(text_, base::TRIM_LEADING, &trimmed_text) &
89 base::TRIM_LEADING) != 0) { 88 base::TRIM_LEADING) != 0) {
90 AdjustCursorPositionIfNecessary(text_.length() - trimmed_text.length(), 89 AdjustCursorPositionIfNecessary(text_.length() - trimmed_text.length(),
91 &cursor_position_); 90 &cursor_position_);
92 text_ = trimmed_text; 91 text_ = trimmed_text;
93 } 92 }
94 } 93 }
95 } 94 }
96 95
97 AutocompleteInput::~AutocompleteInput() { 96 AutocompleteInput::~AutocompleteInput() {
98 } 97 }
99 98
100 // static 99 // static
101 size_t AutocompleteInput::RemoveForcedQueryStringIfNecessary( 100 size_t AutocompleteInput::RemoveForcedQueryStringIfNecessary(
102 AutocompleteInput::Type type, 101 Type type,
103 base::string16* text) { 102 base::string16* text) {
104 if ((type != metrics::OmniboxInputType::FORCED_QUERY) || text->empty() || 103 if (type != FORCED_QUERY || text->empty() || (*text)[0] != L'?')
105 (*text)[0] != L'?')
106 return 0; 104 return 0;
107 // Drop the leading '?'. 105 // Drop the leading '?'.
108 text->erase(0, 1); 106 text->erase(0, 1);
109 return 1; 107 return 1;
110 } 108 }
111 109
112 // static 110 // static
113 std::string AutocompleteInput::TypeToString(AutocompleteInput::Type type) { 111 std::string AutocompleteInput::TypeToString(Type type) {
114 switch (type) { 112 switch (type) {
115 case metrics::OmniboxInputType::INVALID: return "invalid"; 113 case INVALID: return "invalid";
116 case metrics::OmniboxInputType::UNKNOWN: return "unknown"; 114 case UNKNOWN: return "unknown";
117 case metrics::OmniboxInputType::DEPRECATED_REQUESTED_URL: 115 case URL: return "url";
118 return "deprecated-requested-url"; 116 case QUERY: return "query";
119 case metrics::OmniboxInputType::URL: return "url"; 117 case FORCED_QUERY: return "forced-query";
120 case metrics::OmniboxInputType::QUERY: return "query"; 118
121 case metrics::OmniboxInputType::FORCED_QUERY: return "forced-query"; 119 default:
120 NOTREACHED();
121 return std::string();
122 } 122 }
123 return std::string();
124 } 123 }
125 124
126 // static 125 // static
127 AutocompleteInput::Type AutocompleteInput::Parse( 126 AutocompleteInput::Type AutocompleteInput::Parse(
128 const base::string16& text, 127 const base::string16& text,
129 const base::string16& desired_tld, 128 const base::string16& desired_tld,
130 url::Parsed* parts, 129 url::Parsed* parts,
131 base::string16* scheme, 130 base::string16* scheme,
132 GURL* canonicalized_url) { 131 GURL* canonicalized_url) {
133 size_t first_non_white = text.find_first_not_of(base::kWhitespaceUTF16, 0); 132 size_t first_non_white = text.find_first_not_of(base::kWhitespaceUTF16, 0);
134 if (first_non_white == base::string16::npos) 133 if (first_non_white == base::string16::npos)
135 return metrics::OmniboxInputType::INVALID; // All whitespace. 134 return INVALID; // All whitespace.
136 135
137 if (text[first_non_white] == L'?') { 136 if (text[first_non_white] == L'?') {
138 // If the first non-whitespace character is a '?', we magically treat this 137 // If the first non-whitespace character is a '?', we magically treat this
139 // as a query. 138 // as a query.
140 return metrics::OmniboxInputType::FORCED_QUERY; 139 return FORCED_QUERY;
141 } 140 }
142 141
143 // Ask our parsing back-end to help us understand what the user typed. We 142 // Ask our parsing back-end to help us understand what the user typed. We
144 // use the URLFixerUpper here because we want to be smart about what we 143 // use the URLFixerUpper here because we want to be smart about what we
145 // consider a scheme. For example, we shouldn't consider www.google.com:80 144 // consider a scheme. For example, we shouldn't consider www.google.com:80
146 // to have a scheme. 145 // to have a scheme.
147 url::Parsed local_parts; 146 url::Parsed local_parts;
148 if (!parts) 147 if (!parts)
149 parts = &local_parts; 148 parts = &local_parts;
150 const base::string16 parsed_scheme(URLFixerUpper::SegmentURL(text, parts)); 149 const base::string16 parsed_scheme(URLFixerUpper::SegmentURL(text, parts));
151 if (scheme) 150 if (scheme)
152 *scheme = parsed_scheme; 151 *scheme = parsed_scheme;
153 152
154 // If we can't canonicalize the user's input, the rest of the autocomplete 153 // If we can't canonicalize the user's input, the rest of the autocomplete
155 // system isn't going to be able to produce a navigable URL match for it. 154 // system isn't going to be able to produce a navigable URL match for it.
156 // So we just return QUERY immediately in these cases. 155 // So we just return QUERY immediately in these cases.
157 GURL placeholder_canonicalized_url; 156 GURL placeholder_canonicalized_url;
158 if (!canonicalized_url) 157 if (!canonicalized_url)
159 canonicalized_url = &placeholder_canonicalized_url; 158 canonicalized_url = &placeholder_canonicalized_url;
160 *canonicalized_url = URLFixerUpper::FixupURL(base::UTF16ToUTF8(text), 159 *canonicalized_url = URLFixerUpper::FixupURL(base::UTF16ToUTF8(text),
161 base::UTF16ToUTF8(desired_tld)); 160 base::UTF16ToUTF8(desired_tld));
162 if (!canonicalized_url->is_valid()) 161 if (!canonicalized_url->is_valid())
163 return metrics::OmniboxInputType::QUERY; 162 return QUERY;
164 163
165 if (LowerCaseEqualsASCII(parsed_scheme, url::kFileScheme)) { 164 if (LowerCaseEqualsASCII(parsed_scheme, url::kFileScheme)) {
166 // A user might or might not type a scheme when entering a file URL. In 165 // A user might or might not type a scheme when entering a file URL. In
167 // either case, |parsed_scheme| will tell us that this is a file URL, but 166 // either case, |parsed_scheme| will tell us that this is a file URL, but
168 // |parts->scheme| might be empty, e.g. if the user typed "C:\foo". 167 // |parts->scheme| might be empty, e.g. if the user typed "C:\foo".
169 return metrics::OmniboxInputType::URL; 168 return URL;
170 } 169 }
171 170
172 // If the user typed a scheme, and it's HTTP or HTTPS, we know how to parse it 171 // If the user typed a scheme, and it's HTTP or HTTPS, we know how to parse it
173 // well enough that we can fall through to the heuristics below. If it's 172 // well enough that we can fall through to the heuristics below. If it's
174 // something else, we can just determine our action based on what we do with 173 // something else, we can just determine our action based on what we do with
175 // any input of this scheme. In theory we could do better with some schemes 174 // any input of this scheme. In theory we could do better with some schemes
176 // (e.g. "ftp" or "view-source") but I'll wait to spend the effort on that 175 // (e.g. "ftp" or "view-source") but I'll wait to spend the effort on that
177 // until I run into some cases that really need it. 176 // until I run into some cases that really need it.
178 if (parts->scheme.is_nonempty() && 177 if (parts->scheme.is_nonempty() &&
179 !LowerCaseEqualsASCII(parsed_scheme, url::kHttpScheme) && 178 !LowerCaseEqualsASCII(parsed_scheme, url::kHttpScheme) &&
180 !LowerCaseEqualsASCII(parsed_scheme, url::kHttpsScheme)) { 179 !LowerCaseEqualsASCII(parsed_scheme, url::kHttpsScheme)) {
181 // See if we know how to handle the URL internally. There are some schemes 180 // See if we know how to handle the URL internally. There are some schemes
182 // that we convert to other things before they reach the renderer or else 181 // that we convert to other things before they reach the renderer or else
183 // the renderer handles internally without reaching the net::URLRequest 182 // the renderer handles internally without reaching the net::URLRequest
184 // logic. They thus won't be listed as "handled protocols", but we should 183 // logic. They thus won't be listed as "handled protocols", but we should
185 // still claim to handle them. 184 // still claim to handle them.
186 if (ProfileIOData::IsHandledProtocol(base::UTF16ToASCII(parsed_scheme)) || 185 if (ProfileIOData::IsHandledProtocol(base::UTF16ToASCII(parsed_scheme)) ||
187 LowerCaseEqualsASCII(parsed_scheme, content::kViewSourceScheme) || 186 LowerCaseEqualsASCII(parsed_scheme, content::kViewSourceScheme) ||
188 LowerCaseEqualsASCII(parsed_scheme, url::kJavaScriptScheme) || 187 LowerCaseEqualsASCII(parsed_scheme, url::kJavaScriptScheme) ||
189 LowerCaseEqualsASCII(parsed_scheme, url::kDataScheme)) 188 LowerCaseEqualsASCII(parsed_scheme, url::kDataScheme))
190 return metrics::OmniboxInputType::URL; 189 return URL;
191 190
192 // Not an internal protocol. Check and see if the user has explicitly 191 // Not an internal protocol. Check and see if the user has explicitly
193 // opened this scheme as a URL before, or if the "scheme" is actually a 192 // opened this scheme as a URL before, or if the "scheme" is actually a
194 // username. We need to do this after the check above because some 193 // username. We need to do this after the check above because some
195 // handlable schemes (e.g. "javascript") may be treated as "blocked" by the 194 // handlable schemes (e.g. "javascript") may be treated as "blocked" by the
196 // external protocol handler because we don't want pages to open them, but 195 // external protocol handler because we don't want pages to open them, but
197 // users still can. 196 // users still can.
198 // Note that the protocol handler needs to be informed that omnibox input 197 // Note that the protocol handler needs to be informed that omnibox input
199 // should always be considered "user gesture-triggered", lest it always 198 // should always be considered "user gesture-triggered", lest it always
200 // return BLOCK. 199 // return BLOCK.
201 ExternalProtocolHandler::BlockState block_state = 200 ExternalProtocolHandler::BlockState block_state =
202 ExternalProtocolHandler::GetBlockState( 201 ExternalProtocolHandler::GetBlockState(
203 base::UTF16ToUTF8(parsed_scheme), true); 202 base::UTF16ToUTF8(parsed_scheme), true);
204 switch (block_state) { 203 switch (block_state) {
205 case ExternalProtocolHandler::DONT_BLOCK: 204 case ExternalProtocolHandler::DONT_BLOCK:
206 return metrics::OmniboxInputType::URL; 205 return URL;
207 206
208 case ExternalProtocolHandler::BLOCK: 207 case ExternalProtocolHandler::BLOCK:
209 // If we don't want the user to open the URL, don't let it be navigated 208 // If we don't want the user to open the URL, don't let it be navigated
210 // to at all. 209 // to at all.
211 return metrics::OmniboxInputType::QUERY; 210 return QUERY;
212 211
213 default: { 212 default: {
214 // We don't know about this scheme. It might be that the user typed a 213 // We don't know about this scheme. It might be that the user typed a
215 // URL of the form "username:password@foo.com". 214 // URL of the form "username:password@foo.com".
216 const base::string16 http_scheme_prefix = 215 const base::string16 http_scheme_prefix =
217 base::ASCIIToUTF16(std::string(url::kHttpScheme) + 216 base::ASCIIToUTF16(std::string(url::kHttpScheme) +
218 url::kStandardSchemeSeparator); 217 url::kStandardSchemeSeparator);
219 url::Parsed http_parts; 218 url::Parsed http_parts;
220 base::string16 http_scheme; 219 base::string16 http_scheme;
221 GURL http_canonicalized_url; 220 GURL http_canonicalized_url;
222 AutocompleteInput::Type http_type = 221 Type http_type = Parse(http_scheme_prefix + text, desired_tld,
223 Parse(http_scheme_prefix + text, desired_tld, &http_parts, 222 &http_parts, &http_scheme,
224 &http_scheme, &http_canonicalized_url); 223 &http_canonicalized_url);
225 DCHECK_EQ(std::string(url::kHttpScheme), 224 DCHECK_EQ(std::string(url::kHttpScheme),
226 base::UTF16ToUTF8(http_scheme)); 225 base::UTF16ToUTF8(http_scheme));
227 226
228 if ((http_type == metrics::OmniboxInputType::URL) && 227 if ((http_type == URL) && http_parts.username.is_nonempty() &&
229 http_parts.username.is_nonempty() &&
230 http_parts.password.is_nonempty()) { 228 http_parts.password.is_nonempty()) {
231 // Manually re-jigger the parsed parts to match |text| (without the 229 // Manually re-jigger the parsed parts to match |text| (without the
232 // http scheme added). 230 // http scheme added).
233 http_parts.scheme.reset(); 231 http_parts.scheme.reset();
234 url::Component* components[] = { 232 url::Component* components[] = {
235 &http_parts.username, 233 &http_parts.username,
236 &http_parts.password, 234 &http_parts.password,
237 &http_parts.host, 235 &http_parts.host,
238 &http_parts.port, 236 &http_parts.port,
239 &http_parts.path, 237 &http_parts.path,
240 &http_parts.query, 238 &http_parts.query,
241 &http_parts.ref, 239 &http_parts.ref,
242 }; 240 };
243 for (size_t i = 0; i < arraysize(components); ++i) { 241 for (size_t i = 0; i < arraysize(components); ++i) {
244 URLFixerUpper::OffsetComponent( 242 URLFixerUpper::OffsetComponent(
245 -static_cast<int>(http_scheme_prefix.length()), components[i]); 243 -static_cast<int>(http_scheme_prefix.length()), components[i]);
246 } 244 }
247 245
248 *parts = http_parts; 246 *parts = http_parts;
249 if (scheme) 247 if (scheme)
250 scheme->clear(); 248 scheme->clear();
251 *canonicalized_url = http_canonicalized_url; 249 *canonicalized_url = http_canonicalized_url;
252 250
253 return metrics::OmniboxInputType::URL; 251 return URL;
254 } 252 }
255 253
256 // We don't know about this scheme and it doesn't look like the user 254 // We don't know about this scheme and it doesn't look like the user
257 // typed a username and password. It's likely to be a search operator 255 // typed a username and password. It's likely to be a search operator
258 // like "site:" or "link:". We classify it as UNKNOWN so the user has 256 // like "site:" or "link:". We classify it as UNKNOWN so the user has
259 // the option of treating it as a URL if we're wrong. 257 // the option of treating it as a URL if we're wrong.
260 // Note that SegmentURL() is smart so we aren't tricked by "c:\foo" or 258 // Note that SegmentURL() is smart so we aren't tricked by "c:\foo" or
261 // "www.example.com:81" in this case. 259 // "www.example.com:81" in this case.
262 return metrics::OmniboxInputType::UNKNOWN; 260 return UNKNOWN;
263 } 261 }
264 } 262 }
265 } 263 }
266 264
267 // Either the user didn't type a scheme, in which case we need to distinguish 265 // Either the user didn't type a scheme, in which case we need to distinguish
268 // between an HTTP URL and a query, or the scheme is HTTP or HTTPS, in which 266 // between an HTTP URL and a query, or the scheme is HTTP or HTTPS, in which
269 // case we should reject invalid formulations. 267 // case we should reject invalid formulations.
270 268
271 // If we have an empty host it can't be a valid HTTP[S] URL. (This should 269 // If we have an empty host it can't be a valid HTTP[S] URL. (This should
272 // only trigger for input that begins with a colon, which GURL will parse as a 270 // only trigger for input that begins with a colon, which GURL will parse as a
273 // valid, non-standard URL; for standard URLs, an empty host would have 271 // valid, non-standard URL; for standard URLs, an empty host would have
274 // resulted in an invalid |canonicalized_url| above.) 272 // resulted in an invalid |canonicalized_url| above.)
275 if (!parts->host.is_nonempty()) 273 if (!parts->host.is_nonempty())
276 return metrics::OmniboxInputType::QUERY; 274 return QUERY;
277 275
278 // Sanity-check: GURL should have failed to canonicalize this URL if it had an 276 // Sanity-check: GURL should have failed to canonicalize this URL if it had an
279 // invalid port. 277 // invalid port.
280 DCHECK_NE(url::PORT_INVALID, url::ParsePort(text.c_str(), parts->port)); 278 DCHECK_NE(url::PORT_INVALID, url::ParsePort(text.c_str(), parts->port));
281 279
282 // Likewise, the RCDS can reject certain obviously-invalid hosts. (We also 280 // Likewise, the RCDS can reject certain obviously-invalid hosts. (We also
283 // use the registry length later below.) 281 // use the registry length later below.)
284 const base::string16 host(text.substr(parts->host.begin, parts->host.len)); 282 const base::string16 host(text.substr(parts->host.begin, parts->host.len));
285 const size_t registry_length = 283 const size_t registry_length =
286 net::registry_controlled_domains::GetRegistryLength( 284 net::registry_controlled_domains::GetRegistryLength(
287 base::UTF16ToUTF8(host), 285 base::UTF16ToUTF8(host),
288 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, 286 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
289 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); 287 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
290 if (registry_length == std::string::npos) { 288 if (registry_length == std::string::npos) {
291 // Try to append the desired_tld. 289 // Try to append the desired_tld.
292 if (!desired_tld.empty()) { 290 if (!desired_tld.empty()) {
293 base::string16 host_with_tld(host); 291 base::string16 host_with_tld(host);
294 if (host[host.length() - 1] != '.') 292 if (host[host.length() - 1] != '.')
295 host_with_tld += '.'; 293 host_with_tld += '.';
296 host_with_tld += desired_tld; 294 host_with_tld += desired_tld;
297 const size_t tld_length = 295 const size_t tld_length =
298 net::registry_controlled_domains::GetRegistryLength( 296 net::registry_controlled_domains::GetRegistryLength(
299 base::UTF16ToUTF8(host_with_tld), 297 base::UTF16ToUTF8(host_with_tld),
300 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, 298 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
301 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); 299 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
302 if (tld_length != std::string::npos) { 300 if (tld_length != std::string::npos)
303 // Something like "99999999999" that looks like a bad IP 301 return URL; // Something like "99999999999" that looks like a bad IP
304 // address, but becomes valid on attaching a TLD. 302 // address, but becomes valid on attaching a TLD.
305 return metrics::OmniboxInputType::URL;
306 }
307 } 303 }
308 // Could be a broken IP address, etc. 304 return QUERY; // Could be a broken IP address, etc.
309 return metrics::OmniboxInputType::QUERY;
310 } 305 }
311 306
312 307
313 // See if the hostname is valid. While IE and GURL allow hostnames to contain 308 // See if the hostname is valid. While IE and GURL allow hostnames to contain
314 // many other characters (perhaps for weird intranet machines), it's extremely 309 // many other characters (perhaps for weird intranet machines), it's extremely
315 // unlikely that a user would be trying to type those in for anything other 310 // unlikely that a user would be trying to type those in for anything other
316 // than a search query. 311 // than a search query.
317 url::CanonHostInfo host_info; 312 url::CanonHostInfo host_info;
318 const std::string canonicalized_host(net::CanonicalizeHost( 313 const std::string canonicalized_host(net::CanonicalizeHost(
319 base::UTF16ToUTF8(host), &host_info)); 314 base::UTF16ToUTF8(host), &host_info));
(...skipping 13 matching lines...) Expand all
333 // "toys at amazon.com" will be treated as a search. 328 // "toys at amazon.com" will be treated as a search.
334 // * The user is typing some garbage string. Return QUERY. 329 // * The user is typing some garbage string. Return QUERY.
335 // 330 //
336 // Thus we fall down in the following cases: 331 // Thus we fall down in the following cases:
337 // * Trying to navigate to a hostname with spaces 332 // * Trying to navigate to a hostname with spaces
338 // * Trying to navigate to a hostname with invalid characters and an unknown 333 // * Trying to navigate to a hostname with invalid characters and an unknown
339 // TLD 334 // TLD
340 // These are rare, though probably possible in intranets. 335 // These are rare, though probably possible in intranets.
341 return (parts->scheme.is_nonempty() || 336 return (parts->scheme.is_nonempty() ||
342 ((registry_length != 0) && 337 ((registry_length != 0) &&
343 (host.find(' ') == base::string16::npos))) ? 338 (host.find(' ') == base::string16::npos))) ? UNKNOWN : QUERY;
344 metrics::OmniboxInputType::UNKNOWN : metrics::OmniboxInputType::QUERY;
345 } 339 }
346 340
347 // Now that we've ruled out all schemes other than http or https and done a 341 // Now that we've ruled out all schemes other than http or https and done a
348 // little more sanity checking, the presence of a scheme means this is likely 342 // little more sanity checking, the presence of a scheme means this is likely
349 // a URL. 343 // a URL.
350 if (parts->scheme.is_nonempty()) 344 if (parts->scheme.is_nonempty())
351 return metrics::OmniboxInputType::URL; 345 return URL;
352 346
353 // See if the host is an IP address. 347 // See if the host is an IP address.
354 if (host_info.family == url::CanonHostInfo::IPV6) 348 if (host_info.family == url::CanonHostInfo::IPV6)
355 return metrics::OmniboxInputType::URL; 349 return URL;
356 // If the user originally typed a host that looks like an IP address (a 350 // If the user originally typed a host that looks like an IP address (a
357 // dotted quad), they probably want to open it. If the original input was 351 // dotted quad), they probably want to open it. If the original input was
358 // something else (like a single number), they probably wanted to search for 352 // something else (like a single number), they probably wanted to search for
359 // it, unless they explicitly typed a scheme. This is true even if the URL 353 // it, unless they explicitly typed a scheme. This is true even if the URL
360 // appears to have a path: "1.2/45" is more likely a search (for the answer 354 // appears to have a path: "1.2/45" is more likely a search (for the answer
361 // to a math problem) than a URL. However, if there are more non-host 355 // to a math problem) than a URL. However, if there are more non-host
362 // components, then maybe this really was intended to be a navigation. For 356 // components, then maybe this really was intended to be a navigation. For
363 // this reason we only check the dotted-quad case here, and save the "other 357 // this reason we only check the dotted-quad case here, and save the "other
364 // IP addresses" case for after we check the number of non-host components 358 // IP addresses" case for after we check the number of non-host components
365 // below. 359 // below.
366 if ((host_info.family == url::CanonHostInfo::IPV4) && 360 if ((host_info.family == url::CanonHostInfo::IPV4) &&
367 (host_info.num_ipv4_components == 4)) 361 (host_info.num_ipv4_components == 4))
368 return metrics::OmniboxInputType::URL; 362 return URL;
369 363
370 // Presence of a password means this is likely a URL. Note that unless the 364 // Presence of a password means this is likely a URL. Note that unless the
371 // user has typed an explicit "http://" or similar, we'll probably think that 365 // user has typed an explicit "http://" or similar, we'll probably think that
372 // the username is some unknown scheme, and bail out in the scheme-handling 366 // the username is some unknown scheme, and bail out in the scheme-handling
373 // code above. 367 // code above.
374 if (parts->password.is_nonempty()) 368 if (parts->password.is_nonempty())
375 return metrics::OmniboxInputType::URL; 369 return URL;
376 370
377 // Trailing slashes force the input to be treated as a URL. 371 // Trailing slashes force the input to be treated as a URL.
378 if (parts->path.is_nonempty()) { 372 if (parts->path.is_nonempty()) {
379 char c = text[parts->path.end() - 1]; 373 char c = text[parts->path.end() - 1];
380 if ((c == '\\') || (c == '/')) 374 if ((c == '\\') || (c == '/'))
381 return metrics::OmniboxInputType::URL; 375 return URL;
382 } 376 }
383 377
384 // If there is more than one recognized non-host component, this is likely to 378 // If there is more than one recognized non-host component, this is likely to
385 // be a URL, even if the TLD is unknown (in which case this is likely an 379 // be a URL, even if the TLD is unknown (in which case this is likely an
386 // intranet URL). 380 // intranet URL).
387 if (NumNonHostComponents(*parts) > 1) 381 if (NumNonHostComponents(*parts) > 1)
388 return metrics::OmniboxInputType::URL; 382 return URL;
389 383
390 // If the host has a known TLD or a port, it's probably a URL, with the 384 // If the host has a known TLD or a port, it's probably a URL, with the
391 // following exceptions: 385 // following exceptions:
392 // * Any "IP addresses" that make it here are more likely searches 386 // * Any "IP addresses" that make it here are more likely searches
393 // (see above). 387 // (see above).
394 // * If we reach here with a username, our input looks like "user@host[.tld]". 388 // * If we reach here with a username, our input looks like "user@host[.tld]".
395 // Because there is no scheme explicitly specified, we think this is more 389 // Because there is no scheme explicitly specified, we think this is more
396 // likely an email address than an HTTP auth attempt. Hence, we search by 390 // likely an email address than an HTTP auth attempt. Hence, we search by
397 // default and let users correct us on a case-by-case basis. 391 // default and let users correct us on a case-by-case basis.
398 // Note that we special-case "localhost" as a known hostname. 392 // Note that we special-case "localhost" as a known hostname.
399 if ((host_info.family != url::CanonHostInfo::IPV4) && 393 if ((host_info.family != url::CanonHostInfo::IPV4) &&
400 ((registry_length != 0) || (host == base::ASCIIToUTF16("localhost") || 394 ((registry_length != 0) || (host == base::ASCIIToUTF16("localhost") ||
401 parts->port.is_nonempty()))) { 395 parts->port.is_nonempty())))
402 return parts->username.is_nonempty() ? metrics::OmniboxInputType::UNKNOWN : 396 return parts->username.is_nonempty() ? UNKNOWN : URL;
403 metrics::OmniboxInputType::URL;
404 }
405 397
406 // If we reach this point, we know there's no known TLD on the input, so if 398 // If we reach this point, we know there's no known TLD on the input, so if
407 // the user wishes to add a desired_tld, the fixup code will oblige; thus this 399 // the user wishes to add a desired_tld, the fixup code will oblige; thus this
408 // is a URL. 400 // is a URL.
409 if (!desired_tld.empty()) 401 if (!desired_tld.empty())
410 return metrics::OmniboxInputType::URL; 402 return URL;
411 403
412 // No scheme, password, port, path, and no known TLD on the host. 404 // No scheme, password, port, path, and no known TLD on the host.
413 // This could be: 405 // This could be:
414 // * An "incomplete IP address"; likely a search (see above). 406 // * An "incomplete IP address"; likely a search (see above).
415 // * An email-like input like "user@host", where "host" has no known TLD. 407 // * An email-like input like "user@host", where "host" has no known TLD.
416 // It's not clear what the user means here and searching seems reasonable. 408 // It's not clear what the user means here and searching seems reasonable.
417 // * A single word "foo"; possibly an intranet site, but more likely a search. 409 // * A single word "foo"; possibly an intranet site, but more likely a search.
418 // This is ideally an UNKNOWN, and we can let the Alternate Nav URL code 410 // This is ideally an UNKNOWN, and we can let the Alternate Nav URL code
419 // catch our mistakes. 411 // catch our mistakes.
420 // * A URL with a valid TLD we don't know about yet. If e.g. a registrar adds 412 // * A URL with a valid TLD we don't know about yet. If e.g. a registrar adds
421 // "xxx" as a TLD, then until we add it to our data file, Chrome won't know 413 // "xxx" as a TLD, then until we add it to our data file, Chrome won't know
422 // "foo.xxx" is a real URL. So ideally this is a URL, but we can't really 414 // "foo.xxx" is a real URL. So ideally this is a URL, but we can't really
423 // distinguish this case from: 415 // distinguish this case from:
424 // * A "URL-like" string that's not really a URL (like 416 // * A "URL-like" string that's not really a URL (like
425 // "browser.tabs.closeButtons" or "java.awt.event.*"). This is ideally a 417 // "browser.tabs.closeButtons" or "java.awt.event.*"). This is ideally a
426 // QUERY. Since this is indistinguishable from the case above, and this 418 // QUERY. Since this is indistinguishable from the case above, and this
427 // case is much more likely, claim these are UNKNOWN, which should default 419 // case is much more likely, claim these are UNKNOWN, which should default
428 // to the right thing and let users correct us on a case-by-case basis. 420 // to the right thing and let users correct us on a case-by-case basis.
429 return metrics::OmniboxInputType::UNKNOWN; 421 return UNKNOWN;
430 } 422 }
431 423
432 // static 424 // static
433 void AutocompleteInput::ParseForEmphasizeComponents(const base::string16& text, 425 void AutocompleteInput::ParseForEmphasizeComponents(const base::string16& text,
434 url::Component* scheme, 426 url::Component* scheme,
435 url::Component* host) { 427 url::Component* host) {
436 url::Parsed parts; 428 url::Parsed parts;
437 base::string16 scheme_str; 429 base::string16 scheme_str;
438 Parse(text, base::string16(), &parts, &scheme_str, NULL); 430 Parse(text, base::string16(), &parts, &scheme_str, NULL);
439 431
(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after
524 text_ = text; 516 text_ = text;
525 cursor_position_ = cursor_position; 517 cursor_position_ = cursor_position;
526 parts_ = parts; 518 parts_ = parts;
527 } 519 }
528 520
529 void AutocompleteInput::Clear() { 521 void AutocompleteInput::Clear() {
530 text_.clear(); 522 text_.clear();
531 cursor_position_ = base::string16::npos; 523 cursor_position_ = base::string16::npos;
532 current_url_ = GURL(); 524 current_url_ = GURL();
533 current_page_classification_ = AutocompleteInput::INVALID_SPEC; 525 current_page_classification_ = AutocompleteInput::INVALID_SPEC;
534 type_ = metrics::OmniboxInputType::INVALID; 526 type_ = INVALID;
535 parts_ = url::Parsed(); 527 parts_ = url::Parsed();
536 scheme_.clear(); 528 scheme_.clear();
537 canonicalized_url_ = GURL(); 529 canonicalized_url_ = GURL();
538 prevent_inline_autocomplete_ = false; 530 prevent_inline_autocomplete_ = false;
539 prefer_keyword_ = false; 531 prefer_keyword_ = false;
540 allow_exact_keyword_match_ = false; 532 allow_exact_keyword_match_ = false;
541 want_asynchronous_matches_ = true; 533 want_asynchronous_matches_ = true;
542 } 534 }
OLDNEW
« no previous file with comments | « chrome/browser/autocomplete/autocomplete_input.h ('k') | chrome/browser/autocomplete/autocomplete_input_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698