OLD | NEW |
1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "components/omnibox/autocomplete_input.h" | 5 #include "components/omnibox/autocomplete_input.h" |
6 | 6 |
7 #include "base/strings/string_util.h" | 7 #include "base/strings/string_util.h" |
8 #include "base/strings/utf_string_conversions.h" | 8 #include "base/strings/utf_string_conversions.h" |
9 #include "components/metrics/proto/omnibox_event.pb.h" | 9 #include "components/metrics/proto/omnibox_event.pb.h" |
10 #include "components/omnibox/autocomplete_scheme_classifier.h" | 10 #include "components/omnibox/autocomplete_scheme_classifier.h" |
(...skipping 291 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
302 | 302 |
303 // For hostnames that look like IP addresses, distinguish between IPv6 | 303 // For hostnames that look like IP addresses, distinguish between IPv6 |
304 // addresses, which are basically guaranteed to be navigations, and IPv4 | 304 // addresses, which are basically guaranteed to be navigations, and IPv4 |
305 // addresses, which are much fuzzier. | 305 // addresses, which are much fuzzier. |
306 if (host_info.family == url::CanonHostInfo::IPV6) | 306 if (host_info.family == url::CanonHostInfo::IPV6) |
307 return metrics::OmniboxInputType::URL; | 307 return metrics::OmniboxInputType::URL; |
308 if (host_info.family == url::CanonHostInfo::IPV4) { | 308 if (host_info.family == url::CanonHostInfo::IPV4) { |
309 // The host may be a real IP address, or something that looks a bit like it | 309 // The host may be a real IP address, or something that looks a bit like it |
310 // (e.g. "1.2" or "3232235521"). We check whether it was convertible to an | 310 // (e.g. "1.2" or "3232235521"). We check whether it was convertible to an |
311 // IP with a non-zero first octet; IPs with first octet zero are "source | 311 // IP with a non-zero first octet; IPs with first octet zero are "source |
312 // IPs" and are never navigable as destination addresses. | 312 // IPs" and are almost never navigable as destination addresses. |
| 313 // |
| 314 // The one exception to this is 0.0.0.0; on many systems, attempting to |
| 315 // navigate to this IP actually navigates to localhost. To support this |
| 316 // case, when the converted IP is 0.0.0.0, we go ahead and run the "did the |
| 317 // user actually type four components" test in the conditional below, so |
| 318 // that we'll allow explicit attempts to navigate to "0.0.0.0". If the |
| 319 // input was anything else (e.g. "0"), we'll fall through to returning QUERY |
| 320 // afterwards. |
| 321 if ((host_info.address[0] != 0) || |
| 322 ((host_info.address[1] == 0) && (host_info.address[2] == 0) && |
| 323 (host_info.address[3] == 0))) { |
| 324 // This is theoretically a navigable IP. We have four cases. The first |
| 325 // three are: |
| 326 // * If the user typed four distinct components, this is an IP for sure. |
| 327 // * If the user typed two or three components, this is almost certainly a |
| 328 // query, especially for two components (as in "13.5/7.25"), but we'll |
| 329 // allow navigation for an explicit scheme or trailing slash below. |
| 330 // * If the user typed one component, this is likely a query, but could be |
| 331 // a non-dotted-quad version of an IP address. |
| 332 // Unfortunately, since we called CanonicalizeHost() on the |
| 333 // already-canonicalized host, all of these cases will have been changed |
| 334 // to have four components (e.g. 13.2 -> 13.0.0.2), so we have to call |
| 335 // CanonicalizeHost() again, this time on the original input, so that we |
| 336 // can get the correct number of IP components. |
| 337 // |
| 338 // The fourth case is that the user typed something ambiguous like ".1.2" |
| 339 // that fixup converted to an IP address ("1.0.0.2"). In this case the |
| 340 // call to CanonicalizeHost() will return NEUTRAL here. Since it's not |
| 341 // clear what the user intended, we fall back to our other heuristics. |
| 342 net::CanonicalizeHost(base::UTF16ToUTF8(original_host), &host_info); |
| 343 if ((host_info.family == url::CanonHostInfo::IPV4) && |
| 344 (host_info.num_ipv4_components == 4)) |
| 345 return metrics::OmniboxInputType::URL; |
| 346 } |
| 347 |
 | 348 // By this point, if we have an "IP" with first octet zero, we know the |
 | 349 // user didn't type a literal "0.0.0.0", so mark it as non-navigable. |
313 if (host_info.address[0] == 0) | 350 if (host_info.address[0] == 0) |
314 return metrics::OmniboxInputType::QUERY; | 351 return metrics::OmniboxInputType::QUERY; |
315 | |
316 // This is theoretically a navigable IP. We have four cases. The first | |
317 // three are: | |
318 // * If the user typed four distinct components, this is an IP for sure. | |
319 // * If the user typed two or three components, this is almost certainly a | |
320 // query, especially for two components (as in "13.5/7.25"), but we'll | |
321 // allow navigation for an explicit scheme or trailing slash below. | |
322 // * If the user typed one component, this is likely a query, but could be | |
323 // a non-dotted-quad version of an IP address. | |
324 // Unfortunately, since we called CanonicalizeHost() on the | |
325 // already-canonicalized host, all of these cases will have been changed to | |
326 // have four components (e.g. 13.2 -> 13.0.0.2), so we have to call | |
327 // CanonicalizeHost() again, this time on the original input, so that we can | |
328 // get the correct number of IP components. | |
329 // | |
330 // The fourth case is that the user typed something ambiguous like ".1.2" | |
331 // that fixup converted to an IP address ("1.0.0.2"). In this case the call | |
332 // to CanonicalizeHost() will return NEUTRAL here. Since it's not clear | |
333 // what the user intended, we fall back to our other heuristics. | |
334 net::CanonicalizeHost(base::UTF16ToUTF8(original_host), &host_info); | |
335 if ((host_info.family == url::CanonHostInfo::IPV4) && | |
336 (host_info.num_ipv4_components == 4)) | |
337 return metrics::OmniboxInputType::URL; | |
338 } | 352 } |
339 | 353 |
340 // Now that we've ruled out all schemes other than http or https and done a | 354 // Now that we've ruled out all schemes other than http or https and done a |
341 // little more sanity checking, the presence of a scheme means this is likely | 355 // little more sanity checking, the presence of a scheme means this is likely |
342 // a URL. | 356 // a URL. |
343 if (parts->scheme.is_nonempty()) | 357 if (parts->scheme.is_nonempty()) |
344 return metrics::OmniboxInputType::URL; | 358 return metrics::OmniboxInputType::URL; |
345 | 359 |
346 // Trailing slashes force the input to be treated as a URL. | 360 // Trailing slashes force the input to be treated as a URL. |
347 if (parts->path.is_nonempty()) { | 361 if (parts->path.is_nonempty()) { |
(...skipping 164 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
512 current_page_classification_ = metrics::OmniboxEventProto::INVALID_SPEC; | 526 current_page_classification_ = metrics::OmniboxEventProto::INVALID_SPEC; |
513 type_ = metrics::OmniboxInputType::INVALID; | 527 type_ = metrics::OmniboxInputType::INVALID; |
514 parts_ = url::Parsed(); | 528 parts_ = url::Parsed(); |
515 scheme_.clear(); | 529 scheme_.clear(); |
516 canonicalized_url_ = GURL(); | 530 canonicalized_url_ = GURL(); |
517 prevent_inline_autocomplete_ = false; | 531 prevent_inline_autocomplete_ = false; |
518 prefer_keyword_ = false; | 532 prefer_keyword_ = false; |
519 allow_exact_keyword_match_ = false; | 533 allow_exact_keyword_match_ = false; |
520 want_asynchronous_matches_ = true; | 534 want_asynchronous_matches_ = true; |
521 } | 535 } |
OLD | NEW |