| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/browser/autocomplete/autocomplete_input.h" | 5 #include "chrome/browser/autocomplete/autocomplete_input.h" |
| 6 | 6 |
| 7 #include "base/strings/string_util.h" | 7 #include "base/strings/string_util.h" |
| 8 #include "base/strings/utf_string_conversions.h" | 8 #include "base/strings/utf_string_conversions.h" |
| 9 #include "chrome/browser/external_protocol/external_protocol_handler.h" | 9 #include "chrome/browser/external_protocol/external_protocol_handler.h" |
| 10 #include "chrome/browser/profiles/profile_io_data.h" | 10 #include "chrome/browser/profiles/profile_io_data.h" |
| (...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 143 // use the URLFixerUpper here because we want to be smart about what we | 143 // use the URLFixerUpper here because we want to be smart about what we |
| 144 // consider a scheme. For example, we shouldn't consider www.google.com:80 | 144 // consider a scheme. For example, we shouldn't consider www.google.com:80 |
| 145 // to have a scheme. | 145 // to have a scheme. |
| 146 url_parse::Parsed local_parts; | 146 url_parse::Parsed local_parts; |
| 147 if (!parts) | 147 if (!parts) |
| 148 parts = &local_parts; | 148 parts = &local_parts; |
| 149 const base::string16 parsed_scheme(URLFixerUpper::SegmentURL(text, parts)); | 149 const base::string16 parsed_scheme(URLFixerUpper::SegmentURL(text, parts)); |
| 150 if (scheme) | 150 if (scheme) |
| 151 *scheme = parsed_scheme; | 151 *scheme = parsed_scheme; |
| 152 | 152 |
| 153 // Try to fixup and canonicalize the user's typing. We use this to help | 153 // If we can't canonicalize the user's input, the rest of the autocomplete |
| 154 // determine if it's safe to return "URL" as the type of anything that has an | 154 // system isn't going to be able to produce a navigable URL match for it. |
| 155 // explicit, non-HTTP[S] scheme. (HTTP[S] and "no scheme" inputs get more | 155 // So we just return QUERY immediately in these cases. |
| 156 // sophisticated heuristics below.) If we can't canonicalize such inputs, we | |
| 157 // shouldn't mark them as "URL"s, because the rest of the autocomplete system | |
| 158 // isn't going to be able to produce navigable URL matches for them, which can | |
| 159 // lead to DCHECK failures later. | |
| 160 GURL placeholder_canonicalized_url; | 156 GURL placeholder_canonicalized_url; |
| 161 if (!canonicalized_url) | 157 if (!canonicalized_url) |
| 162 canonicalized_url = &placeholder_canonicalized_url; | 158 canonicalized_url = &placeholder_canonicalized_url; |
| 163 *canonicalized_url = URLFixerUpper::FixupURL(base::UTF16ToUTF8(text), | 159 *canonicalized_url = URLFixerUpper::FixupURL(base::UTF16ToUTF8(text), |
| 164 base::UTF16ToUTF8(desired_tld)); | 160 base::UTF16ToUTF8(desired_tld)); |
| 165 Type return_value_for_non_http_url = | 161 if (!canonicalized_url->is_valid()) |
| 166 canonicalized_url->is_valid() ? URL : QUERY; | 162 return QUERY; |
| 167 | 163 |
| 168 if (LowerCaseEqualsASCII(parsed_scheme, content::kFileScheme)) { | 164 if (LowerCaseEqualsASCII(parsed_scheme, content::kFileScheme)) { |
| 169 // A user might or might not type a scheme when entering a file URL. In | 165 // A user might or might not type a scheme when entering a file URL. In |
| 170 // either case, |parsed_scheme| will tell us that this is a file URL, but | 166 // either case, |parsed_scheme| will tell us that this is a file URL, but |
| 171 // |parts->scheme| might be empty, e.g. if the user typed "C:\foo". | 167 // |parts->scheme| might be empty, e.g. if the user typed "C:\foo". |
| 172 return URL; | 168 return URL; |
| 173 } | 169 } |
| 174 | 170 |
| 175 // If the user typed a scheme, and it's HTTP or HTTPS, we know how to parse it | 171 // If the user typed a scheme, and it's HTTP or HTTPS, we know how to parse it |
| 176 // well enough that we can fall through to the heuristics below. If it's | 172 // well enough that we can fall through to the heuristics below. If it's |
| 177 // something else, we can just determine our action based on what we do with | 173 // something else, we can just determine our action based on what we do with |
| 178 // any input of this scheme. In theory we could do better with some schemes | 174 // any input of this scheme. In theory we could do better with some schemes |
| 179 // (e.g. "ftp" or "view-source") but I'll wait to spend the effort on that | 175 // (e.g. "ftp" or "view-source") but I'll wait to spend the effort on that |
| 180 // until I run into some cases that really need it. | 176 // until I run into some cases that really need it. |
| 181 if (parts->scheme.is_nonempty() && | 177 if (parts->scheme.is_nonempty() && |
| 182 !LowerCaseEqualsASCII(parsed_scheme, content::kHttpScheme) && | 178 !LowerCaseEqualsASCII(parsed_scheme, content::kHttpScheme) && |
| 183 !LowerCaseEqualsASCII(parsed_scheme, content::kHttpsScheme)) { | 179 !LowerCaseEqualsASCII(parsed_scheme, content::kHttpsScheme)) { |
| 184 // See if we know how to handle the URL internally. There are some schemes | 180 // See if we know how to handle the URL internally. There are some schemes |
| 185 // that we convert to other things before they reach the renderer or else | 181 // that we convert to other things before they reach the renderer or else |
| 186 // the renderer handles internally without reaching the net::URLRequest | 182 // the renderer handles internally without reaching the net::URLRequest |
| 187 // logic. They thus won't be listed as "handled protocols", but we should | 183 // logic. They thus won't be listed as "handled protocols", but we should |
| 188 // still claim to handle them. | 184 // still claim to handle them. |
| 189 if (ProfileIOData::IsHandledProtocol(base::UTF16ToASCII(parsed_scheme)) || | 185 if (ProfileIOData::IsHandledProtocol(base::UTF16ToASCII(parsed_scheme)) || |
| 190 LowerCaseEqualsASCII(parsed_scheme, content::kViewSourceScheme) || | 186 LowerCaseEqualsASCII(parsed_scheme, content::kViewSourceScheme) || |
| 191 LowerCaseEqualsASCII(parsed_scheme, content::kJavaScriptScheme) || | 187 LowerCaseEqualsASCII(parsed_scheme, content::kJavaScriptScheme) || |
| 192 LowerCaseEqualsASCII(parsed_scheme, content::kDataScheme)) | 188 LowerCaseEqualsASCII(parsed_scheme, content::kDataScheme)) |
| 193 return return_value_for_non_http_url; | 189 return URL; |
| 194 | 190 |
| 195 // Not an internal protocol. Check and see if the user has explicitly | 191 // Not an internal protocol. Check and see if the user has explicitly |
| 196 // opened this scheme as a URL before, or if the "scheme" is actually a | 192 // opened this scheme as a URL before, or if the "scheme" is actually a |
| 197 // username. We need to do this after the check above because some | 193 // username. We need to do this after the check above because some |
| 198 // handlable schemes (e.g. "javascript") may be treated as "blocked" by the | 194 // handlable schemes (e.g. "javascript") may be treated as "blocked" by the |
| 199 // external protocol handler because we don't want pages to open them, but | 195 // external protocol handler because we don't want pages to open them, but |
| 200 // users still can. | 196 // users still can. |
| 201 // Note that the protocol handler needs to be informed that omnibox input | 197 // Note that the protocol handler needs to be informed that omnibox input |
| 202 // should always be considered "user gesture-triggered", lest it always | 198 // should always be considered "user gesture-triggered", lest it always |
| 203 // return BLOCK. | 199 // return BLOCK. |
| 204 ExternalProtocolHandler::BlockState block_state = | 200 ExternalProtocolHandler::BlockState block_state = |
| 205 ExternalProtocolHandler::GetBlockState( | 201 ExternalProtocolHandler::GetBlockState( |
| 206 base::UTF16ToUTF8(parsed_scheme), true); | 202 base::UTF16ToUTF8(parsed_scheme), true); |
| 207 switch (block_state) { | 203 switch (block_state) { |
| 208 case ExternalProtocolHandler::DONT_BLOCK: | 204 case ExternalProtocolHandler::DONT_BLOCK: |
| 209 return return_value_for_non_http_url; | 205 return URL; |
| 210 | 206 |
| 211 case ExternalProtocolHandler::BLOCK: | 207 case ExternalProtocolHandler::BLOCK: |
| 212 // If we don't want the user to open the URL, don't let it be navigated | 208 // If we don't want the user to open the URL, don't let it be navigated |
| 213 // to at all. | 209 // to at all. |
| 214 return QUERY; | 210 return QUERY; |
| 215 | 211 |
| 216 default: { | 212 default: { |
| 217 // We don't know about this scheme. It might be that the user typed a | 213 // We don't know about this scheme. It might be that the user typed a |
| 218 // URL of the form "username:password@foo.com". | 214 // URL of the form "username:password@foo.com". |
| 219 const base::string16 http_scheme_prefix = | 215 const base::string16 http_scheme_prefix = |
| (...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 263 // "www.example.com:81" in this case. | 259 // "www.example.com:81" in this case. |
| 264 return UNKNOWN; | 260 return UNKNOWN; |
| 265 } | 261 } |
| 266 } | 262 } |
| 267 } | 263 } |
| 268 | 264 |
| 269 // Either the user didn't type a scheme, in which case we need to distinguish | 265 // Either the user didn't type a scheme, in which case we need to distinguish |
| 270 // between an HTTP URL and a query, or the scheme is HTTP or HTTPS, in which | 266 // between an HTTP URL and a query, or the scheme is HTTP or HTTPS, in which |
| 271 // case we should reject invalid formulations. | 267 // case we should reject invalid formulations. |
| 272 | 268 |
| 273 // If we have an empty host it can't be a URL. | 269 // If we have an empty host it can't be a valid HTTP[S] URL. (This should |
| 270 // only trigger for input that begins with a colon, which GURL will parse as a |
| 271 // valid, non-standard URL; for standard URLs, an empty host would have |
| 272 // resulted in an invalid |canonicalized_url| above.) |
| 274 if (!parts->host.is_nonempty()) | 273 if (!parts->host.is_nonempty()) |
| 275 return QUERY; | 274 return QUERY; |
| 276 | 275 |
| 276 // Sanity-check: GURL should have failed to canonicalize this URL if it had an |
| 277 // invalid port. |
| 278 DCHECK_NE(url_parse::PORT_INVALID, |
| 279 url_parse::ParsePort(text.c_str(), parts->port)); |
| 280 |
| 277 // Likewise, the RCDS can reject certain obviously-invalid hosts. (We also | 281 // Likewise, the RCDS can reject certain obviously-invalid hosts. (We also |
| 278 // use the registry length later below.) | 282 // use the registry length later below.) |
| 279 const base::string16 host(text.substr(parts->host.begin, parts->host.len)); | 283 const base::string16 host(text.substr(parts->host.begin, parts->host.len)); |
| 280 const size_t registry_length = | 284 const size_t registry_length = |
| 281 net::registry_controlled_domains::GetRegistryLength( | 285 net::registry_controlled_domains::GetRegistryLength( |
| 282 base::UTF16ToUTF8(host), | 286 base::UTF16ToUTF8(host), |
| 283 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, | 287 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, |
| 284 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); | 288 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); |
| 285 if (registry_length == std::string::npos) { | 289 if (registry_length == std::string::npos) { |
| 286 // Try to append the desired_tld. | 290 // Try to append the desired_tld. |
| (...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 328 // Thus we fall down in the following cases: | 332 // Thus we fall down in the following cases: |
| 329 // * Trying to navigate to a hostname with spaces | 333 // * Trying to navigate to a hostname with spaces |
| 330 // * Trying to navigate to a hostname with invalid characters and an unknown | 334 // * Trying to navigate to a hostname with invalid characters and an unknown |
| 331 // TLD | 335 // TLD |
| 332 // These are rare, though probably possible in intranets. | 336 // These are rare, though probably possible in intranets. |
| 333 return (parts->scheme.is_nonempty() || | 337 return (parts->scheme.is_nonempty() || |
| 334 ((registry_length != 0) && | 338 ((registry_length != 0) && |
| 335 (host.find(' ') == base::string16::npos))) ? UNKNOWN : QUERY; | 339 (host.find(' ') == base::string16::npos))) ? UNKNOWN : QUERY; |
| 336 } | 340 } |
| 337 | 341 |
| 338 // A port number is a good indicator that this is a URL. However, it might | |
| 339 // also be a query like "1.66:1" that looks kind of like an IP address and | |
| 340 // port number. So here we only check for "port numbers" that are illegal and | |
| 341 // thus mean this can't be navigated to (e.g. "1.2.3.4:garbage"), and we save | |
| 342 // handling legal port numbers until after the "IP address" determination | |
| 343 // below. | |
| 344 if (url_parse::ParsePort(text.c_str(), parts->port) == | |
| 345 url_parse::PORT_INVALID) | |
| 346 return QUERY; | |
| 347 | |
| 348 // Now that we've ruled out all schemes other than http or https and done a | 342 // Now that we've ruled out all schemes other than http or https and done a |
| 349 // little more sanity checking, the presence of a scheme means this is likely | 343 // little more sanity checking, the presence of a scheme means this is likely |
| 350 // a URL. | 344 // a URL. |
| 351 if (parts->scheme.is_nonempty()) | 345 if (parts->scheme.is_nonempty()) |
| 352 return URL; | 346 return URL; |
| 353 | 347 |
| 354 // See if the host is an IP address. | 348 // See if the host is an IP address. |
| 355 if (host_info.family == url_canon::CanonHostInfo::IPV6) | 349 if (host_info.family == url_canon::CanonHostInfo::IPV6) |
| 356 return URL; | 350 return URL; |
| 357 // If the user originally typed a host that looks like an IP address (a | 351 // If the user originally typed a host that looks like an IP address (a |
| (...skipping 175 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 533 current_page_classification_ = AutocompleteInput::INVALID_SPEC; | 527 current_page_classification_ = AutocompleteInput::INVALID_SPEC; |
| 534 type_ = INVALID; | 528 type_ = INVALID; |
| 535 parts_ = url_parse::Parsed(); | 529 parts_ = url_parse::Parsed(); |
| 536 scheme_.clear(); | 530 scheme_.clear(); |
| 537 canonicalized_url_ = GURL(); | 531 canonicalized_url_ = GURL(); |
| 538 prevent_inline_autocomplete_ = false; | 532 prevent_inline_autocomplete_ = false; |
| 539 prefer_keyword_ = false; | 533 prefer_keyword_ = false; |
| 540 allow_exact_keyword_match_ = false; | 534 allow_exact_keyword_match_ = false; |
| 541 matches_requested_ = ALL_MATCHES; | 535 matches_requested_ = ALL_MATCHES; |
| 542 } | 536 } |
| OLD | NEW |