OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/autocomplete/autocomplete_input.h" | 5 #include "chrome/browser/autocomplete/autocomplete_input.h" |
6 | 6 |
7 #include "base/strings/string_util.h" | 7 #include "base/strings/string_util.h" |
8 #include "base/strings/utf_string_conversions.h" | 8 #include "base/strings/utf_string_conversions.h" |
9 #include "chrome/browser/external_protocol/external_protocol_handler.h" | 9 #include "chrome/browser/external_protocol/external_protocol_handler.h" |
10 #include "chrome/browser/profiles/profile_io_data.h" | 10 #include "chrome/browser/profiles/profile_io_data.h" |
(...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
143 // use the URLFixerUpper here because we want to be smart about what we | 143 // use the URLFixerUpper here because we want to be smart about what we |
144 // consider a scheme. For example, we shouldn't consider www.google.com:80 | 144 // consider a scheme. For example, we shouldn't consider www.google.com:80 |
145 // to have a scheme. | 145 // to have a scheme. |
146 url_parse::Parsed local_parts; | 146 url_parse::Parsed local_parts; |
147 if (!parts) | 147 if (!parts) |
148 parts = &local_parts; | 148 parts = &local_parts; |
149 const base::string16 parsed_scheme(URLFixerUpper::SegmentURL(text, parts)); | 149 const base::string16 parsed_scheme(URLFixerUpper::SegmentURL(text, parts)); |
150 if (scheme) | 150 if (scheme) |
151 *scheme = parsed_scheme; | 151 *scheme = parsed_scheme; |
152 | 152 |
153 // Try to fixup and canonicalize the user's typing. We use this to help | 153 // If we can't canonicalize the user's input, the rest of the autocomplete |
154 // determine if it's safe to return "URL" as the type of anything that has an | 154 // system isn't going to be able to produce a navigable URL match for it. |
155 // explicit, non-HTTP[S] scheme. (HTTP[S] and "no scheme" inputs get more | 155 // So we just return QUERY immediately in these cases. |
156 // sophisticated heuristics below.) If we can't canonicalize such inputs, we | |
157 // shouldn't mark them as "URL"s, because the rest of the autocomplete system | |
158 // isn't going to be able to produce navigable URL matches for them, which can | |
159 // lead to DCHECK failures later. | |
160 GURL placeholder_canonicalized_url; | 156 GURL placeholder_canonicalized_url; |
161 if (!canonicalized_url) | 157 if (!canonicalized_url) |
162 canonicalized_url = &placeholder_canonicalized_url; | 158 canonicalized_url = &placeholder_canonicalized_url; |
163 *canonicalized_url = URLFixerUpper::FixupURL(base::UTF16ToUTF8(text), | 159 *canonicalized_url = URLFixerUpper::FixupURL(base::UTF16ToUTF8(text), |
164 base::UTF16ToUTF8(desired_tld)); | 160 base::UTF16ToUTF8(desired_tld)); |
165 Type return_value_for_non_http_url = | 161 if (!canonicalized_url->is_valid()) |
166 canonicalized_url->is_valid() ? URL : QUERY; | 162 return QUERY; |
167 | 163 |
168 if (LowerCaseEqualsASCII(parsed_scheme, content::kFileScheme)) { | 164 if (LowerCaseEqualsASCII(parsed_scheme, content::kFileScheme)) { |
169 // A user might or might not type a scheme when entering a file URL. In | 165 // A user might or might not type a scheme when entering a file URL. In |
170 // either case, |parsed_scheme| will tell us that this is a file URL, but | 166 // either case, |parsed_scheme| will tell us that this is a file URL, but |
171 // |parts->scheme| might be empty, e.g. if the user typed "C:\foo". | 167 // |parts->scheme| might be empty, e.g. if the user typed "C:\foo". |
172 return URL; | 168 return URL; |
173 } | 169 } |
174 | 170 |
175 // If the user typed a scheme, and it's HTTP or HTTPS, we know how to parse it | 171 // If the user typed a scheme, and it's HTTP or HTTPS, we know how to parse it |
176 // well enough that we can fall through to the heuristics below. If it's | 172 // well enough that we can fall through to the heuristics below. If it's |
177 // something else, we can just determine our action based on what we do with | 173 // something else, we can just determine our action based on what we do with |
178 // any input of this scheme. In theory we could do better with some schemes | 174 // any input of this scheme. In theory we could do better with some schemes |
179 // (e.g. "ftp" or "view-source") but I'll wait to spend the effort on that | 175 // (e.g. "ftp" or "view-source") but I'll wait to spend the effort on that |
180 // until I run into some cases that really need it. | 176 // until I run into some cases that really need it. |
181 if (parts->scheme.is_nonempty() && | 177 if (parts->scheme.is_nonempty() && |
182 !LowerCaseEqualsASCII(parsed_scheme, content::kHttpScheme) && | 178 !LowerCaseEqualsASCII(parsed_scheme, content::kHttpScheme) && |
183 !LowerCaseEqualsASCII(parsed_scheme, content::kHttpsScheme)) { | 179 !LowerCaseEqualsASCII(parsed_scheme, content::kHttpsScheme)) { |
184 // See if we know how to handle the URL internally. There are some schemes | 180 // See if we know how to handle the URL internally. There are some schemes |
185 // that we convert to other things before they reach the renderer or else | 181 // that we convert to other things before they reach the renderer or else |
186 // the renderer handles internally without reaching the net::URLRequest | 182 // the renderer handles internally without reaching the net::URLRequest |
187 // logic. They thus won't be listed as "handled protocols", but we should | 183 // logic. They thus won't be listed as "handled protocols", but we should |
188 // still claim to handle them. | 184 // still claim to handle them. |
189 if (ProfileIOData::IsHandledProtocol(base::UTF16ToASCII(parsed_scheme)) || | 185 if (ProfileIOData::IsHandledProtocol(base::UTF16ToASCII(parsed_scheme)) || |
190 LowerCaseEqualsASCII(parsed_scheme, content::kViewSourceScheme) || | 186 LowerCaseEqualsASCII(parsed_scheme, content::kViewSourceScheme) || |
191 LowerCaseEqualsASCII(parsed_scheme, content::kJavaScriptScheme) || | 187 LowerCaseEqualsASCII(parsed_scheme, content::kJavaScriptScheme) || |
192 LowerCaseEqualsASCII(parsed_scheme, content::kDataScheme)) | 188 LowerCaseEqualsASCII(parsed_scheme, content::kDataScheme)) |
193 return return_value_for_non_http_url; | 189 return URL; |
194 | 190 |
195 // Not an internal protocol. Check and see if the user has explicitly | 191 // Not an internal protocol. Check and see if the user has explicitly |
196 // opened this scheme as a URL before, or if the "scheme" is actually a | 192 // opened this scheme as a URL before, or if the "scheme" is actually a |
197 // username. We need to do this after the check above because some | 193 // username. We need to do this after the check above because some |
198 // handlable schemes (e.g. "javascript") may be treated as "blocked" by the | 194 // handlable schemes (e.g. "javascript") may be treated as "blocked" by the |
199 // external protocol handler because we don't want pages to open them, but | 195 // external protocol handler because we don't want pages to open them, but |
200 // users still can. | 196 // users still can. |
201 // Note that the protocol handler needs to be informed that omnibox input | 197 // Note that the protocol handler needs to be informed that omnibox input |
202 // should always be considered "user gesture-triggered", lest it always | 198 // should always be considered "user gesture-triggered", lest it always |
203 // return BLOCK. | 199 // return BLOCK. |
204 ExternalProtocolHandler::BlockState block_state = | 200 ExternalProtocolHandler::BlockState block_state = |
205 ExternalProtocolHandler::GetBlockState( | 201 ExternalProtocolHandler::GetBlockState( |
206 base::UTF16ToUTF8(parsed_scheme), true); | 202 base::UTF16ToUTF8(parsed_scheme), true); |
207 switch (block_state) { | 203 switch (block_state) { |
208 case ExternalProtocolHandler::DONT_BLOCK: | 204 case ExternalProtocolHandler::DONT_BLOCK: |
209 return return_value_for_non_http_url; | 205 return URL; |
210 | 206 |
211 case ExternalProtocolHandler::BLOCK: | 207 case ExternalProtocolHandler::BLOCK: |
212 // If we don't want the user to open the URL, don't let it be navigated | 208 // If we don't want the user to open the URL, don't let it be navigated |
213 // to at all. | 209 // to at all. |
214 return QUERY; | 210 return QUERY; |
215 | 211 |
216 default: { | 212 default: { |
217 // We don't know about this scheme. It might be that the user typed a | 213 // We don't know about this scheme. It might be that the user typed a |
218 // URL of the form "username:password@foo.com". | 214 // URL of the form "username:password@foo.com". |
219 const base::string16 http_scheme_prefix = | 215 const base::string16 http_scheme_prefix = |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
263 // "www.example.com:81" in this case. | 259 // "www.example.com:81" in this case. |
264 return UNKNOWN; | 260 return UNKNOWN; |
265 } | 261 } |
266 } | 262 } |
267 } | 263 } |
268 | 264 |
269 // Either the user didn't type a scheme, in which case we need to distinguish | 265 // Either the user didn't type a scheme, in which case we need to distinguish |
270 // between an HTTP URL and a query, or the scheme is HTTP or HTTPS, in which | 266 // between an HTTP URL and a query, or the scheme is HTTP or HTTPS, in which |
271 // case we should reject invalid formulations. | 267 // case we should reject invalid formulations. |
272 | 268 |
273 // If we have an empty host it can't be a URL. | 269 // If we have an empty host it can't be a valid HTTP[S] URL. (This should |
| 270 // only trigger for input that begins with a colon, which GURL will parse as a |
| 271 // valid, non-standard URL; for standard URLs, an empty host would have |
| 272 // resulted in an invalid |canonicalized_url| above.) |
274 if (!parts->host.is_nonempty()) | 273 if (!parts->host.is_nonempty()) |
275 return QUERY; | 274 return QUERY; |
276 | 275 |
| 276 // Sanity-check: GURL should have failed to canonicalize this URL if it had an |
| 277 // invalid port. |
| 278 DCHECK_NE(url_parse::PORT_INVALID, |
| 279 url_parse::ParsePort(text.c_str(), parts->port)); |
| 280 |
277 // Likewise, the RCDS can reject certain obviously-invalid hosts. (We also | 281 // Likewise, the RCDS can reject certain obviously-invalid hosts. (We also |
278 // use the registry length later below.) | 282 // use the registry length later below.) |
279 const base::string16 host(text.substr(parts->host.begin, parts->host.len)); | 283 const base::string16 host(text.substr(parts->host.begin, parts->host.len)); |
280 const size_t registry_length = | 284 const size_t registry_length = |
281 net::registry_controlled_domains::GetRegistryLength( | 285 net::registry_controlled_domains::GetRegistryLength( |
282 base::UTF16ToUTF8(host), | 286 base::UTF16ToUTF8(host), |
283 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, | 287 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, |
284 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); | 288 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); |
285 if (registry_length == std::string::npos) { | 289 if (registry_length == std::string::npos) { |
286 // Try to append the desired_tld. | 290 // Try to append the desired_tld. |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
328 // Thus we fall down in the following cases: | 332 // Thus we fall down in the following cases: |
329 // * Trying to navigate to a hostname with spaces | 333 // * Trying to navigate to a hostname with spaces |
330 // * Trying to navigate to a hostname with invalid characters and an unknown | 334 // * Trying to navigate to a hostname with invalid characters and an unknown |
331 // TLD | 335 // TLD |
332 // These are rare, though probably possible in intranets. | 336 // These are rare, though probably possible in intranets. |
333 return (parts->scheme.is_nonempty() || | 337 return (parts->scheme.is_nonempty() || |
334 ((registry_length != 0) && | 338 ((registry_length != 0) && |
335 (host.find(' ') == base::string16::npos))) ? UNKNOWN : QUERY; | 339 (host.find(' ') == base::string16::npos))) ? UNKNOWN : QUERY; |
336 } | 340 } |
337 | 341 |
338 // A port number is a good indicator that this is a URL. However, it might | |
339 // also be a query like "1.66:1" that looks kind of like an IP address and | |
340 // port number. So here we only check for "port numbers" that are illegal and | |
341 // thus mean this can't be navigated to (e.g. "1.2.3.4:garbage"), and we save | |
342 // handling legal port numbers until after the "IP address" determination | |
343 // below. | |
344 if (url_parse::ParsePort(text.c_str(), parts->port) == | |
345 url_parse::PORT_INVALID) | |
346 return QUERY; | |
347 | |
348 // Now that we've ruled out all schemes other than http or https and done a | 342 // Now that we've ruled out all schemes other than http or https and done a |
349 // little more sanity checking, the presence of a scheme means this is likely | 343 // little more sanity checking, the presence of a scheme means this is likely |
350 // a URL. | 344 // a URL. |
351 if (parts->scheme.is_nonempty()) | 345 if (parts->scheme.is_nonempty()) |
352 return URL; | 346 return URL; |
353 | 347 |
354 // See if the host is an IP address. | 348 // See if the host is an IP address. |
355 if (host_info.family == url_canon::CanonHostInfo::IPV6) | 349 if (host_info.family == url_canon::CanonHostInfo::IPV6) |
356 return URL; | 350 return URL; |
357 // If the user originally typed a host that looks like an IP address (a | 351 // If the user originally typed a host that looks like an IP address (a |
(...skipping 175 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
533 current_page_classification_ = AutocompleteInput::INVALID_SPEC; | 527 current_page_classification_ = AutocompleteInput::INVALID_SPEC; |
534 type_ = INVALID; | 528 type_ = INVALID; |
535 parts_ = url_parse::Parsed(); | 529 parts_ = url_parse::Parsed(); |
536 scheme_.clear(); | 530 scheme_.clear(); |
537 canonicalized_url_ = GURL(); | 531 canonicalized_url_ = GURL(); |
538 prevent_inline_autocomplete_ = false; | 532 prevent_inline_autocomplete_ = false; |
539 prefer_keyword_ = false; | 533 prefer_keyword_ = false; |
540 allow_exact_keyword_match_ = false; | 534 allow_exact_keyword_match_ = false; |
541 matches_requested_ = ALL_MATCHES; | 535 matches_requested_ = ALL_MATCHES; |
542 } | 536 } |
OLD | NEW |