Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(180)

Side by Side Diff: chrome/browser/autocomplete/autocomplete_input.cc

Issue 226283009: Make AutocompleteInput::Parse() more strict: return QUERY for all inputs that (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Created 6 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/autocomplete/autocomplete_input.h" 5 #include "chrome/browser/autocomplete/autocomplete_input.h"
6 6
7 #include "base/strings/string_util.h" 7 #include "base/strings/string_util.h"
8 #include "base/strings/utf_string_conversions.h" 8 #include "base/strings/utf_string_conversions.h"
9 #include "chrome/browser/external_protocol/external_protocol_handler.h" 9 #include "chrome/browser/external_protocol/external_protocol_handler.h"
10 #include "chrome/browser/profiles/profile_io_data.h" 10 #include "chrome/browser/profiles/profile_io_data.h"
(...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after
143 // use the URLFixerUpper here because we want to be smart about what we 143 // use the URLFixerUpper here because we want to be smart about what we
144 // consider a scheme. For example, we shouldn't consider www.google.com:80 144 // consider a scheme. For example, we shouldn't consider www.google.com:80
145 // to have a scheme. 145 // to have a scheme.
146 url_parse::Parsed local_parts; 146 url_parse::Parsed local_parts;
147 if (!parts) 147 if (!parts)
148 parts = &local_parts; 148 parts = &local_parts;
149 const base::string16 parsed_scheme(URLFixerUpper::SegmentURL(text, parts)); 149 const base::string16 parsed_scheme(URLFixerUpper::SegmentURL(text, parts));
150 if (scheme) 150 if (scheme)
151 *scheme = parsed_scheme; 151 *scheme = parsed_scheme;
152 152
153 // Try to fixup and canonicalize the user's typing. We use this to help 153 // If we can't canonicalize the user's input, the rest of the autocomplete
154 // determine if it's safe to return "URL" as the type of anything that has an 154 // system isn't going to be able to produce a navigable URL match for it.
155 // explicit, non-HTTP[S] scheme. (HTTP[S] and "no scheme" inputs get more 155 // So we just return QUERY immediately in these cases.
156 // sophisticated heuristics below.) If we can't canonicalize such inputs, we
157 // shouldn't mark them as "URL"s, because the rest of the autocomplete system
158 // isn't going to be able to produce navigable URL matches for them, which can
159 // lead to DCHECK failures later.
160 GURL placeholder_canonicalized_url; 156 GURL placeholder_canonicalized_url;
161 if (!canonicalized_url) 157 if (!canonicalized_url)
162 canonicalized_url = &placeholder_canonicalized_url; 158 canonicalized_url = &placeholder_canonicalized_url;
163 *canonicalized_url = URLFixerUpper::FixupURL(base::UTF16ToUTF8(text), 159 *canonicalized_url = URLFixerUpper::FixupURL(base::UTF16ToUTF8(text),
164 base::UTF16ToUTF8(desired_tld)); 160 base::UTF16ToUTF8(desired_tld));
165 Type return_value_for_non_http_url = 161 if (!canonicalized_url->is_valid())
166 canonicalized_url->is_valid() ? URL : QUERY; 162 return QUERY;
167 163
168 if (LowerCaseEqualsASCII(parsed_scheme, content::kFileScheme)) { 164 if (LowerCaseEqualsASCII(parsed_scheme, content::kFileScheme)) {
169 // A user might or might not type a scheme when entering a file URL. In 165 // A user might or might not type a scheme when entering a file URL. In
170 // either case, |parsed_scheme| will tell us that this is a file URL, but 166 // either case, |parsed_scheme| will tell us that this is a file URL, but
171 // |parts->scheme| might be empty, e.g. if the user typed "C:\foo". 167 // |parts->scheme| might be empty, e.g. if the user typed "C:\foo".
172 return URL; 168 return URL;
173 } 169 }
174 170
175 // If the user typed a scheme, and it's HTTP or HTTPS, we know how to parse it 171 // If the user typed a scheme, and it's HTTP or HTTPS, we know how to parse it
176 // well enough that we can fall through to the heuristics below. If it's 172 // well enough that we can fall through to the heuristics below. If it's
177 // something else, we can just determine our action based on what we do with 173 // something else, we can just determine our action based on what we do with
178 // any input of this scheme. In theory we could do better with some schemes 174 // any input of this scheme. In theory we could do better with some schemes
179 // (e.g. "ftp" or "view-source") but I'll wait to spend the effort on that 175 // (e.g. "ftp" or "view-source") but I'll wait to spend the effort on that
180 // until I run into some cases that really need it. 176 // until I run into some cases that really need it.
181 if (parts->scheme.is_nonempty() && 177 if (parts->scheme.is_nonempty() &&
182 !LowerCaseEqualsASCII(parsed_scheme, content::kHttpScheme) && 178 !LowerCaseEqualsASCII(parsed_scheme, content::kHttpScheme) &&
183 !LowerCaseEqualsASCII(parsed_scheme, content::kHttpsScheme)) { 179 !LowerCaseEqualsASCII(parsed_scheme, content::kHttpsScheme)) {
184 // See if we know how to handle the URL internally. There are some schemes 180 // See if we know how to handle the URL internally. There are some schemes
185 // that we convert to other things before they reach the renderer or else 181 // that we convert to other things before they reach the renderer or else
186 // the renderer handles internally without reaching the net::URLRequest 182 // the renderer handles internally without reaching the net::URLRequest
187 // logic. They thus won't be listed as "handled protocols", but we should 183 // logic. They thus won't be listed as "handled protocols", but we should
188 // still claim to handle them. 184 // still claim to handle them.
189 if (ProfileIOData::IsHandledProtocol(base::UTF16ToASCII(parsed_scheme)) || 185 if (ProfileIOData::IsHandledProtocol(base::UTF16ToASCII(parsed_scheme)) ||
190 LowerCaseEqualsASCII(parsed_scheme, content::kViewSourceScheme) || 186 LowerCaseEqualsASCII(parsed_scheme, content::kViewSourceScheme) ||
191 LowerCaseEqualsASCII(parsed_scheme, content::kJavaScriptScheme) || 187 LowerCaseEqualsASCII(parsed_scheme, content::kJavaScriptScheme) ||
192 LowerCaseEqualsASCII(parsed_scheme, content::kDataScheme)) 188 LowerCaseEqualsASCII(parsed_scheme, content::kDataScheme))
193 return return_value_for_non_http_url; 189 return URL;
194 190
195 // Not an internal protocol. Check and see if the user has explicitly 191 // Not an internal protocol. Check and see if the user has explicitly
196 // opened this scheme as a URL before, or if the "scheme" is actually a 192 // opened this scheme as a URL before, or if the "scheme" is actually a
197 // username. We need to do this after the check above because some 193 // username. We need to do this after the check above because some
198 // handleable schemes (e.g. "javascript") may be treated as "blocked" by the 194 // handleable schemes (e.g. "javascript") may be treated as "blocked" by the
199 // external protocol handler because we don't want pages to open them, but 195 // external protocol handler because we don't want pages to open them, but
200 // users still can. 196 // users still can.
201 // Note that the protocol handler needs to be informed that omnibox input 197 // Note that the protocol handler needs to be informed that omnibox input
202 // should always be considered "user gesture-triggered", lest it always 198 // should always be considered "user gesture-triggered", lest it always
203 // return BLOCK. 199 // return BLOCK.
204 ExternalProtocolHandler::BlockState block_state = 200 ExternalProtocolHandler::BlockState block_state =
205 ExternalProtocolHandler::GetBlockState( 201 ExternalProtocolHandler::GetBlockState(
206 base::UTF16ToUTF8(parsed_scheme), true); 202 base::UTF16ToUTF8(parsed_scheme), true);
207 switch (block_state) { 203 switch (block_state) {
208 case ExternalProtocolHandler::DONT_BLOCK: 204 case ExternalProtocolHandler::DONT_BLOCK:
209 return return_value_for_non_http_url; 205 return URL;
210 206
211 case ExternalProtocolHandler::BLOCK: 207 case ExternalProtocolHandler::BLOCK:
212 // If we don't want the user to open the URL, don't let it be navigated 208 // If we don't want the user to open the URL, don't let it be navigated
213 // to at all. 209 // to at all.
214 return QUERY; 210 return QUERY;
215 211
216 default: { 212 default: {
217 // We don't know about this scheme. It might be that the user typed a 213 // We don't know about this scheme. It might be that the user typed a
218 // URL of the form "username:password@foo.com". 214 // URL of the form "username:password@foo.com".
219 const base::string16 http_scheme_prefix = 215 const base::string16 http_scheme_prefix =
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
263 // "www.example.com:81" in this case. 259 // "www.example.com:81" in this case.
264 return UNKNOWN; 260 return UNKNOWN;
265 } 261 }
266 } 262 }
267 } 263 }
268 264
269 // Either the user didn't type a scheme, in which case we need to distinguish 265 // Either the user didn't type a scheme, in which case we need to distinguish
270 // between an HTTP URL and a query, or the scheme is HTTP or HTTPS, in which 266 // between an HTTP URL and a query, or the scheme is HTTP or HTTPS, in which
271 // case we should reject invalid formulations. 267 // case we should reject invalid formulations.
272 268
273 // If we have an empty host it can't be a URL. 269 // If we have an empty host it can't be a valid HTTP[S] URL. (This should
270 // only trigger for input that begins with a colon, which GURL will parse as a
271 // valid, non-standard URL; for standard URLs, an empty host would have
272 // resulted in an invalid |canonicalized_url| above.)
274 if (!parts->host.is_nonempty()) 273 if (!parts->host.is_nonempty())
275 return QUERY; 274 return QUERY;
276 275
276 // Sanity-check: GURL should have failed to canonicalize this URL if it had an
277 // invalid port.
278 DCHECK_NE(url_parse::PORT_INVALID,
279 url_parse::ParsePort(text.c_str(), parts->port));
280
277 // Likewise, the RCDS can reject certain obviously-invalid hosts. (We also 281 // Likewise, the RCDS can reject certain obviously-invalid hosts. (We also
278 // use the registry length later below.) 282 // use the registry length later below.)
279 const base::string16 host(text.substr(parts->host.begin, parts->host.len)); 283 const base::string16 host(text.substr(parts->host.begin, parts->host.len));
280 const size_t registry_length = 284 const size_t registry_length =
281 net::registry_controlled_domains::GetRegistryLength( 285 net::registry_controlled_domains::GetRegistryLength(
282 base::UTF16ToUTF8(host), 286 base::UTF16ToUTF8(host),
283 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, 287 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
284 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); 288 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
285 if (registry_length == std::string::npos) { 289 if (registry_length == std::string::npos) {
286 // Try to append the desired_tld. 290 // Try to append the desired_tld.
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after
328 // Thus we fall down in the following cases: 332 // Thus we fall down in the following cases:
329 // * Trying to navigate to a hostname with spaces 333 // * Trying to navigate to a hostname with spaces
330 // * Trying to navigate to a hostname with invalid characters and an unknown 334 // * Trying to navigate to a hostname with invalid characters and an unknown
331 // TLD 335 // TLD
332 // These are rare, though probably possible in intranets. 336 // These are rare, though probably possible in intranets.
333 return (parts->scheme.is_nonempty() || 337 return (parts->scheme.is_nonempty() ||
334 ((registry_length != 0) && 338 ((registry_length != 0) &&
335 (host.find(' ') == base::string16::npos))) ? UNKNOWN : QUERY; 339 (host.find(' ') == base::string16::npos))) ? UNKNOWN : QUERY;
336 } 340 }
337 341
338 // A port number is a good indicator that this is a URL. However, it might
339 // also be a query like "1.66:1" that looks kind of like an IP address and
340 // port number. So here we only check for "port numbers" that are illegal and
341 // thus mean this can't be navigated to (e.g. "1.2.3.4:garbage"), and we save
342 // handling legal port numbers until after the "IP address" determination
343 // below.
344 if (url_parse::ParsePort(text.c_str(), parts->port) ==
345 url_parse::PORT_INVALID)
346 return QUERY;
347
348 // Now that we've ruled out all schemes other than http or https and done a 342 // Now that we've ruled out all schemes other than http or https and done a
349 // little more sanity checking, the presence of a scheme means this is likely 343 // little more sanity checking, the presence of a scheme means this is likely
350 // a URL. 344 // a URL.
351 if (parts->scheme.is_nonempty()) 345 if (parts->scheme.is_nonempty())
352 return URL; 346 return URL;
353 347
354 // See if the host is an IP address. 348 // See if the host is an IP address.
355 if (host_info.family == url_canon::CanonHostInfo::IPV6) 349 if (host_info.family == url_canon::CanonHostInfo::IPV6)
356 return URL; 350 return URL;
357 // If the user originally typed a host that looks like an IP address (a 351 // If the user originally typed a host that looks like an IP address (a
(...skipping 175 matching lines...) Expand 10 before | Expand all | Expand 10 after
533 current_page_classification_ = AutocompleteInput::INVALID_SPEC; 527 current_page_classification_ = AutocompleteInput::INVALID_SPEC;
534 type_ = INVALID; 528 type_ = INVALID;
535 parts_ = url_parse::Parsed(); 529 parts_ = url_parse::Parsed();
536 scheme_.clear(); 530 scheme_.clear();
537 canonicalized_url_ = GURL(); 531 canonicalized_url_ = GURL();
538 prevent_inline_autocomplete_ = false; 532 prevent_inline_autocomplete_ = false;
539 prefer_keyword_ = false; 533 prefer_keyword_ = false;
540 allow_exact_keyword_match_ = false; 534 allow_exact_keyword_match_ = false;
541 matches_requested_ = ALL_MATCHES; 535 matches_requested_ = ALL_MATCHES;
542 } 536 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698