chrome/browser/autocomplete/autocomplete_input.cc - Issue 226283009: Make AutocompleteInput::Parse() more strict: return QUERY for all inputs that

Side by Side Diff: chrome/browser/autocomplete/autocomplete_input.cc

Issue 226283009: Make AutocompleteInput::Parse() more strict: return QUERY for all inputs that (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/

Patch Set: Created 6 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/browser/autocomplete/autocomplete_input.h"	5 #include "chrome/browser/autocomplete/autocomplete_input.h"

6	6

7 #include "base/strings/string_util.h"	7 #include "base/strings/string_util.h"

8 #include "base/strings/utf_string_conversions.h"	8 #include "base/strings/utf_string_conversions.h"

9 #include "chrome/browser/external_protocol/external_protocol_handler.h"	9 #include "chrome/browser/external_protocol/external_protocol_handler.h"

10 #include "chrome/browser/profiles/profile_io_data.h"	10 #include "chrome/browser/profiles/profile_io_data.h"

(...skipping 132 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
143 // use the URLFixerUpper here because we want to be smart about what we	143 // use the URLFixerUpper here because we want to be smart about what we

144 // consider a scheme. For example, we shouldn't consider www.google.com:80	144 // consider a scheme. For example, we shouldn't consider www.google.com:80

145 // to have a scheme.	145 // to have a scheme.

146 url_parse::Parsed local_parts;	146 url_parse::Parsed local_parts;

147 if (!parts)	147 if (!parts)

148 parts = &local_parts;	148 parts = &local_parts;

149 const base::string16 parsed_scheme(URLFixerUpper::SegmentURL(text, parts));	149 const base::string16 parsed_scheme(URLFixerUpper::SegmentURL(text, parts));

150 if (scheme)	150 if (scheme)

151 *scheme = parsed_scheme;	151 *scheme = parsed_scheme;

152	152

153 // Try to fixup and canonicalize the user's typing. We use this to help	153 // If we can't canonicalize the user's input, the rest of the autocomplete

154 // determine if it's safe to return "URL" as the type of anything that has an	154 // system isn't going to be able to produce a navigable URL match for it.

155 // explicit, non-HTTP[S] scheme. (HTTP[S] and "no scheme" inputs get more	155 // So we just return QUERY immediately in these cases.

156 // sophisticated heuristics below.) If we can't canonicalize such inputs, we

157 // shouldn't mark them as "URL"s, because the rest of the autocomplete system

158 // isn't going to be able to produce navigable URL matches for them, which can

159 // lead to DCHECK failures later.

160 GURL placeholder_canonicalized_url;	156 GURL placeholder_canonicalized_url;

161 if (!canonicalized_url)	157 if (!canonicalized_url)

162 canonicalized_url = &placeholder_canonicalized_url;	158 canonicalized_url = &placeholder_canonicalized_url;

163 *canonicalized_url = URLFixerUpper::FixupURL(base::UTF16ToUTF8(text),	159 *canonicalized_url = URLFixerUpper::FixupURL(base::UTF16ToUTF8(text),

164 base::UTF16ToUTF8(desired_tld));	160 base::UTF16ToUTF8(desired_tld));

165 Type return_value_for_non_http_url =	161 if (!canonicalized_url->is_valid())

166 canonicalized_url->is_valid() ? URL : QUERY;	162 return QUERY;

167	163

168 if (LowerCaseEqualsASCII(parsed_scheme, content::kFileScheme)) {	164 if (LowerCaseEqualsASCII(parsed_scheme, content::kFileScheme)) {

169 // A user might or might not type a scheme when entering a file URL. In	165 // A user might or might not type a scheme when entering a file URL. In

170 // either case, |parsed_scheme| will tell us that this is a file URL, but	166 // either case, |parsed_scheme| will tell us that this is a file URL, but

171 // |parts->scheme| might be empty, e.g. if the user typed "C:\foo".	167 // |parts->scheme| might be empty, e.g. if the user typed "C:\foo".

172 return URL;	168 return URL;

173 }	169 }

174	170

175 // If the user typed a scheme, and it's HTTP or HTTPS, we know how to parse it	171 // If the user typed a scheme, and it's HTTP or HTTPS, we know how to parse it

176 // well enough that we can fall through to the heuristics below. If it's	172 // well enough that we can fall through to the heuristics below. If it's

177 // something else, we can just determine our action based on what we do with	173 // something else, we can just determine our action based on what we do with

178 // any input of this scheme. In theory we could do better with some schemes	174 // any input of this scheme. In theory we could do better with some schemes

179 // (e.g. "ftp" or "view-source") but I'll wait to spend the effort on that	175 // (e.g. "ftp" or "view-source") but I'll wait to spend the effort on that

180 // until I run into some cases that really need it.	176 // until I run into some cases that really need it.

181 if (parts->scheme.is_nonempty() &&	177 if (parts->scheme.is_nonempty() &&

182 !LowerCaseEqualsASCII(parsed_scheme, content::kHttpScheme) &&	178 !LowerCaseEqualsASCII(parsed_scheme, content::kHttpScheme) &&

183 !LowerCaseEqualsASCII(parsed_scheme, content::kHttpsScheme)) {	179 !LowerCaseEqualsASCII(parsed_scheme, content::kHttpsScheme)) {

184 // See if we know how to handle the URL internally. There are some schemes	180 // See if we know how to handle the URL internally. There are some schemes

185 // that we convert to other things before they reach the renderer or else	181 // that we convert to other things before they reach the renderer or else

186 // the renderer handles internally without reaching the net::URLRequest	182 // the renderer handles internally without reaching the net::URLRequest

187 // logic. They thus won't be listed as "handled protocols", but we should	183 // logic. They thus won't be listed as "handled protocols", but we should

188 // still claim to handle them.	184 // still claim to handle them.

189 if (ProfileIOData::IsHandledProtocol(base::UTF16ToASCII(parsed_scheme)) \|\|	185 if (ProfileIOData::IsHandledProtocol(base::UTF16ToASCII(parsed_scheme)) \|\|

190 LowerCaseEqualsASCII(parsed_scheme, content::kViewSourceScheme) \|\|	186 LowerCaseEqualsASCII(parsed_scheme, content::kViewSourceScheme) \|\|

191 LowerCaseEqualsASCII(parsed_scheme, content::kJavaScriptScheme) \|\|	187 LowerCaseEqualsASCII(parsed_scheme, content::kJavaScriptScheme) \|\|

192 LowerCaseEqualsASCII(parsed_scheme, content::kDataScheme))	188 LowerCaseEqualsASCII(parsed_scheme, content::kDataScheme))

193 return return_value_for_non_http_url;	189 return URL;

194	190

195 // Not an internal protocol. Check and see if the user has explicitly	191 // Not an internal protocol. Check and see if the user has explicitly

196 // opened this scheme as a URL before, or if the "scheme" is actually a	192 // opened this scheme as a URL before, or if the "scheme" is actually a

197 // username. We need to do this after the check above because some	193 // username. We need to do this after the check above because some

198 // handlable schemes (e.g. "javascript") may be treated as "blocked" by the	194 // handlable schemes (e.g. "javascript") may be treated as "blocked" by the

199 // external protocol handler because we don't want pages to open them, but	195 // external protocol handler because we don't want pages to open them, but

200 // users still can.	196 // users still can.

201 // Note that the protocol handler needs to be informed that omnibox input	197 // Note that the protocol handler needs to be informed that omnibox input

202 // should always be considered "user gesture-triggered", lest it always	198 // should always be considered "user gesture-triggered", lest it always

203 // return BLOCK.	199 // return BLOCK.

204 ExternalProtocolHandler::BlockState block_state =	200 ExternalProtocolHandler::BlockState block_state =

205 ExternalProtocolHandler::GetBlockState(	201 ExternalProtocolHandler::GetBlockState(

206 base::UTF16ToUTF8(parsed_scheme), true);	202 base::UTF16ToUTF8(parsed_scheme), true);

207 switch (block_state) {	203 switch (block_state) {

208 case ExternalProtocolHandler::DONT_BLOCK:	204 case ExternalProtocolHandler::DONT_BLOCK:

209 return return_value_for_non_http_url;	205 return URL;

210	206

211 case ExternalProtocolHandler::BLOCK:	207 case ExternalProtocolHandler::BLOCK:

212 // If we don't want the user to open the URL, don't let it be navigated	208 // If we don't want the user to open the URL, don't let it be navigated

213 // to at all.	209 // to at all.

214 return QUERY;	210 return QUERY;

215	211

216 default: {	212 default: {

217 // We don't know about this scheme. It might be that the user typed a	213 // We don't know about this scheme. It might be that the user typed a

218 // URL of the form "username:password@foo.com".	214 // URL of the form "username:password@foo.com".

219 const base::string16 http_scheme_prefix =	215 const base::string16 http_scheme_prefix =

(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
263 // "www.example.com:81" in this case.	259 // "www.example.com:81" in this case.

264 return UNKNOWN;	260 return UNKNOWN;

265 }	261 }

266 }	262 }

267 }	263 }

268	264

269 // Either the user didn't type a scheme, in which case we need to distinguish	265 // Either the user didn't type a scheme, in which case we need to distinguish

270 // between an HTTP URL and a query, or the scheme is HTTP or HTTPS, in which	266 // between an HTTP URL and a query, or the scheme is HTTP or HTTPS, in which

271 // case we should reject invalid formulations.	267 // case we should reject invalid formulations.

272	268

273 // If we have an empty host it can't be a URL.	269 // If we have an empty host it can't be a valid HTTP[S] URL. (This should

	270 // only trigger for input that begins with a colon, which GURL will parse as a

	271 // valid, non-standard URL; for standard URLs, an empty host would have

	272 // resulted in an invalid |canonicalized_url| above.)

274 if (!parts->host.is_nonempty())	273 if (!parts->host.is_nonempty())

275 return QUERY;	274 return QUERY;

276	275

	276 // Sanity-check: GURL should have failed to canonicalize this URL if it had an

	277 // invalid port.

	278 DCHECK_NE(url_parse::PORT_INVALID,

	279 url_parse::ParsePort(text.c_str(), parts->port));

	280

277 // Likewise, the RCDS can reject certain obviously-invalid hosts. (We also	281 // Likewise, the RCDS can reject certain obviously-invalid hosts. (We also

278 // use the registry length later below.)	282 // use the registry length later below.)

279 const base::string16 host(text.substr(parts->host.begin, parts->host.len));	283 const base::string16 host(text.substr(parts->host.begin, parts->host.len));

280 const size_t registry_length =	284 const size_t registry_length =

281 net::registry_controlled_domains::GetRegistryLength(	285 net::registry_controlled_domains::GetRegistryLength(

282 base::UTF16ToUTF8(host),	286 base::UTF16ToUTF8(host),

283 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,	287 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,

284 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);	288 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);

285 if (registry_length == std::string::npos) {	289 if (registry_length == std::string::npos) {

286 // Try to append the desired_tld.	290 // Try to append the desired_tld.

(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
328 // Thus we fall down in the following cases:	332 // Thus we fall down in the following cases:

329 // * Trying to navigate to a hostname with spaces	333 // * Trying to navigate to a hostname with spaces

330 // * Trying to navigate to a hostname with invalid characters and an unknown	334 // * Trying to navigate to a hostname with invalid characters and an unknown

331 // TLD	335 // TLD

332 // These are rare, though probably possible in intranets.	336 // These are rare, though probably possible in intranets.

333 return (parts->scheme.is_nonempty() \|\|	337 return (parts->scheme.is_nonempty() \|\|

334 ((registry_length != 0) &&	338 ((registry_length != 0) &&

335 (host.find(' ') == base::string16::npos))) ? UNKNOWN : QUERY;	339 (host.find(' ') == base::string16::npos))) ? UNKNOWN : QUERY;

336 }	340 }

337	341

338 // A port number is a good indicator that this is a URL. However, it might

339 // also be a query like "1.66:1" that looks kind of like an IP address and

340 // port number. So here we only check for "port numbers" that are illegal and

341 // thus mean this can't be navigated to (e.g. "1.2.3.4:garbage"), and we save

342 // handling legal port numbers until after the "IP address" determination

343 // below.

344 if (url_parse::ParsePort(text.c_str(), parts->port) ==

345 url_parse::PORT_INVALID)

346 return QUERY;

347

348 // Now that we've ruled out all schemes other than http or https and done a	342 // Now that we've ruled out all schemes other than http or https and done a

349 // little more sanity checking, the presence of a scheme means this is likely	343 // little more sanity checking, the presence of a scheme means this is likely

350 // a URL.	344 // a URL.

351 if (parts->scheme.is_nonempty())	345 if (parts->scheme.is_nonempty())

352 return URL;	346 return URL;

353	347

354 // See if the host is an IP address.	348 // See if the host is an IP address.

355 if (host_info.family == url_canon::CanonHostInfo::IPV6)	349 if (host_info.family == url_canon::CanonHostInfo::IPV6)

356 return URL;	350 return URL;

357 // If the user originally typed a host that looks like an IP address (a	351 // If the user originally typed a host that looks like an IP address (a

(...skipping 175 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
533 current_page_classification_ = AutocompleteInput::INVALID_SPEC;	527 current_page_classification_ = AutocompleteInput::INVALID_SPEC;

534 type_ = INVALID;	528 type_ = INVALID;

535 parts_ = url_parse::Parsed();	529 parts_ = url_parse::Parsed();

536 scheme_.clear();	530 scheme_.clear();

537 canonicalized_url_ = GURL();	531 canonicalized_url_ = GURL();

538 prevent_inline_autocomplete_ = false;	532 prevent_inline_autocomplete_ = false;

539 prefer_keyword_ = false;	533 prefer_keyword_ = false;

540 allow_exact_keyword_match_ = false;	534 allow_exact_keyword_match_ = false;

541 matches_requested_ = ALL_MATCHES;	535 matches_requested_ = ALL_MATCHES;

542 }	536 }

OLD	NEW

« no previous file with comments | « no previous file | chrome/browser/autocomplete/autocomplete_input_unittest.cc » ('j') | chrome/browser/autocomplete/history_quick_provider.cc » ('J')