chrome/browser/autocomplete/autocomplete_input.cc - Issue 319523005: Omnibox: Combine Two Input Type Enums into One

Side by Side Diff: chrome/browser/autocomplete/autocomplete_input.cc

Issue 319523005: Omnibox: Combine Two Input Type Enums into One (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: remove blank line Created 6 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« chrome/browser/autocomplete/autocomplete_input.h ('K') | « chrome/browser/autocomplete/autocomplete_input.h ('k') | chrome/browser/autocomplete/autocomplete_provider.cc » ('j') | chrome/browser/autocomplete/search_provider.h » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/browser/autocomplete/autocomplete_input.h"	5 #include "chrome/browser/autocomplete/autocomplete_input.h"

6	6

7 #include "base/strings/string_util.h"	7 #include "base/strings/string_util.h"

8 #include "base/strings/utf_string_conversions.h"	8 #include "base/strings/utf_string_conversions.h"

9 #include "chrome/browser/external_protocol/external_protocol_handler.h"	9 #include "chrome/browser/external_protocol/external_protocol_handler.h"

10 #include "chrome/browser/profiles/profile_io_data.h"	10 #include "chrome/browser/profiles/profile_io_data.h"

(...skipping 14 matching lines...) Expand all Loading...
25 *cursor_position -= num_leading_chars_removed;	25 *cursor_position -= num_leading_chars_removed;

26 else	26 else

27 *cursor_position = 0;	27 *cursor_position = 0;

28 }	28 }

29	29

30 } // namespace	30 } // namespace

31	31

32 AutocompleteInput::AutocompleteInput()	32 AutocompleteInput::AutocompleteInput()

33 : cursor_position_(base::string16::npos),	33 : cursor_position_(base::string16::npos),

34 current_page_classification_(AutocompleteInput::INVALID_SPEC),	34 current_page_classification_(AutocompleteInput::INVALID_SPEC),

35 type_(INVALID),	35 type_(metrics::OmniboxInputType::INVALID),

36 prevent_inline_autocomplete_(false),	36 prevent_inline_autocomplete_(false),

37 prefer_keyword_(false),	37 prefer_keyword_(false),

38 allow_exact_keyword_match_(true),	38 allow_exact_keyword_match_(true),

39 want_asynchronous_matches_(true) {	39 want_asynchronous_matches_(true) {

40 }	40 }

41	41

42 AutocompleteInput::AutocompleteInput(	42 AutocompleteInput::AutocompleteInput(

43 const base::string16& text,	43 const base::string16& text,

44 size_t cursor_position,	44 size_t cursor_position,

45 const base::string16& desired_tld,	45 const base::string16& desired_tld,

(...skipping 16 matching lines...) Expand all Loading...
62 // None of the providers care about leading white space so we always trim it.	62 // None of the providers care about leading white space so we always trim it.

63 // Providers that care about trailing white space handle trimming themselves.	63 // Providers that care about trailing white space handle trimming themselves.

64 if ((base::TrimWhitespace(text, base::TRIM_LEADING, &text_) &	64 if ((base::TrimWhitespace(text, base::TRIM_LEADING, &text_) &

65 base::TRIM_LEADING) != 0)	65 base::TRIM_LEADING) != 0)

66 AdjustCursorPositionIfNecessary(text.length() - text_.length(),	66 AdjustCursorPositionIfNecessary(text.length() - text_.length(),

67 &cursor_position_);	67 &cursor_position_);

68	68

69 GURL canonicalized_url;	69 GURL canonicalized_url;

70 type_ = Parse(text_, desired_tld, &parts_, &scheme_, &canonicalized_url);	70 type_ = Parse(text_, desired_tld, &parts_, &scheme_, &canonicalized_url);

71	71

72 if (type_ == INVALID)	72 if (type_ == metrics::OmniboxInputType::INVALID)

73 return;	73 return;

74	74

75 if (((type_ == UNKNOWN) \|\| (type_ == URL)) &&	75 if (((type_ == metrics::OmniboxInputType::UNKNOWN) \|\|

	76 (type_ == metrics::OmniboxInputType::URL)) &&

76 canonicalized_url.is_valid() &&	77 canonicalized_url.is_valid() &&

77 (!canonicalized_url.IsStandard() \|\| canonicalized_url.SchemeIsFile() \|\|	78 (!canonicalized_url.IsStandard() \|\| canonicalized_url.SchemeIsFile() \|\|

78 canonicalized_url.SchemeIsFileSystem() \|\|	79 canonicalized_url.SchemeIsFileSystem() \|\|

79 !canonicalized_url.host().empty()))	80 !canonicalized_url.host().empty()))

80 canonicalized_url_ = canonicalized_url;	81 canonicalized_url_ = canonicalized_url;

81	82

82 size_t chars_removed = RemoveForcedQueryStringIfNecessary(type_, &text_);	83 size_t chars_removed = RemoveForcedQueryStringIfNecessary(type_, &text_);

83 AdjustCursorPositionIfNecessary(chars_removed, &cursor_position_);	84 AdjustCursorPositionIfNecessary(chars_removed, &cursor_position_);

84 if (chars_removed) {	85 if (chars_removed) {

85 // Remove spaces between opening question mark and first actual character.	86 // Remove spaces between opening question mark and first actual character.

86 base::string16 trimmed_text;	87 base::string16 trimmed_text;

87 if ((base::TrimWhitespace(text_, base::TRIM_LEADING, &trimmed_text) &	88 if ((base::TrimWhitespace(text_, base::TRIM_LEADING, &trimmed_text) &

88 base::TRIM_LEADING) != 0) {	89 base::TRIM_LEADING) != 0) {

89 AdjustCursorPositionIfNecessary(text_.length() - trimmed_text.length(),	90 AdjustCursorPositionIfNecessary(text_.length() - trimmed_text.length(),

90 &cursor_position_);	91 &cursor_position_);

91 text_ = trimmed_text;	92 text_ = trimmed_text;

92 }	93 }

93 }	94 }

94 }	95 }

95	96

96 AutocompleteInput::~AutocompleteInput() {	97 AutocompleteInput::~AutocompleteInput() {

97 }	98 }

98	99

99 // static	100 // static

100 size_t AutocompleteInput::RemoveForcedQueryStringIfNecessary(	101 size_t AutocompleteInput::RemoveForcedQueryStringIfNecessary(

101 Type type,	102 metrics::OmniboxInputType::Type type,

102 base::string16* text) {	103 base::string16* text) {

103 if (type != FORCED_QUERY \|\| text->empty() \|\| (*text)[0] != L'?')	104 if ((type != metrics::OmniboxInputType::FORCED_QUERY) \|\| text->empty() \|\|

	105 (*text)[0] != L'?')

104 return 0;	106 return 0;

105 // Drop the leading '?'.	107 // Drop the leading '?'.

106 text->erase(0, 1);	108 text->erase(0, 1);

107 return 1;	109 return 1;

108 }	110 }

109	111

110 // static	112 // static

111 std::string AutocompleteInput::TypeToString(Type type) {	113 std::string AutocompleteInput::TypeToString(

	114 metrics::OmniboxInputType::Type type) {

112 switch (type) {	115 switch (type) {

113 case INVALID: return "invalid";	116 case metrics::OmniboxInputType::INVALID: return "invalid";

114 case UNKNOWN: return "unknown";	117 case metrics::OmniboxInputType::UNKNOWN: return "unknown";

115 case URL: return "url";	118 case metrics::OmniboxInputType::URL: return "url";

116 case QUERY: return "query";	119 case metrics::OmniboxInputType::QUERY: return "query";

117 case FORCED_QUERY: return "forced-query";	120 case metrics::OmniboxInputType::FORCED_QUERY: return "forced-query";

118	121

119 default:	122 default:

120 NOTREACHED();	123 NOTREACHED();

121 return std::string();	124 return std::string();
	Review comments on the `default:` case above:

	Ilya Sherman (2014/06/06 05:01:01):
	I'd prefer that you remove the default case, so that the compiler can remind you if this ever needs to be updated.

	Mark P (2014/06/06 20:22:51):
	On 2014/06/06 05:01:01, Ilya Sherman wrote:
	> I'd prefer that you remove the default case, so that the compiler can remind you
	> if this ever needs to be updated.
	Done. Like in the other place, note that I had to add DEPRECATED_REQUESTED_URL and INVALID to the case list here. Without the default block, I also needed to add a return at the end of the function (outside the switch). With these changes, I'm not sure this is an improvement. What do you think?

	Ilya Sherman (2014/06/06 20:29:24):
	On 2014/06/06 20:22:51, Mark P wrote:
	> On 2014/06/06 05:01:01, Ilya Sherman wrote:
	> > I'd prefer that you remove the default case, so that the compiler can remind you
	> > if this ever needs to be updated.
	>
	> Done. Like in the other place, note that I had to add DEPRECATED_REQUESTED_URL
	> and INVALID to the case list here. Without the default block, I also needed to
	> add a return at the end of the function (outside the switch). With these
	> changes, I'm not sure this is an improvement. What do you think?
	Yes, I think it's still an improvement. The code is slightly longer, but not in a way that much harms readability; and you get the benefit that the compiler will help you avoid bugs in the future. IMO that's a great tradeoff.
122 }	125 }

123 }	126 }

124	127

125 // static	128 // static

126 AutocompleteInput::Type AutocompleteInput::Parse(	129 metrics::OmniboxInputType::Type AutocompleteInput::Parse(

127 const base::string16& text,	130 const base::string16& text,

128 const base::string16& desired_tld,	131 const base::string16& desired_tld,

129 url::Parsed* parts,	132 url::Parsed* parts,

130 base::string16* scheme,	133 base::string16* scheme,

131 GURL* canonicalized_url) {	134 GURL* canonicalized_url) {

132 size_t first_non_white = text.find_first_not_of(base::kWhitespaceUTF16, 0);	135 size_t first_non_white = text.find_first_not_of(base::kWhitespaceUTF16, 0);

133 if (first_non_white == base::string16::npos)	136 if (first_non_white == base::string16::npos)

134 return INVALID; // All whitespace.	137 return metrics::OmniboxInputType::INVALID; // All whitespace.

135	138

136 if (text[first_non_white] == L'?') {	139 if (text[first_non_white] == L'?') {

137 // If the first non-whitespace character is a '?', we magically treat this	140 // If the first non-whitespace character is a '?', we magically treat this

138 // as a query.	141 // as a query.

139 return FORCED_QUERY;	142 return metrics::OmniboxInputType::FORCED_QUERY;

140 }	143 }

141	144

142 // Ask our parsing back-end to help us understand what the user typed. We	145 // Ask our parsing back-end to help us understand what the user typed. We

143 // use the URLFixerUpper here because we want to be smart about what we	146 // use the URLFixerUpper here because we want to be smart about what we

144 // consider a scheme. For example, we shouldn't consider www.google.com:80	147 // consider a scheme. For example, we shouldn't consider www.google.com:80

145 // to have a scheme.	148 // to have a scheme.

146 url::Parsed local_parts;	149 url::Parsed local_parts;

147 if (!parts)	150 if (!parts)

148 parts = &local_parts;	151 parts = &local_parts;

149 const base::string16 parsed_scheme(URLFixerUpper::SegmentURL(text, parts));	152 const base::string16 parsed_scheme(URLFixerUpper::SegmentURL(text, parts));

150 if (scheme)	153 if (scheme)

151 *scheme = parsed_scheme;	154 *scheme = parsed_scheme;

152	155

153 // If we can't canonicalize the user's input, the rest of the autocomplete	156 // If we can't canonicalize the user's input, the rest of the autocomplete

154 // system isn't going to be able to produce a navigable URL match for it.	157 // system isn't going to be able to produce a navigable URL match for it.

155 // So we just return QUERY immediately in these cases.	158 // So we just return QUERY immediately in these cases.

156 GURL placeholder_canonicalized_url;	159 GURL placeholder_canonicalized_url;

157 if (!canonicalized_url)	160 if (!canonicalized_url)

158 canonicalized_url = &placeholder_canonicalized_url;	161 canonicalized_url = &placeholder_canonicalized_url;

159 *canonicalized_url = URLFixerUpper::FixupURL(base::UTF16ToUTF8(text),	162 *canonicalized_url = URLFixerUpper::FixupURL(base::UTF16ToUTF8(text),

160 base::UTF16ToUTF8(desired_tld));	163 base::UTF16ToUTF8(desired_tld));

161 if (!canonicalized_url->is_valid())	164 if (!canonicalized_url->is_valid())

162 return QUERY;	165 return metrics::OmniboxInputType::QUERY;

163	166

164 if (LowerCaseEqualsASCII(parsed_scheme, url::kFileScheme)) {	167 if (LowerCaseEqualsASCII(parsed_scheme, url::kFileScheme)) {

165 // A user might or might not type a scheme when entering a file URL. In	168 // A user might or might not type a scheme when entering a file URL. In

166 // either case, \|parsed_scheme\| will tell us that this is a file URL, but	169 // either case, \|parsed_scheme\| will tell us that this is a file URL, but

167 // \|parts->scheme\| might be empty, e.g. if the user typed "C:\foo".	170 // \|parts->scheme\| might be empty, e.g. if the user typed "C:\foo".

168 return URL;	171 return metrics::OmniboxInputType::URL;

169 }	172 }

170	173

171 // If the user typed a scheme, and it's HTTP or HTTPS, we know how to parse it	174 // If the user typed a scheme, and it's HTTP or HTTPS, we know how to parse it

172 // well enough that we can fall through to the heuristics below. If it's	175 // well enough that we can fall through to the heuristics below. If it's

173 // something else, we can just determine our action based on what we do with	176 // something else, we can just determine our action based on what we do with

174 // any input of this scheme. In theory we could do better with some schemes	177 // any input of this scheme. In theory we could do better with some schemes

175 // (e.g. "ftp" or "view-source") but I'll wait to spend the effort on that	178 // (e.g. "ftp" or "view-source") but I'll wait to spend the effort on that

176 // until I run into some cases that really need it.	179 // until I run into some cases that really need it.

177 if (parts->scheme.is_nonempty() &&	180 if (parts->scheme.is_nonempty() &&

178 !LowerCaseEqualsASCII(parsed_scheme, url::kHttpScheme) &&	181 !LowerCaseEqualsASCII(parsed_scheme, url::kHttpScheme) &&

179 !LowerCaseEqualsASCII(parsed_scheme, url::kHttpsScheme)) {	182 !LowerCaseEqualsASCII(parsed_scheme, url::kHttpsScheme)) {

180 // See if we know how to handle the URL internally. There are some schemes	183 // See if we know how to handle the URL internally. There are some schemes

181 // that we convert to other things before they reach the renderer or else	184 // that we convert to other things before they reach the renderer or else

182 // the renderer handles internally without reaching the net::URLRequest	185 // the renderer handles internally without reaching the net::URLRequest

183 // logic. They thus won't be listed as "handled protocols", but we should	186 // logic. They thus won't be listed as "handled protocols", but we should

184 // still claim to handle them.	187 // still claim to handle them.

185 if (ProfileIOData::IsHandledProtocol(base::UTF16ToASCII(parsed_scheme)) \|\|	188 if (ProfileIOData::IsHandledProtocol(base::UTF16ToASCII(parsed_scheme)) \|\|

186 LowerCaseEqualsASCII(parsed_scheme, content::kViewSourceScheme) \|\|	189 LowerCaseEqualsASCII(parsed_scheme, content::kViewSourceScheme) \|\|

187 LowerCaseEqualsASCII(parsed_scheme, url::kJavaScriptScheme) \|\|	190 LowerCaseEqualsASCII(parsed_scheme, url::kJavaScriptScheme) \|\|

188 LowerCaseEqualsASCII(parsed_scheme, url::kDataScheme))	191 LowerCaseEqualsASCII(parsed_scheme, url::kDataScheme))

189 return URL;	192 return metrics::OmniboxInputType::URL;

190	193

191 // Not an internal protocol. Check and see if the user has explicitly	194 // Not an internal protocol. Check and see if the user has explicitly

192 // opened this scheme as a URL before, or if the "scheme" is actually a	195 // opened this scheme as a URL before, or if the "scheme" is actually a

193 // username. We need to do this after the check above because some	196 // username. We need to do this after the check above because some

194 // handlable schemes (e.g. "javascript") may be treated as "blocked" by the	197 // handlable schemes (e.g. "javascript") may be treated as "blocked" by the

195 // external protocol handler because we don't want pages to open them, but	198 // external protocol handler because we don't want pages to open them, but

196 // users still can.	199 // users still can.

197 // Note that the protocol handler needs to be informed that omnibox input	200 // Note that the protocol handler needs to be informed that omnibox input

198 // should always be considered "user gesture-triggered", lest it always	201 // should always be considered "user gesture-triggered", lest it always

199 // return BLOCK.	202 // return BLOCK.

200 ExternalProtocolHandler::BlockState block_state =	203 ExternalProtocolHandler::BlockState block_state =

201 ExternalProtocolHandler::GetBlockState(	204 ExternalProtocolHandler::GetBlockState(

202 base::UTF16ToUTF8(parsed_scheme), true);	205 base::UTF16ToUTF8(parsed_scheme), true);

203 switch (block_state) {	206 switch (block_state) {

204 case ExternalProtocolHandler::DONT_BLOCK:	207 case ExternalProtocolHandler::DONT_BLOCK:

205 return URL;	208 return metrics::OmniboxInputType::URL;

206	209

207 case ExternalProtocolHandler::BLOCK:	210 case ExternalProtocolHandler::BLOCK:

208 // If we don't want the user to open the URL, don't let it be navigated	211 // If we don't want the user to open the URL, don't let it be navigated

209 // to at all.	212 // to at all.

210 return QUERY;	213 return metrics::OmniboxInputType::QUERY;

211	214

212 default: {	215 default: {

213 // We don't know about this scheme. It might be that the user typed a	216 // We don't know about this scheme. It might be that the user typed a

214 // URL of the form "username:password@foo.com".	217 // URL of the form "username:password@foo.com".

215 const base::string16 http_scheme_prefix =	218 const base::string16 http_scheme_prefix =

216 base::ASCIIToUTF16(std::string(url::kHttpScheme) +	219 base::ASCIIToUTF16(std::string(url::kHttpScheme) +

217 content::kStandardSchemeSeparator);	220 content::kStandardSchemeSeparator);

218 url::Parsed http_parts;	221 url::Parsed http_parts;

219 base::string16 http_scheme;	222 base::string16 http_scheme;

220 GURL http_canonicalized_url;	223 GURL http_canonicalized_url;

221 Type http_type = Parse(http_scheme_prefix + text, desired_tld,	224 metrics::OmniboxInputType::Type http_type =

222 &http_parts, &http_scheme,	225 Parse(http_scheme_prefix + text, desired_tld, &http_parts,

223 &http_canonicalized_url);	226 &http_scheme, &http_canonicalized_url);

224 DCHECK_EQ(std::string(url::kHttpScheme),	227 DCHECK_EQ(std::string(url::kHttpScheme),

225 base::UTF16ToUTF8(http_scheme));	228 base::UTF16ToUTF8(http_scheme));

226	229

227 if ((http_type == URL) && http_parts.username.is_nonempty() &&	230 if ((http_type == metrics::OmniboxInputType::URL) &&

	231 http_parts.username.is_nonempty() &&

228 http_parts.password.is_nonempty()) {	232 http_parts.password.is_nonempty()) {

229 // Manually re-jigger the parsed parts to match \|text\| (without the	233 // Manually re-jigger the parsed parts to match \|text\| (without the

230 // http scheme added).	234 // http scheme added).

231 http_parts.scheme.reset();	235 http_parts.scheme.reset();

232 url::Component* components[] = {	236 url::Component* components[] = {

233 &http_parts.username,	237 &http_parts.username,

234 &http_parts.password,	238 &http_parts.password,

235 &http_parts.host,	239 &http_parts.host,

236 &http_parts.port,	240 &http_parts.port,

237 &http_parts.path,	241 &http_parts.path,

238 &http_parts.query,	242 &http_parts.query,

239 &http_parts.ref,	243 &http_parts.ref,

240 };	244 };

241 for (size_t i = 0; i < arraysize(components); ++i) {	245 for (size_t i = 0; i < arraysize(components); ++i) {

242 URLFixerUpper::OffsetComponent(	246 URLFixerUpper::OffsetComponent(

243 -static_cast<int>(http_scheme_prefix.length()), components[i]);	247 -static_cast<int>(http_scheme_prefix.length()), components[i]);

244 }	248 }

245	249

246 *parts = http_parts;	250 *parts = http_parts;

247 if (scheme)	251 if (scheme)

248 scheme->clear();	252 scheme->clear();

249 *canonicalized_url = http_canonicalized_url;	253 *canonicalized_url = http_canonicalized_url;

250	254

251 return URL;	255 return metrics::OmniboxInputType::URL;

252 }	256 }

253	257

254 // We don't know about this scheme and it doesn't look like the user	258 // We don't know about this scheme and it doesn't look like the user

255 // typed a username and password. It's likely to be a search operator	259 // typed a username and password. It's likely to be a search operator

256 // like "site:" or "link:". We classify it as UNKNOWN so the user has	260 // like "site:" or "link:". We classify it as UNKNOWN so the user has

257 // the option of treating it as a URL if we're wrong.	261 // the option of treating it as a URL if we're wrong.

258 // Note that SegmentURL() is smart so we aren't tricked by "c:\foo" or	262 // Note that SegmentURL() is smart so we aren't tricked by "c:\foo" or

259 // "www.example.com:81" in this case.	263 // "www.example.com:81" in this case.

260 return UNKNOWN;	264 return metrics::OmniboxInputType::UNKNOWN;

261 }	265 }

262 }	266 }

263 }	267 }

264	268

265 // Either the user didn't type a scheme, in which case we need to distinguish	269 // Either the user didn't type a scheme, in which case we need to distinguish

266 // between an HTTP URL and a query, or the scheme is HTTP or HTTPS, in which	270 // between an HTTP URL and a query, or the scheme is HTTP or HTTPS, in which

267 // case we should reject invalid formulations.	271 // case we should reject invalid formulations.

268	272

269 // If we have an empty host it can't be a valid HTTP[S] URL. (This should	273 // If we have an empty host it can't be a valid HTTP[S] URL. (This should

270 // only trigger for input that begins with a colon, which GURL will parse as a	274 // only trigger for input that begins with a colon, which GURL will parse as a

271 // valid, non-standard URL; for standard URLs, an empty host would have	275 // valid, non-standard URL; for standard URLs, an empty host would have

272 // resulted in an invalid \|canonicalized_url\| above.)	276 // resulted in an invalid \|canonicalized_url\| above.)

273 if (!parts->host.is_nonempty())	277 if (!parts->host.is_nonempty())

274 return QUERY;	278 return metrics::OmniboxInputType::QUERY;

275	279

276 // Sanity-check: GURL should have failed to canonicalize this URL if it had an	280 // Sanity-check: GURL should have failed to canonicalize this URL if it had an

277 // invalid port.	281 // invalid port.

278 DCHECK_NE(url::PORT_INVALID, url::ParsePort(text.c_str(), parts->port));	282 DCHECK_NE(url::PORT_INVALID, url::ParsePort(text.c_str(), parts->port));

279	283

280 // Likewise, the RCDS can reject certain obviously-invalid hosts. (We also	284 // Likewise, the RCDS can reject certain obviously-invalid hosts. (We also

281 // use the registry length later below.)	285 // use the registry length later below.)

282 const base::string16 host(text.substr(parts->host.begin, parts->host.len));	286 const base::string16 host(text.substr(parts->host.begin, parts->host.len));

283 const size_t registry_length =	287 const size_t registry_length =

284 net::registry_controlled_domains::GetRegistryLength(	288 net::registry_controlled_domains::GetRegistryLength(

285 base::UTF16ToUTF8(host),	289 base::UTF16ToUTF8(host),

286 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,	290 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,

287 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);	291 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);

288 if (registry_length == std::string::npos) {	292 if (registry_length == std::string::npos) {

289 // Try to append the desired_tld.	293 // Try to append the desired_tld.

290 if (!desired_tld.empty()) {	294 if (!desired_tld.empty()) {

291 base::string16 host_with_tld(host);	295 base::string16 host_with_tld(host);

292 if (host[host.length() - 1] != '.')	296 if (host[host.length() - 1] != '.')

293 host_with_tld += '.';	297 host_with_tld += '.';

294 host_with_tld += desired_tld;	298 host_with_tld += desired_tld;

295 const size_t tld_length =	299 const size_t tld_length =

296 net::registry_controlled_domains::GetRegistryLength(	300 net::registry_controlled_domains::GetRegistryLength(

297 base::UTF16ToUTF8(host_with_tld),	301 base::UTF16ToUTF8(host_with_tld),

298 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,	302 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,

299 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);	303 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);

300 if (tld_length != std::string::npos)	304 if (tld_length != std::string::npos) {

301 return URL; // Something like "99999999999" that looks like a bad IP	305 // Something like "99999999999" that looks like a bad IP

302 // address, but becomes valid on attaching a TLD.	306 // address, but becomes valid on attaching a TLD.

	307 return metrics::OmniboxInputType::URL;

	308 }

303 }	309 }

304 return QUERY; // Could be a broken IP address, etc.	310 // Could be a broken IP address, etc.

	311 return metrics::OmniboxInputType::QUERY;

305 }	312 }

306	313

307	314

308 // See if the hostname is valid. While IE and GURL allow hostnames to contain	315 // See if the hostname is valid. While IE and GURL allow hostnames to contain

309 // many other characters (perhaps for weird intranet machines), it's extremely	316 // many other characters (perhaps for weird intranet machines), it's extremely

310 // unlikely that a user would be trying to type those in for anything other	317 // unlikely that a user would be trying to type those in for anything other

311 // than a search query.	318 // than a search query.

312 url::CanonHostInfo host_info;	319 url::CanonHostInfo host_info;

313 const std::string canonicalized_host(net::CanonicalizeHost(	320 const std::string canonicalized_host(net::CanonicalizeHost(

314 base::UTF16ToUTF8(host), &host_info));	321 base::UTF16ToUTF8(host), &host_info));

(...skipping 13 matching lines...) Expand all Loading...
328 // "toys at amazon.com" will be treated as a search.	335 // "toys at amazon.com" will be treated as a search.

329 // * The user is typing some garbage string. Return QUERY.	336 // * The user is typing some garbage string. Return QUERY.

330 //	337 //

331 // Thus we fall down in the following cases:	338 // Thus we fall down in the following cases:

332 // * Trying to navigate to a hostname with spaces	339 // * Trying to navigate to a hostname with spaces

333 // * Trying to navigate to a hostname with invalid characters and an unknown	340 // * Trying to navigate to a hostname with invalid characters and an unknown

334 // TLD	341 // TLD

335 // These are rare, though probably possible in intranets.	342 // These are rare, though probably possible in intranets.

336 return (parts->scheme.is_nonempty() \|\|	343 return (parts->scheme.is_nonempty() \|\|

337 ((registry_length != 0) &&	344 ((registry_length != 0) &&

338 (host.find(' ') == base::string16::npos))) ? UNKNOWN : QUERY;	345 (host.find(' ') == base::string16::npos))) ?

	346 metrics::OmniboxInputType::UNKNOWN : metrics::OmniboxInputType::QUERY;

339 }	347 }

340	348

341 // Now that we've ruled out all schemes other than http or https and done a	349 // Now that we've ruled out all schemes other than http or https and done a

342 // little more sanity checking, the presence of a scheme means this is likely	350 // little more sanity checking, the presence of a scheme means this is likely

343 // a URL.	351 // a URL.

344 if (parts->scheme.is_nonempty())	352 if (parts->scheme.is_nonempty())

345 return URL;	353 return metrics::OmniboxInputType::URL;

346	354

347 // See if the host is an IP address.	355 // See if the host is an IP address.

348 if (host_info.family == url::CanonHostInfo::IPV6)	356 if (host_info.family == url::CanonHostInfo::IPV6)

349 return URL;	357 return metrics::OmniboxInputType::URL;

350 // If the user originally typed a host that looks like an IP address (a	358 // If the user originally typed a host that looks like an IP address (a

351 // dotted quad), they probably want to open it. If the original input was	359 // dotted quad), they probably want to open it. If the original input was

352 // something else (like a single number), they probably wanted to search for	360 // something else (like a single number), they probably wanted to search for

353 // it, unless they explicitly typed a scheme. This is true even if the URL	361 // it, unless they explicitly typed a scheme. This is true even if the URL

354 // appears to have a path: "1.2/45" is more likely a search (for the answer	362 // appears to have a path: "1.2/45" is more likely a search (for the answer

355 // to a math problem) than a URL. However, if there are more non-host	363 // to a math problem) than a URL. However, if there are more non-host

356 // components, then maybe this really was intended to be a navigation. For	364 // components, then maybe this really was intended to be a navigation. For

357 // this reason we only check the dotted-quad case here, and save the "other	365 // this reason we only check the dotted-quad case here, and save the "other

358 // IP addresses" case for after we check the number of non-host components	366 // IP addresses" case for after we check the number of non-host components

359 // below.	367 // below.

360 if ((host_info.family == url::CanonHostInfo::IPV4) &&	368 if ((host_info.family == url::CanonHostInfo::IPV4) &&

361 (host_info.num_ipv4_components == 4))	369 (host_info.num_ipv4_components == 4))

362 return URL;	370 return metrics::OmniboxInputType::URL;

363	371

364 // Presence of a password means this is likely a URL. Note that unless the	372 // Presence of a password means this is likely a URL. Note that unless the

365 // user has typed an explicit "http://" or similar, we'll probably think that	373 // user has typed an explicit "http://" or similar, we'll probably think that

366 // the username is some unknown scheme, and bail out in the scheme-handling	374 // the username is some unknown scheme, and bail out in the scheme-handling

367 // code above.	375 // code above.

368 if (parts->password.is_nonempty())	376 if (parts->password.is_nonempty())

369 return URL;	377 return metrics::OmniboxInputType::URL;

370	378

371 // Trailing slashes force the input to be treated as a URL.	379 // Trailing slashes force the input to be treated as a URL.

372 if (parts->path.is_nonempty()) {	380 if (parts->path.is_nonempty()) {

373 char c = text[parts->path.end() - 1];	381 char c = text[parts->path.end() - 1];

374 if ((c == '\\') \|\| (c == '/'))	382 if ((c == '\\') \|\| (c == '/'))

375 return URL;	383 return metrics::OmniboxInputType::URL;

376 }	384 }

377	385

378 // If there is more than one recognized non-host component, this is likely to	386 // If there is more than one recognized non-host component, this is likely to

379 // be a URL, even if the TLD is unknown (in which case this is likely an	387 // be a URL, even if the TLD is unknown (in which case this is likely an

380 // intranet URL).	388 // intranet URL).

381 if (NumNonHostComponents(*parts) > 1)	389 if (NumNonHostComponents(*parts) > 1)

382 return URL;	390 return metrics::OmniboxInputType::URL;

383	391

384 // If the host has a known TLD or a port, it's probably a URL, with the	392 // If the host has a known TLD or a port, it's probably a URL, with the

385 // following exceptions:	393 // following exceptions:

386 // * Any "IP addresses" that make it here are more likely searches	394 // * Any "IP addresses" that make it here are more likely searches

387 // (see above).	395 // (see above).

388 // * If we reach here with a username, our input looks like "user@host[.tld]".	396 // * If we reach here with a username, our input looks like "user@host[.tld]".

389 // Because there is no scheme explicitly specified, we think this is more	397 // Because there is no scheme explicitly specified, we think this is more

390 // likely an email address than an HTTP auth attempt. Hence, we search by	398 // likely an email address than an HTTP auth attempt. Hence, we search by

391 // default and let users correct us on a case-by-case basis.	399 // default and let users correct us on a case-by-case basis.

392 // Note that we special-case "localhost" as a known hostname.	400 // Note that we special-case "localhost" as a known hostname.

393 if ((host_info.family != url::CanonHostInfo::IPV4) &&	401 if ((host_info.family != url::CanonHostInfo::IPV4) &&

394 ((registry_length != 0) || (host == base::ASCIIToUTF16("localhost") ||	402 ((registry_length != 0) || (host == base::ASCIIToUTF16("localhost") ||

395 parts->port.is_nonempty())))	403 parts->port.is_nonempty())))

396 return parts->username.is_nonempty() ? UNKNOWN : URL;	404 return parts->username.is_nonempty() ?

	405 metrics::OmniboxInputType::UNKNOWN : metrics::OmniboxInputType::URL;

397	406

398 // If we reach this point, we know there's no known TLD on the input, so if	407 // If we reach this point, we know there's no known TLD on the input, so if

399 // the user wishes to add a desired_tld, the fixup code will oblige; thus this	408 // the user wishes to add a desired_tld, the fixup code will oblige; thus this

400 // is a URL.	409 // is a URL.

401 if (!desired_tld.empty())	410 if (!desired_tld.empty())

402 return URL;	411 return metrics::OmniboxInputType::URL;

403	412

404 // No scheme, password, port, path, and no known TLD on the host.	413 // No scheme, password, port, path, and no known TLD on the host.

405 // This could be:	414 // This could be:

406 // * An "incomplete IP address"; likely a search (see above).	415 // * An "incomplete IP address"; likely a search (see above).

407 // * An email-like input like "user@host", where "host" has no known TLD.	416 // * An email-like input like "user@host", where "host" has no known TLD.

408 // It's not clear what the user means here and searching seems reasonable.	417 // It's not clear what the user means here and searching seems reasonable.

409 // * A single word "foo"; possibly an intranet site, but more likely a search.	418 // * A single word "foo"; possibly an intranet site, but more likely a search.

410 // This is ideally an UNKNOWN, and we can let the Alternate Nav URL code	419 // This is ideally an UNKNOWN, and we can let the Alternate Nav URL code

411 // catch our mistakes.	420 // catch our mistakes.

412 // * A URL with a valid TLD we don't know about yet. If e.g. a registrar adds	421 // * A URL with a valid TLD we don't know about yet. If e.g. a registrar adds

413 // "xxx" as a TLD, then until we add it to our data file, Chrome won't know	422 // "xxx" as a TLD, then until we add it to our data file, Chrome won't know

414 // "foo.xxx" is a real URL. So ideally this is a URL, but we can't really	423 // "foo.xxx" is a real URL. So ideally this is a URL, but we can't really

415 // distinguish this case from:	424 // distinguish this case from:

416 // * A "URL-like" string that's not really a URL (like	425 // * A "URL-like" string that's not really a URL (like

417 // "browser.tabs.closeButtons" or "java.awt.event.*"). This is ideally a	426 // "browser.tabs.closeButtons" or "java.awt.event.*"). This is ideally a

418 // QUERY. Since this is indistinguishable from the case above, and this	427 // QUERY. Since this is indistinguishable from the case above, and this

419 // case is much more likely, claim these are UNKNOWN, which should default	428 // case is much more likely, claim these are UNKNOWN, which should default

420 // to the right thing and let users correct us on a case-by-case basis.	429 // to the right thing and let users correct us on a case-by-case basis.

421 return UNKNOWN;	430 return metrics::OmniboxInputType::UNKNOWN;

422 }	431 }

423	432

424 // static	433 // static

425 void AutocompleteInput::ParseForEmphasizeComponents(const base::string16& text,	434 void AutocompleteInput::ParseForEmphasizeComponents(const base::string16& text,

426 url::Component* scheme,	435 url::Component* scheme,

427 url::Component* host) {	436 url::Component* host) {

428 url::Parsed parts;	437 url::Parsed parts;

429 base::string16 scheme_str;	438 base::string16 scheme_str;

430 Parse(text, base::string16(), &parts, &scheme_str, NULL);	439 Parse(text, base::string16(), &parts, &scheme_str, NULL);

431	440

(...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
516 text_ = text;	525 text_ = text;

517 cursor_position_ = cursor_position;	526 cursor_position_ = cursor_position;

518 parts_ = parts;	527 parts_ = parts;

519 }	528 }

520	529

521 void AutocompleteInput::Clear() {	530 void AutocompleteInput::Clear() {

522 text_.clear();	531 text_.clear();

523 cursor_position_ = base::string16::npos;	532 cursor_position_ = base::string16::npos;

524 current_url_ = GURL();	533 current_url_ = GURL();

525 current_page_classification_ = AutocompleteInput::INVALID_SPEC;	534 current_page_classification_ = AutocompleteInput::INVALID_SPEC;

526 type_ = INVALID;	535 type_ = metrics::OmniboxInputType::INVALID;

527 parts_ = url::Parsed();	536 parts_ = url::Parsed();

528 scheme_.clear();	537 scheme_.clear();

529 canonicalized_url_ = GURL();	538 canonicalized_url_ = GURL();

530 prevent_inline_autocomplete_ = false;	539 prevent_inline_autocomplete_ = false;

531 prefer_keyword_ = false;	540 prefer_keyword_ = false;

532 allow_exact_keyword_match_ = false;	541 allow_exact_keyword_match_ = false;

533 want_asynchronous_matches_ = true;	542 want_asynchronous_matches_ = true;

534 }	543 }

OLD	NEW