Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/browser/autocomplete/autocomplete_input.h" | 5 #include "chrome/browser/autocomplete/autocomplete_input.h" |
| 6 | 6 |
| 7 #include "base/strings/string_util.h" | 7 #include "base/strings/string_util.h" |
| 8 #include "base/strings/utf_string_conversions.h" | 8 #include "base/strings/utf_string_conversions.h" |
| 9 #include "chrome/browser/external_protocol/external_protocol_handler.h" | 9 #include "chrome/browser/external_protocol/external_protocol_handler.h" |
| 10 #include "chrome/browser/profiles/profile_io_data.h" | 10 #include "chrome/browser/profiles/profile_io_data.h" |
| (...skipping 14 matching lines...) Expand all Loading... | |
| 25 *cursor_position -= num_leading_chars_removed; | 25 *cursor_position -= num_leading_chars_removed; |
| 26 else | 26 else |
| 27 *cursor_position = 0; | 27 *cursor_position = 0; |
| 28 } | 28 } |
| 29 | 29 |
| 30 } // namespace | 30 } // namespace |
| 31 | 31 |
| 32 AutocompleteInput::AutocompleteInput() | 32 AutocompleteInput::AutocompleteInput() |
| 33 : cursor_position_(base::string16::npos), | 33 : cursor_position_(base::string16::npos), |
| 34 current_page_classification_(AutocompleteInput::INVALID_SPEC), | 34 current_page_classification_(AutocompleteInput::INVALID_SPEC), |
| 35 type_(INVALID), | 35 type_(metrics::OmniboxInputType::INVALID), |
| 36 prevent_inline_autocomplete_(false), | 36 prevent_inline_autocomplete_(false), |
| 37 prefer_keyword_(false), | 37 prefer_keyword_(false), |
| 38 allow_exact_keyword_match_(true), | 38 allow_exact_keyword_match_(true), |
| 39 want_asynchronous_matches_(true) { | 39 want_asynchronous_matches_(true) { |
| 40 } | 40 } |
| 41 | 41 |
| 42 AutocompleteInput::AutocompleteInput( | 42 AutocompleteInput::AutocompleteInput( |
| 43 const base::string16& text, | 43 const base::string16& text, |
| 44 size_t cursor_position, | 44 size_t cursor_position, |
| 45 const base::string16& desired_tld, | 45 const base::string16& desired_tld, |
| (...skipping 16 matching lines...) Expand all Loading... | |
| 62 // None of the providers care about leading white space so we always trim it. | 62 // None of the providers care about leading white space so we always trim it. |
| 63 // Providers that care about trailing white space handle trimming themselves. | 63 // Providers that care about trailing white space handle trimming themselves. |
| 64 if ((base::TrimWhitespace(text, base::TRIM_LEADING, &text_) & | 64 if ((base::TrimWhitespace(text, base::TRIM_LEADING, &text_) & |
| 65 base::TRIM_LEADING) != 0) | 65 base::TRIM_LEADING) != 0) |
| 66 AdjustCursorPositionIfNecessary(text.length() - text_.length(), | 66 AdjustCursorPositionIfNecessary(text.length() - text_.length(), |
| 67 &cursor_position_); | 67 &cursor_position_); |
| 68 | 68 |
| 69 GURL canonicalized_url; | 69 GURL canonicalized_url; |
| 70 type_ = Parse(text_, desired_tld, &parts_, &scheme_, &canonicalized_url); | 70 type_ = Parse(text_, desired_tld, &parts_, &scheme_, &canonicalized_url); |
| 71 | 71 |
| 72 if (type_ == INVALID) | 72 if (type_ == metrics::OmniboxInputType::INVALID) |
| 73 return; | 73 return; |
| 74 | 74 |
| 75 if (((type_ == UNKNOWN) || (type_ == URL)) && | 75 if (((type_ == metrics::OmniboxInputType::UNKNOWN) || |
| 76 (type_ == metrics::OmniboxInputType::URL)) && | |
| 76 canonicalized_url.is_valid() && | 77 canonicalized_url.is_valid() && |
| 77 (!canonicalized_url.IsStandard() || canonicalized_url.SchemeIsFile() || | 78 (!canonicalized_url.IsStandard() || canonicalized_url.SchemeIsFile() || |
| 78 canonicalized_url.SchemeIsFileSystem() || | 79 canonicalized_url.SchemeIsFileSystem() || |
| 79 !canonicalized_url.host().empty())) | 80 !canonicalized_url.host().empty())) |
| 80 canonicalized_url_ = canonicalized_url; | 81 canonicalized_url_ = canonicalized_url; |
| 81 | 82 |
| 82 size_t chars_removed = RemoveForcedQueryStringIfNecessary(type_, &text_); | 83 size_t chars_removed = RemoveForcedQueryStringIfNecessary(type_, &text_); |
| 83 AdjustCursorPositionIfNecessary(chars_removed, &cursor_position_); | 84 AdjustCursorPositionIfNecessary(chars_removed, &cursor_position_); |
| 84 if (chars_removed) { | 85 if (chars_removed) { |
| 85 // Remove spaces between opening question mark and first actual character. | 86 // Remove spaces between opening question mark and first actual character. |
| 86 base::string16 trimmed_text; | 87 base::string16 trimmed_text; |
| 87 if ((base::TrimWhitespace(text_, base::TRIM_LEADING, &trimmed_text) & | 88 if ((base::TrimWhitespace(text_, base::TRIM_LEADING, &trimmed_text) & |
| 88 base::TRIM_LEADING) != 0) { | 89 base::TRIM_LEADING) != 0) { |
| 89 AdjustCursorPositionIfNecessary(text_.length() - trimmed_text.length(), | 90 AdjustCursorPositionIfNecessary(text_.length() - trimmed_text.length(), |
| 90 &cursor_position_); | 91 &cursor_position_); |
| 91 text_ = trimmed_text; | 92 text_ = trimmed_text; |
| 92 } | 93 } |
| 93 } | 94 } |
| 94 } | 95 } |
| 95 | 96 |
| 96 AutocompleteInput::~AutocompleteInput() { | 97 AutocompleteInput::~AutocompleteInput() { |
| 97 } | 98 } |
| 98 | 99 |
| 99 // static | 100 // static |
| 100 size_t AutocompleteInput::RemoveForcedQueryStringIfNecessary( | 101 size_t AutocompleteInput::RemoveForcedQueryStringIfNecessary( |
| 101 Type type, | 102 metrics::OmniboxInputType::Type type, |
| 102 base::string16* text) { | 103 base::string16* text) { |
| 103 if (type != FORCED_QUERY || text->empty() || (*text)[0] != L'?') | 104 if ((type != metrics::OmniboxInputType::FORCED_QUERY) || text->empty() || |
| 105 (*text)[0] != L'?') | |
| 104 return 0; | 106 return 0; |
| 105 // Drop the leading '?'. | 107 // Drop the leading '?'. |
| 106 text->erase(0, 1); | 108 text->erase(0, 1); |
| 107 return 1; | 109 return 1; |
| 108 } | 110 } |
| 109 | 111 |
| 110 // static | 112 // static |
| 111 std::string AutocompleteInput::TypeToString(Type type) { | 113 std::string AutocompleteInput::TypeToString( |
| 114 metrics::OmniboxInputType::Type type) { | |
| 112 switch (type) { | 115 switch (type) { |
| 113 case INVALID: return "invalid"; | 116 case metrics::OmniboxInputType::INVALID: return "invalid"; |
| 114 case UNKNOWN: return "unknown"; | 117 case metrics::OmniboxInputType::UNKNOWN: return "unknown"; |
| 115 case URL: return "url"; | 118 case metrics::OmniboxInputType::URL: return "url"; |
| 116 case QUERY: return "query"; | 119 case metrics::OmniboxInputType::QUERY: return "query"; |
| 117 case FORCED_QUERY: return "forced-query"; | 120 case metrics::OmniboxInputType::FORCED_QUERY: return "forced-query"; |
| 118 | 121 |
| 119 default: | 122 default: |
| 120 NOTREACHED(); | 123 NOTREACHED(); |
| 121 return std::string(); | 124 return std::string(); |
|
Ilya Sherman
2014/06/06 05:01:01
I'd prefer that you remove the default case, so th…
Mark P
2014/06/06 20:22:51
Done. Like in the other place, note that I add to…
Ilya Sherman
2014/06/06 20:29:24
Yes, I think it's still an improvement. The code…
| |
| 122 } | 125 } |
| 123 } | 126 } |
| 124 | 127 |
| 125 // static | 128 // static |
| 126 AutocompleteInput::Type AutocompleteInput::Parse( | 129 metrics::OmniboxInputType::Type AutocompleteInput::Parse( |
| 127 const base::string16& text, | 130 const base::string16& text, |
| 128 const base::string16& desired_tld, | 131 const base::string16& desired_tld, |
| 129 url::Parsed* parts, | 132 url::Parsed* parts, |
| 130 base::string16* scheme, | 133 base::string16* scheme, |
| 131 GURL* canonicalized_url) { | 134 GURL* canonicalized_url) { |
| 132 size_t first_non_white = text.find_first_not_of(base::kWhitespaceUTF16, 0); | 135 size_t first_non_white = text.find_first_not_of(base::kWhitespaceUTF16, 0); |
| 133 if (first_non_white == base::string16::npos) | 136 if (first_non_white == base::string16::npos) |
| 134 return INVALID; // All whitespace. | 137 return metrics::OmniboxInputType::INVALID; // All whitespace. |
| 135 | 138 |
| 136 if (text[first_non_white] == L'?') { | 139 if (text[first_non_white] == L'?') { |
| 137 // If the first non-whitespace character is a '?', we magically treat this | 140 // If the first non-whitespace character is a '?', we magically treat this |
| 138 // as a query. | 141 // as a query. |
| 139 return FORCED_QUERY; | 142 return metrics::OmniboxInputType::FORCED_QUERY; |
| 140 } | 143 } |
| 141 | 144 |
| 142 // Ask our parsing back-end to help us understand what the user typed. We | 145 // Ask our parsing back-end to help us understand what the user typed. We |
| 143 // use the URLFixerUpper here because we want to be smart about what we | 146 // use the URLFixerUpper here because we want to be smart about what we |
| 144 // consider a scheme. For example, we shouldn't consider www.google.com:80 | 147 // consider a scheme. For example, we shouldn't consider www.google.com:80 |
| 145 // to have a scheme. | 148 // to have a scheme. |
| 146 url::Parsed local_parts; | 149 url::Parsed local_parts; |
| 147 if (!parts) | 150 if (!parts) |
| 148 parts = &local_parts; | 151 parts = &local_parts; |
| 149 const base::string16 parsed_scheme(URLFixerUpper::SegmentURL(text, parts)); | 152 const base::string16 parsed_scheme(URLFixerUpper::SegmentURL(text, parts)); |
| 150 if (scheme) | 153 if (scheme) |
| 151 *scheme = parsed_scheme; | 154 *scheme = parsed_scheme; |
| 152 | 155 |
| 153 // If we can't canonicalize the user's input, the rest of the autocomplete | 156 // If we can't canonicalize the user's input, the rest of the autocomplete |
| 154 // system isn't going to be able to produce a navigable URL match for it. | 157 // system isn't going to be able to produce a navigable URL match for it. |
| 155 // So we just return QUERY immediately in these cases. | 158 // So we just return QUERY immediately in these cases. |
| 156 GURL placeholder_canonicalized_url; | 159 GURL placeholder_canonicalized_url; |
| 157 if (!canonicalized_url) | 160 if (!canonicalized_url) |
| 158 canonicalized_url = &placeholder_canonicalized_url; | 161 canonicalized_url = &placeholder_canonicalized_url; |
| 159 *canonicalized_url = URLFixerUpper::FixupURL(base::UTF16ToUTF8(text), | 162 *canonicalized_url = URLFixerUpper::FixupURL(base::UTF16ToUTF8(text), |
| 160 base::UTF16ToUTF8(desired_tld)); | 163 base::UTF16ToUTF8(desired_tld)); |
| 161 if (!canonicalized_url->is_valid()) | 164 if (!canonicalized_url->is_valid()) |
| 162 return QUERY; | 165 return metrics::OmniboxInputType::QUERY; |
| 163 | 166 |
| 164 if (LowerCaseEqualsASCII(parsed_scheme, url::kFileScheme)) { | 167 if (LowerCaseEqualsASCII(parsed_scheme, url::kFileScheme)) { |
| 165 // A user might or might not type a scheme when entering a file URL. In | 168 // A user might or might not type a scheme when entering a file URL. In |
| 166 // either case, |parsed_scheme| will tell us that this is a file URL, but | 169 // either case, |parsed_scheme| will tell us that this is a file URL, but |
| 167 // |parts->scheme| might be empty, e.g. if the user typed "C:\foo". | 170 // |parts->scheme| might be empty, e.g. if the user typed "C:\foo". |
| 168 return URL; | 171 return metrics::OmniboxInputType::URL; |
| 169 } | 172 } |
| 170 | 173 |
| 171 // If the user typed a scheme, and it's HTTP or HTTPS, we know how to parse it | 174 // If the user typed a scheme, and it's HTTP or HTTPS, we know how to parse it |
| 172 // well enough that we can fall through to the heuristics below. If it's | 175 // well enough that we can fall through to the heuristics below. If it's |
| 173 // something else, we can just determine our action based on what we do with | 176 // something else, we can just determine our action based on what we do with |
| 174 // any input of this scheme. In theory we could do better with some schemes | 177 // any input of this scheme. In theory we could do better with some schemes |
| 175 // (e.g. "ftp" or "view-source") but I'll wait to spend the effort on that | 178 // (e.g. "ftp" or "view-source") but I'll wait to spend the effort on that |
| 176 // until I run into some cases that really need it. | 179 // until I run into some cases that really need it. |
| 177 if (parts->scheme.is_nonempty() && | 180 if (parts->scheme.is_nonempty() && |
| 178 !LowerCaseEqualsASCII(parsed_scheme, url::kHttpScheme) && | 181 !LowerCaseEqualsASCII(parsed_scheme, url::kHttpScheme) && |
| 179 !LowerCaseEqualsASCII(parsed_scheme, url::kHttpsScheme)) { | 182 !LowerCaseEqualsASCII(parsed_scheme, url::kHttpsScheme)) { |
| 180 // See if we know how to handle the URL internally. There are some schemes | 183 // See if we know how to handle the URL internally. There are some schemes |
| 181 // that we convert to other things before they reach the renderer or else | 184 // that we convert to other things before they reach the renderer or else |
| 182 // the renderer handles internally without reaching the net::URLRequest | 185 // the renderer handles internally without reaching the net::URLRequest |
| 183 // logic. They thus won't be listed as "handled protocols", but we should | 186 // logic. They thus won't be listed as "handled protocols", but we should |
| 184 // still claim to handle them. | 187 // still claim to handle them. |
| 185 if (ProfileIOData::IsHandledProtocol(base::UTF16ToASCII(parsed_scheme)) || | 188 if (ProfileIOData::IsHandledProtocol(base::UTF16ToASCII(parsed_scheme)) || |
| 186 LowerCaseEqualsASCII(parsed_scheme, content::kViewSourceScheme) || | 189 LowerCaseEqualsASCII(parsed_scheme, content::kViewSourceScheme) || |
| 187 LowerCaseEqualsASCII(parsed_scheme, url::kJavaScriptScheme) || | 190 LowerCaseEqualsASCII(parsed_scheme, url::kJavaScriptScheme) || |
| 188 LowerCaseEqualsASCII(parsed_scheme, url::kDataScheme)) | 191 LowerCaseEqualsASCII(parsed_scheme, url::kDataScheme)) |
| 189 return URL; | 192 return metrics::OmniboxInputType::URL; |
| 190 | 193 |
| 191 // Not an internal protocol. Check and see if the user has explicitly | 194 // Not an internal protocol. Check and see if the user has explicitly |
| 192 // opened this scheme as a URL before, or if the "scheme" is actually a | 195 // opened this scheme as a URL before, or if the "scheme" is actually a |
| 193 // username. We need to do this after the check above because some | 196 // username. We need to do this after the check above because some |
| 194 // handlable schemes (e.g. "javascript") may be treated as "blocked" by the | 197 // handlable schemes (e.g. "javascript") may be treated as "blocked" by the |
| 195 // external protocol handler because we don't want pages to open them, but | 198 // external protocol handler because we don't want pages to open them, but |
| 196 // users still can. | 199 // users still can. |
| 197 // Note that the protocol handler needs to be informed that omnibox input | 200 // Note that the protocol handler needs to be informed that omnibox input |
| 198 // should always be considered "user gesture-triggered", lest it always | 201 // should always be considered "user gesture-triggered", lest it always |
| 199 // return BLOCK. | 202 // return BLOCK. |
| 200 ExternalProtocolHandler::BlockState block_state = | 203 ExternalProtocolHandler::BlockState block_state = |
| 201 ExternalProtocolHandler::GetBlockState( | 204 ExternalProtocolHandler::GetBlockState( |
| 202 base::UTF16ToUTF8(parsed_scheme), true); | 205 base::UTF16ToUTF8(parsed_scheme), true); |
| 203 switch (block_state) { | 206 switch (block_state) { |
| 204 case ExternalProtocolHandler::DONT_BLOCK: | 207 case ExternalProtocolHandler::DONT_BLOCK: |
| 205 return URL; | 208 return metrics::OmniboxInputType::URL; |
| 206 | 209 |
| 207 case ExternalProtocolHandler::BLOCK: | 210 case ExternalProtocolHandler::BLOCK: |
| 208 // If we don't want the user to open the URL, don't let it be navigated | 211 // If we don't want the user to open the URL, don't let it be navigated |
| 209 // to at all. | 212 // to at all. |
| 210 return QUERY; | 213 return metrics::OmniboxInputType::QUERY; |
| 211 | 214 |
| 212 default: { | 215 default: { |
| 213 // We don't know about this scheme. It might be that the user typed a | 216 // We don't know about this scheme. It might be that the user typed a |
| 214 // URL of the form "username:password@foo.com". | 217 // URL of the form "username:password@foo.com". |
| 215 const base::string16 http_scheme_prefix = | 218 const base::string16 http_scheme_prefix = |
| 216 base::ASCIIToUTF16(std::string(url::kHttpScheme) + | 219 base::ASCIIToUTF16(std::string(url::kHttpScheme) + |
| 217 content::kStandardSchemeSeparator); | 220 content::kStandardSchemeSeparator); |
| 218 url::Parsed http_parts; | 221 url::Parsed http_parts; |
| 219 base::string16 http_scheme; | 222 base::string16 http_scheme; |
| 220 GURL http_canonicalized_url; | 223 GURL http_canonicalized_url; |
| 221 Type http_type = Parse(http_scheme_prefix + text, desired_tld, | 224 metrics::OmniboxInputType::Type http_type = |
| 222 &http_parts, &http_scheme, | 225 Parse(http_scheme_prefix + text, desired_tld, &http_parts, |
| 223 &http_canonicalized_url); | 226 &http_scheme, &http_canonicalized_url); |
| 224 DCHECK_EQ(std::string(url::kHttpScheme), | 227 DCHECK_EQ(std::string(url::kHttpScheme), |
| 225 base::UTF16ToUTF8(http_scheme)); | 228 base::UTF16ToUTF8(http_scheme)); |
| 226 | 229 |
| 227 if ((http_type == URL) && http_parts.username.is_nonempty() && | 230 if ((http_type == metrics::OmniboxInputType::URL) && |
| 231 http_parts.username.is_nonempty() && | |
| 228 http_parts.password.is_nonempty()) { | 232 http_parts.password.is_nonempty()) { |
| 229 // Manually re-jigger the parsed parts to match |text| (without the | 233 // Manually re-jigger the parsed parts to match |text| (without the |
| 230 // http scheme added). | 234 // http scheme added). |
| 231 http_parts.scheme.reset(); | 235 http_parts.scheme.reset(); |
| 232 url::Component* components[] = { | 236 url::Component* components[] = { |
| 233 &http_parts.username, | 237 &http_parts.username, |
| 234 &http_parts.password, | 238 &http_parts.password, |
| 235 &http_parts.host, | 239 &http_parts.host, |
| 236 &http_parts.port, | 240 &http_parts.port, |
| 237 &http_parts.path, | 241 &http_parts.path, |
| 238 &http_parts.query, | 242 &http_parts.query, |
| 239 &http_parts.ref, | 243 &http_parts.ref, |
| 240 }; | 244 }; |
| 241 for (size_t i = 0; i < arraysize(components); ++i) { | 245 for (size_t i = 0; i < arraysize(components); ++i) { |
| 242 URLFixerUpper::OffsetComponent( | 246 URLFixerUpper::OffsetComponent( |
| 243 -static_cast<int>(http_scheme_prefix.length()), components[i]); | 247 -static_cast<int>(http_scheme_prefix.length()), components[i]); |
| 244 } | 248 } |
| 245 | 249 |
| 246 *parts = http_parts; | 250 *parts = http_parts; |
| 247 if (scheme) | 251 if (scheme) |
| 248 scheme->clear(); | 252 scheme->clear(); |
| 249 *canonicalized_url = http_canonicalized_url; | 253 *canonicalized_url = http_canonicalized_url; |
| 250 | 254 |
| 251 return URL; | 255 return metrics::OmniboxInputType::URL; |
| 252 } | 256 } |
| 253 | 257 |
| 254 // We don't know about this scheme and it doesn't look like the user | 258 // We don't know about this scheme and it doesn't look like the user |
| 255 // typed a username and password. It's likely to be a search operator | 259 // typed a username and password. It's likely to be a search operator |
| 256 // like "site:" or "link:". We classify it as UNKNOWN so the user has | 260 // like "site:" or "link:". We classify it as UNKNOWN so the user has |
| 257 // the option of treating it as a URL if we're wrong. | 261 // the option of treating it as a URL if we're wrong. |
| 258 // Note that SegmentURL() is smart so we aren't tricked by "c:\foo" or | 262 // Note that SegmentURL() is smart so we aren't tricked by "c:\foo" or |
| 259 // "www.example.com:81" in this case. | 263 // "www.example.com:81" in this case. |
| 260 return UNKNOWN; | 264 return metrics::OmniboxInputType::UNKNOWN; |
| 261 } | 265 } |
| 262 } | 266 } |
| 263 } | 267 } |
| 264 | 268 |
| 265 // Either the user didn't type a scheme, in which case we need to distinguish | 269 // Either the user didn't type a scheme, in which case we need to distinguish |
| 266 // between an HTTP URL and a query, or the scheme is HTTP or HTTPS, in which | 270 // between an HTTP URL and a query, or the scheme is HTTP or HTTPS, in which |
| 267 // case we should reject invalid formulations. | 271 // case we should reject invalid formulations. |
| 268 | 272 |
| 269 // If we have an empty host it can't be a valid HTTP[S] URL. (This should | 273 // If we have an empty host it can't be a valid HTTP[S] URL. (This should |
| 270 // only trigger for input that begins with a colon, which GURL will parse as a | 274 // only trigger for input that begins with a colon, which GURL will parse as a |
| 271 // valid, non-standard URL; for standard URLs, an empty host would have | 275 // valid, non-standard URL; for standard URLs, an empty host would have |
| 272 // resulted in an invalid |canonicalized_url| above.) | 276 // resulted in an invalid |canonicalized_url| above.) |
| 273 if (!parts->host.is_nonempty()) | 277 if (!parts->host.is_nonempty()) |
| 274 return QUERY; | 278 return metrics::OmniboxInputType::QUERY; |
| 275 | 279 |
| 276 // Sanity-check: GURL should have failed to canonicalize this URL if it had an | 280 // Sanity-check: GURL should have failed to canonicalize this URL if it had an |
| 277 // invalid port. | 281 // invalid port. |
| 278 DCHECK_NE(url::PORT_INVALID, url::ParsePort(text.c_str(), parts->port)); | 282 DCHECK_NE(url::PORT_INVALID, url::ParsePort(text.c_str(), parts->port)); |
| 279 | 283 |
| 280 // Likewise, the RCDS can reject certain obviously-invalid hosts. (We also | 284 // Likewise, the RCDS can reject certain obviously-invalid hosts. (We also |
| 281 // use the registry length later below.) | 285 // use the registry length later below.) |
| 282 const base::string16 host(text.substr(parts->host.begin, parts->host.len)); | 286 const base::string16 host(text.substr(parts->host.begin, parts->host.len)); |
| 283 const size_t registry_length = | 287 const size_t registry_length = |
| 284 net::registry_controlled_domains::GetRegistryLength( | 288 net::registry_controlled_domains::GetRegistryLength( |
| 285 base::UTF16ToUTF8(host), | 289 base::UTF16ToUTF8(host), |
| 286 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, | 290 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, |
| 287 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); | 291 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); |
| 288 if (registry_length == std::string::npos) { | 292 if (registry_length == std::string::npos) { |
| 289 // Try to append the desired_tld. | 293 // Try to append the desired_tld. |
| 290 if (!desired_tld.empty()) { | 294 if (!desired_tld.empty()) { |
| 291 base::string16 host_with_tld(host); | 295 base::string16 host_with_tld(host); |
| 292 if (host[host.length() - 1] != '.') | 296 if (host[host.length() - 1] != '.') |
| 293 host_with_tld += '.'; | 297 host_with_tld += '.'; |
| 294 host_with_tld += desired_tld; | 298 host_with_tld += desired_tld; |
| 295 const size_t tld_length = | 299 const size_t tld_length = |
| 296 net::registry_controlled_domains::GetRegistryLength( | 300 net::registry_controlled_domains::GetRegistryLength( |
| 297 base::UTF16ToUTF8(host_with_tld), | 301 base::UTF16ToUTF8(host_with_tld), |
| 298 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, | 302 net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, |
| 299 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); | 303 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); |
| 300 if (tld_length != std::string::npos) | 304 if (tld_length != std::string::npos) { |
| 301 return URL; // Something like "99999999999" that looks like a bad IP | 305 // Something like "99999999999" that looks like a bad IP |
| 302 // address, but becomes valid on attaching a TLD. | 306 // address, but becomes valid on attaching a TLD. |
| 307 return metrics::OmniboxInputType::URL; | |
| 308 } | |
| 303 } | 309 } |
| 304 return QUERY; // Could be a broken IP address, etc. | 310 // Could be a broken IP address, etc. |
| 311 return metrics::OmniboxInputType::QUERY; | |
| 305 } | 312 } |
| 306 | 313 |
| 307 | 314 |
| 308 // See if the hostname is valid. While IE and GURL allow hostnames to contain | 315 // See if the hostname is valid. While IE and GURL allow hostnames to contain |
| 309 // many other characters (perhaps for weird intranet machines), it's extremely | 316 // many other characters (perhaps for weird intranet machines), it's extremely |
| 310 // unlikely that a user would be trying to type those in for anything other | 317 // unlikely that a user would be trying to type those in for anything other |
| 311 // than a search query. | 318 // than a search query. |
| 312 url::CanonHostInfo host_info; | 319 url::CanonHostInfo host_info; |
| 313 const std::string canonicalized_host(net::CanonicalizeHost( | 320 const std::string canonicalized_host(net::CanonicalizeHost( |
| 314 base::UTF16ToUTF8(host), &host_info)); | 321 base::UTF16ToUTF8(host), &host_info)); |
| (...skipping 13 matching lines...) Expand all Loading... | |
| 328 // "toys at amazon.com" will be treated as a search. | 335 // "toys at amazon.com" will be treated as a search. |
| 329 // * The user is typing some garbage string. Return QUERY. | 336 // * The user is typing some garbage string. Return QUERY. |
| 330 // | 337 // |
| 331 // Thus we fall down in the following cases: | 338 // Thus we fall down in the following cases: |
| 332 // * Trying to navigate to a hostname with spaces | 339 // * Trying to navigate to a hostname with spaces |
| 333 // * Trying to navigate to a hostname with invalid characters and an unknown | 340 // * Trying to navigate to a hostname with invalid characters and an unknown |
| 334 // TLD | 341 // TLD |
| 335 // These are rare, though probably possible in intranets. | 342 // These are rare, though probably possible in intranets. |
| 336 return (parts->scheme.is_nonempty() || | 343 return (parts->scheme.is_nonempty() || |
| 337 ((registry_length != 0) && | 344 ((registry_length != 0) && |
| 338 (host.find(' ') == base::string16::npos))) ? UNKNOWN : QUERY; | 345 (host.find(' ') == base::string16::npos))) ? |
| 346 metrics::OmniboxInputType::UNKNOWN : metrics::OmniboxInputType::QUERY; | |
| 339 } | 347 } |
| 340 | 348 |
| 341 // Now that we've ruled out all schemes other than http or https and done a | 349 // Now that we've ruled out all schemes other than http or https and done a |
| 342 // little more sanity checking, the presence of a scheme means this is likely | 350 // little more sanity checking, the presence of a scheme means this is likely |
| 343 // a URL. | 351 // a URL. |
| 344 if (parts->scheme.is_nonempty()) | 352 if (parts->scheme.is_nonempty()) |
| 345 return URL; | 353 return metrics::OmniboxInputType::URL; |
| 346 | 354 |
| 347 // See if the host is an IP address. | 355 // See if the host is an IP address. |
| 348 if (host_info.family == url::CanonHostInfo::IPV6) | 356 if (host_info.family == url::CanonHostInfo::IPV6) |
| 349 return URL; | 357 return metrics::OmniboxInputType::URL; |
| 350 // If the user originally typed a host that looks like an IP address (a | 358 // If the user originally typed a host that looks like an IP address (a |
| 351 // dotted quad), they probably want to open it. If the original input was | 359 // dotted quad), they probably want to open it. If the original input was |
| 352 // something else (like a single number), they probably wanted to search for | 360 // something else (like a single number), they probably wanted to search for |
| 353 // it, unless they explicitly typed a scheme. This is true even if the URL | 361 // it, unless they explicitly typed a scheme. This is true even if the URL |
| 354 // appears to have a path: "1.2/45" is more likely a search (for the answer | 362 // appears to have a path: "1.2/45" is more likely a search (for the answer |
| 355 // to a math problem) than a URL. However, if there are more non-host | 363 // to a math problem) than a URL. However, if there are more non-host |
| 356 // components, then maybe this really was intended to be a navigation. For | 364 // components, then maybe this really was intended to be a navigation. For |
| 357 // this reason we only check the dotted-quad case here, and save the "other | 365 // this reason we only check the dotted-quad case here, and save the "other |
| 358 // IP addresses" case for after we check the number of non-host components | 366 // IP addresses" case for after we check the number of non-host components |
| 359 // below. | 367 // below. |
| 360 if ((host_info.family == url::CanonHostInfo::IPV4) && | 368 if ((host_info.family == url::CanonHostInfo::IPV4) && |
| 361 (host_info.num_ipv4_components == 4)) | 369 (host_info.num_ipv4_components == 4)) |
| 362 return URL; | 370 return metrics::OmniboxInputType::URL; |
| 363 | 371 |
| 364 // Presence of a password means this is likely a URL. Note that unless the | 372 // Presence of a password means this is likely a URL. Note that unless the |
| 365 // user has typed an explicit "http://" or similar, we'll probably think that | 373 // user has typed an explicit "http://" or similar, we'll probably think that |
| 366 // the username is some unknown scheme, and bail out in the scheme-handling | 374 // the username is some unknown scheme, and bail out in the scheme-handling |
| 367 // code above. | 375 // code above. |
| 368 if (parts->password.is_nonempty()) | 376 if (parts->password.is_nonempty()) |
| 369 return URL; | 377 return metrics::OmniboxInputType::URL; |
| 370 | 378 |
| 371 // Trailing slashes force the input to be treated as a URL. | 379 // Trailing slashes force the input to be treated as a URL. |
| 372 if (parts->path.is_nonempty()) { | 380 if (parts->path.is_nonempty()) { |
| 373 char c = text[parts->path.end() - 1]; | 381 char c = text[parts->path.end() - 1]; |
| 374 if ((c == '\\') || (c == '/')) | 382 if ((c == '\\') || (c == '/')) |
| 375 return URL; | 383 return metrics::OmniboxInputType::URL; |
| 376 } | 384 } |
| 377 | 385 |
| 378 // If there is more than one recognized non-host component, this is likely to | 386 // If there is more than one recognized non-host component, this is likely to |
| 379 // be a URL, even if the TLD is unknown (in which case this is likely an | 387 // be a URL, even if the TLD is unknown (in which case this is likely an |
| 380 // intranet URL). | 388 // intranet URL). |
| 381 if (NumNonHostComponents(*parts) > 1) | 389 if (NumNonHostComponents(*parts) > 1) |
| 382 return URL; | 390 return metrics::OmniboxInputType::URL; |
| 383 | 391 |
| 384 // If the host has a known TLD or a port, it's probably a URL, with the | 392 // If the host has a known TLD or a port, it's probably a URL, with the |
| 385 // following exceptions: | 393 // following exceptions: |
| 386 // * Any "IP addresses" that make it here are more likely searches | 394 // * Any "IP addresses" that make it here are more likely searches |
| 387 // (see above). | 395 // (see above). |
| 388 // * If we reach here with a username, our input looks like "user@host[.tld]". | 396 // * If we reach here with a username, our input looks like "user@host[.tld]". |
| 389 // Because there is no scheme explicitly specified, we think this is more | 397 // Because there is no scheme explicitly specified, we think this is more |
| 390 // likely an email address than an HTTP auth attempt. Hence, we search by | 398 // likely an email address than an HTTP auth attempt. Hence, we search by |
| 391 // default and let users correct us on a case-by-case basis. | 399 // default and let users correct us on a case-by-case basis. |
| 392 // Note that we special-case "localhost" as a known hostname. | 400 // Note that we special-case "localhost" as a known hostname. |
| 393 if ((host_info.family != url::CanonHostInfo::IPV4) && | 401 if ((host_info.family != url::CanonHostInfo::IPV4) && |
| 394 ((registry_length != 0) || (host == base::ASCIIToUTF16("localhost") || | 402 ((registry_length != 0) || (host == base::ASCIIToUTF16("localhost") || |
| 395 parts->port.is_nonempty()))) | 403 parts->port.is_nonempty()))) |
| 396 return parts->username.is_nonempty() ? UNKNOWN : URL; | 404 return parts->username.is_nonempty() ? |
| 405 metrics::OmniboxInputType::UNKNOWN : metrics::OmniboxInputType::URL; | |
| 397 | 406 |
| 398 // If we reach this point, we know there's no known TLD on the input, so if | 407 // If we reach this point, we know there's no known TLD on the input, so if |
| 399 // the user wishes to add a desired_tld, the fixup code will oblige; thus this | 408 // the user wishes to add a desired_tld, the fixup code will oblige; thus this |
| 400 // is a URL. | 409 // is a URL. |
| 401 if (!desired_tld.empty()) | 410 if (!desired_tld.empty()) |
| 402 return URL; | 411 return metrics::OmniboxInputType::URL; |
| 403 | 412 |
| 404 // No scheme, password, port, path, and no known TLD on the host. | 413 // No scheme, password, port, path, and no known TLD on the host. |
| 405 // This could be: | 414 // This could be: |
| 406 // * An "incomplete IP address"; likely a search (see above). | 415 // * An "incomplete IP address"; likely a search (see above). |
| 407 // * An email-like input like "user@host", where "host" has no known TLD. | 416 // * An email-like input like "user@host", where "host" has no known TLD. |
| 408 // It's not clear what the user means here and searching seems reasonable. | 417 // It's not clear what the user means here and searching seems reasonable. |
| 409 // * A single word "foo"; possibly an intranet site, but more likely a search. | 418 // * A single word "foo"; possibly an intranet site, but more likely a search. |
| 410 // This is ideally an UNKNOWN, and we can let the Alternate Nav URL code | 419 // This is ideally an UNKNOWN, and we can let the Alternate Nav URL code |
| 411 // catch our mistakes. | 420 // catch our mistakes. |
| 412 // * A URL with a valid TLD we don't know about yet. If e.g. a registrar adds | 421 // * A URL with a valid TLD we don't know about yet. If e.g. a registrar adds |
| 413 // "xxx" as a TLD, then until we add it to our data file, Chrome won't know | 422 // "xxx" as a TLD, then until we add it to our data file, Chrome won't know |
| 414 // "foo.xxx" is a real URL. So ideally this is a URL, but we can't really | 423 // "foo.xxx" is a real URL. So ideally this is a URL, but we can't really |
| 415 // distinguish this case from: | 424 // distinguish this case from: |
| 416 // * A "URL-like" string that's not really a URL (like | 425 // * A "URL-like" string that's not really a URL (like |
| 417 // "browser.tabs.closeButtons" or "java.awt.event.*"). This is ideally a | 426 // "browser.tabs.closeButtons" or "java.awt.event.*"). This is ideally a |
| 418 // QUERY. Since this is indistinguishable from the case above, and this | 427 // QUERY. Since this is indistinguishable from the case above, and this |
| 419 // case is much more likely, claim these are UNKNOWN, which should default | 428 // case is much more likely, claim these are UNKNOWN, which should default |
| 420 // to the right thing and let users correct us on a case-by-case basis. | 429 // to the right thing and let users correct us on a case-by-case basis. |
| 421 return UNKNOWN; | 430 return metrics::OmniboxInputType::UNKNOWN; |
| 422 } | 431 } |
| 423 | 432 |
| 424 // static | 433 // static |
| 425 void AutocompleteInput::ParseForEmphasizeComponents(const base::string16& text, | 434 void AutocompleteInput::ParseForEmphasizeComponents(const base::string16& text, |
| 426 url::Component* scheme, | 435 url::Component* scheme, |
| 427 url::Component* host) { | 436 url::Component* host) { |
| 428 url::Parsed parts; | 437 url::Parsed parts; |
| 429 base::string16 scheme_str; | 438 base::string16 scheme_str; |
| 430 Parse(text, base::string16(), &parts, &scheme_str, NULL); | 439 Parse(text, base::string16(), &parts, &scheme_str, NULL); |
| 431 | 440 |
| (...skipping 84 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 516 text_ = text; | 525 text_ = text; |
| 517 cursor_position_ = cursor_position; | 526 cursor_position_ = cursor_position; |
| 518 parts_ = parts; | 527 parts_ = parts; |
| 519 } | 528 } |
| 520 | 529 |
| 521 void AutocompleteInput::Clear() { | 530 void AutocompleteInput::Clear() { |
| 522 text_.clear(); | 531 text_.clear(); |
| 523 cursor_position_ = base::string16::npos; | 532 cursor_position_ = base::string16::npos; |
| 524 current_url_ = GURL(); | 533 current_url_ = GURL(); |
| 525 current_page_classification_ = AutocompleteInput::INVALID_SPEC; | 534 current_page_classification_ = AutocompleteInput::INVALID_SPEC; |
| 526 type_ = INVALID; | 535 type_ = metrics::OmniboxInputType::INVALID; |
| 527 parts_ = url::Parsed(); | 536 parts_ = url::Parsed(); |
| 528 scheme_.clear(); | 537 scheme_.clear(); |
| 529 canonicalized_url_ = GURL(); | 538 canonicalized_url_ = GURL(); |
| 530 prevent_inline_autocomplete_ = false; | 539 prevent_inline_autocomplete_ = false; |
| 531 prefer_keyword_ = false; | 540 prefer_keyword_ = false; |
| 532 allow_exact_keyword_match_ = false; | 541 allow_exact_keyword_match_ = false; |
| 533 want_asynchronous_matches_ = true; | 542 want_asynchronous_matches_ = true; |
| 534 } | 543 } |
| OLD | NEW |