OLD | NEW |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef URL_GURL_H_ | 5 #ifndef URL_GURL_H_ |
6 #define URL_GURL_H_ | 6 #define URL_GURL_H_ |
7 | 7 |
8 #include <iosfwd> | 8 #include <iosfwd> |
9 #include <string> | 9 #include <string> |
10 | 10 |
11 #include "base/memory/scoped_ptr.h" | 11 #include "base/memory/scoped_ptr.h" |
12 #include "base/strings/string16.h" | 12 #include "base/strings/string16.h" |
| 13 #include "base/strings/string_piece.h" |
| 14 #include "url/third_party/mozilla/url_parse.h" |
13 #include "url/url_canon.h" | 15 #include "url/url_canon.h" |
14 #include "url/url_canon_stdstring.h" | 16 #include "url/url_canon_stdstring.h" |
15 #include "url/url_constants.h" | 17 #include "url/url_constants.h" |
16 #include "url/url_export.h" | 18 #include "url/url_export.h" |
17 #include "url/url_parse.h" | |
18 | 19 |
19 class URL_EXPORT GURL { | 20 class URL_EXPORT GURL { |
20 public: | 21 public: |
21 typedef url::StringPieceReplacements<std::string> Replacements; | 22 typedef url::StringPieceReplacements<std::string> Replacements; |
22 typedef url::StringPieceReplacements<base::string16> ReplacementsW; | 23 typedef url::StringPieceReplacements<base::string16> ReplacementsW; |
23 | 24 |
24 // Creates an empty, invalid URL. | 25 // Creates an empty, invalid URL. |
25 GURL(); | 26 GURL(); |
26 | 27 |
27 // Copy construction is relatively inexpensive, with most of the time going | 28 // Copy construction is relatively inexpensive, with most of the time going |
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
84 // The exception is for empty() URLs (which are !is_valid()) but this will | 85 // The exception is for empty() URLs (which are !is_valid()) but this will |
85 // return the empty string without asserting. | 86 // return the empty string without asserting. |
86 // | 87 // |
87 // Use possibly_invalid_spec() below to get the unusable spec of an invalid URL. This | 88 // Use possibly_invalid_spec() below to get the unusable spec of an invalid URL. This |
88 // separation is designed to prevent errors that may cause security problems | 89 // separation is designed to prevent errors that may cause security problems |
89 // that could result from the mistaken use of an invalid URL. | 90 // that could result from the mistaken use of an invalid URL. |
90 const std::string& spec() const; | 91 const std::string& spec() const; |
91 | 92 |
92 // Returns the potentially invalid spec for the URL. This spec MUST NOT be | 93 // Returns the potentially invalid spec for the URL. This spec MUST NOT be |
93 // modified or sent over the network. It is designed to be displayed in error | 94 // modified or sent over the network. It is designed to be displayed in error |
94 // messages to the user, as the apperance of the spec may explain the error. | 95 // messages to the user, as the appearance of the spec may explain the error. |
95 // If the spec is valid, the valid spec will be returned. | 96 // If the spec is valid, the valid spec will be returned. |
96 // | 97 // |
97 // The returned string is guaranteed to be valid UTF-8. | 98 // The returned string is guaranteed to be valid UTF-8. |
98 const std::string& possibly_invalid_spec() const { | 99 const std::string& possibly_invalid_spec() const { |
99 return spec_; | 100 return spec_; |
100 } | 101 } |
101 | 102 |
102 // Getter for the raw parsed structure. This allows callers to locate parts | 103 // Getter for the raw parsed structure. This allows callers to locate parts |
103 // of the URL within the spec themselves. Most callers should consider using | 104 // of the URL within the spec themselves. Most callers should consider using |
104 // the individual component getters below. | 105 // the individual component getters below. |
(...skipping 12 matching lines...) Expand all Loading... |
117 | 118 |
118 // Allows GURL to be used as a key in STL (for example, a std::set or std::map). | 119 // Allows GURL to be used as a key in STL (for example, a std::set or std::map). |
119 bool operator<(const GURL& other) const; | 120 bool operator<(const GURL& other) const; |
120 bool operator>(const GURL& other) const; | 121 bool operator>(const GURL& other) const; |
121 | 122 |
122 // Resolves a URL that's possibly relative to this object's URL, and returns | 123 // Resolves a URL that's possibly relative to this object's URL, and returns |
123 // it. Absolute URLs are also handled according to the rules of URLs on web | 124 // it. Absolute URLs are also handled according to the rules of URLs on web |
124 // pages. | 125 // pages. |
125 // | 126 // |
126 // It may be impossible to resolve the URLs properly. If the input is not | 127 // It may be impossible to resolve the URLs properly. If the input is not |
127 // "standard" (SchemeIsStandard() == false) and the input looks relative, we | 128 // "standard" (IsStandard() == false) and the input looks relative, we can't |
128 // can't resolve it. In these cases, the result will be an empty, invalid | 129 // resolve it. In these cases, the result will be an empty, invalid GURL. |
129 // GURL. | |
130 // | 130 // |
131 // The result may also be a nonempty, invalid URL if the input has some kind | 131 // The result may also be a nonempty, invalid URL if the input has some kind |
132 // of encoding error. In these cases, we will try to construct a "good" URL | 132 // of encoding error. In these cases, we will try to construct a "good" URL |
133 // that may have meaning to the user, but it will be marked invalid. | 133 // that may have meaning to the user, but it will be marked invalid. |
134 // | 134 // |
135 // It is an error to resolve a URL relative to an invalid URL. The result | 135 // It is an error to resolve a URL relative to an invalid URL. The result |
136 // will be the empty URL. | 136 // will be the empty URL. |
137 GURL Resolve(const std::string& relative) const; | 137 GURL Resolve(const std::string& relative) const; |
138 GURL Resolve(const base::string16& relative) const; | 138 GURL Resolve(const base::string16& relative) const; |
139 | 139 |
140 // Like Resolve() above but takes a character set encoder which will be used | |
141 // for any query text specified in the input. The charset converter parameter | |
142 // may be NULL, in which case it will be treated as UTF-8. | |
143 // | |
144 // TODO(brettw): These should be replaced with versions that take something | |
145 // more friendly than a raw CharsetConverter (maybe like an ICU character set | |
146 // name). | |
147 GURL ResolveWithCharsetConverter( | |
148 const std::string& relative, | |
149 url::CharsetConverter* charset_converter) const; | |
150 GURL ResolveWithCharsetConverter( | |
151 const base::string16& relative, | |
152 url::CharsetConverter* charset_converter) const; | |
153 | |
154 // Creates a new GURL by replacing the current URL's components with the | 140 // Creates a new GURL by replacing the current URL's components with the |
155 // supplied versions. See the Replacements class in url_canon.h for more. | 141 // supplied versions. See the Replacements class in url_canon.h for more. |
156 // | 142 // |
157 // These are not particularly quick, so avoid doing mutations when possible. | 143 // These are not particularly quick, so avoid doing mutations when possible. |
158 // Prefer the 8-bit version when possible. | 144 // Prefer the 8-bit version when possible. |
159 // | 145 // |
160 // It is an error to replace components of an invalid URL. The result will | 146 // It is an error to replace components of an invalid URL. The result will |
161 // be the empty URL. | 147 // be the empty URL. |
162 // | 148 // |
163 // Note that we use the more general url::Replacements type to give | 149 // Note that we use the more general url::Replacements type to give |
(...skipping 23 matching lines...) Expand all Loading... |
187 // It is an error to get the origin of an invalid URL. The result | 173 // It is an error to get the origin of an invalid URL. The result |
188 // will be the empty URL. | 174 // will be the empty URL. |
189 GURL GetOrigin() const; | 175 GURL GetOrigin() const; |
190 | 176 |
191 // A helper function to return a GURL stripped from the elements that are not | 177 // A helper function to return a GURL stripped from the elements that are not |
192 // supposed to be sent as HTTP referrer: username, password and ref fragment. | 178 // supposed to be sent as HTTP referrer: username, password and ref fragment. |
193 // For invalid URLs or URLs that have no valid referrers, an empty URL will be | 179 // For invalid URLs or URLs that have no valid referrers, an empty URL will be |
194 // returned. | 180 // returned. |
195 GURL GetAsReferrer() const; | 181 GURL GetAsReferrer() const; |
196 | 182 |
197 // Returns true if the scheme for the current URL is a known "standard" | 183 // Returns true if the scheme for the current URL is a known "standard-format" |
198 // scheme. Standard schemes have an authority and a path section. This | 184 // scheme. A standard-format scheme adheres to what RFC 3986 calls "generic |
199 // includes file: and filesystem:, which some callers may want to filter out | 185 // URI syntax" (https://tools.ietf.org/html/rfc3986#section-3). This includes |
200 // explicitly by calling SchemeIsFile[System]. | 186 // file: and filesystem:, which some callers may want to filter out explicitly |
| 187 // by calling SchemeIsFile[System]. |
201 bool IsStandard() const; | 188 bool IsStandard() const; |
202 | 189 |
203 // Returns true if the given parameter (should be lower-case ASCII to match | 190 // Returns true if the given parameter (should be lower-case ASCII to match |
204 // the canonicalized scheme) is the scheme for this URL. This call is more | 191 // the canonicalized scheme) is the scheme for this URL. This call is more |
205 // efficient than getting the scheme and comparing it because no copies or | 192 // efficient than getting the scheme and comparing it because no copies or |
206 // object constructions are done. | 193 // object constructions are done. |
207 bool SchemeIs(const char* lower_ascii_scheme) const; | 194 bool SchemeIs(const char* lower_ascii_scheme) const; |
208 | 195 |
209 // Returns true if the scheme is "http" or "https". | 196 // Returns true if the scheme is "http" or "https". |
210 bool SchemeIsHTTPOrHTTPS() const; | 197 bool SchemeIsHTTPOrHTTPS() const; |
211 | 198 |
212 // Returns true if the scheme is "ws" or "wss". | 199 // Returns true if the scheme is "ws" or "wss". |
213 bool SchemeIsWSOrWSS() const; | 200 bool SchemeIsWSOrWSS() const; |
214 | 201 |
215 // We often need to know if this is a file URL. File URLs are "standard", but | 202 // We often need to know if this is a file URL. File URLs are "standard", but |
216 // are often treated separately by some programs. | 203 // are often treated separately by some programs. |
217 bool SchemeIsFile() const { | 204 bool SchemeIsFile() const { |
218 return SchemeIs(url::kFileScheme); | 205 return SchemeIs(url::kFileScheme); |
219 } | 206 } |
220 | 207 |
221 // FileSystem URLs need to be treated differently in some cases. | 208 // FileSystem URLs need to be treated differently in some cases. |
222 bool SchemeIsFileSystem() const { | 209 bool SchemeIsFileSystem() const { |
223 return SchemeIs(url::kFileSystemScheme); | 210 return SchemeIs(url::kFileSystemScheme); |
224 } | 211 } |
225 | 212 |
226 // If the scheme indicates a secure connection | 213 // Returns true if the scheme indicates a secure connection. |
| 214 // |
| 215 // NOTE: This function is deprecated. You probably want |
| 216 // |SchemeIsCryptographic| (if you just want to know if a scheme uses TLS for |
| 217 // network transport) or Chromium's |IsOriginSecure| for a higher-level test |
| 218 // about an origin's security. See those functions' documentation for more |
| 219 // detail. |
| 220 // |
| 221 // TODO(palmer): Audit callers and change them to |SchemeIsCryptographic| or |
| 222 // |IsOriginSecure|, as appropriate. Then remove |SchemeIsSecure|. |
| 223 // crbug.com/362214 |
227 bool SchemeIsSecure() const { | 224 bool SchemeIsSecure() const { |
228 return SchemeIs(url::kHttpsScheme) || SchemeIs(url::kWssScheme) || | 225 return SchemeIs(url::kHttpsScheme) || SchemeIs(url::kWssScheme) || |
229 (SchemeIsFileSystem() && inner_url() && inner_url()->SchemeIsSecure()); | 226 (SchemeIsFileSystem() && inner_url() && |
| 227 inner_url()->SchemeIsSecure()); |
| 228 } |
| 229 |
| 230 // Returns true if the scheme indicates a network connection that uses TLS or |
| 231 // some other cryptographic protocol (e.g. QUIC) for security. |
| 232 // |
| 233 // This function is not a complete test of whether or not an origin's code |
| 234 // is minimally trustworthy. For that, see Chromium's |IsOriginSecure| for |
| 235 // higher-level and more complete semantics. See that function's documentation |
| 236 // for more detail. |
| 237 bool SchemeIsCryptographic() const { |
| 238 return SchemeIs(url::kHttpsScheme) || SchemeIs(url::kWssScheme); |
230 } | 239 } |
231 | 240 |
232 // Returns true if the scheme is "blob". | 241 // Returns true if the scheme is "blob". |
233 bool SchemeIsBlob() const { | 242 bool SchemeIsBlob() const { |
234 return SchemeIs(url::kBlobScheme); | 243 return SchemeIs(url::kBlobScheme); |
235 } | 244 } |
236 | 245 |
237 // The "content" of the URL is everything after the scheme (skipping the | 246 // The "content" of the URL is everything after the scheme (skipping the |
238 // scheme delimiting colon). It is an error to get the origin of an invalid | 247 // scheme delimiting colon). It is an error to get the content of an invalid |
239 // URL. The result will be an empty string. | 248 // URL: the result will be an empty string. |
240 std::string GetContent() const; | 249 std::string GetContent() const; |
241 | 250 |
242 // Returns true if the hostname is an IP address. Note: this function isn't | 251 // Returns true if the hostname is an IP address. Note: this function isn't |
243 // as cheap as a simple getter because it re-parses the hostname to verify. | 252 // as cheap as a simple getter because it re-parses the hostname to verify. |
244 // This currently identifies only IPv4 addresses (bug 822685). | |
245 bool HostIsIPAddress() const; | 253 bool HostIsIPAddress() const; |
246 | 254 |
247 // Getters for various components of the URL. The returned string will be | 255 // Getters for various components of the URL. The returned string will be |
248 // empty if the component is empty or is not present. | 256 // empty if the component is empty or is not present. |
249 std::string scheme() const { // Not including the colon. See also SchemeIs. | 257 std::string scheme() const { // Not including the colon. See also SchemeIs. |
250 return ComponentString(parsed_.scheme); | 258 return ComponentString(parsed_.scheme); |
251 } | 259 } |
252 std::string username() const { | 260 std::string username() const { |
253 return ComponentString(parsed_.username); | 261 return ComponentString(parsed_.username); |
254 } | 262 } |
(...skipping 12 matching lines...) Expand all Loading... |
267 std::string path() const { // Including first slash following host | 275 std::string path() const { // Including first slash following host |
268 return ComponentString(parsed_.path); | 276 return ComponentString(parsed_.path); |
269 } | 277 } |
270 std::string query() const { // Stuff following '?' | 278 std::string query() const { // Stuff following '?' |
271 return ComponentString(parsed_.query); | 279 return ComponentString(parsed_.query); |
272 } | 280 } |
273 std::string ref() const { // Stuff following '#' | 281 std::string ref() const { // Stuff following '#' |
274 return ComponentString(parsed_.ref); | 282 return ComponentString(parsed_.ref); |
275 } | 283 } |
276 | 284 |
277 // Existance querying. These functions will return true if the corresponding | 285 // Existence querying. These functions will return true if the corresponding |
278 // URL component exists in this URL. Note that existance is different than | 286 // URL component exists in this URL. Note that existence is different than |
279 // being nonempty. http://www.google.com/? has a query that just happens to | 287 // being nonempty. http://www.google.com/? has a query that just happens to |
280 // be empty, and has_query() will return true. | 288 // be empty, and has_query() will return true. |
281 bool has_scheme() const { | 289 bool has_scheme() const { |
282 return parsed_.scheme.len >= 0; | 290 return parsed_.scheme.len >= 0; |
283 } | 291 } |
284 bool has_username() const { | 292 bool has_username() const { |
285 return parsed_.username.len >= 0; | 293 return parsed_.username.len >= 0; |
286 } | 294 } |
287 bool has_password() const { | 295 bool has_password() const { |
288 return parsed_.password.len >= 0; | 296 return parsed_.password.len >= 0; |
289 } | 297 } |
290 bool has_host() const { | 298 bool has_host() const { |
291 // Note that hosts are special, absense of host means length 0. | 299 // Note that hosts are special, absence of host means length 0. |
292 return parsed_.host.len > 0; | 300 return parsed_.host.len > 0; |
293 } | 301 } |
294 bool has_port() const { | 302 bool has_port() const { |
295 return parsed_.port.len >= 0; | 303 return parsed_.port.len >= 0; |
296 } | 304 } |
297 bool has_path() const { | 305 bool has_path() const { |
298 // Note that "http://www.google.com/" has a path, the path is "/". This can | 306 // Note that "http://www.google.com/" has a path, the path is "/". This can |
299 // return false only for invalid or nonstandard URLs. | 307 // return false only for invalid or nonstandard URLs. |
300 return parsed_.path.len >= 0; | 308 return parsed_.path.len >= 0; |
301 } | 309 } |
302 bool has_query() const { | 310 bool has_query() const { |
303 return parsed_.query.len >= 0; | 311 return parsed_.query.len >= 0; |
304 } | 312 } |
305 bool has_ref() const { | 313 bool has_ref() const { |
306 return parsed_.ref.len >= 0; | 314 return parsed_.ref.len >= 0; |
307 } | 315 } |
308 | 316 |
309 // Returns a parsed version of the port. Can also be any of the special | 317 // Returns a parsed version of the port. Can also be any of the special |
310 // values defined in Parsed for ExtractPort. | 318 // values defined in Parsed for ExtractPort. |
311 int IntPort() const; | 319 int IntPort() const; |
312 | 320 |
313 // Returns the port number of the url, or the default port number. | 321 // Returns the port number of the URL, or the default port number. |
314 // If the scheme has no concept of port (or unknown default) returns | 322 // If the scheme has no concept of port (or unknown default) returns |
315 // PORT_UNSPECIFIED. | 323 // PORT_UNSPECIFIED. |
316 int EffectiveIntPort() const; | 324 int EffectiveIntPort() const; |
317 | 325 |
318 // Extracts the filename portion of the path and returns it. The filename | 326 // Extracts the filename portion of the path and returns it. The filename |
319 // is everything after the last slash in the path. This may be empty. | 327 // is everything after the last slash in the path. This may be empty. |
320 std::string ExtractFileName() const; | 328 std::string ExtractFileName() const; |
321 | 329 |
322 // Returns the path that should be sent to the server. This is the path, | 330 // Returns the path that should be sent to the server. This is the path, |
323 // parameter, and query portions of the URL. It is guaranteed to be ASCII. | 331 // parameter, and query portions of the URL. It is guaranteed to be ASCII. |
324 std::string PathForRequest() const; | 332 std::string PathForRequest() const; |
325 | 333 |
326 // Returns the host, excluding the square brackets surrounding IPv6 address | 334 // Returns the host, excluding the square brackets surrounding IPv6 address |
327 // literals. This can be useful for passing to getaddrinfo(). | 335 // literals. This can be useful for passing to getaddrinfo(). |
328 std::string HostNoBrackets() const; | 336 std::string HostNoBrackets() const; |
329 | 337 |
330 // Returns true if this URL's host matches or is in the same domain as | 338 // Returns true if this URL's host matches or is in the same domain as |
331 // the given input string. For example if this URL was "www.google.com", | 339 // the given input string. For example, if the hostname of the URL is |
332 // this would match "com", "google.com", and "www.google.com | 340 // "www.google.com", this will return true for "com", "google.com", and |
333 // (input domain should be lower-case ASCII to match the canonicalized | 341 // "www.google.com". |
334 // host). This call is more efficient than getting the host and checking | 342 // |
| 343 // The input domain should be lower-case ASCII to match the canonicalized |
| 344 // host. This call is more efficient than getting the host and checking |
335 // whether host has the specific domain or not because no copies or | 345 // whether host has the specific domain or not because no copies or |
336 // object constructions are done. | 346 // object constructions are done. |
337 // | 347 bool DomainIs(base::StringPiece lower_ascii_domain) const; |
338 // If function DomainIs has parameter domain_len, which means the parameter | |
339 // lower_ascii_domain does not gurantee to terminate with NULL character. | |
340 bool DomainIs(const char* lower_ascii_domain, int domain_len) const; | |
341 | 348 |
342 // If function DomainIs only has parameter lower_ascii_domain, which means | 349 // Swaps the contents of this GURL object with |other|, without doing |
343 // domain string should be terminate with NULL character. | |
344 bool DomainIs(const char* lower_ascii_domain) const { | |
345 return DomainIs(lower_ascii_domain, | |
346 static_cast<int>(strlen(lower_ascii_domain))); | |
347 } | |
348 | |
349 // Swaps the contents of this GURL object with the argument without doing | |
350 // any memory allocations. | 350 // any memory allocations. |
351 void Swap(GURL* other); | 351 void Swap(GURL* other); |
352 | 352 |
353 // Returns a reference to a singleton empty GURL. This object is for callers | 353 // Returns a reference to a singleton empty GURL. This object is for callers |
354 // who return references but don't have anything to return in some cases. | 354 // who return references but don't have anything to return in some cases. |
355 // This function may be called from any thread. | 355 // This function may be called from any thread. |
356 static const GURL& EmptyGURL(); | 356 static const GURL& EmptyGURL(); |
357 | 357 |
358 // Returns the inner URL of a nested URL [currently only non-null for | 358 // Returns the inner URL of a nested URL [currently only non-null for |
359 // filesystem: URLs]. | 359 // filesystem: URLs]. |
360 const GURL* inner_url() const { | 360 const GURL* inner_url() const { |
361 return inner_url_.get(); | 361 return inner_url_.get(); |
362 } | 362 } |
363 | 363 |
364 private: | 364 private: |
365 // Variant of the string parsing constructor that allows the caller to elect | 365 // Variant of the string parsing constructor that allows the caller to elect |
366 // to retain trailing whitespace, if any, on the passed URL spec but only if the | 366 // to retain trailing whitespace, if any, on the passed URL spec, but only if |
367 // scheme is one that allows trailing whitespace. The primary use-case is | 367 // the scheme is one that allows trailing whitespace. The primary use-case is |
368 // for data: URLs. In most cases, you want to use the single parameter | 368 // for data: URLs. In most cases, you want to use the single parameter |
369 // constructor above. | 369 // constructor above. |
370 enum RetainWhiteSpaceSelector { RETAIN_TRAILING_PATH_WHITEPACE }; | 370 enum RetainWhiteSpaceSelector { RETAIN_TRAILING_PATH_WHITEPACE }; |
371 GURL(const std::string& url_string, RetainWhiteSpaceSelector); | 371 GURL(const std::string& url_string, RetainWhiteSpaceSelector); |
372 | 372 |
373 template<typename STR> | 373 template<typename STR> |
374 void InitCanonical(const STR& input_spec, bool trim_path_end); | 374 void InitCanonical(const STR& input_spec, bool trim_path_end); |
375 | 375 |
376 void InitializeFromCanonicalSpec(); | 376 void InitializeFromCanonicalSpec(); |
377 | 377 |
(...skipping 18 matching lines...) Expand all Loading... |
396 // Used for nested schemes [currently only filesystem:]. | 396 // Used for nested schemes [currently only filesystem:]. |
397 scoped_ptr<GURL> inner_url_; | 397 scoped_ptr<GURL> inner_url_; |
398 | 398 |
399 // TODO bug 684583: Add encoding for query params. | 399 // TODO bug 684583: Add encoding for query params. |
400 }; | 400 }; |
401 | 401 |
402 // Stream operator so GURL can be used in assertion statements. | 402 // Stream operator so GURL can be used in assertion statements. |
403 URL_EXPORT std::ostream& operator<<(std::ostream& out, const GURL& url); | 403 URL_EXPORT std::ostream& operator<<(std::ostream& out, const GURL& url); |
404 | 404 |
405 #endif // URL_GURL_H_ | 405 #endif // URL_GURL_H_ |
OLD | NEW |