Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef URL_GURL_H_ | 5 #ifndef URL_GURL_H_ |
| 6 #define URL_GURL_H_ | 6 #define URL_GURL_H_ |
| 7 | 7 |
| 8 #include <iosfwd> | 8 #include <iosfwd> |
| 9 #include <string> | 9 #include <string> |
| 10 | 10 |
| 11 #include "base/string16.h" | 11 #include "base/string16.h" |
| 12 #include "url/url_canon.h" | 12 #include "url/url_canon.h" |
| 13 #include "url/url_canon_stdstring.h" | 13 #include "url/url_canon_stdstring.h" |
| 14 #include "url/url_export.h" | |
| 14 #include "url/url_parse.h" | 15 #include "url/url_parse.h" |
| 15 | 16 |
| 16 class GURL { | 17 class GURL { |
| 17 public: | 18 public: |
| 18 typedef url_canon::StdStringReplacements<std::string> Replacements; | 19 typedef url_canon::StdStringReplacements<std::string> Replacements; |
| 19 typedef url_canon::StdStringReplacements<string16> ReplacementsW; | 20 typedef url_canon::StdStringReplacements<string16> ReplacementsW; |
| 20 | 21 |
| 21 // Creates an empty, invalid URL. | 22 // Creates an empty, invalid URL. |
| 22 GURL(); | 23 URL_EXPORT GURL(); |
|
tfarina
2013/05/29 03:00:57
I'm not sure if this was intentional, otherwise I'
Lei Zhang
2013/05/29 03:23:25
Done.
| |
| 23 | 24 |
| 24 // Copy construction is relatively inexpensive, with most of the time going | 25 // Copy construction is relatively inexpensive, with most of the time going |
| 25 // to reallocating the string. It does not re-parse. | 26 // to reallocating the string. It does not re-parse. |
| 26 GURL(const GURL& other); | 27 URL_EXPORT GURL(const GURL& other); |
| 27 | 28 |
| 28 // The narrow version requires the input be UTF-8. Invalid UTF-8 input will | 29 // The narrow version requires the input be UTF-8. Invalid UTF-8 input will |
| 29 // result in an invalid URL. | 30 // result in an invalid URL. |
| 30 // | 31 // |
| 31 // The wide version should also take an encoding parameter so we know how to | 32 // The wide version should also take an encoding parameter so we know how to |
| 32 // encode the query parameters. It is probably sufficient for the narrow | 33 // encode the query parameters. It is probably sufficient for the narrow |
| 33 // version to assume the query parameter encoding should be the same as the | 34 // version to assume the query parameter encoding should be the same as the |
| 34 // input encoding. | 35 // input encoding. |
| 35 explicit GURL(const std::string& url_string | 36 URL_EXPORT explicit GURL(const std::string& url_string |
| 36 /*, output_param_encoding*/); | 37 /*, output_param_encoding*/); |
| 37 explicit GURL(const string16& url_string | 38 URL_EXPORT explicit GURL(const string16& url_string |
| 38 /*, output_param_encoding*/); | 39 /*, output_param_encoding*/); |
| 39 | 40 |
| 40 // Constructor for URLs that have already been parsed and canonicalized. This | 41 // Constructor for URLs that have already been parsed and canonicalized. This |
| 41 // is used for conversions from KURL, for example. The caller must supply all | 42 // is used for conversions from KURL, for example. The caller must supply all |
| 42 // information associated with the URL, which must be correct and consistent. | 43 // information associated with the URL, which must be correct and consistent. |
| 43 GURL(const char* canonical_spec, size_t canonical_spec_len, | 44 URL_EXPORT GURL(const char* canonical_spec, size_t canonical_spec_len, |
| 44 const url_parse::Parsed& parsed, bool is_valid); | 45 const url_parse::Parsed& parsed, bool is_valid); |
| 45 | 46 |
| 46 ~GURL(); | 47 URL_EXPORT ~GURL(); |
| 47 | 48 |
| 48 GURL& operator=(const GURL& other); | 49 URL_EXPORT GURL& operator=(const GURL& other); |
| 49 | 50 |
| 50 // Returns true when this object represents a valid parsed URL. When not | 51 // Returns true when this object represents a valid parsed URL. When not |
| 51 // valid, other functions will still succeed, but you will not get canonical | 52 // valid, other functions will still succeed, but you will not get canonical |
| 52 // data out in the format you may be expecting. Instead, we keep something | 53 // data out in the format you may be expecting. Instead, we keep something |
| 53 // "reasonable looking" so that the user can see how it's busted if | 54 // "reasonable looking" so that the user can see how it's busted if |
| 54 // displayed to them. | 55 // displayed to them. |
| 55 bool is_valid() const { | 56 bool is_valid() const { |
| 56 return is_valid_; | 57 return is_valid_; |
| 57 } | 58 } |
| 58 | 59 |
| (...skipping 11 matching lines...) Expand all Loading... | |
| 70 // | 71 // |
| 71 // The URL will be ASCII except the reference fragment, which may be UTF-8. | 72 // The URL will be ASCII except the reference fragment, which may be UTF-8. |
| 72 // It is guaranteed to be valid UTF-8. | 73 // It is guaranteed to be valid UTF-8. |
| 73 // | 74 // |
| 74 // The exception is for empty() URLs (which are !is_valid()) but this will | 75 // The exception is for empty() URLs (which are !is_valid()) but this will |
| 75 // return the empty string without asserting. | 76 // return the empty string without asserting. |
| 76 // | 77 // |
| 77 // Use possibly_invalid_spec() below to get the unusable spec of an invalid | 78 // Use possibly_invalid_spec() below to get the unusable spec of an invalid |
| 78 // URL. This separation is designed to prevent errors that may cause security | 79 // URL. This separation is designed to prevent errors that may cause security |
| 79 // problems that could result from the mistaken use of an invalid URL. | 80 // problems that could result from the mistaken use of an invalid URL. |
| 80 const std::string& spec() const; | 81 URL_EXPORT const std::string& spec() const; |
| 81 | 82 |
| 82 // Returns the potentially invalid spec for the URL. This spec MUST NOT be | 83 // Returns the potentially invalid spec for the URL. This spec MUST NOT be |
| 83 // modified or sent over the network. It is designed to be displayed in error | 84 // modified or sent over the network. It is designed to be displayed in error |
| 84 // messages to the user, as the appearance of the spec may explain the error. | 85 // messages to the user, as the appearance of the spec may explain the error. |
| 85 // If the spec is valid, the valid spec will be returned. | 86 // If the spec is valid, the valid spec will be returned. |
| 86 // | 87 // |
| 87 // The returned string is guaranteed to be valid UTF-8. | 88 // The returned string is guaranteed to be valid UTF-8. |
| 88 const std::string& possibly_invalid_spec() const { | 89 const std::string& possibly_invalid_spec() const { |
| 89 return spec_; | 90 return spec_; |
| 90 } | 91 } |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 122 // "standard" (IsStandard() == false) and the input looks relative, we | 123 // "standard" (IsStandard() == false) and the input looks relative, we |
| 123 // can't resolve it. In these cases, the result will be an empty, invalid | 124 // can't resolve it. In these cases, the result will be an empty, invalid |
| 124 // GURL. | 125 // GURL. |
| 125 // | 126 // |
| 126 // The result may also be a nonempty, invalid URL if the input has some kind | 127 // The result may also be a nonempty, invalid URL if the input has some kind |
| 127 // of encoding error. In these cases, we will try to construct a "good" URL | 128 // of encoding error. In these cases, we will try to construct a "good" URL |
| 128 // that may have meaning to the user, but it will be marked invalid. | 129 // that may have meaning to the user, but it will be marked invalid. |
| 129 // | 130 // |
| 130 // It is an error to resolve a URL relative to an invalid URL. The result | 131 // It is an error to resolve a URL relative to an invalid URL. The result |
| 131 // will be the empty URL. | 132 // will be the empty URL. |
| 132 GURL Resolve(const std::string& relative) const; | 133 URL_EXPORT GURL Resolve(const std::string& relative) const; |
| 133 GURL Resolve(const string16& relative) const; | 134 URL_EXPORT GURL Resolve(const string16& relative) const; |
| 134 | 135 |
| 135 // Like Resolve() above but takes a character set encoder which will be used | 136 // Like Resolve() above but takes a character set encoder which will be used |
| 136 // for any query text specified in the input. The charset converter parameter | 137 // for any query text specified in the input. The charset converter parameter |
| 137 // may be NULL, in which case it will be treated as UTF-8. | 138 // may be NULL, in which case it will be treated as UTF-8. |
| 138 // | 139 // |
| 139 // TODO(brettw): These should be replaced with versions that take something | 140 // TODO(brettw): These should be replaced with versions that take something |
| 140 // more friendly than a raw CharsetConverter (maybe like an ICU character set | 141 // more friendly than a raw CharsetConverter (maybe like an ICU character set |
| 141 // name). | 142 // name). |
| 142 GURL ResolveWithCharsetConverter( | 143 URL_EXPORT GURL ResolveWithCharsetConverter( |
| 143 const std::string& relative, | 144 const std::string& relative, |
| 144 url_canon::CharsetConverter* charset_converter) const; | 145 url_canon::CharsetConverter* charset_converter) const; |
| 145 GURL ResolveWithCharsetConverter( | 146 URL_EXPORT GURL ResolveWithCharsetConverter( |
| 146 const string16& relative, | 147 const string16& relative, |
| 147 url_canon::CharsetConverter* charset_converter) const; | 148 url_canon::CharsetConverter* charset_converter) const; |
| 148 | 149 |
| 149 // Creates a new GURL by replacing the current URL's components with the | 150 // Creates a new GURL by replacing the current URL's components with the |
| 150 // supplied versions. See the Replacements class in url_canon.h for more. | 151 // supplied versions. See the Replacements class in url_canon.h for more. |
| 151 // | 152 // |
| 152 // These are not particularly quick, so avoid doing mutations when possible. | 153 // These are not particularly quick, so avoid doing mutations when possible. |
| 153 // Prefer the 8-bit version when possible. | 154 // Prefer the 8-bit version when possible. |
| 154 // | 155 // |
| 155 // It is an error to replace components of an invalid URL. The result will | 156 // It is an error to replace components of an invalid URL. The result will |
| 156 // be the empty URL. | 157 // be the empty URL. |
| 157 // | 158 // |
| 158 // Note that we use the more general url_canon::Replacements type to give | 159 // Note that we use the more general url_canon::Replacements type to give |
| 159 // callers extra flexibility rather than our override. | 160 // callers extra flexibility rather than our override. |
| 160 GURL ReplaceComponents( | 161 URL_EXPORT GURL ReplaceComponents( |
| 161 const url_canon::Replacements<char>& replacements) const; | 162 const url_canon::Replacements<char>& replacements) const; |
| 162 GURL ReplaceComponents( | 163 URL_EXPORT GURL ReplaceComponents( |
| 163 const url_canon::Replacements<char16>& replacements) const; | 164 const url_canon::Replacements<char16>& replacements) const; |
| 164 | 165 |
| 165 // A helper function that is equivalent to replacing the path with a slash | 166 // A helper function that is equivalent to replacing the path with a slash |
| 166 // and clearing out everything after that. We sometimes need to know just the | 167 // and clearing out everything after that. We sometimes need to know just the |
| 167 // scheme and the authority. If this URL is not a standard URL (it doesn't | 168 // scheme and the authority. If this URL is not a standard URL (it doesn't |
| 168 // have the regular authority and path sections), then the result will be | 169 // have the regular authority and path sections), then the result will be |
| 169 // an empty, invalid GURL. Note that this *does* work for file: URLs, which | 170 // an empty, invalid GURL. Note that this *does* work for file: URLs, which |
| 170 // some callers may want to filter out before calling this. | 171 // some callers may want to filter out before calling this. |
| 171 // | 172 // |
| 172 // It is an error to get an empty path on an invalid URL. The result | 173 // It is an error to get an empty path on an invalid URL. The result |
| 173 // will be the empty URL. | 174 // will be the empty URL. |
| 174 GURL GetWithEmptyPath() const; | 175 URL_EXPORT GURL GetWithEmptyPath() const; |
| 175 | 176 |
| 176 // A helper function to return a GURL containing just the scheme, host, | 177 // A helper function to return a GURL containing just the scheme, host, |
| 177 // and port from a URL. Equivalent to clearing any username and password, | 178 // and port from a URL. Equivalent to clearing any username and password, |
| 178 // replacing the path with a slash, and clearing everything after that. If | 179 // replacing the path with a slash, and clearing everything after that. If |
| 179 // this URL is not a standard URL, then the result will be an empty, | 180 // this URL is not a standard URL, then the result will be an empty, |
| 180 // invalid GURL. If the URL has neither username nor password, this | 181 // invalid GURL. If the URL has neither username nor password, this |
| 181 // degenerates to GetWithEmptyPath(). | 182 // degenerates to GetWithEmptyPath(). |
| 182 // | 183 // |
| 183 // It is an error to get the origin of an invalid URL. The result | 184 // It is an error to get the origin of an invalid URL. The result |
| 184 // will be the empty URL. | 185 // will be the empty URL. |
| 185 GURL GetOrigin() const; | 186 URL_EXPORT GURL GetOrigin() const; |
| 186 | 187 |
| 187 // Returns true if the scheme for the current URL is a known "standard" | 188 // Returns true if the scheme for the current URL is a known "standard" |
| 188 // scheme. Standard schemes have an authority and a path section. This | 189 // scheme. Standard schemes have an authority and a path section. This |
| 189 // includes file: and filesystem:, which some callers may want to filter out | 190 // includes file: and filesystem:, which some callers may want to filter out |
| 190 // explicitly by calling SchemeIsFile[System]. | 191 // explicitly by calling SchemeIsFile[System]. |
| 191 bool IsStandard() const; | 192 URL_EXPORT bool IsStandard() const; |
| 192 | 193 |
| 193 // Returns true if the given parameter (should be lower-case ASCII to match | 194 // Returns true if the given parameter (should be lower-case ASCII to match |
| 194 // the canonicalized scheme) is the scheme for this URL. This call is more | 195 // the canonicalized scheme) is the scheme for this URL. This call is more |
| 195 // efficient than getting the scheme and comparing it because no copies or | 196 // efficient than getting the scheme and comparing it because no copies or |
| 196 // object constructions are done. | 197 // object constructions are done. |
| 197 bool SchemeIs(const char* lower_ascii_scheme) const; | 198 URL_EXPORT bool SchemeIs(const char* lower_ascii_scheme) const; |
| 198 | 199 |
| 199 // We often need to know if this is a file URL. File URLs are "standard", but | 200 // We often need to know if this is a file URL. File URLs are "standard", but |
| 200 // are often treated separately by some programs. | 201 // are often treated separately by some programs. |
| 201 bool SchemeIsFile() const { | 202 bool SchemeIsFile() const { |
| 202 return SchemeIs("file"); | 203 return SchemeIs("file"); |
| 203 } | 204 } |
| 204 | 205 |
| 205 // FileSystem URLs need to be treated differently in some cases. | 206 // FileSystem URLs need to be treated differently in some cases. |
| 206 bool SchemeIsFileSystem() const { | 207 bool SchemeIsFileSystem() const { |
| 207 return SchemeIs("filesystem"); | 208 return SchemeIs("filesystem"); |
| 208 } | 209 } |
| 209 | 210 |
| 210 // Returns true if the scheme indicates a secure connection. | 211 // Returns true if the scheme indicates a secure connection. |
| 211 bool SchemeIsSecure() const { | 212 bool SchemeIsSecure() const { |
| 212 return SchemeIs("https") || SchemeIs("wss") || | 213 return SchemeIs("https") || SchemeIs("wss") || |
| 213 (SchemeIsFileSystem() && inner_url() && inner_url()->SchemeIsSecure()); | 214 (SchemeIsFileSystem() && inner_url() && inner_url()->SchemeIsSecure()); |
| 214 } | 215 } |
| 215 | 216 |
| 216 // Returns true if the hostname is an IP address. Note: this function isn't | 217 // Returns true if the hostname is an IP address. Note: this function isn't |
| 217 // as cheap as a simple getter because it re-parses the hostname to verify. | 218 // as cheap as a simple getter because it re-parses the hostname to verify. |
| 218 // This currently identifies only IPv4 addresses (bug 822685). | 219 // This currently identifies only IPv4 addresses (bug 822685). |
| 219 bool HostIsIPAddress() const; | 220 URL_EXPORT bool HostIsIPAddress() const; |
| 220 | 221 |
| 221 // Getters for various components of the URL. The returned string will be | 222 // Getters for various components of the URL. The returned string will be |
| 222 // empty if the component is empty or is not present. | 223 // empty if the component is empty or is not present. |
| 223 std::string scheme() const { // Not including the colon. See also SchemeIs. | 224 std::string scheme() const { // Not including the colon. See also SchemeIs. |
| 224 return ComponentString(parsed_.scheme); | 225 return ComponentString(parsed_.scheme); |
| 225 } | 226 } |
| 226 std::string username() const { | 227 std::string username() const { |
| 227 return ComponentString(parsed_.username); | 228 return ComponentString(parsed_.username); |
| 228 } | 229 } |
| 229 std::string password() const { | 230 std::string password() const { |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 275 } | 276 } |
| 276 bool has_query() const { | 277 bool has_query() const { |
| 277 return parsed_.query.len >= 0; | 278 return parsed_.query.len >= 0; |
| 278 } | 279 } |
| 279 bool has_ref() const { | 280 bool has_ref() const { |
| 280 return parsed_.ref.len >= 0; | 281 return parsed_.ref.len >= 0; |
| 281 } | 282 } |
| 282 | 283 |
| 283 // Returns a parsed version of the port. Can also be any of the special | 284 // Returns a parsed version of the port. Can also be any of the special |
| 284 // values defined in Parsed for ExtractPort. | 285 // values defined in Parsed for ExtractPort. |
| 285 int IntPort() const; | 286 URL_EXPORT int IntPort() const; |
| 286 | 287 |
| 287 // Returns the port number of the url, or the default port number. | 288 // Returns the port number of the url, or the default port number. |
| 288 // If the scheme has no concept of port (or unknown default) returns | 289 // If the scheme has no concept of port (or unknown default) returns |
| 289 // PORT_UNSPECIFIED. | 290 // PORT_UNSPECIFIED. |
| 290 int EffectiveIntPort() const; | 291 URL_EXPORT int EffectiveIntPort() const; |
| 291 | 292 |
| 292 // Extracts the filename portion of the path and returns it. The filename | 293 // Extracts the filename portion of the path and returns it. The filename |
| 293 // is everything after the last slash in the path. This may be empty. | 294 // is everything after the last slash in the path. This may be empty. |
| 294 std::string ExtractFileName() const; | 295 URL_EXPORT std::string ExtractFileName() const; |
| 295 | 296 |
| 296 // Returns the path that should be sent to the server. This is the path, | 297 // Returns the path that should be sent to the server. This is the path, |
| 297 // parameter, and query portions of the URL. It is guaranteed to be ASCII. | 298 // parameter, and query portions of the URL. It is guaranteed to be ASCII. |
| 298 std::string PathForRequest() const; | 299 URL_EXPORT std::string PathForRequest() const; |
| 299 | 300 |
| 300 // Returns the host, excluding the square brackets surrounding IPv6 address | 301 // Returns the host, excluding the square brackets surrounding IPv6 address |
| 301 // literals. This can be useful for passing to getaddrinfo(). | 302 // literals. This can be useful for passing to getaddrinfo(). |
| 302 std::string HostNoBrackets() const; | 303 URL_EXPORT std::string HostNoBrackets() const; |
| 303 | 304 |
| 304 // Returns true if this URL's host matches or is in the same domain as | 305 // Returns true if this URL's host matches or is in the same domain as |
| 305 // the given input string. For example, if this URL's host was | 306 // the given input string. For example, if this URL's host was |
| 306 // "www.google.com", this would match "com", "google.com", and | 307 // "www.google.com", this would match "com", "google.com", and |
| 307 // "www.google.com" (the input domain should be lower-case ASCII to match | 308 // "www.google.com" (the input domain should be lower-case ASCII to match |
| 308 // the canonicalized host). This call is more efficient than getting the | 309 // the canonicalized host). This call is more efficient than getting the |
| 309 // host and checking whether it has the specific domain, because no copies | 310 // host and checking whether it has the specific domain, because no copies |
| 310 // or object constructions are done. | 311 // or object constructions are done. |
| 311 // | 312 // |
| 312 // This overload takes an explicit domain_len, so lower_ascii_domain is not | 313 // This overload takes an explicit domain_len, so lower_ascii_domain is not |
| 313 // required to be NUL-terminated. | 314 // required to be NUL-terminated. |
| 314 bool DomainIs(const char* lower_ascii_domain, int domain_len) const; | 315 URL_EXPORT bool DomainIs(const char* lower_ascii_domain, |
| 316 int domain_len) const; | |
| 315 | 317 |
| 316 // This overload takes only lower_ascii_domain, so the domain string must | 318 // This overload takes only lower_ascii_domain, so the domain string must |
| 317 // be NUL-terminated. | 319 // be NUL-terminated. |
| 318 bool DomainIs(const char* lower_ascii_domain) const { | 320 bool DomainIs(const char* lower_ascii_domain) const { |
| 319 return DomainIs(lower_ascii_domain, | 321 return DomainIs(lower_ascii_domain, |
| 320 static_cast<int>(strlen(lower_ascii_domain))); | 322 static_cast<int>(strlen(lower_ascii_domain))); |
| 321 } | 323 } |
| 322 | 324 |
| 323 // Swaps the contents of this GURL object with the argument without doing | 325 // Swaps the contents of this GURL object with the argument without doing |
| 324 // any memory allocations. | 326 // any memory allocations. |
| 325 void Swap(GURL* other); | 327 URL_EXPORT void Swap(GURL* other); |
| 326 | 328 |
| 327 // Returns a reference to a singleton empty GURL. This object is for callers | 329 // Returns a reference to a singleton empty GURL. This object is for callers |
| 328 // who return references but don't have anything to return in some cases. | 330 // who return references but don't have anything to return in some cases. |
| 329 // This function may be called from any thread. | 331 // This function may be called from any thread. |
| 330 static const GURL& EmptyGURL(); | 332 URL_EXPORT static const GURL& EmptyGURL(); |
| 331 | 333 |
| 332 // Returns the inner URL of a nested URL [currently only non-null for | 334 // Returns the inner URL of a nested URL [currently only non-null for |
| 333 // filesystem: URLs]. | 335 // filesystem: URLs]. |
| 334 const GURL* inner_url() const { | 336 const GURL* inner_url() const { |
| 335 return inner_url_; | 337 return inner_url_; |
| 336 } | 338 } |
| 337 | 339 |
| 338 private: | 340 private: |
| 339 // Returns the substring of the input identified by the given component. | 341 // Returns the substring of the input identified by the given component. |
| 340 std::string ComponentString(const url_parse::Component& comp) const { | 342 std::string ComponentString(const url_parse::Component& comp) const { |
| (...skipping 13 matching lines...) Expand all Loading... | |
| 354 // Identified components of the canonical spec. | 356 // Identified components of the canonical spec. |
| 355 url_parse::Parsed parsed_; | 357 url_parse::Parsed parsed_; |
| 356 | 358 |
| 357 // Used for nested schemes [currently only filesystem:]. | 359 // Used for nested schemes [currently only filesystem:]. |
| 358 GURL* inner_url_; | 360 GURL* inner_url_; |
| 359 | 361 |
| 360 // TODO bug 684583: Add encoding for query params. | 362 // TODO bug 684583: Add encoding for query params. |
| 361 }; | 363 }; |
| 362 | 364 |
| 363 // Stream operator so GURL can be used in assertion statements. | 365 // Stream operator so GURL can be used in assertion statements. |
| 364 std::ostream& operator<<(std::ostream& out, const GURL& url); | 366 URL_EXPORT std::ostream& operator<<(std::ostream& out, const GURL& url); |
| 365 | 367 |
| 366 #endif // URL_GURL_H_ | 368 #endif // URL_GURL_H_ |
| OLD | NEW |