OLD | NEW |
1 // Copyright 2007, Google Inc. | 1 // Copyright 2007, Google Inc. |
2 // All rights reserved. | 2 // All rights reserved. |
3 // | 3 // |
4 // Redistribution and use in source and binary forms, with or without | 4 // Redistribution and use in source and binary forms, with or without |
5 // modification, are permitted provided that the following conditions are | 5 // modification, are permitted provided that the following conditions are |
6 // met: | 6 // met: |
7 // | 7 // |
8 // * Redistributions of source code must retain the above copyright | 8 // * Redistributions of source code must retain the above copyright |
9 // notice, this list of conditions and the following disclaimer. | 9 // notice, this list of conditions and the following disclaimer. |
10 // * Redistributions in binary form must reproduce the above | 10 // * Redistributions in binary form must reproduce the above |
(...skipping 18 matching lines...) Expand all Loading... |
29 | 29 |
30 #ifndef GOOGLEURL_SRC_GURL_H__ | 30 #ifndef GOOGLEURL_SRC_GURL_H__ |
31 #define GOOGLEURL_SRC_GURL_H__ | 31 #define GOOGLEURL_SRC_GURL_H__ |
32 | 32 |
33 #include <iostream> | 33 #include <iostream> |
34 #include <string> | 34 #include <string> |
35 | 35 |
36 #include "base/string16.h" | 36 #include "base/string16.h" |
37 #include "googleurl/src/url_canon.h" | 37 #include "googleurl/src/url_canon.h" |
38 #include "googleurl/src/url_canon_stdstring.h" | 38 #include "googleurl/src/url_canon_stdstring.h" |
| 39 #include "googleurl/src/url_common.h" |
39 #include "googleurl/src/url_parse.h" | 40 #include "googleurl/src/url_parse.h" |
40 | 41 |
41 class GURL { | 42 class GURL { |
42 public: | 43 public: |
43 typedef url_canon::StdStringReplacements<std::string> Replacements; | 44 typedef url_canon::StdStringReplacements<std::string> Replacements; |
44 typedef url_canon::StdStringReplacements<string16> ReplacementsW; | 45 typedef url_canon::StdStringReplacements<string16> ReplacementsW; |
45 | 46 |
46 // Creates an empty, invalid URL. | 47 // Creates an empty, invalid URL. |
47 GURL(); | 48 GURL_API GURL(); |
48 | 49 |
49 // Copy construction is relatively inexpensive, with most of the time going | 50 // Copy construction is relatively inexpensive, with most of the time going |
50 // to reallocating the string. It does not re-parse. | 51 // to reallocating the string. It does not re-parse. |
51 GURL(const GURL& other); | 52 GURL_API GURL(const GURL& other); |
52 | 53 |
53 // The narrow version requires the input be UTF-8. Invalid UTF-8 input will | 54 // The narrow version requires the input be UTF-8. Invalid UTF-8 input will |
54 // result in an invalid URL. | 55 // result in an invalid URL. |
55 // | 56 // |
56 // The wide version should also take an encoding parameter so we know how to | 57 // The wide version should also take an encoding parameter so we know how to |
57 // encode the query parameters. It is probably sufficient for the narrow | 58 // encode the query parameters. It is probably sufficient for the narrow |
58 // version to assume the query parameter encoding should be the same as the | 59 // version to assume the query parameter encoding should be the same as the |
59 // input encoding. | 60 // input encoding. |
60 explicit GURL(const std::string& url_string /*, output_param_encoding*/); | 61 GURL_API explicit GURL(const std::string& url_string |
61 explicit GURL(const string16& url_string /*, output_param_encoding*/); | 62 /*, output_param_encoding*/); |
| 63 GURL_API explicit GURL(const string16& url_string |
| 64 /*, output_param_encoding*/); |
62 | 65 |
63 // Constructor for URLs that have already been parsed and canonicalized. This | 66 // Constructor for URLs that have already been parsed and canonicalized. This |
64 // is used for conversions from KURL, for example. The caller must supply all | 67 // is used for conversions from KURL, for example. The caller must supply all |
65 // information associated with the URL, which must be correct and consistent. | 68 // information associated with the URL, which must be correct and consistent. |
66 GURL(const char* canonical_spec, size_t canonical_spec_len, | 69 GURL_API GURL(const char* canonical_spec, size_t canonical_spec_len, |
67 const url_parse::Parsed& parsed, bool is_valid); | 70 const url_parse::Parsed& parsed, bool is_valid); |
68 | 71 |
69 // Returns true when this object represents a valid parsed URL. When not | 72 // Returns true when this object represents a valid parsed URL. When not |
70 // valid, other functions will still succeed, but you will not get canonical | 73 // valid, other functions will still succeed, but you will not get canonical |
71 // data out in the format you may be expecting. Instead, we keep something | 74 // data out in the format you may be expecting. Instead, we keep something |
72 // "reasonable looking" so that the user can see how it's busted if | 75 // "reasonable looking" so that the user can see how it's busted if |
73 // displayed to them. | 76 // displayed to them. |
74 bool is_valid() const { | 77 bool is_valid() const { |
75 return is_valid_; | 78 return is_valid_; |
76 } | 79 } |
77 | 80 |
(...skipping 11 matching lines...) Expand all Loading... |
89 // | 92 // |
90 // The URL will be ASCII except the reference fragment, which may be UTF-8. | 93 // The URL will be ASCII except the reference fragment, which may be UTF-8. |
91 // It is guaranteed to be valid UTF-8. | 94 // It is guaranteed to be valid UTF-8. |
92 // | 95 // |
93 // The exception is for empty() URLs (which are !is_valid()) but this will | 96 // The exception is for empty() URLs (which are !is_valid()) but this will |
94 // return the empty string without asserting. | 97 // return the empty string without asserting. |
95 // | 98 // |
96 // Use possibly_invalid_spec() below to get the unusable spec of an invalid URL. This | 99 // Use possibly_invalid_spec() below to get the unusable spec of an invalid URL. This |
97 // separation is designed to prevent errors that may cause security problems | 100 // separation is designed to prevent errors that may cause security problems |
98 // that could result from the mistaken use of an invalid URL. | 101 // that could result from the mistaken use of an invalid URL. |
99 const std::string& spec() const; | 102 GURL_API const std::string& spec() const; |
100 | 103 |
101 // Returns the potentially invalid spec for the URL. This spec MUST NOT be | 104 // Returns the potentially invalid spec for the URL. This spec MUST NOT be |
102 // modified or sent over the network. It is designed to be displayed in error | 105 // modified or sent over the network. It is designed to be displayed in error |
103 // messages to the user, as the appearance of the spec may explain the error. | 106 // messages to the user, as the appearance of the spec may explain the error. |
104 // If the spec is valid, the valid spec will be returned. | 107 // If the spec is valid, the valid spec will be returned. |
105 // | 108 // |
106 // The returned string is guaranteed to be valid UTF-8. | 109 // The returned string is guaranteed to be valid UTF-8. |
107 const std::string& possibly_invalid_spec() const { | 110 const std::string& possibly_invalid_spec() const { |
108 return spec_; | 111 return spec_; |
109 } | 112 } |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
141 // "standard" (SchemeIsStandard() == false) and the input looks relative, we | 144 // "standard" (SchemeIsStandard() == false) and the input looks relative, we |
142 // can't resolve it. In these cases, the result will be an empty, invalid | 145 // can't resolve it. In these cases, the result will be an empty, invalid |
143 // GURL. | 146 // GURL. |
144 // | 147 // |
145 // The result may also be a nonempty, invalid URL if the input has some kind | 148 // The result may also be a nonempty, invalid URL if the input has some kind |
146 // of encoding error. In these cases, we will try to construct a "good" URL | 149 // of encoding error. In these cases, we will try to construct a "good" URL |
147 // that may have meaning to the user, but it will be marked invalid. | 150 // that may have meaning to the user, but it will be marked invalid. |
148 // | 151 // |
149 // It is an error to resolve a URL relative to an invalid URL. The result | 152 // It is an error to resolve a URL relative to an invalid URL. The result |
150 // will be the empty URL. | 153 // will be the empty URL. |
151 GURL Resolve(const std::string& relative) const; | 154 GURL_API GURL Resolve(const std::string& relative) const; |
152 GURL Resolve(const string16& relative) const; | 155 GURL_API GURL Resolve(const string16& relative) const; |
153 | 156 |
154 // Like Resolve() above but takes a character set encoder which will be used | 157 // Like Resolve() above but takes a character set encoder which will be used |
155 // for any query text specified in the input. The charset converter parameter | 158 // for any query text specified in the input. The charset converter parameter |
156 // may be NULL, in which case it will be treated as UTF-8. | 159 // may be NULL, in which case it will be treated as UTF-8. |
157 // | 160 // |
158 // TODO(brettw): These should be replaced with versions that take something | 161 // TODO(brettw): These should be replaced with versions that take something |
159 // more friendly than a raw CharsetConverter (maybe like an ICU character set | 162 // more friendly than a raw CharsetConverter (maybe like an ICU character set |
160 // name). | 163 // name). |
161 GURL ResolveWithCharsetConverter( | 164 GURL_API GURL ResolveWithCharsetConverter( |
162 const std::string& relative, | 165 const std::string& relative, |
163 url_canon::CharsetConverter* charset_converter) const; | 166 url_canon::CharsetConverter* charset_converter) const; |
164 GURL ResolveWithCharsetConverter( | 167 GURL_API GURL ResolveWithCharsetConverter( |
165 const string16& relative, | 168 const string16& relative, |
166 url_canon::CharsetConverter* charset_converter) const; | 169 url_canon::CharsetConverter* charset_converter) const; |
167 | 170 |
168 // Creates a new GURL by replacing the current URL's components with the | 171 // Creates a new GURL by replacing the current URL's components with the |
169 // supplied versions. See the Replacements class in url_canon.h for more. | 172 // supplied versions. See the Replacements class in url_canon.h for more. |
170 // | 173 // |
171 // These are not particularly quick, so avoid doing mutations when possible. | 174 // These are not particularly quick, so avoid doing mutations when possible. |
172 // Prefer the 8-bit version when possible. | 175 // Prefer the 8-bit version when possible. |
173 // | 176 // |
174 // It is an error to replace components of an invalid URL. The result will | 177 // It is an error to replace components of an invalid URL. The result will |
175 // be the empty URL. | 178 // be the empty URL. |
176 // | 179 // |
177 // Note that we use the more general url_canon::Replacements type to give | 180 // Note that we use the more general url_canon::Replacements type to give |
178 // callers extra flexibility rather than our override. | 181 // callers extra flexibility rather than our override. |
179 GURL ReplaceComponents( | 182 GURL_API GURL ReplaceComponents( |
180 const url_canon::Replacements<char>& replacements) const; | 183 const url_canon::Replacements<char>& replacements) const; |
181 GURL ReplaceComponents( | 184 GURL_API GURL ReplaceComponents( |
182 const url_canon::Replacements<char16>& replacements) const; | 185 const url_canon::Replacements<char16>& replacements) const; |
183 | 186 |
184 // A helper function that is equivalent to replacing the path with a slash | 187 // A helper function that is equivalent to replacing the path with a slash |
185 // and clearing out everything after that. We sometimes need to know just the | 188 // and clearing out everything after that. We sometimes need to know just the |
186 // scheme and the authority. If this URL is not a standard URL (it doesn't | 189 // scheme and the authority. If this URL is not a standard URL (it doesn't |
187 // have the regular authority and path sections), then the result will be | 190 // have the regular authority and path sections), then the result will be |
188 // an empty, invalid GURL. Note that this *does* work for file: URLs, which | 191 // an empty, invalid GURL. Note that this *does* work for file: URLs, which |
189 // some callers may want to filter out before calling this. | 192 // some callers may want to filter out before calling this. |
190 // | 193 // |
191 // It is an error to get an empty path on an invalid URL. The result | 194 // It is an error to get an empty path on an invalid URL. The result |
192 // will be the empty URL. | 195 // will be the empty URL. |
193 GURL GetWithEmptyPath() const; | 196 GURL_API GURL GetWithEmptyPath() const; |
194 | 197 |
195 // A helper function to return a GURL containing just the scheme, host, | 198 // A helper function to return a GURL containing just the scheme, host, |
196 // and port from a URL. Equivalent to clearing any username and password, | 199 // and port from a URL. Equivalent to clearing any username and password, |
197 // replacing the path with a slash, and clearing everything after that. If | 200 // replacing the path with a slash, and clearing everything after that. If |
198 // this URL is not a standard URL, then the result will be an empty, | 201 // this URL is not a standard URL, then the result will be an empty, |
199 // invalid GURL. If the URL has neither username nor password, this | 202 // invalid GURL. If the URL has neither username nor password, this |
200 // degenerates to GetWithEmptyPath(). | 203 // degenerates to GetWithEmptyPath(). |
201 // | 204 // |
202 // It is an error to get the origin of an invalid URL. The result | 205 // It is an error to get the origin of an invalid URL. The result |
203 // will be the empty URL. | 206 // will be the empty URL. |
204 GURL GetOrigin() const; | 207 GURL_API GURL GetOrigin() const; |
205 | 208 |
206 // Returns true if the scheme for the current URL is a known "standard" | 209 // Returns true if the scheme for the current URL is a known "standard" |
207 // scheme. Standard schemes have an authority and a path section. This | 210 // scheme. Standard schemes have an authority and a path section. This |
208 // includes file:, which some callers may want to filter out explicitly by | 211 // includes file:, which some callers may want to filter out explicitly by |
209 // calling SchemeIsFile. | 212 // calling SchemeIsFile. |
210 bool IsStandard() const; | 213 GURL_API bool IsStandard() const; |
211 | 214 |
212 // Returns true if the given parameter (should be lower-case ASCII to match | 215 // Returns true if the given parameter (should be lower-case ASCII to match |
213 // the canonicalized scheme) is the scheme for this URL. This call is more | 216 // the canonicalized scheme) is the scheme for this URL. This call is more |
214 // efficient than getting the scheme and comparing it because no copies or | 217 // efficient than getting the scheme and comparing it because no copies or |
215 // object constructions are done. | 218 // object constructions are done. |
216 bool SchemeIs(const char* lower_ascii_scheme) const; | 219 GURL_API bool SchemeIs(const char* lower_ascii_scheme) const; |
217 | 220 |
218 // We often need to know if this is a file URL. File URLs are "standard", but | 221 // We often need to know if this is a file URL. File URLs are "standard", but |
219 // are often treated separately by some programs. | 222 // are often treated separately by some programs. |
220 bool SchemeIsFile() const { | 223 bool SchemeIsFile() const { |
221 return SchemeIs("file"); | 224 return SchemeIs("file"); |
222 } | 225 } |
223 | 226 |
224 // If the scheme indicates a secure connection | 227 // If the scheme indicates a secure connection |
225 bool SchemeIsSecure() const { | 228 bool SchemeIsSecure() const { |
226 return SchemeIs("https"); | 229 return SchemeIs("https"); |
227 } | 230 } |
228 | 231 |
229 // Returns true if the hostname is an IP address. Note: this function isn't | 232 // Returns true if the hostname is an IP address. Note: this function isn't |
230 // as cheap as a simple getter because it re-parses the hostname to verify. | 233 // as cheap as a simple getter because it re-parses the hostname to verify. |
231 // This currently identifies only IPv4 addresses (bug 822685). | 234 // This currently identifies only IPv4 addresses (bug 822685). |
232 bool HostIsIPAddress() const; | 235 GURL_API bool HostIsIPAddress() const; |
233 | 236 |
234 // Getters for various components of the URL. The returned string will be | 237 // Getters for various components of the URL. The returned string will be |
235 // empty if the component is empty or is not present. | 238 // empty if the component is empty or is not present. |
236 std::string scheme() const { // Not including the colon. See also SchemeIs. | 239 std::string scheme() const { // Not including the colon. See also SchemeIs. |
237 return ComponentString(parsed_.scheme); | 240 return ComponentString(parsed_.scheme); |
238 } | 241 } |
239 std::string username() const { | 242 std::string username() const { |
240 return ComponentString(parsed_.username); | 243 return ComponentString(parsed_.username); |
241 } | 244 } |
242 std::string password() const { | 245 std::string password() const { |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
288 } | 291 } |
289 bool has_query() const { | 292 bool has_query() const { |
290 return parsed_.query.len >= 0; | 293 return parsed_.query.len >= 0; |
291 } | 294 } |
292 bool has_ref() const { | 295 bool has_ref() const { |
293 return parsed_.ref.len >= 0; | 296 return parsed_.ref.len >= 0; |
294 } | 297 } |
295 | 298 |
296 // Returns a parsed version of the port. Can also be any of the special | 299 // Returns a parsed version of the port. Can also be any of the special |
297 // values defined in Parsed for ExtractPort. | 300 // values defined in Parsed for ExtractPort. |
298 int IntPort() const; | 301 GURL_API int IntPort() const; |
299 | 302 |
300 // Returns the port number of the url, or the default port number. | 303 // Returns the port number of the url, or the default port number. |
301 // If the scheme has no concept of port (or unknown default) returns | 304 // If the scheme has no concept of port (or unknown default) returns |
302 // PORT_UNSPECIFIED. | 305 // PORT_UNSPECIFIED. |
303 int EffectiveIntPort() const; | 306 GURL_API int EffectiveIntPort() const; |
304 | 307 |
305 // Extracts the filename portion of the path and returns it. The filename | 308 // Extracts the filename portion of the path and returns it. The filename |
306 // is everything after the last slash in the path. This may be empty. | 309 // is everything after the last slash in the path. This may be empty. |
307 std::string ExtractFileName() const; | 310 GURL_API std::string ExtractFileName() const; |
308 | 311 |
309 // Returns the path that should be sent to the server. This is the path, | 312 // Returns the path that should be sent to the server. This is the path, |
310 // parameter, and query portions of the URL. It is guaranteed to be ASCII. | 313 // parameter, and query portions of the URL. It is guaranteed to be ASCII. |
311 std::string PathForRequest() const; | 314 GURL_API std::string PathForRequest() const; |
312 | 315 |
313 // Returns the host, excluding the square brackets surrounding IPv6 address | 316 // Returns the host, excluding the square brackets surrounding IPv6 address |
314 // literals. This can be useful for passing to getaddrinfo(). | 317 // literals. This can be useful for passing to getaddrinfo(). |
315 std::string HostNoBrackets() const; | 318 GURL_API std::string HostNoBrackets() const; |
316 | 319 |
317 // Returns true if this URL's host matches or is in the same domain as | 320 // Returns true if this URL's host matches or is in the same domain as |
318 // the given input string. For example if this URL was "www.google.com", | 321 // the given input string. For example if this URL was "www.google.com", |
319 // this would match "com", "google.com", and "www.google.com" | 322 // this would match "com", "google.com", and "www.google.com" |
320 // (input domain should be lower-case ASCII to match the canonicalized | 323 // (input domain should be lower-case ASCII to match the canonicalized |
321 // scheme). This call is more efficient than getting the host and checking | 324 // scheme). This call is more efficient than getting the host and checking |
322 // whether the host has the specific domain or not because no copies or | 325 // whether the host has the specific domain or not because no copies or |
323 // object constructions are done. | 326 // object constructions are done. |
324 // | 327 // |
325 // When DomainIs is called with the domain_len parameter, the parameter | 328 // When DomainIs is called with the domain_len parameter, the parameter |
326 // lower_ascii_domain is not guaranteed to be terminated with a NULL character. | 329 // lower_ascii_domain is not guaranteed to be terminated with a NULL character. |
327 bool DomainIs(const char* lower_ascii_domain, int domain_len) const; | 330 GURL_API bool DomainIs(const char* lower_ascii_domain, int domain_len) const; |
328 | 331 |
329 // When DomainIs is called with only the lower_ascii_domain parameter, the | 332 // When DomainIs is called with only the lower_ascii_domain parameter, the |
330 // domain string must be terminated with a NULL character. | 333 // domain string must be terminated with a NULL character. |
331 bool DomainIs(const char* lower_ascii_domain) const { | 334 bool DomainIs(const char* lower_ascii_domain) const { |
332 return DomainIs(lower_ascii_domain, | 335 return DomainIs(lower_ascii_domain, |
333 static_cast<int>(strlen(lower_ascii_domain))); | 336 static_cast<int>(strlen(lower_ascii_domain))); |
334 } | 337 } |
335 | 338 |
336 // Swaps the contents of this GURL object with the argument without doing | 339 // Swaps the contents of this GURL object with the argument without doing |
337 // any memory allocations. | 340 // any memory allocations. |
338 void Swap(GURL* other); | 341 GURL_API void Swap(GURL* other); |
339 | 342 |
340 // Returns a reference to a singleton empty GURL. This object is for callers | 343 // Returns a reference to a singleton empty GURL. This object is for callers |
341 // who return references but don't have anything to return in some cases. | 344 // who return references but don't have anything to return in some cases. |
342 // This function may be called from any thread. | 345 // This function may be called from any thread. |
343 static const GURL& EmptyGURL(); | 346 GURL_API static const GURL& EmptyGURL(); |
344 | 347 |
345 private: | 348 private: |
346 // Returns the substring of the input identified by the given component. | 349 // Returns the substring of the input identified by the given component. |
347 std::string ComponentString(const url_parse::Component& comp) const { | 350 std::string ComponentString(const url_parse::Component& comp) const { |
348 if (comp.len <= 0) | 351 if (comp.len <= 0) |
349 return std::string(); | 352 return std::string(); |
350 return std::string(spec_, comp.begin, comp.len); | 353 return std::string(spec_, comp.begin, comp.len); |
351 } | 354 } |
352 | 355 |
353 // The actual text of the URL, in canonical ASCII form. | 356 // The actual text of the URL, in canonical ASCII form. |
354 std::string spec_; | 357 std::string spec_; |
355 | 358 |
356 // Set when the given URL is valid. Otherwise, we may still have a spec and | 359 // Set when the given URL is valid. Otherwise, we may still have a spec and |
357 // components, but they may not identify valid resources (for example, an | 360 // components, but they may not identify valid resources (for example, an |
358 // invalid port number, invalid characters in the scheme, etc.). | 361 // invalid port number, invalid characters in the scheme, etc.). |
359 bool is_valid_; | 362 bool is_valid_; |
360 | 363 |
361 // Identified components of the canonical spec. | 364 // Identified components of the canonical spec. |
362 url_parse::Parsed parsed_; | 365 url_parse::Parsed parsed_; |
363 | 366 |
364 // TODO bug 684583: Add encoding for query params. | 367 // TODO bug 684583: Add encoding for query params. |
365 }; | 368 }; |
366 | 369 |
367 // Stream operator so GURL can be used in assertion statements. | 370 // Stream operator so GURL can be used in assertion statements. |
368 inline std::ostream& operator<<(std::ostream& out, const GURL& url) { | 371 inline std::ostream& operator<<(std::ostream& out, const GURL& url) { |
369 return out << url.possibly_invalid_spec(); | 372 return out << url.possibly_invalid_spec(); |
370 } | 373 } |
371 | 374 |
372 #endif // GOOGLEURL_SRC_GURL_H__ | 375 #endif // GOOGLEURL_SRC_GURL_H__ |
OLD | NEW |