OLD | NEW |
1 // Copyright 2007, Google Inc. | 1 // Copyright 2007, Google Inc. |
2 // All rights reserved. | 2 // All rights reserved. |
3 // | 3 // |
4 // Redistribution and use in source and binary forms, with or without | 4 // Redistribution and use in source and binary forms, with or without |
5 // modification, are permitted provided that the following conditions are | 5 // modification, are permitted provided that the following conditions are |
6 // met: | 6 // met: |
7 // | 7 // |
8 // * Redistributions of source code must retain the above copyright | 8 // * Redistributions of source code must retain the above copyright |
9 // notice, this list of conditions and the following disclaimer. | 9 // notice, this list of conditions and the following disclaimer. |
10 // * Redistributions in binary form must reproduce the above | 10 // * Redistributions in binary form must reproduce the above |
11 // copyright notice, this list of conditions and the following disclaimer | 11 // copyright notice, this list of conditions and the following disclaimer |
12 // in the documentation and/or other materials provided with the | 12 // in the documentation and/or other materials provided with the |
13 // distribution. | 13 // distribution. |
14 // * Neither the name of Google Inc. nor the names of its | 14 // * Neither the name of Google Inc. nor the names of its |
15 // contributors may be used to endorse or promote products derived from | 15 // contributors may be used to endorse or promote products derived from |
16 // this software without specific prior written permission. | 16 // this software without specific prior written permission. |
17 // | 17 // |
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | 21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 | 29 |
30 #ifndef GOOGLEURL_SRC_URL_PARSE_H__ | 30 #ifndef URL_URL_PARSE_H_ |
31 #define GOOGLEURL_SRC_URL_PARSE_H__ | 31 #define URL_URL_PARSE_H_ |
32 | 32 |
33 #include <string> | 33 #include <string> |
34 | 34 |
35 #include "base/basictypes.h" | 35 #include "base/basictypes.h" |
36 #include "base/string16.h" | 36 #include "base/string16.h" |
37 #include "googleurl/src/url_common.h" | |
38 | 37 |
39 namespace url_parse { | 38 namespace url_parse { |
40 | 39 |
41 // Deprecated, but WebKit/WebCore/platform/KURLGooglePrivate.h and | 40 // Deprecated, but WebKit/WebCore/platform/KURLGooglePrivate.h and |
42 // KURLGoogle.cpp still rely on this type. | 41 // KURLGoogle.cpp still rely on this type. |
43 typedef char16 UTF16Char; | 42 typedef char16 UTF16Char; |
44 | 43 |
45 // Component ------------------------------------------------------------------ | 44 // Component ------------------------------------------------------------------ |
46 | 45 |
47 // Represents a substring for URL parsing. | 46 // Represents a substring for URL parsing. |
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
114 PASSWORD, | 113 PASSWORD, |
115 HOST, | 114 HOST, |
116 PORT, | 115 PORT, |
117 PATH, | 116 PATH, |
118 QUERY, | 117 QUERY, |
119 REF, | 118 REF, |
120 }; | 119 }; |
121 | 120 |
122 // The default constructor is sufficient for the components, but inner_parsed_ | 121 // The default constructor is sufficient for the components, but inner_parsed_ |
123 // requires special handling. | 122 // requires special handling. |
124 GURL_API Parsed(); | 123 Parsed(); |
125 GURL_API Parsed(const Parsed&); | 124 Parsed(const Parsed&); |
126 GURL_API Parsed& operator=(const Parsed&); | 125 Parsed& operator=(const Parsed&); |
127 GURL_API ~Parsed(); | 126 ~Parsed(); |
128 | 127 |
129 // Returns the length of the URL (the end of the last component). | 128 // Returns the length of the URL (the end of the last component). |
130 // | 129 // |
131 // Note that for some invalid, non-canonical URLs, this may not be the length | 130 // Note that for some invalid, non-canonical URLs, this may not be the length |
132 // of the string. For example "http://": the parsed structure will only | 131 // of the string. For example "http://": the parsed structure will only |
133 // contain an entry for the four-character scheme, and it doesn't know about | 132 // contain an entry for the four-character scheme, and it doesn't know about |
134 // the "://". For all other last-components, it will return the real length. | 133 // the "://". For all other last-components, it will return the real length. |
135 GURL_API int Length() const; | 134 int Length() const; |
136 | 135 |
137 // Returns the number of characters before the given component if it exists, | 136 // Returns the number of characters before the given component if it exists, |
138 // or where the component would be if it did exist. This will return the | 137 // or where the component would be if it did exist. This will return the |
139 // string length if the component would be appended to the end. | 138 // string length if the component would be appended to the end. |
140 // | 139 // |
141 // Note that this can get a little funny for the port, query, and ref | 140 // Note that this can get a little funny for the port, query, and ref |
142 // components which have a delimiter that is not counted as part of the | 141 // components which have a delimiter that is not counted as part of the |
143 // component. The |include_delimiter| flag controls if you want this counted | 142 // component. The |include_delimiter| flag controls if you want this counted |
144 // as part of the component or not when the component exists. | 143 // as part of the component or not when the component exists. |
145 // | 144 // |
146 // This example shows the difference between the two flags for two of these | 145 // This example shows the difference between the two flags for two of these |
147 // delimited components that are present (the port and query) and one that | 146 // delimited components that are present (the port and query) and one that |
148 // isn't (the reference). The components that this flag affects are marked | 147 // isn't (the reference). The components that this flag affects are marked |
149 // with a *. | 148 // with a *. |
150 // 0 1 2 | 149 // 0 1 2 |
151 // 012345678901234567890 | 150 // 012345678901234567890 |
152 // Example input: http://foo:80/?query | 151 // Example input: http://foo:80/?query |
153 // include_delim=true, ...=false ("<-" indicates different) | 152 // include_delim=true, ...=false ("<-" indicates different) |
154 // SCHEME: 0 0 | 153 // SCHEME: 0 0 |
155 // USERNAME: 5 5 | 154 // USERNAME: 5 5 |
156 // PASSWORD: 5 5 | 155 // PASSWORD: 5 5 |
157 // HOST: 7 7 | 156 // HOST: 7 7 |
158 // *PORT: 10 11 <- | 157 // *PORT: 10 11 <- |
159 // PATH: 13 13 | 158 // PATH: 13 13 |
160 // *QUERY: 14 15 <- | 159 // *QUERY: 14 15 <- |
161 // *REF: 20 20 | 160 // *REF: 20 20 |
162 // | 161 // |
163 GURL_API int CountCharactersBefore(ComponentType type, | 162 int CountCharactersBefore(ComponentType type, |
164 bool include_delimiter) const; | 163 bool include_delimiter) const; |
165 | 164 |
166 // Scheme without the colon: "http://foo/" would have a scheme of "http". | 165 // Scheme without the colon: "http://foo/" would have a scheme of "http". |
167 // The length will be -1 if no scheme is specified ("foo.com"), or 0 if there | 166 // The length will be -1 if no scheme is specified ("foo.com"), or 0 if there |
168 // is a colon but no scheme (":foo"). Note that the scheme is not guaranteed | 167 // is a colon but no scheme (":foo"). Note that the scheme is not guaranteed |
169 // to start at the beginning of the string if there are preceding whitespace | 168 // to start at the beginning of the string if there are preceding whitespace |
170 // or control characters. | 169 // or control characters. |
171 Component scheme; | 170 Component scheme; |
172 | 171 |
173 // Username. Specified in URLs with an @ sign before the host. See |password| | 172 // Username. Specified in URLs with an @ sign before the host. See |password| |
174 Component username; | 173 Component username; |
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
239 // at any point in the process, and will actually handle embedded NULLs. | 238 // at any point in the process, and will actually handle embedded NULLs. |
240 // | 239 // |
241 // IMPORTANT: These functions do NOT hang on to the given pointer or copy it | 240 // IMPORTANT: These functions do NOT hang on to the given pointer or copy it |
242 // in any way. See the comment above the struct. | 241 // in any way. See the comment above the struct. |
243 // | 242 // |
244 // The 8-bit versions require UTF-8 encoding. | 243 // The 8-bit versions require UTF-8 encoding. |
245 | 244 |
246 // StandardURL is for when the scheme is known to be one that has an | 245 // StandardURL is for when the scheme is known to be one that has an |
247 // authority (host) like "http". This function will not handle weird ones | 246 // authority (host) like "http". This function will not handle weird ones |
248 // like "about:" and "javascript:", or do the right thing for "file:" URLs. | 247 // like "about:" and "javascript:", or do the right thing for "file:" URLs. |
249 GURL_API void ParseStandardURL(const char* url, int url_len, Parsed* parsed); | 248 void ParseStandardURL(const char* url, int url_len, Parsed* parsed); |
250 GURL_API void ParseStandardURL(const char16* url, int url_len, Parsed* parsed); | 249 void ParseStandardURL(const char16* url, int url_len, Parsed* parsed); |
251 | 250 |
252 // PathURL is for when the scheme is known not to have an authority (host) | 251 // PathURL is for when the scheme is known not to have an authority (host) |
253 // section but that aren't file URLs either. The scheme is parsed, and | 252 // section but that aren't file URLs either. The scheme is parsed, and |
254 // everything after the scheme is considered as the path. This is used for | 253 // everything after the scheme is considered as the path. This is used for |
255 // things like "about:" and "javascript:" | 254 // things like "about:" and "javascript:" |
256 GURL_API void ParsePathURL(const char* url, int url_len, Parsed* parsed); | 255 void ParsePathURL(const char* url, int url_len, Parsed* parsed); |
257 GURL_API void ParsePathURL(const char16* url, int url_len, Parsed* parsed); | 256 void ParsePathURL(const char16* url, int url_len, Parsed* parsed); |
258 | 257 |
259 // FileURL is for file URLs. There are some special rules for interpreting | 258 // FileURL is for file URLs. There are some special rules for interpreting |
260 // these. | 259 // these. |
261 GURL_API void ParseFileURL(const char* url, int url_len, Parsed* parsed); | 260 void ParseFileURL(const char* url, int url_len, Parsed* parsed); |
262 GURL_API void ParseFileURL(const char16* url, int url_len, Parsed* parsed); | 261 void ParseFileURL(const char16* url, int url_len, Parsed* parsed); |
263 | 262 |
264 // Filesystem URLs are structured differently than other URLs. | 263 // Filesystem URLs are structured differently than other URLs. |
265 GURL_API void ParseFileSystemURL(const char* url, | 264 void ParseFileSystemURL(const char* url, |
266 int url_len, | 265 int url_len, |
267 Parsed* parsed); | 266 Parsed* parsed); |
268 GURL_API void ParseFileSystemURL(const char16* url, | 267 void ParseFileSystemURL(const char16* url, |
269 int url_len, | 268 int url_len, |
270 Parsed* parsed); | 269 Parsed* parsed); |
271 | 270 |
272 // MailtoURL is for mailto: urls. They are made up of scheme, path, and query. | 271 // MailtoURL is for mailto: urls. They are made up of scheme, path, and query. |
273 GURL_API void ParseMailtoURL(const char* url, int url_len, Parsed* parsed); | 272 void ParseMailtoURL(const char* url, int url_len, Parsed* parsed); |
274 GURL_API void ParseMailtoURL(const char16* url, int url_len, Parsed* parsed); | 273 void ParseMailtoURL(const char16* url, int url_len, Parsed* parsed); |
275 | 274 |
276 // Helper functions ----------------------------------------------------------- | 275 // Helper functions ----------------------------------------------------------- |
277 | 276 |
278 // Locates the scheme according to the URL parser's rules. This function is | 277 // Locates the scheme according to the URL parser's rules. This function is |
279 // designed so the caller can find the scheme and call the correct Init* | 278 // designed so the caller can find the scheme and call the correct Init* |
280 // function according to their known scheme types. | 279 // function according to their known scheme types. |
281 // | 280 // |
282 // It also does not perform any validation on the scheme. | 281 // It also does not perform any validation on the scheme. |
283 // | 282 // |
284 // This function will return true if the scheme is found and will put the | 283 // This function will return true if the scheme is found and will put the |
285 // scheme's range into *scheme. False means no scheme could be found. Note | 284 // scheme's range into *scheme. False means no scheme could be found. Note |
286 // that a URL beginning with a colon has a scheme, but it is empty, so this | 285 // that a URL beginning with a colon has a scheme, but it is empty, so this |
287 // function will return true but *scheme will = (0,0). | 286 // function will return true but *scheme will = (0,0). |
288 // | 287 // |
289 // The scheme is found by skipping spaces and control characters at the | 288 // The scheme is found by skipping spaces and control characters at the |
290 // beginning, and taking everything from there to the first colon to be the | 289 // beginning, and taking everything from there to the first colon to be the |
291 // scheme. The character at scheme.end() will be the colon (we may enhance | 290 // scheme. The character at scheme.end() will be the colon (we may enhance |
292 // this to handle full width colons or something, so don't count on the | 291 // this to handle full width colons or something, so don't count on the |
293 // actual character value). The character at scheme.end()+1 will be the | 292 // actual character value). The character at scheme.end()+1 will be the |
294 // beginning of the rest of the URL, be it the authority or the path (or the | 293 // beginning of the rest of the URL, be it the authority or the path (or the |
295 // end of the string). | 294 // end of the string). |
296 // | 295 // |
297 // The 8-bit version requires UTF-8 encoding. | 296 // The 8-bit version requires UTF-8 encoding. |
298 GURL_API bool ExtractScheme(const char* url, int url_len, Component* scheme); | 297 bool ExtractScheme(const char* url, int url_len, Component* scheme); |
299 GURL_API bool ExtractScheme(const char16* url, int url_len, Component* scheme); | 298 bool ExtractScheme(const char16* url, int url_len, Component* scheme); |
300 | 299 |
301 // Returns true if ch is a character that terminates the authority segment | 300 // Returns true if ch is a character that terminates the authority segment |
302 // of a URL. | 301 // of a URL. |
303 GURL_API bool IsAuthorityTerminator(char16 ch); | 302 bool IsAuthorityTerminator(char16 ch); |
304 | 303 |
305 // Does a best effort parse of input |spec|, in range |auth|. If a particular | 304 // Does a best effort parse of input |spec|, in range |auth|. If a particular |
306 // component is not found, it will be set to invalid. | 305 // component is not found, it will be set to invalid. |
307 GURL_API void ParseAuthority(const char* spec, | 306 void ParseAuthority(const char* spec, |
308 const Component& auth, | 307 const Component& auth, |
309 Component* username, | 308 Component* username, |
310 Component* password, | 309 Component* password, |
311 Component* hostname, | 310 Component* hostname, |
312 Component* port_num); | 311 Component* port_num); |
313 GURL_API void ParseAuthority(const char16* spec, | 312 void ParseAuthority(const char16* spec, |
314 const Component& auth, | 313 const Component& auth, |
315 Component* username, | 314 Component* username, |
316 Component* password, | 315 Component* password, |
317 Component* hostname, | 316 Component* hostname, |
318 Component* port_num); | 317 Component* port_num); |
319 | 318 |
320 // Computes the integer port value from the given port component. The port | 319 // Computes the integer port value from the given port component. The port |
321 // component should have been identified by one of the init functions on | 320 // component should have been identified by one of the init functions on |
322 // |Parsed| for the given input url. | 321 // |Parsed| for the given input url. |
323 // | 322 // |
324 // The return value will be a positive integer between 0 and 64K, or one of | 323 // The return value will be a positive integer between 0 and 64K, or one of |
325 // the two special values below. | 324 // the two special values below. |
326 enum SpecialPort { PORT_UNSPECIFIED = -1, PORT_INVALID = -2 }; | 325 enum SpecialPort { PORT_UNSPECIFIED = -1, PORT_INVALID = -2 }; |
327 GURL_API int ParsePort(const char* url, const Component& port); | 326 int ParsePort(const char* url, const Component& port); |
328 GURL_API int ParsePort(const char16* url, const Component& port); | 327 int ParsePort(const char16* url, const Component& port); |
329 | 328 |
330 // Extracts the range of the file name in the given url. The path must | 329 // Extracts the range of the file name in the given url. The path must |
331 // already have been computed by the parse function, and the matching URL | 330 // already have been computed by the parse function, and the matching URL |
332 // and extracted path are provided to this function. The filename is | 331 // and extracted path are provided to this function. The filename is |
333 // defined as being everything from the last slash/backslash of the path | 332 // defined as being everything from the last slash/backslash of the path |
334 // to the end of the path. | 333 // to the end of the path. |
335 // | 334 // |
336 // The file name will be empty if the path is empty or there is nothing | 335 // The file name will be empty if the path is empty or there is nothing |
337 // following the last slash. | 336 // following the last slash. |
338 // | 337 // |
339 // The 8-bit version requires UTF-8 encoding. | 338 // The 8-bit version requires UTF-8 encoding. |
340 GURL_API void ExtractFileName(const char* url, | 339 void ExtractFileName(const char* url, |
341 const Component& path, | 340 const Component& path, |
342 Component* file_name); | 341 Component* file_name); |
343 GURL_API void ExtractFileName(const char16* url, | 342 void ExtractFileName(const char16* url, |
344 const Component& path, | 343 const Component& path, |
345 Component* file_name); | 344 Component* file_name); |
346 | 345 |
347 // Extract the first key/value from the range defined by |*query|. Updates | 346 // Extract the first key/value from the range defined by |*query|. Updates |
348 // |*query| to start at the end of the extracted key/value pair. This is | 347 // |*query| to start at the end of the extracted key/value pair. This is |
349 // designed for use in a loop: you can keep calling it with the same query | 348 // designed for use in a loop: you can keep calling it with the same query |
350 // object and it will iterate over all items in the query. | 349 // object and it will iterate over all items in the query. |
351 // | 350 // |
352 // Some key/value pairs may have the key, the value, or both be empty (for | 351 // Some key/value pairs may have the key, the value, or both be empty (for |
353 // example, the query string "?&"). These will be returned. Note that an empty | 352 // example, the query string "?&"). These will be returned. Note that an empty |
354 // last parameter "foo.com?" or "foo.com?a&" will not be returned, this case | 353 // last parameter "foo.com?" or "foo.com?a&" will not be returned, this case |
355 // is the same as "done." | 354 // is the same as "done." |
356 // | 355 // |
357 // The initial query component should not include the '?' (this is the default | 356 // The initial query component should not include the '?' (this is the default |
358 // for parsed URLs). | 357 // for parsed URLs). |
359 // | 358 // |
360 // If no key/value are found |*key| and |*value| will be unchanged and it will | 359 // If no key/value are found |*key| and |*value| will be unchanged and it will |
361 // return false. | 360 // return false. |
362 GURL_API bool ExtractQueryKeyValue(const char* url, | 361 bool ExtractQueryKeyValue(const char* url, |
363 Component* query, | 362 Component* query, |
364 Component* key, | 363 Component* key, |
365 Component* value); | 364 Component* value); |
366 GURL_API bool ExtractQueryKeyValue(const char16* url, | 365 bool ExtractQueryKeyValue(const char16* url, |
367 Component* query, | 366 Component* query, |
368 Component* key, | 367 Component* key, |
369 Component* value); | 368 Component* value); |
370 | 369 |
371 } // namespace url_parse | 370 } // namespace url_parse |
372 | 371 |
373 #endif // GOOGLEURL_SRC_URL_PARSE_H__ | 372 #endif // URL_URL_PARSE_H_ |
OLD | NEW |