OLD | NEW |
1 // Copyright 2007, Google Inc. | 1 // Copyright 2007, Google Inc. |
2 // All rights reserved. | 2 // All rights reserved. |
3 // | 3 // |
4 // Redistribution and use in source and binary forms, with or without | 4 // Redistribution and use in source and binary forms, with or without |
5 // modification, are permitted provided that the following conditions are | 5 // modification, are permitted provided that the following conditions are |
6 // met: | 6 // met: |
7 // | 7 // |
8 // * Redistributions of source code must retain the above copyright | 8 // * Redistributions of source code must retain the above copyright |
9 // notice, this list of conditions and the following disclaimer. | 9 // notice, this list of conditions and the following disclaimer. |
10 // * Redistributions in binary form must reproduce the above | 10 // * Redistributions in binary form must reproduce the above |
(...skipping 16 matching lines...) Expand all Loading... |
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 | 29 |
30 #ifndef GOOGLEURL_SRC_URL_PARSE_H__ | 30 #ifndef GOOGLEURL_SRC_URL_PARSE_H__ |
31 #define GOOGLEURL_SRC_URL_PARSE_H__ | 31 #define GOOGLEURL_SRC_URL_PARSE_H__ |
32 | 32 |
33 #include <string> | 33 #include <string> |
34 | 34 |
35 #include "base/basictypes.h" | 35 #include "base/basictypes.h" |
36 #include "base/string16.h" | 36 #include "base/string16.h" |
| 37 #include "googleurl/src/url_common.h" |
37 | 38 |
38 namespace url_parse { | 39 namespace url_parse { |
39 | 40 |
40 // Deprecated, but WebKit/WebCore/platform/KURLGooglePrivate.h and | 41 // Deprecated, but WebKit/WebCore/platform/KURLGooglePrivate.h and |
41 // KURLGoogle.cpp still rely on this type. | 42 // KURLGoogle.cpp still rely on this type. |
42 typedef char16 UTF16Char; | 43 typedef char16 UTF16Char; |
43 | 44 |
44 // Component ------------------------------------------------------------------ | 45 // Component ------------------------------------------------------------------ |
45 | 46 |
46 // Represents a substring for URL parsing. | 47 // Represents a substring for URL parsing. |
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
112 USERNAME, | 113 USERNAME, |
113 PASSWORD, | 114 PASSWORD, |
114 HOST, | 115 HOST, |
115 PORT, | 116 PORT, |
116 PATH, | 117 PATH, |
117 QUERY, | 118 QUERY, |
118 REF, | 119 REF, |
119 }; | 120 }; |
120 | 121 |
121 // The default constructor is sufficient for the components. | 122 // The default constructor is sufficient for the components. |
122 Parsed() {} | 123 GURL_API Parsed() {} |
123 | 124 |
124 // Returns the length of the URL (the end of the last component). | 125 // Returns the length of the URL (the end of the last component). |
125 // | 126 // |
126 // Note that for some invalid, non-canonical URLs, this may not be the length | 127 // Note that for some invalid, non-canonical URLs, this may not be the length |
127 // of the string. For example "http://": the parsed structure will only | 128 // of the string. For example "http://": the parsed structure will only |
128 // contain an entry for the four-character scheme, and it doesn't know about | 129 // contain an entry for the four-character scheme, and it doesn't know about |
129 // the "://". For all other last-components, it will return the real length. | 130 // the "://". For all other last-components, it will return the real length. |
130 int Length() const; | 131 GURL_API int Length() const; |
131 | 132 |
132 // Returns the number of characters before the given component if it exists, | 133 // Returns the number of characters before the given component if it exists, |
133 // or where the component would be if it did exist. This will return the | 134 // or where the component would be if it did exist. This will return the |
134 // string length if the component would be appended to the end. | 135 // string length if the component would be appended to the end. |
135 // | 136 // |
136 // Note that this can get a little funny for the port, query, and ref | 137 // Note that this can get a little funny for the port, query, and ref |
137 // components which have a delimiter that is not counted as part of the | 138 // components which have a delimiter that is not counted as part of the |
138 // component. The |include_delimiter| flag controls if you want this counted | 139 // component. The |include_delimiter| flag controls if you want this counted |
139 // as part of the component or not when the component exists. | 140 // as part of the component or not when the component exists. |
140 // | 141 // |
141 // This example shows the difference between the two flags for two of these | 142 // This example shows the difference between the two flags for two of these |
142 // delimited components that is present (the port and query) and one that | 143 // delimited components that is present (the port and query) and one that |
143 // isn't (the reference). The components that this flag affects are marked | 144 // isn't (the reference). The components that this flag affects are marked |
144 // with a *. | 145 // with a *. |
145 // 0 1 2 | 146 // 0 1 2 |
146 // 012345678901234567890 | 147 // 012345678901234567890 |
147 // Example input: http://foo:80/?query | 148 // Example input: http://foo:80/?query |
148 // include_delim=true, ...=false ("<-" indicates different) | 149 // include_delim=true, ...=false ("<-" indicates different) |
149 // SCHEME: 0 0 | 150 // SCHEME: 0 0 |
150 // USERNAME: 5 5 | 151 // USERNAME: 5 5 |
151 // PASSWORD: 5 5 | 152 // PASSWORD: 5 5 |
152 // HOST: 7 7 | 153 // HOST: 7 7 |
153 // *PORT: 10 11 <- | 154 // *PORT: 10 11 <- |
154 // PATH: 13 13 | 155 // PATH: 13 13 |
155 // *QUERY: 14 15 <- | 156 // *QUERY: 14 15 <- |
156 // *REF: 20 20 | 157 // *REF: 20 20 |
157 // | 158 // |
158 int CountCharactersBefore(ComponentType type, bool include_delimiter) const; | 159 GURL_API int CountCharactersBefore(ComponentType type, |
| 160 bool include_delimiter) const; |
159 | 161 |
160 // Scheme without the colon: "http://foo/" would have a scheme of "http". | 162 // Scheme without the colon: "http://foo/" would have a scheme of "http". |
161 // The length will be -1 if no scheme is specified ("foo.com"), or 0 if there | 163 // The length will be -1 if no scheme is specified ("foo.com"), or 0 if there |
162 // is a colon but no scheme (":foo"). Note that the scheme is not guaranteed | 164 // is a colon but no scheme (":foo"). Note that the scheme is not guaranteed |
163 // to start at the beginning of the string if there are preceding whitespace | 165 // to start at the beginning of the string if there are preceding whitespace |
164 // or control characters. | 166 // or control characters. |
165 Component scheme; | 167 Component scheme; |
166 | 168 |
167 // Username. Specified in URLs with an @ sign before the host. See |password| | 169 // Username. Specified in URLs with an @ sign before the host. See |password| |
168 Component username; | 170 Component username; |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
208 // at any point in the process, and will actually handle embedded NULLs. | 210 // at any point in the process, and will actually handle embedded NULLs. |
209 // | 211 // |
210 // IMPORTANT: These functions do NOT hang on to the given pointer or copy it | 212 // IMPORTANT: These functions do NOT hang on to the given pointer or copy it |
211 // in any way. See the comment above the struct. | 213 // in any way. See the comment above the struct. |
212 // | 214 // |
213 // The 8-bit versions require UTF-8 encoding. | 215 // The 8-bit versions require UTF-8 encoding. |
214 | 216 |
215 // StandardURL is for when the scheme is known to be one that has an | 217 // StandardURL is for when the scheme is known to be one that has an |
216 // authority (host) like "http". This function will not handle weird ones | 218 // authority (host) like "http". This function will not handle weird ones |
217 // like "about:" and "javascript:", or do the right thing for "file:" URLs. | 219 // like "about:" and "javascript:", or do the right thing for "file:" URLs. |
218 void ParseStandardURL(const char* url, int url_len, Parsed* parsed); | 220 GURL_API void ParseStandardURL(const char* url, int url_len, Parsed* parsed); |
219 void ParseStandardURL(const char16* url, int url_len, Parsed* parsed); | 221 GURL_API void ParseStandardURL(const char16* url, int url_len, Parsed* parsed); |
220 | 222 |
221 // PathURL is for when the scheme is known not to have an authority (host) | 223 // PathURL is for when the scheme is known not to have an authority (host) |
222 // section but that aren't file URLs either. The scheme is parsed, and | 224 // section but that aren't file URLs either. The scheme is parsed, and |
223 // everything after the scheme is considered as the path. This is used for | 225 // everything after the scheme is considered as the path. This is used for |
224 // things like "about:" and "javascript:" | 226 // things like "about:" and "javascript:" |
225 void ParsePathURL(const char* url, int url_len, Parsed* parsed); | 227 GURL_API void ParsePathURL(const char* url, int url_len, Parsed* parsed); |
226 void ParsePathURL(const char16* url, int url_len, Parsed* parsed); | 228 GURL_API void ParsePathURL(const char16* url, int url_len, Parsed* parsed); |
227 | 229 |
228 // FileURL is for file URLs. There are some special rules for interpreting | 230 // FileURL is for file URLs. There are some special rules for interpreting |
229 // these. | 231 // these. |
230 void ParseFileURL(const char* url, int url_len, Parsed* parsed); | 232 GURL_API void ParseFileURL(const char* url, int url_len, Parsed* parsed); |
231 void ParseFileURL(const char16* url, int url_len, Parsed* parsed); | 233 GURL_API void ParseFileURL(const char16* url, int url_len, Parsed* parsed); |
232 | 234 |
233 // MailtoURL is for mailto: URLs. They are made up of a scheme, path, and query. | 235 // MailtoURL is for mailto: URLs. They are made up of a scheme, path, and query. |
234 void ParseMailtoURL(const char* url, int url_len, Parsed* parsed); | 236 GURL_API void ParseMailtoURL(const char* url, int url_len, Parsed* parsed); |
235 void ParseMailtoURL(const char16* url, int url_len, Parsed* parsed); | 237 GURL_API void ParseMailtoURL(const char16* url, int url_len, Parsed* parsed); |
236 | 238 |
237 // Helper functions ----------------------------------------------------------- | 239 // Helper functions ----------------------------------------------------------- |
238 | 240 |
239 // Locates the scheme according to the URL parser's rules. This function is | 241 // Locates the scheme according to the URL parser's rules. This function is |
240 // designed so the caller can find the scheme and call the correct Parse* | 242 // designed so the caller can find the scheme and call the correct Parse* |
241 // function according to their known scheme types. | 243 // function according to their known scheme types. |
242 // | 244 // |
243 // It also does not perform any validation on the scheme. | 245 // It also does not perform any validation on the scheme. |
244 // | 246 // |
245 // This function will return true if the scheme is found and will put the | 247 // This function will return true if the scheme is found and will put the |
246 // scheme's range into *scheme. False means no scheme could be found. Note | 248 // scheme's range into *scheme. False means no scheme could be found. Note |
247 // that a URL beginning with a colon has a scheme, but it is empty, so this | 249 // that a URL beginning with a colon has a scheme, but it is empty, so this |
248 // function will return true but *scheme will = (0,0). | 250 // function will return true but *scheme will = (0,0). |
249 // | 251 // |
250 // The scheme is found by skipping spaces and control characters at the | 252 // The scheme is found by skipping spaces and control characters at the |
251 // beginning, and taking everything from there to the first colon to be the | 253 // beginning, and taking everything from there to the first colon to be the |
252 // scheme. The character at scheme.end() will be the colon (we may enhance | 254 // scheme. The character at scheme.end() will be the colon (we may enhance |
253 // this to handle full width colons or something, so don't count on the | 255 // this to handle full width colons or something, so don't count on the |
254 // actual character value). The character at scheme.end()+1 will be the | 256 // actual character value). The character at scheme.end()+1 will be the |
255 // beginning of the rest of the URL, be it the authority or the path (or the | 257 // beginning of the rest of the URL, be it the authority or the path (or the |
256 // end of the string). | 258 // end of the string). |
257 // | 259 // |
258 // The 8-bit version requires UTF-8 encoding. | 260 // The 8-bit version requires UTF-8 encoding. |
259 bool ExtractScheme(const char* url, int url_len, Component* scheme); | 261 GURL_API bool ExtractScheme(const char* url, int url_len, Component* scheme); |
260 bool ExtractScheme(const char16* url, int url_len, Component* scheme); | 262 GURL_API bool ExtractScheme(const char16* url, int url_len, Component* scheme); |
261 | 263 |
262 // Returns true if ch is a character that terminates the authority segment | 264 // Returns true if ch is a character that terminates the authority segment |
263 // of a URL. | 265 // of a URL. |
264 bool IsAuthorityTerminator(char16 ch); | 266 GURL_API bool IsAuthorityTerminator(char16 ch); |
265 | 267 |
266 // Does a best effort parse of input |spec|, in range |auth|. If a particular | 268 // Does a best effort parse of input |spec|, in range |auth|. If a particular |
267 // component is not found, it will be set to invalid. | 269 // component is not found, it will be set to invalid. |
268 void ParseAuthority(const char* spec, | 270 GURL_API void ParseAuthority(const char* spec, |
269 const Component& auth, | 271 const Component& auth, |
270 Component* username, | 272 Component* username, |
271 Component* password, | 273 Component* password, |
272 Component* hostname, | 274 Component* hostname, |
273 Component* port_num); | 275 Component* port_num); |
274 void ParseAuthority(const char16* spec, | 276 GURL_API void ParseAuthority(const char16* spec, |
275 const Component& auth, | 277 const Component& auth, |
276 Component* username, | 278 Component* username, |
277 Component* password, | 279 Component* password, |
278 Component* hostname, | 280 Component* hostname, |
279 Component* port_num); | 281 Component* port_num); |
280 | 282 |
281 // Computes the integer port value from the given port component. The port | 283 // Computes the integer port value from the given port component. The port |
282 // component should have been identified by one of the init functions on | 284 // component should have been identified by one of the init functions on |
283 // |Parsed| for the given input url. | 285 // |Parsed| for the given input url. |
284 // | 286 // |
285 // The return value will be a positive integer between 0 and 64K, or one of | 287 // The return value will be a positive integer between 0 and 64K, or one of |
286 // the two special values below. | 288 // the two special values below. |
287 enum SpecialPort { PORT_UNSPECIFIED = -1, PORT_INVALID = -2 }; | 289 enum SpecialPort { PORT_UNSPECIFIED = -1, PORT_INVALID = -2 }; |
288 int ParsePort(const char* url, const Component& port); | 290 GURL_API int ParsePort(const char* url, const Component& port); |
289 int ParsePort(const char16* url, const Component& port); | 291 GURL_API int ParsePort(const char16* url, const Component& port); |
290 | 292 |
291 // Extracts the range of the file name in the given url. The path must | 293 // Extracts the range of the file name in the given url. The path must |
292 // already have been computed by the parse function, and the matching URL | 294 // already have been computed by the parse function, and the matching URL |
293 // and extracted path are provided to this function. The filename is | 295 // and extracted path are provided to this function. The filename is |
294 // defined as being everything from the last slash/backslash of the path | 296 // defined as being everything from the last slash/backslash of the path |
295 // to the end of the path. | 297 // to the end of the path. |
296 // | 298 // |
297 // The file name will be empty if the path is empty or there is nothing | 299 // The file name will be empty if the path is empty or there is nothing |
298 // following the last slash. | 300 // following the last slash. |
299 // | 301 // |
300 // The 8-bit version requires UTF-8 encoding. | 302 // The 8-bit version requires UTF-8 encoding. |
301 void ExtractFileName(const char* url, | 303 GURL_API void ExtractFileName(const char* url, |
302 const Component& path, | 304 const Component& path, |
303 Component* file_name); | 305 Component* file_name); |
304 void ExtractFileName(const char16* url, | 306 GURL_API void ExtractFileName(const char16* url, |
305 const Component& path, | 307 const Component& path, |
306 Component* file_name); | 308 Component* file_name); |
307 | 309 |
308 // Extract the first key/value from the range defined by |*query|. Updates | 310 // Extract the first key/value from the range defined by |*query|. Updates |
309 // |*query| to start at the end of the extracted key/value pair. This is | 311 // |*query| to start at the end of the extracted key/value pair. This is |
310 // designed for use in a loop: you can keep calling it with the same query | 312 // designed for use in a loop: you can keep calling it with the same query |
311 // object and it will iterate over all items in the query. | 313 // object and it will iterate over all items in the query. |
312 // | 314 // |
313 // Some key/value pairs may have the key, the value, or both be empty (for | 315 // Some key/value pairs may have the key, the value, or both be empty (for |
314 // example, the query string "?&"). These will be returned. Note that an empty | 316 // example, the query string "?&"). These will be returned. Note that an empty |
315 // last parameter "foo.com?" or "foo.com?a&" will not be returned; this case | 317 // last parameter "foo.com?" or "foo.com?a&" will not be returned; this case |
316 // is the same as "done." | 318 // is the same as "done." |
317 // | 319 // |
318 // The initial query component should not include the '?' (this is the default | 320 // The initial query component should not include the '?' (this is the default |
319 // for parsed URLs). | 321 // for parsed URLs). |
320 // | 322 // |
321 // If no key/value are found |*key| and |*value| will be unchanged and it will | 323 // If no key/value are found |*key| and |*value| will be unchanged and it will |
322 // return false. | 324 // return false. |
323 bool ExtractQueryKeyValue(const char* url, | 325 GURL_API bool ExtractQueryKeyValue(const char* url, |
324 Component* query, | 326 Component* query, |
325 Component* key, | 327 Component* key, |
326 Component* value); | 328 Component* value); |
327 bool ExtractQueryKeyValue(const char16* url, | 329 GURL_API bool ExtractQueryKeyValue(const char16* url, |
328 Component* query, | 330 Component* query, |
329 Component* key, | 331 Component* key, |
330 Component* value); | 332 Component* value); |
331 | 333 |
332 } // namespace url_parse | 334 } // namespace url_parse |
333 | 335 |
334 #endif // GOOGLEURL_SRC_URL_PARSE_H__ | 336 #endif // GOOGLEURL_SRC_URL_PARSE_H__ |
OLD | NEW |