OLD | NEW |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef URL_URL_PARSE_H_ | 5 #ifndef URL_URL_PARSE_H_ |
6 #define URL_URL_PARSE_H_ | 6 #define URL_URL_PARSE_H_ |
7 | 7 |
8 #include <string> | 8 #include <string> |
9 | 9 |
10 #include "base/basictypes.h" | 10 #include "base/basictypes.h" |
11 #include "base/string16.h" | 11 #include "base/string16.h" |
| 12 #include "url/url_export.h" |
12 | 13 |
13 namespace url_parse { | 14 namespace url_parse { |
14 | 15 |
15 // Deprecated, but WebKit/WebCore/platform/KURLGooglePrivate.h and | 16 // Deprecated, but WebKit/WebCore/platform/KURLGooglePrivate.h and |
16 // KURLGoogle.cpp still rely on this type. | 17 // KURLGoogle.cpp still rely on this type. |
17 typedef char16 UTF16Char; | 18 typedef char16 UTF16Char; |
18 | 19 |
19 // Component ------------------------------------------------------------------ | 20 // Component ------------------------------------------------------------------ |
20 | 21 |
21 // Represents a substring for URL parsing. | 22 // Represents a substring for URL parsing. |
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
73 // if (!url_parse::ExtractScheme(url, url_len, &scheme)) | 74 // if (!url_parse::ExtractScheme(url, url_len, &scheme)) |
74 // return I_CAN_NOT_FIND_THE_SCHEME_DUDE; | 75 // return I_CAN_NOT_FIND_THE_SCHEME_DUDE; |
75 // | 76 // |
76 // if (IsStandardScheme(url, scheme)) // Not provided by this component | 77 // if (IsStandardScheme(url, scheme)) // Not provided by this component |
77 // url_parse::ParseStandardURL(url, url_len, &parsed); | 78 // url_parse::ParseStandardURL(url, url_len, &parsed); |
78 // else if (IsFileURL(url, scheme)) // Not provided by this component | 79 // else if (IsFileURL(url, scheme)) // Not provided by this component |
79 // url_parse::ParseFileURL(url, url_len, &parsed); | 80 // url_parse::ParseFileURL(url, url_len, &parsed); |
80 // else | 81 // else |
81 // url_parse::ParsePathURL(url, url_len, &parsed); | 82 // url_parse::ParsePathURL(url, url_len, &parsed); |
82 // | 83 // |
83 struct Parsed { | 84 struct URL_EXPORT Parsed { |
84 // Identifies different components. | 85 // Identifies different components. |
85 enum ComponentType { | 86 enum ComponentType { |
86 SCHEME, | 87 SCHEME, |
87 USERNAME, | 88 USERNAME, |
88 PASSWORD, | 89 PASSWORD, |
89 HOST, | 90 HOST, |
90 PORT, | 91 PORT, |
91 PATH, | 92 PATH, |
92 QUERY, | 93 QUERY, |
93 REF, | 94 REF, |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
127 // include_delim=true, ...=false ("<-" indicates different) | 128 // include_delim=true, ...=false ("<-" indicates different) |
128 // SCHEME: 0 0 | 129 // SCHEME: 0 0 |
129 // USERNAME: 5 5 | 130 // USERNAME: 5 5 |
130 // PASSWORD: 5 5 | 131 // PASSWORD: 5 5 |
131 // HOST: 7 7 | 132 // HOST: 7 7 |
132 // *PORT: 10 11 <- | 133 // *PORT: 10 11 <- |
133 // PATH: 13 13 | 134 // PATH: 13 13 |
134 // *QUERY: 14 15 <- | 135 // *QUERY: 14 15 <- |
135 // *REF: 20 20 | 136 // *REF: 20 20 |
136 // | 137 // |
137 int CountCharactersBefore(ComponentType type, | 138 int CountCharactersBefore(ComponentType type, bool include_delimiter) const; |
138 bool include_delimiter) const; | |
139 | 139 |
140 // Scheme without the colon: "http://foo/" would have a scheme of "http". | 140 // Scheme without the colon: "http://foo/" would have a scheme of "http". |
141 // The length will be -1 if no scheme is specified ("foo.com"), or 0 if there | 141 // The length will be -1 if no scheme is specified ("foo.com"), or 0 if there |
142 // is a colon but no scheme (":foo"). Note that the scheme is not guaranteed | 142 // is a colon but no scheme (":foo"). Note that the scheme is not guaranteed |
143 // to start at the beginning of the string if there are preceding whitespace | 143 // to start at the beginning of the string if there are preceding whitespace |
144 // or control characters. | 144 // or control characters. |
145 Component scheme; | 145 Component scheme; |
146 | 146 |
147 // Username. Specified in URLs with an @ sign before the host. See |password| | 147 // Username. Specified in URLs with an @ sign before the host. See |password| |
148 Component username; | 148 Component username; |
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
213 // at any point in the process, and will actually handle embedded NULLs. | 213 // at any point in the process, and will actually handle embedded NULLs. |
214 // | 214 // |
215 // IMPORTANT: These functions do NOT hang on to the given pointer or copy it | 215 // IMPORTANT: These functions do NOT hang on to the given pointer or copy it |
216 // in any way. See the comment above the struct. | 216 // in any way. See the comment above the struct. |
217 // | 217 // |
218 // The 8-bit versions require UTF-8 encoding. | 218 // The 8-bit versions require UTF-8 encoding. |
219 | 219 |
220 // StandardURL is for when the scheme is known to be one that has an | 220 // StandardURL is for when the scheme is known to be one that has an |
221 // authority (host) like "http". This function will not handle weird ones | 221 // authority (host) like "http". This function will not handle weird ones |
222 // like "about:" and "javascript:", or do the right thing for "file:" URLs. | 222 // like "about:" and "javascript:", or do the right thing for "file:" URLs. |
223 void ParseStandardURL(const char* url, int url_len, Parsed* parsed); | 223 URL_EXPORT void ParseStandardURL(const char* url, |
224 void ParseStandardURL(const char16* url, int url_len, Parsed* parsed); | 224 int url_len, |
| 225 Parsed* parsed); |
| 226 URL_EXPORT void ParseStandardURL(const char16* url, |
| 227 int url_len, |
| 228 Parsed* parsed); |
225 | 229 |
226 // PathURL is for when the scheme is known not to have an authority (host) | 230 // PathURL is for when the scheme is known not to have an authority (host) |
227 // section but that aren't file URLs either. The scheme is parsed, and | 231 // section but that aren't file URLs either. The scheme is parsed, and |
228 // everything after the scheme is considered as the path. This is used for | 232 // everything after the scheme is considered as the path. This is used for |
229 // things like "about:" and "javascript:" | 233 // things like "about:" and "javascript:" |
230 void ParsePathURL(const char* url, int url_len, Parsed* parsed); | 234 URL_EXPORT void ParsePathURL(const char* url, int url_len, Parsed* parsed); |
231 void ParsePathURL(const char16* url, int url_len, Parsed* parsed); | 235 URL_EXPORT void ParsePathURL(const char16* url, int url_len, Parsed* parsed); |
232 | 236 |
233 // FileURL is for file URLs. There are some special rules for interpreting | 237 // FileURL is for file URLs. There are some special rules for interpreting |
234 // these. | 238 // these. |
235 void ParseFileURL(const char* url, int url_len, Parsed* parsed); | 239 URL_EXPORT void ParseFileURL(const char* url, int url_len, Parsed* parsed); |
236 void ParseFileURL(const char16* url, int url_len, Parsed* parsed); | 240 URL_EXPORT void ParseFileURL(const char16* url, int url_len, Parsed* parsed); |
237 | 241 |
238 // Filesystem URLs are structured differently than other URLs. | 242 // Filesystem URLs are structured differently than other URLs. |
239 void ParseFileSystemURL(const char* url, | 243 URL_EXPORT void ParseFileSystemURL(const char* url, |
240 int url_len, | 244 int url_len, |
241 Parsed* parsed); | 245 Parsed* parsed); |
242 void ParseFileSystemURL(const char16* url, | 246 URL_EXPORT void ParseFileSystemURL(const char16* url, |
243 int url_len, | 247 int url_len, |
244 Parsed* parsed); | 248 Parsed* parsed); |
245 | 249 |
246 // MailtoURL is for mailto: urls. They are made up of scheme, path, and query. | 250 // MailtoURL is for mailto: urls. They are made up of scheme, path, and query. |
247 void ParseMailtoURL(const char* url, int url_len, Parsed* parsed); | 251 URL_EXPORT void ParseMailtoURL(const char* url, int url_len, Parsed* parsed); |
248 void ParseMailtoURL(const char16* url, int url_len, Parsed* parsed); | 252 URL_EXPORT void ParseMailtoURL(const char16* url, int url_len, Parsed* parsed); |
249 | 253 |
250 // Helper functions ----------------------------------------------------------- | 254 // Helper functions ----------------------------------------------------------- |
251 | 255 |
252 // Locates the scheme according to the URL parser's rules. This function is | 256 // Locates the scheme according to the URL parser's rules. This function is |
253 // designed so the caller can find the scheme and call the correct Init* | 257 // designed so the caller can find the scheme and call the correct Init* |
254 // function according to their known scheme types. | 258 // function according to their known scheme types. |
255 // | 259 // |
256 // It also does not perform any validation on the scheme. | 260 // It also does not perform any validation on the scheme. |
257 // | 261 // |
258 // This function will return true if the scheme is found and will put the | 262 // This function will return true if the scheme is found and will put the |
259 // scheme's range into *scheme. False means no scheme could be found. Note | 263 // scheme's range into *scheme. False means no scheme could be found. Note |
260 // that a URL beginning with a colon has a scheme, but it is empty, so this | 264 // that a URL beginning with a colon has a scheme, but it is empty, so this |
261 // function will return true but *scheme will = (0,0). | 265 // function will return true but *scheme will = (0,0). |
262 // | 266 // |
263 // The scheme is found by skipping spaces and control characters at the | 267 // The scheme is found by skipping spaces and control characters at the |
264 // beginning, and taking everything from there to the first colon to be the | 268 // beginning, and taking everything from there to the first colon to be the |
265 // scheme. The character at scheme.end() will be the colon (we may enhance | 269 // scheme. The character at scheme.end() will be the colon (we may enhance |
266 // this to handle full width colons or something, so don't count on the | 270 // this to handle full width colons or something, so don't count on the |
267 // actual character value). The character at scheme.end()+1 will be the | 271 // actual character value). The character at scheme.end()+1 will be the |
268 // beginning of the rest of the URL, be it the authority or the path (or the | 272 // beginning of the rest of the URL, be it the authority or the path (or the |
269 // end of the string). | 273 // end of the string). |
270 // | 274 // |
271 // The 8-bit version requires UTF-8 encoding. | 275 // The 8-bit version requires UTF-8 encoding. |
272 bool ExtractScheme(const char* url, int url_len, Component* scheme); | 276 URL_EXPORT bool ExtractScheme(const char* url, |
273 bool ExtractScheme(const char16* url, int url_len, Component* scheme); | 277 int url_len, |
| 278 Component* scheme); |
| 279 URL_EXPORT bool ExtractScheme(const char16* url, |
| 280 int url_len, |
| 281 Component* scheme); |
274 | 282 |
275 // Returns true if ch is a character that terminates the authority segment | 283 // Returns true if ch is a character that terminates the authority segment |
276 // of a URL. | 284 // of a URL. |
277 bool IsAuthorityTerminator(char16 ch); | 285 URL_EXPORT bool IsAuthorityTerminator(char16 ch); |
278 | 286 |
279 // Does a best effort parse of input |spec|, in range |auth|. If a particular | 287 // Does a best effort parse of input |spec|, in range |auth|. If a particular |
280 // component is not found, it will be set to invalid. | 288 // component is not found, it will be set to invalid. |
281 void ParseAuthority(const char* spec, | 289 URL_EXPORT void ParseAuthority(const char* spec, |
282 const Component& auth, | 290 const Component& auth, |
283 Component* username, | 291 Component* username, |
284 Component* password, | 292 Component* password, |
285 Component* hostname, | 293 Component* hostname, |
286 Component* port_num); | 294 Component* port_num); |
287 void ParseAuthority(const char16* spec, | 295 URL_EXPORT void ParseAuthority(const char16* spec, |
288 const Component& auth, | 296 const Component& auth, |
289 Component* username, | 297 Component* username, |
290 Component* password, | 298 Component* password, |
291 Component* hostname, | 299 Component* hostname, |
292 Component* port_num); | 300 Component* port_num); |
293 | 301 |
294 // Computes the integer port value from the given port component. The port | 302 // Computes the integer port value from the given port component. The port |
295 // component should have been identified by one of the init functions on | 303 // component should have been identified by one of the init functions on |
296 // |Parsed| for the given input url. | 304 // |Parsed| for the given input url. |
297 // | 305 // |
298 // The return value will be a positive integer between 0 and 64K, or one of | 306 // The return value will be a positive integer between 0 and 64K, or one of |
299 // the two special values below. | 307 // the two special values below. |
300 enum SpecialPort { PORT_UNSPECIFIED = -1, PORT_INVALID = -2 }; | 308 enum SpecialPort { PORT_UNSPECIFIED = -1, PORT_INVALID = -2 }; |
301 int ParsePort(const char* url, const Component& port); | 309 URL_EXPORT int ParsePort(const char* url, const Component& port); |
302 int ParsePort(const char16* url, const Component& port); | 310 URL_EXPORT int ParsePort(const char16* url, const Component& port); |
303 | 311 |
304 // Extracts the range of the file name in the given url. The path must | 312 // Extracts the range of the file name in the given url. The path must |
305 // already have been computed by the parse function, and the matching URL | 313 // already have been computed by the parse function, and the matching URL |
306 // and extracted path are provided to this function. The filename is | 314 // and extracted path are provided to this function. The filename is |
307 // defined as being everything from the last slash/backslash of the path | 315 // defined as being everything from the last slash/backslash of the path |
308 // to the end of the path. | 316 // to the end of the path. |
309 // | 317 // |
310 // The file name will be empty if the path is empty or there is nothing | 318 // The file name will be empty if the path is empty or there is nothing |
311 // following the last slash. | 319 // following the last slash. |
312 // | 320 // |
313 // The 8-bit version requires UTF-8 encoding. | 321 // The 8-bit version requires UTF-8 encoding. |
314 void ExtractFileName(const char* url, | 322 URL_EXPORT void ExtractFileName(const char* url, |
315 const Component& path, | 323 const Component& path, |
316 Component* file_name); | 324 Component* file_name); |
317 void ExtractFileName(const char16* url, | 325 URL_EXPORT void ExtractFileName(const char16* url, |
318 const Component& path, | 326 const Component& path, |
319 Component* file_name); | 327 Component* file_name); |
320 | 328 |
321 // Extract the first key/value from the range defined by |*query|. Updates | 329 // Extract the first key/value from the range defined by |*query|. Updates |
322 // |*query| to start at the end of the extracted key/value pair. This is | 330 // |*query| to start at the end of the extracted key/value pair. This is |
323 // designed for use in a loop: you can keep calling it with the same query | 331 // designed for use in a loop: you can keep calling it with the same query |
324 // object and it will iterate over all items in the query. | 332 // object and it will iterate over all items in the query. |
325 // | 333 // |
326 // Some key/value pairs may have the key, the value, or both be empty (for | 334 // Some key/value pairs may have the key, the value, or both be empty (for |
327 // example, the query string "?&"). These will be returned. Note that an empty | 335 // example, the query string "?&"). These will be returned. Note that an empty |
328 // last parameter "foo.com?" or "foo.com?a&" will not be returned, this case | 336 // last parameter "foo.com?" or "foo.com?a&" will not be returned, this case |
329 // is the same as "done." | 337 // is the same as "done." |
330 // | 338 // |
331 // The initial query component should not include the '?' (this is the default | 339 // The initial query component should not include the '?' (this is the default |
332 // for parsed URLs). | 340 // for parsed URLs). |
333 // | 341 // |
334 // If no key/value are found |*key| and |*value| will be unchanged and it will | 342 // If no key/value are found |*key| and |*value| will be unchanged and it will |
335 // return false. | 343 // return false. |
336 bool ExtractQueryKeyValue(const char* url, | 344 URL_EXPORT bool ExtractQueryKeyValue(const char* url, |
337 Component* query, | 345 Component* query, |
338 Component* key, | 346 Component* key, |
339 Component* value); | 347 Component* value); |
340 bool ExtractQueryKeyValue(const char16* url, | 348 URL_EXPORT bool ExtractQueryKeyValue(const char16* url, |
341 Component* query, | 349 Component* query, |
342 Component* key, | 350 Component* key, |
343 Component* value); | 351 Component* value); |
344 | 352 |
345 } // namespace url_parse | 353 } // namespace url_parse |
346 | 354 |
347 #endif // URL_URL_PARSE_H_ | 355 #endif // URL_URL_PARSE_H_ |
OLD | NEW |