OLD | NEW |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef URL_URL_PARSE_H_ | 5 #ifndef URL_URL_PARSE_H_ |
6 #define URL_URL_PARSE_H_ | 6 #define URL_URL_PARSE_H_ |
7 | 7 |
8 #include <string> | 8 #include <string> |
9 | 9 |
10 #include "base/basictypes.h" | 10 #include "base/basictypes.h" |
11 #include "base/string16.h" | 11 #include "base/string16.h" |
12 #include "url/url_export.h" | |
13 | 12 |
14 namespace url_parse { | 13 namespace url_parse { |
15 | 14 |
16 // Deprecated, but WebKit/WebCore/platform/KURLGooglePrivate.h and | 15 // Deprecated, but WebKit/WebCore/platform/KURLGooglePrivate.h and |
17 // KURLGoogle.cpp still rely on this type. | 16 // KURLGoogle.cpp still rely on this type. |
18 typedef char16 UTF16Char; | 17 typedef char16 UTF16Char; |
19 | 18 |
20 // Component ------------------------------------------------------------------ | 19 // Component ------------------------------------------------------------------ |
21 | 20 |
22 // Represents a substring for URL parsing. | 21 // Represents a substring for URL parsing. |
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
74 // if (!url_parse::ExtractScheme(url, url_len, &scheme)) | 73 // if (!url_parse::ExtractScheme(url, url_len, &scheme)) |
75 // return I_CAN_NOT_FIND_THE_SCHEME_DUDE; | 74 // return I_CAN_NOT_FIND_THE_SCHEME_DUDE; |
76 // | 75 // |
77 // if (IsStandardScheme(url, scheme)) // Not provided by this component | 76 // if (IsStandardScheme(url, scheme)) // Not provided by this component |
78 // url_parse::ParseStandardURL(url, url_len, &parsed); | 77 // url_parse::ParseStandardURL(url, url_len, &parsed); |
79 // else if (IsFileURL(url, scheme)) // Not provided by this component | 78 // else if (IsFileURL(url, scheme)) // Not provided by this component |
80 // url_parse::ParseFileURL(url, url_len, &parsed); | 79 // url_parse::ParseFileURL(url, url_len, &parsed); |
81 // else | 80 // else |
82 // url_parse::ParsePathURL(url, url_len, &parsed); | 81 // url_parse::ParsePathURL(url, url_len, &parsed); |
83 // | 82 // |
84 struct URL_EXPORT Parsed { | 83 struct Parsed { |
85 // Identifies different components. | 84 // Identifies different components. |
86 enum ComponentType { | 85 enum ComponentType { |
87 SCHEME, | 86 SCHEME, |
88 USERNAME, | 87 USERNAME, |
89 PASSWORD, | 88 PASSWORD, |
90 HOST, | 89 HOST, |
91 PORT, | 90 PORT, |
92 PATH, | 91 PATH, |
93 QUERY, | 92 QUERY, |
94 REF, | 93 REF, |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
128 // include_delim=true, ...=false ("<-" indicates different) | 127 // include_delim=true, ...=false ("<-" indicates different) |
129 // SCHEME: 0 0 | 128 // SCHEME: 0 0 |
130 // USERNAME: 5 5 | 129 // USERNAME: 5 5 |
131 // PASSWORD: 5 5 | 130 // PASSWORD: 5 5 |
132 // HOST: 7 7 | 131 // HOST: 7 7 |
133 // *PORT: 10 11 <- | 132 // *PORT: 10 11 <- |
134 // PATH: 13 13 | 133 // PATH: 13 13 |
135 // *QUERY: 14 15 <- | 134 // *QUERY: 14 15 <- |
136 // *REF: 20 20 | 135 // *REF: 20 20 |
137 // | 136 // |
138 int CountCharactersBefore(ComponentType type, bool include_delimiter) const; | 137 int CountCharactersBefore(ComponentType type, |
| 138 bool include_delimiter) const; |
139 | 139 |
140 // Scheme without the colon: "http://foo/" would have a scheme of "http". | 140 // Scheme without the colon: "http://foo/" would have a scheme of "http". |
141 // The length will be -1 if no scheme is specified ("foo.com"), or 0 if there | 141 // The length will be -1 if no scheme is specified ("foo.com"), or 0 if there |
142 // is a colon but no scheme (":foo"). Note that the scheme is not guaranteed | 142 // is a colon but no scheme (":foo"). Note that the scheme is not guaranteed |
143 // to start at the beginning of the string if there are preceding whitespace | 143 // to start at the beginning of the string if there are preceding whitespace |
144 // or control characters. | 144 // or control characters. |
145 Component scheme; | 145 Component scheme; |
146 | 146 |
147 // Username. Specified in URLs with an @ sign before the host. See |password| | 147 // Username. Specified in URLs with an @ sign before the host. See |password| |
148 Component username; | 148 Component username; |
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
213 // at any point in the process, and will actually handle embedded NULLs. | 213 // at any point in the process, and will actually handle embedded NULLs. |
214 // | 214 // |
215 // IMPORTANT: These functions do NOT hang on to the given pointer or copy it | 215 // IMPORTANT: These functions do NOT hang on to the given pointer or copy it |
216 // in any way. See the comment above the struct. | 216 // in any way. See the comment above the struct. |
217 // | 217 // |
218 // The 8-bit versions require UTF-8 encoding. | 218 // The 8-bit versions require UTF-8 encoding. |
219 | 219 |
220 // StandardURL is for when the scheme is known to be one that has an | 220 // StandardURL is for when the scheme is known to be one that has an |
221 // authority (host) like "http". This function will not handle weird ones | 221 // authority (host) like "http". This function will not handle weird ones |
222 // like "about:" and "javascript:", or do the right thing for "file:" URLs. | 222 // like "about:" and "javascript:", or do the right thing for "file:" URLs. |
223 URL_EXPORT void ParseStandardURL(const char* url, | 223 void ParseStandardURL(const char* url, int url_len, Parsed* parsed); |
224 int url_len, | 224 void ParseStandardURL(const char16* url, int url_len, Parsed* parsed); |
225 Parsed* parsed); | |
226 URL_EXPORT void ParseStandardURL(const char16* url, | |
227 int url_len, | |
228 Parsed* parsed); | |
229 | 225 |
230 // PathURL is for when the scheme is known not to have an authority (host) | 226 // PathURL is for when the scheme is known not to have an authority (host) |
231 // section but that aren't file URLs either. The scheme is parsed, and | 227 // section but that aren't file URLs either. The scheme is parsed, and |
232 // everything after the scheme is considered as the path. This is used for | 228 // everything after the scheme is considered as the path. This is used for |
233 // things like "about:" and "javascript:" | 229 // things like "about:" and "javascript:" |
234 URL_EXPORT void ParsePathURL(const char* url, int url_len, Parsed* parsed); | 230 void ParsePathURL(const char* url, int url_len, Parsed* parsed); |
235 URL_EXPORT void ParsePathURL(const char16* url, int url_len, Parsed* parsed); | 231 void ParsePathURL(const char16* url, int url_len, Parsed* parsed); |
236 | 232 |
237 // FileURL is for file URLs. There are some special rules for interpreting | 233 // FileURL is for file URLs. There are some special rules for interpreting |
238 // these. | 234 // these. |
239 URL_EXPORT void ParseFileURL(const char* url, int url_len, Parsed* parsed); | 235 void ParseFileURL(const char* url, int url_len, Parsed* parsed); |
240 URL_EXPORT void ParseFileURL(const char16* url, int url_len, Parsed* parsed); | 236 void ParseFileURL(const char16* url, int url_len, Parsed* parsed); |
241 | 237 |
242 // Filesystem URLs are structured differently than other URLs. | 238 // Filesystem URLs are structured differently than other URLs. |
243 URL_EXPORT void ParseFileSystemURL(const char* url, | 239 void ParseFileSystemURL(const char* url, |
244 int url_len, | 240 int url_len, |
245 Parsed* parsed); | 241 Parsed* parsed); |
246 URL_EXPORT void ParseFileSystemURL(const char16* url, | 242 void ParseFileSystemURL(const char16* url, |
247 int url_len, | 243 int url_len, |
248 Parsed* parsed); | 244 Parsed* parsed); |
249 | 245 |
250 // MailtoURL is for mailto: urls. They are made up of scheme, path, and query. | 246 // MailtoURL is for mailto: urls. They are made up of scheme, path, and query. |
251 URL_EXPORT void ParseMailtoURL(const char* url, int url_len, Parsed* parsed); | 247 void ParseMailtoURL(const char* url, int url_len, Parsed* parsed); |
252 URL_EXPORT void ParseMailtoURL(const char16* url, int url_len, Parsed* parsed); | 248 void ParseMailtoURL(const char16* url, int url_len, Parsed* parsed); |
253 | 249 |
254 // Helper functions ----------------------------------------------------------- | 250 // Helper functions ----------------------------------------------------------- |
255 | 251 |
256 // Locates the scheme according to the URL parser's rules. This function is | 252 // Locates the scheme according to the URL parser's rules. This function is |
257 // designed so the caller can find the scheme and call the correct Parse* | 253 // designed so the caller can find the scheme and call the correct Parse* |
258 // function according to their known scheme types. | 254 // function according to their known scheme types. |
259 // | 255 // |
260 // It also does not perform any validation on the scheme. | 256 // It also does not perform any validation on the scheme. |
261 // | 257 // |
262 // This function will return true if the scheme is found and will put the | 258 // This function will return true if the scheme is found and will put the |
263 // scheme's range into *scheme. False means no scheme could be found. Note | 259 // scheme's range into *scheme. False means no scheme could be found. Note |
264 // that a URL beginning with a colon has a scheme, but it is empty, so this | 260 // that a URL beginning with a colon has a scheme, but it is empty, so this |
265 // function will return true but *scheme will = (0,0). | 261 // function will return true but *scheme will = (0,0). |
266 // | 262 // |
267 // The scheme is found by skipping spaces and control characters at the | 263 // The scheme is found by skipping spaces and control characters at the |
268 // beginning, and taking everything from there to the first colon to be the | 264 // beginning, and taking everything from there to the first colon to be the |
269 // scheme. The character at scheme.end() will be the colon (we may enhance | 265 // scheme. The character at scheme.end() will be the colon (we may enhance |
270 // this to handle full width colons or something, so don't count on the | 266 // this to handle full width colons or something, so don't count on the |
271 // actual character value). The character at scheme.end()+1 will be the | 267 // actual character value). The character at scheme.end()+1 will be the |
272 // beginning of the rest of the URL, be it the authority or the path (or the | 268 // beginning of the rest of the URL, be it the authority or the path (or the |
273 // end of the string). | 269 // end of the string). |
274 // | 270 // |
275 // The 8-bit version requires UTF-8 encoding. | 271 // The 8-bit version requires UTF-8 encoding. |
276 URL_EXPORT bool ExtractScheme(const char* url, | 272 bool ExtractScheme(const char* url, int url_len, Component* scheme); |
277 int url_len, | 273 bool ExtractScheme(const char16* url, int url_len, Component* scheme); |
278 Component* scheme); | |
279 URL_EXPORT bool ExtractScheme(const char16* url, | |
280 int url_len, | |
281 Component* scheme); | |
282 | 274 |
283 // Returns true if ch is a character that terminates the authority segment | 275 // Returns true if ch is a character that terminates the authority segment |
284 // of a URL. | 276 // of a URL. |
285 URL_EXPORT bool IsAuthorityTerminator(char16 ch); | 277 bool IsAuthorityTerminator(char16 ch); |
286 | 278 |
287 // Does a best effort parse of input |spec|, in range |auth|. If a particular | 279 // Does a best effort parse of input |spec|, in range |auth|. If a particular |
288 // component is not found, it will be set to invalid. | 280 // component is not found, it will be set to invalid. |
289 URL_EXPORT void ParseAuthority(const char* spec, | 281 void ParseAuthority(const char* spec, |
290 const Component& auth, | 282 const Component& auth, |
291 Component* username, | 283 Component* username, |
292 Component* password, | 284 Component* password, |
293 Component* hostname, | 285 Component* hostname, |
294 Component* port_num); | 286 Component* port_num); |
295 URL_EXPORT void ParseAuthority(const char16* spec, | 287 void ParseAuthority(const char16* spec, |
296 const Component& auth, | 288 const Component& auth, |
297 Component* username, | 289 Component* username, |
298 Component* password, | 290 Component* password, |
299 Component* hostname, | 291 Component* hostname, |
300 Component* port_num); | 292 Component* port_num); |
301 | 293 |
302 // Computes the integer port value from the given port component. The port | 294 // Computes the integer port value from the given port component. The port |
303 // component should have been identified by one of the init functions on | 295 // component should have been identified by one of the init functions on |
304 // |Parsed| for the given input url. | 296 // |Parsed| for the given input url. |
305 // | 297 // |
306 // The return value will be a positive integer between 0 and 64K, or one of | 298 // The return value will be a positive integer between 0 and 64K, or one of |
307 // the two special values below. | 299 // the two special values below. |
308 enum SpecialPort { PORT_UNSPECIFIED = -1, PORT_INVALID = -2 }; | 300 enum SpecialPort { PORT_UNSPECIFIED = -1, PORT_INVALID = -2 }; |
309 URL_EXPORT int ParsePort(const char* url, const Component& port); | 301 int ParsePort(const char* url, const Component& port); |
310 URL_EXPORT int ParsePort(const char16* url, const Component& port); | 302 int ParsePort(const char16* url, const Component& port); |
311 | 303 |
312 // Extracts the range of the file name in the given url. The path must | 304 // Extracts the range of the file name in the given url. The path must |
313 // already have been computed by the parse function, and the matching URL | 305 // already have been computed by the parse function, and the matching URL |
314 // and extracted path are provided to this function. The filename is | 306 // and extracted path are provided to this function. The filename is |
315 // defined as being everything from the last slash/backslash of the path | 307 // defined as being everything from the last slash/backslash of the path |
316 // to the end of the path. | 308 // to the end of the path. |
317 // | 309 // |
318 // The file name will be empty if the path is empty or there is nothing | 310 // The file name will be empty if the path is empty or there is nothing |
319 // following the last slash. | 311 // following the last slash. |
320 // | 312 // |
321 // The 8-bit version requires UTF-8 encoding. | 313 // The 8-bit version requires UTF-8 encoding. |
322 URL_EXPORT void ExtractFileName(const char* url, | 314 void ExtractFileName(const char* url, |
323 const Component& path, | 315 const Component& path, |
324 Component* file_name); | 316 Component* file_name); |
325 URL_EXPORT void ExtractFileName(const char16* url, | 317 void ExtractFileName(const char16* url, |
326 const Component& path, | 318 const Component& path, |
327 Component* file_name); | 319 Component* file_name); |
328 | 320 |
329 // Extract the first key/value from the range defined by |*query|. Updates | 321 // Extract the first key/value from the range defined by |*query|. Updates |
330 // |*query| to start at the end of the extracted key/value pair. This is | 322 // |*query| to start at the end of the extracted key/value pair. This is |
331 // designed for use in a loop: you can keep calling it with the same query | 323 // designed for use in a loop: you can keep calling it with the same query |
332 // object and it will iterate over all items in the query. | 324 // object and it will iterate over all items in the query. |
333 // | 325 // |
334 // Some key/value pairs may have the key, the value, or both be empty (for | 326 // Some key/value pairs may have the key, the value, or both be empty (for |
335 // example, the query string "?&"). These will be returned. Note that an empty | 327 // example, the query string "?&"). These will be returned. Note that an empty |
336 // last parameter "foo.com?" or "foo.com?a&" will not be returned; this case | 328 // last parameter "foo.com?" or "foo.com?a&" will not be returned; this case |
337 // is the same as "done." | 329 // is the same as "done." |
338 // | 330 // |
339 // The initial query component should not include the '?' (this is the default | 331 // The initial query component should not include the '?' (this is the default |
340 // for parsed URLs). | 332 // for parsed URLs). |
341 // | 333 // |
342 // If no key/value are found |*key| and |*value| will be unchanged and it will | 334 // If no key/value are found |*key| and |*value| will be unchanged and it will |
343 // return false. | 335 // return false. |
344 URL_EXPORT bool ExtractQueryKeyValue(const char* url, | 336 bool ExtractQueryKeyValue(const char* url, |
345 Component* query, | 337 Component* query, |
346 Component* key, | 338 Component* key, |
347 Component* value); | 339 Component* value); |
348 URL_EXPORT bool ExtractQueryKeyValue(const char16* url, | 340 bool ExtractQueryKeyValue(const char16* url, |
349 Component* query, | 341 Component* query, |
350 Component* key, | 342 Component* key, |
351 Component* value); | 343 Component* value); |
352 | 344 |
353 } // namespace url_parse | 345 } // namespace url_parse |
354 | 346 |
355 #endif // URL_URL_PARSE_H_ | 347 #endif // URL_URL_PARSE_H_ |
OLD | NEW |