| OLD | NEW |
| 1 // Copyright 2007, Google Inc. | 1 // Copyright 2007, Google Inc. |
| 2 // All rights reserved. | 2 // All rights reserved. |
| 3 // | 3 // |
| 4 // Redistribution and use in source and binary forms, with or without | 4 // Redistribution and use in source and binary forms, with or without |
| 5 // modification, are permitted provided that the following conditions are | 5 // modification, are permitted provided that the following conditions are |
| 6 // met: | 6 // met: |
| 7 // | 7 // |
| 8 // * Redistributions of source code must retain the above copyright | 8 // * Redistributions of source code must retain the above copyright |
| 9 // notice, this list of conditions and the following disclaimer. | 9 // notice, this list of conditions and the following disclaimer. |
| 10 // * Redistributions in binary form must reproduce the above | 10 // * Redistributions in binary form must reproduce the above |
| (...skipping 16 matching lines...) Expand all Loading... |
| 27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 29 | 29 |
| 30 #ifndef GOOGLEURL_SRC_URL_PARSE_H__ | 30 #ifndef GOOGLEURL_SRC_URL_PARSE_H__ |
| 31 #define GOOGLEURL_SRC_URL_PARSE_H__ | 31 #define GOOGLEURL_SRC_URL_PARSE_H__ |
| 32 | 32 |
| 33 #include <string> | 33 #include <string> |
| 34 | 34 |
| 35 #include "base/basictypes.h" | 35 #include "base/basictypes.h" |
| 36 #include "base/string16.h" | 36 #include "base/string16.h" |
| 37 #include "googleurl/src/url_common.h" |
| 37 | 38 |
| 38 namespace url_parse { | 39 namespace url_parse { |
| 39 | 40 |
| 40 // Deprecated, but WebKit/WebCore/platform/KURLGooglePrivate.h and | 41 // Deprecated, but WebKit/WebCore/platform/KURLGooglePrivate.h and |
| 41 // KURLGoogle.cpp still rely on this type. | 42 // KURLGoogle.cpp still rely on this type. |
| 42 typedef char16 UTF16Char; | 43 typedef char16 UTF16Char; |
| 43 | 44 |
| 44 // Component ------------------------------------------------------------------ | 45 // Component ------------------------------------------------------------------ |
| 45 | 46 |
| 46 // Represents a substring for URL parsing. | 47 // Represents a substring for URL parsing. |
| (...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 112 USERNAME, | 113 USERNAME, |
| 113 PASSWORD, | 114 PASSWORD, |
| 114 HOST, | 115 HOST, |
| 115 PORT, | 116 PORT, |
| 116 PATH, | 117 PATH, |
| 117 QUERY, | 118 QUERY, |
| 118 REF, | 119 REF, |
| 119 }; | 120 }; |
| 120 | 121 |
| 121 // The default constructor is sufficient for the components. | 122 // The default constructor is sufficient for the components. |
| 122 Parsed() {} | 123 GURL_API Parsed() {} |
| 123 | 124 |
| 124 // Returns the length of the URL (the end of the last component). | 125 // Returns the length of the URL (the end of the last component). |
| 125 // | 126 // |
| 126 // Note that for some invalid, non-canonical URLs, this may not be the length | 127 // Note that for some invalid, non-canonical URLs, this may not be the length |
| 127 // of the string. For example "http://": the parsed structure will only | 128 // of the string. For example "http://": the parsed structure will only |
| 128 // contain an entry for the four-character scheme, and it doesn't know about | 129 // contain an entry for the four-character scheme, and it doesn't know about |
| 129 // the "://". For all other last-components, it will return the real length. | 130 // the "://". For all other last-components, it will return the real length. |
| 130 int Length() const; | 131 GURL_API int Length() const; |
| 131 | 132 |
| 132 // Returns the number of characters before the given component if it exists, | 133 // Returns the number of characters before the given component if it exists, |
| 133 // or where the component would be if it did exist. This will return the | 134 // or where the component would be if it did exist. This will return the |
| 134 // string length if the component would be appended to the end. | 135 // string length if the component would be appended to the end. |
| 135 // | 136 // |
| 136 // Note that this can get a little funny for the port, query, and ref | 137 // Note that this can get a little funny for the port, query, and ref |
| 137 // components which have a delimiter that is not counted as part of the | 138 // components which have a delimiter that is not counted as part of the |
| 138 // component. The |include_delimiter| flag controls if you want this counted | 139 // component. The |include_delimiter| flag controls if you want this counted |
| 139 // as part of the component or not when the component exists. | 140 // as part of the component or not when the component exists. |
| 140 // | 141 // |
| 141 // This example shows the difference between the two flags for two of these | 142 // This example shows the difference between the two flags for two of these |
| 142 // delimited components that are present (the port and query) and one that | 143 // delimited components that are present (the port and query) and one that |
| 143 // isn't (the reference). The components that this flag affects are marked | 144 // isn't (the reference). The components that this flag affects are marked |
| 144 // with a *. | 145 // with a *. |
| 145 // 0 1 2 | 146 // 0 1 2 |
| 146 // 012345678901234567890 | 147 // 012345678901234567890 |
| 147 // Example input: http://foo:80/?query | 148 // Example input: http://foo:80/?query |
| 148 // include_delim=true, ...=false ("<-" indicates different) | 149 // include_delim=true, ...=false ("<-" indicates different) |
| 149 // SCHEME: 0 0 | 150 // SCHEME: 0 0 |
| 150 // USERNAME: 5 5 | 151 // USERNAME: 5 5 |
| 151 // PASSWORD: 5 5 | 152 // PASSWORD: 5 5 |
| 152 // HOST: 7 7 | 153 // HOST: 7 7 |
| 153 // *PORT: 10 11 <- | 154 // *PORT: 10 11 <- |
| 154 // PATH: 13 13 | 155 // PATH: 13 13 |
| 155 // *QUERY: 14 15 <- | 156 // *QUERY: 14 15 <- |
| 156 // *REF: 20 20 | 157 // *REF: 20 20 |
| 157 // | 158 // |
| 158 int CountCharactersBefore(ComponentType type, bool include_delimiter) const; | 159 GURL_API int CountCharactersBefore(ComponentType type, |
| 160 bool include_delimiter) const; |
| 159 | 161 |
| 160 // Scheme without the colon: "http://foo/" would have a scheme of "http". | 162 // Scheme without the colon: "http://foo/" would have a scheme of "http". |
| 161 // The length will be -1 if no scheme is specified ("foo.com"), or 0 if there | 163 // The length will be -1 if no scheme is specified ("foo.com"), or 0 if there |
| 162 // is a colon but no scheme (":foo"). Note that the scheme is not guaranteed | 164 // is a colon but no scheme (":foo"). Note that the scheme is not guaranteed |
| 163 // to start at the beginning of the string if there are preceding whitespace | 165 // to start at the beginning of the string if there are preceding whitespace |
| 164 // or control characters. | 166 // or control characters. |
| 165 Component scheme; | 167 Component scheme; |
| 166 | 168 |
| 167 // Username. Specified in URLs with an @ sign before the host. See |password| | 169 // Username. Specified in URLs with an @ sign before the host. See |password| |
| 168 Component username; | 170 Component username; |
| (...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 208 // at any point in the process, and will actually handle embedded NULLs. | 210 // at any point in the process, and will actually handle embedded NULLs. |
| 209 // | 211 // |
| 210 // IMPORTANT: These functions do NOT hang on to the given pointer or copy it | 212 // IMPORTANT: These functions do NOT hang on to the given pointer or copy it |
| 211 // in any way. See the comment above the struct. | 213 // in any way. See the comment above the struct. |
| 212 // | 214 // |
| 213 // The 8-bit versions require UTF-8 encoding. | 215 // The 8-bit versions require UTF-8 encoding. |
| 214 | 216 |
| 215 // StandardURL is for when the scheme is known to be one that has an | 217 // StandardURL is for when the scheme is known to be one that has an |
| 216 // authority (host) like "http". This function will not handle weird ones | 218 // authority (host) like "http". This function will not handle weird ones |
| 217 // like "about:" and "javascript:", or do the right thing for "file:" URLs. | 219 // like "about:" and "javascript:", or do the right thing for "file:" URLs. |
| 218 void ParseStandardURL(const char* url, int url_len, Parsed* parsed); | 220 GURL_API void ParseStandardURL(const char* url, int url_len, Parsed* parsed); |
| 219 void ParseStandardURL(const char16* url, int url_len, Parsed* parsed); | 221 GURL_API void ParseStandardURL(const char16* url, int url_len, Parsed* parsed); |
| 220 | 222 |
| 221 // PathURL is for when the scheme is known not to have an authority (host) | 223 // PathURL is for when the scheme is known not to have an authority (host) |
| 222 // section but that aren't file URLs either. The scheme is parsed, and | 224 // section but that aren't file URLs either. The scheme is parsed, and |
| 223 // everything after the scheme is considered as the path. This is used for | 225 // everything after the scheme is considered as the path. This is used for |
| 224 // things like "about:" and "javascript:" | 226 // things like "about:" and "javascript:" |
| 225 void ParsePathURL(const char* url, int url_len, Parsed* parsed); | 227 GURL_API void ParsePathURL(const char* url, int url_len, Parsed* parsed); |
| 226 void ParsePathURL(const char16* url, int url_len, Parsed* parsed); | 228 GURL_API void ParsePathURL(const char16* url, int url_len, Parsed* parsed); |
| 227 | 229 |
| 228 // FileURL is for file URLs. There are some special rules for interpreting | 230 // FileURL is for file URLs. There are some special rules for interpreting |
| 229 // these. | 231 // these. |
| 230 void ParseFileURL(const char* url, int url_len, Parsed* parsed); | 232 GURL_API void ParseFileURL(const char* url, int url_len, Parsed* parsed); |
| 231 void ParseFileURL(const char16* url, int url_len, Parsed* parsed); | 233 GURL_API void ParseFileURL(const char16* url, int url_len, Parsed* parsed); |
| 232 | 234 |
| 233 // MailtoURL is for mailto: urls. They are made up of scheme, path, and query. | 235 // MailtoURL is for mailto: urls. They are made up of scheme, path, and query. |
| 234 void ParseMailtoURL(const char* url, int url_len, Parsed* parsed); | 236 GURL_API void ParseMailtoURL(const char* url, int url_len, Parsed* parsed); |
| 235 void ParseMailtoURL(const char16* url, int url_len, Parsed* parsed); | 237 GURL_API void ParseMailtoURL(const char16* url, int url_len, Parsed* parsed); |
| 236 | 238 |
| 237 // Helper functions ----------------------------------------------------------- | 239 // Helper functions ----------------------------------------------------------- |
| 238 | 240 |
| 239 // Locates the scheme according to the URL parser's rules. This function is | 241 // Locates the scheme according to the URL parser's rules. This function is |
| 240 // designed so the caller can find the scheme and call the correct Parse*URL | 242 // designed so the caller can find the scheme and call the correct Parse*URL |
| 241 // function according to their known scheme types. | 243 // function according to their known scheme types. |
| 242 // | 244 // |
| 243 // It also does not perform any validation on the scheme. | 245 // It also does not perform any validation on the scheme. |
| 244 // | 246 // |
| 245 // This function will return true if the scheme is found and will put the | 247 // This function will return true if the scheme is found and will put the |
| 246 // scheme's range into *scheme. False means no scheme could be found. Note | 248 // scheme's range into *scheme. False means no scheme could be found. Note |
| 247 // that a URL beginning with a colon has a scheme, but it is empty, so this | 249 // that a URL beginning with a colon has a scheme, but it is empty, so this |
| 248 // function will return true but *scheme will be set to (0,0). | 250 // function will return true but *scheme will be set to (0,0). |
| 249 // | 251 // |
| 250 // The scheme is found by skipping spaces and control characters at the | 252 // The scheme is found by skipping spaces and control characters at the |
| 251 // beginning, and taking everything from there to the first colon to be the | 253 // beginning, and taking everything from there to the first colon to be the |
| 252 // scheme. The character at scheme.end() will be the colon (we may enhance | 254 // scheme. The character at scheme.end() will be the colon (we may enhance |
| 253 // this to handle full width colons or something, so don't count on the | 255 // this to handle full width colons or something, so don't count on the |
| 254 // actual character value). The character at scheme.end()+1 will be the | 256 // actual character value). The character at scheme.end()+1 will be the |
| 255 // beginning of the rest of the URL, be it the authority or the path (or the | 257 // beginning of the rest of the URL, be it the authority or the path (or the |
| 256 // end of the string). | 258 // end of the string). |
| 257 // | 259 // |
| 258 // The 8-bit version requires UTF-8 encoding. | 260 // The 8-bit version requires UTF-8 encoding. |
| 259 bool ExtractScheme(const char* url, int url_len, Component* scheme); | 261 GURL_API bool ExtractScheme(const char* url, int url_len, Component* scheme); |
| 260 bool ExtractScheme(const char16* url, int url_len, Component* scheme); | 262 GURL_API bool ExtractScheme(const char16* url, int url_len, Component* scheme); |
| 261 | 263 |
| 262 // Returns true if ch is a character that terminates the authority segment | 264 // Returns true if ch is a character that terminates the authority segment |
| 263 // of a URL. | 265 // of a URL. |
| 264 bool IsAuthorityTerminator(char16 ch); | 266 GURL_API bool IsAuthorityTerminator(char16 ch); |
| 265 | 267 |
| 266 // Does a best effort parse of input |spec|, in range |auth|. If a particular | 268 // Does a best effort parse of input |spec|, in range |auth|. If a particular |
| 267 // component is not found, it will be set to invalid. | 269 // component is not found, it will be set to invalid. |
| 268 void ParseAuthority(const char* spec, | 270 GURL_API void ParseAuthority(const char* spec, |
| 269 const Component& auth, | 271 const Component& auth, |
| 270 Component* username, | 272 Component* username, |
| 271 Component* password, | 273 Component* password, |
| 272 Component* hostname, | 274 Component* hostname, |
| 273 Component* port_num); | 275 Component* port_num); |
| 274 void ParseAuthority(const char16* spec, | 276 GURL_API void ParseAuthority(const char16* spec, |
| 275 const Component& auth, | 277 const Component& auth, |
| 276 Component* username, | 278 Component* username, |
| 277 Component* password, | 279 Component* password, |
| 278 Component* hostname, | 280 Component* hostname, |
| 279 Component* port_num); | 281 Component* port_num); |
| 280 | 282 |
| 281 // Computes the integer port value from the given port component. The port | 283 // Computes the integer port value from the given port component. The port |
| 282 // component should have been identified by one of the init functions on | 284 // component should have been identified by one of the init functions on |
| 283 // |Parsed| for the given input url. | 285 // |Parsed| for the given input url. |
| 284 // | 286 // |
| 285 // The return value will be a non-negative integer between 0 and 64K, or one of | 287 // The return value will be a non-negative integer between 0 and 64K, or one of |
| 286 // the two special values below. | 288 // the two special values below. |
| 287 enum SpecialPort { PORT_UNSPECIFIED = -1, PORT_INVALID = -2 }; | 289 enum SpecialPort { PORT_UNSPECIFIED = -1, PORT_INVALID = -2 }; |
| 288 int ParsePort(const char* url, const Component& port); | 290 GURL_API int ParsePort(const char* url, const Component& port); |
| 289 int ParsePort(const char16* url, const Component& port); | 291 GURL_API int ParsePort(const char16* url, const Component& port); |
| 290 | 292 |
| 291 // Extracts the range of the file name in the given url. The path must | 293 // Extracts the range of the file name in the given url. The path must |
| 292 // already have been computed by the parse function, and the matching URL | 294 // already have been computed by the parse function, and the matching URL |
| 293 // and extracted path are provided to this function. The filename is | 295 // and extracted path are provided to this function. The filename is |
| 294 // defined as being everything from the last slash/backslash of the path | 296 // defined as being everything from the last slash/backslash of the path |
| 295 // to the end of the path. | 297 // to the end of the path. |
| 296 // | 298 // |
| 297 // The file name will be empty if the path is empty or there is nothing | 299 // The file name will be empty if the path is empty or there is nothing |
| 298 // following the last slash. | 300 // following the last slash. |
| 299 // | 301 // |
| 300 // The 8-bit version requires UTF-8 encoding. | 302 // The 8-bit version requires UTF-8 encoding. |
| 301 void ExtractFileName(const char* url, | 303 GURL_API void ExtractFileName(const char* url, |
| 302 const Component& path, | 304 const Component& path, |
| 303 Component* file_name); | 305 Component* file_name); |
| 304 void ExtractFileName(const char16* url, | 306 GURL_API void ExtractFileName(const char16* url, |
| 305 const Component& path, | 307 const Component& path, |
| 306 Component* file_name); | 308 Component* file_name); |
| 307 | 309 |
| 308 // Extract the first key/value from the range defined by |*query|. Updates | 310 // Extract the first key/value from the range defined by |*query|. Updates |
| 309 // |*query| to start at the end of the extracted key/value pair. This is | 311 // |*query| to start at the end of the extracted key/value pair. This is |
| 310 // designed for use in a loop: you can keep calling it with the same query | 312 // designed for use in a loop: you can keep calling it with the same query |
| 311 // object and it will iterate over all items in the query. | 313 // object and it will iterate over all items in the query. |
| 312 // | 314 // |
| 313 // Some key/value pairs may have the key, the value, or both be empty (for | 315 // Some key/value pairs may have the key, the value, or both be empty (for |
| 314 // example, the query string "?&"). These will be returned. Note that an empty | 316 // example, the query string "?&"). These will be returned. Note that an empty |
| 315 // last parameter "foo.com?" or "foo.com?a&" will not be returned; this case | 317 // last parameter "foo.com?" or "foo.com?a&" will not be returned; this case |
| 316 // is the same as "done." | 318 // is the same as "done." |
| 317 // | 319 // |
| 318 // The initial query component should not include the '?' (this is the default | 320 // The initial query component should not include the '?' (this is the default |
| 319 // for parsed URLs). | 321 // for parsed URLs). |
| 320 // | 322 // |
| 321 // If no key/value are found |*key| and |*value| will be unchanged and it will | 323 // If no key/value are found |*key| and |*value| will be unchanged and it will |
| 322 // return false. | 324 // return false. |
| 323 bool ExtractQueryKeyValue(const char* url, | 325 GURL_API bool ExtractQueryKeyValue(const char* url, |
| 324 Component* query, | 326 Component* query, |
| 325 Component* key, | 327 Component* key, |
| 326 Component* value); | 328 Component* value); |
| 327 bool ExtractQueryKeyValue(const char16* url, | 329 GURL_API bool ExtractQueryKeyValue(const char16* url, |
| 328 Component* query, | 330 Component* query, |
| 329 Component* key, | 331 Component* key, |
| 330 Component* value); | 332 Component* value); |
| 331 | 333 |
| 332 } // namespace url_parse | 334 } // namespace url_parse |
| 333 | 335 |
| 334 #endif // GOOGLEURL_SRC_URL_PARSE_H__ | 336 #endif // GOOGLEURL_SRC_URL_PARSE_H__ |
| OLD | NEW |