OLD | NEW |
1 // Copyright 2007, Google Inc. | 1 // Copyright 2007, Google Inc. |
2 // All rights reserved. | 2 // All rights reserved. |
3 // | 3 // |
4 // Redistribution and use in source and binary forms, with or without | 4 // Redistribution and use in source and binary forms, with or without |
5 // modification, are permitted provided that the following conditions are | 5 // modification, are permitted provided that the following conditions are |
6 // met: | 6 // met: |
7 // | 7 // |
8 // * Redistributions of source code must retain the above copyright | 8 // * Redistributions of source code must retain the above copyright |
9 // notice, this list of conditions and the following disclaimer. | 9 // notice, this list of conditions and the following disclaimer. |
10 // * Redistributions in binary form must reproduce the above | 10 // * Redistributions in binary form must reproduce the above |
(...skipping 308 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
319 url_parse::Component* out_username, | 319 url_parse::Component* out_username, |
320 url_parse::Component* out_password); | 320 url_parse::Component* out_password); |
321 bool CanonicalizeUserInfo(const char16* username_source, | 321 bool CanonicalizeUserInfo(const char16* username_source, |
322 const url_parse::Component& username, | 322 const url_parse::Component& username, |
323 const char16* password_source, | 323 const char16* password_source, |
324 const url_parse::Component& password, | 324 const url_parse::Component& password, |
325 CanonOutput* output, | 325 CanonOutput* output, |
326 url_parse::Component* out_username, | 326 url_parse::Component* out_username, |
327 url_parse::Component* out_password); | 327 url_parse::Component* out_password); |
328 | 328 |
| 329 |
| 330 // This structure holds detailed state exported from the IP/Host canonicalizers. |
| 331 // Additional fields may be added as callers require them. |
| 332 struct CanonHostInfo { |
| 333 CanonHostInfo() : family(NEUTRAL), num_ipv4_components(0), out_host() {} |
| 334 |
| 335 // Convenience function to test if family is an IP address. |
| 336 bool IsIPAddress() const { return family == IPV4 || family == IPV6; } |
| 337 |
| 338 // This field summarizes how the input was classified by the canonicalizer. |
| 339 enum Family { |
| 340 NEUTRAL, // - Doesn't resemble an IP address. As far as the IP |
| 341 // canonicalizer is concerned, it should be treated as a |
| 342 // hostname. |
| 343 BROKEN, // - Almost an IP, but was not canonicalized. This could be an |
| 344 // IPv4 address where truncation occurred, or something |
| 345 // containing the special characters :[] which did not parse |
| 346 // as an IPv6 address. Never attempt to connect to this |
| 347 // address, because it might actually succeed! |
| 348 IPV4, // - Successfully canonicalized as an IPv4 address. |
| 349 IPV6, // - Successfully canonicalized as an IPv6 address. |
| 350 }; |
| 351 Family family; |
| 352 |
| 353 // If |family| is IPV4, then this is the number of nonempty dot-separated |
| 354 // components in the input text, from 1 to 4. If |family| is not IPV4, |
| 355 // this value is undefined. |
| 356 int num_ipv4_components; |
| 357 |
| 358 // Location of host within the canonicalized output. |
| 359 // CanonicalizeIPAddress() only sets this field if |family| is IPV4 or IPV6. |
| 360 // CanonicalizeHostVerbose() always sets it. |
| 361 url_parse::Component out_host; |
| 362 }; |
| 363 |
| 364 |
329 // Host. | 365 // Host. |
330 // | 366 // |
331 // The 8-bit version requires UTF-8 encoding. | 367 // The 8-bit version requires UTF-8 encoding. Use this version when you only |
| 368 // need to know whether canonicalization succeeded. |
332 bool CanonicalizeHost(const char* spec, | 369 bool CanonicalizeHost(const char* spec, |
333 const url_parse::Component& host, | 370 const url_parse::Component& host, |
334 CanonOutput* output, | 371 CanonOutput* output, |
335 url_parse::Component* out_host); | 372 url_parse::Component* out_host); |
336 bool CanonicalizeHost(const char16* spec, | 373 bool CanonicalizeHost(const char16* spec, |
337 const url_parse::Component& host, | 374 const url_parse::Component& host, |
338 CanonOutput* output, | 375 CanonOutput* output, |
339 url_parse::Component* out_host); | 376 url_parse::Component* out_host); |
340 | 377 |
| 378 // Extended version of CanonicalizeHost, which returns additional information. |
| 379 // Use this when you need to know whether the hostname was an IP address. |
| 380 // A successful return is indicated by host_info->family != BROKEN. See the |
| 381 // definition of CanonHostInfo above for details. |
| 382 void CanonicalizeHostVerbose(const char* spec, |
| 383 const url_parse::Component& host, |
| 384 CanonOutput* output, |
| 385 CanonHostInfo* host_info); |
| 386 void CanonicalizeHostVerbose(const char16* spec, |
| 387 const url_parse::Component& host, |
| 388 CanonOutput* output, |
| 389 CanonHostInfo* host_info); |
| 390 |
341 | 391 |
342 // IP addresses. | 392 // IP addresses. |
343 // | 393 // |
344 // Tries to interpret the given host name as an IP address. If it is an IP | 394 // Tries to interpret the given host name as an IPv4 or IPv6 address. If it is |
345 // address, it will canonicalize it as such, appending it to |output| and | 395 // an IP address, it will canonicalize it as such, appending it to |output|. |
346 // identifying the added regions in |*out_host|, and will return true. If it | 396 // Additional status information is returned via the |*host_info| parameter. |
347 // is not an IP address, it will do nothing and will return false. This means | 397 // See the definition of CanonHostInfo above for details. |
348 // that the host name should be treated as a non-IP address and resolved using | |
349 // DNS like most names. | |
350 // | 398 // |
351 // This is called AUTOMATICALLY from the host canonicalizer, which ensures that | 399 // This is called AUTOMATICALLY from the host canonicalizer, which ensures that |
352 // the input is unescaped and name-prepped, etc. It should not normally be | 400 // the input is unescaped and name-prepped, etc. It should not normally be |
353 // necessary or wise to call this directly, other than to check if a given | 401 // necessary or wise to call this directly. |
354 // canonical hostname is an IP address. | 402 void CanonicalizeIPAddress(const char* spec, |
355 bool CanonicalizeIPAddress(const char* spec, | |
356 const url_parse::Component& host, | 403 const url_parse::Component& host, |
357 CanonOutput* output, | 404 CanonOutput* output, |
358 url_parse::Component* out_host); | 405 CanonHostInfo* host_info); |
359 bool CanonicalizeIPAddress(const char16* spec, | 406 void CanonicalizeIPAddress(const char16* spec, |
360 const url_parse::Component& host, | 407 const url_parse::Component& host, |
361 CanonOutput* output, | 408 CanonOutput* output, |
362 url_parse::Component* out_host); | 409 CanonHostInfo* host_info); |
363 | 410 |
364 // Port: this function will add the colon for the port if a port is present. | 411 // Port: this function will add the colon for the port if a port is present. |
365 // The caller can pass url_parse::PORT_UNSPECIFIED as the | 412 // The caller can pass url_parse::PORT_UNSPECIFIED as the |
366 // default_port_for_scheme argument if there is no default port. | 413 // default_port_for_scheme argument if there is no default port. |
367 // | 414 // |
368 // The 8-bit version requires UTF-8 encoding. | 415 // The 8-bit version requires UTF-8 encoding. |
369 bool CanonicalizePort(const char* spec, | 416 bool CanonicalizePort(const char* spec, |
370 const url_parse::Component& port, | 417 const url_parse::Component& port, |
371 int default_port_for_scheme, | 418 int default_port_for_scheme, |
372 CanonOutput* output, | 419 CanonOutput* output, |
(...skipping 442 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
815 bool base_is_file, | 862 bool base_is_file, |
816 const char16* relative_url, | 863 const char16* relative_url, |
817 const url_parse::Component& relative_component, | 864 const url_parse::Component& relative_component, |
818 CharsetConverter* query_converter, | 865 CharsetConverter* query_converter, |
819 CanonOutput* output, | 866 CanonOutput* output, |
820 url_parse::Parsed* out_parsed); | 867 url_parse::Parsed* out_parsed); |
821 | 868 |
822 } // namespace url_canon | 869 } // namespace url_canon |
823 | 870 |
824 #endif // GOOGLEURL_SRC_URL_CANON_H__ | 871 #endif // GOOGLEURL_SRC_URL_CANON_H__ |
OLD | NEW |