Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(47)

Side by Side Diff: url/url_canon.h

Issue 1270443006: Proof-read comments in src/url/. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef URL_URL_CANON_H_ 5 #ifndef URL_URL_CANON_H_
6 #define URL_URL_CANON_H_ 6 #define URL_URL_CANON_H_
7 7
8 #include <stdlib.h> 8 #include <stdlib.h>
9 #include <string.h> 9 #include <string.h>
10 10
(...skipping 267 matching lines...) Expand 10 before | Expand all | Expand 10 after
278 CanonOutput* output, 278 CanonOutput* output,
279 Component* out_scheme); 279 Component* out_scheme);
280 URL_EXPORT bool CanonicalizeScheme(const base::char16* spec, 280 URL_EXPORT bool CanonicalizeScheme(const base::char16* spec,
281 const Component& scheme, 281 const Component& scheme,
282 CanonOutput* output, 282 CanonOutput* output,
283 Component* out_scheme); 283 Component* out_scheme);
284 284
285 // User info: username/password. If present, this will add the delimiters so 285 // User info: username/password. If present, this will add the delimiters so
286 // the output will be "<username>:<password>@" or "<username>@". Empty 286 // the output will be "<username>:<password>@" or "<username>@". Empty
287 // username/password pairs, or empty passwords, will get converted to 287 // username/password pairs, or empty passwords, will get converted to
288 // nonexistant in the canonical version. 288 // nonexistent in the canonical version.
289 // 289 //
290 // The components for the username and password refer to ranges in the 290 // The components for the username and password refer to ranges in the
291 // respective source strings. Usually, these will be the same string, which 291 // respective source strings. Usually, these will be the same string, which
292 // is legal as long as the two components don't overlap. 292 // is legal as long as the two components don't overlap.
293 // 293 //
294 // The 8-bit version requires UTF-8 encoding. 294 // The 8-bit version requires UTF-8 encoding.
295 URL_EXPORT bool CanonicalizeUserInfo(const char* username_source, 295 URL_EXPORT bool CanonicalizeUserInfo(const char* username_source,
296 const Component& username, 296 const Component& username,
297 const char* password_source, 297 const char* password_source,
298 const Component& password, 298 const Component& password,
(...skipping 11 matching lines...) Expand all
310 // This structure holds detailed state exported from the IP/Host canonicalizers. 310 // This structure holds detailed state exported from the IP/Host canonicalizers.
311 // Additional fields may be added as callers require them. 311 // Additional fields may be added as callers require them.
312 struct CanonHostInfo { 312 struct CanonHostInfo {
313 CanonHostInfo() : family(NEUTRAL), num_ipv4_components(0), out_host() {} 313 CanonHostInfo() : family(NEUTRAL), num_ipv4_components(0), out_host() {}
314 314
315 // Convenience function to test if family is an IP address. 315 // Convenience function to test if family is an IP address.
316 bool IsIPAddress() const { return family == IPV4 || family == IPV6; } 316 bool IsIPAddress() const { return family == IPV4 || family == IPV6; }
317 317
318 // This field summarizes how the input was classified by the canonicalizer. 318 // This field summarizes how the input was classified by the canonicalizer.
319 enum Family { 319 enum Family {
320 NEUTRAL, // - Doesn't resemble an IP address. As far as the IP 320 NEUTRAL, // - Doesn't resemble an IP address. As far as the IP
321 // canonicalizer is concerned, it should be treated as a 321 // canonicalizer is concerned, it should be treated as a
322 // hostname. 322 // hostname.
323 BROKEN, // - Almost an IP, but was not canonicalized. This could be an 323 BROKEN, // - Almost an IP, but was not canonicalized. This could be an
324 // IPv4 address where truncation occurred, or something 324 // IPv4 address where truncation occurred, or something
325 // containing the special characters :[] which did not parse 325 // containing the special characters :[] which did not parse
326 // as an IPv6 address. Never attempt to connect to this 326 // as an IPv6 address. Never attempt to connect to this
327 // address, because it might actually succeed! 327 // address, because it might actually succeed!
328 IPV4, // - Successfully canonicalized as an IPv4 address. 328 IPV4, // - Successfully canonicalized as an IPv4 address.
329 IPV6, // - Successfully canonicalized as an IPv6 address. 329 IPV6, // - Successfully canonicalized as an IPv6 address.
330 }; 330 };
331 Family family; 331 Family family;
332 332
333 // If |family| is IPV4, then this is the number of nonempty dot-separated 333 // If |family| is IPV4, then this is the number of nonempty dot-separated
334 // components in the input text, from 1 to 4. If |family| is not IPV4, 334 // components in the input text, from 1 to 4. If |family| is not IPV4,
335 // this value is undefined. 335 // this value is undefined.
336 int num_ipv4_components; 336 int num_ipv4_components;
337 337
338 // Location of host within the canonicalized output. 338 // Location of host within the canonicalized output.
339 // CanonicalizeIPAddress() only sets this field if |family| is IPV4 or IPV6. 339 // CanonicalizeIPAddress() only sets this field if |family| is IPV4 or IPV6.
340 // CanonicalizeHostVerbose() always sets it. 340 // CanonicalizeHostVerbose() always sets it.
341 Component out_host; 341 Component out_host;
342 342
343 // |address| contains the parsed IP Address (if any) in its first 343 // |address| contains the parsed IP Address (if any) in its first
344 // AddressLength() bytes, in network order. If IsIPAddress() is false 344 // AddressLength() bytes, in network order. If IsIPAddress() is false
345 // AddressLength() will return zero and the content of |address| is undefined. 345 // AddressLength() will return zero and the content of |address| is undefined.
346 unsigned char address[16]; 346 unsigned char address[16];
347 347
348 // Convenience function to calculate the length of an IP address corresponding 348 // Convenience function to calculate the length of an IP address corresponding
349 // to the current IP version in |family|, if any. For use with |address|. 349 // to the current IP version in |family|, if any. For use with |address|.
350 int AddressLength() const { 350 int AddressLength() const {
351 return family == IPV4 ? 4 : (family == IPV6 ? 16 : 0); 351 return family == IPV4 ? 4 : (family == IPV6 ? 16 : 0);
352 } 352 }
353 }; 353 };
354 354
355 355
356 // Host. 356 // Host.
357 // 357 //
358 // The 8-bit version requires UTF-8 encoding. Use this version when you only 358 // The 8-bit version requires UTF-8 encoding. Use this version when you only
359 // need to know whether canonicalization succeeded. 359 // need to know whether canonicalization succeeded.
360 URL_EXPORT bool CanonicalizeHost(const char* spec, 360 URL_EXPORT bool CanonicalizeHost(const char* spec,
361 const Component& host, 361 const Component& host,
362 CanonOutput* output, 362 CanonOutput* output,
363 Component* out_host); 363 Component* out_host);
364 URL_EXPORT bool CanonicalizeHost(const base::char16* spec, 364 URL_EXPORT bool CanonicalizeHost(const base::char16* spec,
365 const Component& host, 365 const Component& host,
366 CanonOutput* output, 366 CanonOutput* output,
367 Component* out_host); 367 Component* out_host);
368 368
369 // Extended version of CanonicalizeHost, which returns additional information. 369 // Extended version of CanonicalizeHost, which returns additional information.
370 // Use this when you need to know whether the hostname was an IP address. 370 // Use this when you need to know whether the hostname was an IP address.
371 // A successful return is indicated by host_info->family != BROKEN. See the 371 // A successful return is indicated by host_info->family != BROKEN. See the
372 // definition of CanonHostInfo above for details. 372 // definition of CanonHostInfo above for details.
373 URL_EXPORT void CanonicalizeHostVerbose(const char* spec, 373 URL_EXPORT void CanonicalizeHostVerbose(const char* spec,
374 const Component& host, 374 const Component& host,
375 CanonOutput* output, 375 CanonOutput* output,
376 CanonHostInfo* host_info); 376 CanonHostInfo* host_info);
377 URL_EXPORT void CanonicalizeHostVerbose(const base::char16* spec, 377 URL_EXPORT void CanonicalizeHostVerbose(const base::char16* spec,
378 const Component& host, 378 const Component& host,
379 CanonOutput* output, 379 CanonOutput* output,
380 CanonHostInfo* host_info); 380 CanonHostInfo* host_info);
381 381
(...skipping 165 matching lines...) Expand 10 before | Expand all | Expand 10 after
547 int spec_len, 547 int spec_len,
548 const Parsed& parsed, 548 const Parsed& parsed,
549 CanonOutput* output, 549 CanonOutput* output,
550 Parsed* new_parsed); 550 Parsed* new_parsed);
551 URL_EXPORT bool CanonicalizePathURL(const base::char16* spec, 551 URL_EXPORT bool CanonicalizePathURL(const base::char16* spec,
552 int spec_len, 552 int spec_len,
553 const Parsed& parsed, 553 const Parsed& parsed,
554 CanonOutput* output, 554 CanonOutput* output,
555 Parsed* new_parsed); 555 Parsed* new_parsed);
556 556
557 // Use for mailto URLs. This "canonicalizes" the url into a path and query 557 // Use for mailto URLs. This "canonicalizes" the URL into a path and query
558 // component. It does not attempt to merge "to" fields. It uses UTF-8 for 558 // component. It does not attempt to merge "to" fields. It uses UTF-8 for
559 // the query encoding if there is a query. This is because a mailto URL is 559 // the query encoding if there is a query. This is because a mailto URL is
560 // really intended for an external mail program, and the encoding of a page, 560 // really intended for an external mail program, and the encoding of a page,
561 // etc. which would influence a query encoding normally are irrelevant. 561 // etc. which would influence a query encoding normally are irrelevant.
562 URL_EXPORT bool CanonicalizeMailtoURL(const char* spec, 562 URL_EXPORT bool CanonicalizeMailtoURL(const char* spec,
563 int spec_len, 563 int spec_len,
564 const Parsed& parsed, 564 const Parsed& parsed,
565 CanonOutput* output, 565 CanonOutput* output,
566 Parsed* new_parsed); 566 Parsed* new_parsed);
567 URL_EXPORT bool CanonicalizeMailtoURL(const base::char16* spec, 567 URL_EXPORT bool CanonicalizeMailtoURL(const base::char16* spec,
568 int spec_len, 568 int spec_len,
569 const Parsed& parsed, 569 const Parsed& parsed,
570 CanonOutput* output, 570 CanonOutput* output,
571 Parsed* new_parsed); 571 Parsed* new_parsed);
572 572
573 // Part replacer -------------------------------------------------------------- 573 // Part replacer --------------------------------------------------------------
574 574
575 // Internal structure used for storing separate strings for each component. 575 // Internal structure used for storing separate strings for each component.
576 // The basic canonicalization functions use this structure internally so that 576 // The basic canonicalization functions use this structure internally so that
577 // component replacement (different strings for different components) can be 577 // component replacement (different strings for different components) can be
578 // treated on the same code path as regular canonicalization (the same string 578 // treated on the same code path as regular canonicalization (the same string
579 // for each component). 579 // for each component).
580 // 580 //
581 // A Parsed structure usually goes along with this. Those 581 // A Parsed structure usually goes along with this. Those components identify
582 // components identify offsets within these strings, so that they can all be 582 // offsets within these strings, so that they can all be in the same string,
583 // in the same string, or spread arbitrarily across different ones. 583 // or spread arbitrarily across different ones.
584 // 584 //
585 // This structure does not own any data. It is the caller's responsibility to 585 // This structure does not own any data. It is the caller's responsibility to
586 // ensure that the data the pointers point to stays in scope and is not 586 // ensure that the data the pointers point to stays in scope and is not
587 // modified. 587 // modified.
588 template<typename CHAR> 588 template<typename CHAR>
589 struct URLComponentSource { 589 struct URLComponentSource {
590 // Constructor normally used by callers wishing to replace components. This 590 // Constructor normally used by callers wishing to replace components. This
591 // will make them all NULL, which is no replacement. The caller would then 591 // will make them all NULL, which is no replacement. The caller would then
592 // override the components they want to replace. 592 // override the components they want to replace.
593 URLComponentSource() 593 URLComponentSource()
(...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after
718 void SetRef(const CHAR* s, const Component& comp) { 718 void SetRef(const CHAR* s, const Component& comp) {
719 sources_.ref = s; 719 sources_.ref = s;
720 components_.ref = comp; 720 components_.ref = comp;
721 } 721 }
722 void ClearRef() { 722 void ClearRef() {
723 sources_.ref = Placeholder(); 723 sources_.ref = Placeholder();
724 components_.ref = Component(); 724 components_.ref = Component();
725 } 725 }
726 bool IsRefOverridden() const { return sources_.ref != NULL; } 726 bool IsRefOverridden() const { return sources_.ref != NULL; }
727 727
728 // Getters for the itnernal data. See the variables below for how the 728 // Getters for the internal data. See the variables below for how the
729 // information is encoded. 729 // information is encoded.
730 const URLComponentSource<CHAR>& sources() const { return sources_; } 730 const URLComponentSource<CHAR>& sources() const { return sources_; }
731 const Parsed& components() const { return components_; } 731 const Parsed& components() const { return components_; }
732 732
733 private: 733 private:
734 // Returns a pointer to a static empty string that is used as a placeholder 734 // Returns a pointer to a static empty string that is used as a placeholder
735 // to indicate a component should be deleted (see below). 735 // to indicate a component should be deleted (see below).
736 const CHAR* Placeholder() { 736 const CHAR* Placeholder() {
737 static const CHAR empty_cstr = 0; 737 static const CHAR empty_cstr = 0;
738 return &empty_cstr; 738 return &empty_cstr;
(...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after
856 // and the identified relevant portion of the relative URL (computed by 856 // and the identified relevant portion of the relative URL (computed by
857 // IsRelativeURL), this produces a new parsed canonical URL in |output| and 857 // IsRelativeURL), this produces a new parsed canonical URL in |output| and
858 // |out_parsed|. 858 // |out_parsed|.
859 // 859 //
860 // It also requires a flag indicating whether the base URL is a file: URL 860 // It also requires a flag indicating whether the base URL is a file: URL
861 // which triggers additional logic. 861 // which triggers additional logic.
862 // 862 //
863 // The base URL should be canonical and have a host (may be empty for file 863 // The base URL should be canonical and have a host (may be empty for file
864 // URLs) and a path. If it doesn't have these, we can't resolve relative 864 // URLs) and a path. If it doesn't have these, we can't resolve relative
865 // URLs off of it and will return the base as the output with an error flag. 865 // URLs off of it and will return the base as the output with an error flag.
866 // Becausee it is canonical is should also be ASCII. 866 // Because it is canonical it should also be ASCII.
867 // 867 //
868 // The query charset converter follows the same rules as CanonicalizeQuery. 868 // The query charset converter follows the same rules as CanonicalizeQuery.
869 // 869 //
870 // Returns true on success. On failure, the output will be "something 870 // Returns true on success. On failure, the output will be "something
871 // reasonable" that will be consistent and valid, just probably not what 871 // reasonable" that will be consistent and valid, just probably not what
872 // was intended by the web page author or caller. 872 // was intended by the web page author or caller.
873 URL_EXPORT bool ResolveRelativeURL(const char* base_url, 873 URL_EXPORT bool ResolveRelativeURL(const char* base_url,
874 const Parsed& base_parsed, 874 const Parsed& base_parsed,
875 bool base_is_file, 875 bool base_is_file,
876 const char* relative_url, 876 const char* relative_url,
877 const Component& relative_component, 877 const Component& relative_component,
878 CharsetConverter* query_converter, 878 CharsetConverter* query_converter,
879 CanonOutput* output, 879 CanonOutput* output,
880 Parsed* out_parsed); 880 Parsed* out_parsed);
881 URL_EXPORT bool ResolveRelativeURL(const char* base_url, 881 URL_EXPORT bool ResolveRelativeURL(const char* base_url,
882 const Parsed& base_parsed, 882 const Parsed& base_parsed,
883 bool base_is_file, 883 bool base_is_file,
884 const base::char16* relative_url, 884 const base::char16* relative_url,
885 const Component& relative_component, 885 const Component& relative_component,
886 CharsetConverter* query_converter, 886 CharsetConverter* query_converter,
887 CanonOutput* output, 887 CanonOutput* output,
888 Parsed* out_parsed); 888 Parsed* out_parsed);
889 889
890 } // namespace url 890 } // namespace url
891 891
892 #endif // URL_URL_CANON_H_ 892 #endif // URL_URL_CANON_H_
OLDNEW
« url/gurl.h ('K') | « url/origin.h ('k') | url/url_canon_etc.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698