url_canon.h - Issue 2029803003: Update to Chromium //url at Chromium commit 79dc59ac7602413181079ecb463873e29a1d7d0a.

Side by Side Diff: url_canon.h

Issue 2029803003: Update to Chromium //url at Chromium commit 79dc59ac7602413181079ecb463873e29a1d7d0a. (Closed) Base URL: https://chromium.googlesource.com/external/github.com/domokit/gurl@master

Patch Set: Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2013 The Chromium Authors. All rights reserved.	1 // Copyright 2013 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #ifndef URL_URL_CANON_H_	5 #ifndef URL_URL_CANON_H_

6 #define URL_URL_CANON_H_	6 #define URL_URL_CANON_H_

7	7

8 #include <stdlib.h>	8 #include <stdlib.h>

9 #include <string.h>	9 #include <string.h>

10	10

11 #include "base/strings/string16.h"	11 #include "base/strings/string16.h"

	12 #include "url/third_party/mozilla/url_parse.h"

12 #include "url/url_export.h"	13 #include "url/url_export.h"

13 #include "url/url_parse.h"

14	14

15 namespace url {	15 namespace url {

16	16

17 // Canonicalizer output -------------------------------------------------------	17 // Canonicalizer output -------------------------------------------------------

18	18

19 // Base class for the canonicalizer output, this maintains a buffer and	19 // Base class for the canonicalizer output, this maintains a buffer and

20 // supports simple resizing and append operations on it.	20 // supports simple resizing and append operations on it.

21 //	21 //

22 // It is VERY IMPORTANT that no virtual function calls be made on the common	22 // It is VERY IMPORTANT that no virtual function calls be made on the common

23 // code path. We only have two virtual function calls, the destructor and a	23 // code path. We only have two virtual function calls, the destructor and a

(...skipping 254 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
278 CanonOutput* output,	278 CanonOutput* output,

279 Component* out_scheme);	279 Component* out_scheme);

280 URL_EXPORT bool CanonicalizeScheme(const base::char16* spec,	280 URL_EXPORT bool CanonicalizeScheme(const base::char16* spec,

281 const Component& scheme,	281 const Component& scheme,

282 CanonOutput* output,	282 CanonOutput* output,

283 Component* out_scheme);	283 Component* out_scheme);

284	284

285 // User info: username/password. If present, this will add the delimiters so	285 // User info: username/password. If present, this will add the delimiters so

286 // the output will be "<username>:<password>@" or "<username>@". Empty	286 // the output will be "<username>:<password>@" or "<username>@". Empty

287 // username/password pairs, or empty passwords, will get converted to	287 // username/password pairs, or empty passwords, will get converted to

288 // nonexistant in the canonical version.	288 // nonexistent in the canonical version.

289 //	289 //

290 // The components for the username and password refer to ranges in the	290 // The components for the username and password refer to ranges in the

291 // respective source strings. Usually, these will be the same string, which	291 // respective source strings. Usually, these will be the same string, which

292 // is legal as long as the two components don't overlap.	292 // is legal as long as the two components don't overlap.

293 //	293 //

294 // The 8-bit version requires UTF-8 encoding.	294 // The 8-bit version requires UTF-8 encoding.

295 URL_EXPORT bool CanonicalizeUserInfo(const char* username_source,	295 URL_EXPORT bool CanonicalizeUserInfo(const char* username_source,

296 const Component& username,	296 const Component& username,

297 const char* password_source,	297 const char* password_source,

298 const Component& password,	298 const Component& password,

(...skipping 11 matching lines...) Expand all Loading...
310 // This structure holds detailed state exported from the IP/Host canonicalizers.	310 // This structure holds detailed state exported from the IP/Host canonicalizers.

311 // Additional fields may be added as callers require them.	311 // Additional fields may be added as callers require them.

312 struct CanonHostInfo {	312 struct CanonHostInfo {

313 CanonHostInfo() : family(NEUTRAL), num_ipv4_components(0), out_host() {}	313 CanonHostInfo() : family(NEUTRAL), num_ipv4_components(0), out_host() {}

314	314

315 // Convenience function to test if family is an IP address.	315 // Convenience function to test if family is an IP address.

316 bool IsIPAddress() const { return family == IPV4 || family == IPV6; }	316 bool IsIPAddress() const { return family == IPV4 || family == IPV6; }

317	317

318 // This field summarizes how the input was classified by the canonicalizer.	318 // This field summarizes how the input was classified by the canonicalizer.

319 enum Family {	319 enum Family {

320 NEUTRAL, // - Doesn't resemble an IP address. As far as the IP	320 NEUTRAL, // - Doesn't resemble an IP address. As far as the IP

321 // canonicalizer is concerned, it should be treated as a	321 // canonicalizer is concerned, it should be treated as a

322 // hostname.	322 // hostname.

323 BROKEN, // - Almost an IP, but was not canonicalized. This could be an	323 BROKEN, // - Almost an IP, but was not canonicalized. This could be an

324 // IPv4 address where truncation occurred, or something	324 // IPv4 address where truncation occurred, or something

325 // containing the special characters :[] which did not parse	325 // containing the special characters :[] which did not parse

326 // as an IPv6 address. Never attempt to connect to this	326 // as an IPv6 address. Never attempt to connect to this

327 // address, because it might actually succeed!	327 // address, because it might actually succeed!

328 IPV4, // - Successfully canonicalized as an IPv4 address.	328 IPV4, // - Successfully canonicalized as an IPv4 address.

329 IPV6, // - Successfully canonicalized as an IPv6 address.	329 IPV6, // - Successfully canonicalized as an IPv6 address.

330 };	330 };

331 Family family;	331 Family family;

332	332

333 // If |family| is IPV4, then this is the number of nonempty dot-separated	333 // If |family| is IPV4, then this is the number of nonempty dot-separated

334 // components in the input text, from 1 to 4. If |family| is not IPV4,	334 // components in the input text, from 1 to 4. If |family| is not IPV4,

335 // this value is undefined.	335 // this value is undefined.

336 int num_ipv4_components;	336 int num_ipv4_components;

337	337

338 // Location of host within the canonicalized output.	338 // Location of host within the canonicalized output.

339 // CanonicalizeIPAddress() only sets this field if |family| is IPV4 or IPV6.	339 // CanonicalizeIPAddress() only sets this field if |family| is IPV4 or IPV6.

340 // CanonicalizeHostVerbose() always sets it.	340 // CanonicalizeHostVerbose() always sets it.

341 Component out_host;	341 Component out_host;

342	342

343 // |address| contains the parsed IP Address (if any) in its first	343 // |address| contains the parsed IP Address (if any) in its first

344 // AddressLength() bytes, in network order. If IsIPAddress() is false	344 // AddressLength() bytes, in network order. If IsIPAddress() is false

345 // AddressLength() will return zero and the content of |address| is undefined.	345 // AddressLength() will return zero and the content of |address| is undefined.

346 unsigned char address[16];	346 unsigned char address[16];

347	347

348 // Convenience function to calculate the length of an IP address corresponding	348 // Convenience function to calculate the length of an IP address corresponding

349 // to the current IP version in |family|, if any. For use with |address|.	349 // to the current IP version in |family|, if any. For use with |address|.

350 int AddressLength() const {	350 int AddressLength() const {

351 return family == IPV4 ? 4 : (family == IPV6 ? 16 : 0);	351 return family == IPV4 ? 4 : (family == IPV6 ? 16 : 0);

352 }	352 }

353 };	353 };

354	354

355	355

356 // Host.	356 // Host.

357 //	357 //

358 // The 8-bit version requires UTF-8 encoding. Use this version when you only	358 // The 8-bit version requires UTF-8 encoding. Use this version when you only

359 // need to know whether canonicalization succeeded.	359 // need to know whether canonicalization succeeded.

360 URL_EXPORT bool CanonicalizeHost(const char* spec,	360 URL_EXPORT bool CanonicalizeHost(const char* spec,

361 const Component& host,	361 const Component& host,

362 CanonOutput* output,	362 CanonOutput* output,

363 Component* out_host);	363 Component* out_host);

364 URL_EXPORT bool CanonicalizeHost(const base::char16* spec,	364 URL_EXPORT bool CanonicalizeHost(const base::char16* spec,

365 const Component& host,	365 const Component& host,

366 CanonOutput* output,	366 CanonOutput* output,

367 Component* out_host);	367 Component* out_host);

368	368

369 // Extended version of CanonicalizeHost, which returns additional information.	369 // Extended version of CanonicalizeHost, which returns additional information.

370 // Use this when you need to know whether the hostname was an IP address.	370 // Use this when you need to know whether the hostname was an IP address.

371 // A successful return is indicated by host_info->family != BROKEN. See the	371 // A successful return is indicated by host_info->family != BROKEN. See the

372 // definition of CanonHostInfo above for details.	372 // definition of CanonHostInfo above for details.

373 URL_EXPORT void CanonicalizeHostVerbose(const char* spec,	373 URL_EXPORT void CanonicalizeHostVerbose(const char* spec,

374 const Component& host,	374 const Component& host,

375 CanonOutput* output,	375 CanonOutput* output,

376 CanonHostInfo* host_info);	376 CanonHostInfo* host_info);

377 URL_EXPORT void CanonicalizeHostVerbose(const base::char16* spec,	377 URL_EXPORT void CanonicalizeHostVerbose(const base::char16* spec,

378 const Component& host,	378 const Component& host,

379 CanonOutput* output,	379 CanonOutput* output,

380 CanonHostInfo* host_info);	380 CanonHostInfo* host_info);

381	381

(...skipping 165 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
547 int spec_len,	547 int spec_len,

548 const Parsed& parsed,	548 const Parsed& parsed,

549 CanonOutput* output,	549 CanonOutput* output,

550 Parsed* new_parsed);	550 Parsed* new_parsed);

551 URL_EXPORT bool CanonicalizePathURL(const base::char16* spec,	551 URL_EXPORT bool CanonicalizePathURL(const base::char16* spec,

552 int spec_len,	552 int spec_len,

553 const Parsed& parsed,	553 const Parsed& parsed,

554 CanonOutput* output,	554 CanonOutput* output,

555 Parsed* new_parsed);	555 Parsed* new_parsed);

556	556

557 // Use for mailto URLs. This "canonicalizes" the url into a path and query	557 // Use for mailto URLs. This "canonicalizes" the URL into a path and query

558 // component. It does not attempt to merge "to" fields. It uses UTF-8 for	558 // component. It does not attempt to merge "to" fields. It uses UTF-8 for

559 // the query encoding if there is a query. This is because a mailto URL is	559 // the query encoding if there is a query. This is because a mailto URL is

560 // really intended for an external mail program, and the encoding of a page,	560 // really intended for an external mail program, and the encoding of a page,

561 // etc. which would influence a query encoding normally are irrelevant.	561 // etc. which would influence a query encoding normally are irrelevant.

562 URL_EXPORT bool CanonicalizeMailtoURL(const char* spec,	562 URL_EXPORT bool CanonicalizeMailtoURL(const char* spec,

563 int spec_len,	563 int spec_len,

564 const Parsed& parsed,	564 const Parsed& parsed,

565 CanonOutput* output,	565 CanonOutput* output,

566 Parsed* new_parsed);	566 Parsed* new_parsed);

567 URL_EXPORT bool CanonicalizeMailtoURL(const base::char16* spec,	567 URL_EXPORT bool CanonicalizeMailtoURL(const base::char16* spec,

568 int spec_len,	568 int spec_len,

569 const Parsed& parsed,	569 const Parsed& parsed,

570 CanonOutput* output,	570 CanonOutput* output,

571 Parsed* new_parsed);	571 Parsed* new_parsed);

572	572

573 // Part replacer --------------------------------------------------------------	573 // Part replacer --------------------------------------------------------------

574	574

575 // Internal structure used for storing separate strings for each component.	575 // Internal structure used for storing separate strings for each component.

576 // The basic canonicalization functions use this structure internally so that	576 // The basic canonicalization functions use this structure internally so that

577 // component replacement (different strings for different components) can be	577 // component replacement (different strings for different components) can be

578 // treated on the same code path as regular canonicalization (the same string	578 // treated on the same code path as regular canonicalization (the same string

579 // for each component).	579 // for each component).

580 //	580 //

581 // A Parsed structure usually goes along with this. Those	581 // A Parsed structure usually goes along with this. Those components identify

582 // components identify offsets within these strings, so that they can all be	582 // offsets within these strings, so that they can all be in the same string,

583 // in the same string, or spread arbitrarily across different ones.	583 // or spread arbitrarily across different ones.

584 //	584 //

585 // This structure does not own any data. It is the caller's responsibility to	585 // This structure does not own any data. It is the caller's responsibility to

586 // ensure that the data the pointers point to stays in scope and is not	586 // ensure that the data the pointers point to stays in scope and is not

587 // modified.	587 // modified.

588 template<typename CHAR>	588 template<typename CHAR>

589 struct URLComponentSource {	589 struct URLComponentSource {

590 // Constructor normally used by callers wishing to replace components. This	590 // Constructor normally used by callers wishing to replace components. This

591 // will make them all NULL, which is no replacement. The caller would then	591 // will make them all NULL, which is no replacement. The caller would then

592 // override the components they want to replace.	592 // override the components they want to replace.

593 URLComponentSource()	593 URLComponentSource()

(...skipping 124 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
718 void SetRef(const CHAR* s, const Component& comp) {	718 void SetRef(const CHAR* s, const Component& comp) {

719 sources_.ref = s;	719 sources_.ref = s;

720 components_.ref = comp;	720 components_.ref = comp;

721 }	721 }

722 void ClearRef() {	722 void ClearRef() {

723 sources_.ref = Placeholder();	723 sources_.ref = Placeholder();

724 components_.ref = Component();	724 components_.ref = Component();

725 }	725 }

726 bool IsRefOverridden() const { return sources_.ref != NULL; }	726 bool IsRefOverridden() const { return sources_.ref != NULL; }

727	727

728 // Getters for the itnernal data. See the variables below for how the	728 // Getters for the internal data. See the variables below for how the

729 // information is encoded.	729 // information is encoded.

730 const URLComponentSource<CHAR>& sources() const { return sources_; }	730 const URLComponentSource<CHAR>& sources() const { return sources_; }

731 const Parsed& components() const { return components_; }	731 const Parsed& components() const { return components_; }

732	732

733 private:	733 private:

734 // Returns a pointer to a static empty string that is used as a placeholder	734 // Returns a pointer to a static empty string that is used as a placeholder

735 // to indicate a component should be deleted (see below).	735 // to indicate a component should be deleted (see below).

736 const CHAR* Placeholder() {	736 const CHAR* Placeholder() {

737 static const CHAR empty_cstr = 0;	737 static const CHAR empty_cstr = 0;

738 return &empty_cstr;	738 return &empty_cstr;

(...skipping 117 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
856 // and the identified relevant portion of the relative URL (computed by	856 // and the identified relevant portion of the relative URL (computed by

857 // IsRelativeURL), this produces a new parsed canonical URL in |output| and	857 // IsRelativeURL), this produces a new parsed canonical URL in |output| and

858 // |out_parsed|.	858 // |out_parsed|.

859 //	859 //

860 // It also requires a flag indicating whether the base URL is a file: URL	860 // It also requires a flag indicating whether the base URL is a file: URL

861 // which triggers additional logic.	861 // which triggers additional logic.

862 //	862 //

863 // The base URL should be canonical and have a host (may be empty for file	863 // The base URL should be canonical and have a host (may be empty for file

864 // URLs) and a path. If it doesn't have these, we can't resolve relative	864 // URLs) and a path. If it doesn't have these, we can't resolve relative

865 // URLs off of it and will return the base as the output with an error flag.	865 // URLs off of it and will return the base as the output with an error flag.

866 // Becausee it is canonical it should also be ASCII.	866 // Because it is canonical it should also be ASCII.

867 //	867 //

868 // The query charset converter follows the same rules as CanonicalizeQuery.	868 // The query charset converter follows the same rules as CanonicalizeQuery.

869 //	869 //

870 // Returns true on success. On failure, the output will be "something	870 // Returns true on success. On failure, the output will be "something

871 // reasonable" that will be consistent and valid, just probably not what	871 // reasonable" that will be consistent and valid, just probably not what

872 // was intended by the web page author or caller.	872 // was intended by the web page author or caller.

873 URL_EXPORT bool ResolveRelativeURL(const char* base_url,	873 URL_EXPORT bool ResolveRelativeURL(const char* base_url,

874 const Parsed& base_parsed,	874 const Parsed& base_parsed,

875 bool base_is_file,	875 bool base_is_file,

876 const char* relative_url,	876 const char* relative_url,

877 const Component& relative_component,	877 const Component& relative_component,

878 CharsetConverter* query_converter,	878 CharsetConverter* query_converter,

879 CanonOutput* output,	879 CanonOutput* output,

880 Parsed* out_parsed);	880 Parsed* out_parsed);

881 URL_EXPORT bool ResolveRelativeURL(const char* base_url,	881 URL_EXPORT bool ResolveRelativeURL(const char* base_url,

882 const Parsed& base_parsed,	882 const Parsed& base_parsed,

883 bool base_is_file,	883 bool base_is_file,

884 const base::char16* relative_url,	884 const base::char16* relative_url,

885 const Component& relative_component,	885 const Component& relative_component,

886 CharsetConverter* query_converter,	886 CharsetConverter* query_converter,

887 CanonOutput* output,	887 CanonOutput* output,

888 Parsed* out_parsed);	888 Parsed* out_parsed);

889	889

890 } // namespace url	890 } // namespace url

891	891

892 #endif // URL_URL_CANON_H_	892 #endif // URL_URL_CANON_H_

OLD	NEW

« no previous file with comments | « third_party/mozilla/url_parse.h ('k') | url_canon_etc.cc » ('j') | no next file with comments »