url/url_canon_pathurl.cc - Issue 23835019: Support URL fragment resolution against non-hierarchical schemes

Side by Side Diff: url/url_canon_pathurl.cc

Issue 23835019: Support URL fragment resolution against non-hierarchical schemes (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: brettw2 Created 7 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2013 The Chromium Authors. All rights reserved.	1 // Copyright 2013 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Functions for canonicalizing "path" URLs. Not to be confused with the path	5 // Functions for canonicalizing "path" URLs. Not to be confused with the path

6 // of a URL, these are URLs that have no authority section, only a path. For	6 // of a URL, these are URLs that have no authority section, only a path. For

7 // example, "javascript:" and "data:".	7 // example, "javascript:" and "data:".

8	8

9 #include "url/url_canon.h"	9 #include "url/url_canon.h"

10 #include "url/url_canon_internal.h"	10 #include "url/url_canon_internal.h"

11	11

12 namespace url_canon {	12 namespace url_canon {

13	13

14 namespace {	14 namespace {

15	15

	16 // Canonicalize the given \|component\| from \|source\| into \|output\| and

	17 // \|new_component\|. If \|separator\| is non-zero, it is prepended to \|output\|

	18 // prior to the canonicalized component; i.e. for the '?' or '#' characters.

	19 template<typename CHAR, typename UCHAR>

	20 bool DoCanonicalizePathComponent(const CHAR* source,

	21 const url_parse::Component& component,

	22 CHAR seperator,

	23 CanonOutput* output,

	24 url_parse::Component* new_component) {

	25 bool success = true;

	26 if (component.is_valid()) {

	27 if (seperator)

	28 output->push_back(seperator);

	29 // Copy the path using path URL's more lax escaping rules (think for

	30 // javascript:). We convert to UTF-8 and escape non-ASCII, but leave all

	31 // ASCII characters alone. This helps readability of JavaScript.

	32 new_component->begin = output->length();

	33 int end = component.end();

	34 for (int i = component.begin; i < end; i++) {

	35 UCHAR uch = static_cast<UCHAR>(source[i]);

	36 if (uch < 0x20 \|\| uch >= 0x80)

	37 success &= AppendUTF8EscapedChar(source, &i, end, output);

	38 else

	39 output->push_back(static_cast<char>(uch));

	40 }

	41 new_component->len = output->length() - new_component->begin;

	42 } else {

	43 // Empty part.

	44 new_component->reset();

	45 }

	46 return success;

	47 }

	48

16 template<typename CHAR, typename UCHAR>	49 template<typename CHAR, typename UCHAR>

17 bool DoCanonicalizePathURL(const URLComponentSource<CHAR>& source,	50 bool DoCanonicalizePathURL(const URLComponentSource<CHAR>& source,

18 const url_parse::Parsed& parsed,	51 const url_parse::Parsed& parsed,

19 CanonOutput* output,	52 CanonOutput* output,

20 url_parse::Parsed* new_parsed) {	53 url_parse::Parsed* new_parsed) {

21 // Scheme: this will append the colon.	54 // Scheme: this will append the colon.

22 bool success = CanonicalizeScheme(source.scheme, parsed.scheme,	55 bool success = CanonicalizeScheme(source.scheme, parsed.scheme,

23 output, &new_parsed->scheme);	56 output, &new_parsed->scheme);

24	57

25 // We assume there's no authority for path URLs. Note that hosts should never	58 // We assume there's no authority for path URLs. Note that hosts should never

26 // have -1 length.	59 // have -1 length.

27 new_parsed->username.reset();	60 new_parsed->username.reset();

28 new_parsed->password.reset();	61 new_parsed->password.reset();

29 new_parsed->host.reset();	62 new_parsed->host.reset();

30 new_parsed->port.reset();	63 new_parsed->port.reset();

31	64 // We allow path URLs to have the path, query and fragment components, but we

32 if (parsed.path.is_valid()) {	65 // will canonicalize each of them via the weaker path URL rules.

33 // Copy the path using path URL's more lax escaping rules (think for	66 success &= DoCanonicalizePathComponent<CHAR, UCHAR>(

34 // javascript:). We convert to UTF-8 and escape non-ASCII, but leave all	67 source.path, parsed.path, 0, output, &new_parsed->path);

35 // ASCII characters alone. This helps readability of JavaScript.	68 success &= DoCanonicalizePathComponent<CHAR, UCHAR>(

36 new_parsed->path.begin = output->length();	69 source.query, parsed.query, '?', output, &new_parsed->query);

37 int end = parsed.path.end();	70 success &= DoCanonicalizePathComponent<CHAR, UCHAR>(

38 for (int i = parsed.path.begin; i < end; i++) {	71 source.ref, parsed.ref, '#', output, &new_parsed->ref);

39 UCHAR uch = static_cast<UCHAR>(source.path[i]);

40 if (uch < 0x20 \|\| uch >= 0x80)

41 success &= AppendUTF8EscapedChar(source.path, &i, end, output);

42 else

43 output->push_back(static_cast<char>(uch));

44 }

45 new_parsed->path.len = output->length() - new_parsed->path.begin;

46 } else {

47 // Empty path.

48 new_parsed->path.reset();

49 }

50

51 // Assume there's no query or ref.

52 new_parsed->query.reset();

53 new_parsed->ref.reset();

54	72

55 return success;	73 return success;

56 }	74 }

57	75

58 } // namespace	76 } // namespace

59	77

60 bool CanonicalizePathURL(const char* spec,	78 bool CanonicalizePathURL(const char* spec,

61 int spec_len,	79 int spec_len,

62 const url_parse::Parsed& parsed,	80 const url_parse::Parsed& parsed,

63 CanonOutput* output,	81 CanonOutput* output,

(...skipping 30 matching lines...) Expand all Loading...
94 url_parse::Parsed* new_parsed) {	112 url_parse::Parsed* new_parsed) {

95 RawCanonOutput<1024> utf8;	113 RawCanonOutput<1024> utf8;

96 URLComponentSource<char> source(base);	114 URLComponentSource<char> source(base);

97 url_parse::Parsed parsed(base_parsed);	115 url_parse::Parsed parsed(base_parsed);

98 SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);	116 SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);

99 return DoCanonicalizePathURL<char, unsigned char>(	117 return DoCanonicalizePathURL<char, unsigned char>(

100 source, parsed, output, new_parsed);	118 source, parsed, output, new_parsed);

101 }	119 }

102	120

103 } // namespace url_canon	121 } // namespace url_canon

OLD	NEW

« no previous file with comments | « url/third_party/mozilla/url_parse.cc ('k') | url/url_canon_relative.cc » ('j') | no next file with comments »