url/url_canon_pathurl.cc - Issue 23835019: Support URL fragment resolution against non-hierarchical schemes

Side by Side Diff: url/url_canon_pathurl.cc

Issue 23835019: Support URL fragment resolution against non-hierarchical schemes (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: actually restore PS4 this time Created 7 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2013 The Chromium Authors. All rights reserved.	1 // Copyright 2013 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Functions for canonicalizing "path" URLs. Not to be confused with the path	5 // Functions for canonicalizing "path" URLs. Not to be confused with the path

6 // of a URL, these are URLs that have no authority section, only a path. For	6 // of a URL, these are URLs that have no authority section, only a path. For

7 // example, "javascript:" and "data:".	7 // example, "javascript:" and "data:".

8	8

9 #include "url/url_canon.h"	9 #include "url/url_canon.h"

10 #include "url/url_canon_internal.h"	10 #include "url/url_canon_internal.h"

11	11

12 namespace url_canon {	12 namespace url_canon {

13	13

14 namespace {	14 namespace {

15	15

16 template<typename CHAR, typename UCHAR>	16 template<typename CHAR, typename UCHAR>
	brettw 2013/11/20 00:03:51: This should probably have a comment that separator can be 0 to skip adding.
	joth 2013/11/21 00:08:45: Done.
	17 bool DoCanonicalizePathComponent(const CHAR* source,

	18 const url_parse::Component& component,

	19 CHAR seperator,

	20 CanonOutput* output,

	21 url_parse::Component* new_parsed) {
	brettw 2013/11/20 00:03:51: Can this be called "new_component" instead?
	joth 2013/11/21 00:08:45: Done.
	22 bool success = true;

	23 if (component.is_valid()) {

	24 if (seperator)

	25 output->push_back(seperator);

	26 // Copy the path using path URL's more lax escaping rules (think for

	27 // javascript:). We convert to UTF-8 and escape non-ASCII, but leave all

	28 // ASCII characters alone. This helps readability of JavaStript.

	29 new_parsed->begin = output->length();

	30 int end = component.end();

	31 for (int i = component.begin; i < end; i++) {

	32 UCHAR uch = static_cast<UCHAR>(source[i]);

	33 if (uch < 0x20 || uch >= 0x80)

	34 success &= AppendUTF8EscapedChar(source, &i, end, output);

	35 else

	36 output->push_back(static_cast<char>(uch));

	37 }

	38 new_parsed->len = output->length() - new_parsed->begin;

	39 } else {

	40 // Empty part.

	41 new_parsed->reset();

	42 }

	43 return success;

	44 }

	45

	46 template<typename CHAR, typename UCHAR>

17 bool DoCanonicalizePathURL(const URLComponentSource<CHAR>& source,	47 bool DoCanonicalizePathURL(const URLComponentSource<CHAR>& source,

18 const url_parse::Parsed& parsed,	48 const url_parse::Parsed& parsed,

19 CanonOutput* output,	49 CanonOutput* output,

20 url_parse::Parsed* new_parsed) {	50 url_parse::Parsed* new_parsed) {

21 // Scheme: this will append the colon.	51 // Scheme: this will append the colon.

22 bool success = CanonicalizeScheme(source.scheme, parsed.scheme,	52 bool success = CanonicalizeScheme(source.scheme, parsed.scheme,

23 output, &new_parsed->scheme);	53 output, &new_parsed->scheme);

24	54

25 // We assume there's no authority for path URLs. Note that hosts should never	55 // We assume there's no authority for path URLs. Note that hosts should never

26 // have -1 length.	56 // have -1 length.

27 new_parsed->username.reset();	57 new_parsed->username.reset();

28 new_parsed->password.reset();	58 new_parsed->password.reset();

29 new_parsed->host.reset();	59 new_parsed->host.reset();

30 new_parsed->port.reset();	60 new_parsed->port.reset();

31	61 success &= DoCanonicalizePathComponent<CHAR, UCHAR>(
	brettw 2013/11/20 00:03:51: Can you add a comment here that for path URLs we let them have these components, but canonicalize them using the weaker path URL rules? It should work, but I want to be sure we get a test case for ReplaceComponents where we change the scheme from a path to a regular one and the canonicalization of these elements is updated to the more strict rules, and we lose any trailing whitespace.
	joth 2013/11/21 00:08:45: Done. Added a case to URLUtilTest, ReplaceScheme: "myscheme:example.com/ hello # world " => "http://example.com/%20hello%20# world"
32 if (parsed.path.is_valid()) {	62 source.path, parsed.path, 0, output, &new_parsed->path);

33 // Copy the path using path URL's more lax escaping rules (think for	63 success &= DoCanonicalizePathComponent<CHAR, UCHAR>(

34 // javascript:). We convert to UTF-8 and escape non-ASCII, but leave all	64 source.query, parsed.query, '?', output, &new_parsed->query);

35 // ASCII characters alone. This helps readability of JavaStript.	65 success &= DoCanonicalizePathComponent<CHAR, UCHAR>(

36 new_parsed->path.begin = output->length();	66 source.ref, parsed.ref, '#', output, &new_parsed->ref);

37 int end = parsed.path.end();

38 for (int i = parsed.path.begin; i < end; i++) {

39 UCHAR uch = static_cast<UCHAR>(source.path[i]);

40 if (uch < 0x20 || uch >= 0x80)

41 success &= AppendUTF8EscapedChar(source.path, &i, end, output);

42 else

43 output->push_back(static_cast<char>(uch));

44 }

45 new_parsed->path.len = output->length() - new_parsed->path.begin;

46 } else {

47 // Empty path.

48 new_parsed->path.reset();

49 }

50

51 // Assume there's no query or ref.

52 new_parsed->query.reset();

53 new_parsed->ref.reset();

54	67

55 return success;	68 return success;

56 }	69 }

57	70

58 } // namespace	71 } // namespace

59	72

60 bool CanonicalizePathURL(const char* spec,	73 bool CanonicalizePathURL(const char* spec,

61 int spec_len,	74 int spec_len,

62 const url_parse::Parsed& parsed,	75 const url_parse::Parsed& parsed,

63 CanonOutput* output,	76 CanonOutput* output,

(...skipping 30 matching lines...) Expand all Loading...
94 url_parse::Parsed* new_parsed) {	107 url_parse::Parsed* new_parsed) {

95 RawCanonOutput<1024> utf8;	108 RawCanonOutput<1024> utf8;

96 URLComponentSource<char> source(base);	109 URLComponentSource<char> source(base);

97 url_parse::Parsed parsed(base_parsed);	110 url_parse::Parsed parsed(base_parsed);

98 SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);	111 SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);

99 return DoCanonicalizePathURL<char, unsigned char>(	112 return DoCanonicalizePathURL<char, unsigned char>(

100 source, parsed, output, new_parsed);	113 source, parsed, output, new_parsed);

101 }	114 }

102	115

103 } // namespace url_canon	116 } // namespace url_canon

OLD	NEW

« url/third_party/mozilla/url_parse.cc ('K') | « url/third_party/mozilla/url_parse.cc ('k') | url/url_canon_relative.cc » ('j') | url/url_canon_relative.cc » ('J')