url/url_canon_mailtourl.cc - Issue 2833983005: M59 Merge of 'Improve canonicalization of mailto url path components'

Side by Side Diff: url/url_canon_mailtourl.cc

Issue 2833983005: M59 Merge of 'Improve canonicalization of mailto url path components' (Closed)

Patch Set: Created 3 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2013 The Chromium Authors. All rights reserved.	1 // Copyright 2013 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Functions for canonicalizing "mailto:" URLs.	5 // Functions for canonicalizing "mailto:" URLs.

6	6

7 #include "url/url_canon.h"	7 #include "url/url_canon.h"

8 #include "url/url_canon_internal.h"	8 #include "url/url_canon_internal.h"

9 #include "url/url_file.h"	9 #include "url/url_file.h"

10 #include "url/url_parse_internal.h"	10 #include "url/url_parse_internal.h"

11	11

12 namespace url {	12 namespace url {

13	13

14 namespace {	14 namespace {

15	15

	// Reports whether |uch| must be percent-encoded when it appears in the path
	// component of a mailto: URL. Certain characters are escaped there to improve
	// compatibility and to mitigate command-injection attacks on mailto handlers.
	// See https://crbug.com/711020.
	//
	// |uch| is a single code unit of the source string. Returns true for code
	// units that must be escaped, false for those that may be copied verbatim.
	template <typename UCHAR>
	bool ShouldEncodeMailboxCharacter(UCHAR uch) {
	  return uch < 0x21 ||                 // Space and control characters.
	         uch > 0x7e ||                 // DEL and anything above 7-bit ASCII.
	         uch == 0x22 ||                // Double quote.
	         uch == 0x3c || uch == 0x3e || // Angle brackets.
	         uch == 0x60 ||                // Backtick.
	         uch == 0x7b || uch == 0x7c || uch == 0x7d;  // Braces and pipe.
	}

	32

16 template <typename CHAR, typename UCHAR>	33 template <typename CHAR, typename UCHAR>

17 bool DoCanonicalizeMailtoURL(const URLComponentSource<CHAR>& source,	34 bool DoCanonicalizeMailtoURL(const URLComponentSource<CHAR>& source,

18 const Parsed& parsed,	35 const Parsed& parsed,

19 CanonOutput* output,	36 CanonOutput* output,

20 Parsed* new_parsed) {	37 Parsed* new_parsed) {

21 // mailto: only uses {scheme, path, query} -- clear the rest.	38 // mailto: only uses {scheme, path, query} -- clear the rest.

22 new_parsed->username = Component();	39 new_parsed->username = Component();

23 new_parsed->password = Component();	40 new_parsed->password = Component();

24 new_parsed->host = Component();	41 new_parsed->host = Component();

25 new_parsed->port = Component();	42 new_parsed->port = Component();

26 new_parsed->ref = Component();	43 new_parsed->ref = Component();

27	44

28 // Scheme (known, so we don't bother running it through the more	45 // Scheme (known, so we don't bother running it through the more

29 // complicated scheme canonicalizer).	46 // complicated scheme canonicalizer).

30 new_parsed->scheme.begin = output->length();	47 new_parsed->scheme.begin = output->length();

31 output->Append("mailto:", 7);	48 output->Append("mailto:", 7);

32 new_parsed->scheme.len = 6;	49 new_parsed->scheme.len = 6;

33	50

34 bool success = true;	51 bool success = true;

35	52

36 // Path	53 // Path

37 if (parsed.path.is_valid()) {	54 if (parsed.path.is_valid()) {

38 new_parsed->path.begin = output->length();	55 new_parsed->path.begin = output->length();

39	56

40 // Copy the path using path URL's more lax escaping rules.	57 // Copy the path using path URL's more lax escaping rules.

41 // We convert to UTF-8 and escape non-ASCII, but leave all	58 // We convert to UTF-8 and escape non-ASCII, but leave most

42 // ASCII characters alone.	59 // ASCII characters alone.

43 int end = parsed.path.end();	60 int end = parsed.path.end();

44 for (int i = parsed.path.begin; i < end; ++i) {	61 for (int i = parsed.path.begin; i < end; ++i) {

45 UCHAR uch = static_cast<UCHAR>(source.path[i]);	62 UCHAR uch = static_cast<UCHAR>(source.path[i]);

46 if (uch < 0x20 \|\| uch >= 0x80)	63 if (ShouldEncodeMailboxCharacter<UCHAR>(uch))

47 success &= AppendUTF8EscapedChar(source.path, &i, end, output);	64 success &= AppendUTF8EscapedChar(source.path, &i, end, output);

48 else	65 else

49 output->push_back(static_cast<char>(uch));	66 output->push_back(static_cast<char>(uch));

50 }	67 }

51	68

52 new_parsed->path.len = output->length() - new_parsed->path.begin;	69 new_parsed->path.len = output->length() - new_parsed->path.begin;

53 } else {	70 } else {

54 // No path at all	71 // No path at all

55 new_parsed->path.reset();	72 new_parsed->path.reset();

56 }	73 }

(...skipping 44 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
101 Parsed* new_parsed) {	118 Parsed* new_parsed) {

102 RawCanonOutput<1024> utf8;	119 RawCanonOutput<1024> utf8;

103 URLComponentSource<char> source(base);	120 URLComponentSource<char> source(base);

104 Parsed parsed(base_parsed);	121 Parsed parsed(base_parsed);

105 SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);	122 SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);

106 return DoCanonicalizeMailtoURL<char, unsigned char>(	123 return DoCanonicalizeMailtoURL<char, unsigned char>(

107 source, parsed, output, new_parsed);	124 source, parsed, output, new_parsed);

108 }	125 }

109	126

110 } // namespace url	127 } // namespace url

OLD	NEW

« no previous file with comments | « third_party/WebKit/LayoutTests/fast/url/script-tests/mailto.js ('k') | url/url_canon_unittest.cc » ('j') | no next file with comments »