Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(149)

Side by Side Diff: url/url_canon_mailtourl.cc

Issue 2823883005: Revert of Improve canonicalization of mailto url path components (Closed)
Patch Set: Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | url/url_canon_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Functions for canonicalizing "mailto:" URLs. 5 // Functions for canonicalizing "mailto:" URLs.
6 6
7 #include "url/url_canon.h" 7 #include "url/url_canon.h"
8 #include "url/url_canon_internal.h" 8 #include "url/url_canon_internal.h"
9 #include "url/url_file.h" 9 #include "url/url_file.h"
10 #include "url/url_parse_internal.h" 10 #include "url/url_parse_internal.h"
11 11
12 namespace url { 12 namespace url {
13 13
14 namespace { 14 namespace {
15 15
16 // Certain characters should be percent-encoded when they appear in the path
17 // component of a mailto URL, to improve compatibility and mitigate against
18 // command-injection attacks on mailto handlers. See https://crbug.com/711020.
19 template <typename UCHAR>
20 bool ShouldEncodeMailboxCharacter(UCHAR uch) {
21 if (uch < 0x21 || // space & control characters.
22 uch > 0x7e || // DEL (0x7f) and non-ASCII characters.
23 uch == 0x22 || // quote.
24 uch == 0x3c || uch == 0x3e || // angle brackets.
25 uch == 0x60 || // backtick.
26 uch == 0x7b || uch == 0x7c || uch == 0x7d // braces and pipe.
27 ) {
28 return true;
29 }
30 return false;
31 }
32
33 template <typename CHAR, typename UCHAR> 16 template <typename CHAR, typename UCHAR>
34 bool DoCanonicalizeMailtoURL(const URLComponentSource<CHAR>& source, 17 bool DoCanonicalizeMailtoURL(const URLComponentSource<CHAR>& source,
35 const Parsed& parsed, 18 const Parsed& parsed,
36 CanonOutput* output, 19 CanonOutput* output,
37 Parsed* new_parsed) { 20 Parsed* new_parsed) {
38 // mailto: only uses {scheme, path, query} -- clear the rest. 21 // mailto: only uses {scheme, path, query} -- clear the rest.
39 new_parsed->username = Component(); 22 new_parsed->username = Component();
40 new_parsed->password = Component(); 23 new_parsed->password = Component();
41 new_parsed->host = Component(); 24 new_parsed->host = Component();
42 new_parsed->port = Component(); 25 new_parsed->port = Component();
43 new_parsed->ref = Component(); 26 new_parsed->ref = Component();
44 27
45 // Scheme (known, so we don't bother running it through the more 28 // Scheme (known, so we don't bother running it through the more
46 // complicated scheme canonicalizer). 29 // complicated scheme canonicalizer).
47 new_parsed->scheme.begin = output->length(); 30 new_parsed->scheme.begin = output->length();
48 output->Append("mailto:", 7); 31 output->Append("mailto:", 7);
49 new_parsed->scheme.len = 6; 32 new_parsed->scheme.len = 6;
50 33
51 bool success = true; 34 bool success = true;
52 35
53 // Path 36 // Path
54 if (parsed.path.is_valid()) { 37 if (parsed.path.is_valid()) {
55 new_parsed->path.begin = output->length(); 38 new_parsed->path.begin = output->length();
56 39
57 // Copy the path using path URL's more lax escaping rules. 40 // Copy the path using path URL's more lax escaping rules.
58 // We convert to UTF-8 and escape non-ASCII, but leave most 41 // We convert to UTF-8 and escape non-ASCII, but leave all
59 // ASCII characters alone. 42 // ASCII characters alone.
60 int end = parsed.path.end(); 43 int end = parsed.path.end();
61 for (int i = parsed.path.begin; i < end; ++i) { 44 for (int i = parsed.path.begin; i < end; ++i) {
62 UCHAR uch = static_cast<UCHAR>(source.path[i]); 45 UCHAR uch = static_cast<UCHAR>(source.path[i]);
63 if (ShouldEncodeMailboxCharacter<UCHAR>(uch)) 46 if (uch < 0x20 || uch >= 0x80)
64 success &= AppendUTF8EscapedChar(source.path, &i, end, output); 47 success &= AppendUTF8EscapedChar(source.path, &i, end, output);
65 else 48 else
66 output->push_back(static_cast<char>(uch)); 49 output->push_back(static_cast<char>(uch));
67 } 50 }
68 51
69 new_parsed->path.len = output->length() - new_parsed->path.begin; 52 new_parsed->path.len = output->length() - new_parsed->path.begin;
70 } else { 53 } else {
71 // No path at all 54 // No path at all
72 new_parsed->path.reset(); 55 new_parsed->path.reset();
73 } 56 }
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
118 Parsed* new_parsed) { 101 Parsed* new_parsed) {
119 RawCanonOutput<1024> utf8; 102 RawCanonOutput<1024> utf8;
120 URLComponentSource<char> source(base); 103 URLComponentSource<char> source(base);
121 Parsed parsed(base_parsed); 104 Parsed parsed(base_parsed);
122 SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed); 105 SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
123 return DoCanonicalizeMailtoURL<char, unsigned char>( 106 return DoCanonicalizeMailtoURL<char, unsigned char>(
124 source, parsed, output, new_parsed); 107 source, parsed, output, new_parsed);
125 } 108 }
126 109
127 } // namespace url 110 } // namespace url
OLDNEW
« no previous file with comments | « no previous file | url/url_canon_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698