| OLD | NEW |
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Functions for canonicalizing "mailto:" URLs. | 5 // Functions for canonicalizing "mailto:" URLs. |
| 6 | 6 |
| 7 #include "url/url_canon.h" | 7 #include "url/url_canon.h" |
| 8 #include "url/url_canon_internal.h" | 8 #include "url/url_canon_internal.h" |
| 9 #include "url/url_file.h" | 9 #include "url/url_file.h" |
| 10 #include "url/url_parse_internal.h" | 10 #include "url/url_parse_internal.h" |
| 11 | 11 |
| 12 namespace url { | 12 namespace url { |
| 13 | 13 |
| 14 namespace { | 14 namespace { |
| 15 | 15 |
| 16 // Certain characters should be percent-encoded when they appear in the path | |
| 17 // component of a mailto URL, to improve compatibility and mitigate against | |
| 18 // command-injection attacks on mailto handlers. See https://crbug.com/711020. | |
| 19 template <typename UCHAR> | |
| 20 bool ShouldEncodeMailboxCharacter(UCHAR uch) { | |
| 21 if (uch < 0x21 || // space & control characters. | |
| 22 uch > 0x7e || // high-ascii characters. | |
| 23 uch == 0x22 || // quote. | |
| 24 uch == 0x3c || uch == 0x3e || // angle brackets. | |
| 25 uch == 0x60 || // backtick. | |
| 26 uch == 0x7b || uch == 0x7c || uch == 0x7d // braces and pipe. | |
| 27 ) { | |
| 28 return true; | |
| 29 } | |
| 30 return false; | |
| 31 } | |
| 32 | |
| 33 template <typename CHAR, typename UCHAR> | 16 template <typename CHAR, typename UCHAR> |
| 34 bool DoCanonicalizeMailtoURL(const URLComponentSource<CHAR>& source, | 17 bool DoCanonicalizeMailtoURL(const URLComponentSource<CHAR>& source, |
| 35 const Parsed& parsed, | 18 const Parsed& parsed, |
| 36 CanonOutput* output, | 19 CanonOutput* output, |
| 37 Parsed* new_parsed) { | 20 Parsed* new_parsed) { |
| 38 // mailto: only uses {scheme, path, query} -- clear the rest. | 21 // mailto: only uses {scheme, path, query} -- clear the rest. |
| 39 new_parsed->username = Component(); | 22 new_parsed->username = Component(); |
| 40 new_parsed->password = Component(); | 23 new_parsed->password = Component(); |
| 41 new_parsed->host = Component(); | 24 new_parsed->host = Component(); |
| 42 new_parsed->port = Component(); | 25 new_parsed->port = Component(); |
| 43 new_parsed->ref = Component(); | 26 new_parsed->ref = Component(); |
| 44 | 27 |
| 45 // Scheme (known, so we don't bother running it through the more | 28 // Scheme (known, so we don't bother running it through the more |
| 46 // complicated scheme canonicalizer). | 29 // complicated scheme canonicalizer). |
| 47 new_parsed->scheme.begin = output->length(); | 30 new_parsed->scheme.begin = output->length(); |
| 48 output->Append("mailto:", 7); | 31 output->Append("mailto:", 7); |
| 49 new_parsed->scheme.len = 6; | 32 new_parsed->scheme.len = 6; |
| 50 | 33 |
| 51 bool success = true; | 34 bool success = true; |
| 52 | 35 |
| 53 // Path | 36 // Path |
| 54 if (parsed.path.is_valid()) { | 37 if (parsed.path.is_valid()) { |
| 55 new_parsed->path.begin = output->length(); | 38 new_parsed->path.begin = output->length(); |
| 56 | 39 |
| 57 // Copy the path using path URL's more lax escaping rules. | 40 // Copy the path using path URL's more lax escaping rules. |
| 58 // We convert to UTF-8 and escape non-ASCII, but leave most | 41 // We convert to UTF-8 and escape non-ASCII, but leave all |
| 59 // ASCII characters alone. | 42 // ASCII characters alone. |
| 60 int end = parsed.path.end(); | 43 int end = parsed.path.end(); |
| 61 for (int i = parsed.path.begin; i < end; ++i) { | 44 for (int i = parsed.path.begin; i < end; ++i) { |
| 62 UCHAR uch = static_cast<UCHAR>(source.path[i]); | 45 UCHAR uch = static_cast<UCHAR>(source.path[i]); |
| 63 if (ShouldEncodeMailboxCharacter<UCHAR>(uch)) | 46 if (uch < 0x20 || uch >= 0x80) |
| 64 success &= AppendUTF8EscapedChar(source.path, &i, end, output); | 47 success &= AppendUTF8EscapedChar(source.path, &i, end, output); |
| 65 else | 48 else |
| 66 output->push_back(static_cast<char>(uch)); | 49 output->push_back(static_cast<char>(uch)); |
| 67 } | 50 } |
| 68 | 51 |
| 69 new_parsed->path.len = output->length() - new_parsed->path.begin; | 52 new_parsed->path.len = output->length() - new_parsed->path.begin; |
| 70 } else { | 53 } else { |
| 71 // No path at all | 54 // No path at all |
| 72 new_parsed->path.reset(); | 55 new_parsed->path.reset(); |
| 73 } | 56 } |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 118 Parsed* new_parsed) { | 101 Parsed* new_parsed) { |
| 119 RawCanonOutput<1024> utf8; | 102 RawCanonOutput<1024> utf8; |
| 120 URLComponentSource<char> source(base); | 103 URLComponentSource<char> source(base); |
| 121 Parsed parsed(base_parsed); | 104 Parsed parsed(base_parsed); |
| 122 SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed); | 105 SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed); |
| 123 return DoCanonicalizeMailtoURL<char, unsigned char>( | 106 return DoCanonicalizeMailtoURL<char, unsigned char>( |
| 124 source, parsed, output, new_parsed); | 107 source, parsed, output, new_parsed); |
| 125 } | 108 } |
| 126 | 109 |
| 127 } // namespace url | 110 } // namespace url |
| OLD | NEW |