Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(121)

Side by Side Diff: net/base/escape.cc

Issue 181483008: Don't unescape BiDi control characters in URL components (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 6 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | net/base/escape_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "net/base/escape.h" 5 #include "net/base/escape.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 8
9 #include "base/logging.h" 9 #include "base/logging.h"
10 #include "base/memory/scoped_ptr.h" 10 #include "base/memory/scoped_ptr.h"
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after
90 // @ A B C D E F G H I J K L M N O 90 // @ A B C D E F G H I J K L M N O
91 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 91 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
92 // P Q R S T U V W X Y Z [ \ ] ^ _ 92 // P Q R S T U V W X Y Z [ \ ] ^ _
93 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 93 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
94 // ` a b c d e f g h i j k l m n o 94 // ` a b c d e f g h i j k l m n o
95 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 95 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
96 // p q r s t u v w x y z { | } ~ <NBSP> 96 // p q r s t u v w x y z { | } ~ <NBSP>
97 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 97 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0
98 }; 98 };
99 99
100 // Unescapes the escape sequence starting at index in escaped_text into unsigned
101 // char value.
Peter Kasting 2014/02/27 04:43:04 Nit: // Attempts to unescape the sequence at |ind…
Anuj 2014/02/27 19:38:45 Done.
102 template<typename STR>
103 bool UnescapeUnsignedCharAtIndex(const STR& escaped_text,
104 int index,
Peter Kasting 2014/02/27 04:43:04 This should be a size_t. Nit: Indenting (2 lines)
Anuj 2014/02/27 19:38:45 Done.
105 unsigned char* value) {
106 const typename STR::value_type most_sig_digit(
Peter Kasting 2014/02/27 04:43:04 This function should also check whether escaped_te…
Anuj 2014/02/27 19:38:45 Done.
107 static_cast<typename STR::value_type>(escaped_text[index + 1]));
108 const typename STR::value_type least_sig_digit(
109 static_cast<typename STR::value_type>(escaped_text[index + 2]));
110 if (IsHexDigit(most_sig_digit) && IsHexDigit(least_sig_digit)) {
111 *value = HexDigitToInt(most_sig_digit) * 16 +
112 HexDigitToInt(least_sig_digit);
113 return true;
114 }
115 return false;
116 }
117
100 template<typename STR> 118 template<typename STR>
101 STR UnescapeURLWithOffsetsImpl(const STR& escaped_text, 119 STR UnescapeURLWithOffsetsImpl(const STR& escaped_text,
102 UnescapeRule::Type rules, 120 UnescapeRule::Type rules,
103 std::vector<size_t>* offsets_for_adjustment) { 121 std::vector<size_t>* offsets_for_adjustment) {
104 if (offsets_for_adjustment) { 122 if (offsets_for_adjustment) {
105 std::for_each(offsets_for_adjustment->begin(), 123 std::for_each(offsets_for_adjustment->begin(),
106 offsets_for_adjustment->end(), 124 offsets_for_adjustment->end(),
107 base::LimitOffset<STR>(escaped_text.length())); 125 base::LimitOffset<STR>(escaped_text.length()));
108 } 126 }
109 // Do not unescape anything, return the |escaped_text| text. 127 // Do not unescape anything, return the |escaped_text| text.
110 if (rules == UnescapeRule::NONE) 128 if (rules == UnescapeRule::NONE)
111 return escaped_text; 129 return escaped_text;
112 130
113 // The output of the unescaping is always smaller than the input, so we can 131 // The output of the unescaping is always smaller than the input, so we can
114 // reserve the input size to make sure we have enough buffer and don't have 132 // reserve the input size to make sure we have enough buffer and don't have
115 // to allocate in the loop below. 133 // to allocate in the loop below.
116 STR result; 134 STR result;
117 result.reserve(escaped_text.length()); 135 result.reserve(escaped_text.length());
118 136
119 // Locations of adjusted text. 137 // Locations of adjusted text.
120 net::internal::AdjustEncodingOffset::Adjustments adjustments; 138 net::internal::AdjustEncodingOffset::Adjustments adjustments;
121 for (size_t i = 0, max = escaped_text.size(); i < max; ++i) { 139 for (size_t i = 0, max = escaped_text.size(); i < max; ++i) {
122 if (static_cast<unsigned char>(escaped_text[i]) >= 128) { 140 if (static_cast<unsigned char>(escaped_text[i]) >= 128) {
123 // Non ASCII character, append as is. 141 // Non ASCII character, append as is.
124 result.push_back(escaped_text[i]); 142 result.push_back(escaped_text[i]);
125 continue; 143 continue;
126 } 144 }
127 145
128 char current_char = static_cast<char>(escaped_text[i]); 146 char current_char = static_cast<char>(escaped_text[i]);
129 if (current_char == '%' && i + 2 < max) { 147 if (current_char == '%' && i + 2 < max) {
Peter Kasting 2014/02/27 04:43:04 If you add the checks mentioned above, |max| can b…
Anuj 2014/02/27 19:38:45 Done.
130 const typename STR::value_type most_sig_digit( 148 unsigned char value;
131 static_cast<typename STR::value_type>(escaped_text[i + 1])); 149 if (UnescapeUnsignedCharAtIndex(escaped_text, i, &value)) {
132 const typename STR::value_type least_sig_digit( 150 // As per http://tools.ietf.org/html/rfc3987#section-4.1, BiDi control
133 static_cast<typename STR::value_type>(escaped_text[i + 2])); 151 // characters are disallowed. The BiDi control characters in escaped
134 if (IsHexDigit(most_sig_digit) && IsHexDigit(least_sig_digit)) { 152 // form are :
135 unsigned char value = HexDigitToInt(most_sig_digit) * 16 + 153 // kRightToLeftMark = "%E2%80%8F"
136 HexDigitToInt(least_sig_digit); 154 // kLeftToRightMark = "%E2%80%8E"
155 // kLeftToRightEmbeddingMark = "%E2%80%AA"
156 // kRightToLeftEmbeddingMark = "%E2%80%AB"
157 // kPopDirectionalFormatting = "%E2%80%AC"
158 // kLeftToRightOverride = "%E2%80%AD"
159 // kRightToLeftOverride = "%E2%80%AE"
Peter Kasting 2014/02/27 04:43:04 Nit: Don't use kNames for things that are just com…
Anuj 2014/02/27 19:38:45 Done.
160 if (value == 0xE2 && i + 8 < max) {
Peter Kasting 2014/02/27 04:43:04 If you add the checks mentioned above, you can eli…
Anuj 2014/02/27 19:38:45 Done.
161 // Possible BiDi control character.
162 UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &value);
163 if (value == 0x80) {
164 UnescapeUnsignedCharAtIndex(escaped_text, i + 6, &value);
165 if (value == 0xAA || value == 0xAB || value == 0xAC ||
166 value == 0xAD || value == 0xAE || value == 0x8E ||
167 value == 0x8F) {
Peter Kasting 2014/02/27 04:43:04 Nit: Simpler: if ((value == 0x8E) || (value == 0x…
Anuj 2014/02/27 19:38:45 Done.
168 result.append(escaped_text, i, 9);
169 i += 8;
170 continue;
171 }
172 }
173 // Restore value if BiDi control character not found.
Peter Kasting 2014/02/27 04:43:04 Prefer declaring a different temp to hold the seco…
Anuj 2014/02/27 19:38:45 Done.
174 value = 0xE2;
175 }
137 if (value >= 0x80 || // Unescape all high-bit characters. 176 if (value >= 0x80 || // Unescape all high-bit characters.
138 // For 7-bit characters, the lookup table tells us all valid chars. 177 // For 7-bit characters, the lookup table tells us all valid chars.
139 (kUrlUnescape[value] || 178 (kUrlUnescape[value] ||
140 // ...and we allow some additional unescaping when flags are set. 179 // ...and we allow some additional unescaping when flags are set.
141 (value == ' ' && (rules & UnescapeRule::SPACES)) || 180 (value == ' ' && (rules & UnescapeRule::SPACES)) ||
142 // Allow any of the prohibited but non-control characters when 181 // Allow any of the prohibited but non-control characters when
143 // we're doing "special" chars. 182 // we're doing "special" chars.
144 (value > ' ' && (rules & UnescapeRule::URL_SPECIAL_CHARS)) || 183 (value > ' ' && (rules & UnescapeRule::URL_SPECIAL_CHARS)) ||
145 // Additionally allow control characters if requested. 184 // Additionally allow control characters if requested.
146 (value < ' ' && (rules & UnescapeRule::CONTROL_CHARS)))) { 185 (value < ' ' && (rules & UnescapeRule::CONTROL_CHARS)))) {
(...skipping 238 matching lines...) Expand 10 before | Expand all | Expand 10 after
385 return; 424 return;
386 } 425 }
387 adjusted_offset -= 2; 426 adjusted_offset -= 2;
388 } 427 }
389 offset = adjusted_offset; 428 offset = adjusted_offset;
390 } 429 }
391 430
392 } // namespace internal 431 } // namespace internal
393 432
394 } // namespace net 433 } // namespace net
OLDNEW
« no previous file with comments | « no previous file | net/base/escape_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698