Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(600)

Side by Side Diff: net/base/escape.cc

Issue 1180393003: Added characters that look like padlocks to URL unescaping blacklist. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Rename NON_DISPLAY_CHARS to SPOOFING_AND_CONTROL_CHARS. Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « net/base/escape.h ('k') | net/base/escape_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "net/base/escape.h" 5 #include "net/base/escape.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 8
9 #include "base/logging.h" 9 #include "base/logging.h"
10 #include "base/memory/scoped_ptr.h" 10 #include "base/memory/scoped_ptr.h"
(...skipping 146 matching lines...) Expand 10 before | Expand all | Expand 10 after
157 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte)) 157 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte))
158 return false; 158 return false;
159 if (second_byte == 0x80) { 159 if (second_byte == 0x80) {
160 return third_byte == 0x8E || 160 return third_byte == 0x8E ||
161 third_byte == 0x8F || 161 third_byte == 0x8F ||
162 (third_byte >= 0xAA && third_byte <= 0xAE); 162 (third_byte >= 0xAA && third_byte <= 0xAE);
163 } 163 }
164 return third_byte >= 0xA6 && third_byte <= 0xA9; 164 return third_byte >= 0xA6 && third_byte <= 0xA9;
165 } 165 }
166 166
// Reports whether the escape sequence starting at |index| is one of the
// four-byte characters that must remain escaped. |first_byte| is the byte at
// |index|; the other three bytes are fetched with
// UnescapeUnsignedCharAtIndex() at |index| + 3, |index| + 6 and |index| + 9.
template <typename STR>
bool HasFourByteBannedCharAtIndex(const STR& escaped_text,
                                  unsigned char first_byte,
                                  size_t index) {
  // Characters blacklisted for spoofability concerns; they share the prefix
  // %F0%9F%94 and differ only in the final byte:
  //   U+1F50F LOCK WITH INK PEN    (%F0%9F%94%8F)
  //   U+1F510 CLOSED LOCK WITH KEY (%F0%9F%94%90)
  //   U+1F512 LOCK                 (%F0%9F%94%92)
  //   U+1F513 OPEN LOCK            (%F0%9F%94%93)
  if (first_byte != 0xF0)
    return false;

  unsigned char byte2;
  if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &byte2))
    return false;
  if (byte2 != 0x9F)
    return false;

  unsigned char byte3;
  if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &byte3))
    return false;
  if (byte3 != 0x94)
    return false;

  unsigned char byte4;
  if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 9, &byte4))
    return false;

  // Only the final byte distinguishes the banned code points.
  switch (byte4) {
    case 0x8F:
    case 0x90:
    case 0x92:
    case 0x93:
      return true;
    default:
      return false;
  }
}
198
167 // Unescapes |escaped_text| according to |rules|, returning the resulting 199 // Unescapes |escaped_text| according to |rules|, returning the resulting
168 // string. Fills in an |adjustments| parameter, if non-NULL, so it reflects 200 // string. Fills in an |adjustments| parameter, if non-NULL, so it reflects
169 // the alterations done to the string that are not one-character-to-one- 201 // the alterations done to the string that are not one-character-to-one-
170 // character. The resulting |adjustments| will always be sorted by increasing 202 // character. The resulting |adjustments| will always be sorted by increasing
171 // offset. 203 // offset.
172 template<typename STR> 204 template<typename STR>
173 STR UnescapeURLWithAdjustmentsImpl( 205 STR UnescapeURLWithAdjustmentsImpl(
174 const STR& escaped_text, 206 const STR& escaped_text,
175 UnescapeRule::Type rules, 207 UnescapeRule::Type rules,
176 base::OffsetAdjuster::Adjustments* adjustments) { 208 base::OffsetAdjuster::Adjustments* adjustments) {
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
210 // Additionally, the Unicode Technical Report (TR9) as referenced by RFC 242 // Additionally, the Unicode Technical Report (TR9) as referenced by RFC
211 // 3987 above has since added some new BiDi control characters. 243 // 3987 above has since added some new BiDi control characters.
212 // http://www.unicode.org/reports/tr9 244 // http://www.unicode.org/reports/tr9
213 // 245 //
214 // U+061C ARABIC LETTER MARK (%D8%9C) 246 // U+061C ARABIC LETTER MARK (%D8%9C)
215 // U+2066 LEFT-TO-RIGHT ISOLATE (%E2%81%A6) 247 // U+2066 LEFT-TO-RIGHT ISOLATE (%E2%81%A6)
216 // U+2067 RIGHT-TO-LEFT ISOLATE (%E2%81%A7) 248 // U+2067 RIGHT-TO-LEFT ISOLATE (%E2%81%A7)
217 // U+2068 FIRST STRONG ISOLATE (%E2%81%A8) 249 // U+2068 FIRST STRONG ISOLATE (%E2%81%A8)
218 // U+2069 POP DIRECTIONAL ISOLATE (%E2%81%A9) 250 // U+2069 POP DIRECTIONAL ISOLATE (%E2%81%A9)
219 // 251 //
252 // The following spoofable characters are also banned, because they could
253 // be used to imitate parts of a web browser's UI.
254 //
255 // U+1F50F LOCK WITH INK PEN (%F0%9F%94%8F)
256 // U+1F510 CLOSED LOCK WITH KEY (%F0%9F%94%90)
257 // U+1F512 LOCK (%F0%9F%94%92)
258 // U+1F513 OPEN LOCK (%F0%9F%94%93)
259 //
220 // However, some schemes such as data: and file: need to parse the exact 260 // However, some schemes such as data: and file: need to parse the exact
221 // binary data when loading the URL. For that reason, CONTROL_CHARS allows 261 // binary data when loading the URL. For that reason,
222 // unescaping BiDi control characters. 262 // SPOOFING_AND_CONTROL_CHARS allows unescaping BiDi control and spoofable characters.
223 // DO NOT use CONTROL_CHARS if the parsed URL is going to be displayed 263 // DO NOT use SPOOFING_AND_CONTROL_CHARS if the parsed URL is going to be
224 // in the UI. 264 // displayed in the UI.
225 if (!(rules & UnescapeRule::CONTROL_CHARS)) { 265 if (!(rules & UnescapeRule::SPOOFING_AND_CONTROL_CHARS)) {
226 if (HasArabicLanguageMarkAtIndex(escaped_text, first_byte, i)) { 266 if (HasArabicLanguageMarkAtIndex(escaped_text, first_byte, i)) {
227 // Keep Arabic Language Mark escaped. 267 // Keep Arabic Language Mark escaped.
228 result.append(escaped_text, i, 6); 268 result.append(escaped_text, i, 6);
229 i += 5; 269 i += 5;
230 continue; 270 continue;
231 } 271 }
232 if (HasThreeByteBidiControlCharAtIndex(escaped_text, first_byte, i)) { 272 if (HasThreeByteBidiControlCharAtIndex(escaped_text, first_byte, i)) {
233 // Keep BiDi control char escaped. 273 // Keep BiDi control char escaped.
234 result.append(escaped_text, i, 9); 274 result.append(escaped_text, i, 9);
235 i += 8; 275 i += 8;
236 continue; 276 continue;
237 } 277 }
278 if (HasFourByteBannedCharAtIndex(escaped_text, first_byte, i)) {
279 // Keep banned char escaped.
280 result.append(escaped_text, i, 12);
281 i += 11;
282 continue;
283 }
238 } 284 }
239 285
240 if (first_byte >= 0x80 || // Unescape all high-bit characters. 286 if (first_byte >= 0x80 || // Unescape all high-bit characters.
241 // For 7-bit characters, the lookup table tells us all valid chars. 287 // For 7-bit characters, the lookup table tells us all valid chars.
242 (kUrlUnescape[first_byte] || 288 (kUrlUnescape[first_byte] ||
243 // ...and we allow some additional unescaping when flags are set. 289 // ...and we allow some additional unescaping when flags are set.
244 (first_byte == ' ' && (rules & UnescapeRule::SPACES)) || 290 (first_byte == ' ' && (rules & UnescapeRule::SPACES)) ||
245 // Allow any of the prohibited but non-control characters when 291 // Allow any of the prohibited but non-control characters when
246 // we're doing "special" chars. 292 // we're doing "special" chars.
247 (first_byte > ' ' && (rules & UnescapeRule::URL_SPECIAL_CHARS)) || 293 (first_byte > ' ' && (rules & UnescapeRule::URL_SPECIAL_CHARS)) ||
248 // Additionally allow control characters if requested. 294 // Additionally allow control characters if requested.
249 (first_byte < ' ' && (rules & UnescapeRule::CONTROL_CHARS)))) { 295 (first_byte < ' ' &&
296 (rules & UnescapeRule::SPOOFING_AND_CONTROL_CHARS)))) {
250 // Use the unescaped version of the character. 297 // Use the unescaped version of the character.
251 if (adjustments) 298 if (adjustments)
252 adjustments->push_back(base::OffsetAdjuster::Adjustment(i, 3, 1)); 299 adjustments->push_back(base::OffsetAdjuster::Adjustment(i, 3, 1));
253 result.push_back(first_byte); 300 result.push_back(first_byte);
254 i += 2; 301 i += 2;
255 } else { 302 } else {
256 // Keep escaped. Append a percent and we'll get the following two 303 // Keep escaped. Append a percent and we'll get the following two
257 // digits on the next loops through. 304 // digits on the next loops through.
258 result.push_back('%'); 305 result.push_back('%');
259 } 306 }
(...skipping 194 matching lines...) Expand 10 before | Expand all | Expand 10 after
454 1, kEscapeToChars[i].replacement); 501 1, kEscapeToChars[i].replacement);
455 break; 502 break;
456 } 503 }
457 } 504 }
458 } 505 }
459 } 506 }
460 return text; 507 return text;
461 } 508 }
462 509
463 } // namespace net 510 } // namespace net
OLDNEW
« no previous file with comments | « net/base/escape.h ('k') | net/base/escape_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698