Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(104)

Side by Side Diff: net/base/escape.cc

Issue 1180393003: Added characters that look like padlocks to URL unescaping blacklist. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « net/base/escape.h ('k') | net/base/escape_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "net/base/escape.h" 5 #include "net/base/escape.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 8
9 #include "base/logging.h" 9 #include "base/logging.h"
10 #include "base/memory/scoped_ptr.h" 10 #include "base/memory/scoped_ptr.h"
(...skipping 146 matching lines...) Expand 10 before | Expand all | Expand 10 after
157 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte)) 157 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte))
158 return false; 158 return false;
159 if (second_byte == 0x80) { 159 if (second_byte == 0x80) {
160 return third_byte == 0x8E || 160 return third_byte == 0x8E ||
161 third_byte == 0x8F || 161 third_byte == 0x8F ||
162 (third_byte >= 0xAA && third_byte <= 0xAE); 162 (third_byte >= 0xAA && third_byte <= 0xAE);
163 } 163 }
164 return third_byte >= 0xA6 && third_byte <= 0xA9; 164 return third_byte >= 0xA6 && third_byte <= 0xA9;
165 } 165 }
166 166
167 // Returns true if there is a four-byte banned char at |index|. |first_byte| is
168 // the byte at |index|.
169 template <typename STR>
170 bool HasFourByteBannedCharAtIndex(const STR& escaped_text,
171 unsigned char first_byte,
172 size_t index) {
173 // The following characters are blacklisted for spoofability concerns.
174 // U+1F50F LOCK WITH INK PEN (%F0%9F%94%8F)
175 // U+1F510 CLOSED LOCK WITH KEY (%F0%9F%94%90)
176 // U+1F512 LOCK (%F0%9F%94%92)
177 // U+1F513 OPEN LOCK (%F0%9F%94%93)
178 if (first_byte != 0xF0)
179 return false;
180 unsigned char second_byte;
181 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte))
Peter Kasting 2015/06/16 06:36:15 Nit: Combine these next two conditionals.
Matt Giuca 2015/06/16 07:55:34 Do you mean if (!...(..., &second_byte) || second
Peter Kasting 2015/06/16 08:05:08 Yes, I meant this. This reads more clearly to me
mmenke 2015/06/17 19:50:16 I think 4 ifs (1 per character) is a little easie
Matt Giuca 2015/06/22 07:27:03 Done. (Not a fan of this change, but don't want to
182 return false;
183 if (second_byte != 0x9F)
184 return false;
185 unsigned char third_byte;
186 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte))
Peter Kasting 2015/06/16 06:36:15 Nit: And these.
Matt Giuca 2015/06/22 07:27:03 Done.
187 return false;
188 if (third_byte != 0x94)
189 return false;
190 unsigned char fourth_byte;
191 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 9, &fourth_byte))
Peter Kasting 2015/06/16 06:36:15 Nit: And maybe these (more optional). Also option
Matt Giuca 2015/06/22 07:27:03 Done.
192 return false;
193 return fourth_byte == 0x8F || fourth_byte == 0x90 || fourth_byte == 0x92 ||
194 fourth_byte == 0x93;
195 }
196
167 // Unescapes |escaped_text| according to |rules|, returning the resulting 197 // Unescapes |escaped_text| according to |rules|, returning the resulting
168 // string. Fills in an |adjustments| parameter, if non-NULL, so it reflects 198 // string. Fills in an |adjustments| parameter, if non-NULL, so it reflects
169 // the alterations done to the string that are not one-character-to-one- 199 // the alterations done to the string that are not one-character-to-one-
170 // character. The resulting |adjustments| will always be sorted by increasing 200 // character. The resulting |adjustments| will always be sorted by increasing
171 // offset. 201 // offset.
172 template<typename STR> 202 template<typename STR>
173 STR UnescapeURLWithAdjustmentsImpl( 203 STR UnescapeURLWithAdjustmentsImpl(
174 const STR& escaped_text, 204 const STR& escaped_text,
175 UnescapeRule::Type rules, 205 UnescapeRule::Type rules,
176 base::OffsetAdjuster::Adjustments* adjustments) { 206 base::OffsetAdjuster::Adjustments* adjustments) {
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
210 // Additionally, the Unicode Technical Report (TR9) as referenced by RFC 240 // Additionally, the Unicode Technical Report (TR9) as referenced by RFC
211 // 3987 above has since added some new BiDi control characters. 241 // 3987 above has since added some new BiDi control characters.
212 // http://www.unicode.org/reports/tr9 242 // http://www.unicode.org/reports/tr9
213 // 243 //
214 // U+061C ARABIC LETTER MARK (%D8%9C) 244 // U+061C ARABIC LETTER MARK (%D8%9C)
215 // U+2066 LEFT-TO-RIGHT ISOLATE (%E2%81%A6) 245 // U+2066 LEFT-TO-RIGHT ISOLATE (%E2%81%A6)
216 // U+2067 RIGHT-TO-LEFT ISOLATE (%E2%81%A7) 246 // U+2067 RIGHT-TO-LEFT ISOLATE (%E2%81%A7)
217 // U+2068 FIRST STRONG ISOLATE (%E2%81%A8) 247 // U+2068 FIRST STRONG ISOLATE (%E2%81%A8)
218 // U+2069 POP DIRECTIONAL ISOLATE (%E2%81%A9) 248 // U+2069 POP DIRECTIONAL ISOLATE (%E2%81%A9)
219 // 249 //
250 // We also ban certain spoofable characters that could be used to imitate
mmenke 2015/06/17 19:50:16 nit: Should avoid "we" in comments. Just use pas
Matt Giuca 2015/06/22 07:27:03 Done.
251 // parts of the browser UI.
mmenke 2015/06/17 19:50:16 nit: Mentioning the browser seems like a bit of a
Matt Giuca 2015/06/22 07:27:03 I think we want to be fairly specific here. Yes, i
252 //
253 // U+1F50F LOCK WITH INK PEN (%F0%9F%94%8F)
254 // U+1F510 CLOSED LOCK WITH KEY (%F0%9F%94%90)
255 // U+1F512 LOCK (%F0%9F%94%92)
256 // U+1F513 OPEN LOCK (%F0%9F%94%93)
257 //
220 // However, some schemes such as data: and file: need to parse the exact 258 // However, some schemes such as data: and file: need to parse the exact
221 // binary data when loading the URL. For that reason, CONTROL_CHARS allows 259 // binary data when loading the URL. For that reason, CONTROL_CHARS allows
222 // unescaping BiDi control characters. 260 // unescaping BiDi control characters.
223 // DO NOT use CONTROL_CHARS if the parsed URL is going to be displayed 261 // DO NOT use CONTROL_CHARS if the parsed URL is going to be displayed
224 // in the UI. 262 // in the UI.
225 if (!(rules & UnescapeRule::CONTROL_CHARS)) { 263 if (!(rules & UnescapeRule::CONTROL_CHARS)) {
226 if (HasArabicLanguageMarkAtIndex(escaped_text, first_byte, i)) { 264 if (HasArabicLanguageMarkAtIndex(escaped_text, first_byte, i)) {
227 // Keep Arabic Letter Mark escaped. 265 // Keep Arabic Letter Mark escaped.
228 result.append(escaped_text, i, 6); 266 result.append(escaped_text, i, 6);
229 i += 5; 267 i += 5;
230 continue; 268 continue;
231 } 269 }
232 if (HasThreeByteBidiControlCharAtIndex(escaped_text, first_byte, i)) { 270 if (HasThreeByteBidiControlCharAtIndex(escaped_text, first_byte, i)) {
233 // Keep BiDi control char escaped. 271 // Keep BiDi control char escaped.
234 result.append(escaped_text, i, 9); 272 result.append(escaped_text, i, 9);
235 i += 8; 273 i += 8;
236 continue; 274 continue;
237 } 275 }
276 if (HasFourByteBannedCharAtIndex(escaped_text, first_byte, i)) {
277 // Keep banned char escaped.
278 result.append(escaped_text, i, 12);
279 i += 11;
280 continue;
281 }
238 } 282 }
239 283
240 if (first_byte >= 0x80 || // Unescape all high-bit characters. 284 if (first_byte >= 0x80 || // Unescape all high-bit characters.
241 // For 7-bit characters, the lookup table tells us all valid chars. 285 // For 7-bit characters, the lookup table tells us all valid chars.
242 (kUrlUnescape[first_byte] || 286 (kUrlUnescape[first_byte] ||
243 // ...and we allow some additional unescaping when flags are set. 287 // ...and we allow some additional unescaping when flags are set.
244 (first_byte == ' ' && (rules & UnescapeRule::SPACES)) || 288 (first_byte == ' ' && (rules & UnescapeRule::SPACES)) ||
245 // Allow any of the prohibited but non-control characters when 289 // Allow any of the prohibited but non-control characters when
246 // we're doing "special" chars. 290 // we're doing "special" chars.
247 (first_byte > ' ' && (rules & UnescapeRule::URL_SPECIAL_CHARS)) || 291 (first_byte > ' ' && (rules & UnescapeRule::URL_SPECIAL_CHARS)) ||
(...skipping 206 matching lines...) Expand 10 before | Expand all | Expand 10 after
454 1, kEscapeToChars[i].replacement); 498 1, kEscapeToChars[i].replacement);
455 break; 499 break;
456 } 500 }
457 } 501 }
458 } 502 }
459 } 503 }
460 return text; 504 return text;
461 } 505 }
462 506
463 } // namespace net 507 } // namespace net
OLD | NEW
« no previous file with comments | « net/base/escape.h ('k') | net/base/escape_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698