Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(148)

Side by Side Diff: net/base/escape.cc

Issue 1180393003: Added characters that look like padlocks to URL unescaping blacklist. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Combine if statements. Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "net/base/escape.h" 5 #include "net/base/escape.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 8
9 #include "base/logging.h" 9 #include "base/logging.h"
10 #include "base/memory/scoped_ptr.h" 10 #include "base/memory/scoped_ptr.h"
(...skipping 146 matching lines...) Expand 10 before | Expand all | Expand 10 after
157 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte)) 157 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte))
158 return false; 158 return false;
159 if (second_byte == 0x80) { 159 if (second_byte == 0x80) {
160 return third_byte == 0x8E || 160 return third_byte == 0x8E ||
161 third_byte == 0x8F || 161 third_byte == 0x8F ||
162 (third_byte >= 0xAA && third_byte <= 0xAE); 162 (third_byte >= 0xAA && third_byte <= 0xAE);
163 } 163 }
164 return third_byte >= 0xA6 && third_byte <= 0xA9; 164 return third_byte >= 0xA6 && third_byte <= 0xA9;
165 } 165 }
166 166
167 // Returns true if there is a four-byte banned char at |index|. |first_byte| is
168 // the byte at |index|.
169 template <typename STR>
170 bool HasFourByteBannedCharAtIndex(const STR& escaped_text,
171 unsigned char first_byte,
172 size_t index) {
173 // The following characters are blacklisted for spoofability concerns.
174 // U+1F50F LOCK WITH INK PEN (%F0%9F%94%8F)
175 // U+1F510 CLOSED LOCK WITH KEY (%F0%9F%94%90)
176 // U+1F512 LOCK (%F0%9F%94%92)
177 // U+1F513 OPEN LOCK (%F0%9F%94%93)
178 if (first_byte != 0xF0)
179 return false;
180
181 unsigned char second_byte;
182 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte) ||
183 second_byte != 0x9F) {
184 return false;
185 }
186
187 unsigned char third_byte;
188 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte) ||
189 third_byte != 0x94) {
190 return false;
191 }
192
193 unsigned char fourth_byte;
194 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 9, &fourth_byte) ||
195 (fourth_byte != 0x8F && fourth_byte != 0x90 && fourth_byte != 0x92 &&
196 fourth_byte != 0x93)) {
Peter Kasting 2015/06/22 07:35:22 Nit: Simpler: return UnescapeUnsignedCharAtInde… [comment truncated in collapsed view]
Matt Giuca 2015/06/23 04:14:10 Done.
197 return false;
198 }
199
200 return true;
201 }
202
167 // Unescapes |escaped_text| according to |rules|, returning the resulting 203 // Unescapes |escaped_text| according to |rules|, returning the resulting
168 // string. Fills in an |adjustments| parameter, if non-NULL, so it reflects 204 // string. Fills in an |adjustments| parameter, if non-NULL, so it reflects
169 // the alterations done to the string that are not one-character-to-one- 205 // the alterations done to the string that are not one-character-to-one-
170 // character. The resulting |adjustments| will always be sorted by increasing 206 // character. The resulting |adjustments| will always be sorted by increasing
171 // offset. 207 // offset.
172 template<typename STR> 208 template<typename STR>
173 STR UnescapeURLWithAdjustmentsImpl( 209 STR UnescapeURLWithAdjustmentsImpl(
174 const STR& escaped_text, 210 const STR& escaped_text,
175 UnescapeRule::Type rules, 211 UnescapeRule::Type rules,
176 base::OffsetAdjuster::Adjustments* adjustments) { 212 base::OffsetAdjuster::Adjustments* adjustments) {
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
210 // Additionally, the Unicode Technical Report (TR9) as referenced by RFC 246 // Additionally, the Unicode Technical Report (TR9) as referenced by RFC
211 // 3987 above has since added some new BiDi control characters. 247 // 3987 above has since added some new BiDi control characters.
212 // http://www.unicode.org/reports/tr9 248 // http://www.unicode.org/reports/tr9
213 // 249 //
214 // U+061C ARABIC LETTER MARK (%D8%9C) 250 // U+061C ARABIC LETTER MARK (%D8%9C)
215 // U+2066 LEFT-TO-RIGHT ISOLATE (%E2%81%A6) 251 // U+2066 LEFT-TO-RIGHT ISOLATE (%E2%81%A6)
216 // U+2067 RIGHT-TO-LEFT ISOLATE (%E2%81%A7) 252 // U+2067 RIGHT-TO-LEFT ISOLATE (%E2%81%A7)
217 // U+2068 FIRST STRONG ISOLATE (%E2%81%A8) 253 // U+2068 FIRST STRONG ISOLATE (%E2%81%A8)
218 // U+2069 POP DIRECTIONAL ISOLATE (%E2%81%A9) 254 // U+2069 POP DIRECTIONAL ISOLATE (%E2%81%A9)
219 // 255 //
256 // The following spoofable characters are also banned, because they could
257 // be used to imitate parts of the browser UI.
mmenke 2015/06/22 17:05:07 It's a layering violation for net/ to know about a… [comment truncated in collapsed view]
Matt Giuca 2015/06/23 04:14:10 OK well the entire reason for these chars being ba… [comment truncated in collapsed view]
258 //
259 // U+1F50F LOCK WITH INK PEN (%F0%9F%94%8F)
260 // U+1F510 CLOSED LOCK WITH KEY (%F0%9F%94%90)
261 // U+1F512 LOCK (%F0%9F%94%92)
262 // U+1F513 OPEN LOCK (%F0%9F%94%93)
263 //
220 // However, some schemes such as data: and file: need to parse the exact 264 // However, some schemes such as data: and file: need to parse the exact
221 // binary data when loading the URL. For that reason, CONTROL_CHARS allows 265 // binary data when loading the URL. For that reason, CONTROL_CHARS allows
222 // unescaping BiDi control characters. 266 // unescaping BiDi control characters.
223 // DO NOT use CONTROL_CHARS if the parsed URL is going to be displayed 267 // DO NOT use CONTROL_CHARS if the parsed URL is going to be displayed
224 // in the UI. 268 // in the UI.
225 if (!(rules & UnescapeRule::CONTROL_CHARS)) { 269 if (!(rules & UnescapeRule::CONTROL_CHARS)) {
226 if (HasArabicLanguageMarkAtIndex(escaped_text, first_byte, i)) { 270 if (HasArabicLanguageMarkAtIndex(escaped_text, first_byte, i)) {
227 // Keep Arabic Language Mark escaped. 271 // Keep Arabic Language Mark escaped.
228 result.append(escaped_text, i, 6); 272 result.append(escaped_text, i, 6);
229 i += 5; 273 i += 5;
230 continue; 274 continue;
231 } 275 }
232 if (HasThreeByteBidiControlCharAtIndex(escaped_text, first_byte, i)) { 276 if (HasThreeByteBidiControlCharAtIndex(escaped_text, first_byte, i)) {
233 // Keep BiDi control char escaped. 277 // Keep BiDi control char escaped.
234 result.append(escaped_text, i, 9); 278 result.append(escaped_text, i, 9);
235 i += 8; 279 i += 8;
236 continue; 280 continue;
237 } 281 }
282 if (HasFourByteBannedCharAtIndex(escaped_text, first_byte, i)) {
283 // Keep banned char escaped.
284 result.append(escaped_text, i, 12);
285 i += 11;
286 continue;
287 }
238 } 288 }
239 289
240 if (first_byte >= 0x80 || // Unescape all high-bit characters. 290 if (first_byte >= 0x80 || // Unescape all high-bit characters.
241 // For 7-bit characters, the lookup table tells us all valid chars. 291 // For 7-bit characters, the lookup table tells us all valid chars.
242 (kUrlUnescape[first_byte] || 292 (kUrlUnescape[first_byte] ||
243 // ...and we allow some additional unescaping when flags are set. 293 // ...and we allow some additional unescaping when flags are set.
244 (first_byte == ' ' && (rules & UnescapeRule::SPACES)) || 294 (first_byte == ' ' && (rules & UnescapeRule::SPACES)) ||
245 // Allow any of the prohibited but non-control characters when 295 // Allow any of the prohibited but non-control characters when
246 // we're doing "special" chars. 296 // we're doing "special" chars.
247 (first_byte > ' ' && (rules & UnescapeRule::URL_SPECIAL_CHARS)) || 297 (first_byte > ' ' && (rules & UnescapeRule::URL_SPECIAL_CHARS)) ||
(...skipping 206 matching lines...) Expand 10 before | Expand all | Expand 10 after
454 1, kEscapeToChars[i].replacement); 504 1, kEscapeToChars[i].replacement);
455 break; 505 break;
456 } 506 }
457 } 507 }
458 } 508 }
459 } 509 }
460 return text; 510 return text;
461 } 511 }
462 512
463 } // namespace net 513 } // namespace net
OLD | NEW
« net/base/escape.h ('K') | « net/base/escape.h ('k') | net/base/escape_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698