Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(457)

Unified Diff: net/base/escape.cc

Issue 1180393003: Added characters that look like padlocks to URL unescaping blacklist. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Rename NON_DISPLAY_CHARS to SPOOFING_AND_CONTROL_CHARS. Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « net/base/escape.h ('k') | net/base/escape_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: net/base/escape.cc
diff --git a/net/base/escape.cc b/net/base/escape.cc
index 8e6f870fa8b7beca15ab608136148193230892a7..df8313cbf70c9b183965a1095e724c2b34d2b099 100644
--- a/net/base/escape.cc
+++ b/net/base/escape.cc
@@ -164,6 +164,38 @@ bool HasThreeByteBidiControlCharAtIndex(const STR& escaped_text,
return third_byte >= 0xA6 && third_byte <= 0xA9;
}
+// Returns true if there is a four-byte banned char at |index|. |first_byte| is
+// the byte at |index|.
+template <typename STR>
+bool HasFourByteBannedCharAtIndex(const STR& escaped_text,
+ unsigned char first_byte,
+ size_t index) {
+ // The following characters are blacklisted for spoofability concerns.
+ // U+1F50F LOCK WITH INK PEN (%F0%9F%94%8F)
+ // U+1F510 CLOSED LOCK WITH KEY (%F0%9F%94%90)
+ // U+1F512 LOCK (%F0%9F%94%92)
+ // U+1F513 OPEN LOCK (%F0%9F%94%93)
+ if (first_byte != 0xF0)
+ return false;
+
+ unsigned char second_byte;
+ if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte) ||
+ second_byte != 0x9F) {
+ return false;
+ }
+
+ unsigned char third_byte;
+ if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte) ||
+ third_byte != 0x94) {
+ return false;
+ }
+
+ unsigned char fourth_byte;
+ return UnescapeUnsignedCharAtIndex(escaped_text, index + 9, &fourth_byte) &&
+ (fourth_byte == 0x8F || fourth_byte == 0x90 || fourth_byte == 0x92 ||
+ fourth_byte == 0x93);
+}
+
// Unescapes |escaped_text| according to |rules|, returning the resulting
// string. Fills in an |adjustments| parameter, if non-NULL, so it reflects
// the alterations done to the string that are not one-character-to-one-
@@ -217,12 +249,20 @@ STR UnescapeURLWithAdjustmentsImpl(
// U+2068 FIRST STRONG ISOLATE (%E2%81%A8)
// U+2069 POP DIRECTIONAL ISOLATE (%E2%81%A9)
//
+ // The following spoofable characters are also banned, because they could
+ // be used to imitate parts of a web browser's UI.
+ //
+ // U+1F50F LOCK WITH INK PEN (%F0%9F%94%8F)
+ // U+1F510 CLOSED LOCK WITH KEY (%F0%9F%94%90)
+ // U+1F512 LOCK (%F0%9F%94%92)
+ // U+1F513 OPEN LOCK (%F0%9F%94%93)
+ //
// However, some schemes such as data: and file: need to parse the exact
- // binary data when loading the URL. For that reason, CONTROL_CHARS allows
- // unescaping BiDi control characters.
- // DO NOT use CONTROL_CHARS if the parsed URL is going to be displayed
- // in the UI.
- if (!(rules & UnescapeRule::CONTROL_CHARS)) {
+ // binary data when loading the URL. For that reason,
+ // SPOOFING_AND_CONTROL_CHARS allows unescaping BiDi control characters.
+ // DO NOT use SPOOFING_AND_CONTROL_CHARS if the parsed URL is going to be
+ // displayed in the UI.
+ if (!(rules & UnescapeRule::SPOOFING_AND_CONTROL_CHARS)) {
if (HasArabicLanguageMarkAtIndex(escaped_text, first_byte, i)) {
// Keep Arabic Language Mark escaped.
result.append(escaped_text, i, 6);
@@ -235,6 +275,12 @@ STR UnescapeURLWithAdjustmentsImpl(
i += 8;
continue;
}
+ if (HasFourByteBannedCharAtIndex(escaped_text, first_byte, i)) {
+ // Keep banned char escaped.
+ result.append(escaped_text, i, 12);
+ i += 11;
+ continue;
+ }
}
if (first_byte >= 0x80 || // Unescape all high-bit characters.
@@ -245,8 +291,9 @@ STR UnescapeURLWithAdjustmentsImpl(
// Allow any of the prohibited but non-control characters when
// we're doing "special" chars.
(first_byte > ' ' && (rules & UnescapeRule::URL_SPECIAL_CHARS)) ||
- // Additionally allow control characters if requested.
- (first_byte < ' ' && (rules & UnescapeRule::CONTROL_CHARS)))) {
+ // Additionally allow non-display characters if requested.
+ (first_byte < ' ' &&
+ (rules & UnescapeRule::SPOOFING_AND_CONTROL_CHARS)))) {
// Use the unescaped version of the character.
if (adjustments)
adjustments->push_back(base::OffsetAdjuster::Adjustment(i, 3, 1));
« no previous file with comments | « net/base/escape.h ('k') | net/base/escape_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698