Index: net/base/escape.cc |
diff --git a/net/base/escape.cc b/net/base/escape.cc |
index 8e6f870fa8b7beca15ab608136148193230892a7..df8313cbf70c9b183965a1095e724c2b34d2b099 100644 |
--- a/net/base/escape.cc |
+++ b/net/base/escape.cc |
@@ -164,6 +164,38 @@ bool HasThreeByteBidiControlCharAtIndex(const STR& escaped_text, |
return third_byte >= 0xA6 && third_byte <= 0xA9; |
} |
+// Returns true if there is a four-byte banned char at |index|. |first_byte| is |
+// the byte at |index|. |
+template <typename STR> |
+bool HasFourByteBannedCharAtIndex(const STR& escaped_text, |
+ unsigned char first_byte, |
+ size_t index) { |
+ // The following characters are blacklisted for spoofability concerns. |
+ // U+1F50F LOCK WITH INK PEN (%F0%9F%94%8F) |
+ // U+1F510 CLOSED LOCK WITH KEY (%F0%9F%94%90) |
+ // U+1F512 LOCK (%F0%9F%94%92) |
+ // U+1F513 OPEN LOCK (%F0%9F%94%93) |
+ if (first_byte != 0xF0) |
+ return false; |
+ |
+ unsigned char second_byte; |
+ if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte) || |
+ second_byte != 0x9F) { |
+ return false; |
+ } |
+ |
+ unsigned char third_byte; |
+ if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte) || |
+ third_byte != 0x94) { |
+ return false; |
+ } |
+ |
+ unsigned char fourth_byte; |
+ return UnescapeUnsignedCharAtIndex(escaped_text, index + 9, &fourth_byte) && |
+ (fourth_byte == 0x8F || fourth_byte == 0x90 || fourth_byte == 0x92 || |
+ fourth_byte == 0x93); |
+} |
+ |
// Unescapes |escaped_text| according to |rules|, returning the resulting |
// string. Fills in an |adjustments| parameter, if non-NULL, so it reflects |
// the alterations done to the string that are not one-character-to-one- |
@@ -217,12 +249,20 @@ STR UnescapeURLWithAdjustmentsImpl( |
// U+2068 FIRST STRONG ISOLATE (%E2%81%A8) |
// U+2069 POP DIRECTIONAL ISOLATE (%E2%81%A9) |
// |
+ // The following spoofable characters are also banned, because they could |
+ // be used to imitate parts of a web browser's UI. |
+ // |
+ // U+1F50F LOCK WITH INK PEN (%F0%9F%94%8F) |
+ // U+1F510 CLOSED LOCK WITH KEY (%F0%9F%94%90) |
+ // U+1F512 LOCK (%F0%9F%94%92) |
+ // U+1F513 OPEN LOCK (%F0%9F%94%93) |
+ // |
// However, some schemes such as data: and file: need to parse the exact |
- // binary data when loading the URL. For that reason, CONTROL_CHARS allows |
- // unescaping BiDi control characters. |
- // DO NOT use CONTROL_CHARS if the parsed URL is going to be displayed |
- // in the UI. |
- if (!(rules & UnescapeRule::CONTROL_CHARS)) { |
+ // binary data when loading the URL. For that reason, |
+ // SPOOFING_AND_CONTROL_CHARS allows unescaping BiDi control characters. |
+ // DO NOT use SPOOFING_AND_CONTROL_CHARS if the parsed URL is going to be |
+ // displayed in the UI. |
+ if (!(rules & UnescapeRule::SPOOFING_AND_CONTROL_CHARS)) { |
if (HasArabicLanguageMarkAtIndex(escaped_text, first_byte, i)) { |
// Keep Arabic Language Mark escaped. |
result.append(escaped_text, i, 6); |
@@ -235,6 +275,12 @@ STR UnescapeURLWithAdjustmentsImpl( |
i += 8; |
continue; |
} |
+ if (HasFourByteBannedCharAtIndex(escaped_text, first_byte, i)) { |
+ // Keep banned char escaped. |
+ result.append(escaped_text, i, 12); |
+ i += 11; |
+ continue; |
+ } |
} |
if (first_byte >= 0x80 || // Unescape all high-bit characters. |
@@ -245,8 +291,9 @@ STR UnescapeURLWithAdjustmentsImpl( |
// Allow any of the prohibited but non-control characters when |
// we're doing "special" chars. |
(first_byte > ' ' && (rules & UnescapeRule::URL_SPECIAL_CHARS)) || |
- // Additionally allow control characters if requested. |
- (first_byte < ' ' && (rules & UnescapeRule::CONTROL_CHARS)))) { |
+ // Additionally allow non-display characters if requested. |
+ (first_byte < ' ' && |
+ (rules & UnescapeRule::SPOOFING_AND_CONTROL_CHARS)))) { |
// Use the unescaped version of the character. |
if (adjustments) |
adjustments->push_back(base::OffsetAdjuster::Adjustment(i, 3, 1)); |