Index: net/base/escape.cc |
diff --git a/net/base/escape.cc b/net/base/escape.cc |
index ab70f1db30187e6a28e09757819d2f307291fd53..e2e9962b397f1e242d991bcad112ec4c1cc1793b 100644 |
--- a/net/base/escape.cc |
+++ b/net/base/escape.cc |
@@ -120,6 +120,51 @@ bool UnescapeUnsignedCharAtIndex(const STR& escaped_text, |
return false; |
} |
+// Returns true if an escaped Arabic Letter Mark (U+061C, %D8%9C) starts at |
+// |index|. |first_byte| is the already-unescaped byte at |index|. |
+template<typename STR> |
+bool HasArabicLanguageMarkAtIndex(const STR& escaped_text, |
+ unsigned char first_byte, |
+ size_t index) { |
+ if (first_byte != 0xD8) |
+ return false; |
+ unsigned char second_byte; |
+ if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte)) |
+ return false; |
+ return second_byte == 0x9c; |
+} |
+ |
+// Returns true if 0xE2, |second_byte|, |third_byte| is a three-byte BiDi |
+// control char. Any |second_byte| other than 0x80 is treated as 0x81. |
+bool IsLastTwoBytesofThreeByteBidiControlChar(unsigned char second_byte, |
+ unsigned char third_byte) { |
mmenke
2014/10/20 15:46:40
This function name is a lie. Its actually IsLastT
meacer
2014/10/20 17:23:26
Sounds reasonable, inlined.
|
+ if (second_byte == 0x80) { |
+ return third_byte == 0x8E || |
+ third_byte == 0x8F || |
+ (third_byte >= 0xAA && third_byte <= 0xAE); |
+ } |
mmenke
2014/10/20 15:46:40
DCHECK_EQ(0x81, second_byte)? If this is a separa
meacer
2014/10/20 17:23:26
Done.
|
+ return third_byte >= 0xA6 && third_byte <= 0xA9; |
+} |
+ |
+// Returns true if an escaped three-byte BiDi control char (%E2%80%XX or |
+// %E2%81%XX) starts at |index|. |first_byte| is the unescaped byte at |index|. |
+template<typename STR> |
+bool HasThreeByteBidiControlCharAtIndex(const STR& escaped_text, |
+ unsigned char first_byte, |
+ size_t index) { |
+ if (first_byte != 0xE2) |
+ return false; |
+ unsigned char second_byte; |
+ if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte)) |
+ return false; |
+ if (second_byte != 0x80 && second_byte != 0x81) |
+ return false; |
+ unsigned char third_byte; |
+ if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte)) |
+ return false; |
+ return IsLastTwoBytesofThreeByteBidiControlChar(second_byte, third_byte); |
+} |
+ |
// Unescapes |escaped_text| according to |rules|, returning the resulting |
// string. Fills in an |adjustments| parameter, if non-NULL, so it reflects |
// the alterations done to the string that are not one-character-to-one- |
@@ -172,27 +217,19 @@ STR UnescapeURLWithAdjustmentsImpl( |
// U+2067 RIGHT-TO-LEFT ISOLATE (%E2%81%A7) |
// U+2068 FIRST STRONG ISOLATE (%E2%81%A8) |
// U+2069 POP DIRECTIONAL ISOLATE (%E2%81%A9) |
- |
- unsigned char second_byte; |
- // Check for ALM. |
- if ((first_byte == 0xD8) && |
- UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &second_byte) && |
- (second_byte == 0x9c)) { |
- result.append(escaped_text, i, 6); |
- i += 5; |
- continue; |
- } |
- |
- // Check for other BiDi control characters. |
- if ((first_byte == 0xE2) && |
- UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &second_byte) && |
- ((second_byte == 0x80) || (second_byte == 0x81))) { |
- unsigned char third_byte; |
- if (UnescapeUnsignedCharAtIndex(escaped_text, i + 6, &third_byte) && |
- ((second_byte == 0x80) ? |
- ((third_byte == 0x8E) || (third_byte == 0x8F) || |
- ((third_byte >= 0xAA) && (third_byte <= 0xAE))) : |
- ((third_byte >= 0xA6) && (third_byte <= 0xA9)))) { |
+ // |
+ // However, escaping these characters in data: URLs results in |
+ // escaped BiDi control characters being displayed in the rendered HTML, |
+ // so the parsing for data: URLs is allowed to force unescaping of these |
+ // characters. DO NOT use the BIDI_CONTROL_CHARS flag without talking to a |
+ // security person. |
+ if (!(rules & UnescapeRule::BIDI_CONTROL_CHARS)) { |
+ if (HasArabicLanguageMarkAtIndex(escaped_text, first_byte, i)) { |
+ result.append(escaped_text, i, 6); |
+ i += 5; |
+ continue; |
+ } |
+ if (HasThreeByteBidiControlCharAtIndex(escaped_text, first_byte, i)) { |
result.append(escaped_text, i, 9); |
i += 8; |
continue; |