Chromium Code Reviews| Index: net/base/escape.cc |
| diff --git a/net/base/escape.cc b/net/base/escape.cc |
| index ab70f1db30187e6a28e09757819d2f307291fd53..e2e9962b397f1e242d991bcad112ec4c1cc1793b 100644 |
| --- a/net/base/escape.cc |
| +++ b/net/base/escape.cc |
| @@ -120,6 +120,51 @@ bool UnescapeUnsignedCharAtIndex(const STR& escaped_text, |
| return false; |
| } |
| +// Returns true if there is an Arabic Language Mark at |index|. |first_byte| |
| +// is the byte at |index|. |
| +template<typename STR> |
| +bool HasArabicLanguageMarkAtIndex(const STR& escaped_text, |
| + unsigned char first_byte, |
| + size_t index) { |
| + if (first_byte != 0xD8) |
| + return false; |
| + unsigned char second_byte; |
| + if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte)) |
| + return false; |
| + return second_byte == 0x9c; |
| +} |
| + |
| +// Returns true if second and third bytes are of a three byte BiDi control |
| +// character sequence. |
| +bool IsLastTwoBytesofThreeByteBidiControlChar(unsigned char second_byte, |
| + unsigned char third_byte) { |
|
mmenke
2014/10/20 15:46:40
This function name is a lie. Its actually IsLastT
meacer
2014/10/20 17:23:26
Sounds reasonable, inlined.
|
| + if (second_byte == 0x80) { |
| + return third_byte == 0x8E || |
| + third_byte == 0x8F || |
| + (third_byte >= 0xAA && third_byte <= 0xAE); |
| + } |
|
mmenke
2014/10/20 15:46:40
DCHECK_EQ(0x81, second_byte)? If this is a separa
meacer
2014/10/20 17:23:26
Done.
|
| + return third_byte >= 0xA6 && third_byte <= 0xA9; |
| +} |
| + |
| +// Returns true if there is a BiDi control char at |index|. |first_byte| is the |
| +// byte at |index|. |
| +template<typename STR> |
| +bool HasThreeByteBidiControlCharAtIndex(const STR& escaped_text, |
| + unsigned char first_byte, |
| + size_t index) { |
| + if (first_byte != 0xE2) |
| + return false; |
| + unsigned char second_byte; |
| + if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte)) |
| + return false; |
| + if (second_byte != 0x80 && second_byte != 0x81) |
| + return false; |
| + unsigned char third_byte; |
| + if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte)) |
| + return false; |
| + return IsLastTwoBytesofThreeByteBidiControlChar(second_byte, third_byte); |
| +} |
| + |
| // Unescapes |escaped_text| according to |rules|, returning the resulting |
| // string. Fills in an |adjustments| parameter, if non-NULL, so it reflects |
| // the alterations done to the string that are not one-character-to-one- |
| @@ -172,27 +217,19 @@ STR UnescapeURLWithAdjustmentsImpl( |
| // U+2067 RIGHT-TO-LEFT ISOLATE (%E2%81%A7) |
| // U+2068 FIRST STRONG ISOLATE (%E2%81%A8) |
| // U+2069 POP DIRECTIONAL ISOLATE (%E2%81%A9) |
| - |
| - unsigned char second_byte; |
| - // Check for ALM. |
| - if ((first_byte == 0xD8) && |
| - UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &second_byte) && |
| - (second_byte == 0x9c)) { |
| - result.append(escaped_text, i, 6); |
| - i += 5; |
| - continue; |
| - } |
| - |
| - // Check for other BiDi control characters. |
| - if ((first_byte == 0xE2) && |
| - UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &second_byte) && |
| - ((second_byte == 0x80) || (second_byte == 0x81))) { |
| - unsigned char third_byte; |
| - if (UnescapeUnsignedCharAtIndex(escaped_text, i + 6, &third_byte) && |
| - ((second_byte == 0x80) ? |
| - ((third_byte == 0x8E) || (third_byte == 0x8F) || |
| - ((third_byte >= 0xAA) && (third_byte <= 0xAE))) : |
| - ((third_byte >= 0xA6) && (third_byte <= 0xA9)))) { |
| + // |
| + // However, escaping these characters in data: urls result in |
| + // escaped BiDi control characters being displayed in the rendered html, |
| + // so the parsing for data: urls is allowed to force unescaping of these |
| + // characters. DO NOT use BIDI_CONTROL_CHARS flag without talking to a |
| + // security person. |
| + if (!(rules & UnescapeRule::BIDI_CONTROL_CHARS)) { |
| + if (HasArabicLanguageMarkAtIndex(escaped_text, first_byte, i)) { |
| + result.append(escaped_text, i, 6); |
| + i += 5; |
| + continue; |
| + } |
| + if (HasThreeByteBidiControlCharAtIndex(escaped_text, first_byte, i)) { |
| result.append(escaped_text, i, 9); |
| i += 8; |
| continue; |