Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(801)

Unified Diff: net/base/escape.cc

Issue 643963004: Unescape BiDi control chars while parsing data urls (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Fix test comment Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « net/base/escape.h ('k') | net/base/escape_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: net/base/escape.cc
diff --git a/net/base/escape.cc b/net/base/escape.cc
index ab70f1db30187e6a28e09757819d2f307291fd53..e2e9962b397f1e242d991bcad112ec4c1cc1793b 100644
--- a/net/base/escape.cc
+++ b/net/base/escape.cc
@@ -120,6 +120,51 @@ bool UnescapeUnsignedCharAtIndex(const STR& escaped_text,
return false;
}
+// Returns true if there is an Arabic Language Mark at |index|. |first_byte|
+// is the byte at |index|.
+template<typename STR>
+bool HasArabicLanguageMarkAtIndex(const STR& escaped_text,
+ unsigned char first_byte,
+ size_t index) {
+ if (first_byte != 0xD8)
+ return false;
+ unsigned char second_byte;
+ if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte))
+ return false;
+ return second_byte == 0x9c;
+}
+
+// Returns true if second and third bytes are of a three byte BiDi control
+// character sequence.
+bool IsLastTwoBytesofThreeByteBidiControlChar(unsigned char second_byte,
+ unsigned char third_byte) {
mmenke 2014/10/20 15:46:40 This function name is a lie. Its actually IsLastT
meacer 2014/10/20 17:23:26 Sounds reasonable, inlined.
+ if (second_byte == 0x80) {
+ return third_byte == 0x8E ||
+ third_byte == 0x8F ||
+ (third_byte >= 0xAA && third_byte <= 0xAE);
+ }
mmenke 2014/10/20 15:46:40 DCHECK_EQ(0x81, second_byte)? If this is a separa
meacer 2014/10/20 17:23:26 Done.
+ return third_byte >= 0xA6 && third_byte <= 0xA9;
+}
+
+// Returns true if there is a BiDi control char at |index|. |first_byte| is the
+// byte at |index|.
+template<typename STR>
+bool HasThreeByteBidiControlCharAtIndex(const STR& escaped_text,
+ unsigned char first_byte,
+ size_t index) {
+ if (first_byte != 0xE2)
+ return false;
+ unsigned char second_byte;
+ if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte))
+ return false;
+ if (second_byte != 0x80 && second_byte != 0x81)
+ return false;
+ unsigned char third_byte;
+ if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte))
+ return false;
+ return IsLastTwoBytesofThreeByteBidiControlChar(second_byte, third_byte);
+}
+
// Unescapes |escaped_text| according to |rules|, returning the resulting
// string. Fills in an |adjustments| parameter, if non-NULL, so it reflects
// the alterations done to the string that are not one-character-to-one-
@@ -172,27 +217,19 @@ STR UnescapeURLWithAdjustmentsImpl(
// U+2067 RIGHT-TO-LEFT ISOLATE (%E2%81%A7)
// U+2068 FIRST STRONG ISOLATE (%E2%81%A8)
// U+2069 POP DIRECTIONAL ISOLATE (%E2%81%A9)
-
- unsigned char second_byte;
- // Check for ALM.
- if ((first_byte == 0xD8) &&
- UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &second_byte) &&
- (second_byte == 0x9c)) {
- result.append(escaped_text, i, 6);
- i += 5;
- continue;
- }
-
- // Check for other BiDi control characters.
- if ((first_byte == 0xE2) &&
- UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &second_byte) &&
- ((second_byte == 0x80) || (second_byte == 0x81))) {
- unsigned char third_byte;
- if (UnescapeUnsignedCharAtIndex(escaped_text, i + 6, &third_byte) &&
- ((second_byte == 0x80) ?
- ((third_byte == 0x8E) || (third_byte == 0x8F) ||
- ((third_byte >= 0xAA) && (third_byte <= 0xAE))) :
- ((third_byte >= 0xA6) && (third_byte <= 0xA9)))) {
+ //
+ // However, escaping these characters in data: urls result in
+ // escaped BiDi control characters being displayed in the rendered html,
+ // so the parsing for data: urls is allowed to force unescaping of these
+ // characters. DO NOT use BIDI_CONTROL_CHARS flag without talking to a
+ // security person.
+ if (!(rules & UnescapeRule::BIDI_CONTROL_CHARS)) {
+ if (HasArabicLanguageMarkAtIndex(escaped_text, first_byte, i)) {
+ result.append(escaped_text, i, 6);
+ i += 5;
+ continue;
+ }
+ if (HasThreeByteBidiControlCharAtIndex(escaped_text, first_byte, i)) {
result.append(escaped_text, i, 9);
i += 8;
continue;
« no previous file with comments | « net/base/escape.h ('k') | net/base/escape_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698