Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1451)

Unified Diff: net/base/escape.cc

Issue 643963004: Unescape BiDi control chars while parsing data urls (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Add a browsertest Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: net/base/escape.cc
diff --git a/net/base/escape.cc b/net/base/escape.cc
index ab70f1db30187e6a28e09757819d2f307291fd53..30eef06ee7bca1493236ba1de249871f88716bb2 100644
--- a/net/base/escape.cc
+++ b/net/base/escape.cc
@@ -172,31 +172,37 @@ STR UnescapeURLWithAdjustmentsImpl(
// U+2067 RIGHT-TO-LEFT ISOLATE (%E2%81%A7)
// U+2068 FIRST STRONG ISOLATE (%E2%81%A8)
// U+2069 POP DIRECTIONAL ISOLATE (%E2%81%A9)
-
- unsigned char second_byte;
- // Check for ALM.
- if ((first_byte == 0xD8) &&
- UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &second_byte) &&
- (second_byte == 0x9c)) {
- result.append(escaped_text, i, 6);
- i += 5;
- continue;
- }
-
- // Check for other BiDi control characters.
- if ((first_byte == 0xE2) &&
- UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &second_byte) &&
- ((second_byte == 0x80) || (second_byte == 0x81))) {
- unsigned char third_byte;
- if (UnescapeUnsignedCharAtIndex(escaped_text, i + 6, &third_byte) &&
- ((second_byte == 0x80) ?
- ((third_byte == 0x8E) || (third_byte == 0x8F) ||
- ((third_byte >= 0xAA) && (third_byte <= 0xAE))) :
- ((third_byte >= 0xA6) && (third_byte <= 0xA9)))) {
- result.append(escaped_text, i, 9);
- i += 8;
+ //
+ // However, not unescaping these characters in data urls result in
Tom Sepez 2014/10/17 17:08:48 Nit: "Not unescaping" is a double negative. Maybe
meacer 2014/10/17 20:41:54 This is true. I wanted to test if the actual url b
+ // escaped BiDi control characters being displayed in the rendered html,
+ // so the parsing for data urls is allowed force unescaping of these
+ // characters.
+ if (!(rules & UnescapeRule::BIDI_CONTROL_CHARS)) {
+ unsigned char second_byte;
+ // Check for ALM.
Tom Sepez 2014/10/17 17:08:49 Nit: expand ALM to Arabic Letter Mark.
meacer 2014/10/17 20:41:54 Done.
+ if ((first_byte == 0xD8) &&
Tom Sepez 2014/10/17 17:08:49 Nit: It took me longer to understand this code tha
meacer 2014/10/17 20:41:54 Pulled these into methods (with somewhat questiona
+ UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &second_byte) &&
+ (second_byte == 0x9c)) {
+ result.append(escaped_text, i, 6);
+ i += 5;
continue;
}
+
+ // Check for other BiDi control characters.
+ if ((first_byte == 0xE2) &&
+ UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &second_byte) &&
+ ((second_byte == 0x80) || (second_byte == 0x81))) {
+ unsigned char third_byte;
+ if (UnescapeUnsignedCharAtIndex(escaped_text, i + 6, &third_byte) &&
+ ((second_byte == 0x80) ?
+ ((third_byte == 0x8E) || (third_byte == 0x8F) ||
+ ((third_byte >= 0xAA) && (third_byte <= 0xAE))) :
+ ((third_byte >= 0xA6) && (third_byte <= 0xA9)))) {
+ result.append(escaped_text, i, 9);
+ i += 8;
+ continue;
+ }
+ }
}
if (first_byte >= 0x80 || // Unescape all high-bit characters.

Powered by Google App Engine
This is Rietveld 408576698