Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(746)

Unified Diff: net/base/escape.cc

Issue 181483008: Don't unescape BiDi control characters in URL components (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 6 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | net/base/escape_unittest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: net/base/escape.cc
diff --git a/net/base/escape.cc b/net/base/escape.cc
index 134a98652013177eaf7f52d2fa8c504f53654607..08d102e5e76c3001f2399c2df6812517cde49ff6 100644
--- a/net/base/escape.cc
+++ b/net/base/escape.cc
@@ -97,6 +97,24 @@ const char kUrlUnescape[128] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0
};
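+// Attempts to unescape the "%XX" sequence starting at |index| in
+// |escaped_text|. On success, stores the decoded byte in |*value| and returns
+// true; otherwise returns false and leaves |*value| untouched. Does no bounds
+// checking: the caller must ensure |index| + 2 is a valid position.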
Peter Kasting 2014/02/27 04:43:04 Nit: // Attempts to unescape the sequence at |ind
Anuj 2014/02/27 19:38:45 Done.
+template<typename STR>
+bool UnescapeUnsignedCharAtIndex(const STR& escaped_text,
+ int index,
Peter Kasting 2014/02/27 04:43:04 This should be a size_t. Nit: Indenting (2 lines)
Anuj 2014/02/27 19:38:45 Done.
+ unsigned char* value) {
+ const typename STR::value_type most_sig_digit(
Peter Kasting 2014/02/27 04:43:04 This function should also check whether escaped_te
Anuj 2014/02/27 19:38:45 Done.
+ static_cast<typename STR::value_type>(escaped_text[index + 1]));
+ const typename STR::value_type least_sig_digit(
+ static_cast<typename STR::value_type>(escaped_text[index + 2]));
+ if (IsHexDigit(most_sig_digit) && IsHexDigit(least_sig_digit)) {
+ *value = HexDigitToInt(most_sig_digit) * 16 +
+ HexDigitToInt(least_sig_digit);
+ return true;
+ }
+ return false;
+}
+
template<typename STR>
STR UnescapeURLWithOffsetsImpl(const STR& escaped_text,
UnescapeRule::Type rules,
@@ -127,13 +145,34 @@ STR UnescapeURLWithOffsetsImpl(const STR& escaped_text,
char current_char = static_cast<char>(escaped_text[i]);
if (current_char == '%' && i + 2 < max) {
Peter Kasting 2014/02/27 04:43:04 If you add the checks mentioned above, |max| can b
Anuj 2014/02/27 19:38:45 Done.
- const typename STR::value_type most_sig_digit(
- static_cast<typename STR::value_type>(escaped_text[i + 1]));
- const typename STR::value_type least_sig_digit(
- static_cast<typename STR::value_type>(escaped_text[i + 2]));
- if (IsHexDigit(most_sig_digit) && IsHexDigit(least_sig_digit)) {
- unsigned char value = HexDigitToInt(most_sig_digit) * 16 +
- HexDigitToInt(least_sig_digit);
+ unsigned char value;
+ if (UnescapeUnsignedCharAtIndex(escaped_text, i, &value)) {
+ // As per http://tools.ietf.org/html/rfc3987#section-4.1, BiDi control
+ // characters are disallowed. The BiDi control characters in escaped
+ // form are:
+ // kRightToLeftMark = "%E2%80%8F"
+ // kLeftToRightMark = "%E2%80%8E"
+ // kLeftToRightEmbeddingMark = "%E2%80%AA"
+ // kRightToLeftEmbeddingMark = "%E2%80%AB"
+ // kPopDirectionalFormatting = "%E2%80%AC"
+ // kLeftToRightOverride = "%E2%80%AD"
+ // kRightToLeftOverride = "%E2%80%AE"
Peter Kasting 2014/02/27 04:43:04 Nit: Don't use kNames for things that are just com
Anuj 2014/02/27 19:38:45 Done.
+ if (value == 0xE2 && i + 8 < max) {
Peter Kasting 2014/02/27 04:43:04 If you add the checks mentioned above, you can eli
Anuj 2014/02/27 19:38:45 Done.
+ // Possible BiDi control character.
+ UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &value);
+ if (value == 0x80) {
+ UnescapeUnsignedCharAtIndex(escaped_text, i + 6, &value);
+ if (value == 0xAA || value == 0xAB || value == 0xAC ||
+ value == 0xAD || value == 0xAE || value == 0x8E ||
+ value == 0x8F) {
Peter Kasting 2014/02/27 04:43:04 Nit: Simpler: if ((value == 0x8E) || (value == 0x
Anuj 2014/02/27 19:38:45 Done.
+ result.append(escaped_text, i, 9);
+ i += 8;
+ continue;
+ }
+ }
+ // Restore value if BiDi control character not found.
Peter Kasting 2014/02/27 04:43:04 Prefer declaring a different temp to hold the seco
Anuj 2014/02/27 19:38:45 Done.
+ value = 0xE2;
+ }
if (value >= 0x80 || // Unescape all high-bit characters.
// For 7-bit characters, the lookup table tells us all valid chars.
(kUrlUnescape[value] ||
« no previous file with comments | « no previous file | net/base/escape_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698