Chromium Code Reviews| Index: net/base/escape.cc |
| diff --git a/net/base/escape.cc b/net/base/escape.cc |
| index 134a98652013177eaf7f52d2fa8c504f53654607..08d102e5e76c3001f2399c2df6812517cde49ff6 100644 |
| --- a/net/base/escape.cc |
| +++ b/net/base/escape.cc |
| @@ -97,6 +97,24 @@ const char kUrlUnescape[128] = { |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 |
| }; |
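| +// Unescapes the escape sequence starting at |index| in |escaped_text| into an |
| +// unsigned char stored in |*value|. Returns true only if both characters |
| +// following |index| are hex digits; otherwise |*value| is left unmodified. |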
|
Peter Kasting
2014/02/27 04:43:04
Nit:
// Attempts to unescape the sequence at |ind
Anuj
2014/02/27 19:38:45
Done.
|
| +template<typename STR> |
| +bool UnescapeUnsignedCharAtIndex(const STR& escaped_text, |
| + int index, |
|
Peter Kasting
2014/02/27 04:43:04
This should be a size_t.
Nit: Indenting (2 lines)
Anuj
2014/02/27 19:38:45
Done.
|
| + unsigned char* value) { |
| + const typename STR::value_type most_sig_digit( |
|
Peter Kasting
2014/02/27 04:43:04
This function should also check whether escaped_te
Anuj
2014/02/27 19:38:45
Done.
|
| + static_cast<typename STR::value_type>(escaped_text[index + 1])); |
| + const typename STR::value_type least_sig_digit( |
| + static_cast<typename STR::value_type>(escaped_text[index + 2])); |
| + if (IsHexDigit(most_sig_digit) && IsHexDigit(least_sig_digit)) { |
| + *value = HexDigitToInt(most_sig_digit) * 16 + |
| + HexDigitToInt(least_sig_digit); |
| + return true; |
| + } |
| + return false; |
| +} |
| + |
| template<typename STR> |
| STR UnescapeURLWithOffsetsImpl(const STR& escaped_text, |
| UnescapeRule::Type rules, |
| @@ -127,13 +145,34 @@ STR UnescapeURLWithOffsetsImpl(const STR& escaped_text, |
| char current_char = static_cast<char>(escaped_text[i]); |
| if (current_char == '%' && i + 2 < max) { |
|
Peter Kasting
2014/02/27 04:43:04
If you add the checks mentioned above, |max| can b
Anuj
2014/02/27 19:38:45
Done.
|
| - const typename STR::value_type most_sig_digit( |
| - static_cast<typename STR::value_type>(escaped_text[i + 1])); |
| - const typename STR::value_type least_sig_digit( |
| - static_cast<typename STR::value_type>(escaped_text[i + 2])); |
| - if (IsHexDigit(most_sig_digit) && IsHexDigit(least_sig_digit)) { |
| - unsigned char value = HexDigitToInt(most_sig_digit) * 16 + |
| - HexDigitToInt(least_sig_digit); |
| + unsigned char value; |
| + if (UnescapeUnsignedCharAtIndex(escaped_text, i, &value)) { |
| + // As per http://tools.ietf.org/html/rfc3987#section-4.1, BiDi control |
| + // characters are disallowed, so they are copied through still escaped. |
| + // The BiDi control characters in escaped form are: |
| + // kRightToLeftMark = "%E2%80%8F" |
| + // kLeftToRightMark = "%E2%80%8E" |
| + // kLeftToRightEmbeddingMark = "%E2%80%AA" |
| + // kRightToLeftEmbeddingMark = "%E2%80%AB" |
| + // kPopDirectionalFormatting = "%E2%80%AC" |
| + // kLeftToRightOverride = "%E2%80%AD" |
| + // kRightToLeftOverride = "%E2%80%AE" |
|
Peter Kasting
2014/02/27 04:43:04
Nit: Don't use kNames for things that are just com
Anuj
2014/02/27 19:38:45
Done.
|
| + if (value == 0xE2 && i + 8 < max) { |
|
Peter Kasting
2014/02/27 04:43:04
If you add the checks mentioned above, you can eli
Anuj
2014/02/27 19:38:45
Done.
|
| + // Possible BiDi control character. |
| + UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &value); |
| + if (value == 0x80) { |
| + UnescapeUnsignedCharAtIndex(escaped_text, i + 6, &value); |
| + if (value == 0xAA || value == 0xAB || value == 0xAC || |
| + value == 0xAD || value == 0xAE || value == 0x8E || |
| + value == 0x8F) { |
|
Peter Kasting
2014/02/27 04:43:04
Nit: Simpler:
if ((value == 0x8E) || (value == 0x
Anuj
2014/02/27 19:38:45
Done.
|
| + result.append(escaped_text, i, 9); |
| + i += 8; |
| + continue; |
| + } |
| + } |
| + // Restore |value| to the first byte (0xE2) if no BiDi control character was found. |
|
Peter Kasting
2014/02/27 04:43:04
Prefer declaring a different temp to hold the seco
Anuj
2014/02/27 19:38:45
Done.
|
| + value = 0xE2; |
| + } |
| if (value >= 0x80 || // Unescape all high-bit characters. |
| // For 7-bit characters, the lookup table tells us all valid chars. |
| (kUrlUnescape[value] || |