Chromium Code Reviews| Index: net/base/escape.cc |
| diff --git a/net/base/escape.cc b/net/base/escape.cc |
| index 8e6f870fa8b7beca15ab608136148193230892a7..d767b29a2845955da398e91ac17cd9d8aecc936f 100644 |
| --- a/net/base/escape.cc |
| +++ b/net/base/escape.cc |
| @@ -164,6 +164,36 @@ bool HasThreeByteBidiControlCharAtIndex(const STR& escaped_text, |
| return third_byte >= 0xA6 && third_byte <= 0xA9; |
| } |
| +// Returns true if there is a four-byte banned char at |index|. |first_byte| is |
| +// the byte at |index|. |
| +template <typename STR> |
| +bool HasFourByteBannedCharAtIndex(const STR& escaped_text, |
| + unsigned char first_byte, |
| + size_t index) { |
| + // The following characters are blacklisted for spoofability concerns. |
| + // U+1F50F LOCK WITH INK PEN (%F0%9F%94%8F) |
| + // U+1F510 CLOSED LOCK WITH KEY (%F0%9F%94%90) |
| + // U+1F512 LOCK (%F0%9F%94%92) |
| + // U+1F513 OPEN LOCK (%F0%9F%94%93) |
| + if (first_byte != 0xF0) |
| + return false; |
| + unsigned char second_byte; |
| + if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte)) |
|
Peter Kasting
2015/06/16 06:36:15
Nit: Combine these next two conditionals.
Matt Giuca
2015/06/16 07:55:34
Do you mean
if (!...(..., &second_byte) || second
Peter Kasting
2015/06/16 08:05:08
Yes, I meant this. This reads more clearly to me
mmenke
2015/06/17 19:50:16
I think 4 ifs (1 per character is) a little easie
Matt Giuca
2015/06/22 07:27:03
Done. (Not a fan of this change, but don't want to
|
| + return false; |
| + if (second_byte != 0x9F) |
| + return false; |
| + unsigned char third_byte; |
| + if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte)) |
|
Peter Kasting
2015/06/16 06:36:15
Nit: And these.
Matt Giuca
2015/06/22 07:27:03
Done.
|
| + return false; |
| + if (third_byte != 0x94) |
| + return false; |
| + unsigned char fourth_byte; |
| + if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 9, &fourth_byte)) |
|
Peter Kasting
2015/06/16 06:36:15
Nit: And maybe these (more optional).
Also option
Matt Giuca
2015/06/22 07:27:03
Done.
|
| + return false; |
| + return fourth_byte == 0x8F || fourth_byte == 0x90 || fourth_byte == 0x92 || |
| + fourth_byte == 0x93; |
| +} |
| + |
| // Unescapes |escaped_text| according to |rules|, returning the resulting |
| // string. Fills in an |adjustments| parameter, if non-NULL, so it reflects |
| // the alterations done to the string that are not one-character-to-one- |
| @@ -217,6 +247,14 @@ STR UnescapeURLWithAdjustmentsImpl( |
| // U+2068 FIRST STRONG ISOLATE (%E2%81%A8) |
| // U+2069 POP DIRECTIONAL ISOLATE (%E2%81%A9) |
| // |
| + // We also ban certain spoofable characters that could be used to imitate |
|
mmenke
2015/06/17 19:50:16
nit: Should avoid "we" in comments. Just use pas
Matt Giuca
2015/06/22 07:27:03
Done.
|
| + // parts of the browser UI. |
|
mmenke
2015/06/17 19:50:16
nit: Mentioning the browser seems like a bit of a
Matt Giuca
2015/06/22 07:27:03
I think we want to be fairly specific here. Yes, i
|
| + // |
| + // U+1F50F LOCK WITH INK PEN (%F0%9F%94%8F) |
| + // U+1F510 CLOSED LOCK WITH KEY (%F0%9F%94%90) |
| + // U+1F512 LOCK (%F0%9F%94%92) |
| + // U+1F513 OPEN LOCK (%F0%9F%94%93) |
| + // |
| // However, some schemes such as data: and file: need to parse the exact |
| // binary data when loading the URL. For that reason, CONTROL_CHARS allows |
| // unescaping BiDi control characters. |
| @@ -235,6 +273,12 @@ STR UnescapeURLWithAdjustmentsImpl( |
| i += 8; |
| continue; |
| } |
| + if (HasFourByteBannedCharAtIndex(escaped_text, first_byte, i)) { |
| + // Keep banned char escaped. |
| + result.append(escaped_text, i, 12); |
| + i += 11; |
| + continue; |
| + } |
| } |
| if (first_byte >= 0x80 || // Unescape all high-bit characters. |