Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "net/base/escape.h" | 5 #include "net/base/escape.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 | 8 |
| 9 #include "base/logging.h" | 9 #include "base/logging.h" |
| 10 #include "base/memory/scoped_ptr.h" | 10 #include "base/memory/scoped_ptr.h" |
| (...skipping 146 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 157 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte)) | 157 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte)) |
| 158 return false; | 158 return false; |
| 159 if (second_byte == 0x80) { | 159 if (second_byte == 0x80) { |
| 160 return third_byte == 0x8E || | 160 return third_byte == 0x8E || |
| 161 third_byte == 0x8F || | 161 third_byte == 0x8F || |
| 162 (third_byte >= 0xAA && third_byte <= 0xAE); | 162 (third_byte >= 0xAA && third_byte <= 0xAE); |
| 163 } | 163 } |
| 164 return third_byte >= 0xA6 && third_byte <= 0xA9; | 164 return third_byte >= 0xA6 && third_byte <= 0xA9; |
| 165 } | 165 } |
| 166 | 166 |
| 167 // Returns true if there is a four-byte banned char at |index|. |first_byte| is | |
| 168 // the byte at |index|. | |
| 169 template <typename STR> | |
| 170 bool HasFourByteBannedCharAtIndex(const STR& escaped_text, | |
| 171 unsigned char first_byte, | |
| 172 size_t index) { | |
| 173 // The following characters are blacklisted for spoofability concerns. | |
| 174 // U+1F50F LOCK WITH INK PEN (%F0%9F%94%8F) | |
| 175 // U+1F510 CLOSED LOCK WITH KEY (%F0%9F%94%90) | |
| 176 // U+1F512 LOCK (%F0%9F%94%92) | |
| 177 // U+1F513 OPEN LOCK (%F0%9F%94%93) | |
| 178 if (first_byte != 0xF0) | |
| 179 return false; | |
| 180 unsigned char second_byte; | |
| 181 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte)) | |
|
Peter Kasting
2015/06/16 06:36:15
Nit: Combine these next two conditionals.
Matt Giuca
2015/06/16 07:55:34
Do you mean
if (!...(..., &second_byte) || second
Peter Kasting
2015/06/16 08:05:08
Yes, I meant this. This reads more clearly to me
mmenke
2015/06/17 19:50:16
I think 4 ifs (1 per character) is a little easier…
Matt Giuca
2015/06/22 07:27:03
Done. (Not a fan of this change, but don't want to
| |
| 182 return false; | |
| 183 if (second_byte != 0x9F) | |
| 184 return false; | |
| 185 unsigned char third_byte; | |
| 186 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte)) | |
|
Peter Kasting
2015/06/16 06:36:15
Nit: And these.
Matt Giuca
2015/06/22 07:27:03
Done.
| |
| 187 return false; | |
| 188 if (third_byte != 0x94) | |
| 189 return false; | |
| 190 unsigned char fourth_byte; | |
| 191 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 9, &fourth_byte)) | |
|
Peter Kasting
2015/06/16 06:36:15
Nit: And maybe these (more optional).
Also option
Matt Giuca
2015/06/22 07:27:03
Done.
| |
| 192 return false; | |
| 193 return fourth_byte == 0x8F || fourth_byte == 0x90 || fourth_byte == 0x92 || | |
| 194 fourth_byte == 0x93; | |
| 195 } | |
| 196 | |
| 167 // Unescapes |escaped_text| according to |rules|, returning the resulting | 197 // Unescapes |escaped_text| according to |rules|, returning the resulting |
| 168 // string. Fills in an |adjustments| parameter, if non-NULL, so it reflects | 198 // string. Fills in an |adjustments| parameter, if non-NULL, so it reflects |
| 169 // the alterations done to the string that are not one-character-to-one- | 199 // the alterations done to the string that are not one-character-to-one- |
| 170 // character. The resulting |adjustments| will always be sorted by increasing | 200 // character. The resulting |adjustments| will always be sorted by increasing |
| 171 // offset. | 201 // offset. |
| 172 template<typename STR> | 202 template<typename STR> |
| 173 STR UnescapeURLWithAdjustmentsImpl( | 203 STR UnescapeURLWithAdjustmentsImpl( |
| 174 const STR& escaped_text, | 204 const STR& escaped_text, |
| 175 UnescapeRule::Type rules, | 205 UnescapeRule::Type rules, |
| 176 base::OffsetAdjuster::Adjustments* adjustments) { | 206 base::OffsetAdjuster::Adjustments* adjustments) { |
| (...skipping 33 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 210 // Additionally, the Unicode Technical Report (TR9) as referenced by RFC | 240 // Additionally, the Unicode Technical Report (TR9) as referenced by RFC |
| 211 // 3987 above has since added some new BiDi control characters. | 241 // 3987 above has since added some new BiDi control characters. |
| 212 // http://www.unicode.org/reports/tr9 | 242 // http://www.unicode.org/reports/tr9 |
| 213 // | 243 // |
| 214 // U+061C ARABIC LETTER MARK (%D8%9C) | 244 // U+061C ARABIC LETTER MARK (%D8%9C) |
| 215 // U+2066 LEFT-TO-RIGHT ISOLATE (%E2%81%A6) | 245 // U+2066 LEFT-TO-RIGHT ISOLATE (%E2%81%A6) |
| 216 // U+2067 RIGHT-TO-LEFT ISOLATE (%E2%81%A7) | 246 // U+2067 RIGHT-TO-LEFT ISOLATE (%E2%81%A7) |
| 217 // U+2068 FIRST STRONG ISOLATE (%E2%81%A8) | 247 // U+2068 FIRST STRONG ISOLATE (%E2%81%A8) |
| 218 // U+2069 POP DIRECTIONAL ISOLATE (%E2%81%A9) | 248 // U+2069 POP DIRECTIONAL ISOLATE (%E2%81%A9) |
| 219 // | 249 // |
| 250 // We also ban certain spoofable characters that could be used to imitate | |
|
mmenke
2015/06/17 19:50:16
nit: Should avoid "we" in comments. Just use pas
Matt Giuca
2015/06/22 07:27:03
Done.
| |
| 251 // parts of the browser UI. | |
|
mmenke
2015/06/17 19:50:16
nit: Mentioning the browser seems like a bit of a
Matt Giuca
2015/06/22 07:27:03
I think we want to be fairly specific here. Yes, i
| |
| 252 // | |
| 253 // U+1F50F LOCK WITH INK PEN (%F0%9F%94%8F) | |
| 254 // U+1F510 CLOSED LOCK WITH KEY (%F0%9F%94%90) | |
| 255 // U+1F512 LOCK (%F0%9F%94%92) | |
| 256 // U+1F513 OPEN LOCK (%F0%9F%94%93) | |
| 257 // | |
| 220 // However, some schemes such as data: and file: need to parse the exact | 258 // However, some schemes such as data: and file: need to parse the exact |
| 221 // binary data when loading the URL. For that reason, CONTROL_CHARS allows | 259 // binary data when loading the URL. For that reason, CONTROL_CHARS allows |
| 222 // unescaping BiDi control characters. | 260 // unescaping BiDi control characters. |
| 223 // DO NOT use CONTROL_CHARS if the parsed URL is going to be displayed | 261 // DO NOT use CONTROL_CHARS if the parsed URL is going to be displayed |
| 224 // in the UI. | 262 // in the UI. |
| 225 if (!(rules & UnescapeRule::CONTROL_CHARS)) { | 263 if (!(rules & UnescapeRule::CONTROL_CHARS)) { |
| 226 if (HasArabicLanguageMarkAtIndex(escaped_text, first_byte, i)) { | 264 if (HasArabicLanguageMarkAtIndex(escaped_text, first_byte, i)) { |
| 227 // Keep Arabic Language Mark escaped. | 265 // Keep Arabic Language Mark escaped. |
| 228 result.append(escaped_text, i, 6); | 266 result.append(escaped_text, i, 6); |
| 229 i += 5; | 267 i += 5; |
| 230 continue; | 268 continue; |
| 231 } | 269 } |
| 232 if (HasThreeByteBidiControlCharAtIndex(escaped_text, first_byte, i)) { | 270 if (HasThreeByteBidiControlCharAtIndex(escaped_text, first_byte, i)) { |
| 233 // Keep BiDi control char escaped. | 271 // Keep BiDi control char escaped. |
| 234 result.append(escaped_text, i, 9); | 272 result.append(escaped_text, i, 9); |
| 235 i += 8; | 273 i += 8; |
| 236 continue; | 274 continue; |
| 237 } | 275 } |
| 276 if (HasFourByteBannedCharAtIndex(escaped_text, first_byte, i)) { | |
| 277 // Keep banned char escaped. | |
| 278 result.append(escaped_text, i, 12); | |
| 279 i += 11; | |
| 280 continue; | |
| 281 } | |
| 238 } | 282 } |
| 239 | 283 |
| 240 if (first_byte >= 0x80 || // Unescape all high-bit characters. | 284 if (first_byte >= 0x80 || // Unescape all high-bit characters. |
| 241 // For 7-bit characters, the lookup table tells us all valid chars. | 285 // For 7-bit characters, the lookup table tells us all valid chars. |
| 242 (kUrlUnescape[first_byte] || | 286 (kUrlUnescape[first_byte] || |
| 243 // ...and we allow some additional unescaping when flags are set. | 287 // ...and we allow some additional unescaping when flags are set. |
| 244 (first_byte == ' ' && (rules & UnescapeRule::SPACES)) || | 288 (first_byte == ' ' && (rules & UnescapeRule::SPACES)) || |
| 245 // Allow any of the prohibited but non-control characters when | 289 // Allow any of the prohibited but non-control characters when |
| 246 // we're doing "special" chars. | 290 // we're doing "special" chars. |
| 247 (first_byte > ' ' && (rules & UnescapeRule::URL_SPECIAL_CHARS)) || | 291 (first_byte > ' ' && (rules & UnescapeRule::URL_SPECIAL_CHARS)) || |
| (...skipping 206 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 454 1, kEscapeToChars[i].replacement); | 498 1, kEscapeToChars[i].replacement); |
| 455 break; | 499 break; |
| 456 } | 500 } |
| 457 } | 501 } |
| 458 } | 502 } |
| 459 } | 503 } |
| 460 return text; | 504 return text; |
| 461 } | 505 } |
| 462 | 506 |
| 463 } // namespace net | 507 } // namespace net |
| OLD | NEW |