Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "net/base/escape.h" | 5 #include "net/base/escape.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 | 8 |
| 9 #include "base/logging.h" | 9 #include "base/logging.h" |
| 10 #include "base/memory/scoped_ptr.h" | 10 #include "base/memory/scoped_ptr.h" |
| (...skipping 154 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 165 // | 165 // |
| 166 // Additionally, the Unicode Technical Report (TR9) as referenced by RFC | 166 // Additionally, the Unicode Technical Report (TR9) as referenced by RFC |
| 167 // 3987 above has since added some new BiDi control characters. | 167 // 3987 above has since added some new BiDi control characters. |
| 168 // http://www.unicode.org/reports/tr9 | 168 // http://www.unicode.org/reports/tr9 |
| 169 // | 169 // |
| 170 // U+061C ARABIC LETTER MARK (%D8%9C) | 170 // U+061C ARABIC LETTER MARK (%D8%9C) |
| 171 // U+2066 LEFT-TO-RIGHT ISOLATE (%E2%81%A6) | 171 // U+2066 LEFT-TO-RIGHT ISOLATE (%E2%81%A6) |
| 172 // U+2067 RIGHT-TO-LEFT ISOLATE (%E2%81%A7) | 172 // U+2067 RIGHT-TO-LEFT ISOLATE (%E2%81%A7) |
| 173 // U+2068 FIRST STRONG ISOLATE (%E2%81%A8) | 173 // U+2068 FIRST STRONG ISOLATE (%E2%81%A8) |
| 174 // U+2069 POP DIRECTIONAL ISOLATE (%E2%81%A9) | 174 // U+2069 POP DIRECTIONAL ISOLATE (%E2%81%A9) |
| 175 // | |
| 176 // However, not unescaping these characters in data urls results in | |
|
Tom Sepez
2014/10/17 17:08:48
Nit: "Not unescaping" is a double negative. Maybe
meacer
2014/10/17 20:41:54
This is true. I wanted to test if the actual url b
| |
| 177 // escaped BiDi control characters being displayed in the rendered html, | |
| 178 // so the parsing for data urls is allowed to force unescaping of these | |
| 179 // characters. | |
| 180 if (!(rules & UnescapeRule::BIDI_CONTROL_CHARS)) { | |
| 181 unsigned char second_byte; | |
| 182 // Check for ALM. | |
|
Tom Sepez
2014/10/17 17:08:49
Nit: expand ALM to Arabic Letter Mark.
meacer
2014/10/17 20:41:54
Done.
| |
| 183 if ((first_byte == 0xD8) && | |
|
Tom Sepez
2014/10/17 17:08:49
Nit: It took me longer to understand this code tha
meacer
2014/10/17 20:41:54
Pulled these into methods (with somewhat questiona
| |
| 184 UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &second_byte) && | |
| 185 (second_byte == 0x9c)) { | |
| 186 result.append(escaped_text, i, 6); | |
| 187 i += 5; | |
| 188 continue; | |
| 189 } | |
| 175 | 190 |
| 176 unsigned char second_byte; | 191 // Check for other BiDi control characters. |
| 177 // Check for ALM. | 192 if ((first_byte == 0xE2) && |
| 178 if ((first_byte == 0xD8) && | 193 UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &second_byte) && |
| 179 UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &second_byte) && | 194 ((second_byte == 0x80) || (second_byte == 0x81))) { |
| 180 (second_byte == 0x9c)) { | 195 unsigned char third_byte; |
| 181 result.append(escaped_text, i, 6); | 196 if (UnescapeUnsignedCharAtIndex(escaped_text, i + 6, &third_byte) && |
| 182 i += 5; | 197 ((second_byte == 0x80) ? |
| 183 continue; | 198 ((third_byte == 0x8E) || (third_byte == 0x8F) || |
| 184 } | 199 ((third_byte >= 0xAA) && (third_byte <= 0xAE))) : |
| 185 | 200 ((third_byte >= 0xA6) && (third_byte <= 0xA9)))) { |
| 186 // Check for other BiDi control characters. | 201 result.append(escaped_text, i, 9); |
| 187 if ((first_byte == 0xE2) && | 202 i += 8; |
| 188 UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &second_byte) && | 203 continue; |
| 189 ((second_byte == 0x80) || (second_byte == 0x81))) { | 204 } |
| 190 unsigned char third_byte; | |
| 191 if (UnescapeUnsignedCharAtIndex(escaped_text, i + 6, &third_byte) && | |
| 192 ((second_byte == 0x80) ? | |
| 193 ((third_byte == 0x8E) || (third_byte == 0x8F) || | |
| 194 ((third_byte >= 0xAA) && (third_byte <= 0xAE))) : | |
| 195 ((third_byte >= 0xA6) && (third_byte <= 0xA9)))) { | |
| 196 result.append(escaped_text, i, 9); | |
| 197 i += 8; | |
| 198 continue; | |
| 199 } | 205 } |
| 200 } | 206 } |
| 201 | 207 |
| 202 if (first_byte >= 0x80 || // Unescape all high-bit characters. | 208 if (first_byte >= 0x80 || // Unescape all high-bit characters. |
| 203 // For 7-bit characters, the lookup table tells us all valid chars. | 209 // For 7-bit characters, the lookup table tells us all valid chars. |
| 204 (kUrlUnescape[first_byte] || | 210 (kUrlUnescape[first_byte] || |
| 205 // ...and we allow some additional unescaping when flags are set. | 211 // ...and we allow some additional unescaping when flags are set. |
| 206 (first_byte == ' ' && (rules & UnescapeRule::SPACES)) || | 212 (first_byte == ' ' && (rules & UnescapeRule::SPACES)) || |
| 207 // Allow any of the prohibited but non-control characters when | 213 // Allow any of the prohibited but non-control characters when |
| 208 // we're doing "special" chars. | 214 // we're doing "special" chars. |
| (...skipping 193 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 402 1, kEscapeToChars[i].replacement); | 408 1, kEscapeToChars[i].replacement); |
| 403 break; | 409 break; |
| 404 } | 410 } |
| 405 } | 411 } |
| 406 } | 412 } |
| 407 } | 413 } |
| 408 return text; | 414 return text; |
| 409 } | 415 } |
| 410 | 416 |
| 411 } // namespace net | 417 } // namespace net |
| OLD | NEW |