OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "net/base/escape.h" | 5 #include "net/base/escape.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 | 8 |
9 #include "base/logging.h" | 9 #include "base/logging.h" |
10 #include "base/memory/scoped_ptr.h" | 10 #include "base/memory/scoped_ptr.h" |
(...skipping 154 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
165 // | 165 // |
166 // Additionally, the Unicode Technical Report (TR9) as referenced by RFC | 166 // Additionally, the Unicode Technical Report (TR9) as referenced by RFC |
167 // 3987 above has since added some new BiDi control characters. | 167 // 3987 above has since added some new BiDi control characters. |
168 // http://www.unicode.org/reports/tr9 | 168 // http://www.unicode.org/reports/tr9 |
169 // | 169 // |
170 // U+061C ARABIC LETTER MARK (%D8%9C) | 170 // U+061C ARABIC LETTER MARK (%D8%9C) |
171 // U+2066 LEFT-TO-RIGHT ISOLATE (%E2%81%A6) | 171 // U+2066 LEFT-TO-RIGHT ISOLATE (%E2%81%A6) |
172 // U+2067 RIGHT-TO-LEFT ISOLATE (%E2%81%A7) | 172 // U+2067 RIGHT-TO-LEFT ISOLATE (%E2%81%A7) |
173 // U+2068 FIRST STRONG ISOLATE (%E2%81%A8) | 173 // U+2068 FIRST STRONG ISOLATE (%E2%81%A8) |
174 // U+2069 POP DIRECTIONAL ISOLATE (%E2%81%A9) | 174 // U+2069 POP DIRECTIONAL ISOLATE (%E2%81%A9) |
175 // | |
176 // However, leaving these characters escaped in data urls results in | |
Tom Sepez
2014/10/17 17:08:48
Nit: "Not unescaping" is a double negative. Maybe
meacer
2014/10/17 20:41:54
This is true. I wanted to test if the actual url b
| |
177 // escaped BiDi control characters being displayed in the rendered html, | |
178 // so the parsing for data urls is allowed to force unescaping of these | |
179 // characters. | |
180 if (!(rules & UnescapeRule::BIDI_CONTROL_CHARS)) { | |
181 unsigned char second_byte; | |
182 // Check for ALM (U+061C ARABIC LETTER MARK). | |
Tom Sepez
2014/10/17 17:08:49
Nit: expand ALM to Arabic Letter Mark.
meacer
2014/10/17 20:41:54
Done.
| |
183 if ((first_byte == 0xD8) && | |
Tom Sepez
2014/10/17 17:08:49
Nit: It took me longer to understand this code tha
meacer
2014/10/17 20:41:54
Pulled these into methods (with somewhat questiona
| |
184 UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &second_byte) && | |
185 (second_byte == 0x9c)) { | |
186 result.append(escaped_text, i, 6); | |
187 i += 5; | |
188 continue; | |
189 } | |
175 | 190 |
176 unsigned char second_byte; | 191 // Check for other BiDi control characters. |
177 // Check for ALM. | 192 if ((first_byte == 0xE2) && |
178 if ((first_byte == 0xD8) && | 193 UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &second_byte) && |
179 UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &second_byte) && | 194 ((second_byte == 0x80) || (second_byte == 0x81))) { |
180 (second_byte == 0x9c)) { | 195 unsigned char third_byte; |
181 result.append(escaped_text, i, 6); | 196 if (UnescapeUnsignedCharAtIndex(escaped_text, i + 6, &third_byte) && |
182 i += 5; | 197 ((second_byte == 0x80) ? |
183 continue; | 198 ((third_byte == 0x8E) || (third_byte == 0x8F) || |
184 } | 199 ((third_byte >= 0xAA) && (third_byte <= 0xAE))) : |
185 | 200 ((third_byte >= 0xA6) && (third_byte <= 0xA9)))) { |
186 // Check for other BiDi control characters. | 201 result.append(escaped_text, i, 9); |
187 if ((first_byte == 0xE2) && | 202 i += 8; |
188 UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &second_byte) && | 203 continue; |
189 ((second_byte == 0x80) || (second_byte == 0x81))) { | 204 } |
190 unsigned char third_byte; | |
191 if (UnescapeUnsignedCharAtIndex(escaped_text, i + 6, &third_byte) && | |
192 ((second_byte == 0x80) ? | |
193 ((third_byte == 0x8E) || (third_byte == 0x8F) || | |
194 ((third_byte >= 0xAA) && (third_byte <= 0xAE))) : | |
195 ((third_byte >= 0xA6) && (third_byte <= 0xA9)))) { | |
196 result.append(escaped_text, i, 9); | |
197 i += 8; | |
198 continue; | |
199 } | 205 } |
200 } | 206 } |
201 | 207 |
202 if (first_byte >= 0x80 || // Unescape all high-bit characters. | 208 if (first_byte >= 0x80 || // Unescape all high-bit characters. |
203 // For 7-bit characters, the lookup table tells us all valid chars. | 209 // For 7-bit characters, the lookup table tells us all valid chars. |
204 (kUrlUnescape[first_byte] || | 210 (kUrlUnescape[first_byte] || |
205 // ...and we allow some additional unescaping when flags are set. | 211 // ...and we allow some additional unescaping when flags are set. |
206 (first_byte == ' ' && (rules & UnescapeRule::SPACES)) || | 212 (first_byte == ' ' && (rules & UnescapeRule::SPACES)) || |
207 // Allow any of the prohibited but non-control characters when | 213 // Allow any of the prohibited but non-control characters when |
208 // we're doing "special" chars. | 214 // we're doing "special" chars. |
(...skipping 193 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
402 1, kEscapeToChars[i].replacement); | 408 1, kEscapeToChars[i].replacement); |
403 break; | 409 break; |
404 } | 410 } |
405 } | 411 } |
406 } | 412 } |
407 } | 413 } |
408 return text; | 414 return text; |
409 } | 415 } |
410 | 416 |
411 } // namespace net | 417 } // namespace net |
OLD | NEW |