Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(120)

Side by Side Diff: net/base/escape.cc

Issue 643963004: Unescape BiDi control chars while parsing data urls (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Add a browsertest Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "net/base/escape.h" 5 #include "net/base/escape.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 8
9 #include "base/logging.h" 9 #include "base/logging.h"
10 #include "base/memory/scoped_ptr.h" 10 #include "base/memory/scoped_ptr.h"
(...skipping 154 matching lines...) Expand 10 before | Expand all | Expand 10 after
165 // 165 //
166 // Additionally, the Unicode Technical Report (TR9) as referenced by RFC 166 // Additionally, the Unicode Technical Report (TR9) as referenced by RFC
167 // 3987 above has since added some new BiDi control characters. 167 // 3987 above has since added some new BiDi control characters.
168 // http://www.unicode.org/reports/tr9 168 // http://www.unicode.org/reports/tr9
169 // 169 //
170 // U+061C ARABIC LETTER MARK (%D8%9C) 170 // U+061C ARABIC LETTER MARK (%D8%9C)
171 // U+2066 LEFT-TO-RIGHT ISOLATE (%E2%81%A6) 171 // U+2066 LEFT-TO-RIGHT ISOLATE (%E2%81%A6)
172 // U+2067 RIGHT-TO-LEFT ISOLATE (%E2%81%A7) 172 // U+2067 RIGHT-TO-LEFT ISOLATE (%E2%81%A7)
173 // U+2068 FIRST STRONG ISOLATE (%E2%81%A8) 173 // U+2068 FIRST STRONG ISOLATE (%E2%81%A8)
174 // U+2069 POP DIRECTIONAL ISOLATE (%E2%81%A9) 174 // U+2069 POP DIRECTIONAL ISOLATE (%E2%81%A9)
175 //
176 // However, not unescaping these characters in data urls results in
Tom Sepez 2014/10/17 17:08:48 Nit: "Not unescaping" is a double negative. Maybe
meacer 2014/10/17 20:41:54 This is true. I wanted to test if the actual url b
177 // escaped BiDi control characters being displayed in the rendered html,
178 // so the parsing for data urls is allowed to force unescaping of these
179 // characters.
180 if (!(rules & UnescapeRule::BIDI_CONTROL_CHARS)) {
181 unsigned char second_byte;
182 // Check for ALM.
Tom Sepez 2014/10/17 17:08:49 Nit: expand ALM to Arabic Letter Mark.
meacer 2014/10/17 20:41:54 Done.
183 if ((first_byte == 0xD8) &&
Tom Sepez 2014/10/17 17:08:49 Nit: It took me longer to understand this code tha
meacer 2014/10/17 20:41:54 Pulled these into methods (with somewhat questiona
184 UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &second_byte) &&
185 (second_byte == 0x9c)) {
186 result.append(escaped_text, i, 6);
187 i += 5;
188 continue;
189 }
175 190
176 unsigned char second_byte; 191 // Check for other BiDi control characters.
177 // Check for ALM. 192 if ((first_byte == 0xE2) &&
178 if ((first_byte == 0xD8) && 193 UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &second_byte) &&
179 UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &second_byte) && 194 ((second_byte == 0x80) || (second_byte == 0x81))) {
180 (second_byte == 0x9c)) { 195 unsigned char third_byte;
181 result.append(escaped_text, i, 6); 196 if (UnescapeUnsignedCharAtIndex(escaped_text, i + 6, &third_byte) &&
182 i += 5; 197 ((second_byte == 0x80) ?
183 continue; 198 ((third_byte == 0x8E) || (third_byte == 0x8F) ||
184 } 199 ((third_byte >= 0xAA) && (third_byte <= 0xAE))) :
185 200 ((third_byte >= 0xA6) && (third_byte <= 0xA9)))) {
186 // Check for other BiDi control characters. 201 result.append(escaped_text, i, 9);
187 if ((first_byte == 0xE2) && 202 i += 8;
188 UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &second_byte) && 203 continue;
189 ((second_byte == 0x80) || (second_byte == 0x81))) { 204 }
190 unsigned char third_byte;
191 if (UnescapeUnsignedCharAtIndex(escaped_text, i + 6, &third_byte) &&
192 ((second_byte == 0x80) ?
193 ((third_byte == 0x8E) || (third_byte == 0x8F) ||
194 ((third_byte >= 0xAA) && (third_byte <= 0xAE))) :
195 ((third_byte >= 0xA6) && (third_byte <= 0xA9)))) {
196 result.append(escaped_text, i, 9);
197 i += 8;
198 continue;
199 } 205 }
200 } 206 }
201 207
202 if (first_byte >= 0x80 || // Unescape all high-bit characters. 208 if (first_byte >= 0x80 || // Unescape all high-bit characters.
203 // For 7-bit characters, the lookup table tells us all valid chars. 209 // For 7-bit characters, the lookup table tells us all valid chars.
204 (kUrlUnescape[first_byte] || 210 (kUrlUnescape[first_byte] ||
205 // ...and we allow some additional unescaping when flags are set. 211 // ...and we allow some additional unescaping when flags are set.
206 (first_byte == ' ' && (rules & UnescapeRule::SPACES)) || 212 (first_byte == ' ' && (rules & UnescapeRule::SPACES)) ||
207 // Allow any of the prohibited but non-control characters when 213 // Allow any of the prohibited but non-control characters when
208 // we're doing "special" chars. 214 // we're doing "special" chars.
(...skipping 193 matching lines...) Expand 10 before | Expand all | Expand 10 after
402 1, kEscapeToChars[i].replacement); 408 1, kEscapeToChars[i].replacement);
403 break; 409 break;
404 } 410 }
405 } 411 }
406 } 412 }
407 } 413 }
408 return text; 414 return text;
409 } 415 }
410 416
411 } // namespace net 417 } // namespace net
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698