Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(631)

Side by Side Diff: net/base/escape.cc

Issue 643963004: Unescape BiDi control chars while parsing data urls (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Inline IsLastTwoBytesofThreeByteBidiControlChar Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "net/base/escape.h" 5 #include "net/base/escape.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 8
9 #include "base/logging.h" 9 #include "base/logging.h"
10 #include "base/memory/scoped_ptr.h" 10 #include "base/memory/scoped_ptr.h"
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after
113 const typename STR::value_type least_sig_digit( 113 const typename STR::value_type least_sig_digit(
114 static_cast<typename STR::value_type>(escaped_text[index + 2])); 114 static_cast<typename STR::value_type>(escaped_text[index + 2]));
115 if (IsHexDigit(most_sig_digit) && IsHexDigit(least_sig_digit)) { 115 if (IsHexDigit(most_sig_digit) && IsHexDigit(least_sig_digit)) {
116 *value = HexDigitToInt(most_sig_digit) * 16 + 116 *value = HexDigitToInt(most_sig_digit) * 16 +
117 HexDigitToInt(least_sig_digit); 117 HexDigitToInt(least_sig_digit);
118 return true; 118 return true;
119 } 119 }
120 return false; 120 return false;
121 } 121 }
122 122
123 // Returns true if there is an Arabic Letter Mark at |index|. |first_byte|
124 // is the byte at |index|.
125 template<typename STR>
126 bool HasArabicLanguageMarkAtIndex(const STR& escaped_text,
127 unsigned char first_byte,
128 size_t index) {
129 if (first_byte != 0xD8)
130 return false;
131 unsigned char second_byte;
132 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte))
133 return false;
134 return second_byte == 0x9c;
135 }
136
137 // Returns true if there is a BiDi control char at |index|. |first_byte| is the
138 // byte at |index|.
139 template<typename STR>
140 bool HasThreeByteBidiControlCharAtIndex(const STR& escaped_text,
141 unsigned char first_byte,
142 size_t index) {
143 if (first_byte != 0xE2)
144 return false;
145 unsigned char second_byte;
146 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte))
147 return false;
148 if (second_byte != 0x80 && second_byte != 0x81)
149 return false;
150 unsigned char third_byte;
151 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte))
152 return false;
153 if (second_byte == 0x80) {
154 return third_byte == 0x8E ||
155 third_byte == 0x8F ||
156 (third_byte >= 0xAA && third_byte <= 0xAE);
157 }
158 return third_byte >= 0xA6 && third_byte <= 0xA9;
159 }
160
123 // Unescapes |escaped_text| according to |rules|, returning the resulting 161 // Unescapes |escaped_text| according to |rules|, returning the resulting
124 // string. Fills in an |adjustments| parameter, if non-NULL, so it reflects 162 // string. Fills in an |adjustments| parameter, if non-NULL, so it reflects
125 // the alterations done to the string that are not one-character-to-one- 163 // the alterations done to the string that are not one-character-to-one-
126 // character. The resulting |adjustments| will always be sorted by increasing 164 // character. The resulting |adjustments| will always be sorted by increasing
127 // offset. 165 // offset.
128 template<typename STR> 166 template<typename STR>
129 STR UnescapeURLWithAdjustmentsImpl( 167 STR UnescapeURLWithAdjustmentsImpl(
130 const STR& escaped_text, 168 const STR& escaped_text,
131 UnescapeRule::Type rules, 169 UnescapeRule::Type rules,
132 base::OffsetAdjuster::Adjustments* adjustments) { 170 base::OffsetAdjuster::Adjustments* adjustments) {
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
165 // 203 //
166 // Additionally, the Unicode Technical Report (TR9) as referenced by RFC 204 // Additionally, the Unicode Technical Report (TR9) as referenced by RFC
167 // 3987 above has since added some new BiDi control characters. 205 // 3987 above has since added some new BiDi control characters.
168 // http://www.unicode.org/reports/tr9 206 // http://www.unicode.org/reports/tr9
169 // 207 //
170 // U+061C ARABIC LETTER MARK (%D8%9C) 208 // U+061C ARABIC LETTER MARK (%D8%9C)
171 // U+2066 LEFT-TO-RIGHT ISOLATE (%E2%81%A6) 209 // U+2066 LEFT-TO-RIGHT ISOLATE (%E2%81%A6)
172 // U+2067 RIGHT-TO-LEFT ISOLATE (%E2%81%A7) 210 // U+2067 RIGHT-TO-LEFT ISOLATE (%E2%81%A7)
173 // U+2068 FIRST STRONG ISOLATE (%E2%81%A8) 211 // U+2068 FIRST STRONG ISOLATE (%E2%81%A8)
174 // U+2069 POP DIRECTIONAL ISOLATE (%E2%81%A9) 212 // U+2069 POP DIRECTIONAL ISOLATE (%E2%81%A9)
175 213 //
176 unsigned char second_byte; 214 // However, leaving these characters escaped in data: urls results in
brettw 2014/10/20 18:04:42 This sentence doesn't make sense to me. Here, we'r
meacer 2014/10/20 22:54:22 Done.
177 // Check for ALM. 215 // escaped BiDi control characters being displayed in the rendered html,
brettw 2014/10/20 18:04:42 The use of the word "render" in this patch (both h
meacer 2014/10/20 22:54:22 Edited CL description to make it clear.
178 if ((first_byte == 0xD8) && 216 // so the parsing for data: urls is allowed to force unescaping of these
179 UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &second_byte) && 217 // characters. DO NOT use BIDI_CONTROL_CHARS flag without talking to a
180 (second_byte == 0x9c)) { 218 // security person.
181 result.append(escaped_text, i, 6); 219 if (!(rules & UnescapeRule::BIDI_CONTROL_CHARS)) {
182 i += 5; 220 if (HasArabicLanguageMarkAtIndex(escaped_text, first_byte, i)) {
183 continue; 221 result.append(escaped_text, i, 6);
184 } 222 i += 5;
185 223 continue;
186 // Check for other BiDi control characters. 224 }
187 if ((first_byte == 0xE2) && 225 if (HasThreeByteBidiControlCharAtIndex(escaped_text, first_byte, i)) {
188 UnescapeUnsignedCharAtIndex(escaped_text, i + 3, &second_byte) &&
189 ((second_byte == 0x80) || (second_byte == 0x81))) {
190 unsigned char third_byte;
191 if (UnescapeUnsignedCharAtIndex(escaped_text, i + 6, &third_byte) &&
192 ((second_byte == 0x80) ?
193 ((third_byte == 0x8E) || (third_byte == 0x8F) ||
194 ((third_byte >= 0xAA) && (third_byte <= 0xAE))) :
195 ((third_byte >= 0xA6) && (third_byte <= 0xA9)))) {
196 result.append(escaped_text, i, 9); 226 result.append(escaped_text, i, 9);
197 i += 8; 227 i += 8;
198 continue; 228 continue;
199 } 229 }
200 } 230 }
201 231
202 if (first_byte >= 0x80 || // Unescape all high-bit characters. 232 if (first_byte >= 0x80 || // Unescape all high-bit characters.
203 // For 7-bit characters, the lookup table tells us all valid chars. 233 // For 7-bit characters, the lookup table tells us all valid chars.
204 (kUrlUnescape[first_byte] || 234 (kUrlUnescape[first_byte] ||
205 // ...and we allow some additional unescaping when flags are set. 235 // ...and we allow some additional unescaping when flags are set.
(...skipping 196 matching lines...) Expand 10 before | Expand all | Expand 10 after
402 1, kEscapeToChars[i].replacement); 432 1, kEscapeToChars[i].replacement);
403 break; 433 break;
404 } 434 }
405 } 435 }
406 } 436 }
407 } 437 }
408 return text; 438 return text;
409 } 439 }
410 440
411 } // namespace net 441 } // namespace net
OLDNEW
« net/base/escape.h ('K') | « net/base/escape.h ('k') | net/base/escape_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698