net/base/escape.cc - Issue 1180393003: Added characters that look like padlocks to URL unescaping blacklist.

Side by Side Diff: net/base/escape.cc

Issue 1180393003: Added characters that look like padlocks to URL unescaping blacklist. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Rename NON_DISPLAY_CHARS to SPOOFING_AND_CONTROL_CHARS. Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "net/base/escape.h"	5 #include "net/base/escape.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8	8

9 #include "base/logging.h"	9 #include "base/logging.h"

10 #include "base/memory/scoped_ptr.h"	10 #include "base/memory/scoped_ptr.h"

(...skipping 146 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
157 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte))	157 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte))

158 return false;	158 return false;

159 if (second_byte == 0x80) {	159 if (second_byte == 0x80) {

160 return third_byte == 0x8E ||	160 return third_byte == 0x8E ||

161 third_byte == 0x8F ||	161 third_byte == 0x8F ||

162 (third_byte >= 0xAA && third_byte <= 0xAE);	162 (third_byte >= 0xAA && third_byte <= 0xAE);

163 }	163 }

164 return third_byte >= 0xA6 && third_byte <= 0xA9;	164 return third_byte >= 0xA6 && third_byte <= 0xA9;

165 }	165 }

166	166

	167 // Returns true if there is a four-byte banned char at |index|. |first_byte| is

	168 // the byte at |index|.

	169 template <typename STR>

	170 bool HasFourByteBannedCharAtIndex(const STR& escaped_text,

	171 unsigned char first_byte,

	172 size_t index) {

	173 // The following characters are blacklisted for spoofability concerns.

	174 // U+1F50F LOCK WITH INK PEN (%F0%9F%94%8F)

	175 // U+1F510 CLOSED LOCK WITH KEY (%F0%9F%94%90)

	176 // U+1F512 LOCK (%F0%9F%94%92)

	177 // U+1F513 OPEN LOCK (%F0%9F%94%93)

	178 if (first_byte != 0xF0)

	179 return false;

	180

	181 unsigned char second_byte;

	182 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte) ||

	183 second_byte != 0x9F) {

	184 return false;

	185 }

	186

	187 unsigned char third_byte;

	188 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte) ||

	189 third_byte != 0x94) {

	190 return false;

	191 }

	192

	193 unsigned char fourth_byte;

	194 return UnescapeUnsignedCharAtIndex(escaped_text, index + 9, &fourth_byte) &&

	195 (fourth_byte == 0x8F || fourth_byte == 0x90 || fourth_byte == 0x92 ||

	196 fourth_byte == 0x93);

	197 }

	198

167 // Unescapes |escaped_text| according to |rules|, returning the resulting	199 // Unescapes |escaped_text| according to |rules|, returning the resulting

168 // string. Fills in an |adjustments| parameter, if non-NULL, so it reflects	200 // string. Fills in an |adjustments| parameter, if non-NULL, so it reflects

169 // the alterations done to the string that are not one-character-to-one-	201 // the alterations done to the string that are not one-character-to-one-

170 // character. The resulting |adjustments| will always be sorted by increasing	202 // character. The resulting |adjustments| will always be sorted by increasing

171 // offset.	203 // offset.

172 template<typename STR>	204 template<typename STR>

173 STR UnescapeURLWithAdjustmentsImpl(	205 STR UnescapeURLWithAdjustmentsImpl(

174 const STR& escaped_text,	206 const STR& escaped_text,

175 UnescapeRule::Type rules,	207 UnescapeRule::Type rules,

176 base::OffsetAdjuster::Adjustments* adjustments) {	208 base::OffsetAdjuster::Adjustments* adjustments) {

(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
210 // Additionally, the Unicode Technical Report (TR9) as referenced by RFC	242 // Additionally, the Unicode Technical Report (TR9) as referenced by RFC

211 // 3987 above has since added some new BiDi control characters.	243 // 3987 above has since added some new BiDi control characters.

212 // http://www.unicode.org/reports/tr9	244 // http://www.unicode.org/reports/tr9

213 //	245 //

214 // U+061C ARABIC LETTER MARK (%D8%9C)	246 // U+061C ARABIC LETTER MARK (%D8%9C)

215 // U+2066 LEFT-TO-RIGHT ISOLATE (%E2%81%A6)	247 // U+2066 LEFT-TO-RIGHT ISOLATE (%E2%81%A6)

216 // U+2067 RIGHT-TO-LEFT ISOLATE (%E2%81%A7)	248 // U+2067 RIGHT-TO-LEFT ISOLATE (%E2%81%A7)

217 // U+2068 FIRST STRONG ISOLATE (%E2%81%A8)	249 // U+2068 FIRST STRONG ISOLATE (%E2%81%A8)

218 // U+2069 POP DIRECTIONAL ISOLATE (%E2%81%A9)	250 // U+2069 POP DIRECTIONAL ISOLATE (%E2%81%A9)

219 //	251 //

	252 // The following spoofable characters are also banned, because they could

	253 // be used to imitate parts of a web browser's UI.

	254 //

	255 // U+1F50F LOCK WITH INK PEN (%F0%9F%94%8F)

	256 // U+1F510 CLOSED LOCK WITH KEY (%F0%9F%94%90)

	257 // U+1F512 LOCK (%F0%9F%94%92)

	258 // U+1F513 OPEN LOCK (%F0%9F%94%93)

	259 //

220 // However, some schemes such as data: and file: need to parse the exact	260 // However, some schemes such as data: and file: need to parse the exact

221 // binary data when loading the URL. For that reason, CONTROL_CHARS allows	261 // binary data when loading the URL. For that reason,

222 // unescaping BiDi control characters.	262 // SPOOFING_AND_CONTROL_CHARS allows unescaping BiDi control and spoofable characters.

223 // DO NOT use CONTROL_CHARS if the parsed URL is going to be displayed	263 // DO NOT use SPOOFING_AND_CONTROL_CHARS if the parsed URL is going to be

224 // in the UI.	264 // displayed in the UI.

225 if (!(rules & UnescapeRule::CONTROL_CHARS)) {	265 if (!(rules & UnescapeRule::SPOOFING_AND_CONTROL_CHARS)) {

226 if (HasArabicLanguageMarkAtIndex(escaped_text, first_byte, i)) {	266 if (HasArabicLanguageMarkAtIndex(escaped_text, first_byte, i)) {

227 // Keep Arabic Letter Mark escaped.	267 // Keep Arabic Letter Mark escaped.

228 result.append(escaped_text, i, 6);	268 result.append(escaped_text, i, 6);

229 i += 5;	269 i += 5;

230 continue;	270 continue;

231 }	271 }

232 if (HasThreeByteBidiControlCharAtIndex(escaped_text, first_byte, i)) {	272 if (HasThreeByteBidiControlCharAtIndex(escaped_text, first_byte, i)) {

233 // Keep BiDi control char escaped.	273 // Keep BiDi control char escaped.

234 result.append(escaped_text, i, 9);	274 result.append(escaped_text, i, 9);

235 i += 8;	275 i += 8;

236 continue;	276 continue;

237 }	277 }

	278 if (HasFourByteBannedCharAtIndex(escaped_text, first_byte, i)) {

	279 // Keep banned char escaped.

	280 result.append(escaped_text, i, 12);

	281 i += 11;

	282 continue;

	283 }

238 }	284 }

239	285

240 if (first_byte >= 0x80 || // Unescape all high-bit characters.	286 if (first_byte >= 0x80 || // Unescape all high-bit characters.

241 // For 7-bit characters, the lookup table tells us all valid chars.	287 // For 7-bit characters, the lookup table tells us all valid chars.

242 (kUrlUnescape[first_byte] ||	288 (kUrlUnescape[first_byte] ||

243 // ...and we allow some additional unescaping when flags are set.	289 // ...and we allow some additional unescaping when flags are set.

244 (first_byte == ' ' && (rules & UnescapeRule::SPACES)) ||	290 (first_byte == ' ' && (rules & UnescapeRule::SPACES)) ||

245 // Allow any of the prohibited but non-control characters when	291 // Allow any of the prohibited but non-control characters when

246 // we're doing "special" chars.	292 // we're doing "special" chars.

247 (first_byte > ' ' && (rules & UnescapeRule::URL_SPECIAL_CHARS)) ||	293 (first_byte > ' ' && (rules & UnescapeRule::URL_SPECIAL_CHARS)) ||

248 // Additionally allow control characters if requested.	294 // Additionally allow control characters if requested.

249 (first_byte < ' ' && (rules & UnescapeRule::CONTROL_CHARS)))) {	295 (first_byte < ' ' &&

	296 (rules & UnescapeRule::SPOOFING_AND_CONTROL_CHARS)))) {

250 // Use the unescaped version of the character.	297 // Use the unescaped version of the character.

251 if (adjustments)	298 if (adjustments)

252 adjustments->push_back(base::OffsetAdjuster::Adjustment(i, 3, 1));	299 adjustments->push_back(base::OffsetAdjuster::Adjustment(i, 3, 1));

253 result.push_back(first_byte);	300 result.push_back(first_byte);

254 i += 2;	301 i += 2;

255 } else {	302 } else {

256 // Keep escaped. Append a percent and we'll get the following two	303 // Keep escaped. Append a percent and we'll get the following two

257 // digits on the next loops through.	304 // digits on the next loops through.

258 result.push_back('%');	305 result.push_back('%');

259 }	306 }

(...skipping 194 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
454 1, kEscapeToChars[i].replacement);	501 1, kEscapeToChars[i].replacement);

455 break;	502 break;

456 }	503 }

457 }	504 }

458 }	505 }

459 }	506 }

460 return text;	507 return text;

461 }	508 }

462	509

463 } // namespace net	510 } // namespace net

OLD	NEW

« no previous file with comments | « net/base/escape.h ('k') | net/base/escape_unittest.cc » ('j') | no next file with comments »