net/base/escape.cc - Issue 1180393003: Added characters that look like padlocks to URL unescaping blacklist.

Side by Side Diff: net/base/escape.cc

Issue 1180393003: Added characters that look like padlocks to URL unescaping blacklist. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Combine if statements. Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "net/base/escape.h"	5 #include "net/base/escape.h"

6	6

7 #include <algorithm>	7 #include <algorithm>

8	8

9 #include "base/logging.h"	9 #include "base/logging.h"

10 #include "base/memory/scoped_ptr.h"	10 #include "base/memory/scoped_ptr.h"

(...skipping 146 matching lines...)
157 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte))	157 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte))

158 return false;	158 return false;

159 if (second_byte == 0x80) {	159 if (second_byte == 0x80) {

160 return third_byte == 0x8E \|\|	160 return third_byte == 0x8E \|\|

161 third_byte == 0x8F \|\|	161 third_byte == 0x8F \|\|

162 (third_byte >= 0xAA && third_byte <= 0xAE);	162 (third_byte >= 0xAA && third_byte <= 0xAE);

163 }	163 }

164 return third_byte >= 0xA6 && third_byte <= 0xA9;	164 return third_byte >= 0xA6 && third_byte <= 0xA9;

165 }	165 }

166	166

	167 // Returns true if there is a four-byte banned char at \|index\|. \|first_byte\| is

	168 // the byte at \|index\|.

	169 template <typename STR>

	170 bool HasFourByteBannedCharAtIndex(const STR& escaped_text,

	171 unsigned char first_byte,

	172 size_t index) {

	173 // The following characters are blacklisted for spoofability concerns.

	174 // U+1F50F LOCK WITH INK PEN (%F0%9F%94%8F)

	175 // U+1F510 CLOSED LOCK WITH KEY (%F0%9F%94%90)

	176 // U+1F512 LOCK (%F0%9F%94%92)

	177 // U+1F513 OPEN LOCK (%F0%9F%94%93)

	178 if (first_byte != 0xF0)

	179 return false;

	180

	181 unsigned char second_byte;

	182 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 3, &second_byte) \|\|

	183 second_byte != 0x9F) {

	184 return false;

	185 }

	186

	187 unsigned char third_byte;

	188 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 6, &third_byte) \|\|

	189 third_byte != 0x94) {

	190 return false;

	191 }

	192

	193 unsigned char fourth_byte;

	194 if (!UnescapeUnsignedCharAtIndex(escaped_text, index + 9, &fourth_byte) \|\|

	195 (fourth_byte != 0x8F && fourth_byte != 0x90 && fourth_byte != 0x92 &&

	196 fourth_byte != 0x93)) {
	Peter Kasting 2015/06/22 07:35:22 Nit: Simpler: return UnescapeUnsignedCharAtIndex(escaped_text, index + 9, &fourth_byte) && (fourth_byte == 0x8F \|\| fourth_byte == 0x90 \|\| fourth_byte == 0x92 \|\| fourth_byte == 0x93); Matt Giuca 2015/06/23 04:14:10 Done.
	197 return false;

	198 }

	199

	200 return true;

	201 }

	202

167 // Unescapes \|escaped_text\| according to \|rules\|, returning the resulting	203 // Unescapes \|escaped_text\| according to \|rules\|, returning the resulting

168 // string. Fills in an \|adjustments\| parameter, if non-NULL, so it reflects	204 // string. Fills in an \|adjustments\| parameter, if non-NULL, so it reflects

169 // the alterations done to the string that are not one-character-to-one-	205 // the alterations done to the string that are not one-character-to-one-

170 // character. The resulting \|adjustments\| will always be sorted by increasing	206 // character. The resulting \|adjustments\| will always be sorted by increasing

171 // offset.	207 // offset.

172 template<typename STR>	208 template<typename STR>

173 STR UnescapeURLWithAdjustmentsImpl(	209 STR UnescapeURLWithAdjustmentsImpl(

174 const STR& escaped_text,	210 const STR& escaped_text,

175 UnescapeRule::Type rules,	211 UnescapeRule::Type rules,

176 base::OffsetAdjuster::Adjustments* adjustments) {	212 base::OffsetAdjuster::Adjustments* adjustments) {

(...skipping 33 matching lines...)
210 // Additionally, the Unicode Technical Report (TR9) as referenced by RFC	246 // Additionally, the Unicode Technical Report (TR9) as referenced by RFC

211 // 3987 above has since added some new BiDi control characters.	247 // 3987 above has since added some new BiDi control characters.

212 // http://www.unicode.org/reports/tr9	248 // http://www.unicode.org/reports/tr9

213 //	249 //

214 // U+061C ARABIC LETTER MARK (%D8%9C)	250 // U+061C ARABIC LETTER MARK (%D8%9C)

215 // U+2066 LEFT-TO-RIGHT ISOLATE (%E2%81%A6)	251 // U+2066 LEFT-TO-RIGHT ISOLATE (%E2%81%A6)

216 // U+2067 RIGHT-TO-LEFT ISOLATE (%E2%81%A7)	252 // U+2067 RIGHT-TO-LEFT ISOLATE (%E2%81%A7)

217 // U+2068 FIRST STRONG ISOLATE (%E2%81%A8)	253 // U+2068 FIRST STRONG ISOLATE (%E2%81%A8)

218 // U+2069 POP DIRECTIONAL ISOLATE (%E2%81%A9)	254 // U+2069 POP DIRECTIONAL ISOLATE (%E2%81%A9)

219 //	255 //

	256 // The following spoofable characters are also banned, because they could

	257 // be used to imitate parts of the browser UI.
	Matt Giuca 2015/06/22 07:27:03 I think we want to be fairly specific here. Yes, it is a bit of a layering violation, but these characters are banned for a fairly specific reason, which is that web browsers commonly use similar iconography to indicate security. (Note: I didn't say which web browser, so it's not Chrome-specific.) mmenke 2015/06/22 17:05:07 It's a layering violation for net/ to know about anything in content/ (which includes things like the UI thread and IO thread, as well as the fact that its primary use case is being used in a browser). Also worth noting: the network stack is used in apps other than browsers. Matt Giuca 2015/06/23 04:14:10 OK, well, the entire reason for these chars being banned is that they clash with web browser UI, so it doesn't make sense for this comment not to mention web browsers (i.e. if you use this module in a non-web-browser app, then the locks probably don't need to be banned). But I take your point, so I changed "parts of the browser UI" to "parts of a web browser's UI."
	258 //

	259 // U+1F50F LOCK WITH INK PEN (%F0%9F%94%8F)

	260 // U+1F510 CLOSED LOCK WITH KEY (%F0%9F%94%90)

	261 // U+1F512 LOCK (%F0%9F%94%92)

	262 // U+1F513 OPEN LOCK (%F0%9F%94%93)

	263 //

220 // However, some schemes such as data: and file: need to parse the exact	264 // However, some schemes such as data: and file: need to parse the exact

221 // binary data when loading the URL. For that reason, CONTROL_CHARS allows	265 // binary data when loading the URL. For that reason, CONTROL_CHARS allows

222 // unescaping BiDi control characters.	266 // unescaping BiDi control characters.

223 // DO NOT use CONTROL_CHARS if the parsed URL is going to be displayed	267 // DO NOT use CONTROL_CHARS if the parsed URL is going to be displayed

224 // in the UI.	268 // in the UI.

225 if (!(rules & UnescapeRule::CONTROL_CHARS)) {	269 if (!(rules & UnescapeRule::CONTROL_CHARS)) {

226 if (HasArabicLanguageMarkAtIndex(escaped_text, first_byte, i)) {	270 if (HasArabicLanguageMarkAtIndex(escaped_text, first_byte, i)) {

227 // Keep Arabic Language Mark escaped.	271 // Keep Arabic Language Mark escaped.

228 result.append(escaped_text, i, 6);	272 result.append(escaped_text, i, 6);

229 i += 5;	273 i += 5;

230 continue;	274 continue;

231 }	275 }

232 if (HasThreeByteBidiControlCharAtIndex(escaped_text, first_byte, i)) {	276 if (HasThreeByteBidiControlCharAtIndex(escaped_text, first_byte, i)) {

233 // Keep BiDi control char escaped.	277 // Keep BiDi control char escaped.

234 result.append(escaped_text, i, 9);	278 result.append(escaped_text, i, 9);

235 i += 8;	279 i += 8;

236 continue;	280 continue;

237 }	281 }

	282 if (HasFourByteBannedCharAtIndex(escaped_text, first_byte, i)) {

	283 // Keep banned char escaped.

	284 result.append(escaped_text, i, 12);

	285 i += 11;

	286 continue;

	287 }

238 }	288 }

239	289

240 if (first_byte >= 0x80 \|\| // Unescape all high-bit characters.	290 if (first_byte >= 0x80 \|\| // Unescape all high-bit characters.

241 // For 7-bit characters, the lookup table tells us all valid chars.	291 // For 7-bit characters, the lookup table tells us all valid chars.

242 (kUrlUnescape[first_byte] \|\|	292 (kUrlUnescape[first_byte] \|\|

243 // ...and we allow some additional unescaping when flags are set.	293 // ...and we allow some additional unescaping when flags are set.

244 (first_byte == ' ' && (rules & UnescapeRule::SPACES)) \|\|	294 (first_byte == ' ' && (rules & UnescapeRule::SPACES)) \|\|

245 // Allow any of the prohibited but non-control characters when	295 // Allow any of the prohibited but non-control characters when

246 // we're doing "special" chars.	296 // we're doing "special" chars.

247 (first_byte > ' ' && (rules & UnescapeRule::URL_SPECIAL_CHARS)) \|\|	297 (first_byte > ' ' && (rules & UnescapeRule::URL_SPECIAL_CHARS)) \|\|

(...skipping 206 matching lines...)
454 1, kEscapeToChars[i].replacement);	504 1, kEscapeToChars[i].replacement);

455 break;	505 break;

456 }	506 }

457 }	507 }

458 }	508 }

459 }	509 }

460 return text;	510 return text;

461 }	511 }

462	512

463 } // namespace net	513 } // namespace net

OLD	NEW

« net/base/escape.h ('K') | « net/base/escape.h ('k') | net/base/escape_unittest.cc » ('j') | no next file with comments »