url/url_canon_path.cc - Issue 1358433004: Correctly handle problematic nested escapes in URL paths.

Side by Side Diff: url/url_canon_path.cc

Issue 1358433004: Correctly handle problematic nested escapes in URL paths. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 5 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2013 The Chromium Authors. All rights reserved.	1 // Copyright 2013 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "base/logging.h"	5 #include "base/logging.h"

6 #include "url/url_canon.h"	6 #include "url/url_canon.h"

7 #include "url/url_canon_internal.h"	7 #include "url/url_canon_internal.h"

8 #include "url/url_parse_internal.h"	8 #include "url/url_parse_internal.h"

9	9

10 namespace url {	10 namespace url {

(...skipping 227 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
238	238

239 } else if (out_ch == '\\') {	239 } else if (out_ch == '\\') {

240 // Convert backslashes to forward slashes	240 // Convert backslashes to forward slashes

241 output->push_back('/');	241 output->push_back('/');

242	242

243 } else if (out_ch == '%') {	243 } else if (out_ch == '%') {

244 // Handle escape sequences.	244 // Handle escape sequences.

245 unsigned char unescaped_value;	245 unsigned char unescaped_value;

246 if (DecodeEscaped(spec, &i, end, &unescaped_value)) {	246 if (DecodeEscaped(spec, &i, end, &unescaped_value)) {

247 // Valid escape sequence, see if we keep, reject, or unescape it.	247 // Valid escape sequence, see if we keep, reject, or unescape it.

	248 // Note that at this point DecodeEscape() will have advanced \|i\| to

	249 // the last character of the escape sequence.

248 char unescaped_flags = kPathCharLookup[unescaped_value];	250 char unescaped_flags = kPathCharLookup[unescaped_value];

249	251

250 if (unescaped_flags & UNESCAPE) {	252 bool unescape = (unescaped_flags & UNESCAPE) != 0;

251 // This escaped value shouldn't be escaped, copy it.	253 if (unescape) {

	254 // This escaped value shouldn't be escaped. Try to copy it.

	255 int original_length = output->length();

252 output->push_back(unescaped_value);	256 output->push_back(unescaped_value);

253 } else if (unescaped_flags & INVALID_BIT) {	257

254 // Invalid escaped character, copy it and remember the error.	258 // Bail if this results in the output string containing a new

	259 // escaped value -- this means the source string nested escapes

	260 // multiple levels deep (e.g. "%%300", which would turn into

	261 // "%00"), and unescaping would result in a URL spec that could

	262 // change further if canonicalized a second time, which can cause

	263 // a variety of problems in various places in the codebase.

	264 unsigned char temp;

	265 if ((original_length > 0) && ((i + 1) < end) &&

	266 (output->at(original_length - 1) == '%')) {

	267 // The output contains "%x" where 'x' is the unescaped value

	268 // computed above. Try appending the next source character and

	269 // see if we get a new escape sequence. Note that because we

	270 // simply append the next character instead of seeing whether

	271 // it's also a nested escape sequence, we'll unescape an input

	272 // like "%%30%30" into "%0%30" before detecting that the second

	273 // "%30" can cause a problem and bailing. This is sufficient to

	274 // avoid problematic cases and easier/more performant.

	275 output->push_back(spec[i + 1]);

	276 int begin = original_length - 1;

	277 if (DecodeEscaped(output->data(), &begin, output->length(),

	278 &temp)) {

	279 // New escape sequence found; refuse to unescape this

	280 // character.

	281 unescape = false;

	282 output->set_length(original_length);

	283 } else {

	284 // We're OK, but we still need to undo the naive appending of

	285 // the next source character so the next loop iteration can

	286 // handle it correctly.

	287 output->set_length(original_length + 1);

	288 }

	289 } else if ((original_length > 1) &&

	290 (output->at(original_length - 2) == '%')) {

	291 // The output contains "%yx" where 'x' is the unescaped value

	292 // computed above and 'y' is some other character. See if this

	293 // forms a new escape sequence.

	294 int begin = original_length - 2;

	295 if (DecodeEscaped(output->data(), &begin, output->length(),

	296 &temp)) {

	297 // New escape sequence found; refuse to unescape this

	298 // character.

	299 unescape = false;

	300 output->set_length(original_length);

	301 }

	302 }

	303 }

	304

	305 if (!unescape) {

	306 // Either this is an invalid escaped character, or it's a valid

	307 // escaped character we should keep escaped. In the first case we

	308 // should just copy it exactly and remember the error. In the

	309 // second we also copy exactly in case the server is sensitive to

	310 // changing the case of any hex letters.

255 output->push_back('%');	311 output->push_back('%');

256 output->push_back(static_cast<char>(spec[i - 1]));	312 output->push_back(static_cast<char>(spec[i - 1]));

257 output->push_back(static_cast<char>(spec[i]));	313 output->push_back(static_cast<char>(spec[i]));

258 success = false;	314 if (unescaped_flags & INVALID_BIT)

259 } else {	315 success = false;

260 // Valid escaped character but we should keep it escaped. We

261 // don't want to change the case of any hex letters in case

262 // the server is sensitive to that, so we just copy the two

263 // characters without checking (DecodeEscape will have advanced

264 // to the last character of the pair).

265 output->push_back('%');

266 output->push_back(static_cast<char>(spec[i - 1]));

267 output->push_back(static_cast<char>(spec[i]));

268 }	316 }

269 } else {	317 } else {

270 // Invalid escape sequence. IE7 rejects any URLs with such	318 // Invalid escape sequence. IE7 rejects any URLs with such

271 // sequences, while Firefox, IE6, and Safari all pass it through	319 // sequences, while Firefox, IE6, and Safari all pass it through

272 // unchanged. We are more permissive unlike IE7. I don't think this	320 // unchanged. We are more permissive unlike IE7. I don't think this

273 // can cause significant problems, if it does, we should change	321 // can cause significant problems, if it does, we should change

274 // to be more like IE7.	322 // to be more like IE7.

275 output->push_back('%');	323 output->push_back('%');

276 }	324 }

277	325

(...skipping 66 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
344 bool CanonicalizePartialPath(const base::char16* spec,	392 bool CanonicalizePartialPath(const base::char16* spec,

345 const Component& path,	393 const Component& path,

346 int path_begin_in_output,	394 int path_begin_in_output,

347 CanonOutput* output) {	395 CanonOutput* output) {

348 return DoPartialPath<base::char16, base::char16>(spec, path,	396 return DoPartialPath<base::char16, base::char16>(spec, path,

349 path_begin_in_output,	397 path_begin_in_output,

350 output);	398 output);

351 }	399 }

352	400

353 } // namespace url	401 } // namespace url

OLD	NEW

« no previous file with comments | « no previous file | url/url_canon_unittest.cc » ('j') | no next file with comments »