Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(142)

Side by Side Diff: url/url_canon_path.cc

Issue 1358433004: Correctly handle problematic nested escapes in URL paths. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 5 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | url/url_canon_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "base/logging.h" 5 #include "base/logging.h"
6 #include "url/url_canon.h" 6 #include "url/url_canon.h"
7 #include "url/url_canon_internal.h" 7 #include "url/url_canon_internal.h"
8 #include "url/url_parse_internal.h" 8 #include "url/url_parse_internal.h"
9 9
10 namespace url { 10 namespace url {
(...skipping 227 matching lines...) Expand 10 before | Expand all | Expand 10 after
238 238
239 } else if (out_ch == '\\') { 239 } else if (out_ch == '\\') {
240 // Convert backslashes to forward slashes 240 // Convert backslashes to forward slashes
241 output->push_back('/'); 241 output->push_back('/');
242 242
243 } else if (out_ch == '%') { 243 } else if (out_ch == '%') {
244 // Handle escape sequences. 244 // Handle escape sequences.
245 unsigned char unescaped_value; 245 unsigned char unescaped_value;
246 if (DecodeEscaped(spec, &i, end, &unescaped_value)) { 246 if (DecodeEscaped(spec, &i, end, &unescaped_value)) {
247 // Valid escape sequence, see if we keep, reject, or unescape it. 247 // Valid escape sequence, see if we keep, reject, or unescape it.
248 // Note that at this point DecodeEscape() will have advanced |i| to
249 // the last character of the escape sequence.
248 char unescaped_flags = kPathCharLookup[unescaped_value]; 250 char unescaped_flags = kPathCharLookup[unescaped_value];
249 251
250 if (unescaped_flags & UNESCAPE) { 252 bool unescape = (unescaped_flags & UNESCAPE) != 0;
251 // This escaped value shouldn't be escaped, copy it. 253 if (unescape) {
254 // This escaped value shouldn't be escaped. Try to copy it.
255 int original_length = output->length();
252 output->push_back(unescaped_value); 256 output->push_back(unescaped_value);
253 } else if (unescaped_flags & INVALID_BIT) { 257
254 // Invalid escaped character, copy it and remember the error. 258 // Bail if this results in the output string containing a new
259 // escaped value -- this means the source string nested escapes
260 // multiple levels deep (e.g. "%%300", which would turn into
261 // "%00"), and unescaping would result in a URL spec that could
262 // change further if canonicalized a second time, which can cause
263 // a variety of problems in various places in the codebase.
264 unsigned char temp;
265 if ((original_length > 0) && ((i + 1) < end) &&
266 (output->at(original_length - 1) == '%')) {
267 // The output contains "%x" where 'x' is the unescaped value
268 // computed above. Try appending the next source character and
269 // see if we get a new escape sequence. Note that because we
270 // simply append the next character instead of seeing whether
271 // it's also a nested escape sequence, we'll unescape an input
272 // like "%%30%30" into "%0%30" before detecting that the second
273 // "%30" can cause a problem and bailing. This is sufficient to
274 // avoid problematic cases and easier/more performant.
275 output->push_back(spec[i + 1]);
276 int begin = original_length - 1;
277 if (DecodeEscaped(output->data(), &begin, output->length(),
278 &temp)) {
279 // New escape sequence found; refuse to unescape this
280 // character.
281 unescape = false;
282 output->set_length(original_length);
283 } else {
284 // We're OK, but we still need to undo the naive appending of
285 // the next source character so the next loop iteration can
286 // handle it correctly.
287 output->set_length(original_length + 1);
288 }
289 } else if ((original_length > 1) &&
290 (output->at(original_length - 2) == '%')) {
291 // The output contains "%yx" where 'x' is the unescaped value
292 // computed above and 'y' is some other character. See if this
293 // forms a new escape sequence.
294 int begin = original_length - 2;
295 if (DecodeEscaped(output->data(), &begin, output->length(),
296 &temp)) {
297 // New escape sequence found; refuse to unescape this
298 // character.
299 unescape = false;
300 output->set_length(original_length);
301 }
302 }
303 }
304
305 if (!unescape) {
306 // Either this is an invalid escaped character, or it's a valid
307 // escaped character we should keep escaped. In the first case we
308 // should just copy it exactly and remember the error. In the
309 // second we also copy exactly in case the server is sensitive to
310 // changing the case of any hex letters.
255 output->push_back('%'); 311 output->push_back('%');
256 output->push_back(static_cast<char>(spec[i - 1])); 312 output->push_back(static_cast<char>(spec[i - 1]));
257 output->push_back(static_cast<char>(spec[i])); 313 output->push_back(static_cast<char>(spec[i]));
258 success = false; 314 if (unescaped_flags & INVALID_BIT)
259 } else { 315 success = false;
260 // Valid escaped character but we should keep it escaped. We
261 // don't want to change the case of any hex letters in case
262 // the server is sensitive to that, so we just copy the two
263 // characters without checking (DecodeEscape will have advanced
264 // to the last character of the pair).
265 output->push_back('%');
266 output->push_back(static_cast<char>(spec[i - 1]));
267 output->push_back(static_cast<char>(spec[i]));
268 } 316 }
269 } else { 317 } else {
270 // Invalid escape sequence. IE7 rejects any URLs with such 318 // Invalid escape sequence. IE7 rejects any URLs with such
271 // sequences, while Firefox, IE6, and Safari all pass it through 319 // sequences, while Firefox, IE6, and Safari all pass it through
272 // unchanged. We are more permissive unlike IE7. I don't think this 320 // unchanged. We are more permissive unlike IE7. I don't think this
273 // can cause significant problems, if it does, we should change 321 // can cause significant problems, if it does, we should change
274 // to be more like IE7. 322 // to be more like IE7.
275 output->push_back('%'); 323 output->push_back('%');
276 } 324 }
277 325
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
344 bool CanonicalizePartialPath(const base::char16* spec, 392 bool CanonicalizePartialPath(const base::char16* spec,
345 const Component& path, 393 const Component& path,
346 int path_begin_in_output, 394 int path_begin_in_output,
347 CanonOutput* output) { 395 CanonOutput* output) {
348 return DoPartialPath<base::char16, base::char16>(spec, path, 396 return DoPartialPath<base::char16, base::char16>(spec, path,
349 path_begin_in_output, 397 path_begin_in_output,
350 output); 398 output);
351 } 399 }
352 400
353 } // namespace url 401 } // namespace url
OLDNEW
« no previous file with comments | « no previous file | url/url_canon_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698