url/url_canon_path.cc - Issue 1358433004: Correctly handle problematic nested escapes in URL paths.

Unified Diff: url/url_canon_path.cc

Issue 1358433004: Correctly handle problematic nested escapes in URL paths. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 5 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: url/url_canon_path.cc

diff --git a/url/url_canon_path.cc b/url/url_canon_path.cc

index ee1cd9626c5d39b8b466c4f32e9a719f05dee7f6..713f0ae1ae61662aa551a23ada7d610e8119c47e 100644

--- a/url/url_canon_path.cc

+++ b/url/url_canon_path.cc

@@ -245,26 +245,74 @@ bool DoPartialPath(const CHAR* spec,

unsigned char unescaped_value;

if (DecodeEscaped(spec, &i, end, &unescaped_value)) {

// Valid escape sequence, see if we keep, reject, or unescape it.

+ // Note that at this point DecodeEscape() will have advanced |i| to

+ // the last character of the escape sequence.

char unescaped_flags = kPathCharLookup[unescaped_value];

- if (unescaped_flags & UNESCAPE) {

- // This escaped value shouldn't be escaped, copy it.

+ bool unescape = (unescaped_flags & UNESCAPE) != 0;

+ if (unescape) {

+ // This escaped value shouldn't be escaped. Try to copy it.

+ int original_length = output->length();

output->push_back(unescaped_value);

- } else if (unescaped_flags & INVALID_BIT) {

- // Invalid escaped character, copy it and remember the error.

- output->push_back('%');

- output->push_back(static_cast<char>(spec[i - 1]));

- output->push_back(static_cast<char>(spec[i]));

- success = false;

- } else {

- // Valid escaped character but we should keep it escaped. We

- // don't want to change the case of any hex letters in case

- // the server is sensitive to that, so we just copy the two

- // characters without checking (DecodeEscape will have advanced

- // to the last character of the pair).

+ // Bail if this results in the output string containing a new

+ // escaped value -- this means the source string nested escapes

+ // multiple levels deep (e.g. "%%300", which would turn into

+ // "%00"), and unescaping would result in a URL spec that could

+ // change further if canonicalized a second time, which can cause

+ // a variety of problems in various places in the codebase.

+ unsigned char temp;

+ if ((original_length > 0) && ((i + 1) < end) &&

+ (output->at(original_length - 1) == '%')) {

+ // The output contains "%x" where 'x' is the unescaped value

+ // computed above. Try appending the next source character and

+ // see if we get a new escape sequence. Note that because we

+ // simply append the next character instead of seeing whether

+ // it's also a nested escape sequence, we'll unescape an input

+ // like "%%30%30" into "%0%30" before detecting that the second

+ // "%30" can cause a problem and bailing. This is sufficient to

+ // avoid problematic cases and easier/more performant.

+ output->push_back(spec[i + 1]);

+ int begin = original_length - 1;

+ if (DecodeEscaped(output->data(), &begin, output->length(),

+ &temp)) {

+ // New escape sequence found; refuse to unescape this

+ // character.

+ unescape = false;

+ output->set_length(original_length);

+ } else {

+ // We're OK, but we still need to undo the naive appending of

+ // the next source character so the next loop iteration can

+ // handle it correctly.

+ output->set_length(original_length + 1);

+ }

+ } else if ((original_length > 1) &&

+ (output->at(original_length - 2) == '%')) {

+ // The output contains "%yx" where 'x' is the unescaped value

+ // computed above and 'y' is some other character. See if this

+ // forms a new escape sequence.

+ int begin = original_length - 2;

+ if (DecodeEscaped(output->data(), &begin, output->length(),

+ &temp)) {

+ // New escape sequence found; refuse to unescape this

+ // character.

+ unescape = false;

+ output->set_length(original_length);

+ }

+ if (!unescape) {

+ // Either this is an invalid escaped character, or it's a valid

+ // escaped character we should keep escaped. In the first case we

+ // should just copy it exactly and remember the error. In the

+ // second we also copy exactly in case the server is sensitive to

+ // changing the case of any hex letters.

output->push_back('%');

output->push_back(static_cast<char>(spec[i - 1]));

output->push_back(static_cast<char>(spec[i]));

+ if (unescaped_flags & INVALID_BIT)

+ success = false;

}

} else {

// Invalid escape sequence. IE7 rejects any URLs with such

« no previous file with comments | « no previous file | url/url_canon_unittest.cc » ('j') | no next file with comments »