| Index: url/url_canon_path.cc
|
| diff --git a/url/url_canon_path.cc b/url/url_canon_path.cc
|
| index ee1cd9626c5d39b8b466c4f32e9a719f05dee7f6..713f0ae1ae61662aa551a23ada7d610e8119c47e 100644
|
| --- a/url/url_canon_path.cc
|
| +++ b/url/url_canon_path.cc
|
| @@ -245,26 +245,74 @@ bool DoPartialPath(const CHAR* spec,
|
| unsigned char unescaped_value;
|
| if (DecodeEscaped(spec, &i, end, &unescaped_value)) {
|
| // Valid escape sequence, see if we keep, reject, or unescape it.
|
| + // Note that at this point DecodeEscaped() will have advanced |i| to
|
| + // the last character of the escape sequence.
|
| char unescaped_flags = kPathCharLookup[unescaped_value];
|
|
|
| - if (unescaped_flags & UNESCAPE) {
|
| - // This escaped value shouldn't be escaped, copy it.
|
| + bool unescape = (unescaped_flags & UNESCAPE) != 0;
|
| + if (unescape) {
|
| + // This escaped value shouldn't be escaped. Try to copy it.
|
| + int original_length = output->length();
|
| output->push_back(unescaped_value);
|
| - } else if (unescaped_flags & INVALID_BIT) {
|
| - // Invalid escaped character, copy it and remember the error.
|
| - output->push_back('%');
|
| - output->push_back(static_cast<char>(spec[i - 1]));
|
| - output->push_back(static_cast<char>(spec[i]));
|
| - success = false;
|
| - } else {
|
| - // Valid escaped character but we should keep it escaped. We
|
| - // don't want to change the case of any hex letters in case
|
| - // the server is sensitive to that, so we just copy the two
|
| - // characters without checking (DecodeEscape will have advanced
|
| - // to the last character of the pair).
|
| +
|
| + // Bail if this results in the output string containing a new
|
| + // escaped value -- this means the source string nested escapes
|
| + // multiple levels deep (e.g. "%%300", which would turn into
|
| + // "%00"), and unescaping would result in a URL spec that could
|
| + // change further if canonicalized a second time, which can cause
|
| + // a variety of problems in various places in the codebase.
|
| + unsigned char temp;
|
| + if ((original_length > 0) && ((i + 1) < end) &&
|
| + (output->at(original_length - 1) == '%')) {
|
| + // The output contains "%x" where 'x' is the unescaped value
|
| + // computed above. Try appending the next source character and
|
| + // see if we get a new escape sequence. Note that because we
|
| + // simply append the next character instead of seeing whether
|
| + // it's also a nested escape sequence, we'll unescape an input
|
| + // like "%%30%30" into "%0%30" before detecting that the second
|
| + // "%30" can cause a problem and bailing. This is sufficient to
|
| + // avoid problematic cases and easier/more performant.
|
| + output->push_back(spec[i + 1]);
|
| + int begin = original_length - 1;
|
| + if (DecodeEscaped(output->data(), &begin, output->length(),
|
| + &temp)) {
|
| + // New escape sequence found; refuse to unescape this
|
| + // character.
|
| + unescape = false;
|
| + output->set_length(original_length);
|
| + } else {
|
| + // We're OK, but we still need to undo the naive appending of
|
| + // the next source character so the next loop iteration can
|
| + // handle it correctly.
|
| + output->set_length(original_length + 1);
|
| + }
|
| + } else if ((original_length > 1) &&
|
| + (output->at(original_length - 2) == '%')) {
|
| + // The output contains "%yx" where 'x' is the unescaped value
|
| + // computed above and 'y' is some other character. See if this
|
| + // forms a new escape sequence.
|
| + int begin = original_length - 2;
|
| + if (DecodeEscaped(output->data(), &begin, output->length(),
|
| + &temp)) {
|
| + // New escape sequence found; refuse to unescape this
|
| + // character.
|
| + unescape = false;
|
| + output->set_length(original_length);
|
| + }
|
| + }
|
| + }
|
| +
|
| + if (!unescape) {
|
| + // Either this is an invalid escaped character, or it's a valid
|
| + // escaped character we should keep escaped. In the first case we
|
| + // should just copy it exactly and remember the error. In the
|
| + // second we also copy exactly in case the server is sensitive to
|
| + // changing the case of any hex letters.
|
| output->push_back('%');
|
| output->push_back(static_cast<char>(spec[i - 1]));
|
| output->push_back(static_cast<char>(spec[i]));
|
| + if (unescaped_flags & INVALID_BIT)
|
| + success = false;
|
| }
|
| } else {
|
| // Invalid escape sequence. IE7 rejects any URLs with such
|
|
|