Index: url/url_canon_path.cc |
diff --git a/url/url_canon_path.cc b/url/url_canon_path.cc |
index ee1cd9626c5d39b8b466c4f32e9a719f05dee7f6..713f0ae1ae61662aa551a23ada7d610e8119c47e 100644 |
--- a/url/url_canon_path.cc |
+++ b/url/url_canon_path.cc |
@@ -245,26 +245,74 @@ bool DoPartialPath(const CHAR* spec, |
unsigned char unescaped_value; |
if (DecodeEscaped(spec, &i, end, &unescaped_value)) { |
// Valid escape sequence, see if we keep, reject, or unescape it. |
+ // Note that at this point DecodeEscape() will have advanced |i| to |
+ // the last character of the escape sequence. |
char unescaped_flags = kPathCharLookup[unescaped_value]; |
- if (unescaped_flags & UNESCAPE) { |
- // This escaped value shouldn't be escaped, copy it. |
+ bool unescape = (unescaped_flags & UNESCAPE) != 0; |
+ if (unescape) { |
+ // This escaped value shouldn't be escaped. Try to copy it. |
+ int original_length = output->length(); |
output->push_back(unescaped_value); |
- } else if (unescaped_flags & INVALID_BIT) { |
- // Invalid escaped character, copy it and remember the error. |
- output->push_back('%'); |
- output->push_back(static_cast<char>(spec[i - 1])); |
- output->push_back(static_cast<char>(spec[i])); |
- success = false; |
- } else { |
- // Valid escaped character but we should keep it escaped. We |
- // don't want to change the case of any hex letters in case |
- // the server is sensitive to that, so we just copy the two |
- // characters without checking (DecodeEscape will have advanced |
- // to the last character of the pair). |
+ |
+ // Bail if this results in the output string containing a new |
+ // escaped value -- this means the source string nested escapes |
+ // multiple levels deep (e.g. "%%300", which would turn into |
+ // "%00"), and unescaping would result in a URL spec that could |
+ // change further if canonicalized a second time, which can cause |
+ // a variety of problems in various places in the codebase. |
+ unsigned char temp; |
+ if ((original_length > 0) && ((i + 1) < end) && |
+ (output->at(original_length - 1) == '%')) { |
+ // The output contains "%x" where 'x' is the unescaped value |
+ // computed above. Try appending the next source character and |
+ // see if we get a new escape sequence. Note that because we |
+ // simply append the next character instead of seeing whether |
+ // it's also a nested escape sequence, we'll unescape an input |
+ // like "%%30%30" into "%0%30" before detecting that the second |
+ // "%30" can cause a problem and bailing. This is sufficient to |
+ // avoid problematic cases and easier/more performant. |
+ output->push_back(spec[i + 1]); |
+ int begin = original_length - 1; |
+ if (DecodeEscaped(output->data(), &begin, output->length(), |
+ &temp)) { |
+ // New escape sequence found; refuse to unescape this |
+ // character. |
+ unescape = false; |
+ output->set_length(original_length); |
+ } else { |
+ // We're OK, but we still need to undo the naive appending of |
+ // the next source character so the next loop iteration can |
+ // handle it correctly. |
+ output->set_length(original_length + 1); |
+ } |
+ } else if ((original_length > 1) && |
+ (output->at(original_length - 2) == '%')) { |
+ // The output contains "%yx" where 'x' is the unescaped value |
+ // computed above and 'y' is some other character. See if this |
+ // forms a new escape sequence. |
+ int begin = original_length - 2; |
+ if (DecodeEscaped(output->data(), &begin, output->length(), |
+ &temp)) { |
+ // New escape sequence found; refuse to unescape this |
+ // character. |
+ unescape = false; |
+ output->set_length(original_length); |
+ } |
+ } |
+ } |
+ |
+ if (!unescape) { |
+ // Either this is an invalid escaped character, or it's a valid |
+ // escaped character we should keep escaped. In the first case we |
+ // should just copy it exactly and remember the error. In the |
+ // second we also copy exactly in case the server is sensitive to |
+ // changing the case of any hex letters. |
output->push_back('%'); |
output->push_back(static_cast<char>(spec[i - 1])); |
output->push_back(static_cast<char>(spec[i])); |
+ if (unescaped_flags & INVALID_BIT) |
+ success = false; |
} |
} else { |
// Invalid escape sequence. IE7 rejects any URLs with such |