Chromium Code Reviews| Index: Source/core/html/parser/XSSAuditor.cpp |
| diff --git a/Source/core/html/parser/XSSAuditor.cpp b/Source/core/html/parser/XSSAuditor.cpp |
| index 7adae45adddac45258c9ca04b8bbbca3c74e71ae..c1d19415c9b2c393e8aefe1328ce4b3b837c4a4a 100644 |
| --- a/Source/core/html/parser/XSSAuditor.cpp |
| +++ b/Source/core/html/parser/XSSAuditor.cpp |
| @@ -43,6 +43,7 @@ |
| #include "platform/JSONValues.h" |
| #include "platform/network/FormData.h" |
| #include "platform/text/DecodeEscapeSequences.h" |
| +#include "wtf/ASCIICType.h" |
| #include "wtf/MainThread.h" |
| namespace { |
| @@ -111,6 +112,14 @@ static bool startsMultiLineCommentAt(const String& string, size_t start) |
| return (start + 1 < string.length() && string[start] == '/' && string[start+1] == '*'); |
| } |
| +static bool startsOpeningScriptTagAt(const String& string, size_t start) |
| +{ |
| + return start + 6 < string.length() && string[start] == '<' |
| + && WTF::toASCIILowerUnchecked(string[start+1]) == 's' && WTF::toASCIILowerUnchecked(string[start+2]) == 'c' |
|
dbates
2014/03/20 17:39:35
Nit: There should be space characters on either si
|
| + && WTF::toASCIILowerUnchecked(string[start+3]) == 'r' && WTF::toASCIILowerUnchecked(string[start+4]) == 'i' |
| + && WTF::toASCIILowerUnchecked(string[start+5]) == 'p' && WTF::toASCIILowerUnchecked(string[start+6]) == 't'; |
| +} |
| + |
| // If other files need this, we should move this to core/html/parser/HTMLParserIdioms.h |
| template<size_t inlineCapacity> |
| bool threadSafeMatch(const Vector<UChar, inlineCapacity>& vector, const QualifiedName& qname) |
| @@ -649,6 +658,7 @@ String XSSAuditor::decodedSnippetForJavaScript(const FilterTokenRequest& request |
| size_t startPosition = 0; |
| size_t endPosition = string.length(); |
| size_t foundPosition = kNotFound; |
| + size_t lastNonSpacePosition = kNotFound; |
| // Skip over initial comments to find start of code. |
| while (startPosition < endPosition) { |
| @@ -677,13 +687,10 @@ String XSSAuditor::decodedSnippetForJavaScript(const FilterTokenRequest& request |
| String result; |
| while (startPosition < endPosition && !result.length()) { |
| - // Stop at next comment (using the same rules as above for SVG/XML vs HTML), when we |
| - // encounter a comma, or when we exceed the maximum length target. The comma rule |
| + // Stop at next comment (using the same rules as above for SVG/XML vs HTML), when we encounter a comma, |
| + // when we hit an opening <script> tag, or when we exceed the maximum length target. The comma rule |
|
dbates
2014/03/20 17:39:35
Nit: There are two space characters that precede t
|
| // covers a common parameter concatenation case performed by some webservers. |
|
dbates
2014/03/20 17:39:35
Nit: Although this isn't part of the patch, "webse
|
| - // After hitting the length target, we can only stop at a point where we know we are |
| - // not in the middle of a %-escape sequence. For the sake of simplicity, approximate |
| - // not stopping inside a (possibly multiply encoded) %-esacpe sequence by breaking on |
| - // whitespace only. We should have enough text in these cases to avoid false positives. |
| + lastNonSpacePosition = kNotFound; |
| for (foundPosition = startPosition; foundPosition < endPosition; foundPosition++) { |
| if (!request.shouldAllowCDATA) { |
| if (startsSingleLineCommentAt(string, foundPosition) || startsMultiLineCommentAt(string, foundPosition)) { |
| @@ -695,9 +702,25 @@ String XSSAuditor::decodedSnippetForJavaScript(const FilterTokenRequest& request |
| break; |
| } |
| } |
| - if (string[foundPosition] == ',' || (foundPosition > startPosition + kMaximumFragmentLengthTarget && isHTMLSpace<UChar>(string[foundPosition]))) { |
| + if (string[foundPosition] == ',') |
| + break; |
| + |
| + if (lastNonSpacePosition != kNotFound && startsOpeningScriptTagAt(string, foundPosition)) { |
| + foundPosition = lastNonSpacePosition; |
|
dbates
2014/03/20 17:39:35
The coverage included in this patch is sufficient.
|
| break; |
| } |
| + |
| + if (foundPosition > startPosition + kMaximumFragmentLengthTarget) { |
| + // After hitting the length target, we can only stop at a point where we know we are |
| + // not in the middle of a %-escape sequence. For the sake of simplicity, approximate |
| + // not stopping inside a (possibly multiply encoded) %-esacpe sequence by breaking on |
|
dbates
2014/03/20 17:39:35
Nit: esacpe => escape
|
| + // whitespace only. We should have enough text in these cases to avoid false positives. |
| + if (isHTMLSpace<UChar>(string[foundPosition])) |
| + break; |
| + } |
| + |
| + if (!isHTMLSpace<UChar>(string[foundPosition])) |
| + lastNonSpacePosition = foundPosition; |
| } |
| result = canonicalize(fullyDecodeString(string.substring(startPosition, foundPosition - startPosition), m_encoding)); |