| Index: third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp
|
| diff --git a/third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp b/third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp
|
| index ac9644fb61bb868c1d92df3bdca778f4083c2f5f..30f0aef82839d4313401c7cfaaae6d84b39b6651 100644
|
| --- a/third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp
|
| +++ b/third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp
|
| @@ -96,6 +96,10 @@ static bool isTerminatingCharacter(UChar c) {
|
| c == '>' || c == ',');
|
| }
|
|
|
| +static bool isSlash(UChar c) {
|
| + return (c == '/' || c == '\\');
|
| +}
|
| +
|
| static bool isHTMLQuote(UChar c) {
|
| return (c == '"' || c == '\'');
|
| }
|
| @@ -203,30 +207,54 @@ static String fullyDecodeString(const String& string,
|
| return workingString;
|
| }
|
|
|
| +// XSSAuditor's task is to determine how much of any given content came
|
| +// from a reflection vs. what occurs normally on the page. It must do
|
| +// this in face of an attacker avoiding detection by splicing on page
|
| +// content in such a way as to remain syntactically valid. The next two
|
| +// functions apply heurisitcs to get the longest possible fragment in
|
| +// face of such trickery.
|
| +
|
| static void truncateForSrcLikeAttribute(String& decodedSnippet) {
|
| - // In HTTP URLs, characters following the first ?, #, or third slash may come
|
| - // from the page itself and can be merely ignored by an attacker's server when
|
| - // a remote script or script-like resource is requested. In DATA URLS, the
|
| - // payload starts at the first comma, and the the first /*, //, or <!-- may
|
| - // introduce a comment.
|
| + // In HTTP URLs, characters in the query string (following the first ?),
|
| + // in the fragment (following the first #), or even in the path (typically
|
| + // following the third slash but subject to generous interpretation of a
|
| + // lack of leading slashes) may be merely ignored by an attacker's server
|
| + // when a remote script or script-like resource is requested. Hence these
|
| + // are places where organic page content may be spliced.
|
| + //
|
| + // In DATA URLS, the payload starts at the first comma, and the the first
|
| + // "/*", "//", or "<!--" may introduce a comment, which can then be used
|
| + // to splice page data harmlessly onto the end of the payload.
|
| //
|
| // Also, DATA URLs may use the same string literal tricks as with script
|
| // content itself. In either case, content following this may come from the
|
| // page and may be ignored when the script is executed. Also, any of these
|
| // characters may now be represented by the (enlarged) set of html5 entities.
|
| //
|
| - // For simplicity, we don't differentiate based on URL scheme, and stop at the
|
| - // first & (since it might be part of an entity for any of the subsequent
|
| - // punctuation), the first # or ?, the third slash, or the first slash, <, ',
|
| - // or " once a comma is seen.
|
| + // For simplicity, we don't differentiate based on URL scheme, and stop at
|
| + // any of the following:
|
| + // - the first &, since it might be part of an entity for any of the
|
| + // subsequent punctuation.
|
| + // - the first # or ?, since the query and fragment can be ignored.
|
| + // - the third slash, since this typically starts the path, but account
|
| + // for a possible lack of leading slashes following the scheme).
|
| + // - the first slash, <, ', or " once a comma is seen, since we
|
| + // may now be in a data URL payload.
|
| int slashCount = 0;
|
| bool commaSeen = false;
|
| - for (size_t currentLength = 0; currentLength < decodedSnippet.length();
|
| - ++currentLength) {
|
| + bool colonSeen = false;
|
| + for (size_t currentLength = 0, remainingLength = decodedSnippet.length();
|
| + remainingLength; ++currentLength, --remainingLength) {
|
| UChar currentChar = decodedSnippet[currentLength];
|
| + if (currentChar == ':' && !colonSeen) {
|
| + if (remainingLength > 1 && !isSlash(decodedSnippet[currentLength + 1]))
|
| + ++slashCount;
|
| + if (remainingLength > 2 && !isSlash(decodedSnippet[currentLength + 2]))
|
| + ++slashCount;
|
| + colonSeen = true;
|
| + }
|
| if (currentChar == '&' || currentChar == '?' || currentChar == '#' ||
|
| - ((currentChar == '/' || currentChar == '\\') &&
|
| - (commaSeen || ++slashCount > 2)) ||
|
| + (isSlash(currentChar) && (commaSeen || ++slashCount > 2)) ||
|
| (currentChar == '<' && commaSeen) ||
|
| (currentChar == '\'' && commaSeen) ||
|
| (currentChar == '"' && commaSeen)) {
|
|
|