third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp - Issue 2554403004: Make XSSAuditor more suspicious about start of path

Unified Diff: third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp

Issue 2554403004: Make XSSAuditor more suspicious about start of path (Closed)

Patch Set: run-on sentences in comments. Created 4 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

« no previous file with comments | « third_party/WebKit/LayoutTests/http/tests/security/xssAuditor/script-tag-with-source-implied-host-expected.txt ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp

diff --git a/third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp b/third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp

index ac9644fb61bb868c1d92df3bdca778f4083c2f5f..30f0aef82839d4313401c7cfaaae6d84b39b6651 100644

--- a/third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp

+++ b/third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp

@@ -96,6 +96,10 @@ static bool isTerminatingCharacter(UChar c) {

c == '>' || c == ',');

}

+static bool isSlash(UChar c) {

+ return (c == '/' || c == '\\');

+}

// Returns true if |c| is a double-quote (") or single-quote (') character.
static bool isHTMLQuote(UChar c) {

return (c == '"' || c == '\'');

}

@@ -203,30 +207,54 @@ static String fullyDecodeString(const String& string,

return workingString;

}

+// XSSAuditor's task is to determine how much of any given content came

+// from a reflection vs. what occurs normally on the page. It must do

+// this in the face of an attacker avoiding detection by splicing on page

+// content in such a way as to remain syntactically valid. The next two

+// functions apply heuristics to get the longest possible fragment in

+// the face of such trickery.

static void truncateForSrcLikeAttribute(String& decodedSnippet) {

- // In HTTP URLs, characters following the first ?, #, or third slash may come

- // from the page itself and can be merely ignored by an attacker's server when

- // a remote script or script-like resource is requested. In DATA URLS, the

- // payload starts at the first comma, and the the first /*, //, or <!-- may

- // introduce a comment.

+ // In HTTP URLs, characters in the query string (following the first ?),

+ // in the fragment (following the first #), or even in the path (typically

+ // following the third slash but subject to generous interpretation of a

+ // lack of leading slashes) may be merely ignored by an attacker's server

+ // when a remote script or script-like resource is requested. Hence these

+ // are places where organic page content may be spliced.

+ //

+ // In DATA URLs, the payload starts at the first comma, and the first

+ // "/*", "//", or "<!--" may introduce a comment, which can then be used

+ // to splice page data harmlessly onto the end of the payload.

// Also, DATA URLs may use the same string literal tricks as with script

// content itself. In either case, content following this may come from the

// page and may be ignored when the script is executed. Also, any of these

// characters may now be represented by the (enlarged) set of html5 entities.

- // For simplicity, we don't differentiate based on URL scheme, and stop at the

- // first & (since it might be part of an entity for any of the subsequent

- // punctuation), the first # or ?, the third slash, or the first slash, <, ',

- // or " once a comma is seen.

+ // For simplicity, we don't differentiate based on URL scheme, and stop at

+ // any of the following:

+ // - the first &, since it might be part of an entity for any of the

+ // subsequent punctuation.

+ // - the first # or ?, since the query and fragment can be ignored.

+ // - the third slash, since this typically starts the path (but account

+ // for a possible lack of leading slashes following the scheme).

+ // - the first slash, <, ', or " once a comma is seen, since we

+ // may now be in a data URL payload.

int slashCount = 0;

bool commaSeen = false;

- for (size_t currentLength = 0; currentLength < decodedSnippet.length();

- ++currentLength) {

+ bool colonSeen = false;

+ for (size_t currentLength = 0, remainingLength = decodedSnippet.length();

+ remainingLength; ++currentLength, --remainingLength) {

UChar currentChar = decodedSnippet[currentLength];

+ if (currentChar == ':' && !colonSeen) {

+ if (remainingLength > 1 && !isSlash(decodedSnippet[currentLength + 1]))

+ ++slashCount;

+ if (remainingLength > 2 && !isSlash(decodedSnippet[currentLength + 2]))

+ ++slashCount;

+ colonSeen = true;

+ }

if (currentChar == '&' || currentChar == '?' || currentChar == '#' ||

- ((currentChar == '/' || currentChar == '\\') &&

- (commaSeen || ++slashCount > 2)) ||

+ (isSlash(currentChar) && (commaSeen || ++slashCount > 2)) ||

(currentChar == '<' && commaSeen) ||

(currentChar == '\'' && commaSeen) ||

(currentChar == '"' && commaSeen)) {