Source/core/html/parser/XSSAuditor.cpp - Issue 338193002: Refactor XSS Auditor string operations.

Unified Diff: Source/core/html/parser/XSSAuditor.cpp

Issue 338193002: Refactor XSS Auditor string operations. (Closed) Base URL: svn://svn.chromium.org/blink/trunk

Patch Set: nameFromAttribute() not needed until next CL lands. Created 6 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: Source/core/html/parser/XSSAuditor.cpp

diff --git a/Source/core/html/parser/XSSAuditor.cpp b/Source/core/html/parser/XSSAuditor.cpp

index 65bc34b30b7bd31b31cf1755042187b1a7b9b864..5ec5378a8d9765927773a10d6860b51479544b6d 100644

--- a/Source/core/html/parser/XSSAuditor.cpp

+++ b/Source/core/html/parser/XSSAuditor.cpp

@@ -71,11 +71,6 @@ static bool isNonCanonicalCharacter(UChar c)

return (c == '\\' || c == '0' || c == '\0' || c == '/' || c >= 127);

}

-static String canonicalize(const String& string)

- return string.removeCharacters(&isNonCanonicalCharacter);

static bool isRequiredForInjection(UChar c)

{

return (c == '\'' || c == '"' || c == '<' || c == '>');

@@ -188,6 +183,57 @@ static String fullyDecodeString(const String& string, const WTF::TextEncoding& e

return workingString;

}

+static void truncateForSrcLikeAttribute(String& decodedSnippet)

+ // In HTTP URLs, characters following the first ?, #, or third slash may come from

+ // the page itself and can be merely ignored by an attacker's server when a remote

+ // script or script-like resource is requested. In DATA URLS, the payload starts at

+ // the first comma, and the the first /*, //, or <!-- may introduce a comment. Characters

+ // following this may come from the page itself and may be ignored when the script is

+ // executed. For simplicity, we don't differentiate based on URL scheme, and stop at

+ // the first # or ?, the third slash, or the first slash or < once a comma is seen.

+ int slashCount = 0;

+ bool commaSeen = false;

+ for (size_t currentLength = 0; currentLength < decodedSnippet.length(); ++currentLength) {

+ UChar currentChar = decodedSnippet[currentLength];

+ if (currentChar == '?'

+ || currentChar == '#'

+ || ((currentChar == '/' || currentChar == '\\') && (commaSeen || ++slashCount > 2))

+ || (currentChar == '<' && commaSeen)) {

+ decodedSnippet.truncate(currentLength);

+ return;

+ }

+ if (currentChar == ',')

+ commaSeen = true;

+ }

+static void truncateForScriptLikeAttribute(String& decodedSnippet)

+ // Beware of trailing characters which came from the page itself, not the

+ // injected vector. Excluding the terminating character covers common cases

+ // where the page immediately ends the attribute, but doesn't cover more

+ // complex cases where there is other page data following the injection.

+ // Generally, these won't parse as javascript, so the injected vector

+ // typically excludes them from consideration via a single-line comment or

+ // by enclosing them in a string literal terminated later by the page's own

+ // closing punctuation. Since the snippet has not been parsed, the vector

+ // may also try to introduce these via entities. As a result, we'd like to

+ // stop before the first "//", the first <!--, the first entity, or the first

+ // quote not immediately following the first equals sign (taking whitespace

+ // into consideration). To keep things simpler, we don't try to distinguish

+ // between entity-introducing amperands vs. other uses, nor do we bother to

+ // check for a second slash for a comment, nor do we bother to check for

+ // !-- following a less-than sign. We stop instead on any ampersand

+ // slash, or less-than sign.

+ size_t position = 0;

+ if ((position = decodedSnippet.find("=")) != kNotFound

+ && (position = decodedSnippet.find(isNotHTMLSpace<UChar>, position + 1)) != kNotFound

+ && (position = decodedSnippet.find(isTerminatingCharacter, isHTMLQuote(decodedSnippet[position]) ? position + 1 : position)) != kNotFound) {

+ decodedSnippet.truncate(position);

+ }

static ReflectedXSSDisposition combineXSSProtectionHeaderAndCSP(ReflectedXSSDisposition xssProtection, ReflectedXSSDisposition reflectedXSS)

{

ReflectedXSSDisposition result = std::max(xssProtection, reflectedXSS);

@@ -322,12 +368,12 @@ void XSSAuditor::setEncoding(const WTF::TextEncoding& encoding)

m_encoding = encoding;

- m_decodedURL = canonicalize(fullyDecodeString(m_documentURL.string(), m_encoding));

+ m_decodedURL = canonicalize(m_documentURL.string(), NoTruncation);

if (m_decodedURL.find(isRequiredForInjection) == kNotFound)

m_decodedURL = String();

if (!m_httpBodyAsString.isEmpty()) {

- m_decodedHTTPBody = canonicalize(fullyDecodeString(m_httpBodyAsString, m_encoding));

+ m_decodedHTTPBody = canonicalize(m_httpBodyAsString, NoTruncation);

m_httpBodyAsString = String();

if (m_decodedHTTPBody.find(isRequiredForInjection) == kNotFound)

m_decodedHTTPBody = String();

@@ -414,7 +460,7 @@ bool XSSAuditor::filterCharacterToken(const FilterTokenRequest& request)

return false;

if ((m_state == SuppressingAdjacentCharacterTokens)

- || (m_scriptTagFoundInRequest && isContainedInRequest(decodedSnippetForJavaScript(request)))) {

+ || (m_scriptTagFoundInRequest && isContainedInRequest(canonicalizedSnippetForJavaScript(request)))) {

request.token.eraseCharacters();

request.token.appendToCharacter(' '); // Technically, character tokens can't be empty.

m_state = SuppressingAdjacentCharacterTokens;

@@ -431,10 +477,10 @@ bool XSSAuditor::filterScriptToken(const FilterTokenRequest& request)

ASSERT(hasName(request.token, scriptTag));

bool didBlockScript = false;

- m_scriptTagFoundInRequest = isContainedInRequest(decodedSnippetForName(request));

+ m_scriptTagFoundInRequest = isContainedInRequest(canonicalizedSnippetForTagName(request));

if (m_scriptTagFoundInRequest) {

- didBlockScript |= eraseAttributeIfInjected(request, srcAttr, blankURL().string(), SrcLikeAttribute);

- didBlockScript |= eraseAttributeIfInjected(request, XLinkNames::hrefAttr, blankURL().string(), SrcLikeAttribute);

+ didBlockScript |= eraseAttributeIfInjected(request, srcAttr, blankURL().string(), SrcLikeAttributeTruncation);

+ didBlockScript |= eraseAttributeIfInjected(request, XLinkNames::hrefAttr, blankURL().string(), SrcLikeAttributeTruncation);

}

return didBlockScript;

}

@@ -445,8 +491,8 @@ bool XSSAuditor::filterObjectToken(const FilterTokenRequest& request)

ASSERT(hasName(request.token, objectTag));

bool didBlockScript = false;

- if (isContainedInRequest(decodedSnippetForName(request))) {

- didBlockScript |= eraseAttributeIfInjected(request, dataAttr, blankURL().string(), SrcLikeAttribute);

+ if (isContainedInRequest(canonicalizedSnippetForTagName(request))) {

+ didBlockScript |= eraseAttributeIfInjected(request, dataAttr, blankURL().string(), SrcLikeAttributeTruncation);

didBlockScript |= eraseAttributeIfInjected(request, typeAttr);

didBlockScript |= eraseAttributeIfInjected(request, classidAttr);

}

@@ -466,7 +512,7 @@ bool XSSAuditor::filterParamToken(const FilterTokenRequest& request)

if (!HTMLParamElement::isURLParameter(String(nameAttribute.value)))

return false;

- return eraseAttributeIfInjected(request, valueAttr, blankURL().string(), SrcLikeAttribute);

+ return eraseAttributeIfInjected(request, valueAttr, blankURL().string(), SrcLikeAttributeTruncation);

}

bool XSSAuditor::filterEmbedToken(const FilterTokenRequest& request)

@@ -475,9 +521,9 @@ bool XSSAuditor::filterEmbedToken(const FilterTokenRequest& request)

ASSERT(hasName(request.token, embedTag));

bool didBlockScript = false;

- if (isContainedInRequest(decodedSnippetForName(request))) {

- didBlockScript |= eraseAttributeIfInjected(request, codeAttr, String(), SrcLikeAttribute);

- didBlockScript |= eraseAttributeIfInjected(request, srcAttr, blankURL().string(), SrcLikeAttribute);

+ if (isContainedInRequest(canonicalizedSnippetForTagName(request))) {

+ didBlockScript |= eraseAttributeIfInjected(request, codeAttr, String(), SrcLikeAttributeTruncation);

+ didBlockScript |= eraseAttributeIfInjected(request, srcAttr, blankURL().string(), SrcLikeAttributeTruncation);

didBlockScript |= eraseAttributeIfInjected(request, typeAttr);

}

return didBlockScript;

@@ -489,8 +535,8 @@ bool XSSAuditor::filterAppletToken(const FilterTokenRequest& request)

ASSERT(hasName(request.token, appletTag));

bool didBlockScript = false;

- if (isContainedInRequest(decodedSnippetForName(request))) {

- didBlockScript |= eraseAttributeIfInjected(request, codeAttr, String(), SrcLikeAttribute);

+ if (isContainedInRequest(canonicalizedSnippetForTagName(request))) {

+ didBlockScript |= eraseAttributeIfInjected(request, codeAttr, String(), SrcLikeAttributeTruncation);

didBlockScript |= eraseAttributeIfInjected(request, objectAttr);

}

return didBlockScript;

@@ -501,9 +547,9 @@ bool XSSAuditor::filterFrameToken(const FilterTokenRequest& request)

ASSERT(request.token.type() == HTMLToken::StartTag);

ASSERT(hasName(request.token, iframeTag) || hasName(request.token, frameTag));

- bool didBlockScript = eraseAttributeIfInjected(request, srcdocAttr, String(), ScriptLikeAttribute);

- if (isContainedInRequest(decodedSnippetForName(request)))

- didBlockScript |= eraseAttributeIfInjected(request, srcAttr, String(), SrcLikeAttribute);

+ bool didBlockScript = eraseAttributeIfInjected(request, srcdocAttr, String(), ScriptLikeAttributeTruncation);

+ if (isContainedInRequest(canonicalizedSnippetForTagName(request)))

+ didBlockScript |= eraseAttributeIfInjected(request, srcAttr, String(), SrcLikeAttributeTruncation);

return didBlockScript;

}

@@ -537,7 +583,7 @@ bool XSSAuditor::filterInputToken(const FilterTokenRequest& request)

ASSERT(request.token.type() == HTMLToken::StartTag);

ASSERT(hasName(request.token, inputTag));

- return eraseAttributeIfInjected(request, formactionAttr, kURLWithUniqueOrigin, SrcLikeAttribute);

+ return eraseAttributeIfInjected(request, formactionAttr, kURLWithUniqueOrigin, SrcLikeAttributeTruncation);

}

bool XSSAuditor::filterButtonToken(const FilterTokenRequest& request)

@@ -545,7 +591,7 @@ bool XSSAuditor::filterButtonToken(const FilterTokenRequest& request)

ASSERT(request.token.type() == HTMLToken::StartTag);

ASSERT(hasName(request.token, buttonTag));

- return eraseAttributeIfInjected(request, formactionAttr, kURLWithUniqueOrigin, SrcLikeAttribute);

+ return eraseAttributeIfInjected(request, formactionAttr, kURLWithUniqueOrigin, SrcLikeAttributeTruncation);

}

bool XSSAuditor::eraseDangerousAttributesIfInjected(const FilterTokenRequest& request)

@@ -561,7 +607,7 @@ bool XSSAuditor::eraseDangerousAttributesIfInjected(const FilterTokenRequest& re

bool valueContainsJavaScriptURL = (!isInlineEventHandler && protocolIsJavaScript(strippedValue)) || (isSemicolonSeparatedAttribute(attribute) && semicolonSeparatedValueContainsJavaScriptURL(strippedValue));

if (!isInlineEventHandler && !valueContainsJavaScriptURL)

continue;

- if (!isContainedInRequest(decodedSnippetForAttribute(request, attribute, ScriptLikeAttribute)))

+ if (!isContainedInRequest(canonicalize(snippetFromAttribute(request, attribute), ScriptLikeAttributeTruncation)))

continue;

request.token.eraseValueOfAttribute(i);

if (valueContainsJavaScriptURL)

@@ -571,32 +617,38 @@ bool XSSAuditor::eraseDangerousAttributesIfInjected(const FilterTokenRequest& re

return didBlockScript;

}

-bool XSSAuditor::eraseAttributeIfInjected(const FilterTokenRequest& request, const QualifiedName& attributeName, const String& replacementValue, AttributeKind treatment)

+bool XSSAuditor::eraseAttributeIfInjected(const FilterTokenRequest& request, const QualifiedName& attributeName, const String& replacementValue, TruncationKind treatment)

{

size_t indexOfAttribute = 0;

- if (findAttributeWithName(request.token, attributeName, indexOfAttribute)) {

- const HTMLToken::Attribute& attribute = request.token.attributes().at(indexOfAttribute);

- if (isContainedInRequest(decodedSnippetForAttribute(request, attribute, treatment))) {

- if (threadSafeMatch(attributeName, srcAttr) && isLikelySafeResource(String(attribute.value)))

- return false;

- if (threadSafeMatch(attributeName, http_equivAttr) && !isDangerousHTTPEquiv(String(attribute.value)))

- return false;

- request.token.eraseValueOfAttribute(indexOfAttribute);

- if (!replacementValue.isEmpty())

- request.token.appendToAttributeValue(indexOfAttribute, replacementValue);

- return true;

- }

+ if (!findAttributeWithName(request.token, attributeName, indexOfAttribute))

+ return false;

+ const HTMLToken::Attribute& attribute = request.token.attributes().at(indexOfAttribute);

+ if (!isContainedInRequest(canonicalize(snippetFromAttribute(request, attribute), treatment)))

+ return false;

+ if (threadSafeMatch(attributeName, srcAttr)) {

+ if (isLikelySafeResource(String(attribute.value)))

+ return false;

+ } else if (threadSafeMatch(attributeName, http_equivAttr)) {

+ if (!isDangerousHTTPEquiv(String(attribute.value)))

+ return false;

}

- return false;

+ request.token.eraseValueOfAttribute(indexOfAttribute);

+ if (!replacementValue.isEmpty())

+ request.token.appendToAttributeValue(indexOfAttribute, replacementValue);

+ return true;

}

-String XSSAuditor::decodedSnippetForName(const FilterTokenRequest& request)

+String XSSAuditor::canonicalizedSnippetForTagName(const FilterTokenRequest& request)

{

// Grab a fixed number of characters equal to the length of the token's name plus one (to account for the "<").

- return canonicalize(fullyDecodeString(request.sourceTracker.sourceForToken(request.token), m_encoding).substring(0, request.token.name().size() + 1));

+ return canonicalize(request.sourceTracker.sourceForToken(request.token).substring(0, request.token.name().size() + 1), NoTruncation);

}

-String XSSAuditor::decodedSnippetForAttribute(const FilterTokenRequest& request, const HTMLToken::Attribute& attribute, AttributeKind treatment)

+String XSSAuditor::snippetFromAttribute(const FilterTokenRequest& request, const HTMLToken::Attribute& attribute)

{

// The range doesn't inlcude the character which terminates the value. So,

// for an input of |name="value"|, the snippet is |name="value|. For an

@@ -604,58 +656,25 @@ String XSSAuditor::decodedSnippetForAttribute(const FilterTokenRequest& request,

// FIXME: We should grab one character before the name also.

int start = attribute.nameRange.start - request.token.startIndex();

int end = attribute.valueRange.end - request.token.startIndex();

- String decodedSnippet = fullyDecodeString(request.sourceTracker.sourceForToken(request.token).substring(start, end - start), m_encoding);

- decodedSnippet.truncate(kMaximumFragmentLengthTarget);

- if (treatment == SrcLikeAttribute) {

- int slashCount = 0;

- bool commaSeen = false;

- // In HTTP URLs, characters following the first ?, #, or third slash may come from

- // the page itself and can be merely ignored by an attacker's server when a remote

- // script or script-like resource is requested. In DATA URLS, the payload starts at

- // the first comma, and the the first /*, //, or <!-- may introduce a comment. Characters

- // following this may come from the page itself and may be ignored when the script is

- // executed. For simplicity, we don't differentiate based on URL scheme, and stop at

- // the first # or ?, the third slash, or the first slash or < once a comma is seen.

- for (size_t currentLength = 0; currentLength < decodedSnippet.length(); ++currentLength) {

- UChar currentChar = decodedSnippet[currentLength];

- if (currentChar == '?'

- || currentChar == '#'

- || ((currentChar == '/' || currentChar == '\\') && (commaSeen || ++slashCount > 2))

- || (currentChar == '<' && commaSeen)) {

- decodedSnippet.truncate(currentLength);

- break;

- }

- if (currentChar == ',')

- commaSeen = true;

- }

- } else if (treatment == ScriptLikeAttribute) {

- // Beware of trailing characters which came from the page itself, not the

- // injected vector. Excluding the terminating character covers common cases

- // where the page immediately ends the attribute, but doesn't cover more

- // complex cases where there is other page data following the injection.

- // Generally, these won't parse as javascript, so the injected vector

- // typically excludes them from consideration via a single-line comment or

- // by enclosing them in a string literal terminated later by the page's own

- // closing punctuation. Since the snippet has not been parsed, the vector

- // may also try to introduce these via entities. As a result, we'd like to

- // stop before the first "//", the first <!--, the first entity, or the first

- // quote not immediately following the first equals sign (taking whitespace

- // into consideration). To keep things simpler, we don't try to distinguish

- // between entity-introducing amperands vs. other uses, nor do we bother to

- // check for a second slash for a comment, nor do we bother to check for

- // !-- following a less-than sign. We stop instead on any ampersand

- // slash, or less-than sign.

- size_t position = 0;

- if ((position = decodedSnippet.find("=")) != kNotFound

- && (position = decodedSnippet.find(isNotHTMLSpace<UChar>, position + 1)) != kNotFound

- && (position = decodedSnippet.find(isTerminatingCharacter, isHTMLQuote(decodedSnippet[position]) ? position + 1 : position)) != kNotFound) {

- decodedSnippet.truncate(position);

- }

+ return request.sourceTracker.sourceForToken(request.token).substring(start, end - start);

+String XSSAuditor::canonicalize(String snippet, TruncationKind treatment)

+ String decodedSnippet = fullyDecodeString(snippet, m_encoding);

+ if (treatment != NoTruncation) {

+ decodedSnippet.truncate(kMaximumFragmentLengthTarget);

+ if (treatment == SrcLikeAttributeTruncation)

+ truncateForSrcLikeAttribute(decodedSnippet);

+ else if (treatment == ScriptLikeAttributeTruncation)

+ truncateForScriptLikeAttribute(decodedSnippet);

}

- return canonicalize(decodedSnippet);

+ return decodedSnippet.removeCharacters(&isNonCanonicalCharacter);

}

-String XSSAuditor::decodedSnippetForJavaScript(const FilterTokenRequest& request)

+String XSSAuditor::canonicalizedSnippetForJavaScript(const FilterTokenRequest& request)

{

String string = request.sourceTracker.sourceForToken(request.token);

size_t startPosition = 0;

@@ -709,7 +728,6 @@ String XSSAuditor::decodedSnippetForJavaScript(const FilterTokenRequest& request

foundPosition = lastNonSpacePosition;

break;

}

if (foundPosition > startPosition + kMaximumFragmentLengthTarget) {

// After hitting the length target, we can only stop at a point where we know we are

// not in the middle of a %-escape sequence. For the sake of simplicity, approximate

@@ -718,14 +736,13 @@ String XSSAuditor::decodedSnippetForJavaScript(const FilterTokenRequest& request

if (isHTMLSpace<UChar>(string[foundPosition]))

break;

}

if (!isHTMLSpace<UChar>(string[foundPosition]))

lastNonSpacePosition = foundPosition;

}

- result = canonicalize(fullyDecodeString(string.substring(startPosition, foundPosition - startPosition), m_encoding));

+ result = canonicalize(string.substring(startPosition, foundPosition - startPosition), NoTruncation);

startPosition = foundPosition + 1;

}

return result;

}

« no previous file with comments | « Source/core/html/parser/XSSAuditor.h ('k') | no next file » | no next file with comments »