| Index: third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp
|
| diff --git a/third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp b/third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp
|
| index 2bac31bf7fb3719dffe382b1825d7a0c88288019..165479ca8be105a7e08b2f7e75f93c4e740f0b0d 100644
|
| --- a/third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp
|
| +++ b/third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp
|
| @@ -50,7 +50,8 @@
|
|
|
| namespace {
|
|
|
| -// SecurityOrigin::urlWithUniqueSecurityOrigin() can't be used cross-thread, or we'd use it instead.
|
| +// SecurityOrigin::urlWithUniqueSecurityOrigin() can't be used cross-thread, or
|
| +// we'd use it instead.
|
| const char kURLWithUniqueOrigin[] = "data:,";
|
|
|
| const char kSafeJavaScriptURL[] = "javascript:void(0)";
|
| @@ -62,21 +63,27 @@ namespace blink {
|
| using namespace HTMLNames;
|
|
|
| static bool isNonCanonicalCharacter(UChar c) {
|
| - // We remove all non-ASCII characters, including non-printable ASCII characters.
|
| + // We remove all non-ASCII characters, including non-printable ASCII
|
| + // characters.
|
| //
|
| - // Note, we don't remove backslashes like PHP stripslashes(), which among other things converts "\\0" to the \0 character.
|
| - // Instead, we remove backslashes and zeros (since the string "\\0" =(remove backslashes)=> "0"). However, this has the
|
| - // adverse effect that we remove any legitimate zeros from a string.
|
| + // Note, we don't remove backslashes like PHP stripslashes(), which among
|
| + // other things converts "\\0" to the \0 character. Instead, we remove
|
| + // backslashes and zeros (since the string "\\0" =(remove backslashes)=> "0").
|
| + // However, this has the adverse effect that we remove any legitimate zeros
|
| + // from a string.
|
| //
|
| - // We also remove forward-slash, because it is common for some servers to collapse successive path components, eg,
|
| - // a//b becomes a/b.
|
| + // We also remove forward-slash, because it is common for some servers to
|
| + // collapse successive path components, eg, a//b becomes a/b.
|
| //
|
| - // We also remove the questionmark character, since some severs replace invalid high-bytes with a questionmark. We
|
| - // are already stripping the high-bytes so we also strip the questionmark to match.
|
| + // We also remove the questionmark character, since some servers replace
|
| + // invalid high-bytes with a questionmark. We are already stripping the
|
| + // high-bytes so we also strip the questionmark to match.
|
| //
|
| - // We also move the percent character, since some servers strip it when there's a malformed sequence.
|
| + // We also remove the percent character, since some servers strip it when
|
| + // there's a malformed sequence.
|
| //
|
| - // For instance: new String("http://localhost:8000?x") => new String("http:localhost:8x").
|
| + // For instance: new String("http://localhost:8000?x") => new
|
| + // String("http:localhost:8x").
|
| return (c == '\\' || c == '0' || c == '\0' || c == '/' || c == '?' ||
|
| c == '%' || c >= 127);
|
| }
|
| @@ -124,7 +131,8 @@ static bool startsOpeningScriptTagAt(const String& string, size_t start) {
|
| script);
|
| }
|
|
|
| -// If other files need this, we should move this to core/html/parser/HTMLParserIdioms.h
|
| +// If other files need this, we should move this to
|
| +// core/html/parser/HTMLParserIdioms.h
|
| template <size_t inlineCapacity>
|
| bool threadSafeMatch(const Vector<UChar, inlineCapacity>& vector,
|
| const QualifiedName& qname) {
|
| @@ -138,7 +146,8 @@ static bool hasName(const HTMLToken& token, const QualifiedName& name) {
|
| static bool findAttributeWithName(const HTMLToken& token,
|
| const QualifiedName& name,
|
| size_t& indexOfMatchingAttribute) {
|
| - // Notice that we're careful not to ref the StringImpl here because we might be on a background thread.
|
| + // Notice that we're careful not to ref the StringImpl here because we might
|
| + // be on a background thread.
|
| const String& attrName = name.namespaceURI() == XLinkNames::xlinkNamespaceURI
|
| ? "xlink:" + name.localName().getString()
|
| : name.localName().getString();
|
| @@ -167,7 +176,8 @@ static bool isDangerousHTTPEquiv(const String& value) {
|
| }
|
|
|
| static inline String decode16BitUnicodeEscapeSequences(const String& string) {
|
| - // Note, the encoding is ignored since each %u-escape sequence represents a UTF-16 code unit.
|
| + // Note, the encoding is ignored since each %u-escape sequence represents a
|
| + // UTF-16 code unit.
|
| return decodeEscapeSequences<Unicode16BitEscapeSequence>(string,
|
| UTF8Encoding());
|
| }
|
| @@ -175,8 +185,9 @@ static inline String decode16BitUnicodeEscapeSequences(const String& string) {
|
| static inline String decodeStandardURLEscapeSequences(
|
| const String& string,
|
| const WTF::TextEncoding& encoding) {
|
| - // We use decodeEscapeSequences() instead of decodeURLEscapeSequences() (declared in weborigin/KURL.h) to
|
| - // avoid platform-specific URL decoding differences (e.g. KURLGoogle).
|
| + // We use decodeEscapeSequences() instead of decodeURLEscapeSequences()
|
| + // (declared in weborigin/KURL.h) to avoid platform-specific URL decoding
|
| + // differences (e.g. KURLGoogle).
|
| return decodeEscapeSequences<URLEscapeSequence>(string, encoding);
|
| }
|
|
|
| @@ -194,17 +205,21 @@ static String fullyDecodeString(const String& string,
|
| }
|
|
|
| static void truncateForSrcLikeAttribute(String& decodedSnippet) {
|
| - // In HTTP URLs, characters following the first ?, #, or third slash may come from
|
| - // the page itself and can be merely ignored by an attacker's server when a remote
|
| - // script or script-like resource is requested. In DATA URLS, the payload starts at
|
| - // the first comma, and the the first /*, //, or <!-- may introduce a comment. Also,
|
| - // DATA URLs may use the same string literal tricks as with script content itself.
|
| - // In either case, content following this may come from the page and may be ignored
|
| - // when the script is executed. Also, any of these characters may now be represented
|
| - // by the (enlarged) set of html5 entities.
|
| - // For simplicity, we don't differentiate based on URL scheme, and stop at the first
|
| - // & (since it might be part of an entity for any of the subsequent punctuation), the
|
| - // first # or ?, the third slash, or the first slash, <, ', or " once a comma is seen.
|
| + // In HTTP URLs, characters following the first ?, #, or third slash may come
|
| + // from the page itself and can be merely ignored by an attacker's server when
|
| + // a remote script or script-like resource is requested. In DATA URLS, the
|
| + // payload starts at the first comma, and the first /*, //, or <!-- may
|
| + // introduce a comment.
|
| + //
|
| + // Also, DATA URLs may use the same string literal tricks as with script
|
| + // content itself. In either case, content following this may come from the
|
| + // page and may be ignored when the script is executed. Also, any of these
|
| + // characters may now be represented by the (enlarged) set of html5 entities.
|
| + //
|
| + // For simplicity, we don't differentiate based on URL scheme, and stop at the
|
| + // first & (since it might be part of an entity for any of the subsequent
|
| + // punctuation), the first # or ?, the third slash, or the first slash, <, ',
|
| + // or " once a comma is seen.
|
| int slashCount = 0;
|
| bool commaSeen = false;
|
| for (size_t currentLength = 0; currentLength < decodedSnippet.length();
|
| @@ -229,6 +244,7 @@ static void truncateForScriptLikeAttribute(String& decodedSnippet) {
|
| // injected vector. Excluding the terminating character covers common cases
|
| // where the page immediately ends the attribute, but doesn't cover more
|
| // complex cases where there is other page data following the injection.
|
| + //
|
| // Generally, these won't parse as javascript, so the injected vector
|
| // typically excludes them from consideration via a single-line comment or
|
| // by enclosing them in a string literal terminated later by the page's own
|
| @@ -236,11 +252,12 @@ static void truncateForScriptLikeAttribute(String& decodedSnippet) {
|
| // may also try to introduce these via entities. As a result, we'd like to
|
| // stop before the first "//", the first <!--, the first entity, or the first
|
| // quote not immediately following the first equals sign (taking whitespace
|
| - // into consideration). To keep things simpler, we don't try to distinguish
|
| - // between entity-introducing amperands vs. other uses, nor do we bother to
|
| - // check for a second slash for a comment, nor do we bother to check for
|
| - // !-- following a less-than sign. We stop instead on any ampersand
|
| - // slash, or less-than sign.
|
| + // into consideration).
|
| + //
|
| + // To keep things simpler, we don't try to distinguish between
|
| + // entity-introducing ampersands vs. other uses, nor do we bother to check for
|
| + // a second slash for a comment, nor do we bother to check for !-- following a
|
| + // less-than sign. We stop instead on any ampersand, slash, or less-than sign.
|
| size_t position = 0;
|
| if ((position = decodedSnippet.find("=")) != kNotFound &&
|
| (position = decodedSnippet.find(isNotHTMLSpace<UChar>, position + 1)) !=
|
| @@ -326,7 +343,8 @@ void XSSAuditor::init(Document* document, XSSAuditorDelegate* auditorDelegate) {
|
| }
|
|
|
| if (m_documentURL.isEmpty()) {
|
| - // The URL can be empty when opening a new browser window or calling window.open("").
|
| + // The URL can be empty when opening a new browser window or calling
|
| + // window.open("").
|
| m_isEnabled = false;
|
| return;
|
| }
|
| @@ -512,8 +530,8 @@ bool XSSAuditor::filterCharacterToken(const FilterTokenRequest& request) {
|
| }
|
| if (m_state == SuppressingAdjacentCharacterTokens) {
|
| request.token.eraseCharacters();
|
| - request.token.appendToCharacter(
|
| - ' '); // Technically, character tokens can't be empty.
|
| + // Technically, character tokens can't be empty.
|
| + request.token.appendToCharacter(' ');
|
| return true;
|
| }
|
| return false;
|
| @@ -662,7 +680,8 @@ bool XSSAuditor::eraseDangerousAttributesIfInjected(
|
| bool eraseAttribute = false;
|
| bool valueContainsJavaScriptURL = false;
|
| const HTMLToken::Attribute& attribute = request.token.attributes().at(i);
|
| - // FIXME: Don't create a new String for every attribute.value in the document.
|
| + // FIXME: Don't create a new String for every attribute.value in the
|
| + // document.
|
| if (isNameOfInlineEventHandler(attribute.nameAsVector())) {
|
| eraseAttribute = isContainedInRequest(
|
| canonicalize(snippetFromAttribute(request, attribute),
|
| @@ -729,7 +748,8 @@ bool XSSAuditor::eraseAttributeIfInjected(const FilterTokenRequest& request,
|
|
|
| String XSSAuditor::canonicalizedSnippetForTagName(
|
| const FilterTokenRequest& request) {
|
| - // Grab a fixed number of characters equal to the length of the token's name plus one (to account for the "<").
|
| + // Grab a fixed number of characters equal to the length of the token's name
|
| + // plus one (to account for the "<").
|
| return canonicalize(request.sourceTracker.sourceForToken(request.token)
|
| .substring(0, request.token.name().size() + 1),
|
| NoTruncation);
|
| @@ -762,8 +782,9 @@ String XSSAuditor::canonicalize(String snippet, TruncationKind treatment) {
|
|
|
| if (treatment != NoTruncation) {
|
| if (decodedSnippet.length() > kMaximumFragmentLengthTarget) {
|
| - // Let the page influence the stopping point to avoid disclosing leading fragments.
|
| - // Stop when we hit whitespace, since that is unlikely to be part a leading fragment.
|
| + // Let the page influence the stopping point to avoid disclosing leading
|
| + // fragments. Stop when we hit whitespace, since that is unlikely to be
|
| + // part of a leading fragment.
|
| size_t position = kMaximumFragmentLengthTarget;
|
| while (position < decodedSnippet.length() &&
|
| !isHTMLSpace(decodedSnippet[position]))
|
| @@ -793,14 +814,14 @@ String XSSAuditor::canonicalizedSnippetForJavaScript(
|
| isHTMLSpace<UChar>(string[startPosition]))
|
| startPosition++;
|
|
|
| - // Under SVG/XML rules, only HTML comment syntax matters and the parser returns
|
| - // these as a separate comment tokens. Having consumed whitespace, we need not look
|
| - // further for these.
|
| + // Under SVG/XML rules, only HTML comment syntax matters and the parser
|
| + // returns these as separate comment tokens. Having consumed whitespace,
|
| + // we need not look further for these.
|
| if (request.shouldAllowCDATA)
|
| break;
|
|
|
| - // Under HTML rules, both the HTML and JS comment synatx matters, and the HTML
|
| - // comment ends at the end of the line, not with -->.
|
| + // Under HTML rules, both the HTML and JS comment syntax matters, and the
|
| + // HTML comment ends at the end of the line, not with -->.
|
| if (startsHTMLCommentAt(string, startPosition) ||
|
| startsSingleLineCommentAt(string, startPosition)) {
|
| while (startPosition < endPosition && !isJSNewline(string[startPosition]))
|
| @@ -817,10 +838,12 @@ String XSSAuditor::canonicalizedSnippetForJavaScript(
|
|
|
| String result;
|
| while (startPosition < endPosition && !result.length()) {
|
| - // Stop at next comment (using the same rules as above for SVG/XML vs HTML), when we encounter a comma,
|
| - // when we encoutner a backtick, when we hit an opening <script> tag, or when we exceed the maximum length
|
| - // target. The comma rule covers a common parameter concatenation case performed by some web servers. The
|
| - // backtick rule covers the ECMA6 multi-line template string feature.
|
| + // Stop at next comment (using the same rules as above for SVG/XML vs HTML),
|
| + // when we encounter a comma, when we encounter a backtick, when we hit an
|
| + // opening <script> tag, or when we exceed the maximum length target. The
|
| + // comma rule covers a common parameter concatenation case performed by some
|
| + // web servers. The backtick rule covers the ECMA6 multi-line template
|
| + // string feature.
|
| lastNonSpacePosition = kNotFound;
|
| for (foundPosition = startPosition; foundPosition < endPosition;
|
| foundPosition++) {
|
| @@ -840,10 +863,11 @@ String XSSAuditor::canonicalizedSnippetForJavaScript(
|
| break;
|
| }
|
| if (foundPosition > startPosition + kMaximumFragmentLengthTarget) {
|
| - // After hitting the length target, we can only stop at a point where we know we are
|
| - // not in the middle of a %-escape sequence. For the sake of simplicity, approximate
|
| - // not stopping inside a (possibly multiply encoded) %-escape sequence by breaking on
|
| - // whitespace only. We should have enough text in these cases to avoid false positives.
|
| + // After hitting the length target, we can only stop at a point where we
|
| + // know we are not in the middle of a %-escape sequence. For the sake of
|
| + // simplicity, approximate not stopping inside a (possibly multiply
|
| + // encoded) %-escape sequence by breaking on whitespace only. We should
|
| + // have enough text in these cases to avoid false positives.
|
| if (isHTMLSpace<UChar>(string[foundPosition]))
|
| break;
|
| }
|
|
|