Index: third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp |
diff --git a/third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp b/third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp |
index 2bac31bf7fb3719dffe382b1825d7a0c88288019..165479ca8be105a7e08b2f7e75f93c4e740f0b0d 100644 |
--- a/third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp |
+++ b/third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp |
@@ -50,7 +50,8 @@ |
namespace { |
-// SecurityOrigin::urlWithUniqueSecurityOrigin() can't be used cross-thread, or we'd use it instead. |
+// SecurityOrigin::urlWithUniqueSecurityOrigin() can't be used cross-thread, or |
+// we'd use it instead. |
const char kURLWithUniqueOrigin[] = "data:,"; |
const char kSafeJavaScriptURL[] = "javascript:void(0)"; |
@@ -62,21 +63,27 @@ namespace blink { |
using namespace HTMLNames; |
static bool isNonCanonicalCharacter(UChar c) { |
- // We remove all non-ASCII characters, including non-printable ASCII characters. |
+ // We remove all non-ASCII characters, including non-printable ASCII |
+ // characters. |
// |
- // Note, we don't remove backslashes like PHP stripslashes(), which among other things converts "\\0" to the \0 character. |
- // Instead, we remove backslashes and zeros (since the string "\\0" =(remove backslashes)=> "0"). However, this has the |
- // adverse effect that we remove any legitimate zeros from a string. |
+ // Note, we don't remove backslashes like PHP stripslashes(), which among |
+ // other things converts "\\0" to the \0 character. Instead, we remove |
+ // backslashes and zeros (since the string "\\0" =(remove backslashes)=> "0"). |
+ // However, this has the adverse effect that we remove any legitimate zeros |
+ // from a string. |
// |
- // We also remove forward-slash, because it is common for some servers to collapse successive path components, eg, |
- // a//b becomes a/b. |
+ // We also remove forward-slash, because it is common for some servers to |
+ // collapse successive path components, eg, a//b becomes a/b. |
// |
- // We also remove the questionmark character, since some severs replace invalid high-bytes with a questionmark. We |
- // are already stripping the high-bytes so we also strip the questionmark to match. |
+  // We also remove the questionmark character, since some servers replace |
+ // invalid high-bytes with a questionmark. We are already stripping the |
+ // high-bytes so we also strip the questionmark to match. |
// |
- // We also move the percent character, since some servers strip it when there's a malformed sequence. |
+  // We also remove the percent character, since some servers strip it when |
+ // there's a malformed sequence. |
// |
- // For instance: new String("http://localhost:8000?x") => new String("http:localhost:8x"). |
+ // For instance: new String("http://localhost:8000?x") => new |
+ // String("http:localhost:8x"). |
return (c == '\\' || c == '0' || c == '\0' || c == '/' || c == '?' || |
c == '%' || c >= 127); |
} |
@@ -124,7 +131,8 @@ static bool startsOpeningScriptTagAt(const String& string, size_t start) { |
script); |
} |
-// If other files need this, we should move this to core/html/parser/HTMLParserIdioms.h |
+// If other files need this, we should move this to |
+// core/html/parser/HTMLParserIdioms.h |
template <size_t inlineCapacity> |
bool threadSafeMatch(const Vector<UChar, inlineCapacity>& vector, |
const QualifiedName& qname) { |
@@ -138,7 +146,8 @@ static bool hasName(const HTMLToken& token, const QualifiedName& name) { |
static bool findAttributeWithName(const HTMLToken& token, |
const QualifiedName& name, |
size_t& indexOfMatchingAttribute) { |
- // Notice that we're careful not to ref the StringImpl here because we might be on a background thread. |
+ // Notice that we're careful not to ref the StringImpl here because we might |
+ // be on a background thread. |
const String& attrName = name.namespaceURI() == XLinkNames::xlinkNamespaceURI |
? "xlink:" + name.localName().getString() |
: name.localName().getString(); |
@@ -167,7 +176,8 @@ static bool isDangerousHTTPEquiv(const String& value) { |
} |
static inline String decode16BitUnicodeEscapeSequences(const String& string) { |
- // Note, the encoding is ignored since each %u-escape sequence represents a UTF-16 code unit. |
+ // Note, the encoding is ignored since each %u-escape sequence represents a |
+ // UTF-16 code unit. |
return decodeEscapeSequences<Unicode16BitEscapeSequence>(string, |
UTF8Encoding()); |
} |
@@ -175,8 +185,9 @@ static inline String decode16BitUnicodeEscapeSequences(const String& string) { |
static inline String decodeStandardURLEscapeSequences( |
const String& string, |
const WTF::TextEncoding& encoding) { |
- // We use decodeEscapeSequences() instead of decodeURLEscapeSequences() (declared in weborigin/KURL.h) to |
- // avoid platform-specific URL decoding differences (e.g. KURLGoogle). |
+ // We use decodeEscapeSequences() instead of decodeURLEscapeSequences() |
+ // (declared in weborigin/KURL.h) to avoid platform-specific URL decoding |
+ // differences (e.g. KURLGoogle). |
return decodeEscapeSequences<URLEscapeSequence>(string, encoding); |
} |
@@ -194,17 +205,21 @@ static String fullyDecodeString(const String& string, |
} |
static void truncateForSrcLikeAttribute(String& decodedSnippet) { |
- // In HTTP URLs, characters following the first ?, #, or third slash may come from |
- // the page itself and can be merely ignored by an attacker's server when a remote |
- // script or script-like resource is requested. In DATA URLS, the payload starts at |
- // the first comma, and the the first /*, //, or <!-- may introduce a comment. Also, |
- // DATA URLs may use the same string literal tricks as with script content itself. |
- // In either case, content following this may come from the page and may be ignored |
- // when the script is executed. Also, any of these characters may now be represented |
- // by the (enlarged) set of html5 entities. |
- // For simplicity, we don't differentiate based on URL scheme, and stop at the first |
- // & (since it might be part of an entity for any of the subsequent punctuation), the |
- // first # or ?, the third slash, or the first slash, <, ', or " once a comma is seen. |
+ // In HTTP URLs, characters following the first ?, #, or third slash may come |
+ // from the page itself and can be merely ignored by an attacker's server when |
+  // a remote script or script-like resource is requested. In DATA URLs, the |
+  // payload starts at the first comma, and the first /*, //, or <!-- may |
+ // introduce a comment. |
+ // |
+ // Also, DATA URLs may use the same string literal tricks as with script |
+ // content itself. In either case, content following this may come from the |
+ // page and may be ignored when the script is executed. Also, any of these |
+ // characters may now be represented by the (enlarged) set of html5 entities. |
+ // |
+ // For simplicity, we don't differentiate based on URL scheme, and stop at the |
+ // first & (since it might be part of an entity for any of the subsequent |
+ // punctuation), the first # or ?, the third slash, or the first slash, <, ', |
+ // or " once a comma is seen. |
int slashCount = 0; |
bool commaSeen = false; |
for (size_t currentLength = 0; currentLength < decodedSnippet.length(); |
@@ -229,6 +244,7 @@ static void truncateForScriptLikeAttribute(String& decodedSnippet) { |
// injected vector. Excluding the terminating character covers common cases |
// where the page immediately ends the attribute, but doesn't cover more |
// complex cases where there is other page data following the injection. |
+ // |
// Generally, these won't parse as javascript, so the injected vector |
// typically excludes them from consideration via a single-line comment or |
// by enclosing them in a string literal terminated later by the page's own |
@@ -236,11 +252,12 @@ static void truncateForScriptLikeAttribute(String& decodedSnippet) { |
// may also try to introduce these via entities. As a result, we'd like to |
// stop before the first "//", the first <!--, the first entity, or the first |
// quote not immediately following the first equals sign (taking whitespace |
- // into consideration). To keep things simpler, we don't try to distinguish |
- // between entity-introducing amperands vs. other uses, nor do we bother to |
- // check for a second slash for a comment, nor do we bother to check for |
- // !-- following a less-than sign. We stop instead on any ampersand |
- // slash, or less-than sign. |
+ // into consideration). |
+ // |
+ // To keep things simpler, we don't try to distinguish between |
+  // entity-introducing ampersands vs. other uses, nor do we bother to check for |
+  // a second slash for a comment, nor do we bother to check for !-- following a |
+  // less-than sign. We stop instead on any ampersand, slash, or less-than sign. |
size_t position = 0; |
if ((position = decodedSnippet.find("=")) != kNotFound && |
(position = decodedSnippet.find(isNotHTMLSpace<UChar>, position + 1)) != |
@@ -326,7 +343,8 @@ void XSSAuditor::init(Document* document, XSSAuditorDelegate* auditorDelegate) { |
} |
if (m_documentURL.isEmpty()) { |
- // The URL can be empty when opening a new browser window or calling window.open(""). |
+ // The URL can be empty when opening a new browser window or calling |
+ // window.open(""). |
m_isEnabled = false; |
return; |
} |
@@ -512,8 +530,8 @@ bool XSSAuditor::filterCharacterToken(const FilterTokenRequest& request) { |
} |
if (m_state == SuppressingAdjacentCharacterTokens) { |
request.token.eraseCharacters(); |
- request.token.appendToCharacter( |
- ' '); // Technically, character tokens can't be empty. |
+ // Technically, character tokens can't be empty. |
+ request.token.appendToCharacter(' '); |
return true; |
} |
return false; |
@@ -662,7 +680,8 @@ bool XSSAuditor::eraseDangerousAttributesIfInjected( |
bool eraseAttribute = false; |
bool valueContainsJavaScriptURL = false; |
const HTMLToken::Attribute& attribute = request.token.attributes().at(i); |
- // FIXME: Don't create a new String for every attribute.value in the document. |
+ // FIXME: Don't create a new String for every attribute.value in the |
+ // document. |
if (isNameOfInlineEventHandler(attribute.nameAsVector())) { |
eraseAttribute = isContainedInRequest( |
canonicalize(snippetFromAttribute(request, attribute), |
@@ -729,7 +748,8 @@ bool XSSAuditor::eraseAttributeIfInjected(const FilterTokenRequest& request, |
String XSSAuditor::canonicalizedSnippetForTagName( |
const FilterTokenRequest& request) { |
- // Grab a fixed number of characters equal to the length of the token's name plus one (to account for the "<"). |
+ // Grab a fixed number of characters equal to the length of the token's name |
+ // plus one (to account for the "<"). |
return canonicalize(request.sourceTracker.sourceForToken(request.token) |
.substring(0, request.token.name().size() + 1), |
NoTruncation); |
@@ -762,8 +782,9 @@ String XSSAuditor::canonicalize(String snippet, TruncationKind treatment) { |
if (treatment != NoTruncation) { |
if (decodedSnippet.length() > kMaximumFragmentLengthTarget) { |
- // Let the page influence the stopping point to avoid disclosing leading fragments. |
- // Stop when we hit whitespace, since that is unlikely to be part a leading fragment. |
+ // Let the page influence the stopping point to avoid disclosing leading |
+ // fragments. Stop when we hit whitespace, since that is unlikely to be |
+        // part of a leading fragment. |
size_t position = kMaximumFragmentLengthTarget; |
while (position < decodedSnippet.length() && |
!isHTMLSpace(decodedSnippet[position])) |
@@ -793,14 +814,14 @@ String XSSAuditor::canonicalizedSnippetForJavaScript( |
isHTMLSpace<UChar>(string[startPosition])) |
startPosition++; |
- // Under SVG/XML rules, only HTML comment syntax matters and the parser returns |
- // these as a separate comment tokens. Having consumed whitespace, we need not look |
- // further for these. |
+ // Under SVG/XML rules, only HTML comment syntax matters and the parser |
+    // returns these as separate comment tokens. Having consumed whitespace, |
+ // we need not look further for these. |
if (request.shouldAllowCDATA) |
break; |
- // Under HTML rules, both the HTML and JS comment synatx matters, and the HTML |
- // comment ends at the end of the line, not with -->. |
+    // Under HTML rules, both the HTML and JS comment syntax matters, and the |
+ // HTML comment ends at the end of the line, not with -->. |
if (startsHTMLCommentAt(string, startPosition) || |
startsSingleLineCommentAt(string, startPosition)) { |
while (startPosition < endPosition && !isJSNewline(string[startPosition])) |
@@ -817,10 +838,12 @@ String XSSAuditor::canonicalizedSnippetForJavaScript( |
String result; |
while (startPosition < endPosition && !result.length()) { |
- // Stop at next comment (using the same rules as above for SVG/XML vs HTML), when we encounter a comma, |
- // when we encoutner a backtick, when we hit an opening <script> tag, or when we exceed the maximum length |
- // target. The comma rule covers a common parameter concatenation case performed by some web servers. The |
- // backtick rule covers the ECMA6 multi-line template string feature. |
+ // Stop at next comment (using the same rules as above for SVG/XML vs HTML), |
+    // when we encounter a comma, when we encounter a backtick, when we hit an |
+ // opening <script> tag, or when we exceed the maximum length target. The |
+ // comma rule covers a common parameter concatenation case performed by some |
+ // web servers. The backtick rule covers the ECMA6 multi-line template |
+ // string feature. |
lastNonSpacePosition = kNotFound; |
for (foundPosition = startPosition; foundPosition < endPosition; |
foundPosition++) { |
@@ -840,10 +863,11 @@ String XSSAuditor::canonicalizedSnippetForJavaScript( |
break; |
} |
if (foundPosition > startPosition + kMaximumFragmentLengthTarget) { |
- // After hitting the length target, we can only stop at a point where we know we are |
- // not in the middle of a %-escape sequence. For the sake of simplicity, approximate |
- // not stopping inside a (possibly multiply encoded) %-escape sequence by breaking on |
- // whitespace only. We should have enough text in these cases to avoid false positives. |
+ // After hitting the length target, we can only stop at a point where we |
+ // know we are not in the middle of a %-escape sequence. For the sake of |
+ // simplicity, approximate not stopping inside a (possibly multiply |
+ // encoded) %-escape sequence by breaking on whitespace only. We should |
+ // have enough text in these cases to avoid false positives. |
if (isHTMLSpace<UChar>(string[foundPosition])) |
break; |
} |