third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp - Issue 2386893002: Reformat comments in core/html/parser

Unified Diff: third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp

Issue 2386893002: Reformat comments in core/html/parser (Closed)

Patch Set: self review Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp

diff --git a/third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp b/third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp

index 2bac31bf7fb3719dffe382b1825d7a0c88288019..165479ca8be105a7e08b2f7e75f93c4e740f0b0d 100644

--- a/third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp

+++ b/third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp

@@ -50,7 +50,8 @@

namespace {

-// SecurityOrigin::urlWithUniqueSecurityOrigin() can't be used cross-thread, or we'd use it instead.

+// SecurityOrigin::urlWithUniqueSecurityOrigin() can't be used cross-thread, or

+// we'd use it instead.

const char kURLWithUniqueOrigin[] = "data:,";

const char kSafeJavaScriptURL[] = "javascript:void(0)";

@@ -62,21 +63,27 @@ namespace blink {

using namespace HTMLNames;

static bool isNonCanonicalCharacter(UChar c) {

- // We remove all non-ASCII characters, including non-printable ASCII characters.

+ // We remove all non-ASCII characters, including non-printable ASCII

+ // characters.

- // Note, we don't remove backslashes like PHP stripslashes(), which among other things converts "\\0" to the \0 character.

- // Instead, we remove backslashes and zeros (since the string "\\0" =(remove backslashes)=> "0"). However, this has the

- // adverse effect that we remove any legitimate zeros from a string.

+ // Note, we don't remove backslashes like PHP stripslashes(), which among

+ // other things converts "\\0" to the \0 character. Instead, we remove

+ // backslashes and zeros (since the string "\\0" =(remove backslashes)=> "0").

+ // However, this has the adverse effect that we remove any legitimate zeros

+ // from a string.

- // We also remove forward-slash, because it is common for some servers to collapse successive path components, eg,

- // a//b becomes a/b.

+ // We also remove forward-slash, because it is common for some servers to

+ // collapse successive path components, eg, a//b becomes a/b.

- // We also remove the questionmark character, since some severs replace invalid high-bytes with a questionmark. We

- // are already stripping the high-bytes so we also strip the questionmark to match.

+ // We also remove the questionmark character, since some severs replace

+ // invalid high-bytes with a questionmark. We are already stripping the

+ // high-bytes so we also strip the questionmark to match.

- // We also move the percent character, since some servers strip it when there's a malformed sequence.

+ // We also move the percent character, since some servers strip it when

+ // there's a malformed sequence.

- // For instance: new String("http://localhost:8000?x") => new String("http:localhost:8x").

+ // For instance: new String("http://localhost:8000?x") => new

+ // String("http:localhost:8x").

return (c == '\\' || c == '0' || c == '\0' || c == '/' || c == '?' ||

c == '%' || c >= 127);

}

@@ -124,7 +131,8 @@ static bool startsOpeningScriptTagAt(const String& string, size_t start) {

script);

}

-// If other files need this, we should move this to core/html/parser/HTMLParserIdioms.h

+// If other files need this, we should move this to

+// core/html/parser/HTMLParserIdioms.h

template <size_t inlineCapacity>

bool threadSafeMatch(const Vector<UChar, inlineCapacity>& vector,

const QualifiedName& qname) {

@@ -138,7 +146,8 @@ static bool hasName(const HTMLToken& token, const QualifiedName& name) {

static bool findAttributeWithName(const HTMLToken& token,

const QualifiedName& name,

size_t& indexOfMatchingAttribute) {

- // Notice that we're careful not to ref the StringImpl here because we might be on a background thread.

+ // Notice that we're careful not to ref the StringImpl here because we might

+ // be on a background thread.

const String& attrName = name.namespaceURI() == XLinkNames::xlinkNamespaceURI

? "xlink:" + name.localName().getString()

: name.localName().getString();

@@ -167,7 +176,8 @@ static bool isDangerousHTTPEquiv(const String& value) {

}

static inline String decode16BitUnicodeEscapeSequences(const String& string) {

- // Note, the encoding is ignored since each %u-escape sequence represents a UTF-16 code unit.

+ // Note, the encoding is ignored since each %u-escape sequence represents a

+ // UTF-16 code unit.

return decodeEscapeSequences<Unicode16BitEscapeSequence>(string,

UTF8Encoding());

}

@@ -175,8 +185,9 @@ static inline String decode16BitUnicodeEscapeSequences(const String& string) {

static inline String decodeStandardURLEscapeSequences(

const String& string,

const WTF::TextEncoding& encoding) {

- // We use decodeEscapeSequences() instead of decodeURLEscapeSequences() (declared in weborigin/KURL.h) to

- // avoid platform-specific URL decoding differences (e.g. KURLGoogle).

+ // We use decodeEscapeSequences() instead of decodeURLEscapeSequences()

+ // (declared in weborigin/KURL.h) to avoid platform-specific URL decoding

+ // differences (e.g. KURLGoogle).

return decodeEscapeSequences<URLEscapeSequence>(string, encoding);

}

@@ -194,17 +205,21 @@ static String fullyDecodeString(const String& string,

}

static void truncateForSrcLikeAttribute(String& decodedSnippet) {

- // In HTTP URLs, characters following the first ?, #, or third slash may come from

- // the page itself and can be merely ignored by an attacker's server when a remote

- // script or script-like resource is requested. In DATA URLS, the payload starts at

- // the first comma, and the the first /*, //, or <!-- may introduce a comment. Also,

- // DATA URLs may use the same string literal tricks as with script content itself.

- // In either case, content following this may come from the page and may be ignored

- // when the script is executed. Also, any of these characters may now be represented

- // by the (enlarged) set of html5 entities.

- // For simplicity, we don't differentiate based on URL scheme, and stop at the first

- // & (since it might be part of an entity for any of the subsequent punctuation), the

- // first # or ?, the third slash, or the first slash, <, ', or " once a comma is seen.

+ // In HTTP URLs, characters following the first ?, #, or third slash may come

+ // from the page itself and can be merely ignored by an attacker's server when

+ // a remote script or script-like resource is requested. In DATA URLS, the

+ // payload starts at the first comma, and the the first /*, //, or <!-- may

+ // introduce a comment.

+ //

+ // Also, DATA URLs may use the same string literal tricks as with script

+ // content itself. In either case, content following this may come from the

+ // page and may be ignored when the script is executed. Also, any of these

+ // characters may now be represented by the (enlarged) set of html5 entities.

+ //

+ // For simplicity, we don't differentiate based on URL scheme, and stop at the

+ // first & (since it might be part of an entity for any of the subsequent

+ // punctuation), the first # or ?, the third slash, or the first slash, <, ',

+ // or " once a comma is seen.

int slashCount = 0;

bool commaSeen = false;

for (size_t currentLength = 0; currentLength < decodedSnippet.length();

@@ -229,6 +244,7 @@ static void truncateForScriptLikeAttribute(String& decodedSnippet) {

// injected vector. Excluding the terminating character covers common cases

// where the page immediately ends the attribute, but doesn't cover more

// complex cases where there is other page data following the injection.

+ //

// Generally, these won't parse as javascript, so the injected vector

// typically excludes them from consideration via a single-line comment or

// by enclosing them in a string literal terminated later by the page's own

@@ -236,11 +252,12 @@ static void truncateForScriptLikeAttribute(String& decodedSnippet) {

// may also try to introduce these via entities. As a result, we'd like to

// stop before the first "//", the first <!--, the first entity, or the first

// quote not immediately following the first equals sign (taking whitespace

- // into consideration). To keep things simpler, we don't try to distinguish

- // between entity-introducing amperands vs. other uses, nor do we bother to

- // check for a second slash for a comment, nor do we bother to check for

- // !-- following a less-than sign. We stop instead on any ampersand

- // slash, or less-than sign.

+ // into consideration).

+ //

+ // To keep things simpler, we don't try to distinguish between

+ // entity-introducing amperands vs. other uses, nor do we bother to check for

+ // a second slash for a comment, nor do we bother to check for !-- following a

+ // less-than sign. We stop instead on any ampersand slash, or less-than sign.

size_t position = 0;

if ((position = decodedSnippet.find("=")) != kNotFound &&

(position = decodedSnippet.find(isNotHTMLSpace<UChar>, position + 1)) !=

@@ -326,7 +343,8 @@ void XSSAuditor::init(Document* document, XSSAuditorDelegate* auditorDelegate) {

}

if (m_documentURL.isEmpty()) {

- // The URL can be empty when opening a new browser window or calling window.open("").

+ // The URL can be empty when opening a new browser window or calling

+ // window.open("").

m_isEnabled = false;

return;

}

@@ -512,8 +530,8 @@ bool XSSAuditor::filterCharacterToken(const FilterTokenRequest& request) {

}

if (m_state == SuppressingAdjacentCharacterTokens) {

request.token.eraseCharacters();

- request.token.appendToCharacter(

- ' '); // Technically, character tokens can't be empty.

+ // Technically, character tokens can't be empty.

+ request.token.appendToCharacter(' ');

return true;

}

return false;

@@ -662,7 +680,8 @@ bool XSSAuditor::eraseDangerousAttributesIfInjected(

bool eraseAttribute = false;

bool valueContainsJavaScriptURL = false;

const HTMLToken::Attribute& attribute = request.token.attributes().at(i);

- // FIXME: Don't create a new String for every attribute.value in the document.

+ // FIXME: Don't create a new String for every attribute.value in the

+ // document.

if (isNameOfInlineEventHandler(attribute.nameAsVector())) {

eraseAttribute = isContainedInRequest(

canonicalize(snippetFromAttribute(request, attribute),

@@ -729,7 +748,8 @@ bool XSSAuditor::eraseAttributeIfInjected(const FilterTokenRequest& request,

String XSSAuditor::canonicalizedSnippetForTagName(

const FilterTokenRequest& request) {

- // Grab a fixed number of characters equal to the length of the token's name plus one (to account for the "<").

+ // Grab a fixed number of characters equal to the length of the token's name

+ // plus one (to account for the "<").

return canonicalize(request.sourceTracker.sourceForToken(request.token)

.substring(0, request.token.name().size() + 1),

NoTruncation);

@@ -762,8 +782,9 @@ String XSSAuditor::canonicalize(String snippet, TruncationKind treatment) {

if (treatment != NoTruncation) {

if (decodedSnippet.length() > kMaximumFragmentLengthTarget) {

- // Let the page influence the stopping point to avoid disclosing leading fragments.

- // Stop when we hit whitespace, since that is unlikely to be part a leading fragment.

+ // Let the page influence the stopping point to avoid disclosing leading

+ // fragments. Stop when we hit whitespace, since that is unlikely to be

+ // part a leading fragment.

size_t position = kMaximumFragmentLengthTarget;

while (position < decodedSnippet.length() &&

!isHTMLSpace(decodedSnippet[position]))

@@ -793,14 +814,14 @@ String XSSAuditor::canonicalizedSnippetForJavaScript(

isHTMLSpace<UChar>(string[startPosition]))

startPosition++;

- // Under SVG/XML rules, only HTML comment syntax matters and the parser returns

- // these as a separate comment tokens. Having consumed whitespace, we need not look

- // further for these.

+ // Under SVG/XML rules, only HTML comment syntax matters and the parser

+ // returns these as a separate comment tokens. Having consumed whitespace,

+ // we need not look further for these.

if (request.shouldAllowCDATA)

break;

- // Under HTML rules, both the HTML and JS comment synatx matters, and the HTML

- // comment ends at the end of the line, not with -->.

+ // Under HTML rules, both the HTML and JS comment synatx matters, and the

+ // HTML comment ends at the end of the line, not with -->.

if (startsHTMLCommentAt(string, startPosition) ||

startsSingleLineCommentAt(string, startPosition)) {

while (startPosition < endPosition && !isJSNewline(string[startPosition]))

@@ -817,10 +838,12 @@ String XSSAuditor::canonicalizedSnippetForJavaScript(

String result;

while (startPosition < endPosition && !result.length()) {

- // Stop at next comment (using the same rules as above for SVG/XML vs HTML), when we encounter a comma,

- // when we encoutner a backtick, when we hit an opening <script> tag, or when we exceed the maximum length

- // target. The comma rule covers a common parameter concatenation case performed by some web servers. The

- // backtick rule covers the ECMA6 multi-line template string feature.

+ // Stop at next comment (using the same rules as above for SVG/XML vs HTML),

+ // when we encounter a comma, when we encoutner a backtick, when we hit an

+ // opening <script> tag, or when we exceed the maximum length target. The

+ // comma rule covers a common parameter concatenation case performed by some

+ // web servers. The backtick rule covers the ECMA6 multi-line template

+ // string feature.

lastNonSpacePosition = kNotFound;

for (foundPosition = startPosition; foundPosition < endPosition;

foundPosition++) {

@@ -840,10 +863,11 @@ String XSSAuditor::canonicalizedSnippetForJavaScript(

break;

}

if (foundPosition > startPosition + kMaximumFragmentLengthTarget) {

- // After hitting the length target, we can only stop at a point where we know we are

- // not in the middle of a %-escape sequence. For the sake of simplicity, approximate

- // not stopping inside a (possibly multiply encoded) %-escape sequence by breaking on

- // whitespace only. We should have enough text in these cases to avoid false positives.

+ // After hitting the length target, we can only stop at a point where we

+ // know we are not in the middle of a %-escape sequence. For the sake of

+ // simplicity, approximate not stopping inside a (possibly multiply

+ // encoded) %-escape sequence by breaking on whitespace only. We should

+ // have enough text in these cases to avoid false positives.

if (isHTMLSpace<UChar>(string[foundPosition]))

break;

}

« no previous file with comments | « third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp ('k') | no next file » | no next file with comments »