third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp - Issue 2386893002: Reformat comments in core/html/parser

Side by Side Diff: third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp

Issue 2386893002: Reformat comments in core/html/parser (Closed)

Patch Set: self review Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 /*	1 /*

2 * Copyright (C) 2011 Adam Barth. All Rights Reserved.	2 * Copyright (C) 2011 Adam Barth. All Rights Reserved.

3 * Copyright (C) 2011 Daniel Bates (dbates@intudata.com).	3 * Copyright (C) 2011 Daniel Bates (dbates@intudata.com).

4 *	4 *

5 * Redistribution and use in source and binary forms, with or without	5 * Redistribution and use in source and binary forms, with or without

6 * modification, are permitted provided that the following conditions	6 * modification, are permitted provided that the following conditions

7 * are met:	7 * are met:

8 * 1. Redistributions of source code must retain the above copyright	8 * 1. Redistributions of source code must retain the above copyright

9 * notice, this list of conditions and the following disclaimer.	9 * notice, this list of conditions and the following disclaimer.

10 * 2. Redistributions in binary form must reproduce the above copyright	10 * 2. Redistributions in binary form must reproduce the above copyright

(...skipping 32 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
43 #include "core/loader/DocumentLoader.h"	43 #include "core/loader/DocumentLoader.h"

44 #include "core/loader/MixedContentChecker.h"	44 #include "core/loader/MixedContentChecker.h"

45 #include "platform/network/EncodedFormData.h"	45 #include "platform/network/EncodedFormData.h"

46 #include "platform/text/DecodeEscapeSequences.h"	46 #include "platform/text/DecodeEscapeSequences.h"

47 #include "wtf/ASCIICType.h"	47 #include "wtf/ASCIICType.h"

48 #include "wtf/PtrUtil.h"	48 #include "wtf/PtrUtil.h"

49 #include <memory>	49 #include <memory>

50	50

51 namespace {	51 namespace {

52	52

53 // SecurityOrigin::urlWithUniqueSecurityOrigin() can't be used cross-thread, or we'd use it instead.	53 // SecurityOrigin::urlWithUniqueSecurityOrigin() can't be used cross-thread, or

	54 // we'd use it instead.

54 const char kURLWithUniqueOrigin[] = "data:,";	55 const char kURLWithUniqueOrigin[] = "data:,";

55	56

56 const char kSafeJavaScriptURL[] = "javascript:void(0)";	57 const char kSafeJavaScriptURL[] = "javascript:void(0)";

57	58

58 } // namespace	59 } // namespace

59	60

60 namespace blink {	61 namespace blink {

61	62

62 using namespace HTMLNames;	63 using namespace HTMLNames;

63	64

64 static bool isNonCanonicalCharacter(UChar c) {	65 static bool isNonCanonicalCharacter(UChar c) {

65 // We remove all non-ASCII characters, including non-printable ASCII characters.	66 // We remove all non-ASCII characters, including non-printable ASCII

	67 // characters.

66 //	68 //

67 // Note, we don't remove backslashes like PHP stripslashes(), which among other things converts "\\0" to the \0 character.	69 // Note, we don't remove backslashes like PHP stripslashes(), which among

68 // Instead, we remove backslashes and zeros (since the string "\\0" =(remove backslashes)=> "0"). However, this has the	70 // other things converts "\\0" to the \0 character. Instead, we remove

69 // adverse effect that we remove any legitimate zeros from a string.	71 // backslashes and zeros (since the string "\\0" =(remove backslashes)=> "0").

	72 // However, this has the adverse effect that we remove any legitimate zeros

	73 // from a string.

70 //	74 //

71 // We also remove forward-slash, because it is common for some servers to collapse successive path components, eg,	75 // We also remove forward-slash, because it is common for some servers to

72 // a//b becomes a/b.	76 // collapse successive path components, eg, a//b becomes a/b.

73 //	77 //

74 // We also remove the questionmark character, since some servers replace invalid high-bytes with a questionmark. We	78 // We also remove the questionmark character, since some servers replace

75 // are already stripping the high-bytes so we also strip the questionmark to match.	79 // invalid high-bytes with a questionmark. We are already stripping the

	80 // high-bytes so we also strip the questionmark to match.

76 //	81 //

77 // We also remove the percent character, since some servers strip it when there's a malformed sequence.	82 // We also remove the percent character, since some servers strip it when

	83 // there's a malformed sequence.

78 //	84 //

79 // For instance: new String("http://localhost:8000?x") => new String("http:localhost:8x").	85 // For instance: new String("http://localhost:8000?x") => new

	86 // String("http:localhost:8x").

80 return (c == '\\' \|\| c == '0' \|\| c == '\0' \|\| c == '/' \|\| c == '?' \|\|	87 return (c == '\\' \|\| c == '0' \|\| c == '\0' \|\| c == '/' \|\| c == '?' \|\|

81 c == '%' \|\| c >= 127);	88 c == '%' \|\| c >= 127);

82 }	89 }

83	90

84 static bool isRequiredForInjection(UChar c) {	91 static bool isRequiredForInjection(UChar c) {

85 return (c == '\'' \|\| c == '"' \|\| c == '<' \|\| c == '>');	92 return (c == '\'' \|\| c == '"' \|\| c == '<' \|\| c == '>');

86 }	93 }

87	94

88 static bool isTerminatingCharacter(UChar c) {	95 static bool isTerminatingCharacter(UChar c) {

89 return (c == '&' \|\| c == '/' \|\| c == '"' \|\| c == '\'' \|\| c == '<' \|\|	96 return (c == '&' \|\| c == '/' \|\| c == '"' \|\| c == '\'' \|\| c == '<' \|\|

(...skipping 27 matching lines...) Expand all Loading...
117	124

118 static bool startsOpeningScriptTagAt(const String& string, size_t start) {	125 static bool startsOpeningScriptTagAt(const String& string, size_t start) {

119 if (start + 6 >= string.length())	126 if (start + 6 >= string.length())

120 return false;	127 return false;

121 // TODO(esprehn): StringView should probably have startsWith.	128 // TODO(esprehn): StringView should probably have startsWith.

122 StringView script("<script");	129 StringView script("<script");

123 return equalIgnoringASCIICase(StringView(string, start, script.length()),	130 return equalIgnoringASCIICase(StringView(string, start, script.length()),

124 script);	131 script);

125 }	132 }

126	133

127 // If other files need this, we should move this to core/html/parser/HTMLParserIdioms.h	134 // If other files need this, we should move this to

	135 // core/html/parser/HTMLParserIdioms.h

128 template <size_t inlineCapacity>	136 template <size_t inlineCapacity>

129 bool threadSafeMatch(const Vector<UChar, inlineCapacity>& vector,	137 bool threadSafeMatch(const Vector<UChar, inlineCapacity>& vector,

130 const QualifiedName& qname) {	138 const QualifiedName& qname) {

131 return equalIgnoringNullity(vector, qname.localName().impl());	139 return equalIgnoringNullity(vector, qname.localName().impl());

132 }	140 }

133	141

134 static bool hasName(const HTMLToken& token, const QualifiedName& name) {	142 static bool hasName(const HTMLToken& token, const QualifiedName& name) {

135 return threadSafeMatch(token.name(), name);	143 return threadSafeMatch(token.name(), name);

136 }	144 }

137	145

138 static bool findAttributeWithName(const HTMLToken& token,	146 static bool findAttributeWithName(const HTMLToken& token,

139 const QualifiedName& name,	147 const QualifiedName& name,

140 size_t& indexOfMatchingAttribute) {	148 size_t& indexOfMatchingAttribute) {

141 // Notice that we're careful not to ref the StringImpl here because we might be on a background thread.	149 // Notice that we're careful not to ref the StringImpl here because we might

	150 // be on a background thread.

142 const String& attrName = name.namespaceURI() == XLinkNames::xlinkNamespaceURI	151 const String& attrName = name.namespaceURI() == XLinkNames::xlinkNamespaceURI

143 ? "xlink:" + name.localName().getString()	152 ? "xlink:" + name.localName().getString()

144 : name.localName().getString();	153 : name.localName().getString();

145	154

146 for (size_t i = 0; i < token.attributes().size(); ++i) {	155 for (size_t i = 0; i < token.attributes().size(); ++i) {

147 if (equalIgnoringNullity(token.attributes().at(i).nameAsVector(),	156 if (equalIgnoringNullity(token.attributes().at(i).nameAsVector(),

148 attrName)) {	157 attrName)) {

149 indexOfMatchingAttribute = i;	158 indexOfMatchingAttribute = i;

150 return true;	159 return true;

151 }	160 }

152 }	161 }

153 return false;	162 return false;

154 }	163 }

155	164

156 static bool isNameOfInlineEventHandler(const Vector<UChar, 32>& name) {	165 static bool isNameOfInlineEventHandler(const Vector<UChar, 32>& name) {

157 const size_t lengthOfShortestInlineEventHandlerName = 5; // To wit: oncut.	166 const size_t lengthOfShortestInlineEventHandlerName = 5; // To wit: oncut.

158 if (name.size() < lengthOfShortestInlineEventHandlerName)	167 if (name.size() < lengthOfShortestInlineEventHandlerName)

159 return false;	168 return false;

160 return name[0] == 'o' && name[1] == 'n';	169 return name[0] == 'o' && name[1] == 'n';

161 }	170 }

162	171

163 static bool isDangerousHTTPEquiv(const String& value) {	172 static bool isDangerousHTTPEquiv(const String& value) {

164 String equiv = value.stripWhiteSpace();	173 String equiv = value.stripWhiteSpace();

165 return equalIgnoringCase(equiv, "refresh") \|\|	174 return equalIgnoringCase(equiv, "refresh") \|\|

166 equalIgnoringCase(equiv, "set-cookie");	175 equalIgnoringCase(equiv, "set-cookie");

167 }	176 }

168	177

169 static inline String decode16BitUnicodeEscapeSequences(const String& string) {	178 static inline String decode16BitUnicodeEscapeSequences(const String& string) {

170 // Note, the encoding is ignored since each %u-escape sequence represents a UTF-16 code unit.	179 // Note, the encoding is ignored since each %u-escape sequence represents a

	180 // UTF-16 code unit.

171 return decodeEscapeSequences<Unicode16BitEscapeSequence>(string,	181 return decodeEscapeSequences<Unicode16BitEscapeSequence>(string,

172 UTF8Encoding());	182 UTF8Encoding());

173 }	183 }

174	184

175 static inline String decodeStandardURLEscapeSequences(	185 static inline String decodeStandardURLEscapeSequences(

176 const String& string,	186 const String& string,

177 const WTF::TextEncoding& encoding) {	187 const WTF::TextEncoding& encoding) {

178 // We use decodeEscapeSequences() instead of decodeURLEscapeSequences() (declared in weborigin/KURL.h) to	188 // We use decodeEscapeSequences() instead of decodeURLEscapeSequences()

179 // avoid platform-specific URL decoding differences (e.g. KURLGoogle).	189 // (declared in weborigin/KURL.h) to avoid platform-specific URL decoding

	190 // differences (e.g. KURLGoogle).

180 return decodeEscapeSequences<URLEscapeSequence>(string, encoding);	191 return decodeEscapeSequences<URLEscapeSequence>(string, encoding);

181 }	192 }

182	193

183 static String fullyDecodeString(const String& string,	194 static String fullyDecodeString(const String& string,

184 const WTF::TextEncoding& encoding) {	195 const WTF::TextEncoding& encoding) {

185 size_t oldWorkingStringLength;	196 size_t oldWorkingStringLength;

186 String workingString = string;	197 String workingString = string;

187 do {	198 do {

188 oldWorkingStringLength = workingString.length();	199 oldWorkingStringLength = workingString.length();

189 workingString = decode16BitUnicodeEscapeSequences(	200 workingString = decode16BitUnicodeEscapeSequences(

190 decodeStandardURLEscapeSequences(workingString, encoding));	201 decodeStandardURLEscapeSequences(workingString, encoding));

191 } while (workingString.length() < oldWorkingStringLength);	202 } while (workingString.length() < oldWorkingStringLength);

192 workingString.replace('+', ' ');	203 workingString.replace('+', ' ');

193 return workingString;	204 return workingString;

194 }	205 }

195	206

196 static void truncateForSrcLikeAttribute(String& decodedSnippet) {	207 static void truncateForSrcLikeAttribute(String& decodedSnippet) {

197 // In HTTP URLs, characters following the first ?, #, or third slash may come from	208 // In HTTP URLs, characters following the first ?, #, or third slash may come

198 // the page itself and can be merely ignored by an attacker's server when a remote	209 // from the page itself and can be merely ignored by an attacker's server when

199 // script or script-like resource is requested. In DATA URLS, the payload starts at	210 // a remote script or script-like resource is requested. In DATA URLS, the

200 // the first comma, and the first /*, //, or <!-- may introduce a comment. Also,	211 // payload starts at the first comma, and the first /*, //, or <!-- may

201 // DATA URLs may use the same string literal tricks as with script content itself.	212 // introduce a comment.

202 // In either case, content following this may come from the page and may be ignored	213 //

203 // when the script is executed. Also, any of these characters may now be represented	214 // Also, DATA URLs may use the same string literal tricks as with script

204 // by the (enlarged) set of html5 entities.	215 // content itself. In either case, content following this may come from the

205 // For simplicity, we don't differentiate based on URL scheme, and stop at the first	216 // page and may be ignored when the script is executed. Also, any of these

206 // & (since it might be part of an entity for any of the subsequent punctuation), the	217 // characters may now be represented by the (enlarged) set of html5 entities.

207 // first # or ?, the third slash, or the first slash, <, ', or " once a comma is seen.	218 //

	219 // For simplicity, we don't differentiate based on URL scheme, and stop at the

	220 // first & (since it might be part of an entity for any of the subsequent

	221 // punctuation), the first # or ?, the third slash, or the first slash, <, ',

	222 // or " once a comma is seen.

208 int slashCount = 0;	223 int slashCount = 0;

209 bool commaSeen = false;	224 bool commaSeen = false;

210 for (size_t currentLength = 0; currentLength < decodedSnippet.length();	225 for (size_t currentLength = 0; currentLength < decodedSnippet.length();

211 ++currentLength) {	226 ++currentLength) {

212 UChar currentChar = decodedSnippet[currentLength];	227 UChar currentChar = decodedSnippet[currentLength];

213 if (currentChar == '&' \|\| currentChar == '?' \|\| currentChar == '#' \|\|	228 if (currentChar == '&' \|\| currentChar == '?' \|\| currentChar == '#' \|\|

214 ((currentChar == '/' \|\| currentChar == '\\') &&	229 ((currentChar == '/' \|\| currentChar == '\\') &&

215 (commaSeen \|\| ++slashCount > 2)) \|\|	230 (commaSeen \|\| ++slashCount > 2)) \|\|

216 (currentChar == '<' && commaSeen) \|\|	231 (currentChar == '<' && commaSeen) \|\|

217 (currentChar == '\'' && commaSeen) \|\|	232 (currentChar == '\'' && commaSeen) \|\|

218 (currentChar == '"' && commaSeen)) {	233 (currentChar == '"' && commaSeen)) {

219 decodedSnippet.truncate(currentLength);	234 decodedSnippet.truncate(currentLength);

220 return;	235 return;

221 }	236 }

222 if (currentChar == ',')	237 if (currentChar == ',')

223 commaSeen = true;	238 commaSeen = true;

224 }	239 }

225 }	240 }

226	241

227 static void truncateForScriptLikeAttribute(String& decodedSnippet) {	242 static void truncateForScriptLikeAttribute(String& decodedSnippet) {

228 // Beware of trailing characters which came from the page itself, not the	243 // Beware of trailing characters which came from the page itself, not the

229 // injected vector. Excluding the terminating character covers common cases	244 // injected vector. Excluding the terminating character covers common cases

230 // where the page immediately ends the attribute, but doesn't cover more	245 // where the page immediately ends the attribute, but doesn't cover more

231 // complex cases where there is other page data following the injection.	246 // complex cases where there is other page data following the injection.

	247 //

232 // Generally, these won't parse as javascript, so the injected vector	248 // Generally, these won't parse as javascript, so the injected vector

233 // typically excludes them from consideration via a single-line comment or	249 // typically excludes them from consideration via a single-line comment or

234 // by enclosing them in a string literal terminated later by the page's own	250 // by enclosing them in a string literal terminated later by the page's own

235 // closing punctuation. Since the snippet has not been parsed, the vector	251 // closing punctuation. Since the snippet has not been parsed, the vector

236 // may also try to introduce these via entities. As a result, we'd like to	252 // may also try to introduce these via entities. As a result, we'd like to

237 // stop before the first "//", the first <!--, the first entity, or the first	253 // stop before the first "//", the first <!--, the first entity, or the first

238 // quote not immediately following the first equals sign (taking whitespace	254 // quote not immediately following the first equals sign (taking whitespace

239 // into consideration). To keep things simpler, we don't try to distinguish	255 // into consideration).

240 // between entity-introducing ampersands vs. other uses, nor do we bother to	256 //

241 // check for a second slash for a comment, nor do we bother to check for	257 // To keep things simpler, we don't try to distinguish between

242 // !-- following a less-than sign. We stop instead on any ampersand,	258 // entity-introducing ampersands vs. other uses, nor do we bother to check for

243 // slash, or less-than sign.	259 // a second slash for a comment, nor do we bother to check for !-- following a

	260 // less-than sign. We stop instead on any ampersand, slash, or less-than sign.

244 size_t position = 0;	261 size_t position = 0;

245 if ((position = decodedSnippet.find("=")) != kNotFound &&	262 if ((position = decodedSnippet.find("=")) != kNotFound &&

246 (position = decodedSnippet.find(isNotHTMLSpace<UChar>, position + 1)) !=	263 (position = decodedSnippet.find(isNotHTMLSpace<UChar>, position + 1)) !=

247 kNotFound &&	264 kNotFound &&

248 (position = decodedSnippet.find(	265 (position = decodedSnippet.find(

249 isTerminatingCharacter,	266 isTerminatingCharacter,

250 isHTMLQuote(decodedSnippet[position]) ? position + 1 : position)) !=	267 isHTMLQuote(decodedSnippet[position]) ? position + 1 : position)) !=

251 kNotFound) {	268 kNotFound) {

252 decodedSnippet.truncate(position);	269 decodedSnippet.truncate(position);

253 }	270 }

(...skipping 65 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
319 m_documentURL = document->url().copy();	336 m_documentURL = document->url().copy();

320	337

321 // In theory, the Document could have detached from the LocalFrame after the	338 // In theory, the Document could have detached from the LocalFrame after the

322 // XSSAuditor was constructed.	339 // XSSAuditor was constructed.

323 if (!document->frame()) {	340 if (!document->frame()) {

324 m_isEnabled = false;	341 m_isEnabled = false;

325 return;	342 return;

326 }	343 }

327	344

328 if (m_documentURL.isEmpty()) {	345 if (m_documentURL.isEmpty()) {

329 // The URL can be empty when opening a new browser window or calling window.open("").	346 // The URL can be empty when opening a new browser window or calling

	347 // window.open("").

330 m_isEnabled = false;	348 m_isEnabled = false;

331 return;	349 return;

332 }	350 }

333	351

334 if (m_documentURL.protocolIsData()) {	352 if (m_documentURL.protocolIsData()) {

335 m_isEnabled = false;	353 m_isEnabled = false;

336 return;	354 return;

337 }	355 }

338	356

339 if (document->encoding().isValid())	357 if (document->encoding().isValid())

(...skipping 165 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
505	523

506 if (m_state == FilteringTokens && m_scriptTagFoundInRequest) {	524 if (m_state == FilteringTokens && m_scriptTagFoundInRequest) {

507 String snippet = canonicalizedSnippetForJavaScript(request);	525 String snippet = canonicalizedSnippetForJavaScript(request);

508 if (isContainedInRequest(snippet))	526 if (isContainedInRequest(snippet))

509 m_state = SuppressingAdjacentCharacterTokens;	527 m_state = SuppressingAdjacentCharacterTokens;

510 else if (!snippet.isEmpty())	528 else if (!snippet.isEmpty())

511 m_state = PermittingAdjacentCharacterTokens;	529 m_state = PermittingAdjacentCharacterTokens;

512 }	530 }

513 if (m_state == SuppressingAdjacentCharacterTokens) {	531 if (m_state == SuppressingAdjacentCharacterTokens) {

514 request.token.eraseCharacters();	532 request.token.eraseCharacters();

515 request.token.appendToCharacter(	533 // Technically, character tokens can't be empty.

516 ' '); // Technically, character tokens can't be empty.	534 request.token.appendToCharacter(' ');

517 return true;	535 return true;

518 }	536 }

519 return false;	537 return false;

520 }	538 }

521	539

522 bool XSSAuditor::filterScriptToken(const FilterTokenRequest& request) {	540 bool XSSAuditor::filterScriptToken(const FilterTokenRequest& request) {

523 ASSERT(request.token.type() == HTMLToken::StartTag);	541 ASSERT(request.token.type() == HTMLToken::StartTag);

524 ASSERT(hasName(request.token, scriptTag));	542 ASSERT(hasName(request.token, scriptTag));

525	543

526 bool didBlockScript = false;	544 bool didBlockScript = false;

(...skipping 128 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
655 AllowSameOriginHref);	673 AllowSameOriginHref);

656 }	674 }

657	675

658 bool XSSAuditor::eraseDangerousAttributesIfInjected(	676 bool XSSAuditor::eraseDangerousAttributesIfInjected(

659 const FilterTokenRequest& request) {	677 const FilterTokenRequest& request) {

660 bool didBlockScript = false;	678 bool didBlockScript = false;

661 for (size_t i = 0; i < request.token.attributes().size(); ++i) {	679 for (size_t i = 0; i < request.token.attributes().size(); ++i) {

662 bool eraseAttribute = false;	680 bool eraseAttribute = false;

663 bool valueContainsJavaScriptURL = false;	681 bool valueContainsJavaScriptURL = false;

664 const HTMLToken::Attribute& attribute = request.token.attributes().at(i);	682 const HTMLToken::Attribute& attribute = request.token.attributes().at(i);

665 // FIXME: Don't create a new String for every attribute.value in the document.	683 // FIXME: Don't create a new String for every attribute.value in the

	684 // document.

666 if (isNameOfInlineEventHandler(attribute.nameAsVector())) {	685 if (isNameOfInlineEventHandler(attribute.nameAsVector())) {

667 eraseAttribute = isContainedInRequest(	686 eraseAttribute = isContainedInRequest(

668 canonicalize(snippetFromAttribute(request, attribute),	687 canonicalize(snippetFromAttribute(request, attribute),

669 ScriptLikeAttributeTruncation));	688 ScriptLikeAttributeTruncation));

670 } else if (isSemicolonSeparatedAttribute(attribute)) {	689 } else if (isSemicolonSeparatedAttribute(attribute)) {

671 String subValue =	690 String subValue =

672 semicolonSeparatedValueContainingJavaScriptURL(attribute.value());	691 semicolonSeparatedValueContainingJavaScriptURL(attribute.value());

673 if (!subValue.isEmpty()) {	692 if (!subValue.isEmpty()) {

674 valueContainsJavaScriptURL = true;	693 valueContainsJavaScriptURL = true;

675 eraseAttribute =	694 eraseAttribute =

(...skipping 46 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
722	741

723 request.token.eraseValueOfAttribute(indexOfAttribute);	742 request.token.eraseValueOfAttribute(indexOfAttribute);

724 if (!replacementValue.isEmpty())	743 if (!replacementValue.isEmpty())

725 request.token.appendToAttributeValue(indexOfAttribute, replacementValue);	744 request.token.appendToAttributeValue(indexOfAttribute, replacementValue);

726	745

727 return true;	746 return true;

728 }	747 }

729	748

730 String XSSAuditor::canonicalizedSnippetForTagName(	749 String XSSAuditor::canonicalizedSnippetForTagName(

731 const FilterTokenRequest& request) {	750 const FilterTokenRequest& request) {

732 // Grab a fixed number of characters equal to the length of the token's name plus one (to account for the "<").	751 // Grab a fixed number of characters equal to the length of the token's name

	752 // plus one (to account for the "<").

733 return canonicalize(request.sourceTracker.sourceForToken(request.token)	753 return canonicalize(request.sourceTracker.sourceForToken(request.token)

734 .substring(0, request.token.name().size() + 1),	754 .substring(0, request.token.name().size() + 1),

735 NoTruncation);	755 NoTruncation);

736 }	756 }

737	757

738 String XSSAuditor::nameFromAttribute(const FilterTokenRequest& request,	758 String XSSAuditor::nameFromAttribute(const FilterTokenRequest& request,

739 const HTMLToken::Attribute& attribute) {	759 const HTMLToken::Attribute& attribute) {

740 // The range includes the character which terminates the name. So,	760 // The range includes the character which terminates the name. So,

741 // for an input of \|name="value"\|, the snippet is \|name=\|.	761 // for an input of \|name="value"\|, the snippet is \|name=\|.

742 int start = attribute.nameRange().start - request.token.startIndex();	762 int start = attribute.nameRange().start - request.token.startIndex();

(...skipping 12 matching lines...) Expand all Loading...
755 int end = attribute.valueRange().end - request.token.startIndex();	775 int end = attribute.valueRange().end - request.token.startIndex();

756 return request.sourceTracker.sourceForToken(request.token)	776 return request.sourceTracker.sourceForToken(request.token)

757 .substring(start, end - start);	777 .substring(start, end - start);

758 }	778 }

759	779

760 String XSSAuditor::canonicalize(String snippet, TruncationKind treatment) {	780 String XSSAuditor::canonicalize(String snippet, TruncationKind treatment) {

761 String decodedSnippet = fullyDecodeString(snippet, m_encoding);	781 String decodedSnippet = fullyDecodeString(snippet, m_encoding);

762	782

763 if (treatment != NoTruncation) {	783 if (treatment != NoTruncation) {

764 if (decodedSnippet.length() > kMaximumFragmentLengthTarget) {	784 if (decodedSnippet.length() > kMaximumFragmentLengthTarget) {

765 // Let the page influence the stopping point to avoid disclosing leading fragments.	785 // Let the page influence the stopping point to avoid disclosing leading

766 // Stop when we hit whitespace, since that is unlikely to be part of a leading fragment.	786 // fragments. Stop when we hit whitespace, since that is unlikely to be

	787 // part of a leading fragment.

767 size_t position = kMaximumFragmentLengthTarget;	788 size_t position = kMaximumFragmentLengthTarget;

768 while (position < decodedSnippet.length() &&	789 while (position < decodedSnippet.length() &&

769 !isHTMLSpace(decodedSnippet[position]))	790 !isHTMLSpace(decodedSnippet[position]))

770 ++position;	791 ++position;

771 decodedSnippet.truncate(position);	792 decodedSnippet.truncate(position);

772 }	793 }

773 if (treatment == SrcLikeAttributeTruncation)	794 if (treatment == SrcLikeAttributeTruncation)

774 truncateForSrcLikeAttribute(decodedSnippet);	795 truncateForSrcLikeAttribute(decodedSnippet);

775 else if (treatment == ScriptLikeAttributeTruncation)	796 else if (treatment == ScriptLikeAttributeTruncation)

776 truncateForScriptLikeAttribute(decodedSnippet);	797 truncateForScriptLikeAttribute(decodedSnippet);

777 }	798 }

778	799

779 return decodedSnippet.removeCharacters(&isNonCanonicalCharacter);	800 return decodedSnippet.removeCharacters(&isNonCanonicalCharacter);

780 }	801 }

781	802

782 String XSSAuditor::canonicalizedSnippetForJavaScript(	803 String XSSAuditor::canonicalizedSnippetForJavaScript(

783 const FilterTokenRequest& request) {	804 const FilterTokenRequest& request) {

784 String string = request.sourceTracker.sourceForToken(request.token);	805 String string = request.sourceTracker.sourceForToken(request.token);

785 size_t startPosition = 0;	806 size_t startPosition = 0;

786 size_t endPosition = string.length();	807 size_t endPosition = string.length();

787 size_t foundPosition = kNotFound;	808 size_t foundPosition = kNotFound;

788 size_t lastNonSpacePosition = kNotFound;	809 size_t lastNonSpacePosition = kNotFound;

789	810

790 // Skip over initial comments to find start of code.	811 // Skip over initial comments to find start of code.

791 while (startPosition < endPosition) {	812 while (startPosition < endPosition) {

792 while (startPosition < endPosition &&	813 while (startPosition < endPosition &&

793 isHTMLSpace<UChar>(string[startPosition]))	814 isHTMLSpace<UChar>(string[startPosition]))

794 startPosition++;	815 startPosition++;

795	816

796 // Under SVG/XML rules, only HTML comment syntax matters and the parser returns	817 // Under SVG/XML rules, only HTML comment syntax matters and the parser

797 // these as separate comment tokens. Having consumed whitespace, we need not look	818 // returns these as separate comment tokens. Having consumed whitespace,

798 // further for these.	819 // we need not look further for these.

799 if (request.shouldAllowCDATA)	820 if (request.shouldAllowCDATA)

800 break;	821 break;

801	822

802 // Under HTML rules, both the HTML and JS comment syntax matter, and the HTML	823 // Under HTML rules, both the HTML and JS comment syntax matter, and the

803 // comment ends at the end of the line, not with -->.	824 // HTML comment ends at the end of the line, not with -->.

804 if (startsHTMLCommentAt(string, startPosition) \|\|	825 if (startsHTMLCommentAt(string, startPosition) \|\|

805 startsSingleLineCommentAt(string, startPosition)) {	826 startsSingleLineCommentAt(string, startPosition)) {

806 while (startPosition < endPosition && !isJSNewline(string[startPosition]))	827 while (startPosition < endPosition && !isJSNewline(string[startPosition]))

807 startPosition++;	828 startPosition++;

808 } else if (startsMultiLineCommentAt(string, startPosition)) {	829 } else if (startsMultiLineCommentAt(string, startPosition)) {

809 if (startPosition + 2 < endPosition &&	830 if (startPosition + 2 < endPosition &&

810 (foundPosition = string.find("*/", startPosition + 2)) != kNotFound)	831 (foundPosition = string.find("*/", startPosition + 2)) != kNotFound)

811 startPosition = foundPosition + 2;	832 startPosition = foundPosition + 2;

812 else	833 else

813 startPosition = endPosition;	834 startPosition = endPosition;

814 } else	835 } else

815 break;	836 break;

816 }	837 }

817	838

818 String result;	839 String result;

819 while (startPosition < endPosition && !result.length()) {	840 while (startPosition < endPosition && !result.length()) {

820 // Stop at next comment (using the same rules as above for SVG/XML vs HTML), when we encounter a comma,	841 // Stop at next comment (using the same rules as above for SVG/XML vs HTML),

821 // when we encounter a backtick, when we hit an opening <script> tag, or when we exceed the maximum length	842 // when we encounter a comma, when we encounter a backtick, when we hit an

822 // target. The comma rule covers a common parameter concatenation case performed by some web servers. The	843 // comma rule covers a common parameter concatenation case performed by some

823 // backtick rule covers the ECMA6 multi-line template string feature.	844 // comma rule covers a common parameter concatenation case performed by some

	845 // web servers. The backtick rule covers the ECMA6 multi-line template

	846 // string feature.

824 lastNonSpacePosition = kNotFound;	847 lastNonSpacePosition = kNotFound;

825 for (foundPosition = startPosition; foundPosition < endPosition;	848 for (foundPosition = startPosition; foundPosition < endPosition;

826 foundPosition++) {	849 foundPosition++) {

827 if (!request.shouldAllowCDATA) {	850 if (!request.shouldAllowCDATA) {

828 if (startsSingleLineCommentAt(string, foundPosition) \|\|	851 if (startsSingleLineCommentAt(string, foundPosition) \|\|

829 startsMultiLineCommentAt(string, foundPosition) \|\|	852 startsMultiLineCommentAt(string, foundPosition) \|\|

830 startsHTMLCommentAt(string, foundPosition)) {	853 startsHTMLCommentAt(string, foundPosition)) {

831 break;	854 break;

832 }	855 }

833 }	856 }

834 if (string[foundPosition] == ',' \|\| string[foundPosition] == '`')	857 if (string[foundPosition] == ',' \|\| string[foundPosition] == '`')

835 break;	858 break;

836	859

837 if (lastNonSpacePosition != kNotFound &&	860 if (lastNonSpacePosition != kNotFound &&

838 startsOpeningScriptTagAt(string, foundPosition)) {	861 startsOpeningScriptTagAt(string, foundPosition)) {

839 foundPosition = lastNonSpacePosition + 1;	862 foundPosition = lastNonSpacePosition + 1;

840 break;	863 break;

841 }	864 }

842 if (foundPosition > startPosition + kMaximumFragmentLengthTarget) {	865 if (foundPosition > startPosition + kMaximumFragmentLengthTarget) {

843 // After hitting the length target, we can only stop at a point where we know we are	866 // After hitting the length target, we can only stop at a point where we

844 // not in the middle of a %-escape sequence. For the sake of simplicity, approximate	867 // know we are not in the middle of a %-escape sequence. For the sake of

845 // not stopping inside a (possibly multiply encoded) %-escape sequence by breaking on	868 // simplicity, approximate not stopping inside a (possibly multiply

846 // whitespace only. We should have enough text in these cases to avoid false positives.	869 // encoded) %-escape sequence by breaking on whitespace only. We should

	870 // have enough text in these cases to avoid false positives.

847 if (isHTMLSpace<UChar>(string[foundPosition]))	871 if (isHTMLSpace<UChar>(string[foundPosition]))

848 break;	872 break;

849 }	873 }

850 if (!isHTMLSpace<UChar>(string[foundPosition]))	874 if (!isHTMLSpace<UChar>(string[foundPosition]))

851 lastNonSpacePosition = foundPosition;	875 lastNonSpacePosition = foundPosition;

852 }	876 }

853 result = canonicalize(	877 result = canonicalize(

854 string.substring(startPosition, foundPosition - startPosition),	878 string.substring(startPosition, foundPosition - startPosition),

855 NoTruncation);	879 NoTruncation);

856 startPosition = foundPosition + 1;	880 startPosition = foundPosition + 1;

(...skipping 36 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
893 }	917 }

894	918

895 bool XSSAuditor::isSafeToSendToAnotherThread() const {	919 bool XSSAuditor::isSafeToSendToAnotherThread() const {

896 return m_documentURL.isSafeToSendToAnotherThread() &&	920 return m_documentURL.isSafeToSendToAnotherThread() &&

897 m_decodedURL.isSafeToSendToAnotherThread() &&	921 m_decodedURL.isSafeToSendToAnotherThread() &&

898 m_decodedHTTPBody.isSafeToSendToAnotherThread() &&	922 m_decodedHTTPBody.isSafeToSendToAnotherThread() &&

899 m_httpBodyAsString.isSafeToSendToAnotherThread();	923 m_httpBodyAsString.isSafeToSendToAnotherThread();

900 }	924 }

901	925

902 } // namespace blink	926 } // namespace blink

OLD	NEW

« no previous file with comments | « third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp ('k') | no next file » | no next file with comments »