Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(21)

Side by Side Diff: third_party/WebKit/Source/core/html/parser/XSSAuditor.cpp

Issue 2386893002: Reformat comments in core/html/parser (Closed)
Patch Set: self review Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2011 Adam Barth. All Rights Reserved. 2 * Copyright (C) 2011 Adam Barth. All Rights Reserved.
3 * Copyright (C) 2011 Daniel Bates (dbates@intudata.com). 3 * Copyright (C) 2011 Daniel Bates (dbates@intudata.com).
4 * 4 *
5 * Redistribution and use in source and binary forms, with or without 5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions 6 * modification, are permitted provided that the following conditions
7 * are met: 7 * are met:
8 * 1. Redistributions of source code must retain the above copyright 8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer. 9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright 10 * 2. Redistributions in binary form must reproduce the above copyright
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
43 #include "core/loader/DocumentLoader.h" 43 #include "core/loader/DocumentLoader.h"
44 #include "core/loader/MixedContentChecker.h" 44 #include "core/loader/MixedContentChecker.h"
45 #include "platform/network/EncodedFormData.h" 45 #include "platform/network/EncodedFormData.h"
46 #include "platform/text/DecodeEscapeSequences.h" 46 #include "platform/text/DecodeEscapeSequences.h"
47 #include "wtf/ASCIICType.h" 47 #include "wtf/ASCIICType.h"
48 #include "wtf/PtrUtil.h" 48 #include "wtf/PtrUtil.h"
49 #include <memory> 49 #include <memory>
50 50
51 namespace { 51 namespace {
52 52
53 // SecurityOrigin::urlWithUniqueSecurityOrigin() can't be used cross-thread, or we'd use it instead. 53 // SecurityOrigin::urlWithUniqueSecurityOrigin() can't be used cross-thread, or
54 // we'd use it instead.
54 const char kURLWithUniqueOrigin[] = "data:,"; 55 const char kURLWithUniqueOrigin[] = "data:,";
55 56
56 const char kSafeJavaScriptURL[] = "javascript:void(0)"; 57 const char kSafeJavaScriptURL[] = "javascript:void(0)";
57 58
58 } // namespace 59 } // namespace
59 60
60 namespace blink { 61 namespace blink {
61 62
62 using namespace HTMLNames; 63 using namespace HTMLNames;
63 64
64 static bool isNonCanonicalCharacter(UChar c) { 65 static bool isNonCanonicalCharacter(UChar c) {
65 // We remove all non-ASCII characters, including non-printable ASCII character s. 66 // We remove all non-ASCII characters, including non-printable ASCII
67 // characters.
66 // 68 //
67 // Note, we don't remove backslashes like PHP stripslashes(), which among othe r things converts "\\0" to the \0 character. 69 // Note, we don't remove backslashes like PHP stripslashes(), which among
68 // Instead, we remove backslashes and zeros (since the string "\\0" =(remove b ackslashes)=> "0"). However, this has the 70 // other things converts "\\0" to the \0 character. Instead, we remove
69 // adverse effect that we remove any legitimate zeros from a string. 71 // backslashes and zeros (since the string "\\0" =(remove backslashes)=> "0").
72 // However, this has the adverse effect that we remove any legitimate zeros
73 // from a string.
70 // 74 //
71 // We also remove forward-slash, because it is common for some servers to coll apse successive path components, eg, 75 // We also remove forward-slash, because it is common for some servers to
72 // a//b becomes a/b. 76 // collapse successive path components, eg, a//b becomes a/b.
73 // 77 //
74 // We also remove the questionmark character, since some servers replace invalid high-bytes with a questionmark. We 78 // We also remove the questionmark character, since some servers replace
75 // are already stripping the high-bytes so we also strip the questionmark to m atch. 79 // invalid high-bytes with a questionmark. We are already stripping the
80 // high-bytes so we also strip the questionmark to match.
76 // 81 //
77 // We also remove the percent character, since some servers strip it when there's a malformed sequence. 82 // We also remove the percent character, since some servers strip it when
83 // there's a malformed sequence.
78 // 84 //
79 // For instance: new String("http://localhost:8000?x") => new String("http:loc alhost:8x"). 85 // For instance: new String("http://localhost:8000?x") => new
86 // String("http:localhost:8x").
80 return (c == '\\' || c == '0' || c == '\0' || c == '/' || c == '?' || 87 return (c == '\\' || c == '0' || c == '\0' || c == '/' || c == '?' ||
81 c == '%' || c >= 127); 88 c == '%' || c >= 127);
82 } 89 }
83 90
84 static bool isRequiredForInjection(UChar c) { 91 static bool isRequiredForInjection(UChar c) {
85 return (c == '\'' || c == '"' || c == '<' || c == '>'); 92 return (c == '\'' || c == '"' || c == '<' || c == '>');
86 } 93 }
87 94
88 static bool isTerminatingCharacter(UChar c) { 95 static bool isTerminatingCharacter(UChar c) {
89 return (c == '&' || c == '/' || c == '"' || c == '\'' || c == '<' || 96 return (c == '&' || c == '/' || c == '"' || c == '\'' || c == '<' ||
(...skipping 27 matching lines...) Expand all
117 124
118 static bool startsOpeningScriptTagAt(const String& string, size_t start) { 125 static bool startsOpeningScriptTagAt(const String& string, size_t start) {
119 if (start + 6 >= string.length()) 126 if (start + 6 >= string.length())
120 return false; 127 return false;
121 // TODO(esprehn): StringView should probably have startsWith. 128 // TODO(esprehn): StringView should probably have startsWith.
122 StringView script("<script"); 129 StringView script("<script");
123 return equalIgnoringASCIICase(StringView(string, start, script.length()), 130 return equalIgnoringASCIICase(StringView(string, start, script.length()),
124 script); 131 script);
125 } 132 }
126 133
127 // If other files need this, we should move this to core/html/parser/HTMLParserI dioms.h 134 // If other files need this, we should move this to
135 // core/html/parser/HTMLParserIdioms.h
128 template <size_t inlineCapacity> 136 template <size_t inlineCapacity>
129 bool threadSafeMatch(const Vector<UChar, inlineCapacity>& vector, 137 bool threadSafeMatch(const Vector<UChar, inlineCapacity>& vector,
130 const QualifiedName& qname) { 138 const QualifiedName& qname) {
131 return equalIgnoringNullity(vector, qname.localName().impl()); 139 return equalIgnoringNullity(vector, qname.localName().impl());
132 } 140 }
133 141
134 static bool hasName(const HTMLToken& token, const QualifiedName& name) { 142 static bool hasName(const HTMLToken& token, const QualifiedName& name) {
135 return threadSafeMatch(token.name(), name); 143 return threadSafeMatch(token.name(), name);
136 } 144 }
137 145
138 static bool findAttributeWithName(const HTMLToken& token, 146 static bool findAttributeWithName(const HTMLToken& token,
139 const QualifiedName& name, 147 const QualifiedName& name,
140 size_t& indexOfMatchingAttribute) { 148 size_t& indexOfMatchingAttribute) {
141 // Notice that we're careful not to ref the StringImpl here because we might b e on a background thread. 149 // Notice that we're careful not to ref the StringImpl here because we might
150 // be on a background thread.
142 const String& attrName = name.namespaceURI() == XLinkNames::xlinkNamespaceURI 151 const String& attrName = name.namespaceURI() == XLinkNames::xlinkNamespaceURI
143 ? "xlink:" + name.localName().getString() 152 ? "xlink:" + name.localName().getString()
144 : name.localName().getString(); 153 : name.localName().getString();
145 154
146 for (size_t i = 0; i < token.attributes().size(); ++i) { 155 for (size_t i = 0; i < token.attributes().size(); ++i) {
147 if (equalIgnoringNullity(token.attributes().at(i).nameAsVector(), 156 if (equalIgnoringNullity(token.attributes().at(i).nameAsVector(),
148 attrName)) { 157 attrName)) {
149 indexOfMatchingAttribute = i; 158 indexOfMatchingAttribute = i;
150 return true; 159 return true;
151 } 160 }
152 } 161 }
153 return false; 162 return false;
154 } 163 }
155 164
156 static bool isNameOfInlineEventHandler(const Vector<UChar, 32>& name) { 165 static bool isNameOfInlineEventHandler(const Vector<UChar, 32>& name) {
157 const size_t lengthOfShortestInlineEventHandlerName = 5; // To wit: oncut. 166 const size_t lengthOfShortestInlineEventHandlerName = 5; // To wit: oncut.
158 if (name.size() < lengthOfShortestInlineEventHandlerName) 167 if (name.size() < lengthOfShortestInlineEventHandlerName)
159 return false; 168 return false;
160 return name[0] == 'o' && name[1] == 'n'; 169 return name[0] == 'o' && name[1] == 'n';
161 } 170 }
162 171
163 static bool isDangerousHTTPEquiv(const String& value) { 172 static bool isDangerousHTTPEquiv(const String& value) {
164 String equiv = value.stripWhiteSpace(); 173 String equiv = value.stripWhiteSpace();
165 return equalIgnoringCase(equiv, "refresh") || 174 return equalIgnoringCase(equiv, "refresh") ||
166 equalIgnoringCase(equiv, "set-cookie"); 175 equalIgnoringCase(equiv, "set-cookie");
167 } 176 }
168 177
169 static inline String decode16BitUnicodeEscapeSequences(const String& string) { 178 static inline String decode16BitUnicodeEscapeSequences(const String& string) {
170 // Note, the encoding is ignored since each %u-escape sequence represents a UT F-16 code unit. 179 // Note, the encoding is ignored since each %u-escape sequence represents a
180 // UTF-16 code unit.
171 return decodeEscapeSequences<Unicode16BitEscapeSequence>(string, 181 return decodeEscapeSequences<Unicode16BitEscapeSequence>(string,
172 UTF8Encoding()); 182 UTF8Encoding());
173 } 183 }
174 184
175 static inline String decodeStandardURLEscapeSequences( 185 static inline String decodeStandardURLEscapeSequences(
176 const String& string, 186 const String& string,
177 const WTF::TextEncoding& encoding) { 187 const WTF::TextEncoding& encoding) {
178 // We use decodeEscapeSequences() instead of decodeURLEscapeSequences() (decla red in weborigin/KURL.h) to 188 // We use decodeEscapeSequences() instead of decodeURLEscapeSequences()
179 // avoid platform-specific URL decoding differences (e.g. KURLGoogle). 189 // (declared in weborigin/KURL.h) to avoid platform-specific URL decoding
190 // differences (e.g. KURLGoogle).
180 return decodeEscapeSequences<URLEscapeSequence>(string, encoding); 191 return decodeEscapeSequences<URLEscapeSequence>(string, encoding);
181 } 192 }
182 193
183 static String fullyDecodeString(const String& string, 194 static String fullyDecodeString(const String& string,
184 const WTF::TextEncoding& encoding) { 195 const WTF::TextEncoding& encoding) {
185 size_t oldWorkingStringLength; 196 size_t oldWorkingStringLength;
186 String workingString = string; 197 String workingString = string;
187 do { 198 do {
188 oldWorkingStringLength = workingString.length(); 199 oldWorkingStringLength = workingString.length();
189 workingString = decode16BitUnicodeEscapeSequences( 200 workingString = decode16BitUnicodeEscapeSequences(
190 decodeStandardURLEscapeSequences(workingString, encoding)); 201 decodeStandardURLEscapeSequences(workingString, encoding));
191 } while (workingString.length() < oldWorkingStringLength); 202 } while (workingString.length() < oldWorkingStringLength);
192 workingString.replace('+', ' '); 203 workingString.replace('+', ' ');
193 return workingString; 204 return workingString;
194 } 205 }
195 206
196 static void truncateForSrcLikeAttribute(String& decodedSnippet) { 207 static void truncateForSrcLikeAttribute(String& decodedSnippet) {
197 // In HTTP URLs, characters following the first ?, #, or third slash may come from 208 // In HTTP URLs, characters following the first ?, #, or third slash may come
198 // the page itself and can be merely ignored by an attacker's server when a re mote 209 // from the page itself and can be merely ignored by an attacker's server when
199 // script or script-like resource is requested. In DATA URLS, the payload star ts at 210 // a remote script or script-like resource is requested. In DATA URLS, the
200 // the first comma, and the first /*, //, or <!-- may introduce a comment. Also, 211 // payload starts at the first comma, and the first /*, //, or <!-- may
201 // DATA URLs may use the same string literal tricks as with script content its elf. 212 // introduce a comment.
202 // In either case, content following this may come from the page and may be ig nored 213 //
203 // when the script is executed. Also, any of these characters may now be repre sented 214 // Also, DATA URLs may use the same string literal tricks as with script
204 // by the (enlarged) set of html5 entities. 215 // content itself. In either case, content following this may come from the
205 // For simplicity, we don't differentiate based on URL scheme, and stop at the first 216 // page and may be ignored when the script is executed. Also, any of these
206 // & (since it might be part of an entity for any of the subsequent punctuatio n), the 217 // characters may now be represented by the (enlarged) set of html5 entities.
207 // first # or ?, the third slash, or the first slash, <, ', or " once a comma is seen. 218 //
219 // For simplicity, we don't differentiate based on URL scheme, and stop at the
220 // first & (since it might be part of an entity for any of the subsequent
221 // punctuation), the first # or ?, the third slash, or the first slash, <, ',
222 // or " once a comma is seen.
208 int slashCount = 0; 223 int slashCount = 0;
209 bool commaSeen = false; 224 bool commaSeen = false;
210 for (size_t currentLength = 0; currentLength < decodedSnippet.length(); 225 for (size_t currentLength = 0; currentLength < decodedSnippet.length();
211 ++currentLength) { 226 ++currentLength) {
212 UChar currentChar = decodedSnippet[currentLength]; 227 UChar currentChar = decodedSnippet[currentLength];
213 if (currentChar == '&' || currentChar == '?' || currentChar == '#' || 228 if (currentChar == '&' || currentChar == '?' || currentChar == '#' ||
214 ((currentChar == '/' || currentChar == '\\') && 229 ((currentChar == '/' || currentChar == '\\') &&
215 (commaSeen || ++slashCount > 2)) || 230 (commaSeen || ++slashCount > 2)) ||
216 (currentChar == '<' && commaSeen) || 231 (currentChar == '<' && commaSeen) ||
217 (currentChar == '\'' && commaSeen) || 232 (currentChar == '\'' && commaSeen) ||
218 (currentChar == '"' && commaSeen)) { 233 (currentChar == '"' && commaSeen)) {
219 decodedSnippet.truncate(currentLength); 234 decodedSnippet.truncate(currentLength);
220 return; 235 return;
221 } 236 }
222 if (currentChar == ',') 237 if (currentChar == ',')
223 commaSeen = true; 238 commaSeen = true;
224 } 239 }
225 } 240 }
226 241
227 static void truncateForScriptLikeAttribute(String& decodedSnippet) { 242 static void truncateForScriptLikeAttribute(String& decodedSnippet) {
228 // Beware of trailing characters which came from the page itself, not the 243 // Beware of trailing characters which came from the page itself, not the
229 // injected vector. Excluding the terminating character covers common cases 244 // injected vector. Excluding the terminating character covers common cases
230 // where the page immediately ends the attribute, but doesn't cover more 245 // where the page immediately ends the attribute, but doesn't cover more
231 // complex cases where there is other page data following the injection. 246 // complex cases where there is other page data following the injection.
247 //
232 // Generally, these won't parse as javascript, so the injected vector 248 // Generally, these won't parse as javascript, so the injected vector
233 // typically excludes them from consideration via a single-line comment or 249 // typically excludes them from consideration via a single-line comment or
234 // by enclosing them in a string literal terminated later by the page's own 250 // by enclosing them in a string literal terminated later by the page's own
235 // closing punctuation. Since the snippet has not been parsed, the vector 251 // closing punctuation. Since the snippet has not been parsed, the vector
236 // may also try to introduce these via entities. As a result, we'd like to 252 // may also try to introduce these via entities. As a result, we'd like to
237 // stop before the first "//", the first <!--, the first entity, or the first 253 // stop before the first "//", the first <!--, the first entity, or the first
238 // quote not immediately following the first equals sign (taking whitespace 254 // quote not immediately following the first equals sign (taking whitespace
239 // into consideration). To keep things simpler, we don't try to distinguish 255 // into consideration).
240 // between entity-introducing ampersands vs. other uses, nor do we bother to 256 //
241 // check for a second slash for a comment, nor do we bother to check for 257 // To keep things simpler, we don't try to distinguish between
242 // !-- following a less-than sign. We stop instead on any ampersand, 258 // entity-introducing ampersands vs. other uses, nor do we bother to check for
243 // slash, or less-than sign. 259 // a second slash for a comment, nor do we bother to check for !-- following a
260 // less-than sign. We stop instead on any ampersand, slash, or less-than sign.
244 size_t position = 0; 261 size_t position = 0;
245 if ((position = decodedSnippet.find("=")) != kNotFound && 262 if ((position = decodedSnippet.find("=")) != kNotFound &&
246 (position = decodedSnippet.find(isNotHTMLSpace<UChar>, position + 1)) != 263 (position = decodedSnippet.find(isNotHTMLSpace<UChar>, position + 1)) !=
247 kNotFound && 264 kNotFound &&
248 (position = decodedSnippet.find( 265 (position = decodedSnippet.find(
249 isTerminatingCharacter, 266 isTerminatingCharacter,
250 isHTMLQuote(decodedSnippet[position]) ? position + 1 : position)) != 267 isHTMLQuote(decodedSnippet[position]) ? position + 1 : position)) !=
251 kNotFound) { 268 kNotFound) {
252 decodedSnippet.truncate(position); 269 decodedSnippet.truncate(position);
253 } 270 }
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
319 m_documentURL = document->url().copy(); 336 m_documentURL = document->url().copy();
320 337
321 // In theory, the Document could have detached from the LocalFrame after the 338 // In theory, the Document could have detached from the LocalFrame after the
322 // XSSAuditor was constructed. 339 // XSSAuditor was constructed.
323 if (!document->frame()) { 340 if (!document->frame()) {
324 m_isEnabled = false; 341 m_isEnabled = false;
325 return; 342 return;
326 } 343 }
327 344
328 if (m_documentURL.isEmpty()) { 345 if (m_documentURL.isEmpty()) {
329 // The URL can be empty when opening a new browser window or calling window. open(""). 346 // The URL can be empty when opening a new browser window or calling
347 // window.open("").
330 m_isEnabled = false; 348 m_isEnabled = false;
331 return; 349 return;
332 } 350 }
333 351
334 if (m_documentURL.protocolIsData()) { 352 if (m_documentURL.protocolIsData()) {
335 m_isEnabled = false; 353 m_isEnabled = false;
336 return; 354 return;
337 } 355 }
338 356
339 if (document->encoding().isValid()) 357 if (document->encoding().isValid())
(...skipping 165 matching lines...) Expand 10 before | Expand all | Expand 10 after
505 523
506 if (m_state == FilteringTokens && m_scriptTagFoundInRequest) { 524 if (m_state == FilteringTokens && m_scriptTagFoundInRequest) {
507 String snippet = canonicalizedSnippetForJavaScript(request); 525 String snippet = canonicalizedSnippetForJavaScript(request);
508 if (isContainedInRequest(snippet)) 526 if (isContainedInRequest(snippet))
509 m_state = SuppressingAdjacentCharacterTokens; 527 m_state = SuppressingAdjacentCharacterTokens;
510 else if (!snippet.isEmpty()) 528 else if (!snippet.isEmpty())
511 m_state = PermittingAdjacentCharacterTokens; 529 m_state = PermittingAdjacentCharacterTokens;
512 } 530 }
513 if (m_state == SuppressingAdjacentCharacterTokens) { 531 if (m_state == SuppressingAdjacentCharacterTokens) {
514 request.token.eraseCharacters(); 532 request.token.eraseCharacters();
515 request.token.appendToCharacter( 533 // Technically, character tokens can't be empty.
516 ' '); // Technically, character tokens can't be empty. 534 request.token.appendToCharacter(' ');
517 return true; 535 return true;
518 } 536 }
519 return false; 537 return false;
520 } 538 }
521 539
522 bool XSSAuditor::filterScriptToken(const FilterTokenRequest& request) { 540 bool XSSAuditor::filterScriptToken(const FilterTokenRequest& request) {
523 ASSERT(request.token.type() == HTMLToken::StartTag); 541 ASSERT(request.token.type() == HTMLToken::StartTag);
524 ASSERT(hasName(request.token, scriptTag)); 542 ASSERT(hasName(request.token, scriptTag));
525 543
526 bool didBlockScript = false; 544 bool didBlockScript = false;
(...skipping 128 matching lines...) Expand 10 before | Expand all | Expand 10 after
655 AllowSameOriginHref); 673 AllowSameOriginHref);
656 } 674 }
657 675
658 bool XSSAuditor::eraseDangerousAttributesIfInjected( 676 bool XSSAuditor::eraseDangerousAttributesIfInjected(
659 const FilterTokenRequest& request) { 677 const FilterTokenRequest& request) {
660 bool didBlockScript = false; 678 bool didBlockScript = false;
661 for (size_t i = 0; i < request.token.attributes().size(); ++i) { 679 for (size_t i = 0; i < request.token.attributes().size(); ++i) {
662 bool eraseAttribute = false; 680 bool eraseAttribute = false;
663 bool valueContainsJavaScriptURL = false; 681 bool valueContainsJavaScriptURL = false;
664 const HTMLToken::Attribute& attribute = request.token.attributes().at(i); 682 const HTMLToken::Attribute& attribute = request.token.attributes().at(i);
665 // FIXME: Don't create a new String for every attribute.value in the documen t. 683 // FIXME: Don't create a new String for every attribute.value in the
684 // document.
666 if (isNameOfInlineEventHandler(attribute.nameAsVector())) { 685 if (isNameOfInlineEventHandler(attribute.nameAsVector())) {
667 eraseAttribute = isContainedInRequest( 686 eraseAttribute = isContainedInRequest(
668 canonicalize(snippetFromAttribute(request, attribute), 687 canonicalize(snippetFromAttribute(request, attribute),
669 ScriptLikeAttributeTruncation)); 688 ScriptLikeAttributeTruncation));
670 } else if (isSemicolonSeparatedAttribute(attribute)) { 689 } else if (isSemicolonSeparatedAttribute(attribute)) {
671 String subValue = 690 String subValue =
672 semicolonSeparatedValueContainingJavaScriptURL(attribute.value()); 691 semicolonSeparatedValueContainingJavaScriptURL(attribute.value());
673 if (!subValue.isEmpty()) { 692 if (!subValue.isEmpty()) {
674 valueContainsJavaScriptURL = true; 693 valueContainsJavaScriptURL = true;
675 eraseAttribute = 694 eraseAttribute =
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
722 741
723 request.token.eraseValueOfAttribute(indexOfAttribute); 742 request.token.eraseValueOfAttribute(indexOfAttribute);
724 if (!replacementValue.isEmpty()) 743 if (!replacementValue.isEmpty())
725 request.token.appendToAttributeValue(indexOfAttribute, replacementValue); 744 request.token.appendToAttributeValue(indexOfAttribute, replacementValue);
726 745
727 return true; 746 return true;
728 } 747 }
729 748
730 String XSSAuditor::canonicalizedSnippetForTagName( 749 String XSSAuditor::canonicalizedSnippetForTagName(
731 const FilterTokenRequest& request) { 750 const FilterTokenRequest& request) {
732 // Grab a fixed number of characters equal to the length of the token's name p lus one (to account for the "<"). 751 // Grab a fixed number of characters equal to the length of the token's name
752 // plus one (to account for the "<").
733 return canonicalize(request.sourceTracker.sourceForToken(request.token) 753 return canonicalize(request.sourceTracker.sourceForToken(request.token)
734 .substring(0, request.token.name().size() + 1), 754 .substring(0, request.token.name().size() + 1),
735 NoTruncation); 755 NoTruncation);
736 } 756 }
737 757
738 String XSSAuditor::nameFromAttribute(const FilterTokenRequest& request, 758 String XSSAuditor::nameFromAttribute(const FilterTokenRequest& request,
739 const HTMLToken::Attribute& attribute) { 759 const HTMLToken::Attribute& attribute) {
740 // The range includes the character which terminates the name. So, 760 // The range includes the character which terminates the name. So,
741 // for an input of |name="value"|, the snippet is |name=|. 761 // for an input of |name="value"|, the snippet is |name=|.
742 int start = attribute.nameRange().start - request.token.startIndex(); 762 int start = attribute.nameRange().start - request.token.startIndex();
(...skipping 12 matching lines...) Expand all
755 int end = attribute.valueRange().end - request.token.startIndex(); 775 int end = attribute.valueRange().end - request.token.startIndex();
756 return request.sourceTracker.sourceForToken(request.token) 776 return request.sourceTracker.sourceForToken(request.token)
757 .substring(start, end - start); 777 .substring(start, end - start);
758 } 778 }
759 779
760 String XSSAuditor::canonicalize(String snippet, TruncationKind treatment) { 780 String XSSAuditor::canonicalize(String snippet, TruncationKind treatment) {
761 String decodedSnippet = fullyDecodeString(snippet, m_encoding); 781 String decodedSnippet = fullyDecodeString(snippet, m_encoding);
762 782
763 if (treatment != NoTruncation) { 783 if (treatment != NoTruncation) {
764 if (decodedSnippet.length() > kMaximumFragmentLengthTarget) { 784 if (decodedSnippet.length() > kMaximumFragmentLengthTarget) {
765 // Let the page influence the stopping point to avoid disclosing leading f ragments. 785 // Let the page influence the stopping point to avoid disclosing leading
766 // Stop when we hit whitespace, since that is unlikely to be part of a leading fragment. 786 // fragments. Stop when we hit whitespace, since that is unlikely to be
787 // part of a leading fragment.
767 size_t position = kMaximumFragmentLengthTarget; 788 size_t position = kMaximumFragmentLengthTarget;
768 while (position < decodedSnippet.length() && 789 while (position < decodedSnippet.length() &&
769 !isHTMLSpace(decodedSnippet[position])) 790 !isHTMLSpace(decodedSnippet[position]))
770 ++position; 791 ++position;
771 decodedSnippet.truncate(position); 792 decodedSnippet.truncate(position);
772 } 793 }
773 if (treatment == SrcLikeAttributeTruncation) 794 if (treatment == SrcLikeAttributeTruncation)
774 truncateForSrcLikeAttribute(decodedSnippet); 795 truncateForSrcLikeAttribute(decodedSnippet);
775 else if (treatment == ScriptLikeAttributeTruncation) 796 else if (treatment == ScriptLikeAttributeTruncation)
776 truncateForScriptLikeAttribute(decodedSnippet); 797 truncateForScriptLikeAttribute(decodedSnippet);
777 } 798 }
778 799
779 return decodedSnippet.removeCharacters(&isNonCanonicalCharacter); 800 return decodedSnippet.removeCharacters(&isNonCanonicalCharacter);
780 } 801 }
781 802
782 String XSSAuditor::canonicalizedSnippetForJavaScript( 803 String XSSAuditor::canonicalizedSnippetForJavaScript(
783 const FilterTokenRequest& request) { 804 const FilterTokenRequest& request) {
784 String string = request.sourceTracker.sourceForToken(request.token); 805 String string = request.sourceTracker.sourceForToken(request.token);
785 size_t startPosition = 0; 806 size_t startPosition = 0;
786 size_t endPosition = string.length(); 807 size_t endPosition = string.length();
787 size_t foundPosition = kNotFound; 808 size_t foundPosition = kNotFound;
788 size_t lastNonSpacePosition = kNotFound; 809 size_t lastNonSpacePosition = kNotFound;
789 810
790 // Skip over initial comments to find start of code. 811 // Skip over initial comments to find start of code.
791 while (startPosition < endPosition) { 812 while (startPosition < endPosition) {
792 while (startPosition < endPosition && 813 while (startPosition < endPosition &&
793 isHTMLSpace<UChar>(string[startPosition])) 814 isHTMLSpace<UChar>(string[startPosition]))
794 startPosition++; 815 startPosition++;
795 816
796 // Under SVG/XML rules, only HTML comment syntax matters and the parser retu rns 817 // Under SVG/XML rules, only HTML comment syntax matters and the parser
797 // these as separate comment tokens. Having consumed whitespace, we need not look 818 // returns these as separate comment tokens. Having consumed whitespace,
798 // further for these. 819 // we need not look further for these.
799 if (request.shouldAllowCDATA) 820 if (request.shouldAllowCDATA)
800 break; 821 break;
801 822
802 // Under HTML rules, both the HTML and JS comment syntax matter, and the HTML 823 // Under HTML rules, both the HTML and JS comment syntax matter, and the
803 // comment ends at the end of the line, not with -->. 824 // HTML comment ends at the end of the line, not with -->.
804 if (startsHTMLCommentAt(string, startPosition) || 825 if (startsHTMLCommentAt(string, startPosition) ||
805 startsSingleLineCommentAt(string, startPosition)) { 826 startsSingleLineCommentAt(string, startPosition)) {
806 while (startPosition < endPosition && !isJSNewline(string[startPosition])) 827 while (startPosition < endPosition && !isJSNewline(string[startPosition]))
807 startPosition++; 828 startPosition++;
808 } else if (startsMultiLineCommentAt(string, startPosition)) { 829 } else if (startsMultiLineCommentAt(string, startPosition)) {
809 if (startPosition + 2 < endPosition && 830 if (startPosition + 2 < endPosition &&
810 (foundPosition = string.find("*/", startPosition + 2)) != kNotFound) 831 (foundPosition = string.find("*/", startPosition + 2)) != kNotFound)
811 startPosition = foundPosition + 2; 832 startPosition = foundPosition + 2;
812 else 833 else
813 startPosition = endPosition; 834 startPosition = endPosition;
814 } else 835 } else
815 break; 836 break;
816 } 837 }
817 838
818 String result; 839 String result;
819 while (startPosition < endPosition && !result.length()) { 840 while (startPosition < endPosition && !result.length()) {
820 // Stop at next comment (using the same rules as above for SVG/XML vs HTML), when we encounter a comma, 841 // Stop at next comment (using the same rules as above for SVG/XML vs HTML),
821 // when we encounter a backtick, when we hit an opening <script> tag, or when we exceed the maximum length 842 // when we encounter a comma, when we encounter a backtick, when we hit an
822 // target. The comma rule covers a common parameter concatenation case perfo rmed by some web servers. The 843 // opening <script> tag, or when we exceed the maximum length target. The
823 // backtick rule covers the ECMA6 multi-line template string feature. 844 // comma rule covers a common parameter concatenation case performed by some
845 // web servers. The backtick rule covers the ECMA6 multi-line template
846 // string feature.
824 lastNonSpacePosition = kNotFound; 847 lastNonSpacePosition = kNotFound;
825 for (foundPosition = startPosition; foundPosition < endPosition; 848 for (foundPosition = startPosition; foundPosition < endPosition;
826 foundPosition++) { 849 foundPosition++) {
827 if (!request.shouldAllowCDATA) { 850 if (!request.shouldAllowCDATA) {
828 if (startsSingleLineCommentAt(string, foundPosition) || 851 if (startsSingleLineCommentAt(string, foundPosition) ||
829 startsMultiLineCommentAt(string, foundPosition) || 852 startsMultiLineCommentAt(string, foundPosition) ||
830 startsHTMLCommentAt(string, foundPosition)) { 853 startsHTMLCommentAt(string, foundPosition)) {
831 break; 854 break;
832 } 855 }
833 } 856 }
834 if (string[foundPosition] == ',' || string[foundPosition] == '`') 857 if (string[foundPosition] == ',' || string[foundPosition] == '`')
835 break; 858 break;
836 859
837 if (lastNonSpacePosition != kNotFound && 860 if (lastNonSpacePosition != kNotFound &&
838 startsOpeningScriptTagAt(string, foundPosition)) { 861 startsOpeningScriptTagAt(string, foundPosition)) {
839 foundPosition = lastNonSpacePosition + 1; 862 foundPosition = lastNonSpacePosition + 1;
840 break; 863 break;
841 } 864 }
842 if (foundPosition > startPosition + kMaximumFragmentLengthTarget) { 865 if (foundPosition > startPosition + kMaximumFragmentLengthTarget) {
843 // After hitting the length target, we can only stop at a point where we know we are 866 // After hitting the length target, we can only stop at a point where we
844 // not in the middle of a %-escape sequence. For the sake of simplicity, approximate 867 // know we are not in the middle of a %-escape sequence. For the sake of
845 // not stopping inside a (possibly multiply encoded) %-escape sequence b y breaking on 868 // simplicity, approximate not stopping inside a (possibly multiply
846 // whitespace only. We should have enough text in these cases to avoid f alse positives. 869 // encoded) %-escape sequence by breaking on whitespace only. We should
870 // have enough text in these cases to avoid false positives.
847 if (isHTMLSpace<UChar>(string[foundPosition])) 871 if (isHTMLSpace<UChar>(string[foundPosition]))
848 break; 872 break;
849 } 873 }
850 if (!isHTMLSpace<UChar>(string[foundPosition])) 874 if (!isHTMLSpace<UChar>(string[foundPosition]))
851 lastNonSpacePosition = foundPosition; 875 lastNonSpacePosition = foundPosition;
852 } 876 }
853 result = canonicalize( 877 result = canonicalize(
854 string.substring(startPosition, foundPosition - startPosition), 878 string.substring(startPosition, foundPosition - startPosition),
855 NoTruncation); 879 NoTruncation);
856 startPosition = foundPosition + 1; 880 startPosition = foundPosition + 1;
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
893 } 917 }
894 918
895 bool XSSAuditor::isSafeToSendToAnotherThread() const { 919 bool XSSAuditor::isSafeToSendToAnotherThread() const {
896 return m_documentURL.isSafeToSendToAnotherThread() && 920 return m_documentURL.isSafeToSendToAnotherThread() &&
897 m_decodedURL.isSafeToSendToAnotherThread() && 921 m_decodedURL.isSafeToSendToAnotherThread() &&
898 m_decodedHTTPBody.isSafeToSendToAnotherThread() && 922 m_decodedHTTPBody.isSafeToSendToAnotherThread() &&
899 m_httpBodyAsString.isSafeToSendToAnotherThread(); 923 m_httpBodyAsString.isSafeToSendToAnotherThread();
900 } 924 }
901 925
902 } // namespace blink 926 } // namespace blink
OLDNEW
« no previous file with comments | « third_party/WebKit/Source/core/html/parser/TextResourceDecoder.cpp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698