Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(343)

Unified Diff: third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp

Issue 2438263002: Possibly merge consecutive script fragments to reduce execution overhead
Patch Set: Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp
diff --git a/third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp b/third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp
index df3edccc5f1c001da795603f76356eb650a02c1b..3283f453737077799d708a25d56cbfc710582c53 100644
--- a/third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp
+++ b/third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp
@@ -230,8 +230,12 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) {
END_STATE()
HTML_BEGIN_STATE(ScriptDataState) {
- if (cc == '<')
- HTML_ADVANCE_TO(ScriptDataLessThanSignState);
+ if (cc == '<') {
+ if (checkIfMergeScripts(source))
+ HTML_ADVANCE_TO(ScriptDataState);
+ else
+ HTML_ADVANCE_TO(ScriptDataLessThanSignState);
+ }
else if (cc == kEndOfFileMarker)
return emitEndOfFile(source);
else {
@@ -303,9 +307,11 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) {
HTML_ADVANCE_TO(BeforeAttributeNameState);
else if (cc == '/')
HTML_ADVANCE_TO(SelfClosingStartTagState);
- else if (cc == '>')
+ else if (cc == '>') {
+ if ((m_token->type() == HTMLToken::StartTag) && threadSafeMatch(attemptStaticStringCreation(m_token->name(), Likely8Bit), scriptTag))
+ reserveAttributeForCheck();
return emitAndResumeIn(source, HTMLTokenizer::DataState);
- else if (isASCIIUpper(cc)) {
+ } else if (isASCIIUpper(cc)) {
m_token->appendToName(toLowerCase(cc));
HTML_ADVANCE_TO(TagNameState);
} else if (cc == kEndOfFileMarker) {
@@ -805,6 +811,8 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) {
HTML_ADVANCE_TO(BeforeAttributeValueState);
} else if (cc == '>') {
m_token->endAttributeName(source.numberOfCharactersConsumed());
+ if ((m_token->type() == HTMLToken::StartTag) && threadSafeMatch(attemptStaticStringCreation(m_token->name(), Likely8Bit), scriptTag))
+ reserveAttributeForCheck();
return emitAndResumeIn(source, HTMLTokenizer::DataState);
} else if (isASCIIUpper(cc)) {
m_token->appendToAttributeName(toLowerCase(cc));
@@ -972,9 +980,11 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) {
HTML_ADVANCE_TO(BeforeAttributeNameState);
else if (cc == '/')
HTML_ADVANCE_TO(SelfClosingStartTagState);
- else if (cc == '>')
+ else if (cc == '>') {
+ if ((m_token->type() == HTMLToken::StartTag) && threadSafeMatch(attemptStaticStringCreation(m_token->name(), Likely8Bit), scriptTag))
+ reserveAttributeForCheck();
return emitAndResumeIn(source, HTMLTokenizer::DataState);
- else if (cc == kEndOfFileMarker) {
+ } else if (cc == kEndOfFileMarker) {
parseError();
HTML_RECONSUME_IN(DataState);
} else {
@@ -1557,6 +1567,150 @@ void HTMLTokenizer::updateStateFor(const String& tagName) {
setState(HTMLTokenizer::RAWTEXTState);
}
+void HTMLTokenizer::reserveAttributeForCheck() {  // Called from nextToken() just before a <script> start tag is emitted.
+ m_temporaryAttributeList = m_token->attributes();  // Copy of the tag's attributes; read later by checkIfMergeScripts()/compareAttribute().
+}
+
+// Called from ScriptDataState when the current character is '<'. Decides
+// whether the upcoming "</script> [whitespace] <script ...>" sequence can be
+// swallowed so that two consecutive inline scripts are tokenized as one.
+//
+// Merging happens only when the next start tag carries the same attributes as
+// the ones saved in m_temporaryAttributeList by reserveAttributeForCheck():
+// either both tags have no attributes, or compareAttribute() reports a match.
+//
+// On success |source| is advanced up to the closing '>' of the second start
+// tag (the caller's HTML_ADVANCE_TO consumes that '>') and true is returned.
+// On failure |source| is left untouched and false is returned. When the text
+// at the cursor is not the end-tag template at all, the saved attribute list
+// is kept, because a later '<' in the same script still needs it.
+bool HTMLTokenizer::checkIfMergeScripts(SegmentedString& source) {
+  const unsigned scriptEndTagLength = 9;    // Length of "</script>".
+  const unsigned scriptStartTagLength = 7;  // Length of "<script".
+
+  if (!source.isEqualToScriptEndTagTemplate(
+          source.getCurrentSubstring(scriptEndTagLength)))
+    return false;
+
+  // Skip any whitespace between the end tag and whatever follows it.
+  unsigned index = scriptEndTagLength;
+  UChar cc = source.getCurrentString().getCharByIndex(index);
+  while (isTokenizerWhitespace(cc))
+    cc = source.getCurrentString().getCharByIndex(++index);
+
+  bool shouldMerge = false;
+  if (cc == '<' &&
+      source.isEqualToScriptStartTagTemplate(
+          source.getCurrentSubstring(index, scriptStartTagLength))) {
+    index += scriptStartTagLength;
+    cc = source.getCurrentString().getCharByIndex(index);
+    if (cc == '>') {
+      // Merge <script> ... </script> <script> only if the first tag had no
+      // attributes either.
+      shouldMerge = m_temporaryAttributeList.isEmpty();
+    } else if (isTokenizerWhitespace(cc) &&
+               !m_temporaryAttributeList.isEmpty()) {
+      // The tag name must be terminated by whitespace before attributes
+      // start. Accepting any character here would treat e.g. "<scriptx ...>"
+      // as a script tag, because compareAttribute() skips the character at
+      // |index| before it starts scanning.
+      shouldMerge = compareAttribute(source, index);
+      m_temporaryAttributeValueBuffer.clear();
+    }
+  }
+
+  if (!shouldMerge) {
+    // The next tag is different, so the saved attributes are stale.
+    m_temporaryAttributeList.clear();
+    return false;
+  }
+
+  // Consume everything up to (not including) the '>' of the second tag.
+  for (unsigned consumed = 0; consumed < index; ++consumed)
+    source.advanceAndUpdateLineNumber();
+  return true;
+}
+
+// Scans the attributes of a not-yet-tokenized <script ...> start tag and
+// checks them against m_temporaryAttributeList (the attributes of the
+// previously emitted script tag).
+//
+// |index| is an offset into source.getCurrentString(). On entry it points at
+// the character just before the first attribute character to examine; it is
+// advanced as the tag is scanned and, when true is returned, points at the
+// closing '>'.
+//
+// Returns true only if every attribute found is one of type/charset/language,
+// is also present in the saved list with an equal value (see
+// compareAttributeValue), and the saved list has no more entries than were
+// matched. Anything else -- including a 'src' attribute, an unknown attribute
+// or an unexpected character -- returns false so the scripts are not merged.
+bool HTMLTokenizer::compareAttribute(SegmentedString& source, unsigned& index) {
+  unsigned attributeCount = 0;
+
+  while (true) {
+    UChar cc = source.getCurrentString().getCharByIndex(++index);
+    if (isTokenizerWhitespace(cc))
+      continue;
+    if (cc == '>')
+      return m_temporaryAttributeList.size() <= attributeCount;
+
+    // The result of toLowerCase() must be stored; previously it was dropped,
+    // so upper-case attribute names (e.g. SRC) were skipped unnoticed.
+    if (isASCIIUpper(cc))
+      cc = toLowerCase(cc);
+
+    // Only these attributes are allowed on a script that gets merged. 'src'
+    // and every other name fall through to the rejection below.
+    const QualifiedName* expectedAttr = nullptr;
+    if (cc == 't')
+      expectedAttr = &typeAttr;
+    else if (cc == 'c')
+      expectedAttr = &charsetAttr;
+    else if (cc == 'l')
+      expectedAttr = &languageAttr;
+
+    // Rejecting every unrecognised character (rather than skipping it) also
+    // guarantees the scan stops if it walks past the buffered input.
+    // NOTE(review): assumes getCharByIndex() yields a non-matching character
+    // out of range -- confirm against SegmentedString.
+    if (!expectedAttr)
+      return false;
+
+    const unsigned nameLength = expectedAttr->localName().length();
+    const String currentSubstring =
+        source.getCurrentSubstring(index, nameLength);
+    if (!getAttributeFromList(*expectedAttr) ||
+        !threadSafeMatch(currentSubstring, *expectedAttr))
+      return false;
+
+    ++attributeCount;
+    // Leave |index| on the last character of the name; the value parser
+    // pre-increments before reading.
+    index += nameLength - 1;
+    if (!compareAttributeValue(source, index, *expectedAttr))
+      return false;
+  }
+}
+
+// Reads the value that follows an attribute name in the raw source and
+// compares it, ignoring case, with the value of |qName| stored in
+// m_temporaryAttributeList.
+//
+// |index| is an offset into source.getCurrentString(); on entry it points at
+// the last character of the attribute name. For a quoted value it is left on
+// the closing quote. For an unquoted or missing value it is left just before
+// the character that ended the scan, so the caller's pre-increment re-reads
+// that character (e.g. the '>' in "<script type>") instead of skipping it.
+//
+// The parsed value is collected in m_temporaryAttributeValueBuffer. Unquoted
+// values are not parsed and compare as the empty string.
+// Returns false if |qName| is not in the saved list, if the quoted value is
+// not terminated, or if the values differ.
+bool HTMLTokenizer::compareAttributeValue(SegmentedString& source, unsigned& index, const QualifiedName& qName) {
+  m_temporaryAttributeValueBuffer.clear();
+
+  const HTMLToken::Attribute* comparedAttribute = getAttributeFromList(qName);
+  if (!comparedAttribute)
+    return false;
+
+  // 0 while no quote has been seen, otherwise the quote character that
+  // opened the value. Only the same character may close it, so that a value
+  // such as 'a"b' is not cut short at the embedded quote.
+  UChar openingQuote = 0;
+  while (true) {
+    const UChar cc = source.getCurrentString().getCharByIndex(++index);
+    if (openingQuote) {
+      if (cc == openingQuote)
+        break;
+      // NOTE(review): presumably getCharByIndex() returns the end-of-file
+      // marker once the buffered input is exhausted -- confirm. Without this
+      // guard an unterminated value would be appended to without end.
+      if (cc == kEndOfFileMarker)
+        return false;
+      // No case folding needed here: the comparison below ignores case.
+      m_temporaryAttributeValueBuffer.append(cc);
+      continue;
+    }
+    if (isTokenizerWhitespace(cc) || cc == '=')
+      continue;
+    if (cc == '"' || cc == '\'') {
+      openingQuote = cc;
+      continue;
+    }
+    // Not a quoted value: hand the character back to the caller.
+    --index;
+    break;
+  }
+
+  const String comparedAttributeValue(comparedAttribute->value());
+  return equalIgnoringCase(comparedAttributeValue, String(m_temporaryAttributeValueBuffer));
+}
+
inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString) {
return vectorEqualsString(m_temporaryBuffer, expectedString);
}
« no previous file with comments | « third_party/WebKit/Source/core/html/parser/HTMLTokenizer.h ('k') | third_party/WebKit/Source/platform/text/SegmentedString.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698