third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp - Issue 2438263002: Possibly merge consecutive script fragments to reduce execution overhead

Unified Diff: third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp

Issue 2438263002: Possibly merge consecutive script fragments to reduce execution overhead

Patch Set: Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

« no previous file with comments | « third_party/WebKit/Source/core/html/parser/HTMLTokenizer.h ('k') | third_party/WebKit/Source/platform/text/SegmentedString.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp

diff --git a/third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp b/third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp

index df3edccc5f1c001da795603f76356eb650a02c1b..3283f453737077799d708a25d56cbfc710582c53 100644

--- a/third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp

+++ b/third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp

@@ -230,8 +230,12 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) {

END_STATE()

HTML_BEGIN_STATE(ScriptDataState) {

- if (cc == '<')

- HTML_ADVANCE_TO(ScriptDataLessThanSignState);

+ if (cc == '<') {

+ if (checkIfMergeScripts(source))

+ HTML_ADVANCE_TO(ScriptDataState);

+ else

+ HTML_ADVANCE_TO(ScriptDataLessThanSignState);

+ }

else if (cc == kEndOfFileMarker)

return emitEndOfFile(source);

else {

@@ -303,9 +307,11 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) {

HTML_ADVANCE_TO(BeforeAttributeNameState);

else if (cc == '/')

HTML_ADVANCE_TO(SelfClosingStartTagState);

- else if (cc == '>')

+ else if (cc == '>') {

+ if ((m_token->type() == HTMLToken::StartTag) && threadSafeMatch(attemptStaticStringCreation(m_token->name(), Likely8Bit), scriptTag))

+ reserveAttributeForCheck();

return emitAndResumeIn(source, HTMLTokenizer::DataState);

- else if (isASCIIUpper(cc)) {

+ } else if (isASCIIUpper(cc)) {

m_token->appendToName(toLowerCase(cc));

HTML_ADVANCE_TO(TagNameState);

} else if (cc == kEndOfFileMarker) {

@@ -805,6 +811,8 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) {

HTML_ADVANCE_TO(BeforeAttributeValueState);

} else if (cc == '>') {

m_token->endAttributeName(source.numberOfCharactersConsumed());

+ if ((m_token->type() == HTMLToken::StartTag) && threadSafeMatch(attemptStaticStringCreation(m_token->name(), Likely8Bit), scriptTag))

+ reserveAttributeForCheck();

return emitAndResumeIn(source, HTMLTokenizer::DataState);

} else if (isASCIIUpper(cc)) {

m_token->appendToAttributeName(toLowerCase(cc));

@@ -972,9 +980,11 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) {

HTML_ADVANCE_TO(BeforeAttributeNameState);

else if (cc == '/')

HTML_ADVANCE_TO(SelfClosingStartTagState);

- else if (cc == '>')

+ else if (cc == '>') {

+ if ((m_token->type() == HTMLToken::StartTag) && threadSafeMatch(attemptStaticStringCreation(m_token->name(), Likely8Bit), scriptTag))

+ reserveAttributeForCheck();

return emitAndResumeIn(source, HTMLTokenizer::DataState);

- else if (cc == kEndOfFileMarker) {

+ } else if (cc == kEndOfFileMarker) {

parseError();

HTML_RECONSUME_IN(DataState);

} else {

@@ -1557,6 +1567,150 @@ void HTMLTokenizer::updateStateFor(const String& tagName) {

setState(HTMLTokenizer::RAWTEXTState);

}

+// Copies the attribute list of the token currently being built (m_token)
+// into m_temporaryAttributeList. checkIfMergeScripts() later reads that list
+// when deciding whether the next <script> start tag may be merged.
+void HTMLTokenizer::reserveAttributeForCheck() {
+  m_temporaryAttributeList = m_token->attributes();
+}

+// Decides whether the script block that is ending at the current position
+// can be merged with a script block that immediately follows it, i.e. whether
+// the upcoming input has the shape
+//   </script> [whitespace] <script [attributes]>
+//
+// Merging is accepted when either
+//   * the remembered start tag had no attributes (m_temporaryAttributeList is
+//     empty) and the next start tag is exactly "<script>", or
+//   * the remembered start tag had attributes and compareAttribute() accepts
+//     the attributes of the next start tag.
+//
+// On success |source| is advanced by the number of characters scanned, so the
+// caller can keep tokenizing in ScriptDataState, and true is returned.
+// Otherwise |source| is left untouched and false is returned.
+//
+// NOTE(review): the scan only looks at source.getCurrentString(); how
+// getCharByIndex() behaves past the end of that substring is defined in
+// SegmentedString.h and is not visible here - confirm it is safe.
+bool HTMLTokenizer::checkIfMergeScripts(SegmentedString& source) {
+  // Presumably the lengths of "</script>" and "<script"; they must agree with
+  // the templates used by SegmentedString - TODO confirm.
+  const unsigned scriptEndTagLength = 9;
+  const unsigned scriptStartTagLength = 7;
+
+  String currentSubstring = source.getCurrentSubstring(scriptEndTagLength);
+  if (!source.isEqualToScriptEndTagTemplate(currentSubstring))
+    return false;
+
+  unsigned index = scriptEndTagLength - 1;
+  while (true) {
+    UChar cc = source.getCurrentString().getCharByIndex(++index);
+    if (isTokenizerWhitespace(cc))
+      continue;
+    if (cc != '<')
+      break;
+
+    currentSubstring = source.getCurrentSubstring(index, scriptStartTagLength);
+    if (!source.isEqualToScriptStartTagTemplate(currentSubstring))
+      break;
+
+    // |index| now designates the character right after "<script".
+    index += scriptStartTagLength;
+    cc = source.getCurrentString().getCharByIndex(index);
+
+    bool shouldMerge = false;
+    if (cc == '>') {
+      // Attribute-less start tag: only mergeable with an attribute-less one.
+      if (!m_temporaryAttributeList.isEmpty())
+        break;
+      shouldMerge = true;
+    } else {
+      // The tag name must be terminated by whitespace. Without this check a
+      // different element such as "<scriptx type=...>" would be scanned as
+      // if it were "<script type=...>", because compareAttribute() starts
+      // reading one character after |index|.
+      if (!isTokenizerWhitespace(cc) || m_temporaryAttributeList.isEmpty())
+        break;
+      // compareAttribute() moves |index| forward while it scans.
+      shouldMerge = compareAttribute(source, index);
+      if (!shouldMerge)
+        m_temporaryAttributeList.clear();
+      m_temporaryAttributeValueBuffer.clear();
+    }
+
+    if (shouldMerge) {
+      // Merge <script> ... </script> <script>
+      for (unsigned consumed = 0; consumed < index; ++consumed)
+        source.advanceAndUpdateLineNumber();
+    }
+    return shouldMerge;
+  }
+
+  // The end tag is not followed by a mergeable start tag; forget the
+  // remembered attributes.
+  m_temporaryAttributeList.clear();
+  return false;
+}

+// Scans the attributes of an upcoming <script ...> start tag without
+// consuming input and reports whether they are compatible with the
+// attributes remembered in m_temporaryAttributeList.
+//
+// |index| is an offset into source.getCurrentString(); scanning starts at
+// |index| + 1 and |index| is moved forward as characters are inspected.
+//
+// Returns false when
+//   * a "src" attribute is present in the remembered list or is found in the
+//     scanned tag,
+//   * an attribute starting with a letter other than 's' is not one of
+//     type / charset / language, is missing from the remembered list, or has
+//     a value rejected by compareAttributeValue(),
+//   * the remembered list holds more attributes than were matched, or
+//   * the end-of-file marker is read before '>'.
+// Returns true when '>' is reached and none of the above applies.
+bool HTMLTokenizer::compareAttribute(SegmentedString& source, unsigned& index) {
+  unsigned attributeCount = 0;
+  while (true) {
+    UChar cc = source.getCurrentString().getCharByIndex(++index);
+    if (isTokenizerWhitespace(cc))
+      continue;
+
+    // NOTE(review): assumes getCharByIndex() yields kEndOfFileMarker once the
+    // buffered input is exhausted - confirm in SegmentedString.h. Without an
+    // exit here nothing else in this loop stops on such a character.
+    if (cc == kEndOfFileMarker)
+      return false;
+
+    if (cc == '>') {
+      if (m_temporaryAttributeList.size() > attributeCount)
+        return false;
+      break;
+    }
+
+    // Keep the lowered character. Discarding it let upper-case names (for
+    // example "SRC") slip past every check below.
+    if (isASCIIUpper(cc))
+      cc = toLowerCase(cc);
+    if (!isASCIILower(cc))
+      continue;
+
+    if (cc == 's') {
+      // 'src' attribute, shouldn't merge scripts if exists
+      String currentSubstring =
+          source.getCurrentSubstring(index, srcAttr.localName().length());
+      if (getAttributeFromList(srcAttr) ||
+          threadSafeMatch(currentSubstring, srcAttr))
+        return false;
+      continue;
+    }
+
+    // Only 'type', 'charset' and 'language' are eligible for merging.
+    if (cc != 't' && cc != 'c' && cc != 'l')
+      return false;
+    const QualifiedName& expectedName =
+        (cc == 't') ? typeAttr : (cc == 'c') ? charsetAttr : languageAttr;
+
+    String currentSubstring =
+        source.getCurrentSubstring(index, expectedName.localName().length());
+    if (!getAttributeFromList(expectedName) ||
+        !threadSafeMatch(currentSubstring, expectedName))
+      return false;
+
+    ++attributeCount;
+    // Leave |index| on the last character of the attribute name;
+    // compareAttributeValue() continues from the character after it.
+    index += expectedName.localName().length() - 1;
+    if (!compareAttributeValue(source, index, expectedName))
+      return false;
+  }
+  return true;
+}

+// Reads the value that follows an attribute name in the upcoming start tag
+// and compares it, ignoring case, with the value of the attribute |qName|
+// remembered in m_temporaryAttributeList.
+//
+// |index| is an offset into source.getCurrentString(); reading starts at
+// |index| + 1 and |index| is left on the last character inspected. The value
+// read is collected in m_temporaryAttributeValueBuffer, which is cleared
+// first.
+//
+// Whitespace and '=' before the value are skipped. Only quoted values are
+// collected: any other character before an opening quote stops the scan with
+// an empty buffer. Returns false if the end-of-file marker is read or if
+// |qName| is not in the remembered list.
+bool HTMLTokenizer::compareAttributeValue(SegmentedString& source, unsigned& index, const QualifiedName& qName) {
+  // 0 while no opening quote has been seen, otherwise the quote character
+  // that opened the value.
+  UChar openingQuote = 0;
+  m_temporaryAttributeValueBuffer.clear();
+  while (true) {
+    UChar cc = source.getCurrentString().getCharByIndex(++index);
+
+    // NOTE(review): assumes getCharByIndex() yields kEndOfFileMarker once the
+    // buffered input is exhausted - confirm in SegmentedString.h. Without an
+    // exit here an unterminated quote would keep appending.
+    if (cc == kEndOfFileMarker)
+      return false;
+
+    if (openingQuote) {
+      // Only the quote character that opened the value closes it, so a value
+      // such as 'a"b' is read in full.
+      if (cc == openingQuote)
+        break;
+      if (isASCIIUpper(cc))
+        cc = toLowerCase(cc);
+      m_temporaryAttributeValueBuffer.append(cc);
+      continue;
+    }
+
+    if (isTokenizerWhitespace(cc) || cc == '=')
+      continue;
+    if (cc == '"' || cc == '\'') {
+      openingQuote = cc;
+      continue;
+    }
+    break;
+  }
+
+  const HTMLToken::Attribute* comparedAttribute = getAttributeFromList(qName);
+  if (!comparedAttribute)
+    return false;
+  String comparedAttributeValue(comparedAttribute->value());
+  return equalIgnoringCase(comparedAttributeValue, String(m_temporaryAttributeValueBuffer));
+}

// Returns whether the contents of m_temporaryBuffer equal |expectedString|,
// as determined by vectorEqualsString().
inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString) {

return vectorEqualsString(m_temporaryBuffer, expectedString);

}