third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp - Issue 2438263002: Possibly merge consecutive script fragments to reduce execution overhead

Side by Side Diff: third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp

Issue 2438263002: Possibly merge consecutive script fragments to reduce execution overhead

Patch Set: Created 4 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 /*	1 /*

2 * Copyright (C) 2008 Apple Inc. All Rights Reserved.	2 * Copyright (C) 2008 Apple Inc. All Rights Reserved.

3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/	3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/

4 * Copyright (C) 2010 Google, Inc. All Rights Reserved.	4 * Copyright (C) 2010 Google, Inc. All Rights Reserved.

5 *	5 *

6 * Redistribution and use in source and binary forms, with or without	6 * Redistribution and use in source and binary forms, with or without

7 * modification, are permitted provided that the following conditions	7 * modification, are permitted provided that the following conditions

8 * are met:	8 * are met:

9 * 1. Redistributions of source code must retain the above copyright	9 * 1. Redistributions of source code must retain the above copyright

10 * notice, this list of conditions and the following disclaimer.	10 * notice, this list of conditions and the following disclaimer.

(...skipping 212 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
223 else if (cc == kEndOfFileMarker)	223 else if (cc == kEndOfFileMarker)

224 return emitEndOfFile(source);	224 return emitEndOfFile(source);

225 else {	225 else {

226 bufferCharacter(cc);	226 bufferCharacter(cc);

227 HTML_ADVANCE_TO(RAWTEXTState);	227 HTML_ADVANCE_TO(RAWTEXTState);

228 }	228 }

229 }	229 }

230 END_STATE()	230 END_STATE()

231	231

232 HTML_BEGIN_STATE(ScriptDataState) {	232 HTML_BEGIN_STATE(ScriptDataState) {

233 if (cc == '<')	233 if (cc == '<') {

234 HTML_ADVANCE_TO(ScriptDataLessThanSignState);	234 if (checkIfMergeScripts(source))

	235 HTML_ADVANCE_TO(ScriptDataState);

	236 else

	237 HTML_ADVANCE_TO(ScriptDataLessThanSignState);

	238 }

235 else if (cc == kEndOfFileMarker)	239 else if (cc == kEndOfFileMarker)

236 return emitEndOfFile(source);	240 return emitEndOfFile(source);

237 else {	241 else {

238 bufferCharacter(cc);	242 bufferCharacter(cc);

239 HTML_ADVANCE_TO(ScriptDataState);	243 HTML_ADVANCE_TO(ScriptDataState);

240 }	244 }

241 }	245 }

242 END_STATE()	246 END_STATE()

243	247

244 HTML_BEGIN_STATE(PLAINTEXTState) {	248 HTML_BEGIN_STATE(PLAINTEXTState) {

(...skipping 51 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
296 HTML_RECONSUME_IN(BogusCommentState);	300 HTML_RECONSUME_IN(BogusCommentState);

297 }	301 }

298 }	302 }

299 END_STATE()	303 END_STATE()

300	304

301 HTML_BEGIN_STATE(TagNameState) {	305 HTML_BEGIN_STATE(TagNameState) {

302 if (isTokenizerWhitespace(cc))	306 if (isTokenizerWhitespace(cc))

303 HTML_ADVANCE_TO(BeforeAttributeNameState);	307 HTML_ADVANCE_TO(BeforeAttributeNameState);

304 else if (cc == '/')	308 else if (cc == '/')

305 HTML_ADVANCE_TO(SelfClosingStartTagState);	309 HTML_ADVANCE_TO(SelfClosingStartTagState);

306 else if (cc == '>')	310 else if (cc == '>') {

	311 if ((m_token->type() == HTMLToken::StartTag) && threadSafeMatch(attemptStaticStringCreation(m_token->name(), Likely8Bit), scriptTag))

	312 reserveAttributeForCheck();

307 return emitAndResumeIn(source, HTMLTokenizer::DataState);	313 return emitAndResumeIn(source, HTMLTokenizer::DataState);

308 else if (isASCIIUpper(cc)) {	314 } else if (isASCIIUpper(cc)) {

309 m_token->appendToName(toLowerCase(cc));	315 m_token->appendToName(toLowerCase(cc));

310 HTML_ADVANCE_TO(TagNameState);	316 HTML_ADVANCE_TO(TagNameState);

311 } else if (cc == kEndOfFileMarker) {	317 } else if (cc == kEndOfFileMarker) {

312 parseError();	318 parseError();

313 HTML_RECONSUME_IN(DataState);	319 HTML_RECONSUME_IN(DataState);

314 } else {	320 } else {

315 m_token->appendToName(cc);	321 m_token->appendToName(cc);

316 HTML_ADVANCE_TO(TagNameState);	322 HTML_ADVANCE_TO(TagNameState);

317 }	323 }

318 }	324 }

(...skipping 479 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
798 m_token->endAttributeName(source.numberOfCharactersConsumed());	804 m_token->endAttributeName(source.numberOfCharactersConsumed());

799 HTML_ADVANCE_TO(AfterAttributeNameState);	805 HTML_ADVANCE_TO(AfterAttributeNameState);

800 } else if (cc == '/') {	806 } else if (cc == '/') {

801 m_token->endAttributeName(source.numberOfCharactersConsumed());	807 m_token->endAttributeName(source.numberOfCharactersConsumed());

802 HTML_ADVANCE_TO(SelfClosingStartTagState);	808 HTML_ADVANCE_TO(SelfClosingStartTagState);

803 } else if (cc == '=') {	809 } else if (cc == '=') {

804 m_token->endAttributeName(source.numberOfCharactersConsumed());	810 m_token->endAttributeName(source.numberOfCharactersConsumed());

805 HTML_ADVANCE_TO(BeforeAttributeValueState);	811 HTML_ADVANCE_TO(BeforeAttributeValueState);

806 } else if (cc == '>') {	812 } else if (cc == '>') {

807 m_token->endAttributeName(source.numberOfCharactersConsumed());	813 m_token->endAttributeName(source.numberOfCharactersConsumed());

	814 if ((m_token->type() == HTMLToken::StartTag) && threadSafeMatch(attemptStaticStringCreation(m_token->name(), Likely8Bit), scriptTag))

	815 reserveAttributeForCheck();

808 return emitAndResumeIn(source, HTMLTokenizer::DataState);	816 return emitAndResumeIn(source, HTMLTokenizer::DataState);

809 } else if (isASCIIUpper(cc)) {	817 } else if (isASCIIUpper(cc)) {

810 m_token->appendToAttributeName(toLowerCase(cc));	818 m_token->appendToAttributeName(toLowerCase(cc));

811 HTML_ADVANCE_TO(AttributeNameState);	819 HTML_ADVANCE_TO(AttributeNameState);

812 } else if (cc == kEndOfFileMarker) {	820 } else if (cc == kEndOfFileMarker) {

813 parseError();	821 parseError();

814 m_token->endAttributeName(source.numberOfCharactersConsumed());	822 m_token->endAttributeName(source.numberOfCharactersConsumed());

815 HTML_RECONSUME_IN(DataState);	823 HTML_RECONSUME_IN(DataState);

816 } else {	824 } else {

817 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')	825 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')

(...skipping 147 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
965 else	973 else

966 ASSERT_NOT_REACHED();	974 ASSERT_NOT_REACHED();

967 }	975 }

968 END_STATE()	976 END_STATE()

969	977

970 HTML_BEGIN_STATE(AfterAttributeValueQuotedState) {	978 HTML_BEGIN_STATE(AfterAttributeValueQuotedState) {

971 if (isTokenizerWhitespace(cc))	979 if (isTokenizerWhitespace(cc))

972 HTML_ADVANCE_TO(BeforeAttributeNameState);	980 HTML_ADVANCE_TO(BeforeAttributeNameState);

973 else if (cc == '/')	981 else if (cc == '/')

974 HTML_ADVANCE_TO(SelfClosingStartTagState);	982 HTML_ADVANCE_TO(SelfClosingStartTagState);

975 else if (cc == '>')	983 else if (cc == '>') {

	984 if ((m_token->type() == HTMLToken::StartTag) && threadSafeMatch(attemptStaticStringCreation(m_token->name(), Likely8Bit), scriptTag))

	985 reserveAttributeForCheck();

976 return emitAndResumeIn(source, HTMLTokenizer::DataState);	986 return emitAndResumeIn(source, HTMLTokenizer::DataState);

977 else if (cc == kEndOfFileMarker) {	987 } else if (cc == kEndOfFileMarker) {

978 parseError();	988 parseError();

979 HTML_RECONSUME_IN(DataState);	989 HTML_RECONSUME_IN(DataState);

980 } else {	990 } else {

981 parseError();	991 parseError();

982 HTML_RECONSUME_IN(BeforeAttributeNameState);	992 HTML_RECONSUME_IN(BeforeAttributeNameState);

983 }	993 }

984 }	994 }

985 END_STATE()	995 END_STATE()

986	996

987 HTML_BEGIN_STATE(SelfClosingStartTagState) {	997 HTML_BEGIN_STATE(SelfClosingStartTagState) {

(...skipping 562 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1550 setState(HTMLTokenizer::ScriptDataState);	1560 setState(HTMLTokenizer::ScriptDataState);

1551 else if (threadSafeMatch(tagName, styleTag) ||	1561 else if (threadSafeMatch(tagName, styleTag) ||

1552 threadSafeMatch(tagName, iframeTag) ||	1562 threadSafeMatch(tagName, iframeTag) ||

1553 threadSafeMatch(tagName, xmpTag) ||	1563 threadSafeMatch(tagName, xmpTag) ||

1554 (threadSafeMatch(tagName, noembedTag) && m_options.pluginsEnabled) ||	1564 (threadSafeMatch(tagName, noembedTag) && m_options.pluginsEnabled) ||

1555 threadSafeMatch(tagName, noframesTag) ||	1565 threadSafeMatch(tagName, noframesTag) ||

1556 (threadSafeMatch(tagName, noscriptTag) && m_options.scriptEnabled))	1566 (threadSafeMatch(tagName, noscriptTag) && m_options.scriptEnabled))

1557 setState(HTMLTokenizer::RAWTEXTState);	1567 setState(HTMLTokenizer::RAWTEXTState);

1558 }	1568 }

1559	1569

	1570 void HTMLTokenizer::reserveAttributeForCheck() {

	1571 m_temporaryAttributeList = m_token->attributes();

	1572 }

	1573

	1574 bool HTMLTokenizer::checkIfMergeScripts(SegmentedString& source) {

	1575 unsigned scriptEndTagLength = 9;

	1576 unsigned scriptStartTagLength = 7;

	1577 String currentSubstring = source.getCurrentSubstring(scriptEndTagLength);

	1578 if (source.isEqualToScriptEndTagTemplate(currentSubstring)) {

	1579 unsigned index = scriptEndTagLength - 1;

	1580 while (true) {

	1581 UChar cc = source.getCurrentString().getCharByIndex(++index);

	1582 if (isTokenizerWhitespace(cc))

	1583 continue;

	1584 if (cc == '<') {

	1585 currentSubstring = source.getCurrentSubstring(index, scriptStartTagLength);

	1586 if (source.isEqualToScriptStartTagTemplate(currentSubstring)) {

	1587 index += scriptStartTagLength;

	1588 cc = source.getCurrentString().getCharByIndex(index);

	1589 if (cc == '>' && m_temporaryAttributeList.isEmpty()) {

	1590 //Merge <script> ... </script> <script>

	1591 for (unsigned it = 0; it < index; it++)

	1592 source.advanceAndUpdateLineNumber();

	1593 return true;

	1594 } else if (cc != '>' && !m_temporaryAttributeList.isEmpty()) {

	1595 // parse attribute name and value

	1596 if (compareAttribute(source, index)) {

	1597 for (unsigned it = 0; it < index; it++)

	1598 source.advanceAndUpdateLineNumber();

	1599 m_temporaryAttributeValueBuffer.clear();

	1600 return true;

	1601 } else {

	1602 m_temporaryAttributeList.clear();

	1603 m_temporaryAttributeValueBuffer.clear();

	1604 return false;

	1605 }

	1606 } else

	1607 break;

	1608 }

	1609 break;

	1610 }

	1611 break;

	1612 }

	1613 m_temporaryAttributeList.clear();

	1614 }

	1615 return false;

	1616 }

	1617

	1618 bool HTMLTokenizer::compareAttribute(SegmentedString& source, unsigned& index) {

	1619 UChar cc;

	1620 String currentSubstring;

	1621 unsigned attributeCount = 0;

	1622

	1623 while (true) {

	1624 cc = source.getCurrentString().getCharByIndex(++index);

	1625 if (isTokenizerWhitespace(cc))

	1626 continue;

	1627 if (isASCIIUpper(cc))

	1628 toLowerCase(cc);

	1629 if (isASCIILower(cc)) {

	1630 // 'src' attribute, shouldn't merge scripts if exists

	1631 if (cc == 's') {

	1632 currentSubstring = source.getCurrentSubstring(index, srcAttr.localName().length());

	1633 if (getAttributeFromList(srcAttr) || threadSafeMatch(currentSubstring, srcAttr)) {

	1634 return false;

	1635 }

	1636 } else if (cc == 't') {

	1637 // 'type' attribute

	1638 currentSubstring = source.getCurrentSubstring(index, typeAttr.localName().length());

	1639 if (getAttributeFromList(typeAttr) && threadSafeMatch(currentSubstring, typeAttr)) {

	1640 ++attributeCount;

	1641 index += typeAttr.localName().length() - 1;

	1642 if (compareAttributeValue(source, index, typeAttr))

	1643 continue;

	1644 else

	1645 return false;

	1646 } else

	1647 return false;

	1648 } else if (cc == 'c') {

	1649 // 'charset' attribute

	1650 currentSubstring = source.getCurrentSubstring(index, charsetAttr.localName().length());

	1651 if (getAttributeFromList(charsetAttr) && threadSafeMatch(currentSubstring, charsetAttr)) {

	1652 ++attributeCount;

	1653 index += charsetAttr.localName().length() - 1;

	1654 if (compareAttributeValue(source, index, charsetAttr))

	1655 continue;

	1656 else

	1657 return false;

	1658 } else

	1659 return false;

	1660 } else if (cc == 'l') {

	1661 // 'language' attribute

	1662 currentSubstring = source.getCurrentSubstring(index, languageAttr.localName().length());

	1663 if (getAttributeFromList(languageAttr) && threadSafeMatch(currentSubstring, languageAttr)) {

	1664 ++attributeCount;

	1665 index += languageAttr.localName().length() - 1;

	1666 if (compareAttributeValue(source, index, languageAttr))

	1667 continue;

	1668 else

	1669 return false;

	1670 } else

	1671 return false;

	1672 } else

	1673 return false;

	1674 }

	1675 if (cc == '>') {

	1676 if (m_temporaryAttributeList.size() > attributeCount)

	1677 return false;

	1678 break;

	1679 }

	1680 }

	1681 return true;

	1682 }

	1683

	1684 bool HTMLTokenizer::compareAttributeValue(SegmentedString& source, unsigned& index, const QualifiedName& qName) {

	1685 UChar cc;

	1686 bool singleQuotedAttributeValueStarts = false;

	1687 bool doubleQuotedAttributeValueStarts = false;

	1688 m_temporaryAttributeValueBuffer.clear();

	1689

	1690 while (true) {

	1691 cc = source.getCurrentString().getCharByIndex(++index);

	1692 if (singleQuotedAttributeValueStarts || doubleQuotedAttributeValueStarts) {

	1693 if (cc == '"' || cc == '\'')

	1694 break;

	1695 else if (isASCIIUpper(cc))

	1696 toLowerCase(cc);

	1697 m_temporaryAttributeValueBuffer.append(cc);

	1698 } else if (isTokenizerWhitespace(cc) || cc == '=')

	1699 continue;

	1700 else if ( cc == '\'' && !singleQuotedAttributeValueStarts) {

	1701 singleQuotedAttributeValueStarts = true;

	1702 continue;

	1703 } else if ( cc == '"' && !doubleQuotedAttributeValueStarts) {

	1704 doubleQuotedAttributeValueStarts = true;

	1705 continue;

	1706 } else

	1707 break;

	1708 }

	1709 const HTMLToken::Attribute* comparedAttribute = getAttributeFromList(qName);

	1710 String comparedAttributeValue(comparedAttribute->value());

	1711 return equalIgnoringCase(comparedAttributeValue, String(m_temporaryAttributeValueBuffer));

	1712 }

	1713

1560 inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString) {	1714 inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString) {

1561 return vectorEqualsString(m_temporaryBuffer, expectedString);	1715 return vectorEqualsString(m_temporaryBuffer, expectedString);

1562 }	1716 }

1563	1717

1564 inline void HTMLTokenizer::addToPossibleEndTag(LChar cc) {	1718 inline void HTMLTokenizer::addToPossibleEndTag(LChar cc) {

1565 ASSERT(isEndTagBufferingState(m_state));	1719 ASSERT(isEndTagBufferingState(m_state));

1566 m_bufferedEndTagName.append(cc);	1720 m_bufferedEndTagName.append(cc);

1567 }	1721 }

1568	1722

1569 inline bool HTMLTokenizer::isAppropriateEndTag() {	1723 inline bool HTMLTokenizer::isAppropriateEndTag() {

1570 if (m_bufferedEndTagName.size() != m_appropriateEndTagName.size())	1724 if (m_bufferedEndTagName.size() != m_appropriateEndTagName.size())

1571 return false;	1725 return false;

1572	1726

1573 size_t numCharacters = m_bufferedEndTagName.size();	1727 size_t numCharacters = m_bufferedEndTagName.size();

1574	1728

1575 for (size_t i = 0; i < numCharacters; i++) {	1729 for (size_t i = 0; i < numCharacters; i++) {

1576 if (m_bufferedEndTagName[i] != m_appropriateEndTagName[i])	1730 if (m_bufferedEndTagName[i] != m_appropriateEndTagName[i])

1577 return false;	1731 return false;

1578 }	1732 }

1579	1733

1580 return true;	1734 return true;

1581 }	1735 }

1582	1736

1583 inline void HTMLTokenizer::parseError() {	1737 inline void HTMLTokenizer::parseError() {

1584 DVLOG(1) << "Not implemented.";	1738 DVLOG(1) << "Not implemented.";

1585 }	1739 }

1586	1740

1587 } // namespace blink	1741 } // namespace blink

OLD	NEW