Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(42)

Side by Side Diff: third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp

Issue 2438263002: Possibly merge consecutive script fragments to reduce execution overhead
Patch Set: Created 4 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved.
3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ 3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
4 * Copyright (C) 2010 Google, Inc. All Rights Reserved. 4 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
5 * 5 *
6 * Redistribution and use in source and binary forms, with or without 6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions 7 * modification, are permitted provided that the following conditions
8 * are met: 8 * are met:
9 * 1. Redistributions of source code must retain the above copyright 9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer. 10 * notice, this list of conditions and the following disclaimer.
(...skipping 212 matching lines...) Expand 10 before | Expand all | Expand 10 after
223 else if (cc == kEndOfFileMarker) 223 else if (cc == kEndOfFileMarker)
224 return emitEndOfFile(source); 224 return emitEndOfFile(source);
225 else { 225 else {
226 bufferCharacter(cc); 226 bufferCharacter(cc);
227 HTML_ADVANCE_TO(RAWTEXTState); 227 HTML_ADVANCE_TO(RAWTEXTState);
228 } 228 }
229 } 229 }
230 END_STATE() 230 END_STATE()
231 231
232 HTML_BEGIN_STATE(ScriptDataState) { 232 HTML_BEGIN_STATE(ScriptDataState) {
233 if (cc == '<') 233 if (cc == '<') {
234 HTML_ADVANCE_TO(ScriptDataLessThanSignState); 234 if (checkIfMergeScripts(source))
235 HTML_ADVANCE_TO(ScriptDataState);
236 else
237 HTML_ADVANCE_TO(ScriptDataLessThanSignState);
238 }
235 else if (cc == kEndOfFileMarker) 239 else if (cc == kEndOfFileMarker)
236 return emitEndOfFile(source); 240 return emitEndOfFile(source);
237 else { 241 else {
238 bufferCharacter(cc); 242 bufferCharacter(cc);
239 HTML_ADVANCE_TO(ScriptDataState); 243 HTML_ADVANCE_TO(ScriptDataState);
240 } 244 }
241 } 245 }
242 END_STATE() 246 END_STATE()
243 247
244 HTML_BEGIN_STATE(PLAINTEXTState) { 248 HTML_BEGIN_STATE(PLAINTEXTState) {
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
296 HTML_RECONSUME_IN(BogusCommentState); 300 HTML_RECONSUME_IN(BogusCommentState);
297 } 301 }
298 } 302 }
299 END_STATE() 303 END_STATE()
300 304
301 HTML_BEGIN_STATE(TagNameState) { 305 HTML_BEGIN_STATE(TagNameState) {
302 if (isTokenizerWhitespace(cc)) 306 if (isTokenizerWhitespace(cc))
303 HTML_ADVANCE_TO(BeforeAttributeNameState); 307 HTML_ADVANCE_TO(BeforeAttributeNameState);
304 else if (cc == '/') 308 else if (cc == '/')
305 HTML_ADVANCE_TO(SelfClosingStartTagState); 309 HTML_ADVANCE_TO(SelfClosingStartTagState);
306 else if (cc == '>') 310 else if (cc == '>') {
311 if ((m_token->type() == HTMLToken::StartTag) && threadSafeMatch(attemptS taticStringCreation(m_token->name(), Likely8Bit), scriptTag))
312 reserveAttributeForCheck();
307 return emitAndResumeIn(source, HTMLTokenizer::DataState); 313 return emitAndResumeIn(source, HTMLTokenizer::DataState);
308 else if (isASCIIUpper(cc)) { 314 } else if (isASCIIUpper(cc)) {
309 m_token->appendToName(toLowerCase(cc)); 315 m_token->appendToName(toLowerCase(cc));
310 HTML_ADVANCE_TO(TagNameState); 316 HTML_ADVANCE_TO(TagNameState);
311 } else if (cc == kEndOfFileMarker) { 317 } else if (cc == kEndOfFileMarker) {
312 parseError(); 318 parseError();
313 HTML_RECONSUME_IN(DataState); 319 HTML_RECONSUME_IN(DataState);
314 } else { 320 } else {
315 m_token->appendToName(cc); 321 m_token->appendToName(cc);
316 HTML_ADVANCE_TO(TagNameState); 322 HTML_ADVANCE_TO(TagNameState);
317 } 323 }
318 } 324 }
(...skipping 479 matching lines...) Expand 10 before | Expand all | Expand 10 after
798 m_token->endAttributeName(source.numberOfCharactersConsumed()); 804 m_token->endAttributeName(source.numberOfCharactersConsumed());
799 HTML_ADVANCE_TO(AfterAttributeNameState); 805 HTML_ADVANCE_TO(AfterAttributeNameState);
800 } else if (cc == '/') { 806 } else if (cc == '/') {
801 m_token->endAttributeName(source.numberOfCharactersConsumed()); 807 m_token->endAttributeName(source.numberOfCharactersConsumed());
802 HTML_ADVANCE_TO(SelfClosingStartTagState); 808 HTML_ADVANCE_TO(SelfClosingStartTagState);
803 } else if (cc == '=') { 809 } else if (cc == '=') {
804 m_token->endAttributeName(source.numberOfCharactersConsumed()); 810 m_token->endAttributeName(source.numberOfCharactersConsumed());
805 HTML_ADVANCE_TO(BeforeAttributeValueState); 811 HTML_ADVANCE_TO(BeforeAttributeValueState);
806 } else if (cc == '>') { 812 } else if (cc == '>') {
807 m_token->endAttributeName(source.numberOfCharactersConsumed()); 813 m_token->endAttributeName(source.numberOfCharactersConsumed());
814 if ((m_token->type() == HTMLToken::StartTag) && threadSafeMatch(attemptS taticStringCreation(m_token->name(), Likely8Bit), scriptTag))
815 reserveAttributeForCheck();
808 return emitAndResumeIn(source, HTMLTokenizer::DataState); 816 return emitAndResumeIn(source, HTMLTokenizer::DataState);
809 } else if (isASCIIUpper(cc)) { 817 } else if (isASCIIUpper(cc)) {
810 m_token->appendToAttributeName(toLowerCase(cc)); 818 m_token->appendToAttributeName(toLowerCase(cc));
811 HTML_ADVANCE_TO(AttributeNameState); 819 HTML_ADVANCE_TO(AttributeNameState);
812 } else if (cc == kEndOfFileMarker) { 820 } else if (cc == kEndOfFileMarker) {
813 parseError(); 821 parseError();
814 m_token->endAttributeName(source.numberOfCharactersConsumed()); 822 m_token->endAttributeName(source.numberOfCharactersConsumed());
815 HTML_RECONSUME_IN(DataState); 823 HTML_RECONSUME_IN(DataState);
816 } else { 824 } else {
817 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=') 825 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
(...skipping 147 matching lines...) Expand 10 before | Expand all | Expand 10 after
965 else 973 else
966 ASSERT_NOT_REACHED(); 974 ASSERT_NOT_REACHED();
967 } 975 }
968 END_STATE() 976 END_STATE()
969 977
970 HTML_BEGIN_STATE(AfterAttributeValueQuotedState) { 978 HTML_BEGIN_STATE(AfterAttributeValueQuotedState) {
971 if (isTokenizerWhitespace(cc)) 979 if (isTokenizerWhitespace(cc))
972 HTML_ADVANCE_TO(BeforeAttributeNameState); 980 HTML_ADVANCE_TO(BeforeAttributeNameState);
973 else if (cc == '/') 981 else if (cc == '/')
974 HTML_ADVANCE_TO(SelfClosingStartTagState); 982 HTML_ADVANCE_TO(SelfClosingStartTagState);
975 else if (cc == '>') 983 else if (cc == '>') {
984 if ((m_token->type() == HTMLToken::StartTag) && threadSafeMatch(attemptS taticStringCreation(m_token->name(), Likely8Bit), scriptTag))
985 reserveAttributeForCheck();
976 return emitAndResumeIn(source, HTMLTokenizer::DataState); 986 return emitAndResumeIn(source, HTMLTokenizer::DataState);
977 else if (cc == kEndOfFileMarker) { 987 } else if (cc == kEndOfFileMarker) {
978 parseError(); 988 parseError();
979 HTML_RECONSUME_IN(DataState); 989 HTML_RECONSUME_IN(DataState);
980 } else { 990 } else {
981 parseError(); 991 parseError();
982 HTML_RECONSUME_IN(BeforeAttributeNameState); 992 HTML_RECONSUME_IN(BeforeAttributeNameState);
983 } 993 }
984 } 994 }
985 END_STATE() 995 END_STATE()
986 996
987 HTML_BEGIN_STATE(SelfClosingStartTagState) { 997 HTML_BEGIN_STATE(SelfClosingStartTagState) {
(...skipping 562 matching lines...) Expand 10 before | Expand all | Expand 10 after
1550 setState(HTMLTokenizer::ScriptDataState); 1560 setState(HTMLTokenizer::ScriptDataState);
1551 else if (threadSafeMatch(tagName, styleTag) || 1561 else if (threadSafeMatch(tagName, styleTag) ||
1552 threadSafeMatch(tagName, iframeTag) || 1562 threadSafeMatch(tagName, iframeTag) ||
1553 threadSafeMatch(tagName, xmpTag) || 1563 threadSafeMatch(tagName, xmpTag) ||
1554 (threadSafeMatch(tagName, noembedTag) && m_options.pluginsEnabled) || 1564 (threadSafeMatch(tagName, noembedTag) && m_options.pluginsEnabled) ||
1555 threadSafeMatch(tagName, noframesTag) || 1565 threadSafeMatch(tagName, noframesTag) ||
1556 (threadSafeMatch(tagName, noscriptTag) && m_options.scriptEnabled)) 1566 (threadSafeMatch(tagName, noscriptTag) && m_options.scriptEnabled))
1557 setState(HTMLTokenizer::RAWTEXTState); 1567 setState(HTMLTokenizer::RAWTEXTState);
1558 } 1568 }
1559 1569
// Stores the attribute list of the token currently being built in
// m_temporaryAttributeList. The tokenizer states visible in this change call
// it just before emitting a <script> start tag, and checkIfMergeScripts()
// later consults the saved list when it inspects a following <script> tag.
1570 void HTMLTokenizer::reserveAttributeForCheck() {
1571 m_temporaryAttributeList = m_token->attributes();
1572 }
1573
1574 bool HTMLTokenizer::checkIfMergeScripts(SegmentedString& source) {
1575 unsigned scriptEndTagLength = 9;
1576 unsigned scriptStartTagLength = 7;
1577 String currentSubstring = source.getCurrentSubstring(scriptEndTagLength);
1578 if (source.isEqualToScriptEndTagTemplate(currentSubstring)) {
1579 unsigned index = scriptEndTagLength - 1;
1580 while (true) {
1581 UChar cc = source.getCurrentString().getCharByIndex(++index);
1582 if (isTokenizerWhitespace(cc))
1583 continue;
1584 if (cc == '<') {
1585 currentSubstring = source.getCurrentSubstring(index, scriptStartTagLengt h);
1586 if (source.isEqualToScriptStartTagTemplate(currentSubstring)) {
1587 index += scriptStartTagLength;
1588 cc = source.getCurrentString().getCharByIndex(index);
1589 if (cc == '>' && m_temporaryAttributeList.isEmpty()) {
1590 //Merge <script> ... </script> <script>
1591 for (unsigned it = 0; it < index; it++)
1592 source.advanceAndUpdateLineNumber();
1593 return true;
1594 } else if (cc != '>' && !m_temporaryAttributeList.isEmpty()) {
1595 // parse attribute name and value
1596 if (compareAttribute(source, index)) {
1597 for (unsigned it = 0; it < index; it++)
1598 source.advanceAndUpdateLineNumber();
1599 m_temporaryAttributeValueBuffer.clear();
1600 return true;
1601 } else {
1602 m_temporaryAttributeList.clear();
1603 m_temporaryAttributeValueBuffer.clear();
1604 return false;
1605 }
1606 } else
1607 break;
1608 }
1609 break;
1610 }
1611 break;
1612 }
1613 m_temporaryAttributeList.clear();
1614 }
1615 return false;
1616 }
1617
1618 bool HTMLTokenizer::compareAttribute(SegmentedString& source, unsigned& index) {
1619 UChar cc;
1620 String currentSubstring;
1621 unsigned attributeCount = 0;
1622
1623 while (true) {
1624 cc = source.getCurrentString().getCharByIndex(++index);
1625 if (isTokenizerWhitespace(cc))
1626 continue;
1627 if (isASCIIUpper(cc))
1628 toLowerCase(cc);
1629 if (isASCIILower(cc)) {
1630 // 'src' attribute, shouldn't merge scripts if exists
1631 if (cc == 's') {
1632 currentSubstring = source.getCurrentSubstring(index, srcAttr.localName() .length());
1633 if (getAttributeFromList(srcAttr) || threadSafeMatch(currentSubstring, s rcAttr)) {
1634 return false;
1635 }
1636 } else if (cc == 't') {
1637 // 'type' attribute
1638 currentSubstring = source.getCurrentSubstring(index, typeAttr.localName( ).length());
1639 if (getAttributeFromList(typeAttr) && threadSafeMatch(currentSubstring, typeAttr)) {
1640 ++attributeCount;
1641 index += typeAttr.localName().length() - 1;
1642 if (compareAttributeValue(source, index, typeAttr))
1643 continue;
1644 else
1645 return false;
1646 } else
1647 return false;
1648 } else if (cc == 'c') {
1649 // 'charset' attribute
1650 currentSubstring = source.getCurrentSubstring(index, charsetAttr.localNa me().length());
1651 if (getAttributeFromList(charsetAttr) && threadSafeMatch(currentSubstrin g, charsetAttr)) {
1652 ++attributeCount;
1653 index += charsetAttr.localName().length() - 1;
1654 if (compareAttributeValue(source, index, charsetAttr))
1655 continue;
1656 else
1657 return false;
1658 } else
1659 return false;
1660 } else if (cc == 'l') {
1661 // 'language' attribute
1662 currentSubstring = source.getCurrentSubstring(index, languageAttr.localN ame().length());
1663 if (getAttributeFromList(languageAttr) && threadSafeMatch(currentSubstri ng, languageAttr)) {
1664 ++attributeCount;
1665 index += languageAttr.localName().length() - 1;
1666 if (compareAttributeValue(source, index, languageAttr))
1667 continue;
1668 else
1669 return false;
1670 } else
1671 return false;
1672 } else
1673 return false;
1674 }
1675 if (cc == '>') {
1676 if (m_temporaryAttributeList.size() > attributeCount)
1677 return false;
1678 break;
1679 }
1680 }
1681 return true;
1682 }
1683
1684 bool HTMLTokenizer::compareAttributeValue(SegmentedString& source, unsigned& ind ex, const QualifiedName& qName) {
1685 UChar cc;
1686 bool singleQuotedAttributeValueStarts = false;
1687 bool doubleQuotedAttributeValueStarts = false;
1688 m_temporaryAttributeValueBuffer.clear();
1689
1690 while (true) {
1691 cc = source.getCurrentString().getCharByIndex(++index);
1692 if (singleQuotedAttributeValueStarts || doubleQuotedAttributeValueStarts) {
1693 if (cc == '"' || cc == '\'')
1694 break;
1695 else if (isASCIIUpper(cc))
1696 toLowerCase(cc);
1697 m_temporaryAttributeValueBuffer.append(cc);
1698 } else if (isTokenizerWhitespace(cc) || cc == '=')
1699 continue;
1700 else if ( cc == '\'' && !singleQuotedAttributeValueStarts) {
1701 singleQuotedAttributeValueStarts = true;
1702 continue;
1703 } else if ( cc == '"' && !doubleQuotedAttributeValueStarts) {
1704 doubleQuotedAttributeValueStarts = true;
1705 continue;
1706 } else
1707 break;
1708 }
1709 const HTMLToken::Attribute* comparedAttribute = getAttributeFromList(qName);
1710 String comparedAttributeValue(comparedAttribute->value());
1711 return equalIgnoringCase(comparedAttributeValue, String(m_temporaryAttributeVa lueBuffer));
1712 }
1713
// Compares m_temporaryBuffer with |expectedString| by delegating to
// vectorEqualsString() and returns that result.
1560 inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString) { 1714 inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString) {
1561 return vectorEqualsString(m_temporaryBuffer, expectedString); 1715 return vectorEqualsString(m_temporaryBuffer, expectedString);
1562 } 1716 }
1563 1717
// Appends |cc| to m_bufferedEndTagName. Asserts that the tokenizer's current
// state is one for which isEndTagBufferingState() holds.
1564 inline void HTMLTokenizer::addToPossibleEndTag(LChar cc) { 1718 inline void HTMLTokenizer::addToPossibleEndTag(LChar cc) {
1565 ASSERT(isEndTagBufferingState(m_state)); 1719 ASSERT(isEndTagBufferingState(m_state));
1566 m_bufferedEndTagName.append(cc); 1720 m_bufferedEndTagName.append(cc);
1567 } 1721 }
1568 1722
// Returns true when m_bufferedEndTagName and m_appropriateEndTagName have the
// same size and hold equal elements at every position; false otherwise.
1569 inline bool HTMLTokenizer::isAppropriateEndTag() { 1723 inline bool HTMLTokenizer::isAppropriateEndTag() {
// Different lengths can never match, so skip the element-wise comparison.
1570 if (m_bufferedEndTagName.size() != m_appropriateEndTagName.size()) 1724 if (m_bufferedEndTagName.size() != m_appropriateEndTagName.size())
1571 return false; 1725 return false;
1572 1726
1573 size_t numCharacters = m_bufferedEndTagName.size(); 1727 size_t numCharacters = m_bufferedEndTagName.size();
1574 1728
// Exact element comparison; no case folding is applied here.
1575 for (size_t i = 0; i < numCharacters; i++) { 1729 for (size_t i = 0; i < numCharacters; i++) {
1576 if (m_bufferedEndTagName[i] != m_appropriateEndTagName[i]) 1730 if (m_bufferedEndTagName[i] != m_appropriateEndTagName[i])
1577 return false; 1731 return false;
1578 } 1732 }
1579 1733
1580 return true; 1734 return true;
1581 } 1735 }
1582 1736
// Hook invoked by the tokenizer states when they detect a parse error. It
// currently does nothing beyond logging "Not implemented." through DVLOG(1).
1583 inline void HTMLTokenizer::parseError() { 1737 inline void HTMLTokenizer::parseError() {
1584 DVLOG(1) << "Not implemented."; 1738 DVLOG(1) << "Not implemented.";
1585 } 1739 }
1586 1740
1587 } // namespace blink 1741 } // namespace blink
OLDNEW
« no previous file with comments | « third_party/WebKit/Source/core/html/parser/HTMLTokenizer.h ('k') | third_party/WebKit/Source/platform/text/SegmentedString.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698