| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. | 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. |
| 3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ | 3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ |
| 4 * Copyright (C) 2010 Google, Inc. All Rights Reserved. | 4 * Copyright (C) 2010 Google, Inc. All Rights Reserved. |
| 5 * | 5 * |
| 6 * Redistribution and use in source and binary forms, with or without | 6 * Redistribution and use in source and binary forms, with or without |
| 7 * modification, are permitted provided that the following conditions | 7 * modification, are permitted provided that the following conditions |
| 8 * are met: | 8 * are met: |
| 9 * 1. Redistributions of source code must retain the above copyright | 9 * 1. Redistributions of source code must retain the above copyright |
| 10 * notice, this list of conditions and the following disclaimer. | 10 * notice, this list of conditions and the following disclaimer. |
| (...skipping 212 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 223 else if (cc == kEndOfFileMarker) | 223 else if (cc == kEndOfFileMarker) |
| 224 return emitEndOfFile(source); | 224 return emitEndOfFile(source); |
| 225 else { | 225 else { |
| 226 bufferCharacter(cc); | 226 bufferCharacter(cc); |
| 227 HTML_ADVANCE_TO(RAWTEXTState); | 227 HTML_ADVANCE_TO(RAWTEXTState); |
| 228 } | 228 } |
| 229 } | 229 } |
| 230 END_STATE() | 230 END_STATE() |
| 231 | 231 |
| 232 HTML_BEGIN_STATE(ScriptDataState) { | 232 HTML_BEGIN_STATE(ScriptDataState) { |
| 233 if (cc == '<') | 233 if (cc == '<') { |
| 234 HTML_ADVANCE_TO(ScriptDataLessThanSignState); | 234 if (checkIfMergeScripts(source)) |
| 235 HTML_ADVANCE_TO(ScriptDataState); |
| 236 else |
| 237 HTML_ADVANCE_TO(ScriptDataLessThanSignState); |
| 238 } |
| 235 else if (cc == kEndOfFileMarker) | 239 else if (cc == kEndOfFileMarker) |
| 236 return emitEndOfFile(source); | 240 return emitEndOfFile(source); |
| 237 else { | 241 else { |
| 238 bufferCharacter(cc); | 242 bufferCharacter(cc); |
| 239 HTML_ADVANCE_TO(ScriptDataState); | 243 HTML_ADVANCE_TO(ScriptDataState); |
| 240 } | 244 } |
| 241 } | 245 } |
| 242 END_STATE() | 246 END_STATE() |
| 243 | 247 |
| 244 HTML_BEGIN_STATE(PLAINTEXTState) { | 248 HTML_BEGIN_STATE(PLAINTEXTState) { |
| (...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 296 HTML_RECONSUME_IN(BogusCommentState); | 300 HTML_RECONSUME_IN(BogusCommentState); |
| 297 } | 301 } |
| 298 } | 302 } |
| 299 END_STATE() | 303 END_STATE() |
| 300 | 304 |
| 301 HTML_BEGIN_STATE(TagNameState) { | 305 HTML_BEGIN_STATE(TagNameState) { |
| 302 if (isTokenizerWhitespace(cc)) | 306 if (isTokenizerWhitespace(cc)) |
| 303 HTML_ADVANCE_TO(BeforeAttributeNameState); | 307 HTML_ADVANCE_TO(BeforeAttributeNameState); |
| 304 else if (cc == '/') | 308 else if (cc == '/') |
| 305 HTML_ADVANCE_TO(SelfClosingStartTagState); | 309 HTML_ADVANCE_TO(SelfClosingStartTagState); |
| 306 else if (cc == '>') | 310 else if (cc == '>') { |
| 311 if ((m_token->type() == HTMLToken::StartTag) && threadSafeMatch(attemptS
taticStringCreation(m_token->name(), Likely8Bit), scriptTag)) |
| 312 reserveAttributeForCheck(); |
| 307 return emitAndResumeIn(source, HTMLTokenizer::DataState); | 313 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 308 else if (isASCIIUpper(cc)) { | 314 } else if (isASCIIUpper(cc)) { |
| 309 m_token->appendToName(toLowerCase(cc)); | 315 m_token->appendToName(toLowerCase(cc)); |
| 310 HTML_ADVANCE_TO(TagNameState); | 316 HTML_ADVANCE_TO(TagNameState); |
| 311 } else if (cc == kEndOfFileMarker) { | 317 } else if (cc == kEndOfFileMarker) { |
| 312 parseError(); | 318 parseError(); |
| 313 HTML_RECONSUME_IN(DataState); | 319 HTML_RECONSUME_IN(DataState); |
| 314 } else { | 320 } else { |
| 315 m_token->appendToName(cc); | 321 m_token->appendToName(cc); |
| 316 HTML_ADVANCE_TO(TagNameState); | 322 HTML_ADVANCE_TO(TagNameState); |
| 317 } | 323 } |
| 318 } | 324 } |
| (...skipping 479 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 798 m_token->endAttributeName(source.numberOfCharactersConsumed()); | 804 m_token->endAttributeName(source.numberOfCharactersConsumed()); |
| 799 HTML_ADVANCE_TO(AfterAttributeNameState); | 805 HTML_ADVANCE_TO(AfterAttributeNameState); |
| 800 } else if (cc == '/') { | 806 } else if (cc == '/') { |
| 801 m_token->endAttributeName(source.numberOfCharactersConsumed()); | 807 m_token->endAttributeName(source.numberOfCharactersConsumed()); |
| 802 HTML_ADVANCE_TO(SelfClosingStartTagState); | 808 HTML_ADVANCE_TO(SelfClosingStartTagState); |
| 803 } else if (cc == '=') { | 809 } else if (cc == '=') { |
| 804 m_token->endAttributeName(source.numberOfCharactersConsumed()); | 810 m_token->endAttributeName(source.numberOfCharactersConsumed()); |
| 805 HTML_ADVANCE_TO(BeforeAttributeValueState); | 811 HTML_ADVANCE_TO(BeforeAttributeValueState); |
| 806 } else if (cc == '>') { | 812 } else if (cc == '>') { |
| 807 m_token->endAttributeName(source.numberOfCharactersConsumed()); | 813 m_token->endAttributeName(source.numberOfCharactersConsumed()); |
| 814 if ((m_token->type() == HTMLToken::StartTag) && threadSafeMatch(attemptS
taticStringCreation(m_token->name(), Likely8Bit), scriptTag)) |
| 815 reserveAttributeForCheck(); |
| 808 return emitAndResumeIn(source, HTMLTokenizer::DataState); | 816 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 809 } else if (isASCIIUpper(cc)) { | 817 } else if (isASCIIUpper(cc)) { |
| 810 m_token->appendToAttributeName(toLowerCase(cc)); | 818 m_token->appendToAttributeName(toLowerCase(cc)); |
| 811 HTML_ADVANCE_TO(AttributeNameState); | 819 HTML_ADVANCE_TO(AttributeNameState); |
| 812 } else if (cc == kEndOfFileMarker) { | 820 } else if (cc == kEndOfFileMarker) { |
| 813 parseError(); | 821 parseError(); |
| 814 m_token->endAttributeName(source.numberOfCharactersConsumed()); | 822 m_token->endAttributeName(source.numberOfCharactersConsumed()); |
| 815 HTML_RECONSUME_IN(DataState); | 823 HTML_RECONSUME_IN(DataState); |
| 816 } else { | 824 } else { |
| 817 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=') | 825 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=') |
| (...skipping 147 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 965 else | 973 else |
| 966 ASSERT_NOT_REACHED(); | 974 ASSERT_NOT_REACHED(); |
| 967 } | 975 } |
| 968 END_STATE() | 976 END_STATE() |
| 969 | 977 |
| 970 HTML_BEGIN_STATE(AfterAttributeValueQuotedState) { | 978 HTML_BEGIN_STATE(AfterAttributeValueQuotedState) { |
| 971 if (isTokenizerWhitespace(cc)) | 979 if (isTokenizerWhitespace(cc)) |
| 972 HTML_ADVANCE_TO(BeforeAttributeNameState); | 980 HTML_ADVANCE_TO(BeforeAttributeNameState); |
| 973 else if (cc == '/') | 981 else if (cc == '/') |
| 974 HTML_ADVANCE_TO(SelfClosingStartTagState); | 982 HTML_ADVANCE_TO(SelfClosingStartTagState); |
| 975 else if (cc == '>') | 983 else if (cc == '>') { |
| 984 if ((m_token->type() == HTMLToken::StartTag) && threadSafeMatch(attemptS
taticStringCreation(m_token->name(), Likely8Bit), scriptTag)) |
| 985 reserveAttributeForCheck(); |
| 976 return emitAndResumeIn(source, HTMLTokenizer::DataState); | 986 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
| 977 else if (cc == kEndOfFileMarker) { | 987 } else if (cc == kEndOfFileMarker) { |
| 978 parseError(); | 988 parseError(); |
| 979 HTML_RECONSUME_IN(DataState); | 989 HTML_RECONSUME_IN(DataState); |
| 980 } else { | 990 } else { |
| 981 parseError(); | 991 parseError(); |
| 982 HTML_RECONSUME_IN(BeforeAttributeNameState); | 992 HTML_RECONSUME_IN(BeforeAttributeNameState); |
| 983 } | 993 } |
| 984 } | 994 } |
| 985 END_STATE() | 995 END_STATE() |
| 986 | 996 |
| 987 HTML_BEGIN_STATE(SelfClosingStartTagState) { | 997 HTML_BEGIN_STATE(SelfClosingStartTagState) { |
| (...skipping 562 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1550 setState(HTMLTokenizer::ScriptDataState); | 1560 setState(HTMLTokenizer::ScriptDataState); |
| 1551 else if (threadSafeMatch(tagName, styleTag) || | 1561 else if (threadSafeMatch(tagName, styleTag) || |
| 1552 threadSafeMatch(tagName, iframeTag) || | 1562 threadSafeMatch(tagName, iframeTag) || |
| 1553 threadSafeMatch(tagName, xmpTag) || | 1563 threadSafeMatch(tagName, xmpTag) || |
| 1554 (threadSafeMatch(tagName, noembedTag) && m_options.pluginsEnabled) || | 1564 (threadSafeMatch(tagName, noembedTag) && m_options.pluginsEnabled) || |
| 1555 threadSafeMatch(tagName, noframesTag) || | 1565 threadSafeMatch(tagName, noframesTag) || |
| 1556 (threadSafeMatch(tagName, noscriptTag) && m_options.scriptEnabled)) | 1566 (threadSafeMatch(tagName, noscriptTag) && m_options.scriptEnabled)) |
| 1557 setState(HTMLTokenizer::RAWTEXTState); | 1567 setState(HTMLTokenizer::RAWTEXTState); |
| 1558 } | 1568 } |
| 1559 | 1569 |
| 1570 void HTMLTokenizer::reserveAttributeForCheck() { |
| 1571 m_temporaryAttributeList = m_token->attributes(); |
| 1572 } |
| 1573 |
| 1574 bool HTMLTokenizer::checkIfMergeScripts(SegmentedString& source) { |
| 1575 unsigned scriptEndTagLength = 9; |
| 1576 unsigned scriptStartTagLength = 7; |
| 1577 String currentSubstring = source.getCurrentSubstring(scriptEndTagLength); |
| 1578 if (source.isEqualToScriptEndTagTemplate(currentSubstring)) { |
| 1579 unsigned index = scriptEndTagLength - 1; |
| 1580 while (true) { |
| 1581 UChar cc = source.getCurrentString().getCharByIndex(++index); |
| 1582 if (isTokenizerWhitespace(cc)) |
| 1583 continue; |
| 1584 if (cc == '<') { |
| 1585 currentSubstring = source.getCurrentSubstring(index, scriptStartTagLengt
h); |
| 1586 if (source.isEqualToScriptStartTagTemplate(currentSubstring)) { |
| 1587 index += scriptStartTagLength; |
| 1588 cc = source.getCurrentString().getCharByIndex(index); |
| 1589 if (cc == '>' && m_temporaryAttributeList.isEmpty()) { |
| 1590 //Merge <script> ... </script> <script> |
| 1591 for (unsigned it = 0; it < index; it++) |
| 1592 source.advanceAndUpdateLineNumber(); |
| 1593 return true; |
| 1594 } else if (cc != '>' && !m_temporaryAttributeList.isEmpty()) { |
| 1595 // parse attribute name and value |
| 1596 if (compareAttribute(source, index)) { |
| 1597 for (unsigned it = 0; it < index; it++) |
| 1598 source.advanceAndUpdateLineNumber(); |
| 1599 m_temporaryAttributeValueBuffer.clear(); |
| 1600 return true; |
| 1601 } else { |
| 1602 m_temporaryAttributeList.clear(); |
| 1603 m_temporaryAttributeValueBuffer.clear(); |
| 1604 return false; |
| 1605 } |
| 1606 } else |
| 1607 break; |
| 1608 } |
| 1609 break; |
| 1610 } |
| 1611 break; |
| 1612 } |
| 1613 m_temporaryAttributeList.clear(); |
| 1614 } |
| 1615 return false; |
| 1616 } |
| 1617 |
| 1618 bool HTMLTokenizer::compareAttribute(SegmentedString& source, unsigned& index) { |
| 1619 UChar cc; |
| 1620 String currentSubstring; |
| 1621 unsigned attributeCount = 0; |
| 1622 |
| 1623 while (true) { |
| 1624 cc = source.getCurrentString().getCharByIndex(++index); |
| 1625 if (isTokenizerWhitespace(cc)) |
| 1626 continue; |
| 1627 if (isASCIIUpper(cc)) |
| 1628 toLowerCase(cc); |
| 1629 if (isASCIILower(cc)) { |
| 1630 // 'src' attribute, shouldn't merge scripts if exists |
| 1631 if (cc == 's') { |
| 1632 currentSubstring = source.getCurrentSubstring(index, srcAttr.localName()
.length()); |
| 1633 if (getAttributeFromList(srcAttr) || threadSafeMatch(currentSubstring, s
rcAttr)) { |
| 1634 return false; |
| 1635 } |
| 1636 } else if (cc == 't') { |
| 1637 // 'type' attribute |
| 1638 currentSubstring = source.getCurrentSubstring(index, typeAttr.localName(
).length()); |
| 1639 if (getAttributeFromList(typeAttr) && threadSafeMatch(currentSubstring,
typeAttr)) { |
| 1640 ++attributeCount; |
| 1641 index += typeAttr.localName().length() - 1; |
| 1642 if (compareAttributeValue(source, index, typeAttr)) |
| 1643 continue; |
| 1644 else |
| 1645 return false; |
| 1646 } else |
| 1647 return false; |
| 1648 } else if (cc == 'c') { |
| 1649 // 'charset' attribute |
| 1650 currentSubstring = source.getCurrentSubstring(index, charsetAttr.localNa
me().length()); |
| 1651 if (getAttributeFromList(charsetAttr) && threadSafeMatch(currentSubstrin
g, charsetAttr)) { |
| 1652 ++attributeCount; |
| 1653 index += charsetAttr.localName().length() - 1; |
| 1654 if (compareAttributeValue(source, index, charsetAttr)) |
| 1655 continue; |
| 1656 else |
| 1657 return false; |
| 1658 } else |
| 1659 return false; |
| 1660 } else if (cc == 'l') { |
| 1661 // 'language' attribute |
| 1662 currentSubstring = source.getCurrentSubstring(index, languageAttr.localN
ame().length()); |
| 1663 if (getAttributeFromList(languageAttr) && threadSafeMatch(currentSubstri
ng, languageAttr)) { |
| 1664 ++attributeCount; |
| 1665 index += languageAttr.localName().length() - 1; |
| 1666 if (compareAttributeValue(source, index, languageAttr)) |
| 1667 continue; |
| 1668 else |
| 1669 return false; |
| 1670 } else |
| 1671 return false; |
| 1672 } else |
| 1673 return false; |
| 1674 } |
| 1675 if (cc == '>') { |
| 1676 if (m_temporaryAttributeList.size() > attributeCount) |
| 1677 return false; |
| 1678 break; |
| 1679 } |
| 1680 } |
| 1681 return true; |
| 1682 } |
| 1683 |
| 1684 bool HTMLTokenizer::compareAttributeValue(SegmentedString& source, unsigned& ind
ex, const QualifiedName& qName) { |
| 1685 UChar cc; |
| 1686 bool singleQuotedAttributeValueStarts = false; |
| 1687 bool doubleQuotedAttributeValueStarts = false; |
| 1688 m_temporaryAttributeValueBuffer.clear(); |
| 1689 |
| 1690 while (true) { |
| 1691 cc = source.getCurrentString().getCharByIndex(++index); |
| 1692 if (singleQuotedAttributeValueStarts || doubleQuotedAttributeValueStarts) { |
| 1693 if (cc == '"' || cc == '\'') |
| 1694 break; |
| 1695 else if (isASCIIUpper(cc)) |
| 1696 toLowerCase(cc); |
| 1697 m_temporaryAttributeValueBuffer.append(cc); |
| 1698 } else if (isTokenizerWhitespace(cc) || cc == '=') |
| 1699 continue; |
| 1700 else if ( cc == '\'' && !singleQuotedAttributeValueStarts) { |
| 1701 singleQuotedAttributeValueStarts = true; |
| 1702 continue; |
| 1703 } else if ( cc == '"' && !doubleQuotedAttributeValueStarts) { |
| 1704 doubleQuotedAttributeValueStarts = true; |
| 1705 continue; |
| 1706 } else |
| 1707 break; |
| 1708 } |
| 1709 const HTMLToken::Attribute* comparedAttribute = getAttributeFromList(qName); |
| 1710 String comparedAttributeValue(comparedAttribute->value()); |
| 1711 return equalIgnoringCase(comparedAttributeValue, String(m_temporaryAttributeVa
lueBuffer)); |
| 1712 } |
| 1713 |
| 1560 inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString) { | 1714 inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString) { |
| 1561 return vectorEqualsString(m_temporaryBuffer, expectedString); | 1715 return vectorEqualsString(m_temporaryBuffer, expectedString); |
| 1562 } | 1716 } |
| 1563 | 1717 |
| 1564 inline void HTMLTokenizer::addToPossibleEndTag(LChar cc) { | 1718 inline void HTMLTokenizer::addToPossibleEndTag(LChar cc) { |
| 1565 ASSERT(isEndTagBufferingState(m_state)); | 1719 ASSERT(isEndTagBufferingState(m_state)); |
| 1566 m_bufferedEndTagName.append(cc); | 1720 m_bufferedEndTagName.append(cc); |
| 1567 } | 1721 } |
| 1568 | 1722 |
| 1569 inline bool HTMLTokenizer::isAppropriateEndTag() { | 1723 inline bool HTMLTokenizer::isAppropriateEndTag() { |
| 1570 if (m_bufferedEndTagName.size() != m_appropriateEndTagName.size()) | 1724 if (m_bufferedEndTagName.size() != m_appropriateEndTagName.size()) |
| 1571 return false; | 1725 return false; |
| 1572 | 1726 |
| 1573 size_t numCharacters = m_bufferedEndTagName.size(); | 1727 size_t numCharacters = m_bufferedEndTagName.size(); |
| 1574 | 1728 |
| 1575 for (size_t i = 0; i < numCharacters; i++) { | 1729 for (size_t i = 0; i < numCharacters; i++) { |
| 1576 if (m_bufferedEndTagName[i] != m_appropriateEndTagName[i]) | 1730 if (m_bufferedEndTagName[i] != m_appropriateEndTagName[i]) |
| 1577 return false; | 1731 return false; |
| 1578 } | 1732 } |
| 1579 | 1733 |
| 1580 return true; | 1734 return true; |
| 1581 } | 1735 } |
| 1582 | 1736 |
| 1583 inline void HTMLTokenizer::parseError() { | 1737 inline void HTMLTokenizer::parseError() { |
| 1584 DVLOG(1) << "Not implemented."; | 1738 DVLOG(1) << "Not implemented."; |
| 1585 } | 1739 } |
| 1586 | 1740 |
| 1587 } // namespace blink | 1741 } // namespace blink |
| OLD | NEW |