| Index: sky/engine/core/html/parser/HTMLTokenizer.cpp
|
| diff --git a/sky/engine/core/html/parser/HTMLTokenizer.cpp b/sky/engine/core/html/parser/HTMLTokenizer.cpp
|
| index ca454ad3cd314b958735a106c6b87d0ed7cb5b85..6c4c21099f0d98a48a65139c3129bacf99979c0b 100644
|
| --- a/sky/engine/core/html/parser/HTMLTokenizer.cpp
|
| +++ b/sky/engine/core/html/parser/HTMLTokenizer.cpp
|
| @@ -235,21 +235,15 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
|
|
|
| HTML_BEGIN_STATE(TagOpenState) {
|
| if (cc == '!')
|
| - HTML_ADVANCE_TO(MarkupDeclarationOpenState);
|
| + HTML_ADVANCE_TO(CommentStart1State);
|
| else if (cc == '/')
|
| - HTML_ADVANCE_TO(EndTagOpenState);
|
| + HTML_ADVANCE_TO(CloseTagState);
|
| else if (isASCIIUpper(cc)) {
|
| m_token->beginStartTag(toLowerCase(cc));
|
| HTML_ADVANCE_TO(TagNameState);
|
| } else if (isASCIILower(cc)) {
|
| m_token->beginStartTag(cc);
|
| HTML_ADVANCE_TO(TagNameState);
|
| - } else if (cc == '?') {
|
| - parseError();
|
| - // The spec consumes the current character before switching
|
| - // to the bogus comment state, but it's easier to implement
|
| - // if we reconsume the current character.
|
| - HTML_RECONSUME_IN(BogusCommentState);
|
| } else {
|
| parseError();
|
| bufferCharacter('<');
|
| @@ -258,7 +252,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
|
| }
|
| END_STATE()
|
|
|
| - HTML_BEGIN_STATE(EndTagOpenState) {
|
| + HTML_BEGIN_STATE(CloseTagState) {
|
| if (isASCIIUpper(cc)) {
|
| m_token->beginEndTag(static_cast<LChar>(toLowerCase(cc)));
|
| m_appropriateEndTagName.clear();
|
| @@ -268,16 +262,14 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
|
| m_appropriateEndTagName.clear();
|
| HTML_ADVANCE_TO(TagNameState);
|
| } else if (cc == '>') {
|
| - parseError();
|
| + bufferCharacter('<');
|
| + bufferCharacter('/');
|
| + bufferCharacter('>');
|
| HTML_ADVANCE_TO(DataState);
|
| - } else if (cc == kEndOfFileMarker) {
|
| - parseError();
|
| + } else {
|
| bufferCharacter('<');
|
| bufferCharacter('/');
|
| HTML_RECONSUME_IN(DataState);
|
| - } else {
|
| - parseError();
|
| - HTML_RECONSUME_IN(BogusCommentState);
|
| }
|
| }
|
| END_STATE()
|
| @@ -571,144 +563,54 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
|
| }
|
| END_STATE()
|
|
|
| - HTML_BEGIN_STATE(BogusCommentState) {
|
| - m_token->beginComment();
|
| - HTML_RECONSUME_IN(ContinueBogusCommentState);
|
| - }
|
| - END_STATE()
|
| -
|
| - HTML_BEGIN_STATE(ContinueBogusCommentState) {
|
| - if (cc == '>')
|
| - return emitAndResumeIn(source, HTMLTokenizer::DataState);
|
| - else if (cc == kEndOfFileMarker)
|
| - return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
|
| - else {
|
| - m_token->appendToComment(cc);
|
| - HTML_ADVANCE_TO(ContinueBogusCommentState);
|
| - }
|
| - }
|
| - END_STATE()
|
| -
|
| - HTML_BEGIN_STATE(MarkupDeclarationOpenState) {
|
| + HTML_BEGIN_STATE(CommentStart1State) {
|
| if (cc == '-') {
|
| - SegmentedString::LookAheadResult result = source.lookAhead(HTMLTokenizerNames::dashDash);
|
| - if (result == SegmentedString::DidMatch) {
|
| - source.advanceAndASSERT('-');
|
| - source.advanceAndASSERT('-');
|
| - m_token->beginComment();
|
| - HTML_SWITCH_TO(CommentStartState);
|
| - } else if (result == SegmentedString::NotEnoughCharacters)
|
| - return haveBufferedCharacterToken();
|
| - }
|
| - parseError();
|
| - HTML_RECONSUME_IN(BogusCommentState);
|
| - }
|
| - END_STATE()
|
| -
|
| - HTML_BEGIN_STATE(CommentStartState) {
|
| - if (cc == '-')
|
| - HTML_ADVANCE_TO(CommentStartDashState);
|
| - else if (cc == '>') {
|
| - parseError();
|
| - return emitAndResumeIn(source, HTMLTokenizer::DataState);
|
| - } else if (cc == kEndOfFileMarker) {
|
| - parseError();
|
| - return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
|
| + HTML_ADVANCE_TO(CommentStart2State);
|
| } else {
|
| - m_token->appendToComment(cc);
|
| - HTML_ADVANCE_TO(CommentState);
|
| + bufferCharacter('<');
|
| + bufferCharacter('!');
|
| + HTML_RECONSUME_IN(DataState);
|
| }
|
| }
|
| END_STATE()
|
|
|
| - HTML_BEGIN_STATE(CommentStartDashState) {
|
| - if (cc == '-')
|
| - HTML_ADVANCE_TO(CommentEndState);
|
| - else if (cc == '>') {
|
| - parseError();
|
| - return emitAndResumeIn(source, HTMLTokenizer::DataState);
|
| - } else if (cc == kEndOfFileMarker) {
|
| - parseError();
|
| - return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
|
| - } else {
|
| - m_token->appendToComment('-');
|
| - m_token->appendToComment(cc);
|
| + HTML_BEGIN_STATE(CommentStart2State) {
|
| + if (cc == '-') {
|
| HTML_ADVANCE_TO(CommentState);
|
| + } else {
|
| + bufferCharacter('<');
|
| + bufferCharacter('!');
|
| + bufferCharacter('-');
|
| + HTML_RECONSUME_IN(DataState);
|
| }
|
| }
|
| END_STATE()
|
|
|
| HTML_BEGIN_STATE(CommentState) {
|
| if (cc == '-')
|
| - HTML_ADVANCE_TO(CommentEndDashState);
|
| - else if (cc == kEndOfFileMarker) {
|
| - parseError();
|
| - return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
|
| - } else {
|
| - m_token->appendToComment(cc);
|
| + HTML_ADVANCE_TO(CommentEnd1State);
|
| + else
|
| HTML_ADVANCE_TO(CommentState);
|
| - }
|
| }
|
| END_STATE()
|
|
|
| - HTML_BEGIN_STATE(CommentEndDashState) {
|
| + HTML_BEGIN_STATE(CommentEnd1State) {
|
| if (cc == '-')
|
| - HTML_ADVANCE_TO(CommentEndState);
|
| - else if (cc == kEndOfFileMarker) {
|
| - parseError();
|
| - return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
|
| - } else {
|
| - m_token->appendToComment('-');
|
| - m_token->appendToComment(cc);
|
| - HTML_ADVANCE_TO(CommentState);
|
| - }
|
| - }
|
| - END_STATE()
|
| -
|
| - HTML_BEGIN_STATE(CommentEndState) {
|
| - if (cc == '>')
|
| - return emitAndResumeIn(source, HTMLTokenizer::DataState);
|
| - else if (cc == '!') {
|
| - parseError();
|
| - HTML_ADVANCE_TO(CommentEndBangState);
|
| - } else if (cc == '-') {
|
| - parseError();
|
| - m_token->appendToComment('-');
|
| - HTML_ADVANCE_TO(CommentEndState);
|
| - } else if (cc == kEndOfFileMarker) {
|
| - parseError();
|
| - return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
|
| - } else {
|
| - parseError();
|
| - m_token->appendToComment('-');
|
| - m_token->appendToComment('-');
|
| - m_token->appendToComment(cc);
|
| + HTML_ADVANCE_TO(CommentEnd2State);
|
| + else
|
| HTML_ADVANCE_TO(CommentState);
|
| - }
|
| }
|
| END_STATE()
|
|
|
| - HTML_BEGIN_STATE(CommentEndBangState) {
|
| - if (cc == '-') {
|
| - m_token->appendToComment('-');
|
| - m_token->appendToComment('-');
|
| - m_token->appendToComment('!');
|
| - HTML_ADVANCE_TO(CommentEndDashState);
|
| - } else if (cc == '>')
|
| - return emitAndResumeIn(source, HTMLTokenizer::DataState);
|
| - else if (cc == kEndOfFileMarker) {
|
| - parseError();
|
| - return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
|
| - } else {
|
| - m_token->appendToComment('-');
|
| - m_token->appendToComment('-');
|
| - m_token->appendToComment('!');
|
| - m_token->appendToComment(cc);
|
| + HTML_BEGIN_STATE(CommentEnd2State) {
|
| + if (cc == '-')
|
| + HTML_ADVANCE_TO(CommentEnd2State);
|
| + else if (cc == '>')
|
| + HTML_ADVANCE_TO(DataState);
|
| + else
|
| HTML_ADVANCE_TO(CommentState);
|
| - }
|
| }
|
| END_STATE()
|
| -
|
| }
|
|
|
| ASSERT_NOT_REACHED();
|
|
|