sky/engine/core/html/parser/HTMLTokenizer.cpp - Issue 682893002: Parse comments according to parsing.md

Unified Diff: sky/engine/core/html/parser/HTMLTokenizer.cpp

Issue 682893002: Parse comments according to parsing.md (Closed) Base URL: git@github.com:domokit/mojo.git@master

Patch Set: Created 6 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: sky/engine/core/html/parser/HTMLTokenizer.cpp

diff --git a/sky/engine/core/html/parser/HTMLTokenizer.cpp b/sky/engine/core/html/parser/HTMLTokenizer.cpp

index ca454ad3cd314b958735a106c6b87d0ed7cb5b85..6c4c21099f0d98a48a65139c3129bacf99979c0b 100644

--- a/sky/engine/core/html/parser/HTMLTokenizer.cpp

+++ b/sky/engine/core/html/parser/HTMLTokenizer.cpp

@@ -235,21 +235,15 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)

HTML_BEGIN_STATE(TagOpenState) {

if (cc == '!')

- HTML_ADVANCE_TO(MarkupDeclarationOpenState);

+ HTML_ADVANCE_TO(CommentStart1State);

else if (cc == '/')

- HTML_ADVANCE_TO(EndTagOpenState);

+ HTML_ADVANCE_TO(CloseTagState);

else if (isASCIIUpper(cc)) {

m_token->beginStartTag(toLowerCase(cc));

HTML_ADVANCE_TO(TagNameState);

} else if (isASCIILower(cc)) {

m_token->beginStartTag(cc);

HTML_ADVANCE_TO(TagNameState);

- } else if (cc == '?') {

- parseError();

- // The spec consumes the current character before switching

- // to the bogus comment state, but it's easier to implement

- // if we reconsume the current character.

- HTML_RECONSUME_IN(BogusCommentState);

} else {

parseError();

bufferCharacter('<');

@@ -258,7 +252,7 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)

}

END_STATE()

- HTML_BEGIN_STATE(EndTagOpenState) {

+ HTML_BEGIN_STATE(CloseTagState) {

if (isASCIIUpper(cc)) {

m_token->beginEndTag(static_cast<LChar>(toLowerCase(cc)));

m_appropriateEndTagName.clear();

@@ -268,16 +262,14 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)

m_appropriateEndTagName.clear();

HTML_ADVANCE_TO(TagNameState);

} else if (cc == '>') {

- parseError();

+ bufferCharacter('<');

+ bufferCharacter('/');

+ bufferCharacter('>');

HTML_ADVANCE_TO(DataState);

- } else if (cc == kEndOfFileMarker) {

- parseError();

+ } else {

bufferCharacter('<');

bufferCharacter('/');

HTML_RECONSUME_IN(DataState);

- } else {

- parseError();

- HTML_RECONSUME_IN(BogusCommentState);

}

END_STATE()

@@ -571,144 +563,54 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)

}

END_STATE()

- HTML_BEGIN_STATE(BogusCommentState) {

- m_token->beginComment();

- HTML_RECONSUME_IN(ContinueBogusCommentState);

- }

- END_STATE()

- HTML_BEGIN_STATE(ContinueBogusCommentState) {

- if (cc == '>')

- return emitAndResumeIn(source, HTMLTokenizer::DataState);

- else if (cc == kEndOfFileMarker)

- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);

- else {

- m_token->appendToComment(cc);

- HTML_ADVANCE_TO(ContinueBogusCommentState);

- }

- END_STATE()

- HTML_BEGIN_STATE(MarkupDeclarationOpenState) {

+ HTML_BEGIN_STATE(CommentStart1State) {

if (cc == '-') {

- SegmentedString::LookAheadResult result = source.lookAhead(HTMLTokenizerNames::dashDash);

- if (result == SegmentedString::DidMatch) {

- source.advanceAndASSERT('-');

- m_token->beginComment();

- HTML_SWITCH_TO(CommentStartState);

- } else if (result == SegmentedString::NotEnoughCharacters)

- return haveBufferedCharacterToken();

- }

- parseError();

- HTML_RECONSUME_IN(BogusCommentState);

- }

- END_STATE()

- HTML_BEGIN_STATE(CommentStartState) {

- if (cc == '-')

- HTML_ADVANCE_TO(CommentStartDashState);

- else if (cc == '>') {

- parseError();

- return emitAndResumeIn(source, HTMLTokenizer::DataState);

- } else if (cc == kEndOfFileMarker) {

- parseError();

- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);

+ HTML_ADVANCE_TO(CommentStart2State);

} else {

- m_token->appendToComment(cc);

- HTML_ADVANCE_TO(CommentState);

+ bufferCharacter('<');

+ bufferCharacter('!');

+ HTML_RECONSUME_IN(DataState);

}

END_STATE()

- HTML_BEGIN_STATE(CommentStartDashState) {

- if (cc == '-')

- HTML_ADVANCE_TO(CommentEndState);

- else if (cc == '>') {

- parseError();

- return emitAndResumeIn(source, HTMLTokenizer::DataState);

- } else if (cc == kEndOfFileMarker) {

- parseError();

- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);

- } else {

- m_token->appendToComment('-');

- m_token->appendToComment(cc);

+ HTML_BEGIN_STATE(CommentStart2State) {

+ if (cc == '-') {

HTML_ADVANCE_TO(CommentState);

+ } else {

+ bufferCharacter('<');

+ bufferCharacter('!');

+ bufferCharacter('-');

+ HTML_RECONSUME_IN(DataState);

}

END_STATE()

HTML_BEGIN_STATE(CommentState) {

if (cc == '-')

- HTML_ADVANCE_TO(CommentEndDashState);

- else if (cc == kEndOfFileMarker) {

- parseError();

- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);

- } else {

- m_token->appendToComment(cc);

+ HTML_ADVANCE_TO(CommentEnd1State);

+ else

HTML_ADVANCE_TO(CommentState);

- }

}

END_STATE()

- HTML_BEGIN_STATE(CommentEndDashState) {

+ HTML_BEGIN_STATE(CommentEnd1State) {

if (cc == '-')

- HTML_ADVANCE_TO(CommentEndState);

- else if (cc == kEndOfFileMarker) {

- parseError();

- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);

- } else {

- m_token->appendToComment('-');

- m_token->appendToComment(cc);

- HTML_ADVANCE_TO(CommentState);

- }

- END_STATE()

- HTML_BEGIN_STATE(CommentEndState) {

- if (cc == '>')

- return emitAndResumeIn(source, HTMLTokenizer::DataState);

- else if (cc == '!') {

- parseError();

- HTML_ADVANCE_TO(CommentEndBangState);

- } else if (cc == '-') {

- parseError();

- m_token->appendToComment('-');

- HTML_ADVANCE_TO(CommentEndState);

- } else if (cc == kEndOfFileMarker) {

- parseError();

- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);

- } else {

- parseError();

- m_token->appendToComment('-');

- m_token->appendToComment(cc);

+ HTML_ADVANCE_TO(CommentEnd2State);

+ else

HTML_ADVANCE_TO(CommentState);

- }

}

END_STATE()

- HTML_BEGIN_STATE(CommentEndBangState) {

- if (cc == '-') {

- m_token->appendToComment('-');

- m_token->appendToComment('!');

- HTML_ADVANCE_TO(CommentEndDashState);

- } else if (cc == '>')

- return emitAndResumeIn(source, HTMLTokenizer::DataState);

- else if (cc == kEndOfFileMarker) {

- parseError();

- return emitAndReconsumeIn(source, HTMLTokenizer::DataState);

- } else {

- m_token->appendToComment('-');

- m_token->appendToComment('!');

- m_token->appendToComment(cc);

+ HTML_BEGIN_STATE(CommentEnd2State) {

+ if (cc == '-')

+ HTML_ADVANCE_TO(CommentEnd2State);

+ else if (cc == '>')

+ HTML_ADVANCE_TO(DataState);

+ else

HTML_ADVANCE_TO(CommentState);

- }

}

END_STATE()

}

ASSERT_NOT_REACHED();

« no previous file with comments | « sky/engine/core/html/parser/HTMLTokenizer.h ('k') | sky/engine/core/html/parser/HTMLTreeBuilder.cpp » ('j') | no next file with comments »