Index: sky/engine/core/html/parser/HTMLTokenizer.cpp |
diff --git a/sky/engine/core/html/parser/HTMLTokenizer.cpp b/sky/engine/core/html/parser/HTMLTokenizer.cpp |
index 7e765b277ea71084fe31a34ad8f7308930a44c34..ca454ad3cd314b958735a106c6b87d0ed7cb5b85 100644 |
--- a/sky/engine/core/html/parser/HTMLTokenizer.cpp |
+++ b/sky/engine/core/html/parser/HTMLTokenizer.cpp |
@@ -111,24 +111,6 @@ void HTMLTokenizer::reset() |
{ |
m_state = HTMLTokenizer::DataState; |
m_token = 0; |
- m_additionalAllowedCharacter = '\0'; |
-} |
- |
-inline bool HTMLTokenizer::processEntity(SegmentedString& source) |
-{ |
- bool notEnoughCharacters = false; |
- DecodedHTMLEntity decodedEntity; |
- bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters); |
- if (notEnoughCharacters) |
- return false; |
- if (!success) { |
- ASSERT(decodedEntity.isEmpty()); |
- bufferCharacter('&'); |
- } else { |
- for (unsigned i = 0; i < decodedEntity.length; ++i) |
- bufferCharacter(decodedEntity.data[i]); |
- } |
- return true; |
} |
bool HTMLTokenizer::flushBufferedEndTag(SegmentedString& source) |
@@ -146,7 +128,7 @@ bool HTMLTokenizer::flushBufferedEndTag(SegmentedString& source) |
#define FLUSH_AND_ADVANCE_TO(stateName) \ |
do { \ |
- m_state = HTMLTokenizer::stateName; \ |
+ m_state = HTMLTokenizer::stateName; \ |
if (flushBufferedEndTag(source)) \ |
return true; \ |
if (source.isEmpty() \ |
@@ -190,9 +172,11 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) |
// Source: http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0 |
switch (m_state) { |
HTML_BEGIN_STATE(DataState) { |
- if (cc == '&') |
+ if (cc == '&') { |
+ m_returnState = DataState; |
+ m_entityParser.reset(); |
HTML_ADVANCE_TO(CharacterReferenceInDataState); |
- else if (cc == '<') { |
+ } else if (cc == '<') { |
if (m_token->type() == HTMLToken::Character) { |
// We have a bunch of character tokens queued up that we |
// are emitting lazily here. |
@@ -209,12 +193,34 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) |
END_STATE() |
HTML_BEGIN_STATE(CharacterReferenceInDataState) { |
- if (!processEntity(source)) |
+ if (!m_entityParser.parse(source)) |
return haveBufferedCharacterToken(); |
+ for (const UChar& entityCharacter : m_entityParser.result()) |
+ bufferCharacter(entityCharacter); |
+ cc = m_inputStreamPreprocessor.nextInputCharacter(); |
+ ASSERT(m_returnState == m_returnState); |
HTML_SWITCH_TO(DataState); |
} |
END_STATE() |
+ HTML_BEGIN_STATE(CharacterReferenceInAttributeValueState) { |
+ if (!m_entityParser.parse(source)) |
+ return haveBufferedCharacterToken(); |
+ for (const UChar& entityCharacter : m_entityParser.result()) |
+ m_token->appendToAttributeValue(entityCharacter); |
+ cc = m_inputStreamPreprocessor.nextInputCharacter(); |
+ |
+ if (m_returnState == AttributeValueDoubleQuotedState) |
+ HTML_SWITCH_TO(AttributeValueDoubleQuotedState); |
+ else if (m_returnState == AttributeValueSingleQuotedState) |
+ HTML_SWITCH_TO(AttributeValueSingleQuotedState); |
+ else if (m_returnState == AttributeValueUnquotedState) |
+ HTML_SWITCH_TO(AttributeValueUnquotedState); |
+ else |
+ ASSERT_NOT_REACHED(); |
+ } |
+ END_STATE() |
+ |
HTML_BEGIN_STATE(RAWTEXTState) { |
if (cc == '<') |
HTML_ADVANCE_TO(RAWTEXTLessThanSignState); |
@@ -477,7 +483,8 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) |
m_token->endAttributeValue(source.numberOfCharactersConsumed()); |
HTML_ADVANCE_TO(AfterAttributeValueQuotedState); |
} else if (cc == '&') { |
- m_additionalAllowedCharacter = '"'; |
+ m_returnState = AttributeValueDoubleQuotedState; |
+ m_entityParser.reset(); |
HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState); |
} else if (cc == kEndOfFileMarker) { |
parseError(); |
@@ -495,7 +502,8 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) |
m_token->endAttributeValue(source.numberOfCharactersConsumed()); |
HTML_ADVANCE_TO(AfterAttributeValueQuotedState); |
} else if (cc == '&') { |
- m_additionalAllowedCharacter = '\''; |
+ m_returnState = AttributeValueSingleQuotedState; |
+ m_entityParser.reset(); |
HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState); |
} else if (cc == kEndOfFileMarker) { |
parseError(); |
@@ -513,7 +521,8 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) |
m_token->endAttributeValue(source.numberOfCharactersConsumed()); |
HTML_ADVANCE_TO(BeforeAttributeNameState); |
} else if (cc == '&') { |
- m_additionalAllowedCharacter = '>'; |
+ m_returnState = AttributeValueUnquotedState; |
+ m_entityParser.reset(); |
HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState); |
} else if (cc == '>') { |
m_token->endAttributeValue(source.numberOfCharactersConsumed()); |
@@ -531,34 +540,6 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) |
} |
END_STATE() |
- HTML_BEGIN_STATE(CharacterReferenceInAttributeValueState) { |
- bool notEnoughCharacters = false; |
- DecodedHTMLEntity decodedEntity; |
- bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters, m_additionalAllowedCharacter); |
- if (notEnoughCharacters) |
- return haveBufferedCharacterToken(); |
- if (!success) { |
- ASSERT(decodedEntity.isEmpty()); |
- m_token->appendToAttributeValue('&'); |
- } else { |
- for (unsigned i = 0; i < decodedEntity.length; ++i) |
- m_token->appendToAttributeValue(decodedEntity.data[i]); |
- } |
- // We're supposed to switch back to the attribute value state that |
- // we were in when we were switched into this state. Rather than |
- // keeping track of this explictly, we observe that the previous |
- // state can be determined by m_additionalAllowedCharacter. |
- if (m_additionalAllowedCharacter == '"') |
- HTML_SWITCH_TO(AttributeValueDoubleQuotedState); |
- else if (m_additionalAllowedCharacter == '\'') |
- HTML_SWITCH_TO(AttributeValueSingleQuotedState); |
- else if (m_additionalAllowedCharacter == '>') |
- HTML_SWITCH_TO(AttributeValueUnquotedState); |
- else |
- ASSERT_NOT_REACHED(); |
- } |
- END_STATE() |
- |
HTML_BEGIN_STATE(AfterAttributeValueQuotedState) { |
if (isTokenizerWhitespace(cc)) |
HTML_ADVANCE_TO(BeforeAttributeNameState); |