Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(936)

Unified Diff: sky/engine/core/html/parser/HTMLTokenizer.cpp

Issue 678073002: Parse Sky entities according to the spec (Closed) Base URL: git@github.com:domokit/mojo.git@master
Patch Set: Created 6 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « sky/engine/core/html/parser/HTMLTokenizer.h ('k') | sky/tests/parser/entity.html » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: sky/engine/core/html/parser/HTMLTokenizer.cpp
diff --git a/sky/engine/core/html/parser/HTMLTokenizer.cpp b/sky/engine/core/html/parser/HTMLTokenizer.cpp
index 7e765b277ea71084fe31a34ad8f7308930a44c34..ca454ad3cd314b958735a106c6b87d0ed7cb5b85 100644
--- a/sky/engine/core/html/parser/HTMLTokenizer.cpp
+++ b/sky/engine/core/html/parser/HTMLTokenizer.cpp
@@ -111,24 +111,6 @@ void HTMLTokenizer::reset()
{
m_state = HTMLTokenizer::DataState;
m_token = 0;
- m_additionalAllowedCharacter = '\0';
-}
-
-inline bool HTMLTokenizer::processEntity(SegmentedString& source)
-{
- bool notEnoughCharacters = false;
- DecodedHTMLEntity decodedEntity;
- bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters);
- if (notEnoughCharacters)
- return false;
- if (!success) {
- ASSERT(decodedEntity.isEmpty());
- bufferCharacter('&');
- } else {
- for (unsigned i = 0; i < decodedEntity.length; ++i)
- bufferCharacter(decodedEntity.data[i]);
- }
- return true;
}
bool HTMLTokenizer::flushBufferedEndTag(SegmentedString& source)
@@ -146,7 +128,7 @@ bool HTMLTokenizer::flushBufferedEndTag(SegmentedString& source)
#define FLUSH_AND_ADVANCE_TO(stateName) \
do { \
- m_state = HTMLTokenizer::stateName; \
+ m_state = HTMLTokenizer::stateName; \
if (flushBufferedEndTag(source)) \
return true; \
if (source.isEmpty() \
@@ -190,9 +172,11 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
// Source: http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0
switch (m_state) {
HTML_BEGIN_STATE(DataState) {
- if (cc == '&')
+ if (cc == '&') {
+ m_returnState = DataState;
+ m_entityParser.reset();
HTML_ADVANCE_TO(CharacterReferenceInDataState);
- else if (cc == '<') {
+ } else if (cc == '<') {
if (m_token->type() == HTMLToken::Character) {
// We have a bunch of character tokens queued up that we
// are emitting lazily here.
@@ -209,12 +193,34 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
END_STATE()
HTML_BEGIN_STATE(CharacterReferenceInDataState) {
- if (!processEntity(source))
+ if (!m_entityParser.parse(source))
return haveBufferedCharacterToken();
+ for (const UChar& entityCharacter : m_entityParser.result())
+ bufferCharacter(entityCharacter);
+ cc = m_inputStreamPreprocessor.nextInputCharacter();
+ ASSERT(m_returnState == DataState); // Must agree with the state switched to below.
HTML_SWITCH_TO(DataState);
}
END_STATE()
+ HTML_BEGIN_STATE(CharacterReferenceInAttributeValueState) {
+ if (!m_entityParser.parse(source))
+ return haveBufferedCharacterToken();
+ for (const UChar& entityCharacter : m_entityParser.result())
+ m_token->appendToAttributeValue(entityCharacter);
+ cc = m_inputStreamPreprocessor.nextInputCharacter();
+
+ if (m_returnState == AttributeValueDoubleQuotedState)
+ HTML_SWITCH_TO(AttributeValueDoubleQuotedState);
+ else if (m_returnState == AttributeValueSingleQuotedState)
+ HTML_SWITCH_TO(AttributeValueSingleQuotedState);
+ else if (m_returnState == AttributeValueUnquotedState)
+ HTML_SWITCH_TO(AttributeValueUnquotedState);
+ else
+ ASSERT_NOT_REACHED();
+ }
+ END_STATE()
+
HTML_BEGIN_STATE(RAWTEXTState) {
if (cc == '<')
HTML_ADVANCE_TO(RAWTEXTLessThanSignState);
@@ -477,7 +483,8 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
m_token->endAttributeValue(source.numberOfCharactersConsumed());
HTML_ADVANCE_TO(AfterAttributeValueQuotedState);
} else if (cc == '&') {
- m_additionalAllowedCharacter = '"';
+ m_returnState = AttributeValueDoubleQuotedState;
+ m_entityParser.reset();
HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState);
} else if (cc == kEndOfFileMarker) {
parseError();
@@ -495,7 +502,8 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
m_token->endAttributeValue(source.numberOfCharactersConsumed());
HTML_ADVANCE_TO(AfterAttributeValueQuotedState);
} else if (cc == '&') {
- m_additionalAllowedCharacter = '\'';
+ m_returnState = AttributeValueSingleQuotedState;
+ m_entityParser.reset();
HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState);
} else if (cc == kEndOfFileMarker) {
parseError();
@@ -513,7 +521,8 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
m_token->endAttributeValue(source.numberOfCharactersConsumed());
HTML_ADVANCE_TO(BeforeAttributeNameState);
} else if (cc == '&') {
- m_additionalAllowedCharacter = '>';
+ m_returnState = AttributeValueUnquotedState;
+ m_entityParser.reset();
HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState);
} else if (cc == '>') {
m_token->endAttributeValue(source.numberOfCharactersConsumed());
@@ -531,34 +540,6 @@ bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
}
END_STATE()
- HTML_BEGIN_STATE(CharacterReferenceInAttributeValueState) {
- bool notEnoughCharacters = false;
- DecodedHTMLEntity decodedEntity;
- bool success = consumeHTMLEntity(source, decodedEntity, notEnoughCharacters, m_additionalAllowedCharacter);
- if (notEnoughCharacters)
- return haveBufferedCharacterToken();
- if (!success) {
- ASSERT(decodedEntity.isEmpty());
- m_token->appendToAttributeValue('&');
- } else {
- for (unsigned i = 0; i < decodedEntity.length; ++i)
- m_token->appendToAttributeValue(decodedEntity.data[i]);
- }
- // We're supposed to switch back to the attribute value state that
- // we were in when we were switched into this state. Rather than
- // keeping track of this explictly, we observe that the previous
- // state can be determined by m_additionalAllowedCharacter.
- if (m_additionalAllowedCharacter == '"')
- HTML_SWITCH_TO(AttributeValueDoubleQuotedState);
- else if (m_additionalAllowedCharacter == '\'')
- HTML_SWITCH_TO(AttributeValueSingleQuotedState);
- else if (m_additionalAllowedCharacter == '>')
- HTML_SWITCH_TO(AttributeValueUnquotedState);
- else
- ASSERT_NOT_REACHED();
- }
- END_STATE()
-
HTML_BEGIN_STATE(AfterAttributeValueQuotedState) {
if (isTokenizerWhitespace(cc))
HTML_ADVANCE_TO(BeforeAttributeNameState);
« no previous file with comments | « sky/engine/core/html/parser/HTMLTokenizer.h ('k') | sky/tests/parser/entity.html » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698