Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(270)

Unified Diff: ios/third_party/blink/src/html_tokenizer.mm

Issue 1031023002: Upstream ios/web/ HTML tokenizer (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 5 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « ios/third_party/blink/src/html_tokenizer.h ('k') | ios/third_party/blink/src/html_tokenizer_adapter.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: ios/third_party/blink/src/html_tokenizer.mm
diff --git a/ios/third_party/blink/src/html_tokenizer.mm b/ios/third_party/blink/src/html_tokenizer.mm
new file mode 100644
index 0000000000000000000000000000000000000000..709f4d13cc11abfd7b3e5f7d4f365f4a6f2c7649
--- /dev/null
+++ b/ios/third_party/blink/src/html_tokenizer.mm
@@ -0,0 +1,787 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "ios/third_party/blink/src/html_tokenizer.h"
+
+#include "html_markup_tokenizer_inlines.h"
+
+namespace WebCore {
+
+#define HTML_BEGIN_STATE(stateName) BEGIN_STATE(HTMLTokenizer, stateName)
+#define HTML_RECONSUME_IN(stateName) RECONSUME_IN(HTMLTokenizer, stateName)
+#define HTML_ADVANCE_TO(stateName) ADVANCE_TO(HTMLTokenizer, stateName)
+#define HTML_SWITCH_TO(stateName) SWITCH_TO(HTMLTokenizer, stateName)
+
+HTMLTokenizer::HTMLTokenizer()
+ : m_state(HTMLTokenizer::DataState)
+ , m_token(nullptr)
+ , m_additionalAllowedCharacter('\0')
+ , m_inputStreamPreprocessor(this)
+{
+}
+
+HTMLTokenizer::~HTMLTokenizer()
+{
+}
+
+void HTMLTokenizer::reset()
+{
+ m_state = HTMLTokenizer::DataState;
+ m_token = 0;
+ m_additionalAllowedCharacter = '\0';
+}
+
+bool HTMLTokenizer::flushBufferedEndTag(CharacterProvider& source)
+{
+ ASSERT(m_token->type() == HTMLToken::Character || m_token->type() == HTMLToken::Uninitialized);
+ source.next();
+ if (m_token->type() == HTMLToken::Character)
+ return true;
+
+ return false;
+}
+
+#define FLUSH_AND_ADVANCE_TO(stateName) \
+ do { \
+ m_state = HTMLTokenizer::stateName; \
+ if (flushBufferedEndTag(source)) \
+ return true; \
+ if (source.isEmpty() \
+ || !m_inputStreamPreprocessor.peek(source)) \
+ return haveBufferedCharacterToken(); \
+ cc = m_inputStreamPreprocessor.nextInputCharacter(); \
+ goto stateName; \
+ } while (false)
+
+bool HTMLTokenizer::nextToken(CharacterProvider& source, HTMLToken& token)
+{
+ // If we have a token in progress, then we're supposed to be called back
+ // with the same token so we can finish it.
+ ASSERT(!m_token || m_token == &token || token.type() == HTMLToken::Uninitialized);
+ m_token = &token;
+
+ if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source))
+ return haveBufferedCharacterToken();
+ UChar cc = m_inputStreamPreprocessor.nextInputCharacter();
+
+ // Source: http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0
+ switch (m_state) {
+ HTML_BEGIN_STATE(DataState) {
+ if (cc == '<') {
+ if (m_token->type() == HTMLToken::Character) {
+ // We have a bunch of character tokens queued up that we
+ // are emitting lazily here.
+ return true;
+ }
+ HTML_ADVANCE_TO(TagOpenState);
+ } else if (cc == kEndOfFileMarker)
+ return emitEndOfFile(source);
+ else {
+ m_token->ensureIsCharacterToken();
+ HTML_ADVANCE_TO(DataState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(TagOpenState) {
+ if (cc == '!')
+ HTML_ADVANCE_TO(MarkupDeclarationOpenState);
+ else if (cc == '/')
+ HTML_ADVANCE_TO(EndTagOpenState);
+ else if (isASCIIUpper(cc)) {
+ m_token->beginStartTag(toLowerCase(cc));
+ HTML_ADVANCE_TO(TagNameState);
+ } else if (isASCIILower(cc)) {
+ m_token->beginStartTag(cc);
+ HTML_ADVANCE_TO(TagNameState);
+ } else if (cc == '?') {
+ parseError();
+ // The spec consumes the current character before switching
+ // to the bogus comment state, but it's easier to implement
+ // if we reconsume the current character.
+ HTML_RECONSUME_IN(BogusCommentState);
+ } else {
+ parseError();
+ m_token->ensureIsCharacterToken();
+ HTML_RECONSUME_IN(DataState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(EndTagOpenState) {
+ if (isASCIIUpper(cc)) {
+ m_token->beginEndTag(static_cast<LChar>(toLowerCase(cc)));
+ HTML_ADVANCE_TO(TagNameState);
+ } else if (isASCIILower(cc)) {
+ m_token->beginEndTag(static_cast<LChar>(cc));
+ HTML_ADVANCE_TO(TagNameState);
+ } else if (cc == '>') {
+ parseError();
+ HTML_ADVANCE_TO(DataState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ m_token->ensureIsCharacterToken();
+ HTML_RECONSUME_IN(DataState);
+ } else {
+ parseError();
+ HTML_RECONSUME_IN(BogusCommentState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(TagNameState) {
+ if (isTokenizerWhitespace(cc))
+ HTML_ADVANCE_TO(BeforeAttributeNameState);
+ else if (cc == '/')
+ HTML_ADVANCE_TO(SelfClosingStartTagState);
+ else if (cc == '>')
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ else if (isASCIIUpper(cc)) {
+ m_token->appendToName(toLowerCase(cc));
+ HTML_ADVANCE_TO(TagNameState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ HTML_RECONSUME_IN(DataState);
+ } else {
+ m_token->appendToName(cc);
+ HTML_ADVANCE_TO(TagNameState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(BeforeAttributeNameState) {
+ if (isTokenizerWhitespace(cc))
+ HTML_ADVANCE_TO(BeforeAttributeNameState);
+ else if (cc == '/')
+ HTML_ADVANCE_TO(SelfClosingStartTagState);
+ else if (cc == '>')
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ else if (isASCIIUpper(cc)) {
+ HTML_ADVANCE_TO(AttributeNameState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ HTML_RECONSUME_IN(DataState);
+ } else {
+ if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
+ parseError();
+ HTML_ADVANCE_TO(AttributeNameState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(AttributeNameState) {
+ if (isTokenizerWhitespace(cc)) {
+ HTML_ADVANCE_TO(AfterAttributeNameState);
+ } else if (cc == '/') {
+ HTML_ADVANCE_TO(SelfClosingStartTagState);
+ } else if (cc == '=') {
+ HTML_ADVANCE_TO(BeforeAttributeValueState);
+ } else if (cc == '>') {
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ } else if (isASCIIUpper(cc)) {
+ HTML_ADVANCE_TO(AttributeNameState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ HTML_RECONSUME_IN(DataState);
+ } else {
+ if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
+ parseError();
+ HTML_ADVANCE_TO(AttributeNameState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(AfterAttributeNameState) {
+ if (isTokenizerWhitespace(cc))
+ HTML_ADVANCE_TO(AfterAttributeNameState);
+ else if (cc == '/')
+ HTML_ADVANCE_TO(SelfClosingStartTagState);
+ else if (cc == '=')
+ HTML_ADVANCE_TO(BeforeAttributeValueState);
+ else if (cc == '>')
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ else if (isASCIIUpper(cc)) {
+ HTML_ADVANCE_TO(AttributeNameState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ HTML_RECONSUME_IN(DataState);
+ } else {
+ if (cc == '"' || cc == '\'' || cc == '<')
+ parseError();
+ HTML_ADVANCE_TO(AttributeNameState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(BeforeAttributeValueState) {
+ if (isTokenizerWhitespace(cc))
+ HTML_ADVANCE_TO(BeforeAttributeValueState);
+ else if (cc == '"') {
+ HTML_ADVANCE_TO(AttributeValueDoubleQuotedState);
+ } else if (cc == '&') {
+ HTML_RECONSUME_IN(AttributeValueUnquotedState);
+ } else if (cc == '\'') {
+ HTML_ADVANCE_TO(AttributeValueSingleQuotedState);
+ } else if (cc == '>') {
+ parseError();
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ HTML_RECONSUME_IN(DataState);
+ } else {
+ if (cc == '<' || cc == '=' || cc == '`')
+ parseError();
+ HTML_ADVANCE_TO(AttributeValueUnquotedState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(AttributeValueDoubleQuotedState) {
+ if (cc == '"') {
+ HTML_ADVANCE_TO(AfterAttributeValueQuotedState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ HTML_RECONSUME_IN(DataState);
+ } else {
+ HTML_ADVANCE_TO(AttributeValueDoubleQuotedState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(AttributeValueSingleQuotedState) {
+ if (cc == '\'') {
+ HTML_ADVANCE_TO(AfterAttributeValueQuotedState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ HTML_RECONSUME_IN(DataState);
+ } else {
+ HTML_ADVANCE_TO(AttributeValueSingleQuotedState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(AttributeValueUnquotedState) {
+ if (isTokenizerWhitespace(cc)) {
+ HTML_ADVANCE_TO(BeforeAttributeNameState);
+ } else if (cc == '>') {
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ HTML_RECONSUME_IN(DataState);
+ } else {
+ if (cc == '"' || cc == '\'' || cc == '<' || cc == '=' || cc == '`')
+ parseError();
+ HTML_ADVANCE_TO(AttributeValueUnquotedState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(AfterAttributeValueQuotedState) {
+ if (isTokenizerWhitespace(cc))
+ HTML_ADVANCE_TO(BeforeAttributeNameState);
+ else if (cc == '/')
+ HTML_ADVANCE_TO(SelfClosingStartTagState);
+ else if (cc == '>')
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ else if (cc == kEndOfFileMarker) {
+ parseError();
+ HTML_RECONSUME_IN(DataState);
+ } else {
+ parseError();
+ HTML_RECONSUME_IN(BeforeAttributeNameState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(SelfClosingStartTagState) {
+ if (cc == '>') {
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ HTML_RECONSUME_IN(DataState);
+ } else {
+ parseError();
+ HTML_RECONSUME_IN(BeforeAttributeNameState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(BogusCommentState) {
+ m_token->beginComment();
+ HTML_RECONSUME_IN(ContinueBogusCommentState);
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(ContinueBogusCommentState) {
+ if (cc == '>')
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ else if (cc == kEndOfFileMarker)
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ else {
+ HTML_ADVANCE_TO(ContinueBogusCommentState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(MarkupDeclarationOpenState) {
+ DEFINE_STATIC_LOCAL_STRING(dashDashString, "--");
+ DEFINE_STATIC_LOCAL_STRING(doctypeString, "doctype");
+ if (cc == '-') {
+ if (source.startsWith(dashDashString, dashDashStringLength)) {
+ advanceAndASSERT(source, '-');
+ advanceAndASSERT(source, '-');
+ m_token->beginComment();
+ HTML_SWITCH_TO(CommentStartState);
+ } else if (source.remainingBytes() < dashDashStringLength)
+ return haveBufferedCharacterToken();
+ } else if (cc == 'D' || cc == 'd') {
+ if (source.startsWith(doctypeString, doctypeStringLength, true)) {
+ advanceStringAndASSERTIgnoringCase(source, doctypeString);
+ HTML_SWITCH_TO(DOCTYPEState);
+ } else if (source.remainingBytes() < doctypeStringLength)
+ return haveBufferedCharacterToken();
+ }
+ parseError();
+ HTML_RECONSUME_IN(BogusCommentState);
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(CommentStartState) {
+ if (cc == '-')
+ HTML_ADVANCE_TO(CommentStartDashState);
+ else if (cc == '>') {
+ parseError();
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ HTML_ADVANCE_TO(CommentState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(CommentStartDashState) {
+ if (cc == '-')
+ HTML_ADVANCE_TO(CommentEndState);
+ else if (cc == '>') {
+ parseError();
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ HTML_ADVANCE_TO(CommentState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(CommentState) {
+ if (cc == '-')
+ HTML_ADVANCE_TO(CommentEndDashState);
+ else if (cc == kEndOfFileMarker) {
+ parseError();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ HTML_ADVANCE_TO(CommentState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(CommentEndDashState) {
+ if (cc == '-')
+ HTML_ADVANCE_TO(CommentEndState);
+ else if (cc == kEndOfFileMarker) {
+ parseError();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ HTML_ADVANCE_TO(CommentState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(CommentEndState) {
+ if (cc == '>')
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ else if (cc == '!') {
+ parseError();
+ HTML_ADVANCE_TO(CommentEndBangState);
+ } else if (cc == '-') {
+ parseError();
+ HTML_ADVANCE_TO(CommentEndState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ parseError();
+ HTML_ADVANCE_TO(CommentState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(CommentEndBangState) {
+ if (cc == '-') {
+ HTML_ADVANCE_TO(CommentEndDashState);
+ } else if (cc == '>')
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ else if (cc == kEndOfFileMarker) {
+ parseError();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ HTML_ADVANCE_TO(CommentState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(DOCTYPEState) {
+ if (isTokenizerWhitespace(cc))
+ HTML_ADVANCE_TO(BeforeDOCTYPENameState);
+ else if (cc == kEndOfFileMarker) {
+ parseError();
+ m_token->beginDOCTYPE();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ parseError();
+ HTML_RECONSUME_IN(BeforeDOCTYPENameState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(BeforeDOCTYPENameState) {
+ if (isTokenizerWhitespace(cc))
+ HTML_ADVANCE_TO(BeforeDOCTYPENameState);
+ else if (cc == '>') {
+ parseError();
+ m_token->beginDOCTYPE();
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ m_token->beginDOCTYPE();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ m_token->beginDOCTYPE();
+ HTML_ADVANCE_TO(DOCTYPENameState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(DOCTYPENameState) {
+ if (isTokenizerWhitespace(cc))
+ HTML_ADVANCE_TO(AfterDOCTYPENameState);
+ else if (cc == '>')
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ else if (cc == kEndOfFileMarker) {
+ parseError();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ HTML_ADVANCE_TO(DOCTYPENameState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(AfterDOCTYPENameState) {
+ if (isTokenizerWhitespace(cc))
+ HTML_ADVANCE_TO(AfterDOCTYPENameState);
+ if (cc == '>')
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ else if (cc == kEndOfFileMarker) {
+ parseError();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ DEFINE_STATIC_LOCAL_STRING(publicString, "public");
+ DEFINE_STATIC_LOCAL_STRING(systemString, "system");
+ if (cc == 'P' || cc == 'p') {
+ if (source.startsWith(publicString, publicStringLength, true)) {
+ advanceStringAndASSERTIgnoringCase(source, publicString);
+ HTML_SWITCH_TO(AfterDOCTYPEPublicKeywordState);
+ } else if (source.remainingBytes() < publicStringLength)
+ return haveBufferedCharacterToken();
+ } else if (cc == 'S' || cc == 's') {
+ if (source.startsWith(systemString, systemStringLength, true)) {
+ advanceStringAndASSERTIgnoringCase(source, systemString);
+ HTML_SWITCH_TO(AfterDOCTYPESystemKeywordState);
+ } else if (source.remainingBytes() < systemStringLength)
+ return haveBufferedCharacterToken();
+ }
+ parseError();
+ HTML_ADVANCE_TO(BogusDOCTYPEState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(AfterDOCTYPEPublicKeywordState) {
+ if (isTokenizerWhitespace(cc))
+ HTML_ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
+ else if (cc == '"') {
+ parseError();
+ HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
+ } else if (cc == '\'') {
+ parseError();
+ HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
+ } else if (cc == '>') {
+ parseError();
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ parseError();
+ HTML_ADVANCE_TO(BogusDOCTYPEState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(BeforeDOCTYPEPublicIdentifierState) {
+ if (isTokenizerWhitespace(cc))
+ HTML_ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState);
+ else if (cc == '"') {
+ HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
+ } else if (cc == '\'') {
+ HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
+ } else if (cc == '>') {
+ parseError();
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ parseError();
+ HTML_ADVANCE_TO(BogusDOCTYPEState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(DOCTYPEPublicIdentifierDoubleQuotedState) {
+ if (cc == '"')
+ HTML_ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
+ else if (cc == '>') {
+ parseError();
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(DOCTYPEPublicIdentifierSingleQuotedState) {
+ if (cc == '\'')
+ HTML_ADVANCE_TO(AfterDOCTYPEPublicIdentifierState);
+ else if (cc == '>') {
+ parseError();
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(AfterDOCTYPEPublicIdentifierState) {
+ if (isTokenizerWhitespace(cc))
+ HTML_ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
+ else if (cc == '>')
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ else if (cc == '"') {
+ parseError();
+ HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
+ } else if (cc == '\'') {
+ parseError();
+ HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ parseError();
+ HTML_ADVANCE_TO(BogusDOCTYPEState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(BetweenDOCTYPEPublicAndSystemIdentifiersState) {
+ if (isTokenizerWhitespace(cc))
+ HTML_ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState);
+ else if (cc == '>')
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ else if (cc == '"') {
+ HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
+ } else if (cc == '\'') {
+ HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ parseError();
+ HTML_ADVANCE_TO(BogusDOCTYPEState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(AfterDOCTYPESystemKeywordState) {
+ if (isTokenizerWhitespace(cc))
+ HTML_ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
+ else if (cc == '"') {
+ parseError();
+ HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
+ } else if (cc == '\'') {
+ parseError();
+ HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
+ } else if (cc == '>') {
+ parseError();
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ parseError();
+ HTML_ADVANCE_TO(BogusDOCTYPEState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(BeforeDOCTYPESystemIdentifierState) {
+ if (isTokenizerWhitespace(cc))
+ HTML_ADVANCE_TO(BeforeDOCTYPESystemIdentifierState);
+ if (cc == '"') {
+ HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
+ } else if (cc == '\'') {
+ HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
+ } else if (cc == '>') {
+ parseError();
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ parseError();
+ HTML_ADVANCE_TO(BogusDOCTYPEState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(DOCTYPESystemIdentifierDoubleQuotedState) {
+ if (cc == '"')
+ HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
+ else if (cc == '>') {
+ parseError();
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(DOCTYPESystemIdentifierSingleQuotedState) {
+ if (cc == '\'')
+ HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
+ else if (cc == '>') {
+ parseError();
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ } else if (cc == kEndOfFileMarker) {
+ parseError();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(AfterDOCTYPESystemIdentifierState) {
+ if (isTokenizerWhitespace(cc))
+ HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState);
+ else if (cc == '>')
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ else if (cc == kEndOfFileMarker) {
+ parseError();
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ } else {
+ parseError();
+ HTML_ADVANCE_TO(BogusDOCTYPEState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(BogusDOCTYPEState) {
+ if (cc == '>')
+ return emitAndResumeIn(source, HTMLTokenizer::DataState);
+ else if (cc == kEndOfFileMarker)
+ return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
+ HTML_ADVANCE_TO(BogusDOCTYPEState);
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(CDATASectionState) {
+ if (cc == ']')
+ HTML_ADVANCE_TO(CDATASectionRightSquareBracketState);
+ else if (cc == kEndOfFileMarker)
+ HTML_RECONSUME_IN(DataState);
+ else {
+ m_token->ensureIsCharacterToken();
+ HTML_ADVANCE_TO(CDATASectionState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(CDATASectionRightSquareBracketState) {
+ if (cc == ']')
+ HTML_ADVANCE_TO(CDATASectionDoubleRightSquareBracketState);
+ else {
+ m_token->ensureIsCharacterToken();
+ HTML_RECONSUME_IN(CDATASectionState);
+ }
+ }
+ END_STATE()
+
+ HTML_BEGIN_STATE(CDATASectionDoubleRightSquareBracketState) {
+ if (cc == '>')
+ HTML_ADVANCE_TO(DataState);
+ else {
+ m_token->ensureIsCharacterToken();
+ HTML_RECONSUME_IN(CDATASectionState);
+ }
+ }
+ END_STATE()
+
+ }
+
+ ASSERT_NOT_REACHED();
+ return false;
+}
+
+inline void HTMLTokenizer::parseError()
+{
+ notImplemented();
+}
+
+}
« no previous file with comments | « ios/third_party/blink/src/html_tokenizer.h ('k') | ios/third_party/blink/src/html_tokenizer_adapter.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698