Index: ios/third_party/blink/src/html_tokenizer.mm |
diff --git a/ios/third_party/blink/src/html_tokenizer.mm b/ios/third_party/blink/src/html_tokenizer.mm |
new file mode 100644 |
index 0000000000000000000000000000000000000000..709f4d13cc11abfd7b3e5f7d4f365f4a6f2c7649 |
--- /dev/null |
+++ b/ios/third_party/blink/src/html_tokenizer.mm |
@@ -0,0 +1,787 @@ |
+/* |
+ * Copyright (C) 2008 Apple Inc. All Rights Reserved. |
+ * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ |
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved. |
+ * |
+ * Redistribution and use in source and binary forms, with or without |
+ * modification, are permitted provided that the following conditions |
+ * are met: |
+ * 1. Redistributions of source code must retain the above copyright |
+ * notice, this list of conditions and the following disclaimer. |
+ * 2. Redistributions in binary form must reproduce the above copyright |
+ * notice, this list of conditions and the following disclaimer in the |
+ * documentation and/or other materials provided with the distribution. |
+ * |
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
+ */ |
+ |
+#include "ios/third_party/blink/src/html_tokenizer.h" |
+ |
+#include "html_markup_tokenizer_inlines.h" |
+ |
+namespace WebCore { |
+ |
+#define HTML_BEGIN_STATE(stateName) BEGIN_STATE(HTMLTokenizer, stateName) /* These four shorthands pre-bind HTMLTokenizer as the first argument of the generic state macros (presumably declared in html_markup_tokenizer_inlines.h - confirm). */ |
+#define HTML_RECONSUME_IN(stateName) RECONSUME_IN(HTMLTokenizer, stateName) /* Forwards to RECONSUME_IN for HTMLTokenizer. */ |
+#define HTML_ADVANCE_TO(stateName) ADVANCE_TO(HTMLTokenizer, stateName) /* Forwards to ADVANCE_TO for HTMLTokenizer. */ |
+#define HTML_SWITCH_TO(stateName) SWITCH_TO(HTMLTokenizer, stateName) /* Forwards to SWITCH_TO for HTMLTokenizer. */ |
+ |
+// Builds a tokenizer that starts in DataState with no token attached and no |
+// additional allowed character. The input stream preprocessor is constructed |
+// with a pointer back to this tokenizer. |
+HTMLTokenizer::HTMLTokenizer() : |
+    m_state(HTMLTokenizer::DataState), |
+    m_token(nullptr), |
+    m_additionalAllowedCharacter('\0'), |
+    m_inputStreamPreprocessor(this) |
+{ |
+} |
+ |
+// Nothing to release explicitly; members clean up through their own |
+// destructors. Defaulted out of line so the definition stays in this file. |
+HTMLTokenizer::~HTMLTokenizer() = default; |
+ |
+// Puts the tokenizer back into its initial configuration: DataState, no |
+// token attached and no additional allowed character. |
+// m_inputStreamPreprocessor is not touched here. |
+void HTMLTokenizer::reset() |
+{ |
+    m_state = HTMLTokenizer::DataState; |
+    // nullptr rather than the literal 0, matching how the constructor |
+    // initializes m_token. |
+    m_token = nullptr; |
+    m_additionalAllowedCharacter = '\0'; |
+} |
+ |
+// Calls source.next() and then reports whether the current token is a |
+// Character token. FLUSH_AND_ADVANCE_TO treats a true result as "return the |
+// pending token to the caller now". |
+// |
+// The current token must be either Character or Uninitialized on entry. |
+bool HTMLTokenizer::flushBufferedEndTag(CharacterProvider& source) |
+{ |
+    ASSERT(m_token->type() == HTMLToken::Character || m_token->type() == HTMLToken::Uninitialized); |
+    source.next(); |
+    return m_token->type() == HTMLToken::Character; |
+} |
+ |
+#define FLUSH_AND_ADVANCE_TO(stateName) /* Records stateName in m_state, calls flushBufferedEndTag(), then either returns from the enclosing function or jumps to the stateName label with the next input character in cc. Expects source and cc to be in scope where it is expanded. */ \ |
+ do { \ |
+ m_state = HTMLTokenizer::stateName; \ |
+ if (flushBufferedEndTag(source)) /* a Character token is pending: hand it back first */ \ |
+ return true; \ |
+ if (source.isEmpty() \ |
+ || !m_inputStreamPreprocessor.peek(source)) /* no character could be peeked */ \ |
+ return haveBufferedCharacterToken(); \ |
+ cc = m_inputStreamPreprocessor.nextInputCharacter(); \ |
+ goto stateName; \ |
+ } while (false) |
+ |
+// Runs the tokenizer state machine over |source|, working on |token|. |
+// |
+// |token| is remembered in m_token. If a token is already in progress the |
+// caller is expected to pass the same object back (asserted below). |
+// |
+// Returns: |
+//  - haveBufferedCharacterToken() when no character can be peeked from |
+//    |source|, or when a look-ahead for "--", "doctype", "public" or |
+//    "system" does not have enough remaining bytes to decide; |
+//  - true when queued character data has to be emitted before a '<'; |
+//  - otherwise the result of the emit helper (emitAndResumeIn, |
+//    emitAndReconsumeIn or emitEndOfFile) invoked by the terminating state. |
+// |
+// Within this function only tag names are written to the token |
+// (beginStartTag / beginEndTag / appendToName). Attribute names and values, |
+// comment text and DOCTYPE names/identifiers are scanned past without being |
+// recorded. |
+bool HTMLTokenizer::nextToken(CharacterProvider& source, HTMLToken& token) |
+{ |
+    // If we have a token in progress, then we're supposed to be called back |
+    // with the same token so we can finish it. |
+    ASSERT(!m_token || m_token == &token || token.type() == HTMLToken::Uninitialized); |
+    m_token = &token; |
+ |
+    if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source)) |
+        return haveBufferedCharacterToken(); |
+    UChar cc = m_inputStreamPreprocessor.nextInputCharacter(); |
+ |
+    // Source: http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0 |
+    switch (m_state) { |
+    HTML_BEGIN_STATE(DataState) { |
+        if (cc == '<') { |
+            if (m_token->type() == HTMLToken::Character) { |
+                // We have a bunch of character tokens queued up that we |
+                // are emitting lazily here. |
+                return true; |
+            } |
+            HTML_ADVANCE_TO(TagOpenState); |
+        } else if (cc == kEndOfFileMarker) |
+            return emitEndOfFile(source); |
+        else { |
+            m_token->ensureIsCharacterToken(); |
+            HTML_ADVANCE_TO(DataState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(TagOpenState) { |
+        if (cc == '!') |
+            HTML_ADVANCE_TO(MarkupDeclarationOpenState); |
+        else if (cc == '/') |
+            HTML_ADVANCE_TO(EndTagOpenState); |
+        else if (isASCIIUpper(cc)) { |
+            m_token->beginStartTag(toLowerCase(cc)); |
+            HTML_ADVANCE_TO(TagNameState); |
+        } else if (isASCIILower(cc)) { |
+            m_token->beginStartTag(cc); |
+            HTML_ADVANCE_TO(TagNameState); |
+        } else if (cc == '?') { |
+            parseError(); |
+            // The spec consumes the current character before switching |
+            // to the bogus comment state, but it's easier to implement |
+            // if we reconsume the current character. |
+            HTML_RECONSUME_IN(BogusCommentState); |
+        } else { |
+            parseError(); |
+            m_token->ensureIsCharacterToken(); |
+            HTML_RECONSUME_IN(DataState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(EndTagOpenState) { |
+        if (isASCIIUpper(cc)) { |
+            m_token->beginEndTag(static_cast<LChar>(toLowerCase(cc))); |
+            HTML_ADVANCE_TO(TagNameState); |
+        } else if (isASCIILower(cc)) { |
+            m_token->beginEndTag(static_cast<LChar>(cc)); |
+            HTML_ADVANCE_TO(TagNameState); |
+        } else if (cc == '>') { |
+            parseError(); |
+            HTML_ADVANCE_TO(DataState); |
+        } else if (cc == kEndOfFileMarker) { |
+            parseError(); |
+            m_token->ensureIsCharacterToken(); |
+            HTML_RECONSUME_IN(DataState); |
+        } else { |
+            parseError(); |
+            HTML_RECONSUME_IN(BogusCommentState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(TagNameState) { |
+        if (isTokenizerWhitespace(cc)) |
+            HTML_ADVANCE_TO(BeforeAttributeNameState); |
+        else if (cc == '/') |
+            HTML_ADVANCE_TO(SelfClosingStartTagState); |
+        else if (cc == '>') |
+            return emitAndResumeIn(source, HTMLTokenizer::DataState); |
+        else if (isASCIIUpper(cc)) { |
+            m_token->appendToName(toLowerCase(cc)); |
+            HTML_ADVANCE_TO(TagNameState); |
+        } else if (cc == kEndOfFileMarker) { |
+            parseError(); |
+            HTML_RECONSUME_IN(DataState); |
+        } else { |
+            m_token->appendToName(cc); |
+            HTML_ADVANCE_TO(TagNameState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(BeforeAttributeNameState) { |
+        if (isTokenizerWhitespace(cc)) |
+            HTML_ADVANCE_TO(BeforeAttributeNameState); |
+        else if (cc == '/') |
+            HTML_ADVANCE_TO(SelfClosingStartTagState); |
+        else if (cc == '>') |
+            return emitAndResumeIn(source, HTMLTokenizer::DataState); |
+        else if (isASCIIUpper(cc)) { |
+            HTML_ADVANCE_TO(AttributeNameState); |
+        } else if (cc == kEndOfFileMarker) { |
+            parseError(); |
+            HTML_RECONSUME_IN(DataState); |
+        } else { |
+            if (cc == '"' || cc == '\'' || cc == '<' || cc == '=') |
+                parseError(); |
+            HTML_ADVANCE_TO(AttributeNameState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(AttributeNameState) { |
+        if (isTokenizerWhitespace(cc)) { |
+            HTML_ADVANCE_TO(AfterAttributeNameState); |
+        } else if (cc == '/') { |
+            HTML_ADVANCE_TO(SelfClosingStartTagState); |
+        } else if (cc == '=') { |
+            HTML_ADVANCE_TO(BeforeAttributeValueState); |
+        } else if (cc == '>') { |
+            return emitAndResumeIn(source, HTMLTokenizer::DataState); |
+        } else if (isASCIIUpper(cc)) { |
+            HTML_ADVANCE_TO(AttributeNameState); |
+        } else if (cc == kEndOfFileMarker) { |
+            parseError(); |
+            HTML_RECONSUME_IN(DataState); |
+        } else { |
+            // '=' is consumed by the branch above and can never reach this |
+            // point, so it is not tested again here. |
+            if (cc == '"' || cc == '\'' || cc == '<') |
+                parseError(); |
+            HTML_ADVANCE_TO(AttributeNameState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(AfterAttributeNameState) { |
+        if (isTokenizerWhitespace(cc)) |
+            HTML_ADVANCE_TO(AfterAttributeNameState); |
+        else if (cc == '/') |
+            HTML_ADVANCE_TO(SelfClosingStartTagState); |
+        else if (cc == '=') |
+            HTML_ADVANCE_TO(BeforeAttributeValueState); |
+        else if (cc == '>') |
+            return emitAndResumeIn(source, HTMLTokenizer::DataState); |
+        else if (isASCIIUpper(cc)) { |
+            HTML_ADVANCE_TO(AttributeNameState); |
+        } else if (cc == kEndOfFileMarker) { |
+            parseError(); |
+            HTML_RECONSUME_IN(DataState); |
+        } else { |
+            if (cc == '"' || cc == '\'' || cc == '<') |
+                parseError(); |
+            HTML_ADVANCE_TO(AttributeNameState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(BeforeAttributeValueState) { |
+        if (isTokenizerWhitespace(cc)) |
+            HTML_ADVANCE_TO(BeforeAttributeValueState); |
+        else if (cc == '"') { |
+            HTML_ADVANCE_TO(AttributeValueDoubleQuotedState); |
+        } else if (cc == '&') { |
+            HTML_RECONSUME_IN(AttributeValueUnquotedState); |
+        } else if (cc == '\'') { |
+            HTML_ADVANCE_TO(AttributeValueSingleQuotedState); |
+        } else if (cc == '>') { |
+            parseError(); |
+            return emitAndResumeIn(source, HTMLTokenizer::DataState); |
+        } else if (cc == kEndOfFileMarker) { |
+            parseError(); |
+            HTML_RECONSUME_IN(DataState); |
+        } else { |
+            if (cc == '<' || cc == '=' || cc == '`') |
+                parseError(); |
+            HTML_ADVANCE_TO(AttributeValueUnquotedState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(AttributeValueDoubleQuotedState) { |
+        if (cc == '"') { |
+            HTML_ADVANCE_TO(AfterAttributeValueQuotedState); |
+        } else if (cc == kEndOfFileMarker) { |
+            parseError(); |
+            HTML_RECONSUME_IN(DataState); |
+        } else { |
+            HTML_ADVANCE_TO(AttributeValueDoubleQuotedState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(AttributeValueSingleQuotedState) { |
+        if (cc == '\'') { |
+            HTML_ADVANCE_TO(AfterAttributeValueQuotedState); |
+        } else if (cc == kEndOfFileMarker) { |
+            parseError(); |
+            HTML_RECONSUME_IN(DataState); |
+        } else { |
+            HTML_ADVANCE_TO(AttributeValueSingleQuotedState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(AttributeValueUnquotedState) { |
+        if (isTokenizerWhitespace(cc)) { |
+            HTML_ADVANCE_TO(BeforeAttributeNameState); |
+        } else if (cc == '>') { |
+            return emitAndResumeIn(source, HTMLTokenizer::DataState); |
+        } else if (cc == kEndOfFileMarker) { |
+            parseError(); |
+            HTML_RECONSUME_IN(DataState); |
+        } else { |
+            if (cc == '"' || cc == '\'' || cc == '<' || cc == '=' || cc == '`') |
+                parseError(); |
+            HTML_ADVANCE_TO(AttributeValueUnquotedState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(AfterAttributeValueQuotedState) { |
+        if (isTokenizerWhitespace(cc)) |
+            HTML_ADVANCE_TO(BeforeAttributeNameState); |
+        else if (cc == '/') |
+            HTML_ADVANCE_TO(SelfClosingStartTagState); |
+        else if (cc == '>') |
+            return emitAndResumeIn(source, HTMLTokenizer::DataState); |
+        else if (cc == kEndOfFileMarker) { |
+            parseError(); |
+            HTML_RECONSUME_IN(DataState); |
+        } else { |
+            parseError(); |
+            HTML_RECONSUME_IN(BeforeAttributeNameState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(SelfClosingStartTagState) { |
+        if (cc == '>') { |
+            return emitAndResumeIn(source, HTMLTokenizer::DataState); |
+        } else if (cc == kEndOfFileMarker) { |
+            parseError(); |
+            HTML_RECONSUME_IN(DataState); |
+        } else { |
+            parseError(); |
+            HTML_RECONSUME_IN(BeforeAttributeNameState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(BogusCommentState) { |
+        m_token->beginComment(); |
+        HTML_RECONSUME_IN(ContinueBogusCommentState); |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(ContinueBogusCommentState) { |
+        if (cc == '>') |
+            return emitAndResumeIn(source, HTMLTokenizer::DataState); |
+        else if (cc == kEndOfFileMarker) |
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
+        else { |
+            HTML_ADVANCE_TO(ContinueBogusCommentState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(MarkupDeclarationOpenState) { |
+        DEFINE_STATIC_LOCAL_STRING(dashDashString, "--"); |
+        DEFINE_STATIC_LOCAL_STRING(doctypeString, "doctype"); |
+        if (cc == '-') { |
+            if (source.startsWith(dashDashString, dashDashStringLength)) { |
+                advanceAndASSERT(source, '-'); |
+                advanceAndASSERT(source, '-'); |
+                m_token->beginComment(); |
+                HTML_SWITCH_TO(CommentStartState); |
+            } else if (source.remainingBytes() < dashDashStringLength) |
+                return haveBufferedCharacterToken(); |
+        } else if (cc == 'D' || cc == 'd') { |
+            if (source.startsWith(doctypeString, doctypeStringLength, true)) { |
+                advanceStringAndASSERTIgnoringCase(source, doctypeString); |
+                HTML_SWITCH_TO(DOCTYPEState); |
+            } else if (source.remainingBytes() < doctypeStringLength) |
+                return haveBufferedCharacterToken(); |
+        } |
+        // Reached when neither "--" nor "doctype" matched. |
+        parseError(); |
+        HTML_RECONSUME_IN(BogusCommentState); |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(CommentStartState) { |
+        if (cc == '-') |
+            HTML_ADVANCE_TO(CommentStartDashState); |
+        else if (cc == '>') { |
+            parseError(); |
+            return emitAndResumeIn(source, HTMLTokenizer::DataState); |
+        } else if (cc == kEndOfFileMarker) { |
+            parseError(); |
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
+        } else { |
+            HTML_ADVANCE_TO(CommentState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(CommentStartDashState) { |
+        if (cc == '-') |
+            HTML_ADVANCE_TO(CommentEndState); |
+        else if (cc == '>') { |
+            parseError(); |
+            return emitAndResumeIn(source, HTMLTokenizer::DataState); |
+        } else if (cc == kEndOfFileMarker) { |
+            parseError(); |
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
+        } else { |
+            HTML_ADVANCE_TO(CommentState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(CommentState) { |
+        if (cc == '-') |
+            HTML_ADVANCE_TO(CommentEndDashState); |
+        else if (cc == kEndOfFileMarker) { |
+            parseError(); |
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
+        } else { |
+            HTML_ADVANCE_TO(CommentState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(CommentEndDashState) { |
+        if (cc == '-') |
+            HTML_ADVANCE_TO(CommentEndState); |
+        else if (cc == kEndOfFileMarker) { |
+            parseError(); |
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
+        } else { |
+            HTML_ADVANCE_TO(CommentState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(CommentEndState) { |
+        if (cc == '>') |
+            return emitAndResumeIn(source, HTMLTokenizer::DataState); |
+        else if (cc == '!') { |
+            parseError(); |
+            HTML_ADVANCE_TO(CommentEndBangState); |
+        } else if (cc == '-') { |
+            parseError(); |
+            HTML_ADVANCE_TO(CommentEndState); |
+        } else if (cc == kEndOfFileMarker) { |
+            parseError(); |
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
+        } else { |
+            parseError(); |
+            HTML_ADVANCE_TO(CommentState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(CommentEndBangState) { |
+        if (cc == '-') { |
+            HTML_ADVANCE_TO(CommentEndDashState); |
+        } else if (cc == '>') |
+            return emitAndResumeIn(source, HTMLTokenizer::DataState); |
+        else if (cc == kEndOfFileMarker) { |
+            parseError(); |
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
+        } else { |
+            HTML_ADVANCE_TO(CommentState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(DOCTYPEState) { |
+        if (isTokenizerWhitespace(cc)) |
+            HTML_ADVANCE_TO(BeforeDOCTYPENameState); |
+        else if (cc == kEndOfFileMarker) { |
+            parseError(); |
+            m_token->beginDOCTYPE(); |
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
+        } else { |
+            parseError(); |
+            HTML_RECONSUME_IN(BeforeDOCTYPENameState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(BeforeDOCTYPENameState) { |
+        if (isTokenizerWhitespace(cc)) |
+            HTML_ADVANCE_TO(BeforeDOCTYPENameState); |
+        else if (cc == '>') { |
+            parseError(); |
+            m_token->beginDOCTYPE(); |
+            return emitAndResumeIn(source, HTMLTokenizer::DataState); |
+        } else if (cc == kEndOfFileMarker) { |
+            parseError(); |
+            m_token->beginDOCTYPE(); |
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
+        } else { |
+            m_token->beginDOCTYPE(); |
+            HTML_ADVANCE_TO(DOCTYPENameState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(DOCTYPENameState) { |
+        if (isTokenizerWhitespace(cc)) |
+            HTML_ADVANCE_TO(AfterDOCTYPENameState); |
+        else if (cc == '>') |
+            return emitAndResumeIn(source, HTMLTokenizer::DataState); |
+        else if (cc == kEndOfFileMarker) { |
+            parseError(); |
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
+        } else { |
+            HTML_ADVANCE_TO(DOCTYPENameState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(AfterDOCTYPENameState) { |
+        // Chained with else-if like every other state, so the checks below |
+        // can never run for a whitespace character, regardless of how |
+        // HTML_ADVANCE_TO transfers control. |
+        if (isTokenizerWhitespace(cc)) |
+            HTML_ADVANCE_TO(AfterDOCTYPENameState); |
+        else if (cc == '>') |
+            return emitAndResumeIn(source, HTMLTokenizer::DataState); |
+        else if (cc == kEndOfFileMarker) { |
+            parseError(); |
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
+        } else { |
+            DEFINE_STATIC_LOCAL_STRING(publicString, "public"); |
+            DEFINE_STATIC_LOCAL_STRING(systemString, "system"); |
+            if (cc == 'P' || cc == 'p') { |
+                if (source.startsWith(publicString, publicStringLength, true)) { |
+                    advanceStringAndASSERTIgnoringCase(source, publicString); |
+                    HTML_SWITCH_TO(AfterDOCTYPEPublicKeywordState); |
+                } else if (source.remainingBytes() < publicStringLength) |
+                    return haveBufferedCharacterToken(); |
+            } else if (cc == 'S' || cc == 's') { |
+                if (source.startsWith(systemString, systemStringLength, true)) { |
+                    advanceStringAndASSERTIgnoringCase(source, systemString); |
+                    HTML_SWITCH_TO(AfterDOCTYPESystemKeywordState); |
+                } else if (source.remainingBytes() < systemStringLength) |
+                    return haveBufferedCharacterToken(); |
+            } |
+            // Reached when neither keyword matched. |
+            parseError(); |
+            HTML_ADVANCE_TO(BogusDOCTYPEState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(AfterDOCTYPEPublicKeywordState) { |
+        if (isTokenizerWhitespace(cc)) |
+            HTML_ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState); |
+        else if (cc == '"') { |
+            parseError(); |
+            HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState); |
+        } else if (cc == '\'') { |
+            parseError(); |
+            HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState); |
+        } else if (cc == '>') { |
+            parseError(); |
+            return emitAndResumeIn(source, HTMLTokenizer::DataState); |
+        } else if (cc == kEndOfFileMarker) { |
+            parseError(); |
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
+        } else { |
+            parseError(); |
+            HTML_ADVANCE_TO(BogusDOCTYPEState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(BeforeDOCTYPEPublicIdentifierState) { |
+        if (isTokenizerWhitespace(cc)) |
+            HTML_ADVANCE_TO(BeforeDOCTYPEPublicIdentifierState); |
+        else if (cc == '"') { |
+            HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState); |
+        } else if (cc == '\'') { |
+            HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState); |
+        } else if (cc == '>') { |
+            parseError(); |
+            return emitAndResumeIn(source, HTMLTokenizer::DataState); |
+        } else if (cc == kEndOfFileMarker) { |
+            parseError(); |
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
+        } else { |
+            parseError(); |
+            HTML_ADVANCE_TO(BogusDOCTYPEState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(DOCTYPEPublicIdentifierDoubleQuotedState) { |
+        if (cc == '"') |
+            HTML_ADVANCE_TO(AfterDOCTYPEPublicIdentifierState); |
+        else if (cc == '>') { |
+            parseError(); |
+            return emitAndResumeIn(source, HTMLTokenizer::DataState); |
+        } else if (cc == kEndOfFileMarker) { |
+            parseError(); |
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
+        } else { |
+            HTML_ADVANCE_TO(DOCTYPEPublicIdentifierDoubleQuotedState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(DOCTYPEPublicIdentifierSingleQuotedState) { |
+        if (cc == '\'') |
+            HTML_ADVANCE_TO(AfterDOCTYPEPublicIdentifierState); |
+        else if (cc == '>') { |
+            parseError(); |
+            return emitAndResumeIn(source, HTMLTokenizer::DataState); |
+        } else if (cc == kEndOfFileMarker) { |
+            parseError(); |
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
+        } else { |
+            HTML_ADVANCE_TO(DOCTYPEPublicIdentifierSingleQuotedState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(AfterDOCTYPEPublicIdentifierState) { |
+        if (isTokenizerWhitespace(cc)) |
+            HTML_ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState); |
+        else if (cc == '>') |
+            return emitAndResumeIn(source, HTMLTokenizer::DataState); |
+        else if (cc == '"') { |
+            parseError(); |
+            HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); |
+        } else if (cc == '\'') { |
+            parseError(); |
+            HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); |
+        } else if (cc == kEndOfFileMarker) { |
+            parseError(); |
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
+        } else { |
+            parseError(); |
+            HTML_ADVANCE_TO(BogusDOCTYPEState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(BetweenDOCTYPEPublicAndSystemIdentifiersState) { |
+        if (isTokenizerWhitespace(cc)) |
+            HTML_ADVANCE_TO(BetweenDOCTYPEPublicAndSystemIdentifiersState); |
+        else if (cc == '>') |
+            return emitAndResumeIn(source, HTMLTokenizer::DataState); |
+        else if (cc == '"') { |
+            HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); |
+        } else if (cc == '\'') { |
+            HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); |
+        } else if (cc == kEndOfFileMarker) { |
+            parseError(); |
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
+        } else { |
+            parseError(); |
+            HTML_ADVANCE_TO(BogusDOCTYPEState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(AfterDOCTYPESystemKeywordState) { |
+        if (isTokenizerWhitespace(cc)) |
+            HTML_ADVANCE_TO(BeforeDOCTYPESystemIdentifierState); |
+        else if (cc == '"') { |
+            parseError(); |
+            HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); |
+        } else if (cc == '\'') { |
+            parseError(); |
+            HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); |
+        } else if (cc == '>') { |
+            parseError(); |
+            return emitAndResumeIn(source, HTMLTokenizer::DataState); |
+        } else if (cc == kEndOfFileMarker) { |
+            parseError(); |
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
+        } else { |
+            parseError(); |
+            HTML_ADVANCE_TO(BogusDOCTYPEState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(BeforeDOCTYPESystemIdentifierState) { |
+        // Chained with else-if like every other state, so the checks below |
+        // can never run for a whitespace character, regardless of how |
+        // HTML_ADVANCE_TO transfers control. |
+        if (isTokenizerWhitespace(cc)) |
+            HTML_ADVANCE_TO(BeforeDOCTYPESystemIdentifierState); |
+        else if (cc == '"') { |
+            HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); |
+        } else if (cc == '\'') { |
+            HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); |
+        } else if (cc == '>') { |
+            parseError(); |
+            return emitAndResumeIn(source, HTMLTokenizer::DataState); |
+        } else if (cc == kEndOfFileMarker) { |
+            parseError(); |
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
+        } else { |
+            parseError(); |
+            HTML_ADVANCE_TO(BogusDOCTYPEState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(DOCTYPESystemIdentifierDoubleQuotedState) { |
+        if (cc == '"') |
+            HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState); |
+        else if (cc == '>') { |
+            parseError(); |
+            return emitAndResumeIn(source, HTMLTokenizer::DataState); |
+        } else if (cc == kEndOfFileMarker) { |
+            parseError(); |
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
+        } else { |
+            HTML_ADVANCE_TO(DOCTYPESystemIdentifierDoubleQuotedState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(DOCTYPESystemIdentifierSingleQuotedState) { |
+        if (cc == '\'') |
+            HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState); |
+        else if (cc == '>') { |
+            parseError(); |
+            return emitAndResumeIn(source, HTMLTokenizer::DataState); |
+        } else if (cc == kEndOfFileMarker) { |
+            parseError(); |
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
+        } else { |
+            HTML_ADVANCE_TO(DOCTYPESystemIdentifierSingleQuotedState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(AfterDOCTYPESystemIdentifierState) { |
+        if (isTokenizerWhitespace(cc)) |
+            HTML_ADVANCE_TO(AfterDOCTYPESystemIdentifierState); |
+        else if (cc == '>') |
+            return emitAndResumeIn(source, HTMLTokenizer::DataState); |
+        else if (cc == kEndOfFileMarker) { |
+            parseError(); |
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
+        } else { |
+            parseError(); |
+            HTML_ADVANCE_TO(BogusDOCTYPEState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(BogusDOCTYPEState) { |
+        if (cc == '>') |
+            return emitAndResumeIn(source, HTMLTokenizer::DataState); |
+        else if (cc == kEndOfFileMarker) |
+            return emitAndReconsumeIn(source, HTMLTokenizer::DataState); |
+        HTML_ADVANCE_TO(BogusDOCTYPEState); |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(CDATASectionState) { |
+        if (cc == ']') |
+            HTML_ADVANCE_TO(CDATASectionRightSquareBracketState); |
+        else if (cc == kEndOfFileMarker) |
+            HTML_RECONSUME_IN(DataState); |
+        else { |
+            m_token->ensureIsCharacterToken(); |
+            HTML_ADVANCE_TO(CDATASectionState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(CDATASectionRightSquareBracketState) { |
+        if (cc == ']') |
+            HTML_ADVANCE_TO(CDATASectionDoubleRightSquareBracketState); |
+        else { |
+            m_token->ensureIsCharacterToken(); |
+            HTML_RECONSUME_IN(CDATASectionState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    HTML_BEGIN_STATE(CDATASectionDoubleRightSquareBracketState) { |
+        if (cc == '>') |
+            HTML_ADVANCE_TO(DataState); |
+        else { |
+            m_token->ensureIsCharacterToken(); |
+            HTML_RECONSUME_IN(CDATASectionState); |
+        } |
+    } |
+    END_STATE() |
+ |
+    } |
+ |
+    ASSERT_NOT_REACHED(); |
+    return false; |
+} |
+ |
+inline void HTMLTokenizer::parseError() |
+{ |
+ notImplemented(); |
+} |
+ |
+} |