Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(29)

Unified Diff: ios/third_party/blink/src/html_tokenizer.h

Issue 1031023002: Upstream ios/web/ HTML tokenizer (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 5 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « ios/third_party/blink/src/html_token.mm ('k') | ios/third_party/blink/src/html_tokenizer.mm » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: ios/third_party/blink/src/html_tokenizer.h
diff --git a/ios/third_party/blink/src/html_tokenizer.h b/ios/third_party/blink/src/html_tokenizer.h
new file mode 100644
index 0000000000000000000000000000000000000000..70623aef6ca7a064f5ede4240a0cab328cda5a35
--- /dev/null
+++ b/ios/third_party/blink/src/html_tokenizer.h
@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef HTMLTokenizer_h
+#define HTMLTokenizer_h
+
+#include "ios/third_party/blink/src/html_input_stream_preprocessor.h"
+#include "ios/third_party/blink/src/html_token.h"
+
+namespace WebCore {
+
+class HTMLTokenizer { // Emits HTMLTokens from a CharacterProvider, one nextToken() call at a time.
+ WTF_MAKE_NONCOPYABLE(HTMLTokenizer);
+
+public:
+ HTMLTokenizer();
+ ~HTMLTokenizer();
+
+ void reset();
+
+ enum State { // Tokenizer states; the current one is stored in m_state.
+ DataState,
+ TagOpenState,
+ EndTagOpenState,
+ TagNameState,
+ BeforeAttributeNameState,
+ AttributeNameState,
+ AfterAttributeNameState,
+ BeforeAttributeValueState,
+ AttributeValueDoubleQuotedState,
+ AttributeValueSingleQuotedState,
+ AttributeValueUnquotedState,
+ AfterAttributeValueQuotedState,
+ SelfClosingStartTagState,
+ BogusCommentState,
+ // The ContinueBogusCommentState is not in the HTML5 spec, but we use
+ // it internally to keep track of whether we've started the bogus
+ // comment token yet.
+ ContinueBogusCommentState,
+ MarkupDeclarationOpenState,
+ CommentStartState,
+ CommentStartDashState,
+ CommentState,
+ CommentEndDashState,
+ CommentEndState,
+ CommentEndBangState,
+ DOCTYPEState,
+ BeforeDOCTYPENameState,
+ DOCTYPENameState,
+ AfterDOCTYPENameState,
+ AfterDOCTYPEPublicKeywordState,
+ BeforeDOCTYPEPublicIdentifierState,
+ DOCTYPEPublicIdentifierDoubleQuotedState,
+ DOCTYPEPublicIdentifierSingleQuotedState,
+ AfterDOCTYPEPublicIdentifierState,
+ BetweenDOCTYPEPublicAndSystemIdentifiersState,
+ AfterDOCTYPESystemKeywordState,
+ BeforeDOCTYPESystemIdentifierState,
+ DOCTYPESystemIdentifierDoubleQuotedState,
+ DOCTYPESystemIdentifierSingleQuotedState,
+ AfterDOCTYPESystemIdentifierState,
+ BogusDOCTYPEState,
+ CDATASectionState,
+ // The two CDATA states below are not in the HTML5 spec, but we use them internally.
+ CDATASectionRightSquareBracketState,
+ CDATASectionDoubleRightSquareBracketState,
+ };
+
+ // Returns true if it emits a token. Otherwise, callers must provide
+ // the same (in progress) token on the next call (unless they call
+ // reset() first).
+ bool nextToken(CharacterProvider&, HTMLToken&);
+
+ State state() const { return m_state; } // Current tokenizer state.
+ void setState(State state) { m_state = state; } // Replaces the current state with |state|.
+ // True only while the tokenizer is in DataState.
+ inline bool shouldSkipNullCharacters() const
+ {
+ return m_state == HTMLTokenizer::DataState;
+ }
+
+private:
+ inline void parseError();
+ // Enters |state|, calls source.next(), and returns true.
+ inline bool emitAndResumeIn(CharacterProvider& source, State state)
+ {
+ ASSERT(m_token->type() != HTMLToken::Uninitialized); // The token must already have a type.
+ m_state = state;
+ source.next();
+ return true;
+ }
+ // Enters |state| without calling anything on the source, and returns true.
+ inline bool emitAndReconsumeIn(CharacterProvider&, State state)
+ {
+ ASSERT(m_token->type() != HTMLToken::Uninitialized); // The token must already have a type.
+ m_state = state;
+ return true;
+ }
+ // Always returns true; the two paths are described inline.
+ inline bool emitEndOfFile(CharacterProvider& source)
+ {
+ if (haveBufferedCharacterToken()) // A Character token is still pending:
+ return true; // return with m_state, the source and the token left untouched.
+ m_state = HTMLTokenizer::DataState; // Otherwise go back to DataState,
+ source.next(); // call next() on the source,
+ m_token->clear(); // and clear the token before
+ m_token->makeEndOfFile(); // calling makeEndOfFile() on it.
+ return true;
+ }
+
+ // Returns whether we need to emit a character token before dealing with
+ // the buffered end tag.
+ inline bool flushBufferedEndTag(CharacterProvider&);
+ // True when *m_token currently has type HTMLToken::Character.
+ inline bool haveBufferedCharacterToken()
+ {
+ return m_token->type() == HTMLToken::Character;
+ }
+
+ State m_state; // Read via state(); written by setState() and the emit* helpers above.
+
+ // m_token is owned by the caller. If nextToken is not on the stack,
+ // this member might be pointing to freed memory and must not be dereferenced.
+ HTMLToken* m_token;
+
+ // http://www.whatwg.org/specs/web-apps/current-work/#additional-allowed-character
+ LChar m_additionalAllowedCharacter;
+
+ // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream
+ InputStreamPreprocessor<HTMLTokenizer> m_inputStreamPreprocessor;
+};
+}
+
+#endif
« no previous file with comments | « ios/third_party/blink/src/html_token.mm ('k') | ios/third_party/blink/src/html_tokenizer.mm » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698