OLD | NEW |
(Empty) | |
| 1 /* |
| 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. |
| 3 * Copyright (C) 2010 Google, Inc. All Rights Reserved. |
| 4 * |
| 5 * Redistribution and use in source and binary forms, with or without |
| 6 * modification, are permitted provided that the following conditions |
| 7 * are met: |
| 8 * 1. Redistributions of source code must retain the above copyright |
| 9 * notice, this list of conditions and the following disclaimer. |
| 10 * 2. Redistributions in binary form must reproduce the above copyright |
| 11 * notice, this list of conditions and the following disclaimer in the |
| 12 * documentation and/or other materials provided with the distribution. |
| 13 * |
| 14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
| 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
| 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
| 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 25 */ |
| 26 |
| 27 #ifndef HTMLTokenizer_h |
| 28 #define HTMLTokenizer_h |
| 29 |
| 30 #include "ios/third_party/blink/src/html_input_stream_preprocessor.h" |
| 31 #include "ios/third_party/blink/src/html_token.h" |
| 32 |
| 33 namespace WebCore { |
| 34 |
| 35 class HTMLTokenizer { |
| 36 WTF_MAKE_NONCOPYABLE(HTMLTokenizer); |
| 37 |
| 38 public: |
| 39 HTMLTokenizer(); |
| 40 ~HTMLTokenizer(); |
| 41 |
| 42 void reset(); |
| 43 |
| 44 enum State { |
| 45 DataState, |
| 46 TagOpenState, |
| 47 EndTagOpenState, |
| 48 TagNameState, |
| 49 BeforeAttributeNameState, |
| 50 AttributeNameState, |
| 51 AfterAttributeNameState, |
| 52 BeforeAttributeValueState, |
| 53 AttributeValueDoubleQuotedState, |
| 54 AttributeValueSingleQuotedState, |
| 55 AttributeValueUnquotedState, |
| 56 AfterAttributeValueQuotedState, |
| 57 SelfClosingStartTagState, |
| 58 BogusCommentState, |
| 59 // The ContinueBogusCommentState is not in the HTML5 spec, but we use |
| 60 // it internally to keep track of whether we've started the bogus |
| 61 // comment token yet. |
| 62 ContinueBogusCommentState, |
| 63 MarkupDeclarationOpenState, |
| 64 CommentStartState, |
| 65 CommentStartDashState, |
| 66 CommentState, |
| 67 CommentEndDashState, |
| 68 CommentEndState, |
| 69 CommentEndBangState, |
| 70 DOCTYPEState, |
| 71 BeforeDOCTYPENameState, |
| 72 DOCTYPENameState, |
| 73 AfterDOCTYPENameState, |
| 74 AfterDOCTYPEPublicKeywordState, |
| 75 BeforeDOCTYPEPublicIdentifierState, |
| 76 DOCTYPEPublicIdentifierDoubleQuotedState, |
| 77 DOCTYPEPublicIdentifierSingleQuotedState, |
| 78 AfterDOCTYPEPublicIdentifierState, |
| 79 BetweenDOCTYPEPublicAndSystemIdentifiersState, |
| 80 AfterDOCTYPESystemKeywordState, |
| 81 BeforeDOCTYPESystemIdentifierState, |
| 82 DOCTYPESystemIdentifierDoubleQuotedState, |
| 83 DOCTYPESystemIdentifierSingleQuotedState, |
| 84 AfterDOCTYPESystemIdentifierState, |
| 85 BogusDOCTYPEState, |
| 86 CDATASectionState, |
| 87 // These CDATA states are not in the HTML5 spec, but we use them interna
lly. |
| 88 CDATASectionRightSquareBracketState, |
| 89 CDATASectionDoubleRightSquareBracketState, |
| 90 }; |
| 91 |
| 92 // This function returns true if it emits a token. Otherwise, callers |
| 93 // must provide the same (in progress) token on the next call (unless |
| 94 // they call reset() first). |
| 95 bool nextToken(CharacterProvider&, HTMLToken&); |
| 96 |
| 97 State state() const { return m_state; } |
| 98 void setState(State state) { m_state = state; } |
| 99 |
| 100 inline bool shouldSkipNullCharacters() const |
| 101 { |
| 102 return m_state == HTMLTokenizer::DataState; |
| 103 } |
| 104 |
| 105 private: |
| 106 inline void parseError(); |
| 107 |
| 108 inline bool emitAndResumeIn(CharacterProvider& source, State state) |
| 109 { |
| 110 ASSERT(m_token->type() != HTMLToken::Uninitialized); |
| 111 m_state = state; |
| 112 source.next(); |
| 113 return true; |
| 114 } |
| 115 |
| 116 inline bool emitAndReconsumeIn(CharacterProvider&, State state) |
| 117 { |
| 118 ASSERT(m_token->type() != HTMLToken::Uninitialized); |
| 119 m_state = state; |
| 120 return true; |
| 121 } |
| 122 |
| 123 inline bool emitEndOfFile(CharacterProvider& source) |
| 124 { |
| 125 if (haveBufferedCharacterToken()) |
| 126 return true; |
| 127 m_state = HTMLTokenizer::DataState; |
| 128 source.next(); |
| 129 m_token->clear(); |
| 130 m_token->makeEndOfFile(); |
| 131 return true; |
| 132 } |
| 133 |
| 134 // Return whether we need to emit a character token before dealing with |
| 135 // the buffered end tag. |
| 136 inline bool flushBufferedEndTag(CharacterProvider&); |
| 137 |
| 138 inline bool haveBufferedCharacterToken() |
| 139 { |
| 140 return m_token->type() == HTMLToken::Character; |
| 141 } |
| 142 |
| 143 State m_state; |
| 144 |
| 145 // m_token is owned by the caller. If nextToken is not on the stack, |
| 146 // this member might be pointing to unallocated memory. |
| 147 HTMLToken* m_token; |
| 148 |
| 149 // http://www.whatwg.org/specs/web-apps/current-work/#additional-allowed-cha
racter |
| 150 LChar m_additionalAllowedCharacter; |
| 151 |
| 152 // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-inpu
t-stream |
| 153 InputStreamPreprocessor<HTMLTokenizer> m_inputStreamPreprocessor; |
| 154 }; |
| 155 } |
| 156 |
| 157 #endif |
OLD | NEW |