| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. | |
| 3 * Copyright (C) 2010 Google, Inc. All Rights Reserved. | |
| 4 * | |
| 5 * Redistribution and use in source and binary forms, with or without | |
| 6 * modification, are permitted provided that the following conditions | |
| 7 * are met: | |
| 8 * 1. Redistributions of source code must retain the above copyright | |
| 9 * notice, this list of conditions and the following disclaimer. | |
| 10 * 2. Redistributions in binary form must reproduce the above copyright | |
| 11 * notice, this list of conditions and the following disclaimer in the | |
| 12 * documentation and/or other materials provided with the distribution. | |
| 13 * | |
| 14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY | |
| 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
| 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR | |
| 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
| 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
| 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
| 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
| 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
| 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| 25 */ | |
| 26 | |
| 27 #ifndef SKY_ENGINE_CORE_HTML_PARSER_HTMLTOKENIZER_H_ | |
| 28 #define SKY_ENGINE_CORE_HTML_PARSER_HTMLTOKENIZER_H_ | |
| 29 | |
| 30 #include "sky/engine/core/html/parser/HTMLEntityParser.h" | |
| 31 #include "sky/engine/core/html/parser/HTMLToken.h" | |
| 32 #include "sky/engine/core/html/parser/InputStreamPreprocessor.h" | |
| 33 #include "sky/engine/platform/text/SegmentedString.h" | |
| 34 | |
| 35 namespace blink { | |
| 36 | |
| 37 class HTMLTokenizer { | |
| 38 WTF_MAKE_NONCOPYABLE(HTMLTokenizer); | |
| 39 WTF_MAKE_FAST_ALLOCATED; | |
| 40 public: | |
| 41 static PassOwnPtr<HTMLTokenizer> create() { return adoptPtr(new HTMLTokenize
r()); } | |
| 42 ~HTMLTokenizer(); | |
| 43 | |
| 44 void reset(); | |
| 45 | |
| 46 enum State { | |
| 47 DataState, | |
| 48 CharacterReferenceInDataState, | |
| 49 CharacterReferenceInAttributeValueState, | |
| 50 RawDataState, | |
| 51 RawDataLessThanSignState, | |
| 52 RawDataEndTagOpenState, | |
| 53 RawDataEndTagNameState, | |
| 54 TagOpenState, | |
| 55 CloseTagState, | |
| 56 TagNameState, | |
| 57 BeforeAttributeNameState, | |
| 58 AttributeNameState, | |
| 59 AfterAttributeNameState, | |
| 60 BeforeAttributeValueState, | |
| 61 AttributeValueDoubleQuotedState, | |
| 62 AttributeValueSingleQuotedState, | |
| 63 AttributeValueUnquotedState, | |
| 64 VoidTagState, | |
| 65 CommentStart1State, | |
| 66 CommentStart2State, | |
| 67 CommentState, | |
| 68 CommentEnd1State, | |
| 69 CommentEnd2State, | |
| 70 }; | |
| 71 | |
| 72 // This function returns true if it emits a token. Otherwise, callers | |
| 73 // must provide the same (in progress) token on the next call (unless | |
| 74 // they call reset() first). | |
| 75 bool nextToken(SegmentedString&, HTMLToken&); | |
| 76 | |
| 77 State state() const { return m_state; } | |
| 78 | |
| 79 void setState(State state) { m_state = state; } | |
| 80 | |
| 81 private: | |
| 82 HTMLTokenizer(); | |
| 83 | |
| 84 inline void parseError(); | |
| 85 | |
| 86 inline void bufferCharacter(UChar character) | |
| 87 { | |
| 88 ASSERT(character != kEndOfFileMarker); | |
| 89 m_token->ensureIsCharacterToken(); | |
| 90 m_token->appendToCharacter(character); | |
| 91 } | |
| 92 | |
| 93 inline bool emitAndResumeIn(SegmentedString& source, State state) | |
| 94 { | |
| 95 saveEndTagNameIfNeeded(); | |
| 96 m_state = state; | |
| 97 source.advanceAndUpdateLineNumber(); | |
| 98 return true; | |
| 99 } | |
| 100 | |
| 101 inline bool emitAndReconsumeIn(SegmentedString&, State state) | |
| 102 { | |
| 103 saveEndTagNameIfNeeded(); | |
| 104 m_state = state; | |
| 105 return true; | |
| 106 } | |
| 107 | |
| 108 inline bool emitEndOfFile(SegmentedString& source) | |
| 109 { | |
| 110 if (haveBufferedCharacterToken()) | |
| 111 return true; | |
| 112 m_state = HTMLTokenizer::DataState; | |
| 113 source.advanceAndUpdateLineNumber(); | |
| 114 m_token->clear(); | |
| 115 m_token->makeEndOfFile(); | |
| 116 return true; | |
| 117 } | |
| 118 | |
| 119 inline bool flushEmitAndResumeIn(SegmentedString&, State); | |
| 120 | |
| 121 // Return whether we need to emit a character token before dealing with | |
| 122 // the buffered end tag. | |
| 123 inline bool flushBufferedEndTag(SegmentedString&); | |
| 124 | |
| 125 inline void saveEndTagNameIfNeeded() | |
| 126 { | |
| 127 ASSERT(m_token->type() != HTMLToken::Uninitialized); | |
| 128 if (m_token->type() == HTMLToken::StartTag) | |
| 129 m_appropriateEndTagName = m_token->name(); | |
| 130 } | |
| 131 inline bool isAppropriateEndTag(); | |
| 132 | |
| 133 inline bool haveBufferedCharacterToken() | |
| 134 { | |
| 135 return m_token->type() == HTMLToken::Character; | |
| 136 } | |
| 137 | |
| 138 State m_state; | |
| 139 | |
| 140 // m_token is owned by the caller. If nextToken is not on the stack, | |
| 141 // this member might be pointing to unallocated memory. | |
| 142 HTMLToken* m_token; | |
| 143 | |
| 144 State m_returnState; | |
| 145 | |
| 146 // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-inpu
t-stream | |
| 147 InputStreamPreprocessor<HTMLTokenizer> m_inputStreamPreprocessor; | |
| 148 HTMLEntityParser m_entityParser; | |
| 149 | |
| 150 Vector<UChar, 32> m_appropriateEndTagName; | |
| 151 | |
| 152 // http://www.whatwg.org/specs/web-apps/current-work/#temporary-buffer | |
| 153 Vector<LChar, 32> m_temporaryBuffer; | |
| 154 }; | |
| 155 | |
| 156 } | |
| 157 | |
| 158 #endif // SKY_ENGINE_CORE_HTML_PARSER_HTMLTOKENIZER_H_ | |
| OLD | NEW |