| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. | 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. |
| 3 * Copyright (C) 2010 Google, Inc. All Rights Reserved. | 3 * Copyright (C) 2010 Google, Inc. All Rights Reserved. |
| 4 * | 4 * |
| 5 * Redistribution and use in source and binary forms, with or without | 5 * Redistribution and use in source and binary forms, with or without |
| 6 * modification, are permitted provided that the following conditions | 6 * modification, are permitted provided that the following conditions |
| 7 * are met: | 7 * are met: |
| 8 * 1. Redistributions of source code must retain the above copyright | 8 * 1. Redistributions of source code must retain the above copyright |
| 9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
| 10 * 2. Redistributions in binary form must reproduce the above copyright | 10 * 2. Redistributions in binary form must reproduce the above copyright |
| 11 * notice, this list of conditions and the following disclaimer in the | 11 * notice, this list of conditions and the following disclaimer in the |
| 12 * documentation and/or other materials provided with the distribution. | 12 * documentation and/or other materials provided with the distribution. |
| 13 * | 13 * |
| 14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY | 14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
| 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR | 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
| 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
| 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 25 */ | 25 */ |
| 26 | 26 |
| 27 #ifndef HTMLTokenizer_h | 27 #ifndef HTMLTokenizer_h |
| 28 #define HTMLTokenizer_h | 28 #define HTMLTokenizer_h |
| 29 | 29 |
| 30 #include "core/html/parser/HTMLParserOptions.h" | |
| 31 #include "core/html/parser/HTMLToken.h" | 30 #include "core/html/parser/HTMLToken.h" |
| 32 #include "core/html/parser/InputStreamPreprocessor.h" | 31 #include "core/html/parser/InputStreamPreprocessor.h" |
| 33 #include "platform/text/SegmentedString.h" | 32 #include "platform/text/SegmentedString.h" |
| 34 | 33 |
| 35 namespace blink { | 34 namespace blink { |
| 36 | 35 |
| 37 class HTMLTokenizer { | 36 class HTMLTokenizer { |
| 38 WTF_MAKE_NONCOPYABLE(HTMLTokenizer); | 37 WTF_MAKE_NONCOPYABLE(HTMLTokenizer); |
| 39 WTF_MAKE_FAST_ALLOCATED; | 38 WTF_MAKE_FAST_ALLOCATED; |
| 40 public: | 39 public: |
| 41 static PassOwnPtr<HTMLTokenizer> create(const HTMLParserOptions& options) { return adoptPtr(new HTMLTokenizer(options)); } | 40 static PassOwnPtr<HTMLTokenizer> create() { return adoptPtr(new HTMLTokenizer()); } |
| 42 ~HTMLTokenizer(); | 41 ~HTMLTokenizer(); |
| 43 | 42 |
| 44 void reset(); | 43 void reset(); |
| 45 | 44 |
| 46 enum State { | 45 enum State { |
| 47 DataState, | 46 DataState, |
| 48 CharacterReferenceInDataState, | 47 CharacterReferenceInDataState, |
| 49 RAWTEXTState, | 48 RAWTEXTState, |
| 50 ScriptDataState, | 49 ScriptDataState, |
| 51 PLAINTEXTState, | 50 PLAINTEXTState, |
| (...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 89 ContinueBogusCommentState, | 88 ContinueBogusCommentState, |
| 90 MarkupDeclarationOpenState, | 89 MarkupDeclarationOpenState, |
| 91 CommentStartState, | 90 CommentStartState, |
| 92 CommentStartDashState, | 91 CommentStartDashState, |
| 93 CommentState, | 92 CommentState, |
| 94 CommentEndDashState, | 93 CommentEndDashState, |
| 95 CommentEndState, | 94 CommentEndState, |
| 96 CommentEndBangState, | 95 CommentEndBangState, |
| 97 }; | 96 }; |
| 98 | 97 |
| 99 struct Checkpoint { | |
| 100 HTMLParserOptions options; | |
| 101 State state; | |
| 102 UChar additionalAllowedCharacter; | |
| 103 bool skipNextNewLine; | |
| 104 | |
| 105 Checkpoint() | |
| 106 : options(0) | |
| 107 , state() | |
| 108 , additionalAllowedCharacter('\0') | |
| 109 , skipNextNewLine(false) | |
| 110 { | |
| 111 } | |
| 112 }; | |
| 113 | |
| 114 bool canCreateCheckpoint() const; | |
| 115 void createCheckpoint(Checkpoint&) const; | |
| 116 void restoreFromCheckpoint(const Checkpoint&); | |
| 117 | |
| 118 // This function returns true if it emits a token. Otherwise, callers | 98 // This function returns true if it emits a token. Otherwise, callers |
| 119 // must provide the same (in progress) token on the next call (unless | 99 // must provide the same (in progress) token on the next call (unless |
| 120 // they call reset() first). | 100 // they call reset() first). |
| 121 bool nextToken(SegmentedString&, HTMLToken&); | 101 bool nextToken(SegmentedString&, HTMLToken&); |
| 122 | 102 |
| 123 // Returns a copy of any characters buffered internally by the tokenizer. | 103 // Returns a copy of any characters buffered internally by the tokenizer. |
| 124 // The tokenizer buffers characters when searching for the </script> token | 104 // The tokenizer buffers characters when searching for the </script> token |
| 125 // that terminates a script element. | 105 // that terminates a script element. |
| 126 String bufferedCharacters() const; | 106 String bufferedCharacters() const; |
| 127 | 107 |
| (...skipping 18 matching lines...) Expand all Loading... |
| 146 // tree builder's insertion mode. | 126 // tree builder's insertion mode. |
| 147 // * CDATA sections in foreign content will be tokenized as bogus comments | 127 // * CDATA sections in foreign content will be tokenized as bogus comments |
| 148 // instead of as character tokens. | 128 // instead of as character tokens. |
| 149 // | 129 // |
| 150 void updateStateFor(const String& tagName); | 130 void updateStateFor(const String& tagName); |
| 151 | 131 |
| 152 State state() const { return m_state; } | 132 State state() const { return m_state; } |
| 153 void setState(State state) { m_state = state; } | 133 void setState(State state) { m_state = state; } |
| 154 | 134 |
| 155 private: | 135 private: |
| 156 explicit HTMLTokenizer(const HTMLParserOptions&); | 136 HTMLTokenizer(); |
| 157 | 137 |
| 158 inline bool processEntity(SegmentedString&); | 138 inline bool processEntity(SegmentedString&); |
| 159 | 139 |
| 160 inline void parseError(); | 140 inline void parseError(); |
| 161 | 141 |
| 162 inline void bufferCharacter(UChar character) | 142 inline void bufferCharacter(UChar character) |
| 163 { | 143 { |
| 164 ASSERT(character != kEndOfFileMarker); | 144 ASSERT(character != kEndOfFileMarker); |
| 165 m_token->ensureIsCharacterToken(); | 145 m_token->ensureIsCharacterToken(); |
| 166 m_token->appendToCharacter(character); | 146 m_token->appendToCharacter(character); |
| (...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 232 | 212 |
| 233 Vector<UChar, 32> m_appropriateEndTagName; | 213 Vector<UChar, 32> m_appropriateEndTagName; |
| 234 | 214 |
| 235 // http://www.whatwg.org/specs/web-apps/current-work/#temporary-buffer | 215 // http://www.whatwg.org/specs/web-apps/current-work/#temporary-buffer |
| 236 Vector<LChar, 32> m_temporaryBuffer; | 216 Vector<LChar, 32> m_temporaryBuffer; |
| 237 | 217 |
| 238 // We occasionally want to emit both a character token and an end tag | 218 // We occasionally want to emit both a character token and an end tag |
| 239 // token (e.g., when lexing script). We buffer the name of the end tag | 219 // token (e.g., when lexing script). We buffer the name of the end tag |
| 240 // token here so we remember it next time we re-enter the tokenizer. | 220 // token here so we remember it next time we re-enter the tokenizer. |
| 241 Vector<LChar, 32> m_bufferedEndTagName; | 221 Vector<LChar, 32> m_bufferedEndTagName; |
| 242 | |
| 243 HTMLParserOptions m_options; | |
| 244 }; | 222 }; |
| 245 | 223 |
| 246 } | 224 } |
| 247 | 225 |
| 248 #endif | 226 #endif |
| OLD | NEW |