| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. | 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. |
| 3 * Copyright (C) 2010 Google, Inc. All Rights Reserved. | 3 * Copyright (C) 2010 Google, Inc. All Rights Reserved. |
| 4 * | 4 * |
| 5 * Redistribution and use in source and binary forms, with or without | 5 * Redistribution and use in source and binary forms, with or without |
| 6 * modification, are permitted provided that the following conditions | 6 * modification, are permitted provided that the following conditions |
| 7 * are met: | 7 * are met: |
| 8 * 1. Redistributions of source code must retain the above copyright | 8 * 1. Redistributions of source code must retain the above copyright |
| 9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
| 10 * 2. Redistributions in binary form must reproduce the above copyright | 10 * 2. Redistributions in binary form must reproduce the above copyright |
| 11 * notice, this list of conditions and the following disclaimer in the | 11 * notice, this list of conditions and the following disclaimer in the |
| 12 * documentation and/or other materials provided with the distribution. | 12 * documentation and/or other materials provided with the distribution. |
| 13 * | 13 * |
| 14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY | 14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
| 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
| 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR | 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
| 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
| 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 25 */ | 25 */ |
| 26 | 26 |
| 27 #ifndef HTMLTokenizer_h | 27 #ifndef HTMLTokenizer_h |
| 28 #define HTMLTokenizer_h | 28 #define HTMLTokenizer_h |
| 29 | 29 |
| 30 #include "core/html/parser/HTMLEntityParser.h" |
| 30 #include "core/html/parser/HTMLToken.h" | 31 #include "core/html/parser/HTMLToken.h" |
| 31 #include "core/html/parser/InputStreamPreprocessor.h" | 32 #include "core/html/parser/InputStreamPreprocessor.h" |
| 32 #include "platform/text/SegmentedString.h" | 33 #include "platform/text/SegmentedString.h" |
| 33 | 34 |
| 34 namespace blink { | 35 namespace blink { |
| 35 | 36 |
| 36 class HTMLTokenizer { | 37 class HTMLTokenizer { |
| 37 WTF_MAKE_NONCOPYABLE(HTMLTokenizer); | 38 WTF_MAKE_NONCOPYABLE(HTMLTokenizer); |
| 38 WTF_MAKE_FAST_ALLOCATED; | 39 WTF_MAKE_FAST_ALLOCATED; |
| 39 public: | 40 public: |
| 40 static PassOwnPtr<HTMLTokenizer> create() { return adoptPtr(new HTMLTokenizer()); } | 41 static PassOwnPtr<HTMLTokenizer> create() { return adoptPtr(new HTMLTokenizer()); } |
| 41 ~HTMLTokenizer(); | 42 ~HTMLTokenizer(); |
| 42 | 43 |
| 43 void reset(); | 44 void reset(); |
| 44 | 45 |
| 45 enum State { | 46 enum State { |
| 46 DataState, | 47 DataState, |
| 47 CharacterReferenceInDataState, | 48 CharacterReferenceInDataState, |
| 49 CharacterReferenceInAttributeValueState, |
| 48 RAWTEXTState, | 50 RAWTEXTState, |
| 49 TagOpenState, | 51 TagOpenState, |
| 50 EndTagOpenState, | 52 EndTagOpenState, |
| 51 TagNameState, | 53 TagNameState, |
| 52 RAWTEXTLessThanSignState, | 54 RAWTEXTLessThanSignState, |
| 53 RAWTEXTEndTagOpenState, | 55 RAWTEXTEndTagOpenState, |
| 54 RAWTEXTEndTagNameState, | 56 RAWTEXTEndTagNameState, |
| 55 BeforeAttributeNameState, | 57 BeforeAttributeNameState, |
| 56 AttributeNameState, | 58 AttributeNameState, |
| 57 AfterAttributeNameState, | 59 AfterAttributeNameState, |
| 58 BeforeAttributeValueState, | 60 BeforeAttributeValueState, |
| 59 AttributeValueDoubleQuotedState, | 61 AttributeValueDoubleQuotedState, |
| 60 AttributeValueSingleQuotedState, | 62 AttributeValueSingleQuotedState, |
| 61 AttributeValueUnquotedState, | 63 AttributeValueUnquotedState, |
| 62 CharacterReferenceInAttributeValueState, | |
| 63 AfterAttributeValueQuotedState, | 64 AfterAttributeValueQuotedState, |
| 64 SelfClosingStartTagState, | 65 SelfClosingStartTagState, |
| 65 BogusCommentState, | 66 BogusCommentState, |
| 66 // The ContinueBogusCommentState is not in the HTML5 spec, but we use | 67 // The ContinueBogusCommentState is not in the HTML5 spec, but we use |
| 67 // it internally to keep track of whether we've started the bogus | 68 // it internally to keep track of whether we've started the bogus |
| 68 // comment token yet. | 69 // comment token yet. |
| 69 ContinueBogusCommentState, | 70 ContinueBogusCommentState, |
| 70 MarkupDeclarationOpenState, | 71 MarkupDeclarationOpenState, |
| 71 CommentStartState, | 72 CommentStartState, |
| 72 CommentStartDashState, | 73 CommentStartDashState, |
| 73 CommentState, | 74 CommentState, |
| 74 CommentEndDashState, | 75 CommentEndDashState, |
| 75 CommentEndState, | 76 CommentEndState, |
| 76 CommentEndBangState, | 77 CommentEndBangState, |
| 77 }; | 78 }; |
| 78 | 79 |
| 79 // This function returns true if it emits a token. Otherwise, callers | 80 // This function returns true if it emits a token. Otherwise, callers |
| 80 // must provide the same (in progress) token on the next call (unless | 81 // must provide the same (in progress) token on the next call (unless |
| 81 // they call reset() first). | 82 // they call reset() first). |
| 82 bool nextToken(SegmentedString&, HTMLToken&); | 83 bool nextToken(SegmentedString&, HTMLToken&); |
| 83 | 84 |
| 84 State state() const { return m_state; } | 85 State state() const { return m_state; } |
| 85 void setState(State state) { m_state = state; } | 86 void setState(State state) { m_state = state; } |
| 86 | 87 |
| 87 private: | 88 private: |
| 88 HTMLTokenizer(); | 89 HTMLTokenizer(); |
| 89 | 90 |
| 90 inline bool processEntity(SegmentedString&); | |
| 91 | |
| 92 inline void parseError(); | 91 inline void parseError(); |
| 93 | 92 |
| 94 inline void bufferCharacter(UChar character) | 93 inline void bufferCharacter(UChar character) |
| 95 { | 94 { |
| 96 ASSERT(character != kEndOfFileMarker); | 95 ASSERT(character != kEndOfFileMarker); |
| 97 m_token->ensureIsCharacterToken(); | 96 m_token->ensureIsCharacterToken(); |
| 98 m_token->appendToCharacter(character); | 97 m_token->appendToCharacter(character); |
| 99 } | 98 } |
| 100 | 99 |
| 101 inline bool emitAndResumeIn(SegmentedString& source, State state) | 100 inline bool emitAndResumeIn(SegmentedString& source, State state) |
| (...skipping 47 matching lines...) |
| 149 { | 148 { |
| 150 return m_token->type() == HTMLToken::Character; | 149 return m_token->type() == HTMLToken::Character; |
| 151 } | 150 } |
| 152 | 151 |
| 153 State m_state; | 152 State m_state; |
| 154 | 153 |
| 155 // m_token is owned by the caller. If nextToken is not on the stack, | 154 // m_token is owned by the caller. If nextToken is not on the stack, |
| 156 // this member might be pointing to unallocated memory. | 155 // this member might be pointing to unallocated memory. |
| 157 HTMLToken* m_token; | 156 HTMLToken* m_token; |
| 158 | 157 |
| 159 // http://www.whatwg.org/specs/web-apps/current-work/#additional-allowed-character | 158 State m_returnState; |
| 160 UChar m_additionalAllowedCharacter; | |
| 161 | 159 |
| 162 // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream | 160 // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream |
| 163 InputStreamPreprocessor<HTMLTokenizer> m_inputStreamPreprocessor; | 161 InputStreamPreprocessor<HTMLTokenizer> m_inputStreamPreprocessor; |
| 162 HTMLEntityParser m_entityParser; |
| 164 | 163 |
| 165 Vector<UChar, 32> m_appropriateEndTagName; | 164 Vector<UChar, 32> m_appropriateEndTagName; |
| 166 | 165 |
| 167 // http://www.whatwg.org/specs/web-apps/current-work/#temporary-buffer | 166 // http://www.whatwg.org/specs/web-apps/current-work/#temporary-buffer |
| 168 Vector<LChar, 32> m_temporaryBuffer; | 167 Vector<LChar, 32> m_temporaryBuffer; |
| 169 | 168 |
| 170 // We occasionally want to emit both a character token and an end tag | 169 // We occasionally want to emit both a character token and an end tag |
| 171 // token (e.g., when lexing script). We buffer the name of the end tag | 170 // token (e.g., when lexing script). We buffer the name of the end tag |
| 172 // token here so we remember it next time we re-enter the tokenizer. | 171 // token here so we remember it next time we re-enter the tokenizer. |
| 173 Vector<LChar, 32> m_bufferedEndTagName; | 172 Vector<LChar, 32> m_bufferedEndTagName; |
| 174 }; | 173 }; |
| 175 | 174 |
| 176 } | 175 } |
| 177 | 176 |
| 178 #endif | 177 #endif |
| OLD | NEW |