OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. | 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. |
3 * Copyright (C) 2010 Google, Inc. All Rights Reserved. | 3 * Copyright (C) 2010 Google, Inc. All Rights Reserved. |
4 * | 4 * |
5 * Redistribution and use in source and binary forms, with or without | 5 * Redistribution and use in source and binary forms, with or without |
6 * modification, are permitted provided that the following conditions | 6 * modification, are permitted provided that the following conditions |
7 * are met: | 7 * are met: |
8 * 1. Redistributions of source code must retain the above copyright | 8 * 1. Redistributions of source code must retain the above copyright |
9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
10 * 2. Redistributions in binary form must reproduce the above copyright | 10 * 2. Redistributions in binary form must reproduce the above copyright |
11 * notice, this list of conditions and the following disclaimer in the | 11 * notice, this list of conditions and the following disclaimer in the |
12 * documentation and/or other materials provided with the distribution. | 12 * documentation and/or other materials provided with the distribution. |
13 * | 13 * |
14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY | 14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY |
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR | 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR |
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY |
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
25 */ | 25 */ |
26 | 26 |
27 #ifndef HTMLTokenizer_h | 27 #ifndef HTMLTokenizer_h |
28 #define HTMLTokenizer_h | 28 #define HTMLTokenizer_h |
29 | 29 |
| 30 #include "core/html/parser/HTMLEntityParser.h" |
30 #include "core/html/parser/HTMLToken.h" | 31 #include "core/html/parser/HTMLToken.h" |
31 #include "core/html/parser/InputStreamPreprocessor.h" | 32 #include "core/html/parser/InputStreamPreprocessor.h" |
32 #include "platform/text/SegmentedString.h" | 33 #include "platform/text/SegmentedString.h" |
33 | 34 |
34 namespace blink { | 35 namespace blink { |
35 | 36 |
36 class HTMLTokenizer { | 37 class HTMLTokenizer { |
37 WTF_MAKE_NONCOPYABLE(HTMLTokenizer); | 38 WTF_MAKE_NONCOPYABLE(HTMLTokenizer); |
38 WTF_MAKE_FAST_ALLOCATED; | 39 WTF_MAKE_FAST_ALLOCATED; |
39 public: | 40 public: |
40 static PassOwnPtr<HTMLTokenizer> create() { return adoptPtr(new HTMLTokenizer()); } | 41 static PassOwnPtr<HTMLTokenizer> create() { return adoptPtr(new HTMLTokenizer()); } |
41 ~HTMLTokenizer(); | 42 ~HTMLTokenizer(); |
42 | 43 |
43 void reset(); | 44 void reset(); |
44 | 45 |
45 enum State { | 46 enum State { |
46 DataState, | 47 DataState, |
47 CharacterReferenceInDataState, | 48 CharacterReferenceInDataState, |
| 49 CharacterReferenceInAttributeValueState, |
48 RAWTEXTState, | 50 RAWTEXTState, |
49 TagOpenState, | 51 TagOpenState, |
50 EndTagOpenState, | 52 EndTagOpenState, |
51 TagNameState, | 53 TagNameState, |
52 RAWTEXTLessThanSignState, | 54 RAWTEXTLessThanSignState, |
53 RAWTEXTEndTagOpenState, | 55 RAWTEXTEndTagOpenState, |
54 RAWTEXTEndTagNameState, | 56 RAWTEXTEndTagNameState, |
55 BeforeAttributeNameState, | 57 BeforeAttributeNameState, |
56 AttributeNameState, | 58 AttributeNameState, |
57 AfterAttributeNameState, | 59 AfterAttributeNameState, |
58 BeforeAttributeValueState, | 60 BeforeAttributeValueState, |
59 AttributeValueDoubleQuotedState, | 61 AttributeValueDoubleQuotedState, |
60 AttributeValueSingleQuotedState, | 62 AttributeValueSingleQuotedState, |
61 AttributeValueUnquotedState, | 63 AttributeValueUnquotedState, |
62 CharacterReferenceInAttributeValueState, | |
63 AfterAttributeValueQuotedState, | 64 AfterAttributeValueQuotedState, |
64 SelfClosingStartTagState, | 65 SelfClosingStartTagState, |
65 BogusCommentState, | 66 BogusCommentState, |
66 // The ContinueBogusCommentState is not in the HTML5 spec, but we use | 67 // The ContinueBogusCommentState is not in the HTML5 spec, but we use |
67 // it internally to keep track of whether we've started the bogus | 68 // it internally to keep track of whether we've started the bogus |
68 // comment token yet. | 69 // comment token yet. |
69 ContinueBogusCommentState, | 70 ContinueBogusCommentState, |
70 MarkupDeclarationOpenState, | 71 MarkupDeclarationOpenState, |
71 CommentStartState, | 72 CommentStartState, |
72 CommentStartDashState, | 73 CommentStartDashState, |
73 CommentState, | 74 CommentState, |
74 CommentEndDashState, | 75 CommentEndDashState, |
75 CommentEndState, | 76 CommentEndState, |
76 CommentEndBangState, | 77 CommentEndBangState, |
77 }; | 78 }; |
78 | 79 |
79 // This function returns true if it emits a token. Otherwise, callers | 80 // This function returns true if it emits a token. Otherwise, callers |
80 // must provide the same (in progress) token on the next call (unless | 81 // must provide the same (in progress) token on the next call (unless |
81 // they call reset() first). | 82 // they call reset() first). |
82 bool nextToken(SegmentedString&, HTMLToken&); | 83 bool nextToken(SegmentedString&, HTMLToken&); |
83 | 84 |
84 State state() const { return m_state; } | 85 State state() const { return m_state; } |
85 void setState(State state) { m_state = state; } | 86 void setState(State state) { m_state = state; } |
86 | 87 |
87 private: | 88 private: |
88 HTMLTokenizer(); | 89 HTMLTokenizer(); |
89 | 90 |
90 inline bool processEntity(SegmentedString&); | |
91 | |
92 inline void parseError(); | 91 inline void parseError(); |
93 | 92 |
94 inline void bufferCharacter(UChar character) | 93 inline void bufferCharacter(UChar character) |
95 { | 94 { |
96 ASSERT(character != kEndOfFileMarker); | 95 ASSERT(character != kEndOfFileMarker); |
97 m_token->ensureIsCharacterToken(); | 96 m_token->ensureIsCharacterToken(); |
98 m_token->appendToCharacter(character); | 97 m_token->appendToCharacter(character); |
99 } | 98 } |
100 | 99 |
101 inline bool emitAndResumeIn(SegmentedString& source, State state) | 100 inline bool emitAndResumeIn(SegmentedString& source, State state) |
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
149 { | 148 { |
150 return m_token->type() == HTMLToken::Character; | 149 return m_token->type() == HTMLToken::Character; |
151 } | 150 } |
152 | 151 |
153 State m_state; | 152 State m_state; |
154 | 153 |
155 // m_token is owned by the caller. If nextToken is not on the stack, | 154 // m_token is owned by the caller. If nextToken is not on the stack, |
156 // this member might be pointing to unallocated memory. | 155 // this member might be pointing to unallocated memory. |
157 HTMLToken* m_token; | 156 HTMLToken* m_token; |
158 | 157 |
159 // http://www.whatwg.org/specs/web-apps/current-work/#additional-allowed-character | 158 State m_returnState; |
160 UChar m_additionalAllowedCharacter; | |
161 | 159 |
162 // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream | 160 // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream |
163 InputStreamPreprocessor<HTMLTokenizer> m_inputStreamPreprocessor; | 161 InputStreamPreprocessor<HTMLTokenizer> m_inputStreamPreprocessor; |
| 162 HTMLEntityParser m_entityParser; |
164 | 163 |
165 Vector<UChar, 32> m_appropriateEndTagName; | 164 Vector<UChar, 32> m_appropriateEndTagName; |
166 | 165 |
167 // http://www.whatwg.org/specs/web-apps/current-work/#temporary-buffer | 166 // http://www.whatwg.org/specs/web-apps/current-work/#temporary-buffer |
168 Vector<LChar, 32> m_temporaryBuffer; | 167 Vector<LChar, 32> m_temporaryBuffer; |
169 | 168 |
170 // We occasionally want to emit both a character token and an end tag | 169 // We occasionally want to emit both a character token and an end tag |
171 // token (e.g., when lexing script). We buffer the name of the end tag | 170 // token (e.g., when lexing script). We buffer the name of the end tag |
172 // token here so we remember it next time we re-enter the tokenizer. | 171 // token here so we remember it next time we re-enter the tokenizer. |
173 Vector<LChar, 32> m_bufferedEndTagName; | 172 Vector<LChar, 32> m_bufferedEndTagName; |
174 }; | 173 }; |
175 | 174 |
176 } | 175 } |
177 | 176 |
178 #endif | 177 #endif |
OLD | NEW |