OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. | 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. |
3 * Copyright (C) 2010 Google, Inc. All Rights Reserved. | 3 * Copyright (C) 2010 Google, Inc. All Rights Reserved. |
4 * | 4 * |
5 * Redistribution and use in source and binary forms, with or without | 5 * Redistribution and use in source and binary forms, with or without |
6 * modification, are permitted provided that the following conditions | 6 * modification, are permitted provided that the following conditions |
7 * are met: | 7 * are met: |
8 * 1. Redistributions of source code must retain the above copyright | 8 * 1. Redistributions of source code must retain the above copyright |
9 * notice, this list of conditions and the following disclaimer. | 9 * notice, this list of conditions and the following disclaimer. |
10 * 2. Redistributions in binary form must reproduce the above copyright | 10 * 2. Redistributions in binary form must reproduce the above copyright |
(...skipping 29 matching lines...) Expand all Loading... |
40 public: | 40 public: |
41 static PassOwnPtr<HTMLTokenizer> create() { return adoptPtr(new HTMLTokenizer()); } | 41 static PassOwnPtr<HTMLTokenizer> create() { return adoptPtr(new HTMLTokenizer()); } |
42 ~HTMLTokenizer(); | 42 ~HTMLTokenizer(); |
43 | 43 |
44 void reset(); | 44 void reset(); |
45 | 45 |
46 enum State { | 46 enum State { |
47 DataState, | 47 DataState, |
48 CharacterReferenceInDataState, | 48 CharacterReferenceInDataState, |
49 CharacterReferenceInAttributeValueState, | 49 CharacterReferenceInAttributeValueState, |
50 RAWTEXTState, | 50 RawDataState, |
| 51 RawDataLessThanSignState, |
| 52 RawDataEndTagOpenState, |
| 53 RawDataEndTagNameState, |
51 TagOpenState, | 54 TagOpenState, |
52 CloseTagState, | 55 CloseTagState, |
53 TagNameState, | 56 TagNameState, |
54 RAWTEXTLessThanSignState, | |
55 RAWTEXTEndTagOpenState, | |
56 RAWTEXTEndTagNameState, | |
57 BeforeAttributeNameState, | 57 BeforeAttributeNameState, |
58 AttributeNameState, | 58 AttributeNameState, |
59 AfterAttributeNameState, | 59 AfterAttributeNameState, |
60 BeforeAttributeValueState, | 60 BeforeAttributeValueState, |
61 AttributeValueDoubleQuotedState, | 61 AttributeValueDoubleQuotedState, |
62 AttributeValueSingleQuotedState, | 62 AttributeValueSingleQuotedState, |
63 AttributeValueUnquotedState, | 63 AttributeValueUnquotedState, |
64 AfterAttributeValueQuotedState, | 64 VoidTagState, |
65 SelfClosingStartTagState, | |
66 CommentStart1State, | 65 CommentStart1State, |
67 CommentStart2State, | 66 CommentStart2State, |
68 CommentState, | 67 CommentState, |
69 CommentEnd1State, | 68 CommentEnd1State, |
70 CommentEnd2State, | 69 CommentEnd2State, |
71 }; | 70 }; |
72 | 71 |
73 // This function returns true if it emits a token. Otherwise, callers | 72 // This function returns true if it emits a token. Otherwise, callers |
74 // must provide the same (in progress) token on the next call (unless | 73 // must provide the same (in progress) token on the next call (unless |
75 // they call reset() first). | 74 // they call reset() first). |
76 bool nextToken(SegmentedString&, HTMLToken&); | 75 bool nextToken(SegmentedString&, HTMLToken&); |
77 | 76 |
78 State state() const { return m_state; } | 77 State state() const { return m_state; } |
| 78 |
79 void setState(State state) { m_state = state; } | 79 void setState(State state) { m_state = state; } |
80 | 80 |
81 private: | 81 private: |
82 HTMLTokenizer(); | 82 HTMLTokenizer(); |
83 | 83 |
84 inline void parseError(); | 84 inline void parseError(); |
85 | 85 |
86 inline void bufferCharacter(UChar character) | 86 inline void bufferCharacter(UChar character) |
87 { | 87 { |
88 ASSERT(character != kEndOfFileMarker); | 88 ASSERT(character != kEndOfFileMarker); |
(...skipping 25 matching lines...) Expand all Loading... |
114 m_token->clear(); | 114 m_token->clear(); |
115 m_token->makeEndOfFile(); | 115 m_token->makeEndOfFile(); |
116 return true; | 116 return true; |
117 } | 117 } |
118 | 118 |
119 inline bool flushEmitAndResumeIn(SegmentedString&, State); | 119 inline bool flushEmitAndResumeIn(SegmentedString&, State); |
120 | 120 |
121 // Return whether we need to emit a character token before dealing with | 121 // Return whether we need to emit a character token before dealing with |
122 // the buffered end tag. | 122 // the buffered end tag. |
123 inline bool flushBufferedEndTag(SegmentedString&); | 123 inline bool flushBufferedEndTag(SegmentedString&); |
124 inline bool temporaryBufferIs(const String&); | |
125 | |
126 // Sometimes we speculatively consume input characters and we don't | |
127 // know whether they represent end tags or RCDATA, etc. These | |
128 // functions help manage these state. | |
129 inline void addToPossibleEndTag(LChar cc); | |
130 | 124 |
131 inline void saveEndTagNameIfNeeded() | 125 inline void saveEndTagNameIfNeeded() |
132 { | 126 { |
133 ASSERT(m_token->type() != HTMLToken::Uninitialized); | 127 ASSERT(m_token->type() != HTMLToken::Uninitialized); |
134 if (m_token->type() == HTMLToken::StartTag) | 128 if (m_token->type() == HTMLToken::StartTag) |
135 m_appropriateEndTagName = m_token->name(); | 129 m_appropriateEndTagName = m_token->name(); |
136 } | 130 } |
137 inline bool isAppropriateEndTag(); | 131 inline bool isAppropriateEndTag(); |
138 | 132 |
139 | |
140 inline bool haveBufferedCharacterToken() | 133 inline bool haveBufferedCharacterToken() |
141 { | 134 { |
142 return m_token->type() == HTMLToken::Character; | 135 return m_token->type() == HTMLToken::Character; |
143 } | 136 } |
144 | 137 |
145 State m_state; | 138 State m_state; |
146 | 139 |
147 // m_token is owned by the caller. If nextToken is not on the stack, | 140 // m_token is owned by the caller. If nextToken is not on the stack, |
148 // this member might be pointing to unallocated memory. | 141 // this member might be pointing to unallocated memory. |
149 HTMLToken* m_token; | 142 HTMLToken* m_token; |
150 | 143 |
151 State m_returnState; | 144 State m_returnState; |
152 | 145 |
153 // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream | 146 // http://www.whatwg.org/specs/web-apps/current-work/#preprocessing-the-input-stream |
154 InputStreamPreprocessor<HTMLTokenizer> m_inputStreamPreprocessor; | 147 InputStreamPreprocessor<HTMLTokenizer> m_inputStreamPreprocessor; |
155 HTMLEntityParser m_entityParser; | 148 HTMLEntityParser m_entityParser; |
156 | 149 |
157 Vector<UChar, 32> m_appropriateEndTagName; | 150 Vector<UChar, 32> m_appropriateEndTagName; |
158 | 151 |
159 // http://www.whatwg.org/specs/web-apps/current-work/#temporary-buffer | 152 // http://www.whatwg.org/specs/web-apps/current-work/#temporary-buffer |
160 Vector<LChar, 32> m_temporaryBuffer; | 153 Vector<LChar, 32> m_temporaryBuffer; |
161 | |
162 // We occationally want to emit both a character token and an end tag | |
163 // token (e.g., when lexing script). We buffer the name of the end tag | |
164 // token here so we remember it next time we re-enter the tokenizer. | |
165 Vector<LChar, 32> m_bufferedEndTagName; | |
166 }; | 154 }; |
167 | 155 |
168 } | 156 } |
169 | 157 |
170 #endif | 158 #endif |
OLD | NEW |