OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. | 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. |
3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ | 3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ |
4 * Copyright (C) 2010 Google, Inc. All Rights Reserved. | 4 * Copyright (C) 2010 Google, Inc. All Rights Reserved. |
5 * | 5 * |
6 * Redistribution and use in source and binary forms, with or without | 6 * Redistribution and use in source and binary forms, with or without |
7 * modification, are permitted provided that the following conditions | 7 * modification, are permitted provided that the following conditions |
8 * are met: | 8 * are met: |
9 * 1. Redistributions of source code must retain the above copyright | 9 * 1. Redistributions of source code must retain the above copyright |
10 * notice, this list of conditions and the following disclaimer. | 10 * notice, this list of conditions and the following disclaimer. |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
55 bool AtomicHTMLToken::usesName() const | 55 bool AtomicHTMLToken::usesName() const |
56 { | 56 { |
57 return m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag; | 57 return m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag; |
58 } | 58 } |
59 | 59 |
60 bool AtomicHTMLToken::usesAttributes() const | 60 bool AtomicHTMLToken::usesAttributes() const |
61 { | 61 { |
62 return m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag; | 62 return m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag; |
63 } | 63 } |
64 | 64 |
65 static inline UChar toLowerCase(UChar cc) | |
66 { | |
67 ASSERT(isASCIIUpper(cc)); | |
68 const int lowerCaseOffset = 0x20; | |
69 return cc + lowerCaseOffset; | |
70 } | |
71 | |
72 static inline bool vectorEqualsString(const Vector<LChar, 32>& vector, const Str
ing& string) | |
73 { | |
74 if (vector.size() != string.length()) | |
75 return false; | |
76 | |
77 if (!string.length()) | |
78 return true; | |
79 | |
80 return equal(string.impl(), vector.data(), vector.size()); | |
81 } | |
82 | |
83 static inline bool isEndTagBufferingState(HTMLTokenizer::State state) | 65 static inline bool isEndTagBufferingState(HTMLTokenizer::State state) |
84 { | 66 { |
85 switch (state) { | 67 return state == HTMLTokenizer::RawDataEndTagOpenState || state == HTMLTokeni
zer::RawDataEndTagNameState; |
86 case HTMLTokenizer::RAWTEXTEndTagOpenState: | |
87 case HTMLTokenizer::RAWTEXTEndTagNameState: | |
88 return true; | |
89 default: | |
90 return false; | |
91 } | |
92 } | 68 } |
93 | 69 |
94 #define HTML_BEGIN_STATE(stateName) BEGIN_STATE(HTMLTokenizer, stateName) | 70 #define HTML_BEGIN_STATE(stateName) BEGIN_STATE(HTMLTokenizer, stateName) |
95 #define HTML_RECONSUME_IN(stateName) RECONSUME_IN(HTMLTokenizer, stateName) | 71 #define HTML_RECONSUME_IN(stateName) RECONSUME_IN(HTMLTokenizer, stateName) |
96 #define HTML_ADVANCE_TO(stateName) ADVANCE_TO(HTMLTokenizer, stateName) | 72 #define HTML_ADVANCE_TO(stateName) ADVANCE_TO(HTMLTokenizer, stateName) |
97 #define HTML_SWITCH_TO(stateName) SWITCH_TO(HTMLTokenizer, stateName) | 73 #define HTML_SWITCH_TO(stateName) SWITCH_TO(HTMLTokenizer, stateName) |
98 | 74 |
99 HTMLTokenizer::HTMLTokenizer() | 75 HTMLTokenizer::HTMLTokenizer() |
100 : m_inputStreamPreprocessor(this) | 76 : m_inputStreamPreprocessor(this) |
101 { | 77 { |
102 reset(); | 78 reset(); |
103 } | 79 } |
104 | 80 |
105 HTMLTokenizer::~HTMLTokenizer() | 81 HTMLTokenizer::~HTMLTokenizer() |
106 { | 82 { |
107 } | 83 } |
108 | 84 |
109 void HTMLTokenizer::reset() | 85 void HTMLTokenizer::reset() |
110 { | 86 { |
111 m_state = HTMLTokenizer::DataState; | 87 m_state = HTMLTokenizer::DataState; |
112 m_token = 0; | 88 m_token = 0; |
113 } | 89 } |
114 | 90 |
115 bool HTMLTokenizer::flushBufferedEndTag(SegmentedString& source) | 91 bool HTMLTokenizer::flushBufferedEndTag(SegmentedString& source) |
116 { | 92 { |
117 ASSERT(m_token->type() == HTMLToken::Character || m_token->type() == HTMLTok
en::Uninitialized); | 93 ASSERT(m_token->type() == HTMLToken::Character || m_token->type() == HTMLTok
en::Uninitialized); |
118 source.advanceAndUpdateLineNumber(); | 94 source.advanceAndUpdateLineNumber(); |
119 if (m_token->type() == HTMLToken::Character) | 95 if (m_token->type() == HTMLToken::Character) |
120 return true; | 96 return true; |
121 m_token->beginEndTag(m_bufferedEndTagName); | 97 m_token->beginEndTag(m_temporaryBuffer); |
122 m_bufferedEndTagName.clear(); | |
123 m_appropriateEndTagName.clear(); | 98 m_appropriateEndTagName.clear(); |
124 m_temporaryBuffer.clear(); | 99 m_temporaryBuffer.clear(); |
125 return false; | 100 return false; |
126 } | 101 } |
127 | 102 |
128 #define FLUSH_AND_ADVANCE_TO(stateName) \ | 103 #define FLUSH_AND_ADVANCE_TO(stateName) \ |
129 do { \ | 104 do { \ |
130 m_state = HTMLTokenizer::stateName; \ | 105 m_state = HTMLTokenizer::stateName; \ |
131 if (flushBufferedEndTag(source)) \ | 106 if (flushBufferedEndTag(source)) \ |
132 return true; \ | 107 return true; \ |
(...skipping 11 matching lines...) Expand all Loading... |
144 return true; | 119 return true; |
145 } | 120 } |
146 | 121 |
147 bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) | 122 bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) |
148 { | 123 { |
149 // If we have a token in progress, then we're supposed to be called back | 124 // If we have a token in progress, then we're supposed to be called back |
150 // with the same token so we can finish it. | 125 // with the same token so we can finish it. |
151 ASSERT(!m_token || m_token == &token || token.type() == HTMLToken::Uninitial
ized); | 126 ASSERT(!m_token || m_token == &token || token.type() == HTMLToken::Uninitial
ized); |
152 m_token = &token; | 127 m_token = &token; |
153 | 128 |
154 if (!m_bufferedEndTagName.isEmpty() && !isEndTagBufferingState(m_state)) { | 129 if (!m_temporaryBuffer.isEmpty() && !isEndTagBufferingState(m_state)) { |
155 // FIXME: This should call flushBufferedEndTag(). | 130 // FIXME: This should call flushBufferedEndTag(). |
156 // We started an end tag during our last iteration. | 131 // We started an end tag during our last iteration. |
157 m_token->beginEndTag(m_bufferedEndTagName); | 132 m_token->beginEndTag(m_temporaryBuffer); |
158 m_bufferedEndTagName.clear(); | |
159 m_appropriateEndTagName.clear(); | 133 m_appropriateEndTagName.clear(); |
160 m_temporaryBuffer.clear(); | 134 m_temporaryBuffer.clear(); |
161 if (m_state == HTMLTokenizer::DataState) { | 135 if (m_state == HTMLTokenizer::DataState) { |
162 // We're back in the data state, so we must be done with the tag. | 136 // We're back in the data state, so we must be done with the tag. |
163 return true; | 137 return true; |
164 } | 138 } |
165 } | 139 } |
166 | 140 |
167 if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source)) | 141 if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source)) |
168 return haveBufferedCharacterToken(); | 142 return haveBufferedCharacterToken(); |
169 UChar cc = m_inputStreamPreprocessor.nextInputCharacter(); | 143 UChar cc = m_inputStreamPreprocessor.nextInputCharacter(); |
170 | 144 |
171 // Source: http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0 | 145 // Source: http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0 |
172 switch (m_state) { | 146 switch (m_state) { |
173 HTML_BEGIN_STATE(DataState) { | 147 HTML_BEGIN_STATE(DataState) { |
174 if (cc == '&') { | 148 if (cc == '&') { |
175 m_returnState = DataState; | 149 m_returnState = DataState; |
176 m_entityParser.reset(); | 150 m_entityParser.reset(); |
177 HTML_ADVANCE_TO(CharacterReferenceInDataState); | 151 HTML_ADVANCE_TO(CharacterReferenceInDataState); |
178 } else if (cc == '<') { | 152 } else if (cc == '<') { |
179 if (m_token->type() == HTMLToken::Character) { | 153 if (m_token->type() == HTMLToken::Character) { |
180 // We have a bunch of character tokens queued up that we | 154 // We have a bunch of character tokens queued up that we |
181 // are emitting lazily here. | 155 // are emitting lazily here. |
182 return true; | 156 return true; |
183 } | 157 } |
184 HTML_ADVANCE_TO(TagOpenState); | 158 HTML_ADVANCE_TO(TagOpenState); |
185 } else if (cc == kEndOfFileMarker) | 159 } else if (cc == kEndOfFileMarker) { |
186 return emitEndOfFile(source); | 160 return emitEndOfFile(source); |
187 else { | 161 } else { |
188 bufferCharacter(cc); | 162 bufferCharacter(cc); |
189 HTML_ADVANCE_TO(DataState); | 163 HTML_ADVANCE_TO(DataState); |
190 } | 164 } |
191 } | 165 } |
192 END_STATE() | 166 END_STATE() |
193 | 167 |
194 HTML_BEGIN_STATE(CharacterReferenceInDataState) { | 168 HTML_BEGIN_STATE(CharacterReferenceInDataState) { |
195 if (!m_entityParser.parse(source)) | 169 if (!m_entityParser.parse(source)) |
196 return haveBufferedCharacterToken(); | 170 return haveBufferedCharacterToken(); |
197 for (const UChar& entityCharacter : m_entityParser.result()) | 171 for (const UChar& entityCharacter : m_entityParser.result()) |
(...skipping 15 matching lines...) Expand all Loading... |
213 HTML_SWITCH_TO(AttributeValueDoubleQuotedState); | 187 HTML_SWITCH_TO(AttributeValueDoubleQuotedState); |
214 else if (m_returnState == AttributeValueSingleQuotedState) | 188 else if (m_returnState == AttributeValueSingleQuotedState) |
215 HTML_SWITCH_TO(AttributeValueSingleQuotedState); | 189 HTML_SWITCH_TO(AttributeValueSingleQuotedState); |
216 else if (m_returnState == AttributeValueUnquotedState) | 190 else if (m_returnState == AttributeValueUnquotedState) |
217 HTML_SWITCH_TO(AttributeValueUnquotedState); | 191 HTML_SWITCH_TO(AttributeValueUnquotedState); |
218 else | 192 else |
219 ASSERT_NOT_REACHED(); | 193 ASSERT_NOT_REACHED(); |
220 } | 194 } |
221 END_STATE() | 195 END_STATE() |
222 | 196 |
223 HTML_BEGIN_STATE(RAWTEXTState) { | 197 HTML_BEGIN_STATE(RawDataState) { |
224 if (cc == '<') | 198 if (cc == '<') { |
225 HTML_ADVANCE_TO(RAWTEXTLessThanSignState); | 199 HTML_ADVANCE_TO(RawDataLessThanSignState); |
226 else if (cc == kEndOfFileMarker) | 200 } else { |
227 return emitEndOfFile(source); | |
228 else { | |
229 bufferCharacter(cc); | 201 bufferCharacter(cc); |
230 HTML_ADVANCE_TO(RAWTEXTState); | 202 HTML_ADVANCE_TO(RawDataState); |
| 203 } |
| 204 } |
| 205 END_STATE() |
| 206 |
| 207 HTML_BEGIN_STATE(RawDataLessThanSignState) { |
| 208 if (cc == '/') { |
| 209 m_temporaryBuffer.clear(); |
| 210 HTML_ADVANCE_TO(RawDataEndTagOpenState); |
| 211 } else { |
| 212 bufferCharacter('<'); |
| 213 HTML_RECONSUME_IN(RawDataState); |
| 214 } |
| 215 } |
| 216 END_STATE() |
| 217 |
| 218 HTML_BEGIN_STATE(RawDataEndTagOpenState) { |
| 219 if (isASCIILower(cc)) { |
| 220 m_temporaryBuffer.append(static_cast<LChar>(cc)); |
| 221 HTML_ADVANCE_TO(RawDataEndTagNameState); |
| 222 } else { |
| 223 bufferCharacter('<'); |
| 224 bufferCharacter('/'); |
| 225 HTML_RECONSUME_IN(RawDataState); |
| 226 } |
| 227 } |
| 228 END_STATE() |
| 229 |
| 230 HTML_BEGIN_STATE(RawDataEndTagNameState) { |
| 231 if (isASCIILower(cc)) { |
| 232 m_temporaryBuffer.append(static_cast<LChar>(cc)); |
| 233 HTML_ADVANCE_TO(RawDataEndTagNameState); |
| 234 } else { |
| 235 if (isTokenizerWhitespace(cc)) { |
| 236 if (isAppropriateEndTag()) |
| 237 FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState); |
| 238 } else if (cc == '/') { |
| 239 if (isAppropriateEndTag()) |
| 240 FLUSH_AND_ADVANCE_TO(VoidTagState); |
| 241 } else if (cc == '>') { |
| 242 if (isAppropriateEndTag()) |
| 243 return flushEmitAndResumeIn(source, HTMLTokenizer::DataState
); |
| 244 } |
| 245 bufferCharacter('<'); |
| 246 bufferCharacter('/'); |
| 247 m_token->appendToCharacter(m_temporaryBuffer); |
| 248 m_temporaryBuffer.clear(); |
| 249 HTML_RECONSUME_IN(RawDataState); |
231 } | 250 } |
232 } | 251 } |
233 END_STATE() | 252 END_STATE() |
234 | 253 |
235 HTML_BEGIN_STATE(TagOpenState) { | 254 HTML_BEGIN_STATE(TagOpenState) { |
236 if (cc == '!') | 255 if (cc == '!') { |
237 HTML_ADVANCE_TO(CommentStart1State); | 256 HTML_ADVANCE_TO(CommentStart1State); |
238 else if (cc == '/') | 257 } else if (cc == '/') { |
239 HTML_ADVANCE_TO(CloseTagState); | 258 HTML_ADVANCE_TO(CloseTagState); |
240 else if (isASCIIUpper(cc)) { | 259 } else if (isTokenizerTagName(cc)) { |
241 m_token->beginStartTag(toLowerCase(cc)); | 260 m_token->beginStartTag(static_cast<LChar>(cc)); |
242 HTML_ADVANCE_TO(TagNameState); | |
243 } else if (isASCIILower(cc)) { | |
244 m_token->beginStartTag(cc); | |
245 HTML_ADVANCE_TO(TagNameState); | 261 HTML_ADVANCE_TO(TagNameState); |
246 } else { | 262 } else { |
247 parseError(); | |
248 bufferCharacter('<'); | 263 bufferCharacter('<'); |
249 HTML_RECONSUME_IN(DataState); | 264 HTML_RECONSUME_IN(DataState); |
250 } | 265 } |
251 } | 266 } |
252 END_STATE() | 267 END_STATE() |
253 | 268 |
254 HTML_BEGIN_STATE(CloseTagState) { | 269 HTML_BEGIN_STATE(CloseTagState) { |
255 if (isASCIIUpper(cc)) { | 270 if (isTokenizerTagName(cc)) { |
256 m_token->beginEndTag(static_cast<LChar>(toLowerCase(cc))); | |
257 m_appropriateEndTagName.clear(); | |
258 HTML_ADVANCE_TO(TagNameState); | |
259 } else if (isASCIILower(cc)) { | |
260 m_token->beginEndTag(static_cast<LChar>(cc)); | 271 m_token->beginEndTag(static_cast<LChar>(cc)); |
261 m_appropriateEndTagName.clear(); | |
262 HTML_ADVANCE_TO(TagNameState); | 272 HTML_ADVANCE_TO(TagNameState); |
263 } else if (cc == '>') { | 273 } else if (cc == '>') { |
264 bufferCharacter('<'); | 274 bufferCharacter('<'); |
265 bufferCharacter('/'); | 275 bufferCharacter('/'); |
266 bufferCharacter('>'); | 276 bufferCharacter('>'); |
267 HTML_ADVANCE_TO(DataState); | 277 HTML_ADVANCE_TO(DataState); |
268 } else { | 278 } else { |
269 bufferCharacter('<'); | 279 bufferCharacter('<'); |
270 bufferCharacter('/'); | 280 bufferCharacter('/'); |
271 HTML_RECONSUME_IN(DataState); | 281 HTML_RECONSUME_IN(DataState); |
272 } | 282 } |
273 } | 283 } |
274 END_STATE() | 284 END_STATE() |
275 | 285 |
276 HTML_BEGIN_STATE(TagNameState) { | 286 HTML_BEGIN_STATE(TagNameState) { |
277 if (isTokenizerWhitespace(cc)) | 287 if (isTokenizerWhitespace(cc)) { |
278 HTML_ADVANCE_TO(BeforeAttributeNameState); | 288 HTML_ADVANCE_TO(BeforeAttributeNameState); |
279 else if (cc == '/') | 289 } else if (cc == '/') { |
280 HTML_ADVANCE_TO(SelfClosingStartTagState); | 290 HTML_ADVANCE_TO(VoidTagState); |
281 else if (cc == '>') | 291 } else if (cc == '>') { |
282 return emitAndResumeIn(source, HTMLTokenizer::DataState); | 292 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
283 else if (isASCIIUpper(cc)) { | |
284 m_token->appendToName(toLowerCase(cc)); | |
285 HTML_ADVANCE_TO(TagNameState); | |
286 } else if (cc == kEndOfFileMarker) { | |
287 parseError(); | |
288 HTML_RECONSUME_IN(DataState); | |
289 } else { | 293 } else { |
290 m_token->appendToName(cc); | 294 m_token->appendToName(cc); |
291 HTML_ADVANCE_TO(TagNameState); | 295 HTML_ADVANCE_TO(TagNameState); |
292 } | 296 } |
293 } | 297 } |
294 END_STATE() | 298 END_STATE() |
295 | 299 |
296 HTML_BEGIN_STATE(RAWTEXTLessThanSignState) { | |
297 if (cc == '/') { | |
298 m_temporaryBuffer.clear(); | |
299 ASSERT(m_bufferedEndTagName.isEmpty()); | |
300 HTML_ADVANCE_TO(RAWTEXTEndTagOpenState); | |
301 } else { | |
302 bufferCharacter('<'); | |
303 HTML_RECONSUME_IN(RAWTEXTState); | |
304 } | |
305 } | |
306 END_STATE() | |
307 | |
308 HTML_BEGIN_STATE(RAWTEXTEndTagOpenState) { | |
309 if (isASCIIUpper(cc)) { | |
310 m_temporaryBuffer.append(static_cast<LChar>(cc)); | |
311 addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc))); | |
312 HTML_ADVANCE_TO(RAWTEXTEndTagNameState); | |
313 } else if (isASCIILower(cc)) { | |
314 m_temporaryBuffer.append(static_cast<LChar>(cc)); | |
315 addToPossibleEndTag(static_cast<LChar>(cc)); | |
316 HTML_ADVANCE_TO(RAWTEXTEndTagNameState); | |
317 } else { | |
318 bufferCharacter('<'); | |
319 bufferCharacter('/'); | |
320 HTML_RECONSUME_IN(RAWTEXTState); | |
321 } | |
322 } | |
323 END_STATE() | |
324 | |
325 HTML_BEGIN_STATE(RAWTEXTEndTagNameState) { | |
326 if (isASCIIUpper(cc)) { | |
327 m_temporaryBuffer.append(static_cast<LChar>(cc)); | |
328 addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc))); | |
329 HTML_ADVANCE_TO(RAWTEXTEndTagNameState); | |
330 } else if (isASCIILower(cc)) { | |
331 m_temporaryBuffer.append(static_cast<LChar>(cc)); | |
332 addToPossibleEndTag(static_cast<LChar>(cc)); | |
333 HTML_ADVANCE_TO(RAWTEXTEndTagNameState); | |
334 } else { | |
335 if (isTokenizerWhitespace(cc)) { | |
336 if (isAppropriateEndTag()) { | |
337 m_temporaryBuffer.append(static_cast<LChar>(cc)); | |
338 FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState); | |
339 } | |
340 } else if (cc == '/') { | |
341 if (isAppropriateEndTag()) { | |
342 m_temporaryBuffer.append(static_cast<LChar>(cc)); | |
343 FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState); | |
344 } | |
345 } else if (cc == '>') { | |
346 if (isAppropriateEndTag()) { | |
347 m_temporaryBuffer.append(static_cast<LChar>(cc)); | |
348 return flushEmitAndResumeIn(source, HTMLTokenizer::DataState
); | |
349 } | |
350 } | |
351 bufferCharacter('<'); | |
352 bufferCharacter('/'); | |
353 m_token->appendToCharacter(m_temporaryBuffer); | |
354 m_bufferedEndTagName.clear(); | |
355 m_temporaryBuffer.clear(); | |
356 HTML_RECONSUME_IN(RAWTEXTState); | |
357 } | |
358 } | |
359 END_STATE() | |
360 | |
361 HTML_BEGIN_STATE(BeforeAttributeNameState) { | 300 HTML_BEGIN_STATE(BeforeAttributeNameState) { |
362 if (isTokenizerWhitespace(cc)) | 301 if (isTokenizerWhitespace(cc)) { |
363 HTML_ADVANCE_TO(BeforeAttributeNameState); | 302 HTML_ADVANCE_TO(BeforeAttributeNameState); |
364 else if (cc == '/') | 303 } else if (cc == '/') { |
365 HTML_ADVANCE_TO(SelfClosingStartTagState); | 304 HTML_ADVANCE_TO(VoidTagState); |
366 else if (cc == '>') | 305 } else if (cc == '>') { |
367 return emitAndResumeIn(source, HTMLTokenizer::DataState); | 306 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
368 else if (isASCIIUpper(cc)) { | |
369 m_token->addNewAttribute(); | |
370 m_token->beginAttributeName(source.numberOfCharactersConsumed()); | |
371 m_token->appendToAttributeName(toLowerCase(cc)); | |
372 HTML_ADVANCE_TO(AttributeNameState); | |
373 } else if (cc == kEndOfFileMarker) { | |
374 parseError(); | |
375 HTML_RECONSUME_IN(DataState); | |
376 } else { | 307 } else { |
377 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=') | |
378 parseError(); | |
379 m_token->addNewAttribute(); | 308 m_token->addNewAttribute(); |
380 m_token->beginAttributeName(source.numberOfCharactersConsumed()); | 309 m_token->beginAttributeName(source.numberOfCharactersConsumed()); |
381 m_token->appendToAttributeName(cc); | 310 m_token->appendToAttributeName(cc); |
382 HTML_ADVANCE_TO(AttributeNameState); | 311 HTML_ADVANCE_TO(AttributeNameState); |
383 } | 312 } |
384 } | 313 } |
385 END_STATE() | 314 END_STATE() |
386 | 315 |
387 HTML_BEGIN_STATE(AttributeNameState) { | 316 HTML_BEGIN_STATE(AttributeNameState) { |
388 if (isTokenizerWhitespace(cc)) { | 317 if (isTokenizerWhitespace(cc)) { |
389 m_token->endAttributeName(source.numberOfCharactersConsumed()); | 318 m_token->endAttributeName(source.numberOfCharactersConsumed()); |
390 HTML_ADVANCE_TO(AfterAttributeNameState); | 319 HTML_ADVANCE_TO(AfterAttributeNameState); |
391 } else if (cc == '/') { | 320 } else if (cc == '/') { |
392 m_token->endAttributeName(source.numberOfCharactersConsumed()); | 321 m_token->endAttributeName(source.numberOfCharactersConsumed()); |
393 HTML_ADVANCE_TO(SelfClosingStartTagState); | 322 HTML_ADVANCE_TO(VoidTagState); |
394 } else if (cc == '=') { | 323 } else if (cc == '=') { |
395 m_token->endAttributeName(source.numberOfCharactersConsumed()); | 324 m_token->endAttributeName(source.numberOfCharactersConsumed()); |
396 HTML_ADVANCE_TO(BeforeAttributeValueState); | 325 HTML_ADVANCE_TO(BeforeAttributeValueState); |
397 } else if (cc == '>') { | 326 } else if (cc == '>') { |
398 m_token->endAttributeName(source.numberOfCharactersConsumed()); | 327 m_token->endAttributeName(source.numberOfCharactersConsumed()); |
399 return emitAndResumeIn(source, HTMLTokenizer::DataState); | 328 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
400 } else if (isASCIIUpper(cc)) { | |
401 m_token->appendToAttributeName(toLowerCase(cc)); | |
402 HTML_ADVANCE_TO(AttributeNameState); | |
403 } else if (cc == kEndOfFileMarker) { | |
404 parseError(); | |
405 m_token->endAttributeName(source.numberOfCharactersConsumed()); | |
406 HTML_RECONSUME_IN(DataState); | |
407 } else { | 329 } else { |
408 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=') | |
409 parseError(); | |
410 m_token->appendToAttributeName(cc); | 330 m_token->appendToAttributeName(cc); |
411 HTML_ADVANCE_TO(AttributeNameState); | 331 HTML_ADVANCE_TO(AttributeNameState); |
412 } | 332 } |
413 } | 333 } |
414 END_STATE() | 334 END_STATE() |
415 | 335 |
416 HTML_BEGIN_STATE(AfterAttributeNameState) { | 336 HTML_BEGIN_STATE(AfterAttributeNameState) { |
417 if (isTokenizerWhitespace(cc)) | 337 if (isTokenizerWhitespace(cc)) { |
418 HTML_ADVANCE_TO(AfterAttributeNameState); | 338 HTML_ADVANCE_TO(AfterAttributeNameState); |
419 else if (cc == '/') | 339 } else if (cc == '/') { |
420 HTML_ADVANCE_TO(SelfClosingStartTagState); | 340 HTML_ADVANCE_TO(VoidTagState); |
421 else if (cc == '=') | 341 } else if (cc == '=') { |
422 HTML_ADVANCE_TO(BeforeAttributeValueState); | 342 HTML_ADVANCE_TO(BeforeAttributeValueState); |
423 else if (cc == '>') | 343 } else if (cc == '>') { |
424 return emitAndResumeIn(source, HTMLTokenizer::DataState); | 344 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
425 else if (isASCIIUpper(cc)) { | |
426 m_token->addNewAttribute(); | |
427 m_token->beginAttributeName(source.numberOfCharactersConsumed()); | |
428 m_token->appendToAttributeName(toLowerCase(cc)); | |
429 HTML_ADVANCE_TO(AttributeNameState); | |
430 } else if (cc == kEndOfFileMarker) { | |
431 parseError(); | |
432 HTML_RECONSUME_IN(DataState); | |
433 } else { | 345 } else { |
434 if (cc == '"' || cc == '\'' || cc == '<') | |
435 parseError(); | |
436 m_token->addNewAttribute(); | 346 m_token->addNewAttribute(); |
437 m_token->beginAttributeName(source.numberOfCharactersConsumed()); | 347 m_token->beginAttributeName(source.numberOfCharactersConsumed()); |
438 m_token->appendToAttributeName(cc); | 348 m_token->appendToAttributeName(cc); |
439 HTML_ADVANCE_TO(AttributeNameState); | 349 HTML_ADVANCE_TO(AttributeNameState); |
440 } | 350 } |
441 } | 351 } |
442 END_STATE() | 352 END_STATE() |
443 | 353 |
444 HTML_BEGIN_STATE(BeforeAttributeValueState) { | 354 HTML_BEGIN_STATE(BeforeAttributeValueState) { |
445 if (isTokenizerWhitespace(cc)) | 355 if (isTokenizerWhitespace(cc)) |
446 HTML_ADVANCE_TO(BeforeAttributeValueState); | 356 HTML_ADVANCE_TO(BeforeAttributeValueState); |
447 else if (cc == '"') { | 357 else if (cc == '"') { |
448 m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1
); | 358 m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1
); |
449 HTML_ADVANCE_TO(AttributeValueDoubleQuotedState); | 359 HTML_ADVANCE_TO(AttributeValueDoubleQuotedState); |
450 } else if (cc == '&') { | 360 } else if (cc == '&') { |
451 m_token->beginAttributeValue(source.numberOfCharactersConsumed()); | 361 m_token->beginAttributeValue(source.numberOfCharactersConsumed()); |
452 HTML_RECONSUME_IN(AttributeValueUnquotedState); | 362 HTML_RECONSUME_IN(AttributeValueUnquotedState); |
453 } else if (cc == '\'') { | 363 } else if (cc == '\'') { |
454 m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1
); | 364 m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1
); |
455 HTML_ADVANCE_TO(AttributeValueSingleQuotedState); | 365 HTML_ADVANCE_TO(AttributeValueSingleQuotedState); |
456 } else if (cc == '>') { | 366 } else if (cc == '>') { |
457 parseError(); | |
458 return emitAndResumeIn(source, HTMLTokenizer::DataState); | 367 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
459 } else if (cc == kEndOfFileMarker) { | |
460 parseError(); | |
461 HTML_RECONSUME_IN(DataState); | |
462 } else { | 368 } else { |
463 if (cc == '<' || cc == '=' || cc == '`') | |
464 parseError(); | |
465 m_token->beginAttributeValue(source.numberOfCharactersConsumed()); | 369 m_token->beginAttributeValue(source.numberOfCharactersConsumed()); |
466 m_token->appendToAttributeValue(cc); | 370 m_token->appendToAttributeValue(cc); |
467 HTML_ADVANCE_TO(AttributeValueUnquotedState); | 371 HTML_ADVANCE_TO(AttributeValueUnquotedState); |
468 } | 372 } |
469 } | 373 } |
470 END_STATE() | 374 END_STATE() |
471 | 375 |
472 HTML_BEGIN_STATE(AttributeValueDoubleQuotedState) { | 376 HTML_BEGIN_STATE(AttributeValueDoubleQuotedState) { |
473 if (cc == '"') { | 377 if (cc == '"') { |
474 m_token->endAttributeValue(source.numberOfCharactersConsumed()); | 378 m_token->endAttributeValue(source.numberOfCharactersConsumed()); |
475 HTML_ADVANCE_TO(AfterAttributeValueQuotedState); | 379 HTML_ADVANCE_TO(BeforeAttributeNameState); |
476 } else if (cc == '&') { | 380 } else if (cc == '&') { |
477 m_returnState = AttributeValueDoubleQuotedState; | 381 m_returnState = AttributeValueDoubleQuotedState; |
478 m_entityParser.reset(); | 382 m_entityParser.reset(); |
479 HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState); | 383 HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState); |
480 } else if (cc == kEndOfFileMarker) { | |
481 parseError(); | |
482 m_token->endAttributeValue(source.numberOfCharactersConsumed()); | |
483 HTML_RECONSUME_IN(DataState); | |
484 } else { | 384 } else { |
485 m_token->appendToAttributeValue(cc); | 385 m_token->appendToAttributeValue(cc); |
486 HTML_ADVANCE_TO(AttributeValueDoubleQuotedState); | 386 HTML_ADVANCE_TO(AttributeValueDoubleQuotedState); |
487 } | 387 } |
488 } | 388 } |
489 END_STATE() | 389 END_STATE() |
490 | 390 |
491 HTML_BEGIN_STATE(AttributeValueSingleQuotedState) { | 391 HTML_BEGIN_STATE(AttributeValueSingleQuotedState) { |
492 if (cc == '\'') { | 392 if (cc == '\'') { |
493 m_token->endAttributeValue(source.numberOfCharactersConsumed()); | 393 m_token->endAttributeValue(source.numberOfCharactersConsumed()); |
494 HTML_ADVANCE_TO(AfterAttributeValueQuotedState); | 394 HTML_ADVANCE_TO(BeforeAttributeNameState); |
495 } else if (cc == '&') { | 395 } else if (cc == '&') { |
496 m_returnState = AttributeValueSingleQuotedState; | 396 m_returnState = AttributeValueSingleQuotedState; |
497 m_entityParser.reset(); | 397 m_entityParser.reset(); |
498 HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState); | 398 HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState); |
499 } else if (cc == kEndOfFileMarker) { | |
500 parseError(); | |
501 m_token->endAttributeValue(source.numberOfCharactersConsumed()); | |
502 HTML_RECONSUME_IN(DataState); | |
503 } else { | 399 } else { |
504 m_token->appendToAttributeValue(cc); | 400 m_token->appendToAttributeValue(cc); |
505 HTML_ADVANCE_TO(AttributeValueSingleQuotedState); | 401 HTML_ADVANCE_TO(AttributeValueSingleQuotedState); |
506 } | 402 } |
507 } | 403 } |
508 END_STATE() | 404 END_STATE() |
509 | 405 |
510 HTML_BEGIN_STATE(AttributeValueUnquotedState) { | 406 HTML_BEGIN_STATE(AttributeValueUnquotedState) { |
511 if (isTokenizerWhitespace(cc)) { | 407 if (isTokenizerWhitespace(cc)) { |
512 m_token->endAttributeValue(source.numberOfCharactersConsumed()); | 408 m_token->endAttributeValue(source.numberOfCharactersConsumed()); |
513 HTML_ADVANCE_TO(BeforeAttributeNameState); | 409 HTML_ADVANCE_TO(BeforeAttributeNameState); |
514 } else if (cc == '&') { | 410 } else if (cc == '&') { |
515 m_returnState = AttributeValueUnquotedState; | 411 m_returnState = AttributeValueUnquotedState; |
516 m_entityParser.reset(); | 412 m_entityParser.reset(); |
517 HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState); | 413 HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState); |
518 } else if (cc == '>') { | 414 } else if (cc == '>') { |
519 m_token->endAttributeValue(source.numberOfCharactersConsumed()); | 415 m_token->endAttributeValue(source.numberOfCharactersConsumed()); |
520 return emitAndResumeIn(source, HTMLTokenizer::DataState); | 416 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
521 } else if (cc == kEndOfFileMarker) { | |
522 parseError(); | |
523 m_token->endAttributeValue(source.numberOfCharactersConsumed()); | |
524 HTML_RECONSUME_IN(DataState); | |
525 } else { | 417 } else { |
526 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=' || cc == '`') | |
527 parseError(); | |
528 m_token->appendToAttributeValue(cc); | 418 m_token->appendToAttributeValue(cc); |
529 HTML_ADVANCE_TO(AttributeValueUnquotedState); | 419 HTML_ADVANCE_TO(AttributeValueUnquotedState); |
530 } | 420 } |
531 } | 421 } |
532 END_STATE() | 422 END_STATE() |
533 | 423 |
534 HTML_BEGIN_STATE(AfterAttributeValueQuotedState) { | 424 HTML_BEGIN_STATE(VoidTagState) { |
535 if (isTokenizerWhitespace(cc)) | 425 if (cc == '>') { |
536 HTML_ADVANCE_TO(BeforeAttributeNameState); | 426 m_token->setSelfClosing(); |
537 else if (cc == '/') | |
538 HTML_ADVANCE_TO(SelfClosingStartTagState); | |
539 else if (cc == '>') | |
540 return emitAndResumeIn(source, HTMLTokenizer::DataState); | 427 return emitAndResumeIn(source, HTMLTokenizer::DataState); |
541 else if (cc == kEndOfFileMarker) { | |
542 parseError(); | |
543 HTML_RECONSUME_IN(DataState); | |
544 } else { | 428 } else { |
545 parseError(); | |
546 HTML_RECONSUME_IN(BeforeAttributeNameState); | 429 HTML_RECONSUME_IN(BeforeAttributeNameState); |
547 } | 430 } |
548 } | 431 } |
549 END_STATE() | |
550 | |
551 HTML_BEGIN_STATE(SelfClosingStartTagState) { | |
552 if (cc == '>') { | |
553 m_token->setSelfClosing(); | |
554 return emitAndResumeIn(source, HTMLTokenizer::DataState); | |
555 } else if (cc == kEndOfFileMarker) { | |
556 parseError(); | |
557 HTML_RECONSUME_IN(DataState); | |
558 } else { | |
559 parseError(); | |
560 HTML_RECONSUME_IN(BeforeAttributeNameState); | |
561 } | |
562 } | |
563 END_STATE() | 432 END_STATE() |
564 | 433 |
565 HTML_BEGIN_STATE(CommentStart1State) { | 434 HTML_BEGIN_STATE(CommentStart1State) { |
566 if (cc == '-') { | 435 if (cc == '-') { |
567 HTML_ADVANCE_TO(CommentStart2State); | 436 HTML_ADVANCE_TO(CommentStart2State); |
568 } else { | 437 } else { |
569 bufferCharacter('<'); | 438 bufferCharacter('<'); |
570 bufferCharacter('!'); | 439 bufferCharacter('!'); |
571 HTML_RECONSUME_IN(DataState); | 440 HTML_RECONSUME_IN(DataState); |
572 } | 441 } |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
609 else | 478 else |
610 HTML_ADVANCE_TO(CommentState); | 479 HTML_ADVANCE_TO(CommentState); |
611 } | 480 } |
612 END_STATE() | 481 END_STATE() |
613 } | 482 } |
614 | 483 |
615 ASSERT_NOT_REACHED(); | 484 ASSERT_NOT_REACHED(); |
616 return false; | 485 return false; |
617 } | 486 } |
618 | 487 |
619 inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString) | |
620 { | |
621 return vectorEqualsString(m_temporaryBuffer, expectedString); | |
622 } | |
623 | |
624 inline void HTMLTokenizer::addToPossibleEndTag(LChar cc) | |
625 { | |
626 ASSERT(isEndTagBufferingState(m_state)); | |
627 m_bufferedEndTagName.append(cc); | |
628 } | |
629 | |
630 inline bool HTMLTokenizer::isAppropriateEndTag() | 488 inline bool HTMLTokenizer::isAppropriateEndTag() |
631 { | 489 { |
632 if (m_bufferedEndTagName.size() != m_appropriateEndTagName.size()) | 490 if (m_temporaryBuffer.size() != m_appropriateEndTagName.size()) |
633 return false; | 491 return false; |
634 | 492 |
635 size_t numCharacters = m_bufferedEndTagName.size(); | 493 size_t numCharacters = m_temporaryBuffer.size(); |
636 | 494 |
637 for (size_t i = 0; i < numCharacters; i++) { | 495 for (size_t i = 0; i < numCharacters; i++) { |
638 if (m_bufferedEndTagName[i] != m_appropriateEndTagName[i]) | 496 if (m_temporaryBuffer[i] != m_appropriateEndTagName[i]) |
639 return false; | 497 return false; |
640 } | 498 } |
641 | 499 |
642 return true; | 500 return true; |
643 } | 501 } |
644 | 502 |
645 inline void HTMLTokenizer::parseError() | 503 inline void HTMLTokenizer::parseError() |
646 { | 504 { |
647 notImplemented(); | 505 notImplemented(); |
648 } | 506 } |
649 | 507 |
650 } | 508 } |
OLD | NEW |