Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(284)

Side by Side Diff: sky/engine/core/html/parser/HTMLTokenizer.cpp

Issue 678263002: Update tokenizer to match spec (Closed) Base URL: git@github.com:domokit/mojo.git@master
Patch Set: Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved.
3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ 3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
4 * Copyright (C) 2010 Google, Inc. All Rights Reserved. 4 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
5 * 5 *
6 * Redistribution and use in source and binary forms, with or without 6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions 7 * modification, are permitted provided that the following conditions
8 * are met: 8 * are met:
9 * 1. Redistributions of source code must retain the above copyright 9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer. 10 * notice, this list of conditions and the following disclaimer.
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
55 bool AtomicHTMLToken::usesName() const 55 bool AtomicHTMLToken::usesName() const
56 { 56 {
57 return m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag; 57 return m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag;
58 } 58 }
59 59
60 bool AtomicHTMLToken::usesAttributes() const 60 bool AtomicHTMLToken::usesAttributes() const
61 { 61 {
62 return m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag; 62 return m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag;
63 } 63 }
64 64
65 static inline UChar toLowerCase(UChar cc)
66 {
67 ASSERT(isASCIIUpper(cc));
68 const int lowerCaseOffset = 0x20;
69 return cc + lowerCaseOffset;
70 }
71
72 static inline bool vectorEqualsString(const Vector<LChar, 32>& vector, const String& string)
73 {
74 if (vector.size() != string.length())
75 return false;
76
77 if (!string.length())
78 return true;
79
80 return equal(string.impl(), vector.data(), vector.size());
81 }
82
83 static inline bool isEndTagBufferingState(HTMLTokenizer::State state) 65 static inline bool isEndTagBufferingState(HTMLTokenizer::State state)
84 { 66 {
85 switch (state) { 67 return state == HTMLTokenizer::RawDataEndTagOpenState || state == HTMLTokenizer::RawDataEndTagNameState;
86 case HTMLTokenizer::RAWTEXTEndTagOpenState:
87 case HTMLTokenizer::RAWTEXTEndTagNameState:
88 return true;
89 default:
90 return false;
91 }
92 } 68 }
93 69
94 #define HTML_BEGIN_STATE(stateName) BEGIN_STATE(HTMLTokenizer, stateName) 70 #define HTML_BEGIN_STATE(stateName) BEGIN_STATE(HTMLTokenizer, stateName)
95 #define HTML_RECONSUME_IN(stateName) RECONSUME_IN(HTMLTokenizer, stateName) 71 #define HTML_RECONSUME_IN(stateName) RECONSUME_IN(HTMLTokenizer, stateName)
96 #define HTML_ADVANCE_TO(stateName) ADVANCE_TO(HTMLTokenizer, stateName) 72 #define HTML_ADVANCE_TO(stateName) ADVANCE_TO(HTMLTokenizer, stateName)
97 #define HTML_SWITCH_TO(stateName) SWITCH_TO(HTMLTokenizer, stateName) 73 #define HTML_SWITCH_TO(stateName) SWITCH_TO(HTMLTokenizer, stateName)
98 74
99 HTMLTokenizer::HTMLTokenizer() 75 HTMLTokenizer::HTMLTokenizer()
100 : m_inputStreamPreprocessor(this) 76 : m_inputStreamPreprocessor(this)
101 { 77 {
102 reset(); 78 reset();
103 } 79 }
104 80
105 HTMLTokenizer::~HTMLTokenizer() 81 HTMLTokenizer::~HTMLTokenizer()
106 { 82 {
107 } 83 }
108 84
109 void HTMLTokenizer::reset() 85 void HTMLTokenizer::reset()
110 { 86 {
111 m_state = HTMLTokenizer::DataState; 87 m_state = HTMLTokenizer::DataState;
112 m_token = 0; 88 m_token = 0;
113 } 89 }
114 90
115 bool HTMLTokenizer::flushBufferedEndTag(SegmentedString& source) 91 bool HTMLTokenizer::flushBufferedEndTag(SegmentedString& source)
116 { 92 {
117 ASSERT(m_token->type() == HTMLToken::Character || m_token->type() == HTMLToken::Uninitialized); 93 ASSERT(m_token->type() == HTMLToken::Character || m_token->type() == HTMLToken::Uninitialized);
118 source.advanceAndUpdateLineNumber(); 94 source.advanceAndUpdateLineNumber();
119 if (m_token->type() == HTMLToken::Character) 95 if (m_token->type() == HTMLToken::Character)
120 return true; 96 return true;
121 m_token->beginEndTag(m_bufferedEndTagName); 97 m_token->beginEndTag(m_temporaryBuffer);
122 m_bufferedEndTagName.clear();
123 m_appropriateEndTagName.clear(); 98 m_appropriateEndTagName.clear();
124 m_temporaryBuffer.clear(); 99 m_temporaryBuffer.clear();
125 return false; 100 return false;
126 } 101 }
127 102
128 #define FLUSH_AND_ADVANCE_TO(stateName) \ 103 #define FLUSH_AND_ADVANCE_TO(stateName) \
129 do { \ 104 do { \
130 m_state = HTMLTokenizer::stateName; \ 105 m_state = HTMLTokenizer::stateName; \
131 if (flushBufferedEndTag(source)) \ 106 if (flushBufferedEndTag(source)) \
132 return true; \ 107 return true; \
(...skipping 11 matching lines...) Expand all
144 return true; 119 return true;
145 } 120 }
146 121
147 bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token) 122 bool HTMLTokenizer::nextToken(SegmentedString& source, HTMLToken& token)
148 { 123 {
149 // If we have a token in progress, then we're supposed to be called back 124 // If we have a token in progress, then we're supposed to be called back
150 // with the same token so we can finish it. 125 // with the same token so we can finish it.
151 ASSERT(!m_token || m_token == &token || token.type() == HTMLToken::Uninitialized); 126 ASSERT(!m_token || m_token == &token || token.type() == HTMLToken::Uninitialized);
152 m_token = &token; 127 m_token = &token;
153 128
154 if (!m_bufferedEndTagName.isEmpty() && !isEndTagBufferingState(m_state)) { 129 if (!m_temporaryBuffer.isEmpty() && !isEndTagBufferingState(m_state)) {
155 // FIXME: This should call flushBufferedEndTag(). 130 // FIXME: This should call flushBufferedEndTag().
156 // We started an end tag during our last iteration. 131 // We started an end tag during our last iteration.
157 m_token->beginEndTag(m_bufferedEndTagName); 132 m_token->beginEndTag(m_temporaryBuffer);
158 m_bufferedEndTagName.clear();
159 m_appropriateEndTagName.clear(); 133 m_appropriateEndTagName.clear();
160 m_temporaryBuffer.clear(); 134 m_temporaryBuffer.clear();
161 if (m_state == HTMLTokenizer::DataState) { 135 if (m_state == HTMLTokenizer::DataState) {
162 // We're back in the data state, so we must be done with the tag. 136 // We're back in the data state, so we must be done with the tag.
163 return true; 137 return true;
164 } 138 }
165 } 139 }
166 140
167 if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source)) 141 if (source.isEmpty() || !m_inputStreamPreprocessor.peek(source))
168 return haveBufferedCharacterToken(); 142 return haveBufferedCharacterToken();
169 UChar cc = m_inputStreamPreprocessor.nextInputCharacter(); 143 UChar cc = m_inputStreamPreprocessor.nextInputCharacter();
170 144
171 // Source: http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0 145 // Source: http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0
172 switch (m_state) { 146 switch (m_state) {
173 HTML_BEGIN_STATE(DataState) { 147 HTML_BEGIN_STATE(DataState) {
174 if (cc == '&') { 148 if (cc == '&') {
175 m_returnState = DataState; 149 m_returnState = DataState;
176 m_entityParser.reset(); 150 m_entityParser.reset();
177 HTML_ADVANCE_TO(CharacterReferenceInDataState); 151 HTML_ADVANCE_TO(CharacterReferenceInDataState);
178 } else if (cc == '<') { 152 } else if (cc == '<') {
179 if (m_token->type() == HTMLToken::Character) { 153 if (m_token->type() == HTMLToken::Character) {
180 // We have a bunch of character tokens queued up that we 154 // We have a bunch of character tokens queued up that we
181 // are emitting lazily here. 155 // are emitting lazily here.
182 return true; 156 return true;
183 } 157 }
184 HTML_ADVANCE_TO(TagOpenState); 158 HTML_ADVANCE_TO(TagOpenState);
185 } else if (cc == kEndOfFileMarker) 159 } else if (cc == kEndOfFileMarker) {
186 return emitEndOfFile(source); 160 return emitEndOfFile(source);
187 else { 161 } else {
188 bufferCharacter(cc); 162 bufferCharacter(cc);
189 HTML_ADVANCE_TO(DataState); 163 HTML_ADVANCE_TO(DataState);
190 } 164 }
191 } 165 }
192 END_STATE() 166 END_STATE()
193 167
194 HTML_BEGIN_STATE(CharacterReferenceInDataState) { 168 HTML_BEGIN_STATE(CharacterReferenceInDataState) {
195 if (!m_entityParser.parse(source)) 169 if (!m_entityParser.parse(source))
196 return haveBufferedCharacterToken(); 170 return haveBufferedCharacterToken();
197 for (const UChar& entityCharacter : m_entityParser.result()) 171 for (const UChar& entityCharacter : m_entityParser.result())
(...skipping 15 matching lines...) Expand all
213 HTML_SWITCH_TO(AttributeValueDoubleQuotedState); 187 HTML_SWITCH_TO(AttributeValueDoubleQuotedState);
214 else if (m_returnState == AttributeValueSingleQuotedState) 188 else if (m_returnState == AttributeValueSingleQuotedState)
215 HTML_SWITCH_TO(AttributeValueSingleQuotedState); 189 HTML_SWITCH_TO(AttributeValueSingleQuotedState);
216 else if (m_returnState == AttributeValueUnquotedState) 190 else if (m_returnState == AttributeValueUnquotedState)
217 HTML_SWITCH_TO(AttributeValueUnquotedState); 191 HTML_SWITCH_TO(AttributeValueUnquotedState);
218 else 192 else
219 ASSERT_NOT_REACHED(); 193 ASSERT_NOT_REACHED();
220 } 194 }
221 END_STATE() 195 END_STATE()
222 196
223 HTML_BEGIN_STATE(RAWTEXTState) { 197 HTML_BEGIN_STATE(RawDataState) {
224 if (cc == '<') 198 if (cc == '<') {
225 HTML_ADVANCE_TO(RAWTEXTLessThanSignState); 199 HTML_ADVANCE_TO(RawDataLessThanSignState);
226 else if (cc == kEndOfFileMarker) 200 } else {
227 return emitEndOfFile(source);
228 else {
229 bufferCharacter(cc); 201 bufferCharacter(cc);
230 HTML_ADVANCE_TO(RAWTEXTState); 202 HTML_ADVANCE_TO(RawDataState);
203 }
204 }
205 END_STATE()
206
207 HTML_BEGIN_STATE(RawDataLessThanSignState) {
208 if (cc == '/') {
209 m_temporaryBuffer.clear();
210 HTML_ADVANCE_TO(RawDataEndTagOpenState);
211 } else {
212 bufferCharacter('<');
213 HTML_RECONSUME_IN(RawDataState);
214 }
215 }
216 END_STATE()
217
218 HTML_BEGIN_STATE(RawDataEndTagOpenState) {
219 if (isASCIILower(cc)) {
220 m_temporaryBuffer.append(static_cast<LChar>(cc));
221 HTML_ADVANCE_TO(RawDataEndTagNameState);
222 } else {
223 bufferCharacter('<');
224 bufferCharacter('/');
225 HTML_RECONSUME_IN(RawDataState);
226 }
227 }
228 END_STATE()
229
230 HTML_BEGIN_STATE(RawDataEndTagNameState) {
231 if (isASCIILower(cc)) {
232 m_temporaryBuffer.append(static_cast<LChar>(cc));
233 HTML_ADVANCE_TO(RawDataEndTagNameState);
234 } else {
235 if (isTokenizerWhitespace(cc)) {
236 if (isAppropriateEndTag())
237 FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
238 } else if (cc == '/') {
239 if (isAppropriateEndTag())
240 FLUSH_AND_ADVANCE_TO(VoidTagState);
241 } else if (cc == '>') {
242 if (isAppropriateEndTag())
243 return flushEmitAndResumeIn(source, HTMLTokenizer::DataState);
244 }
245 bufferCharacter('<');
246 bufferCharacter('/');
247 m_token->appendToCharacter(m_temporaryBuffer);
248 m_temporaryBuffer.clear();
249 HTML_RECONSUME_IN(RawDataState);
231 } 250 }
232 } 251 }
233 END_STATE() 252 END_STATE()
234 253
235 HTML_BEGIN_STATE(TagOpenState) { 254 HTML_BEGIN_STATE(TagOpenState) {
236 if (cc == '!') 255 if (cc == '!') {
237 HTML_ADVANCE_TO(CommentStart1State); 256 HTML_ADVANCE_TO(CommentStart1State);
238 else if (cc == '/') 257 } else if (cc == '/') {
239 HTML_ADVANCE_TO(CloseTagState); 258 HTML_ADVANCE_TO(CloseTagState);
240 else if (isASCIIUpper(cc)) { 259 } else if (isTokenizerTagName(cc)) {
241 m_token->beginStartTag(toLowerCase(cc)); 260 m_token->beginStartTag(static_cast<LChar>(cc));
242 HTML_ADVANCE_TO(TagNameState);
243 } else if (isASCIILower(cc)) {
244 m_token->beginStartTag(cc);
245 HTML_ADVANCE_TO(TagNameState); 261 HTML_ADVANCE_TO(TagNameState);
246 } else { 262 } else {
247 parseError();
248 bufferCharacter('<'); 263 bufferCharacter('<');
249 HTML_RECONSUME_IN(DataState); 264 HTML_RECONSUME_IN(DataState);
250 } 265 }
251 } 266 }
252 END_STATE() 267 END_STATE()
253 268
254 HTML_BEGIN_STATE(CloseTagState) { 269 HTML_BEGIN_STATE(CloseTagState) {
255 if (isASCIIUpper(cc)) { 270 if (isTokenizerTagName(cc)) {
256 m_token->beginEndTag(static_cast<LChar>(toLowerCase(cc)));
257 m_appropriateEndTagName.clear();
258 HTML_ADVANCE_TO(TagNameState);
259 } else if (isASCIILower(cc)) {
260 m_token->beginEndTag(static_cast<LChar>(cc)); 271 m_token->beginEndTag(static_cast<LChar>(cc));
261 m_appropriateEndTagName.clear();
262 HTML_ADVANCE_TO(TagNameState); 272 HTML_ADVANCE_TO(TagNameState);
263 } else if (cc == '>') { 273 } else if (cc == '>') {
264 bufferCharacter('<'); 274 bufferCharacter('<');
265 bufferCharacter('/'); 275 bufferCharacter('/');
266 bufferCharacter('>'); 276 bufferCharacter('>');
267 HTML_ADVANCE_TO(DataState); 277 HTML_ADVANCE_TO(DataState);
268 } else { 278 } else {
269 bufferCharacter('<'); 279 bufferCharacter('<');
270 bufferCharacter('/'); 280 bufferCharacter('/');
271 HTML_RECONSUME_IN(DataState); 281 HTML_RECONSUME_IN(DataState);
272 } 282 }
273 } 283 }
274 END_STATE() 284 END_STATE()
275 285
276 HTML_BEGIN_STATE(TagNameState) { 286 HTML_BEGIN_STATE(TagNameState) {
277 if (isTokenizerWhitespace(cc)) 287 if (isTokenizerWhitespace(cc)) {
278 HTML_ADVANCE_TO(BeforeAttributeNameState); 288 HTML_ADVANCE_TO(BeforeAttributeNameState);
279 else if (cc == '/') 289 } else if (cc == '/') {
280 HTML_ADVANCE_TO(SelfClosingStartTagState); 290 HTML_ADVANCE_TO(VoidTagState);
281 else if (cc == '>') 291 } else if (cc == '>') {
282 return emitAndResumeIn(source, HTMLTokenizer::DataState); 292 return emitAndResumeIn(source, HTMLTokenizer::DataState);
283 else if (isASCIIUpper(cc)) {
284 m_token->appendToName(toLowerCase(cc));
285 HTML_ADVANCE_TO(TagNameState);
286 } else if (cc == kEndOfFileMarker) {
287 parseError();
288 HTML_RECONSUME_IN(DataState);
289 } else { 293 } else {
290 m_token->appendToName(cc); 294 m_token->appendToName(cc);
291 HTML_ADVANCE_TO(TagNameState); 295 HTML_ADVANCE_TO(TagNameState);
292 } 296 }
293 } 297 }
294 END_STATE() 298 END_STATE()
295 299
296 HTML_BEGIN_STATE(RAWTEXTLessThanSignState) {
297 if (cc == '/') {
298 m_temporaryBuffer.clear();
299 ASSERT(m_bufferedEndTagName.isEmpty());
300 HTML_ADVANCE_TO(RAWTEXTEndTagOpenState);
301 } else {
302 bufferCharacter('<');
303 HTML_RECONSUME_IN(RAWTEXTState);
304 }
305 }
306 END_STATE()
307
308 HTML_BEGIN_STATE(RAWTEXTEndTagOpenState) {
309 if (isASCIIUpper(cc)) {
310 m_temporaryBuffer.append(static_cast<LChar>(cc));
311 addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
312 HTML_ADVANCE_TO(RAWTEXTEndTagNameState);
313 } else if (isASCIILower(cc)) {
314 m_temporaryBuffer.append(static_cast<LChar>(cc));
315 addToPossibleEndTag(static_cast<LChar>(cc));
316 HTML_ADVANCE_TO(RAWTEXTEndTagNameState);
317 } else {
318 bufferCharacter('<');
319 bufferCharacter('/');
320 HTML_RECONSUME_IN(RAWTEXTState);
321 }
322 }
323 END_STATE()
324
325 HTML_BEGIN_STATE(RAWTEXTEndTagNameState) {
326 if (isASCIIUpper(cc)) {
327 m_temporaryBuffer.append(static_cast<LChar>(cc));
328 addToPossibleEndTag(static_cast<LChar>(toLowerCase(cc)));
329 HTML_ADVANCE_TO(RAWTEXTEndTagNameState);
330 } else if (isASCIILower(cc)) {
331 m_temporaryBuffer.append(static_cast<LChar>(cc));
332 addToPossibleEndTag(static_cast<LChar>(cc));
333 HTML_ADVANCE_TO(RAWTEXTEndTagNameState);
334 } else {
335 if (isTokenizerWhitespace(cc)) {
336 if (isAppropriateEndTag()) {
337 m_temporaryBuffer.append(static_cast<LChar>(cc));
338 FLUSH_AND_ADVANCE_TO(BeforeAttributeNameState);
339 }
340 } else if (cc == '/') {
341 if (isAppropriateEndTag()) {
342 m_temporaryBuffer.append(static_cast<LChar>(cc));
343 FLUSH_AND_ADVANCE_TO(SelfClosingStartTagState);
344 }
345 } else if (cc == '>') {
346 if (isAppropriateEndTag()) {
347 m_temporaryBuffer.append(static_cast<LChar>(cc));
348 return flushEmitAndResumeIn(source, HTMLTokenizer::DataState);
349 }
350 }
351 bufferCharacter('<');
352 bufferCharacter('/');
353 m_token->appendToCharacter(m_temporaryBuffer);
354 m_bufferedEndTagName.clear();
355 m_temporaryBuffer.clear();
356 HTML_RECONSUME_IN(RAWTEXTState);
357 }
358 }
359 END_STATE()
360
361 HTML_BEGIN_STATE(BeforeAttributeNameState) { 300 HTML_BEGIN_STATE(BeforeAttributeNameState) {
362 if (isTokenizerWhitespace(cc)) 301 if (isTokenizerWhitespace(cc)) {
363 HTML_ADVANCE_TO(BeforeAttributeNameState); 302 HTML_ADVANCE_TO(BeforeAttributeNameState);
364 else if (cc == '/') 303 } else if (cc == '/') {
365 HTML_ADVANCE_TO(SelfClosingStartTagState); 304 HTML_ADVANCE_TO(VoidTagState);
366 else if (cc == '>') 305 } else if (cc == '>') {
367 return emitAndResumeIn(source, HTMLTokenizer::DataState); 306 return emitAndResumeIn(source, HTMLTokenizer::DataState);
368 else if (isASCIIUpper(cc)) {
369 m_token->addNewAttribute();
370 m_token->beginAttributeName(source.numberOfCharactersConsumed());
371 m_token->appendToAttributeName(toLowerCase(cc));
372 HTML_ADVANCE_TO(AttributeNameState);
373 } else if (cc == kEndOfFileMarker) {
374 parseError();
375 HTML_RECONSUME_IN(DataState);
376 } else { 307 } else {
377 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
378 parseError();
379 m_token->addNewAttribute(); 308 m_token->addNewAttribute();
380 m_token->beginAttributeName(source.numberOfCharactersConsumed()); 309 m_token->beginAttributeName(source.numberOfCharactersConsumed());
381 m_token->appendToAttributeName(cc); 310 m_token->appendToAttributeName(cc);
382 HTML_ADVANCE_TO(AttributeNameState); 311 HTML_ADVANCE_TO(AttributeNameState);
383 } 312 }
384 } 313 }
385 END_STATE() 314 END_STATE()
386 315
387 HTML_BEGIN_STATE(AttributeNameState) { 316 HTML_BEGIN_STATE(AttributeNameState) {
388 if (isTokenizerWhitespace(cc)) { 317 if (isTokenizerWhitespace(cc)) {
389 m_token->endAttributeName(source.numberOfCharactersConsumed()); 318 m_token->endAttributeName(source.numberOfCharactersConsumed());
390 HTML_ADVANCE_TO(AfterAttributeNameState); 319 HTML_ADVANCE_TO(AfterAttributeNameState);
391 } else if (cc == '/') { 320 } else if (cc == '/') {
392 m_token->endAttributeName(source.numberOfCharactersConsumed()); 321 m_token->endAttributeName(source.numberOfCharactersConsumed());
393 HTML_ADVANCE_TO(SelfClosingStartTagState); 322 HTML_ADVANCE_TO(VoidTagState);
394 } else if (cc == '=') { 323 } else if (cc == '=') {
395 m_token->endAttributeName(source.numberOfCharactersConsumed()); 324 m_token->endAttributeName(source.numberOfCharactersConsumed());
396 HTML_ADVANCE_TO(BeforeAttributeValueState); 325 HTML_ADVANCE_TO(BeforeAttributeValueState);
397 } else if (cc == '>') { 326 } else if (cc == '>') {
398 m_token->endAttributeName(source.numberOfCharactersConsumed()); 327 m_token->endAttributeName(source.numberOfCharactersConsumed());
399 return emitAndResumeIn(source, HTMLTokenizer::DataState); 328 return emitAndResumeIn(source, HTMLTokenizer::DataState);
400 } else if (isASCIIUpper(cc)) {
401 m_token->appendToAttributeName(toLowerCase(cc));
402 HTML_ADVANCE_TO(AttributeNameState);
403 } else if (cc == kEndOfFileMarker) {
404 parseError();
405 m_token->endAttributeName(source.numberOfCharactersConsumed());
406 HTML_RECONSUME_IN(DataState);
407 } else { 329 } else {
408 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
409 parseError();
410 m_token->appendToAttributeName(cc); 330 m_token->appendToAttributeName(cc);
411 HTML_ADVANCE_TO(AttributeNameState); 331 HTML_ADVANCE_TO(AttributeNameState);
412 } 332 }
413 } 333 }
414 END_STATE() 334 END_STATE()
415 335
416 HTML_BEGIN_STATE(AfterAttributeNameState) { 336 HTML_BEGIN_STATE(AfterAttributeNameState) {
417 if (isTokenizerWhitespace(cc)) 337 if (isTokenizerWhitespace(cc)) {
418 HTML_ADVANCE_TO(AfterAttributeNameState); 338 HTML_ADVANCE_TO(AfterAttributeNameState);
419 else if (cc == '/') 339 } else if (cc == '/') {
420 HTML_ADVANCE_TO(SelfClosingStartTagState); 340 HTML_ADVANCE_TO(VoidTagState);
421 else if (cc == '=') 341 } else if (cc == '=') {
422 HTML_ADVANCE_TO(BeforeAttributeValueState); 342 HTML_ADVANCE_TO(BeforeAttributeValueState);
423 else if (cc == '>') 343 } else if (cc == '>') {
424 return emitAndResumeIn(source, HTMLTokenizer::DataState); 344 return emitAndResumeIn(source, HTMLTokenizer::DataState);
425 else if (isASCIIUpper(cc)) {
426 m_token->addNewAttribute();
427 m_token->beginAttributeName(source.numberOfCharactersConsumed());
428 m_token->appendToAttributeName(toLowerCase(cc));
429 HTML_ADVANCE_TO(AttributeNameState);
430 } else if (cc == kEndOfFileMarker) {
431 parseError();
432 HTML_RECONSUME_IN(DataState);
433 } else { 345 } else {
434 if (cc == '"' || cc == '\'' || cc == '<')
435 parseError();
436 m_token->addNewAttribute(); 346 m_token->addNewAttribute();
437 m_token->beginAttributeName(source.numberOfCharactersConsumed()); 347 m_token->beginAttributeName(source.numberOfCharactersConsumed());
438 m_token->appendToAttributeName(cc); 348 m_token->appendToAttributeName(cc);
439 HTML_ADVANCE_TO(AttributeNameState); 349 HTML_ADVANCE_TO(AttributeNameState);
440 } 350 }
441 } 351 }
442 END_STATE() 352 END_STATE()
443 353
444 HTML_BEGIN_STATE(BeforeAttributeValueState) { 354 HTML_BEGIN_STATE(BeforeAttributeValueState) {
445 if (isTokenizerWhitespace(cc)) 355 if (isTokenizerWhitespace(cc))
446 HTML_ADVANCE_TO(BeforeAttributeValueState); 356 HTML_ADVANCE_TO(BeforeAttributeValueState);
447 else if (cc == '"') { 357 else if (cc == '"') {
448 m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1); 358 m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1);
449 HTML_ADVANCE_TO(AttributeValueDoubleQuotedState); 359 HTML_ADVANCE_TO(AttributeValueDoubleQuotedState);
450 } else if (cc == '&') { 360 } else if (cc == '&') {
451 m_token->beginAttributeValue(source.numberOfCharactersConsumed()); 361 m_token->beginAttributeValue(source.numberOfCharactersConsumed());
452 HTML_RECONSUME_IN(AttributeValueUnquotedState); 362 HTML_RECONSUME_IN(AttributeValueUnquotedState);
453 } else if (cc == '\'') { 363 } else if (cc == '\'') {
454 m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1); 364 m_token->beginAttributeValue(source.numberOfCharactersConsumed() + 1);
455 HTML_ADVANCE_TO(AttributeValueSingleQuotedState); 365 HTML_ADVANCE_TO(AttributeValueSingleQuotedState);
456 } else if (cc == '>') { 366 } else if (cc == '>') {
457 parseError();
458 return emitAndResumeIn(source, HTMLTokenizer::DataState); 367 return emitAndResumeIn(source, HTMLTokenizer::DataState);
459 } else if (cc == kEndOfFileMarker) {
460 parseError();
461 HTML_RECONSUME_IN(DataState);
462 } else { 368 } else {
463 if (cc == '<' || cc == '=' || cc == '`')
464 parseError();
465 m_token->beginAttributeValue(source.numberOfCharactersConsumed()); 369 m_token->beginAttributeValue(source.numberOfCharactersConsumed());
466 m_token->appendToAttributeValue(cc); 370 m_token->appendToAttributeValue(cc);
467 HTML_ADVANCE_TO(AttributeValueUnquotedState); 371 HTML_ADVANCE_TO(AttributeValueUnquotedState);
468 } 372 }
469 } 373 }
470 END_STATE() 374 END_STATE()
471 375
472 HTML_BEGIN_STATE(AttributeValueDoubleQuotedState) { 376 HTML_BEGIN_STATE(AttributeValueDoubleQuotedState) {
473 if (cc == '"') { 377 if (cc == '"') {
474 m_token->endAttributeValue(source.numberOfCharactersConsumed()); 378 m_token->endAttributeValue(source.numberOfCharactersConsumed());
475 HTML_ADVANCE_TO(AfterAttributeValueQuotedState); 379 HTML_ADVANCE_TO(BeforeAttributeNameState);
476 } else if (cc == '&') { 380 } else if (cc == '&') {
477 m_returnState = AttributeValueDoubleQuotedState; 381 m_returnState = AttributeValueDoubleQuotedState;
478 m_entityParser.reset(); 382 m_entityParser.reset();
479 HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState); 383 HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState);
480 } else if (cc == kEndOfFileMarker) {
481 parseError();
482 m_token->endAttributeValue(source.numberOfCharactersConsumed());
483 HTML_RECONSUME_IN(DataState);
484 } else { 384 } else {
485 m_token->appendToAttributeValue(cc); 385 m_token->appendToAttributeValue(cc);
486 HTML_ADVANCE_TO(AttributeValueDoubleQuotedState); 386 HTML_ADVANCE_TO(AttributeValueDoubleQuotedState);
487 } 387 }
488 } 388 }
489 END_STATE() 389 END_STATE()
490 390
491 HTML_BEGIN_STATE(AttributeValueSingleQuotedState) { 391 HTML_BEGIN_STATE(AttributeValueSingleQuotedState) {
492 if (cc == '\'') { 392 if (cc == '\'') {
493 m_token->endAttributeValue(source.numberOfCharactersConsumed()); 393 m_token->endAttributeValue(source.numberOfCharactersConsumed());
494 HTML_ADVANCE_TO(AfterAttributeValueQuotedState); 394 HTML_ADVANCE_TO(BeforeAttributeNameState);
495 } else if (cc == '&') { 395 } else if (cc == '&') {
496 m_returnState = AttributeValueSingleQuotedState; 396 m_returnState = AttributeValueSingleQuotedState;
497 m_entityParser.reset(); 397 m_entityParser.reset();
498 HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState); 398 HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState);
499 } else if (cc == kEndOfFileMarker) {
500 parseError();
501 m_token->endAttributeValue(source.numberOfCharactersConsumed());
502 HTML_RECONSUME_IN(DataState);
503 } else { 399 } else {
504 m_token->appendToAttributeValue(cc); 400 m_token->appendToAttributeValue(cc);
505 HTML_ADVANCE_TO(AttributeValueSingleQuotedState); 401 HTML_ADVANCE_TO(AttributeValueSingleQuotedState);
506 } 402 }
507 } 403 }
508 END_STATE() 404 END_STATE()
509 405
510 HTML_BEGIN_STATE(AttributeValueUnquotedState) { 406 HTML_BEGIN_STATE(AttributeValueUnquotedState) {
511 if (isTokenizerWhitespace(cc)) { 407 if (isTokenizerWhitespace(cc)) {
512 m_token->endAttributeValue(source.numberOfCharactersConsumed()); 408 m_token->endAttributeValue(source.numberOfCharactersConsumed());
513 HTML_ADVANCE_TO(BeforeAttributeNameState); 409 HTML_ADVANCE_TO(BeforeAttributeNameState);
514 } else if (cc == '&') { 410 } else if (cc == '&') {
515 m_returnState = AttributeValueUnquotedState; 411 m_returnState = AttributeValueUnquotedState;
516 m_entityParser.reset(); 412 m_entityParser.reset();
517 HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState); 413 HTML_ADVANCE_TO(CharacterReferenceInAttributeValueState);
518 } else if (cc == '>') { 414 } else if (cc == '>') {
519 m_token->endAttributeValue(source.numberOfCharactersConsumed()); 415 m_token->endAttributeValue(source.numberOfCharactersConsumed());
520 return emitAndResumeIn(source, HTMLTokenizer::DataState); 416 return emitAndResumeIn(source, HTMLTokenizer::DataState);
521 } else if (cc == kEndOfFileMarker) {
522 parseError();
523 m_token->endAttributeValue(source.numberOfCharactersConsumed());
524 HTML_RECONSUME_IN(DataState);
525 } else { 417 } else {
526 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=' || cc == '`')
527 parseError();
528 m_token->appendToAttributeValue(cc); 418 m_token->appendToAttributeValue(cc);
529 HTML_ADVANCE_TO(AttributeValueUnquotedState); 419 HTML_ADVANCE_TO(AttributeValueUnquotedState);
530 } 420 }
531 } 421 }
532 END_STATE() 422 END_STATE()
533 423
534 HTML_BEGIN_STATE(AfterAttributeValueQuotedState) { 424 HTML_BEGIN_STATE(VoidTagState) {
535 if (isTokenizerWhitespace(cc)) 425 if (cc == '>') {
536 HTML_ADVANCE_TO(BeforeAttributeNameState); 426 m_token->setSelfClosing();
537 else if (cc == '/')
538 HTML_ADVANCE_TO(SelfClosingStartTagState);
539 else if (cc == '>')
540 return emitAndResumeIn(source, HTMLTokenizer::DataState); 427 return emitAndResumeIn(source, HTMLTokenizer::DataState);
541 else if (cc == kEndOfFileMarker) {
542 parseError();
543 HTML_RECONSUME_IN(DataState);
544 } else { 428 } else {
545 parseError();
546 HTML_RECONSUME_IN(BeforeAttributeNameState); 429 HTML_RECONSUME_IN(BeforeAttributeNameState);
547 } 430 }
548 } 431 }
549 END_STATE()
550
551 HTML_BEGIN_STATE(SelfClosingStartTagState) {
552 if (cc == '>') {
553 m_token->setSelfClosing();
554 return emitAndResumeIn(source, HTMLTokenizer::DataState);
555 } else if (cc == kEndOfFileMarker) {
556 parseError();
557 HTML_RECONSUME_IN(DataState);
558 } else {
559 parseError();
560 HTML_RECONSUME_IN(BeforeAttributeNameState);
561 }
562 }
563 END_STATE() 432 END_STATE()
564 433
565 HTML_BEGIN_STATE(CommentStart1State) { 434 HTML_BEGIN_STATE(CommentStart1State) {
566 if (cc == '-') { 435 if (cc == '-') {
567 HTML_ADVANCE_TO(CommentStart2State); 436 HTML_ADVANCE_TO(CommentStart2State);
568 } else { 437 } else {
569 bufferCharacter('<'); 438 bufferCharacter('<');
570 bufferCharacter('!'); 439 bufferCharacter('!');
571 HTML_RECONSUME_IN(DataState); 440 HTML_RECONSUME_IN(DataState);
572 } 441 }
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
609 else 478 else
610 HTML_ADVANCE_TO(CommentState); 479 HTML_ADVANCE_TO(CommentState);
611 } 480 }
612 END_STATE() 481 END_STATE()
613 } 482 }
614 483
615 ASSERT_NOT_REACHED(); 484 ASSERT_NOT_REACHED();
616 return false; 485 return false;
617 } 486 }
618 487
619 inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString)
620 {
621 return vectorEqualsString(m_temporaryBuffer, expectedString);
622 }
623
624 inline void HTMLTokenizer::addToPossibleEndTag(LChar cc)
625 {
626 ASSERT(isEndTagBufferingState(m_state));
627 m_bufferedEndTagName.append(cc);
628 }
629
630 inline bool HTMLTokenizer::isAppropriateEndTag() 488 inline bool HTMLTokenizer::isAppropriateEndTag()
631 { 489 {
632 if (m_bufferedEndTagName.size() != m_appropriateEndTagName.size()) 490 if (m_temporaryBuffer.size() != m_appropriateEndTagName.size())
633 return false; 491 return false;
634 492
635 size_t numCharacters = m_bufferedEndTagName.size(); 493 size_t numCharacters = m_temporaryBuffer.size();
636 494
637 for (size_t i = 0; i < numCharacters; i++) { 495 for (size_t i = 0; i < numCharacters; i++) {
638 if (m_bufferedEndTagName[i] != m_appropriateEndTagName[i]) 496 if (m_temporaryBuffer[i] != m_appropriateEndTagName[i])
639 return false; 497 return false;
640 } 498 }
641 499
642 return true; 500 return true;
643 } 501 }
644 502
645 inline void HTMLTokenizer::parseError() 503 inline void HTMLTokenizer::parseError()
646 { 504 {
647 notImplemented(); 505 notImplemented();
648 } 506 }
649 507
650 } 508 }
OLDNEW
« no previous file with comments | « sky/engine/core/html/parser/HTMLTokenizer.h ('k') | sky/engine/core/html/parser/MarkupTokenizerInlines.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698