Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(280)

Side by Side Diff: third_party/WebKit/Source/core/html/parser/HTMLTokenizer.cpp

Issue 2827653003: HTMLTokenizer: Fold isASCIIUpper() / isASCIILower() cases (Closed)
Patch Set: Rebase Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved.
3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ 3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
4 * Copyright (C) 2010 Google, Inc. All Rights Reserved. 4 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
5 * 5 *
6 * Redistribution and use in source and binary forms, with or without 6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions 7 * modification, are permitted provided that the following conditions
8 * are met: 8 * are met:
9 * 1. Redistributions of source code must retain the above copyright 9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer. 10 * notice, this list of conditions and the following disclaimer.
(...skipping 27 matching lines...) Expand all
38 38
39 // Please don't use DEFINE_STATIC_LOCAL in this file. The HTMLTokenizer is used 39 // Please don't use DEFINE_STATIC_LOCAL in this file. The HTMLTokenizer is used
40 // from multiple threads and DEFINE_STATIC_LOCAL isn't threadsafe. 40 // from multiple threads and DEFINE_STATIC_LOCAL isn't threadsafe.
41 #undef DEFINE_STATIC_LOCAL 41 #undef DEFINE_STATIC_LOCAL
42 42
43 namespace blink { 43 namespace blink {
44 44
45 using namespace HTMLNames; 45 using namespace HTMLNames;
46 46
47 static inline UChar ToLowerCase(UChar cc) { 47 static inline UChar ToLowerCase(UChar cc) {
48 DCHECK(IsASCIIUpper(cc)); 48 DCHECK(IsASCIIAlpha(cc));
49 const int kLowerCaseOffset = 0x20; 49 return cc | 0x20;
50 return cc + kLowerCaseOffset; 50 }
51
52 static inline UChar ToLowerCaseIfAlpha(UChar cc) {
53 return cc | (IsASCIIUpper(cc) ? 0x20 : 0);
51 } 54 }
52 55
53 static inline bool VectorEqualsString(const Vector<LChar, 32>& vector, 56 static inline bool VectorEqualsString(const Vector<LChar, 32>& vector,
54 const String& string) { 57 const String& string) {
55 if (vector.size() != string.length()) 58 if (vector.size() != string.length())
56 return false; 59 return false;
57 60
58 if (!string.length()) 61 if (!string.length())
59 return true; 62 return true;
60 63
(...skipping 167 matching lines...) Expand 10 before | Expand all | Expand 10 after
228 231
229 HTML_BEGIN_STATE(kPLAINTEXTState) { 232 HTML_BEGIN_STATE(kPLAINTEXTState) {
230 if (cc == kEndOfFileMarker) 233 if (cc == kEndOfFileMarker)
231 return EmitEndOfFile(source); 234 return EmitEndOfFile(source);
232 BufferCharacter(cc); 235 BufferCharacter(cc);
233 HTML_ADVANCE_TO(kPLAINTEXTState); 236 HTML_ADVANCE_TO(kPLAINTEXTState);
234 } 237 }
235 END_STATE() 238 END_STATE()
236 239
237 HTML_BEGIN_STATE(kTagOpenState) { 240 HTML_BEGIN_STATE(kTagOpenState) {
238 if (cc == '!') 241 if (cc == '!') {
239 HTML_ADVANCE_TO(kMarkupDeclarationOpenState); 242 HTML_ADVANCE_TO(kMarkupDeclarationOpenState);
240 else if (cc == '/') 243 } else if (cc == '/') {
241 HTML_ADVANCE_TO(kEndTagOpenState); 244 HTML_ADVANCE_TO(kEndTagOpenState);
242 else if (IsASCIIUpper(cc)) { 245 } else if (IsASCIIAlpha(cc)) {
243 token_->BeginStartTag(ToLowerCase(cc)); 246 token_->BeginStartTag(ToLowerCase(cc));
244 HTML_ADVANCE_TO(kTagNameState); 247 HTML_ADVANCE_TO(kTagNameState);
245 } else if (IsASCIILower(cc)) {
246 token_->BeginStartTag(cc);
247 HTML_ADVANCE_TO(kTagNameState);
248 } else if (cc == '?') { 248 } else if (cc == '?') {
249 ParseError(); 249 ParseError();
250 // The spec consumes the current character before switching 250 // The spec consumes the current character before switching
251 // to the bogus comment state, but it's easier to implement 251 // to the bogus comment state, but it's easier to implement
252 // if we reconsume the current character. 252 // if we reconsume the current character.
253 HTML_RECONSUME_IN(kBogusCommentState); 253 HTML_RECONSUME_IN(kBogusCommentState);
254 } else { 254 } else {
255 ParseError(); 255 ParseError();
256 BufferCharacter('<'); 256 BufferCharacter('<');
257 HTML_RECONSUME_IN(kDataState); 257 HTML_RECONSUME_IN(kDataState);
258 } 258 }
259 } 259 }
260 END_STATE() 260 END_STATE()
261 261
262 HTML_BEGIN_STATE(kEndTagOpenState) { 262 HTML_BEGIN_STATE(kEndTagOpenState) {
263 if (IsASCIIUpper(cc)) { 263 if (IsASCIIAlpha(cc)) {
264 token_->BeginEndTag(static_cast<LChar>(ToLowerCase(cc))); 264 token_->BeginEndTag(static_cast<LChar>(ToLowerCase(cc)));
265 appropriate_end_tag_name_.clear(); 265 appropriate_end_tag_name_.clear();
266 HTML_ADVANCE_TO(kTagNameState); 266 HTML_ADVANCE_TO(kTagNameState);
267 } else if (IsASCIILower(cc)) {
268 token_->BeginEndTag(static_cast<LChar>(cc));
269 appropriate_end_tag_name_.clear();
270 HTML_ADVANCE_TO(kTagNameState);
271 } else if (cc == '>') { 267 } else if (cc == '>') {
272 ParseError(); 268 ParseError();
273 HTML_ADVANCE_TO(kDataState); 269 HTML_ADVANCE_TO(kDataState);
274 } else if (cc == kEndOfFileMarker) { 270 } else if (cc == kEndOfFileMarker) {
275 ParseError(); 271 ParseError();
276 BufferCharacter('<'); 272 BufferCharacter('<');
277 BufferCharacter('/'); 273 BufferCharacter('/');
278 HTML_RECONSUME_IN(kDataState); 274 HTML_RECONSUME_IN(kDataState);
279 } else { 275 } else {
280 ParseError(); 276 ParseError();
281 HTML_RECONSUME_IN(kBogusCommentState); 277 HTML_RECONSUME_IN(kBogusCommentState);
282 } 278 }
283 } 279 }
284 END_STATE() 280 END_STATE()
285 281
286 HTML_BEGIN_STATE(kTagNameState) { 282 HTML_BEGIN_STATE(kTagNameState) {
287 if (IsTokenizerWhitespace(cc)) 283 if (IsTokenizerWhitespace(cc)) {
288 HTML_ADVANCE_TO(kBeforeAttributeNameState); 284 HTML_ADVANCE_TO(kBeforeAttributeNameState);
289 else if (cc == '/') 285 } else if (cc == '/') {
290 HTML_ADVANCE_TO(kSelfClosingStartTagState); 286 HTML_ADVANCE_TO(kSelfClosingStartTagState);
291 else if (cc == '>') 287 } else if (cc == '>') {
292 return EmitAndResumeIn(source, HTMLTokenizer::kDataState); 288 return EmitAndResumeIn(source, HTMLTokenizer::kDataState);
293 else if (IsASCIIUpper(cc)) {
294 token_->AppendToName(ToLowerCase(cc));
295 HTML_ADVANCE_TO(kTagNameState);
296 } else if (cc == kEndOfFileMarker) { 289 } else if (cc == kEndOfFileMarker) {
297 ParseError(); 290 ParseError();
298 HTML_RECONSUME_IN(kDataState); 291 HTML_RECONSUME_IN(kDataState);
299 } else { 292 } else {
300 token_->AppendToName(cc); 293 token_->AppendToName(ToLowerCaseIfAlpha(cc));
301 HTML_ADVANCE_TO(kTagNameState); 294 HTML_ADVANCE_TO(kTagNameState);
302 } 295 }
303 } 296 }
304 END_STATE() 297 END_STATE()
305 298
306 HTML_BEGIN_STATE(kRCDATALessThanSignState) { 299 HTML_BEGIN_STATE(kRCDATALessThanSignState) {
307 if (cc == '/') { 300 if (cc == '/') {
308 temporary_buffer_.clear(); 301 temporary_buffer_.clear();
309 DCHECK(buffered_end_tag_name_.IsEmpty()); 302 DCHECK(buffered_end_tag_name_.IsEmpty());
310 HTML_ADVANCE_TO(kRCDATAEndTagOpenState); 303 HTML_ADVANCE_TO(kRCDATAEndTagOpenState);
311 } else { 304 } else {
312 BufferCharacter('<'); 305 BufferCharacter('<');
313 HTML_RECONSUME_IN(kRCDATAState); 306 HTML_RECONSUME_IN(kRCDATAState);
314 } 307 }
315 } 308 }
316 END_STATE() 309 END_STATE()
317 310
318 HTML_BEGIN_STATE(kRCDATAEndTagOpenState) { 311 HTML_BEGIN_STATE(kRCDATAEndTagOpenState) {
319 if (IsASCIIUpper(cc)) { 312 if (IsASCIIAlpha(cc)) {
320 temporary_buffer_.push_back(static_cast<LChar>(cc)); 313 temporary_buffer_.push_back(static_cast<LChar>(cc));
321 AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc))); 314 AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc)));
322 HTML_ADVANCE_TO(kRCDATAEndTagNameState); 315 HTML_ADVANCE_TO(kRCDATAEndTagNameState);
323 } else if (IsASCIILower(cc)) {
324 temporary_buffer_.push_back(static_cast<LChar>(cc));
325 AddToPossibleEndTag(static_cast<LChar>(cc));
326 HTML_ADVANCE_TO(kRCDATAEndTagNameState);
327 } else { 316 } else {
328 BufferCharacter('<'); 317 BufferCharacter('<');
329 BufferCharacter('/'); 318 BufferCharacter('/');
330 HTML_RECONSUME_IN(kRCDATAState); 319 HTML_RECONSUME_IN(kRCDATAState);
331 } 320 }
332 } 321 }
333 END_STATE() 322 END_STATE()
334 323
335 HTML_BEGIN_STATE(kRCDATAEndTagNameState) { 324 HTML_BEGIN_STATE(kRCDATAEndTagNameState) {
336 if (IsASCIIUpper(cc)) { 325 if (IsASCIIAlpha(cc)) {
337 temporary_buffer_.push_back(static_cast<LChar>(cc)); 326 temporary_buffer_.push_back(static_cast<LChar>(cc));
338 AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc))); 327 AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc)));
339 HTML_ADVANCE_TO(kRCDATAEndTagNameState); 328 HTML_ADVANCE_TO(kRCDATAEndTagNameState);
340 } else if (IsASCIILower(cc)) {
341 temporary_buffer_.push_back(static_cast<LChar>(cc));
342 AddToPossibleEndTag(static_cast<LChar>(cc));
343 HTML_ADVANCE_TO(kRCDATAEndTagNameState);
344 } else { 329 } else {
345 if (IsTokenizerWhitespace(cc)) { 330 if (IsTokenizerWhitespace(cc)) {
346 if (IsAppropriateEndTag()) { 331 if (IsAppropriateEndTag()) {
347 temporary_buffer_.push_back(static_cast<LChar>(cc)); 332 temporary_buffer_.push_back(static_cast<LChar>(cc));
348 FLUSH_AND_ADVANCE_TO(kBeforeAttributeNameState); 333 FLUSH_AND_ADVANCE_TO(kBeforeAttributeNameState);
349 } 334 }
350 } else if (cc == '/') { 335 } else if (cc == '/') {
351 if (IsAppropriateEndTag()) { 336 if (IsAppropriateEndTag()) {
352 temporary_buffer_.push_back(static_cast<LChar>(cc)); 337 temporary_buffer_.push_back(static_cast<LChar>(cc));
353 FLUSH_AND_ADVANCE_TO(kSelfClosingStartTagState); 338 FLUSH_AND_ADVANCE_TO(kSelfClosingStartTagState);
(...skipping 20 matching lines...) Expand all
374 DCHECK(buffered_end_tag_name_.IsEmpty()); 359 DCHECK(buffered_end_tag_name_.IsEmpty());
375 HTML_ADVANCE_TO(kRAWTEXTEndTagOpenState); 360 HTML_ADVANCE_TO(kRAWTEXTEndTagOpenState);
376 } else { 361 } else {
377 BufferCharacter('<'); 362 BufferCharacter('<');
378 HTML_RECONSUME_IN(kRAWTEXTState); 363 HTML_RECONSUME_IN(kRAWTEXTState);
379 } 364 }
380 } 365 }
381 END_STATE() 366 END_STATE()
382 367
383 HTML_BEGIN_STATE(kRAWTEXTEndTagOpenState) { 368 HTML_BEGIN_STATE(kRAWTEXTEndTagOpenState) {
384 if (IsASCIIUpper(cc)) { 369 if (IsASCIIAlpha(cc)) {
385 temporary_buffer_.push_back(static_cast<LChar>(cc)); 370 temporary_buffer_.push_back(static_cast<LChar>(cc));
386 AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc))); 371 AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc)));
387 HTML_ADVANCE_TO(kRAWTEXTEndTagNameState); 372 HTML_ADVANCE_TO(kRAWTEXTEndTagNameState);
388 } else if (IsASCIILower(cc)) {
389 temporary_buffer_.push_back(static_cast<LChar>(cc));
390 AddToPossibleEndTag(static_cast<LChar>(cc));
391 HTML_ADVANCE_TO(kRAWTEXTEndTagNameState);
392 } else { 373 } else {
393 BufferCharacter('<'); 374 BufferCharacter('<');
394 BufferCharacter('/'); 375 BufferCharacter('/');
395 HTML_RECONSUME_IN(kRAWTEXTState); 376 HTML_RECONSUME_IN(kRAWTEXTState);
396 } 377 }
397 } 378 }
398 END_STATE() 379 END_STATE()
399 380
400 HTML_BEGIN_STATE(kRAWTEXTEndTagNameState) { 381 HTML_BEGIN_STATE(kRAWTEXTEndTagNameState) {
401 if (IsASCIIUpper(cc)) { 382 if (IsASCIIAlpha(cc)) {
402 temporary_buffer_.push_back(static_cast<LChar>(cc)); 383 temporary_buffer_.push_back(static_cast<LChar>(cc));
403 AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc))); 384 AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc)));
404 HTML_ADVANCE_TO(kRAWTEXTEndTagNameState); 385 HTML_ADVANCE_TO(kRAWTEXTEndTagNameState);
405 } else if (IsASCIILower(cc)) {
406 temporary_buffer_.push_back(static_cast<LChar>(cc));
407 AddToPossibleEndTag(static_cast<LChar>(cc));
408 HTML_ADVANCE_TO(kRAWTEXTEndTagNameState);
409 } else { 386 } else {
410 if (IsTokenizerWhitespace(cc)) { 387 if (IsTokenizerWhitespace(cc)) {
411 if (IsAppropriateEndTag()) { 388 if (IsAppropriateEndTag()) {
412 temporary_buffer_.push_back(static_cast<LChar>(cc)); 389 temporary_buffer_.push_back(static_cast<LChar>(cc));
413 FLUSH_AND_ADVANCE_TO(kBeforeAttributeNameState); 390 FLUSH_AND_ADVANCE_TO(kBeforeAttributeNameState);
414 } 391 }
415 } else if (cc == '/') { 392 } else if (cc == '/') {
416 if (IsAppropriateEndTag()) { 393 if (IsAppropriateEndTag()) {
417 temporary_buffer_.push_back(static_cast<LChar>(cc)); 394 temporary_buffer_.push_back(static_cast<LChar>(cc));
418 FLUSH_AND_ADVANCE_TO(kSelfClosingStartTagState); 395 FLUSH_AND_ADVANCE_TO(kSelfClosingStartTagState);
(...skipping 24 matching lines...) Expand all
443 BufferCharacter('!'); 420 BufferCharacter('!');
444 HTML_ADVANCE_TO(kScriptDataEscapeStartState); 421 HTML_ADVANCE_TO(kScriptDataEscapeStartState);
445 } else { 422 } else {
446 BufferCharacter('<'); 423 BufferCharacter('<');
447 HTML_RECONSUME_IN(kScriptDataState); 424 HTML_RECONSUME_IN(kScriptDataState);
448 } 425 }
449 } 426 }
450 END_STATE() 427 END_STATE()
451 428
452 HTML_BEGIN_STATE(kScriptDataEndTagOpenState) { 429 HTML_BEGIN_STATE(kScriptDataEndTagOpenState) {
453 if (IsASCIIUpper(cc)) { 430 if (IsASCIIAlpha(cc)) {
454 temporary_buffer_.push_back(static_cast<LChar>(cc)); 431 temporary_buffer_.push_back(static_cast<LChar>(cc));
455 AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc))); 432 AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc)));
456 HTML_ADVANCE_TO(kScriptDataEndTagNameState); 433 HTML_ADVANCE_TO(kScriptDataEndTagNameState);
457 } else if (IsASCIILower(cc)) {
458 temporary_buffer_.push_back(static_cast<LChar>(cc));
459 AddToPossibleEndTag(static_cast<LChar>(cc));
460 HTML_ADVANCE_TO(kScriptDataEndTagNameState);
461 } else { 434 } else {
462 BufferCharacter('<'); 435 BufferCharacter('<');
463 BufferCharacter('/'); 436 BufferCharacter('/');
464 HTML_RECONSUME_IN(kScriptDataState); 437 HTML_RECONSUME_IN(kScriptDataState);
465 } 438 }
466 } 439 }
467 END_STATE() 440 END_STATE()
468 441
469 HTML_BEGIN_STATE(kScriptDataEndTagNameState) { 442 HTML_BEGIN_STATE(kScriptDataEndTagNameState) {
470 if (IsASCIIUpper(cc)) { 443 if (IsASCIIAlpha(cc)) {
471 temporary_buffer_.push_back(static_cast<LChar>(cc)); 444 temporary_buffer_.push_back(static_cast<LChar>(cc));
472 AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc))); 445 AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc)));
473 HTML_ADVANCE_TO(kScriptDataEndTagNameState); 446 HTML_ADVANCE_TO(kScriptDataEndTagNameState);
474 } else if (IsASCIILower(cc)) {
475 temporary_buffer_.push_back(static_cast<LChar>(cc));
476 AddToPossibleEndTag(static_cast<LChar>(cc));
477 HTML_ADVANCE_TO(kScriptDataEndTagNameState);
478 } else { 447 } else {
479 if (IsTokenizerWhitespace(cc)) { 448 if (IsTokenizerWhitespace(cc)) {
480 if (IsAppropriateEndTag()) { 449 if (IsAppropriateEndTag()) {
481 temporary_buffer_.push_back(static_cast<LChar>(cc)); 450 temporary_buffer_.push_back(static_cast<LChar>(cc));
482 FLUSH_AND_ADVANCE_TO(kBeforeAttributeNameState); 451 FLUSH_AND_ADVANCE_TO(kBeforeAttributeNameState);
483 } 452 }
484 } else if (cc == '/') { 453 } else if (cc == '/') {
485 if (IsAppropriateEndTag()) { 454 if (IsAppropriateEndTag()) {
486 temporary_buffer_.push_back(static_cast<LChar>(cc)); 455 temporary_buffer_.push_back(static_cast<LChar>(cc));
487 FLUSH_AND_ADVANCE_TO(kSelfClosingStartTagState); 456 FLUSH_AND_ADVANCE_TO(kSelfClosingStartTagState);
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
569 HTML_ADVANCE_TO(kScriptDataEscapedState); 538 HTML_ADVANCE_TO(kScriptDataEscapedState);
570 } 539 }
571 } 540 }
572 END_STATE() 541 END_STATE()
573 542
574 HTML_BEGIN_STATE(kScriptDataEscapedLessThanSignState) { 543 HTML_BEGIN_STATE(kScriptDataEscapedLessThanSignState) {
575 if (cc == '/') { 544 if (cc == '/') {
576 temporary_buffer_.clear(); 545 temporary_buffer_.clear();
577 DCHECK(buffered_end_tag_name_.IsEmpty()); 546 DCHECK(buffered_end_tag_name_.IsEmpty());
578 HTML_ADVANCE_TO(kScriptDataEscapedEndTagOpenState); 547 HTML_ADVANCE_TO(kScriptDataEscapedEndTagOpenState);
579 } else if (IsASCIIUpper(cc)) { 548 } else if (IsASCIIAlpha(cc)) {
580 BufferCharacter('<'); 549 BufferCharacter('<');
581 BufferCharacter(cc); 550 BufferCharacter(cc);
582 temporary_buffer_.clear(); 551 temporary_buffer_.clear();
583 temporary_buffer_.push_back(ToLowerCase(cc)); 552 temporary_buffer_.push_back(static_cast<LChar>(ToLowerCase(cc)));
584 HTML_ADVANCE_TO(kScriptDataDoubleEscapeStartState);
585 } else if (IsASCIILower(cc)) {
586 BufferCharacter('<');
587 BufferCharacter(cc);
588 temporary_buffer_.clear();
589 temporary_buffer_.push_back(static_cast<LChar>(cc));
590 HTML_ADVANCE_TO(kScriptDataDoubleEscapeStartState); 553 HTML_ADVANCE_TO(kScriptDataDoubleEscapeStartState);
591 } else { 554 } else {
592 BufferCharacter('<'); 555 BufferCharacter('<');
593 HTML_RECONSUME_IN(kScriptDataEscapedState); 556 HTML_RECONSUME_IN(kScriptDataEscapedState);
594 } 557 }
595 } 558 }
596 END_STATE() 559 END_STATE()
597 560
598 HTML_BEGIN_STATE(kScriptDataEscapedEndTagOpenState) { 561 HTML_BEGIN_STATE(kScriptDataEscapedEndTagOpenState) {
599 if (IsASCIIUpper(cc)) { 562 if (IsASCIIAlpha(cc)) {
600 temporary_buffer_.push_back(static_cast<LChar>(cc)); 563 temporary_buffer_.push_back(static_cast<LChar>(cc));
601 AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc))); 564 AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc)));
602 HTML_ADVANCE_TO(kScriptDataEscapedEndTagNameState); 565 HTML_ADVANCE_TO(kScriptDataEscapedEndTagNameState);
603 } else if (IsASCIILower(cc)) {
604 temporary_buffer_.push_back(static_cast<LChar>(cc));
605 AddToPossibleEndTag(static_cast<LChar>(cc));
606 HTML_ADVANCE_TO(kScriptDataEscapedEndTagNameState);
607 } else { 566 } else {
608 BufferCharacter('<'); 567 BufferCharacter('<');
609 BufferCharacter('/'); 568 BufferCharacter('/');
610 HTML_RECONSUME_IN(kScriptDataEscapedState); 569 HTML_RECONSUME_IN(kScriptDataEscapedState);
611 } 570 }
612 } 571 }
613 END_STATE() 572 END_STATE()
614 573
615 HTML_BEGIN_STATE(kScriptDataEscapedEndTagNameState) { 574 HTML_BEGIN_STATE(kScriptDataEscapedEndTagNameState) {
616 if (IsASCIIUpper(cc)) { 575 if (IsASCIIAlpha(cc)) {
617 temporary_buffer_.push_back(static_cast<LChar>(cc)); 576 temporary_buffer_.push_back(static_cast<LChar>(cc));
618 AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc))); 577 AddToPossibleEndTag(static_cast<LChar>(ToLowerCase(cc)));
619 HTML_ADVANCE_TO(kScriptDataEscapedEndTagNameState); 578 HTML_ADVANCE_TO(kScriptDataEscapedEndTagNameState);
620 } else if (IsASCIILower(cc)) {
621 temporary_buffer_.push_back(static_cast<LChar>(cc));
622 AddToPossibleEndTag(static_cast<LChar>(cc));
623 HTML_ADVANCE_TO(kScriptDataEscapedEndTagNameState);
624 } else { 579 } else {
625 if (IsTokenizerWhitespace(cc)) { 580 if (IsTokenizerWhitespace(cc)) {
626 if (IsAppropriateEndTag()) { 581 if (IsAppropriateEndTag()) {
627 temporary_buffer_.push_back(static_cast<LChar>(cc)); 582 temporary_buffer_.push_back(static_cast<LChar>(cc));
628 FLUSH_AND_ADVANCE_TO(kBeforeAttributeNameState); 583 FLUSH_AND_ADVANCE_TO(kBeforeAttributeNameState);
629 } 584 }
630 } else if (cc == '/') { 585 } else if (cc == '/') {
631 if (IsAppropriateEndTag()) { 586 if (IsAppropriateEndTag()) {
632 temporary_buffer_.push_back(static_cast<LChar>(cc)); 587 temporary_buffer_.push_back(static_cast<LChar>(cc));
633 FLUSH_AND_ADVANCE_TO(kSelfClosingStartTagState); 588 FLUSH_AND_ADVANCE_TO(kSelfClosingStartTagState);
(...skipping 14 matching lines...) Expand all
648 } 603 }
649 END_STATE() 604 END_STATE()
650 605
651 HTML_BEGIN_STATE(kScriptDataDoubleEscapeStartState) { 606 HTML_BEGIN_STATE(kScriptDataDoubleEscapeStartState) {
652 if (IsTokenizerWhitespace(cc) || cc == '/' || cc == '>') { 607 if (IsTokenizerWhitespace(cc) || cc == '/' || cc == '>') {
653 BufferCharacter(cc); 608 BufferCharacter(cc);
654 if (TemporaryBufferIs(scriptTag.LocalName())) 609 if (TemporaryBufferIs(scriptTag.LocalName()))
655 HTML_ADVANCE_TO(kScriptDataDoubleEscapedState); 610 HTML_ADVANCE_TO(kScriptDataDoubleEscapedState);
656 else 611 else
657 HTML_ADVANCE_TO(kScriptDataEscapedState); 612 HTML_ADVANCE_TO(kScriptDataEscapedState);
658 } else if (IsASCIIUpper(cc)) { 613 } else if (IsASCIIAlpha(cc)) {
659 BufferCharacter(cc); 614 BufferCharacter(cc);
660 temporary_buffer_.push_back(ToLowerCase(cc)); 615 temporary_buffer_.push_back(static_cast<LChar>(ToLowerCase(cc)));
661 HTML_ADVANCE_TO(kScriptDataDoubleEscapeStartState);
662 } else if (IsASCIILower(cc)) {
663 BufferCharacter(cc);
664 temporary_buffer_.push_back(static_cast<LChar>(cc));
665 HTML_ADVANCE_TO(kScriptDataDoubleEscapeStartState); 616 HTML_ADVANCE_TO(kScriptDataDoubleEscapeStartState);
666 } else 617 } else
667 HTML_RECONSUME_IN(kScriptDataEscapedState); 618 HTML_RECONSUME_IN(kScriptDataEscapedState);
668 } 619 }
669 END_STATE() 620 END_STATE()
670 621
671 HTML_BEGIN_STATE(kScriptDataDoubleEscapedState) { 622 HTML_BEGIN_STATE(kScriptDataDoubleEscapedState) {
672 if (cc == '-') { 623 if (cc == '-') {
673 BufferCharacter(cc); 624 BufferCharacter(cc);
674 HTML_ADVANCE_TO(kScriptDataDoubleEscapedDashState); 625 HTML_ADVANCE_TO(kScriptDataDoubleEscapedDashState);
(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
732 } 683 }
733 END_STATE() 684 END_STATE()
734 685
735 HTML_BEGIN_STATE(kScriptDataDoubleEscapeEndState) { 686 HTML_BEGIN_STATE(kScriptDataDoubleEscapeEndState) {
736 if (IsTokenizerWhitespace(cc) || cc == '/' || cc == '>') { 687 if (IsTokenizerWhitespace(cc) || cc == '/' || cc == '>') {
737 BufferCharacter(cc); 688 BufferCharacter(cc);
738 if (TemporaryBufferIs(scriptTag.LocalName())) 689 if (TemporaryBufferIs(scriptTag.LocalName()))
739 HTML_ADVANCE_TO(kScriptDataEscapedState); 690 HTML_ADVANCE_TO(kScriptDataEscapedState);
740 else 691 else
741 HTML_ADVANCE_TO(kScriptDataDoubleEscapedState); 692 HTML_ADVANCE_TO(kScriptDataDoubleEscapedState);
742 } else if (IsASCIIUpper(cc)) { 693 } else if (IsASCIIAlpha(cc)) {
743 BufferCharacter(cc); 694 BufferCharacter(cc);
744 temporary_buffer_.push_back(ToLowerCase(cc)); 695 temporary_buffer_.push_back(static_cast<LChar>(ToLowerCase(cc)));
745 HTML_ADVANCE_TO(kScriptDataDoubleEscapeEndState);
746 } else if (IsASCIILower(cc)) {
747 BufferCharacter(cc);
748 temporary_buffer_.push_back(static_cast<LChar>(cc));
749 HTML_ADVANCE_TO(kScriptDataDoubleEscapeEndState); 696 HTML_ADVANCE_TO(kScriptDataDoubleEscapeEndState);
750 } else 697 } else
751 HTML_RECONSUME_IN(kScriptDataDoubleEscapedState); 698 HTML_RECONSUME_IN(kScriptDataDoubleEscapedState);
752 } 699 }
753 END_STATE() 700 END_STATE()
754 701
755 HTML_BEGIN_STATE(kBeforeAttributeNameState) { 702 HTML_BEGIN_STATE(kBeforeAttributeNameState) {
756 if (IsTokenizerWhitespace(cc)) 703 if (IsTokenizerWhitespace(cc)) {
757 HTML_ADVANCE_TO(kBeforeAttributeNameState); 704 HTML_ADVANCE_TO(kBeforeAttributeNameState);
758 else if (cc == '/') 705 } else if (cc == '/') {
759 HTML_ADVANCE_TO(kSelfClosingStartTagState); 706 HTML_ADVANCE_TO(kSelfClosingStartTagState);
760 else if (cc == '>') 707 } else if (cc == '>') {
761 return EmitAndResumeIn(source, HTMLTokenizer::kDataState); 708 return EmitAndResumeIn(source, HTMLTokenizer::kDataState);
762 else if (IsASCIIUpper(cc)) {
763 token_->AddNewAttribute();
764 token_->BeginAttributeName(source.NumberOfCharactersConsumed());
765 token_->AppendToAttributeName(ToLowerCase(cc));
766 HTML_ADVANCE_TO(kAttributeNameState);
767 } else if (cc == kEndOfFileMarker) { 709 } else if (cc == kEndOfFileMarker) {
768 ParseError(); 710 ParseError();
769 HTML_RECONSUME_IN(kDataState); 711 HTML_RECONSUME_IN(kDataState);
770 } else { 712 } else {
771 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=') 713 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
772 ParseError(); 714 ParseError();
773 token_->AddNewAttribute(); 715 token_->AddNewAttribute();
774 token_->BeginAttributeName(source.NumberOfCharactersConsumed()); 716 token_->BeginAttributeName(source.NumberOfCharactersConsumed());
775 token_->AppendToAttributeName(cc); 717 token_->AppendToAttributeName(ToLowerCaseIfAlpha(cc));
776 HTML_ADVANCE_TO(kAttributeNameState); 718 HTML_ADVANCE_TO(kAttributeNameState);
777 } 719 }
778 } 720 }
779 END_STATE() 721 END_STATE()
780 722
781 HTML_BEGIN_STATE(kAttributeNameState) { 723 HTML_BEGIN_STATE(kAttributeNameState) {
782 if (IsTokenizerWhitespace(cc)) { 724 if (IsTokenizerWhitespace(cc)) {
783 token_->EndAttributeName(source.NumberOfCharactersConsumed()); 725 token_->EndAttributeName(source.NumberOfCharactersConsumed());
784 HTML_ADVANCE_TO(kAfterAttributeNameState); 726 HTML_ADVANCE_TO(kAfterAttributeNameState);
785 } else if (cc == '/') { 727 } else if (cc == '/') {
786 token_->EndAttributeName(source.NumberOfCharactersConsumed()); 728 token_->EndAttributeName(source.NumberOfCharactersConsumed());
787 HTML_ADVANCE_TO(kSelfClosingStartTagState); 729 HTML_ADVANCE_TO(kSelfClosingStartTagState);
788 } else if (cc == '=') { 730 } else if (cc == '=') {
789 token_->EndAttributeName(source.NumberOfCharactersConsumed()); 731 token_->EndAttributeName(source.NumberOfCharactersConsumed());
790 HTML_ADVANCE_TO(kBeforeAttributeValueState); 732 HTML_ADVANCE_TO(kBeforeAttributeValueState);
791 } else if (cc == '>') { 733 } else if (cc == '>') {
792 token_->EndAttributeName(source.NumberOfCharactersConsumed()); 734 token_->EndAttributeName(source.NumberOfCharactersConsumed());
793 return EmitAndResumeIn(source, HTMLTokenizer::kDataState); 735 return EmitAndResumeIn(source, HTMLTokenizer::kDataState);
794 } else if (IsASCIIUpper(cc)) {
795 token_->AppendToAttributeName(ToLowerCase(cc));
796 HTML_ADVANCE_TO(kAttributeNameState);
797 } else if (cc == kEndOfFileMarker) { 736 } else if (cc == kEndOfFileMarker) {
798 ParseError(); 737 ParseError();
799 token_->EndAttributeName(source.NumberOfCharactersConsumed()); 738 token_->EndAttributeName(source.NumberOfCharactersConsumed());
800 HTML_RECONSUME_IN(kDataState); 739 HTML_RECONSUME_IN(kDataState);
801 } else { 740 } else {
802 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=') 741 if (cc == '"' || cc == '\'' || cc == '<' || cc == '=')
803 ParseError(); 742 ParseError();
804 token_->AppendToAttributeName(cc); 743 token_->AppendToAttributeName(ToLowerCaseIfAlpha(cc));
805 HTML_ADVANCE_TO(kAttributeNameState); 744 HTML_ADVANCE_TO(kAttributeNameState);
806 } 745 }
807 } 746 }
808 END_STATE() 747 END_STATE()
809 748
810 HTML_BEGIN_STATE(kAfterAttributeNameState) { 749 HTML_BEGIN_STATE(kAfterAttributeNameState) {
811 if (IsTokenizerWhitespace(cc)) 750 if (IsTokenizerWhitespace(cc)) {
812 HTML_ADVANCE_TO(kAfterAttributeNameState); 751 HTML_ADVANCE_TO(kAfterAttributeNameState);
813 else if (cc == '/') 752 } else if (cc == '/') {
814 HTML_ADVANCE_TO(kSelfClosingStartTagState); 753 HTML_ADVANCE_TO(kSelfClosingStartTagState);
815 else if (cc == '=') 754 } else if (cc == '=') {
816 HTML_ADVANCE_TO(kBeforeAttributeValueState); 755 HTML_ADVANCE_TO(kBeforeAttributeValueState);
817 else if (cc == '>') 756 } else if (cc == '>') {
818 return EmitAndResumeIn(source, HTMLTokenizer::kDataState); 757 return EmitAndResumeIn(source, HTMLTokenizer::kDataState);
819 else if (IsASCIIUpper(cc)) {
820 token_->AddNewAttribute();
821 token_->BeginAttributeName(source.NumberOfCharactersConsumed());
822 token_->AppendToAttributeName(ToLowerCase(cc));
823 HTML_ADVANCE_TO(kAttributeNameState);
824 } else if (cc == kEndOfFileMarker) { 758 } else if (cc == kEndOfFileMarker) {
825 ParseError(); 759 ParseError();
826 HTML_RECONSUME_IN(kDataState); 760 HTML_RECONSUME_IN(kDataState);
827 } else { 761 } else {
828 if (cc == '"' || cc == '\'' || cc == '<') 762 if (cc == '"' || cc == '\'' || cc == '<')
829 ParseError(); 763 ParseError();
830 token_->AddNewAttribute(); 764 token_->AddNewAttribute();
831 token_->BeginAttributeName(source.NumberOfCharactersConsumed()); 765 token_->BeginAttributeName(source.NumberOfCharactersConsumed());
832 token_->AppendToAttributeName(cc); 766 token_->AppendToAttributeName(ToLowerCaseIfAlpha(cc));
833 HTML_ADVANCE_TO(kAttributeNameState); 767 HTML_ADVANCE_TO(kAttributeNameState);
834 } 768 }
835 } 769 }
836 END_STATE() 770 END_STATE()
837 771
838 HTML_BEGIN_STATE(kBeforeAttributeValueState) { 772 HTML_BEGIN_STATE(kBeforeAttributeValueState) {
839 if (IsTokenizerWhitespace(cc)) 773 if (IsTokenizerWhitespace(cc))
840 HTML_ADVANCE_TO(kBeforeAttributeValueState); 774 HTML_ADVANCE_TO(kBeforeAttributeValueState);
841 else if (cc == '"') { 775 else if (cc == '"') {
842 token_->BeginAttributeValue(source.NumberOfCharactersConsumed() + 1); 776 token_->BeginAttributeValue(source.NumberOfCharactersConsumed() + 1);
(...skipping 304 matching lines...) Expand 10 before | Expand all | Expand 10 after
1147 token_->SetForceQuirks(); 1081 token_->SetForceQuirks();
1148 return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState); 1082 return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState);
1149 } else { 1083 } else {
1150 ParseError(); 1084 ParseError();
1151 HTML_RECONSUME_IN(kBeforeDOCTYPENameState); 1085 HTML_RECONSUME_IN(kBeforeDOCTYPENameState);
1152 } 1086 }
1153 } 1087 }
1154 END_STATE() 1088 END_STATE()
1155 1089
1156 HTML_BEGIN_STATE(kBeforeDOCTYPENameState) { 1090 HTML_BEGIN_STATE(kBeforeDOCTYPENameState) {
1157 if (IsTokenizerWhitespace(cc)) 1091 if (IsTokenizerWhitespace(cc)) {
1158 HTML_ADVANCE_TO(kBeforeDOCTYPENameState); 1092 HTML_ADVANCE_TO(kBeforeDOCTYPENameState);
1159 else if (IsASCIIUpper(cc)) {
1160 token_->BeginDOCTYPE(ToLowerCase(cc));
1161 HTML_ADVANCE_TO(kDOCTYPENameState);
1162 } else if (cc == '>') { 1093 } else if (cc == '>') {
1163 ParseError(); 1094 ParseError();
1164 token_->BeginDOCTYPE(); 1095 token_->BeginDOCTYPE();
1165 token_->SetForceQuirks(); 1096 token_->SetForceQuirks();
1166 return EmitAndResumeIn(source, HTMLTokenizer::kDataState); 1097 return EmitAndResumeIn(source, HTMLTokenizer::kDataState);
1167 } else if (cc == kEndOfFileMarker) { 1098 } else if (cc == kEndOfFileMarker) {
1168 ParseError(); 1099 ParseError();
1169 token_->BeginDOCTYPE(); 1100 token_->BeginDOCTYPE();
1170 token_->SetForceQuirks(); 1101 token_->SetForceQuirks();
1171 return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState); 1102 return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState);
1172 } else { 1103 } else {
1173 token_->BeginDOCTYPE(cc); 1104 token_->BeginDOCTYPE(ToLowerCaseIfAlpha(cc));
1174 HTML_ADVANCE_TO(kDOCTYPENameState); 1105 HTML_ADVANCE_TO(kDOCTYPENameState);
1175 } 1106 }
1176 } 1107 }
1177 END_STATE() 1108 END_STATE()
1178 1109
1179 HTML_BEGIN_STATE(kDOCTYPENameState) { 1110 HTML_BEGIN_STATE(kDOCTYPENameState) {
1180 if (IsTokenizerWhitespace(cc)) 1111 if (IsTokenizerWhitespace(cc)) {
1181 HTML_ADVANCE_TO(kAfterDOCTYPENameState); 1112 HTML_ADVANCE_TO(kAfterDOCTYPENameState);
1182 else if (cc == '>') 1113 } else if (cc == '>') {
1183 return EmitAndResumeIn(source, HTMLTokenizer::kDataState); 1114 return EmitAndResumeIn(source, HTMLTokenizer::kDataState);
1184 else if (IsASCIIUpper(cc)) {
1185 token_->AppendToName(ToLowerCase(cc));
1186 HTML_ADVANCE_TO(kDOCTYPENameState);
1187 } else if (cc == kEndOfFileMarker) { 1115 } else if (cc == kEndOfFileMarker) {
1188 ParseError(); 1116 ParseError();
1189 token_->SetForceQuirks(); 1117 token_->SetForceQuirks();
1190 return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState); 1118 return EmitAndReconsumeIn(source, HTMLTokenizer::kDataState);
1191 } else { 1119 } else {
1192 token_->AppendToName(cc); 1120 token_->AppendToName(ToLowerCaseIfAlpha(cc));
1193 HTML_ADVANCE_TO(kDOCTYPENameState); 1121 HTML_ADVANCE_TO(kDOCTYPENameState);
1194 } 1122 }
1195 } 1123 }
1196 END_STATE() 1124 END_STATE()
1197 1125
1198 HTML_BEGIN_STATE(kAfterDOCTYPENameState) { 1126 HTML_BEGIN_STATE(kAfterDOCTYPENameState) {
1199 if (IsTokenizerWhitespace(cc)) 1127 if (IsTokenizerWhitespace(cc))
1200 HTML_ADVANCE_TO(kAfterDOCTYPENameState); 1128 HTML_ADVANCE_TO(kAfterDOCTYPENameState);
1201 if (cc == '>') 1129 if (cc == '>')
1202 return EmitAndResumeIn(source, HTMLTokenizer::kDataState); 1130 return EmitAndResumeIn(source, HTMLTokenizer::kDataState);
(...skipping 366 matching lines...) Expand 10 before | Expand all | Expand 10 after
1569 return true; 1497 return true;
1570 } 1498 }
1571 1499
1572 inline void HTMLTokenizer::ParseError() { 1500 inline void HTMLTokenizer::ParseError() {
1573 #if DCHECK_IS_ON() 1501 #if DCHECK_IS_ON()
1574 DVLOG(1) << "Not implemented."; 1502 DVLOG(1) << "Not implemented.";
1575 #endif 1503 #endif
1576 } 1504 }
1577 1505
1578 } // namespace blink 1506 } // namespace blink
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698