| OLD | NEW |
| 1 /* | 1 /* |
| 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. | 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. |
| 3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ | 3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ |
| 4 * Copyright (C) 2010 Google, Inc. All Rights Reserved. | 4 * Copyright (C) 2010 Google, Inc. All Rights Reserved. |
| 5 * | 5 * |
| 6 * Redistribution and use in source and binary forms, with or without | 6 * Redistribution and use in source and binary forms, with or without |
| 7 * modification, are permitted provided that the following conditions | 7 * modification, are permitted provided that the following conditions |
| 8 * are met: | 8 * are met: |
| 9 * 1. Redistributions of source code must retain the above copyright | 9 * 1. Redistributions of source code must retain the above copyright |
| 10 * notice, this list of conditions and the following disclaimer. | 10 * notice, this list of conditions and the following disclaimer. |
| (...skipping 217 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 228 return emitEndOfFile(source); | 228 return emitEndOfFile(source); |
| 229 else { | 229 else { |
| 230 bufferCharacter(cc); | 230 bufferCharacter(cc); |
| 231 HTML_ADVANCE_TO(RAWTEXTState); | 231 HTML_ADVANCE_TO(RAWTEXTState); |
| 232 } | 232 } |
| 233 } | 233 } |
| 234 END_STATE() | 234 END_STATE() |
| 235 | 235 |
| 236 HTML_BEGIN_STATE(TagOpenState) { | 236 HTML_BEGIN_STATE(TagOpenState) { |
| 237 if (cc == '!') | 237 if (cc == '!') |
| 238 HTML_ADVANCE_TO(MarkupDeclarationOpenState); | 238 HTML_ADVANCE_TO(CommentStart1State); |
| 239 else if (cc == '/') | 239 else if (cc == '/') |
| 240 HTML_ADVANCE_TO(EndTagOpenState); | 240 HTML_ADVANCE_TO(CloseTagState); |
| 241 else if (isASCIIUpper(cc)) { | 241 else if (isASCIIUpper(cc)) { |
| 242 m_token->beginStartTag(toLowerCase(cc)); | 242 m_token->beginStartTag(toLowerCase(cc)); |
| 243 HTML_ADVANCE_TO(TagNameState); | 243 HTML_ADVANCE_TO(TagNameState); |
| 244 } else if (isASCIILower(cc)) { | 244 } else if (isASCIILower(cc)) { |
| 245 m_token->beginStartTag(cc); | 245 m_token->beginStartTag(cc); |
| 246 HTML_ADVANCE_TO(TagNameState); | 246 HTML_ADVANCE_TO(TagNameState); |
| 247 } else if (cc == '?') { | |
| 248 parseError(); | |
| 249 // The spec consumes the current character before switching | |
| 250 // to the bogus comment state, but it's easier to implement | |
| 251 // if we reconsume the current character. | |
| 252 HTML_RECONSUME_IN(BogusCommentState); | |
| 253 } else { | 247 } else { |
| 254 parseError(); | 248 parseError(); |
| 255 bufferCharacter('<'); | 249 bufferCharacter('<'); |
| 256 HTML_RECONSUME_IN(DataState); | 250 HTML_RECONSUME_IN(DataState); |
| 257 } | 251 } |
| 258 } | 252 } |
| 259 END_STATE() | 253 END_STATE() |
| 260 | 254 |
| 261 HTML_BEGIN_STATE(EndTagOpenState) { | 255 HTML_BEGIN_STATE(CloseTagState) { |
| 262 if (isASCIIUpper(cc)) { | 256 if (isASCIIUpper(cc)) { |
| 263 m_token->beginEndTag(static_cast<LChar>(toLowerCase(cc))); | 257 m_token->beginEndTag(static_cast<LChar>(toLowerCase(cc))); |
| 264 m_appropriateEndTagName.clear(); | 258 m_appropriateEndTagName.clear(); |
| 265 HTML_ADVANCE_TO(TagNameState); | 259 HTML_ADVANCE_TO(TagNameState); |
| 266 } else if (isASCIILower(cc)) { | 260 } else if (isASCIILower(cc)) { |
| 267 m_token->beginEndTag(static_cast<LChar>(cc)); | 261 m_token->beginEndTag(static_cast<LChar>(cc)); |
| 268 m_appropriateEndTagName.clear(); | 262 m_appropriateEndTagName.clear(); |
| 269 HTML_ADVANCE_TO(TagNameState); | 263 HTML_ADVANCE_TO(TagNameState); |
| 270 } else if (cc == '>') { | 264 } else if (cc == '>') { |
| 271 parseError(); | 265 bufferCharacter('<'); |
| 266 bufferCharacter('/'); |
| 267 bufferCharacter('>'); |
| 272 HTML_ADVANCE_TO(DataState); | 268 HTML_ADVANCE_TO(DataState); |
| 273 } else if (cc == kEndOfFileMarker) { | 269 } else { |
| 274 parseError(); | |
| 275 bufferCharacter('<'); | 270 bufferCharacter('<'); |
| 276 bufferCharacter('/'); | 271 bufferCharacter('/'); |
| 277 HTML_RECONSUME_IN(DataState); | 272 HTML_RECONSUME_IN(DataState); |
| 278 } else { | |
| 279 parseError(); | |
| 280 HTML_RECONSUME_IN(BogusCommentState); | |
| 281 } | 273 } |
| 282 } | 274 } |
| 283 END_STATE() | 275 END_STATE() |
| 284 | 276 |
| 285 HTML_BEGIN_STATE(TagNameState) { | 277 HTML_BEGIN_STATE(TagNameState) { |
| 286 if (isTokenizerWhitespace(cc)) | 278 if (isTokenizerWhitespace(cc)) |
| 287 HTML_ADVANCE_TO(BeforeAttributeNameState); | 279 HTML_ADVANCE_TO(BeforeAttributeNameState); |
| 288 else if (cc == '/') | 280 else if (cc == '/') |
| 289 HTML_ADVANCE_TO(SelfClosingStartTagState); | 281 HTML_ADVANCE_TO(SelfClosingStartTagState); |
| 290 else if (cc == '>') | 282 else if (cc == '>') |
| (...skipping 273 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 564 } else if (cc == kEndOfFileMarker) { | 556 } else if (cc == kEndOfFileMarker) { |
| 565 parseError(); | 557 parseError(); |
| 566 HTML_RECONSUME_IN(DataState); | 558 HTML_RECONSUME_IN(DataState); |
| 567 } else { | 559 } else { |
| 568 parseError(); | 560 parseError(); |
| 569 HTML_RECONSUME_IN(BeforeAttributeNameState); | 561 HTML_RECONSUME_IN(BeforeAttributeNameState); |
| 570 } | 562 } |
| 571 } | 563 } |
| 572 END_STATE() | 564 END_STATE() |
| 573 | 565 |
| 574 HTML_BEGIN_STATE(BogusCommentState) { | 566 HTML_BEGIN_STATE(CommentStart1State) { |
| 575 m_token->beginComment(); | 567 if (cc == '-') { |
| 576 HTML_RECONSUME_IN(ContinueBogusCommentState); | 568 HTML_ADVANCE_TO(CommentStart2State); |
| 577 } | 569 } else { |
| 578 END_STATE() | 570 bufferCharacter('<'); |
| 579 | 571 bufferCharacter('!'); |
| 580 HTML_BEGIN_STATE(ContinueBogusCommentState) { | 572 HTML_RECONSUME_IN(DataState); |
| 581 if (cc == '>') | |
| 582 return emitAndResumeIn(source, HTMLTokenizer::DataState); | |
| 583 else if (cc == kEndOfFileMarker) | |
| 584 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); | |
| 585 else { | |
| 586 m_token->appendToComment(cc); | |
| 587 HTML_ADVANCE_TO(ContinueBogusCommentState); | |
| 588 } | 573 } |
| 589 } | 574 } |
| 590 END_STATE() | 575 END_STATE() |
| 591 | 576 |
| 592 HTML_BEGIN_STATE(MarkupDeclarationOpenState) { | 577 HTML_BEGIN_STATE(CommentStart2State) { |
| 593 if (cc == '-') { | 578 if (cc == '-') { |
| 594 SegmentedString::LookAheadResult result = source.lookAhead(HTMLTokenizerNames::dashDash); | 579 HTML_ADVANCE_TO(CommentState); |
| 595 if (result == SegmentedString::DidMatch) { | |
| 596 source.advanceAndASSERT('-'); | |
| 597 source.advanceAndASSERT('-'); | |
| 598 m_token->beginComment(); | |
| 599 HTML_SWITCH_TO(CommentStartState); | |
| 600 } else if (result == SegmentedString::NotEnoughCharacters) | |
| 601 return haveBufferedCharacterToken(); | |
| 602 } | |
| 603 parseError(); | |
| 604 HTML_RECONSUME_IN(BogusCommentState); | |
| 605 } | |
| 606 END_STATE() | |
| 607 | |
| 608 HTML_BEGIN_STATE(CommentStartState) { | |
| 609 if (cc == '-') | |
| 610 HTML_ADVANCE_TO(CommentStartDashState); | |
| 611 else if (cc == '>') { | |
| 612 parseError(); | |
| 613 return emitAndResumeIn(source, HTMLTokenizer::DataState); | |
| 614 } else if (cc == kEndOfFileMarker) { | |
| 615 parseError(); | |
| 616 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); | |
| 617 } else { | 580 } else { |
| 618 m_token->appendToComment(cc); | 581 bufferCharacter('<'); |
| 619 HTML_ADVANCE_TO(CommentState); | 582 bufferCharacter('!'); |
| 620 } | 583 bufferCharacter('-'); |
| 621 } | 584 HTML_RECONSUME_IN(DataState); |
| 622 END_STATE() | |
| 623 | |
| 624 HTML_BEGIN_STATE(CommentStartDashState) { | |
| 625 if (cc == '-') | |
| 626 HTML_ADVANCE_TO(CommentEndState); | |
| 627 else if (cc == '>') { | |
| 628 parseError(); | |
| 629 return emitAndResumeIn(source, HTMLTokenizer::DataState); | |
| 630 } else if (cc == kEndOfFileMarker) { | |
| 631 parseError(); | |
| 632 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); | |
| 633 } else { | |
| 634 m_token->appendToComment('-'); | |
| 635 m_token->appendToComment(cc); | |
| 636 HTML_ADVANCE_TO(CommentState); | |
| 637 } | 585 } |
| 638 } | 586 } |
| 639 END_STATE() | 587 END_STATE() |
| 640 | 588 |
| 641 HTML_BEGIN_STATE(CommentState) { | 589 HTML_BEGIN_STATE(CommentState) { |
| 642 if (cc == '-') | 590 if (cc == '-') |
| 643 HTML_ADVANCE_TO(CommentEndDashState); | 591 HTML_ADVANCE_TO(CommentEnd1State); |
| 644 else if (cc == kEndOfFileMarker) { | 592 else |
| 645 parseError(); | |
| 646 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); | |
| 647 } else { | |
| 648 m_token->appendToComment(cc); | |
| 649 HTML_ADVANCE_TO(CommentState); | 593 HTML_ADVANCE_TO(CommentState); |
| 650 } | |
| 651 } | 594 } |
| 652 END_STATE() | 595 END_STATE() |
| 653 | 596 |
| 654 HTML_BEGIN_STATE(CommentEndDashState) { | 597 HTML_BEGIN_STATE(CommentEnd1State) { |
| 655 if (cc == '-') | 598 if (cc == '-') |
| 656 HTML_ADVANCE_TO(CommentEndState); | 599 HTML_ADVANCE_TO(CommentEnd2State); |
| 657 else if (cc == kEndOfFileMarker) { | 600 else |
| 658 parseError(); | |
| 659 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); | |
| 660 } else { | |
| 661 m_token->appendToComment('-'); | |
| 662 m_token->appendToComment(cc); | |
| 663 HTML_ADVANCE_TO(CommentState); | 601 HTML_ADVANCE_TO(CommentState); |
| 664 } | |
| 665 } | 602 } |
| 666 END_STATE() | 603 END_STATE() |
| 667 | 604 |
| 668 HTML_BEGIN_STATE(CommentEndState) { | 605 HTML_BEGIN_STATE(CommentEnd2State) { |
| 669 if (cc == '>') | 606 if (cc == '-') |
| 670 return emitAndResumeIn(source, HTMLTokenizer::DataState); | 607 HTML_ADVANCE_TO(CommentEnd2State); |
| 671 else if (cc == '!') { | 608 else if (cc == '>') |
| 672 parseError(); | 609 HTML_ADVANCE_TO(DataState); |
| 673 HTML_ADVANCE_TO(CommentEndBangState); | 610 else |
| 674 } else if (cc == '-') { | |
| 675 parseError(); | |
| 676 m_token->appendToComment('-'); | |
| 677 HTML_ADVANCE_TO(CommentEndState); | |
| 678 } else if (cc == kEndOfFileMarker) { | |
| 679 parseError(); | |
| 680 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); | |
| 681 } else { | |
| 682 parseError(); | |
| 683 m_token->appendToComment('-'); | |
| 684 m_token->appendToComment('-'); | |
| 685 m_token->appendToComment(cc); | |
| 686 HTML_ADVANCE_TO(CommentState); | 611 HTML_ADVANCE_TO(CommentState); |
| 687 } | |
| 688 } | 612 } |
| 689 END_STATE() | 613 END_STATE() |
| 690 | |
| 691 HTML_BEGIN_STATE(CommentEndBangState) { | |
| 692 if (cc == '-') { | |
| 693 m_token->appendToComment('-'); | |
| 694 m_token->appendToComment('-'); | |
| 695 m_token->appendToComment('!'); | |
| 696 HTML_ADVANCE_TO(CommentEndDashState); | |
| 697 } else if (cc == '>') | |
| 698 return emitAndResumeIn(source, HTMLTokenizer::DataState); | |
| 699 else if (cc == kEndOfFileMarker) { | |
| 700 parseError(); | |
| 701 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); | |
| 702 } else { | |
| 703 m_token->appendToComment('-'); | |
| 704 m_token->appendToComment('-'); | |
| 705 m_token->appendToComment('!'); | |
| 706 m_token->appendToComment(cc); | |
| 707 HTML_ADVANCE_TO(CommentState); | |
| 708 } | |
| 709 } | |
| 710 END_STATE() | |
| 711 | |
| 712 } | 614 } |
| 713 | 615 |
| 714 ASSERT_NOT_REACHED(); | 616 ASSERT_NOT_REACHED(); |
| 715 return false; | 617 return false; |
| 716 } | 618 } |
| 717 | 619 |
| 718 inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString) | 620 inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString) |
| 719 { | 621 { |
| 720 return vectorEqualsString(m_temporaryBuffer, expectedString); | 622 return vectorEqualsString(m_temporaryBuffer, expectedString); |
| 721 } | 623 } |
| (...skipping 18 matching lines...) Expand all Loading... |
| 740 | 642 |
| 741 return true; | 643 return true; |
| 742 } | 644 } |
| 743 | 645 |
| 744 inline void HTMLTokenizer::parseError() | 646 inline void HTMLTokenizer::parseError() |
| 745 { | 647 { |
| 746 notImplemented(); | 648 notImplemented(); |
| 747 } | 649 } |
| 748 | 650 |
| 749 } | 651 } |
| OLD | NEW |