OLD | NEW |
1 /* | 1 /* |
2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. | 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. |
3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ | 3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ |
4 * Copyright (C) 2010 Google, Inc. All Rights Reserved. | 4 * Copyright (C) 2010 Google, Inc. All Rights Reserved. |
5 * | 5 * |
6 * Redistribution and use in source and binary forms, with or without | 6 * Redistribution and use in source and binary forms, with or without |
7 * modification, are permitted provided that the following conditions | 7 * modification, are permitted provided that the following conditions |
8 * are met: | 8 * are met: |
9 * 1. Redistributions of source code must retain the above copyright | 9 * 1. Redistributions of source code must retain the above copyright |
10 * notice, this list of conditions and the following disclaimer. | 10 * notice, this list of conditions and the following disclaimer. |
(...skipping 217 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
228 return emitEndOfFile(source); | 228 return emitEndOfFile(source); |
229 else { | 229 else { |
230 bufferCharacter(cc); | 230 bufferCharacter(cc); |
231 HTML_ADVANCE_TO(RAWTEXTState); | 231 HTML_ADVANCE_TO(RAWTEXTState); |
232 } | 232 } |
233 } | 233 } |
234 END_STATE() | 234 END_STATE() |
235 | 235 |
236 HTML_BEGIN_STATE(TagOpenState) { | 236 HTML_BEGIN_STATE(TagOpenState) { |
237 if (cc == '!') | 237 if (cc == '!') |
238 HTML_ADVANCE_TO(MarkupDeclarationOpenState); | 238 HTML_ADVANCE_TO(CommentStart1State); |
239 else if (cc == '/') | 239 else if (cc == '/') |
240 HTML_ADVANCE_TO(EndTagOpenState); | 240 HTML_ADVANCE_TO(CloseTagState); |
241 else if (isASCIIUpper(cc)) { | 241 else if (isASCIIUpper(cc)) { |
242 m_token->beginStartTag(toLowerCase(cc)); | 242 m_token->beginStartTag(toLowerCase(cc)); |
243 HTML_ADVANCE_TO(TagNameState); | 243 HTML_ADVANCE_TO(TagNameState); |
244 } else if (isASCIILower(cc)) { | 244 } else if (isASCIILower(cc)) { |
245 m_token->beginStartTag(cc); | 245 m_token->beginStartTag(cc); |
246 HTML_ADVANCE_TO(TagNameState); | 246 HTML_ADVANCE_TO(TagNameState); |
247 } else if (cc == '?') { | |
248 parseError(); | |
249 // The spec consumes the current character before switching | |
250 // to the bogus comment state, but it's easier to implement | |
251 // if we reconsume the current character. | |
252 HTML_RECONSUME_IN(BogusCommentState); | |
253 } else { | 247 } else { |
254 parseError(); | 248 parseError(); |
255 bufferCharacter('<'); | 249 bufferCharacter('<'); |
256 HTML_RECONSUME_IN(DataState); | 250 HTML_RECONSUME_IN(DataState); |
257 } | 251 } |
258 } | 252 } |
259 END_STATE() | 253 END_STATE() |
260 | 254 |
261 HTML_BEGIN_STATE(EndTagOpenState) { | 255 HTML_BEGIN_STATE(CloseTagState) { |
262 if (isASCIIUpper(cc)) { | 256 if (isASCIIUpper(cc)) { |
263 m_token->beginEndTag(static_cast<LChar>(toLowerCase(cc))); | 257 m_token->beginEndTag(static_cast<LChar>(toLowerCase(cc))); |
264 m_appropriateEndTagName.clear(); | 258 m_appropriateEndTagName.clear(); |
265 HTML_ADVANCE_TO(TagNameState); | 259 HTML_ADVANCE_TO(TagNameState); |
266 } else if (isASCIILower(cc)) { | 260 } else if (isASCIILower(cc)) { |
267 m_token->beginEndTag(static_cast<LChar>(cc)); | 261 m_token->beginEndTag(static_cast<LChar>(cc)); |
268 m_appropriateEndTagName.clear(); | 262 m_appropriateEndTagName.clear(); |
269 HTML_ADVANCE_TO(TagNameState); | 263 HTML_ADVANCE_TO(TagNameState); |
270 } else if (cc == '>') { | 264 } else if (cc == '>') { |
271 parseError(); | 265 bufferCharacter('<'); |
| 266 bufferCharacter('/'); |
| 267 bufferCharacter('>'); |
272 HTML_ADVANCE_TO(DataState); | 268 HTML_ADVANCE_TO(DataState); |
273 } else if (cc == kEndOfFileMarker) { | 269 } else { |
274 parseError(); | |
275 bufferCharacter('<'); | 270 bufferCharacter('<'); |
276 bufferCharacter('/'); | 271 bufferCharacter('/'); |
277 HTML_RECONSUME_IN(DataState); | 272 HTML_RECONSUME_IN(DataState); |
278 } else { | |
279 parseError(); | |
280 HTML_RECONSUME_IN(BogusCommentState); | |
281 } | 273 } |
282 } | 274 } |
283 END_STATE() | 275 END_STATE() |
284 | 276 |
285 HTML_BEGIN_STATE(TagNameState) { | 277 HTML_BEGIN_STATE(TagNameState) { |
286 if (isTokenizerWhitespace(cc)) | 278 if (isTokenizerWhitespace(cc)) |
287 HTML_ADVANCE_TO(BeforeAttributeNameState); | 279 HTML_ADVANCE_TO(BeforeAttributeNameState); |
288 else if (cc == '/') | 280 else if (cc == '/') |
289 HTML_ADVANCE_TO(SelfClosingStartTagState); | 281 HTML_ADVANCE_TO(SelfClosingStartTagState); |
290 else if (cc == '>') | 282 else if (cc == '>') |
(...skipping 273 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
564 } else if (cc == kEndOfFileMarker) { | 556 } else if (cc == kEndOfFileMarker) { |
565 parseError(); | 557 parseError(); |
566 HTML_RECONSUME_IN(DataState); | 558 HTML_RECONSUME_IN(DataState); |
567 } else { | 559 } else { |
568 parseError(); | 560 parseError(); |
569 HTML_RECONSUME_IN(BeforeAttributeNameState); | 561 HTML_RECONSUME_IN(BeforeAttributeNameState); |
570 } | 562 } |
571 } | 563 } |
572 END_STATE() | 564 END_STATE() |
573 | 565 |
574 HTML_BEGIN_STATE(BogusCommentState) { | 566 HTML_BEGIN_STATE(CommentStart1State) { |
575 m_token->beginComment(); | 567 if (cc == '-') { |
576 HTML_RECONSUME_IN(ContinueBogusCommentState); | 568 HTML_ADVANCE_TO(CommentStart2State); |
577 } | 569 } else { |
578 END_STATE() | 570 bufferCharacter('<'); |
579 | 571 bufferCharacter('!'); |
580 HTML_BEGIN_STATE(ContinueBogusCommentState) { | 572 HTML_RECONSUME_IN(DataState); |
581 if (cc == '>') | |
582 return emitAndResumeIn(source, HTMLTokenizer::DataState); | |
583 else if (cc == kEndOfFileMarker) | |
584 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); | |
585 else { | |
586 m_token->appendToComment(cc); | |
587 HTML_ADVANCE_TO(ContinueBogusCommentState); | |
588 } | 573 } |
589 } | 574 } |
590 END_STATE() | 575 END_STATE() |
591 | 576 |
592 HTML_BEGIN_STATE(MarkupDeclarationOpenState) { | 577 HTML_BEGIN_STATE(CommentStart2State) { |
593 if (cc == '-') { | 578 if (cc == '-') { |
594 SegmentedString::LookAheadResult result = source.lookAhead(HTMLTokenizerNames::dashDash); | 579 HTML_ADVANCE_TO(CommentState); |
595 if (result == SegmentedString::DidMatch) { | |
596 source.advanceAndASSERT('-'); | |
597 source.advanceAndASSERT('-'); | |
598 m_token->beginComment(); | |
599 HTML_SWITCH_TO(CommentStartState); | |
600 } else if (result == SegmentedString::NotEnoughCharacters) | |
601 return haveBufferedCharacterToken(); | |
602 } | |
603 parseError(); | |
604 HTML_RECONSUME_IN(BogusCommentState); | |
605 } | |
606 END_STATE() | |
607 | |
608 HTML_BEGIN_STATE(CommentStartState) { | |
609 if (cc == '-') | |
610 HTML_ADVANCE_TO(CommentStartDashState); | |
611 else if (cc == '>') { | |
612 parseError(); | |
613 return emitAndResumeIn(source, HTMLTokenizer::DataState); | |
614 } else if (cc == kEndOfFileMarker) { | |
615 parseError(); | |
616 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); | |
617 } else { | 580 } else { |
618 m_token->appendToComment(cc); | 581 bufferCharacter('<'); |
619 HTML_ADVANCE_TO(CommentState); | 582 bufferCharacter('!'); |
620 } | 583 bufferCharacter('-'); |
621 } | 584 HTML_RECONSUME_IN(DataState); |
622 END_STATE() | |
623 | |
624 HTML_BEGIN_STATE(CommentStartDashState) { | |
625 if (cc == '-') | |
626 HTML_ADVANCE_TO(CommentEndState); | |
627 else if (cc == '>') { | |
628 parseError(); | |
629 return emitAndResumeIn(source, HTMLTokenizer::DataState); | |
630 } else if (cc == kEndOfFileMarker) { | |
631 parseError(); | |
632 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); | |
633 } else { | |
634 m_token->appendToComment('-'); | |
635 m_token->appendToComment(cc); | |
636 HTML_ADVANCE_TO(CommentState); | |
637 } | 585 } |
638 } | 586 } |
639 END_STATE() | 587 END_STATE() |
640 | 588 |
641 HTML_BEGIN_STATE(CommentState) { | 589 HTML_BEGIN_STATE(CommentState) { |
642 if (cc == '-') | 590 if (cc == '-') |
643 HTML_ADVANCE_TO(CommentEndDashState); | 591 HTML_ADVANCE_TO(CommentEnd1State); |
644 else if (cc == kEndOfFileMarker) { | 592 else |
645 parseError(); | |
646 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); | |
647 } else { | |
648 m_token->appendToComment(cc); | |
649 HTML_ADVANCE_TO(CommentState); | 593 HTML_ADVANCE_TO(CommentState); |
650 } | |
651 } | 594 } |
652 END_STATE() | 595 END_STATE() |
653 | 596 |
654 HTML_BEGIN_STATE(CommentEndDashState) { | 597 HTML_BEGIN_STATE(CommentEnd1State) { |
655 if (cc == '-') | 598 if (cc == '-') |
656 HTML_ADVANCE_TO(CommentEndState); | 599 HTML_ADVANCE_TO(CommentEnd2State); |
657 else if (cc == kEndOfFileMarker) { | 600 else |
658 parseError(); | |
659 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); | |
660 } else { | |
661 m_token->appendToComment('-'); | |
662 m_token->appendToComment(cc); | |
663 HTML_ADVANCE_TO(CommentState); | 601 HTML_ADVANCE_TO(CommentState); |
664 } | |
665 } | 602 } |
666 END_STATE() | 603 END_STATE() |
667 | 604 |
668 HTML_BEGIN_STATE(CommentEndState) { | 605 HTML_BEGIN_STATE(CommentEnd2State) { |
669 if (cc == '>') | 606 if (cc == '-') |
670 return emitAndResumeIn(source, HTMLTokenizer::DataState); | 607 HTML_ADVANCE_TO(CommentEnd2State); |
671 else if (cc == '!') { | 608 else if (cc == '>') |
672 parseError(); | 609 HTML_ADVANCE_TO(DataState); |
673 HTML_ADVANCE_TO(CommentEndBangState); | 610 else |
674 } else if (cc == '-') { | |
675 parseError(); | |
676 m_token->appendToComment('-'); | |
677 HTML_ADVANCE_TO(CommentEndState); | |
678 } else if (cc == kEndOfFileMarker) { | |
679 parseError(); | |
680 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); | |
681 } else { | |
682 parseError(); | |
683 m_token->appendToComment('-'); | |
684 m_token->appendToComment('-'); | |
685 m_token->appendToComment(cc); | |
686 HTML_ADVANCE_TO(CommentState); | 611 HTML_ADVANCE_TO(CommentState); |
687 } | |
688 } | 612 } |
689 END_STATE() | 613 END_STATE() |
690 | |
691 HTML_BEGIN_STATE(CommentEndBangState) { | |
692 if (cc == '-') { | |
693 m_token->appendToComment('-'); | |
694 m_token->appendToComment('-'); | |
695 m_token->appendToComment('!'); | |
696 HTML_ADVANCE_TO(CommentEndDashState); | |
697 } else if (cc == '>') | |
698 return emitAndResumeIn(source, HTMLTokenizer::DataState); | |
699 else if (cc == kEndOfFileMarker) { | |
700 parseError(); | |
701 return emitAndReconsumeIn(source, HTMLTokenizer::DataState); | |
702 } else { | |
703 m_token->appendToComment('-'); | |
704 m_token->appendToComment('-'); | |
705 m_token->appendToComment('!'); | |
706 m_token->appendToComment(cc); | |
707 HTML_ADVANCE_TO(CommentState); | |
708 } | |
709 } | |
710 END_STATE() | |
711 | |
712 } | 614 } |
713 | 615 |
714 ASSERT_NOT_REACHED(); | 616 ASSERT_NOT_REACHED(); |
715 return false; | 617 return false; |
716 } | 618 } |
717 | 619 |
718 inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString) | 620 inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString) |
719 { | 621 { |
720 return vectorEqualsString(m_temporaryBuffer, expectedString); | 622 return vectorEqualsString(m_temporaryBuffer, expectedString); |
721 } | 623 } |
(...skipping 18 matching lines...) Expand all Loading... |
740 | 642 |
741 return true; | 643 return true; |
742 } | 644 } |
743 | 645 |
744 inline void HTMLTokenizer::parseError() | 646 inline void HTMLTokenizer::parseError() |
745 { | 647 { |
746 notImplemented(); | 648 notImplemented(); |
747 } | 649 } |
748 | 650 |
749 } | 651 } |
OLD | NEW |