Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(134)

Side by Side Diff: sky/engine/core/html/parser/HTMLTokenizer.cpp

Issue 682893002: Parse comments according to parsing.md (Closed) Base URL: git@github.com:domokit/mojo.git@master
Patch Set: Created 6 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD NEW
1 /* 1 /*
2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved.
3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ 3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
4 * Copyright (C) 2010 Google, Inc. All Rights Reserved. 4 * Copyright (C) 2010 Google, Inc. All Rights Reserved.
5 * 5 *
6 * Redistribution and use in source and binary forms, with or without 6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions 7 * modification, are permitted provided that the following conditions
8 * are met: 8 * are met:
9 * 1. Redistributions of source code must retain the above copyright 9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer. 10 * notice, this list of conditions and the following disclaimer.
(...skipping 217 matching lines...) Expand 10 before | Expand all | Expand 10 after
228 return emitEndOfFile(source); 228 return emitEndOfFile(source);
229 else { 229 else {
230 bufferCharacter(cc); 230 bufferCharacter(cc);
231 HTML_ADVANCE_TO(RAWTEXTState); 231 HTML_ADVANCE_TO(RAWTEXTState);
232 } 232 }
233 } 233 }
234 END_STATE() 234 END_STATE()
235 235
236 HTML_BEGIN_STATE(TagOpenState) { 236 HTML_BEGIN_STATE(TagOpenState) {
237 if (cc == '!') 237 if (cc == '!')
238 HTML_ADVANCE_TO(MarkupDeclarationOpenState); 238 HTML_ADVANCE_TO(CommentStart1State);
239 else if (cc == '/') 239 else if (cc == '/')
240 HTML_ADVANCE_TO(EndTagOpenState); 240 HTML_ADVANCE_TO(CloseTagState);
241 else if (isASCIIUpper(cc)) { 241 else if (isASCIIUpper(cc)) {
242 m_token->beginStartTag(toLowerCase(cc)); 242 m_token->beginStartTag(toLowerCase(cc));
243 HTML_ADVANCE_TO(TagNameState); 243 HTML_ADVANCE_TO(TagNameState);
244 } else if (isASCIILower(cc)) { 244 } else if (isASCIILower(cc)) {
245 m_token->beginStartTag(cc); 245 m_token->beginStartTag(cc);
246 HTML_ADVANCE_TO(TagNameState); 246 HTML_ADVANCE_TO(TagNameState);
247 } else if (cc == '?') {
248 parseError();
249 // The spec consumes the current character before switching
250 // to the bogus comment state, but it's easier to implement
251 // if we reconsume the current character.
252 HTML_RECONSUME_IN(BogusCommentState);
253 } else { 247 } else {
254 parseError(); 248 parseError();
255 bufferCharacter('<'); 249 bufferCharacter('<');
256 HTML_RECONSUME_IN(DataState); 250 HTML_RECONSUME_IN(DataState);
257 } 251 }
258 } 252 }
259 END_STATE() 253 END_STATE()
260 254
261 HTML_BEGIN_STATE(EndTagOpenState) { 255 HTML_BEGIN_STATE(CloseTagState) {
262 if (isASCIIUpper(cc)) { 256 if (isASCIIUpper(cc)) {
263 m_token->beginEndTag(static_cast<LChar>(toLowerCase(cc))); 257 m_token->beginEndTag(static_cast<LChar>(toLowerCase(cc)));
264 m_appropriateEndTagName.clear(); 258 m_appropriateEndTagName.clear();
265 HTML_ADVANCE_TO(TagNameState); 259 HTML_ADVANCE_TO(TagNameState);
266 } else if (isASCIILower(cc)) { 260 } else if (isASCIILower(cc)) {
267 m_token->beginEndTag(static_cast<LChar>(cc)); 261 m_token->beginEndTag(static_cast<LChar>(cc));
268 m_appropriateEndTagName.clear(); 262 m_appropriateEndTagName.clear();
269 HTML_ADVANCE_TO(TagNameState); 263 HTML_ADVANCE_TO(TagNameState);
270 } else if (cc == '>') { 264 } else if (cc == '>') {
271 parseError(); 265 bufferCharacter('<');
266 bufferCharacter('/');
267 bufferCharacter('>');
272 HTML_ADVANCE_TO(DataState); 268 HTML_ADVANCE_TO(DataState);
273 } else if (cc == kEndOfFileMarker) { 269 } else {
274 parseError();
275 bufferCharacter('<'); 270 bufferCharacter('<');
276 bufferCharacter('/'); 271 bufferCharacter('/');
277 HTML_RECONSUME_IN(DataState); 272 HTML_RECONSUME_IN(DataState);
278 } else {
279 parseError();
280 HTML_RECONSUME_IN(BogusCommentState);
281 } 273 }
282 } 274 }
283 END_STATE() 275 END_STATE()
284 276
285 HTML_BEGIN_STATE(TagNameState) { 277 HTML_BEGIN_STATE(TagNameState) {
286 if (isTokenizerWhitespace(cc)) 278 if (isTokenizerWhitespace(cc))
287 HTML_ADVANCE_TO(BeforeAttributeNameState); 279 HTML_ADVANCE_TO(BeforeAttributeNameState);
288 else if (cc == '/') 280 else if (cc == '/')
289 HTML_ADVANCE_TO(SelfClosingStartTagState); 281 HTML_ADVANCE_TO(SelfClosingStartTagState);
290 else if (cc == '>') 282 else if (cc == '>')
(...skipping 273 matching lines...) Expand 10 before | Expand all | Expand 10 after
564 } else if (cc == kEndOfFileMarker) { 556 } else if (cc == kEndOfFileMarker) {
565 parseError(); 557 parseError();
566 HTML_RECONSUME_IN(DataState); 558 HTML_RECONSUME_IN(DataState);
567 } else { 559 } else {
568 parseError(); 560 parseError();
569 HTML_RECONSUME_IN(BeforeAttributeNameState); 561 HTML_RECONSUME_IN(BeforeAttributeNameState);
570 } 562 }
571 } 563 }
572 END_STATE() 564 END_STATE()
573 565
574 HTML_BEGIN_STATE(BogusCommentState) { 566 HTML_BEGIN_STATE(CommentStart1State) {
575 m_token->beginComment(); 567 if (cc == '-') {
576 HTML_RECONSUME_IN(ContinueBogusCommentState); 568 HTML_ADVANCE_TO(CommentStart2State);
577 } 569 } else {
578 END_STATE() 570 bufferCharacter('<');
579 571 bufferCharacter('!');
580 HTML_BEGIN_STATE(ContinueBogusCommentState) { 572 HTML_RECONSUME_IN(DataState);
581 if (cc == '>')
582 return emitAndResumeIn(source, HTMLTokenizer::DataState);
583 else if (cc == kEndOfFileMarker)
584 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
585 else {
586 m_token->appendToComment(cc);
587 HTML_ADVANCE_TO(ContinueBogusCommentState);
588 } 573 }
589 } 574 }
590 END_STATE() 575 END_STATE()
591 576
592 HTML_BEGIN_STATE(MarkupDeclarationOpenState) { 577 HTML_BEGIN_STATE(CommentStart2State) {
593 if (cc == '-') { 578 if (cc == '-') {
594 SegmentedString::LookAheadResult result = source.lookAhead(HTMLTokenizerNames::dashDash); 579 HTML_ADVANCE_TO(CommentState);
595 if (result == SegmentedString::DidMatch) {
596 source.advanceAndASSERT('-');
597 source.advanceAndASSERT('-');
598 m_token->beginComment();
599 HTML_SWITCH_TO(CommentStartState);
600 } else if (result == SegmentedString::NotEnoughCharacters)
601 return haveBufferedCharacterToken();
602 }
603 parseError();
604 HTML_RECONSUME_IN(BogusCommentState);
605 }
606 END_STATE()
607
608 HTML_BEGIN_STATE(CommentStartState) {
609 if (cc == '-')
610 HTML_ADVANCE_TO(CommentStartDashState);
611 else if (cc == '>') {
612 parseError();
613 return emitAndResumeIn(source, HTMLTokenizer::DataState);
614 } else if (cc == kEndOfFileMarker) {
615 parseError();
616 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
617 } else { 580 } else {
618 m_token->appendToComment(cc); 581 bufferCharacter('<');
619 HTML_ADVANCE_TO(CommentState); 582 bufferCharacter('!');
620 } 583 bufferCharacter('-');
621 } 584 HTML_RECONSUME_IN(DataState);
622 END_STATE()
623
624 HTML_BEGIN_STATE(CommentStartDashState) {
625 if (cc == '-')
626 HTML_ADVANCE_TO(CommentEndState);
627 else if (cc == '>') {
628 parseError();
629 return emitAndResumeIn(source, HTMLTokenizer::DataState);
630 } else if (cc == kEndOfFileMarker) {
631 parseError();
632 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
633 } else {
634 m_token->appendToComment('-');
635 m_token->appendToComment(cc);
636 HTML_ADVANCE_TO(CommentState);
637 } 585 }
638 } 586 }
639 END_STATE() 587 END_STATE()
640 588
641 HTML_BEGIN_STATE(CommentState) { 589 HTML_BEGIN_STATE(CommentState) {
642 if (cc == '-') 590 if (cc == '-')
643 HTML_ADVANCE_TO(CommentEndDashState); 591 HTML_ADVANCE_TO(CommentEnd1State);
644 else if (cc == kEndOfFileMarker) { 592 else
645 parseError();
646 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
647 } else {
648 m_token->appendToComment(cc);
649 HTML_ADVANCE_TO(CommentState); 593 HTML_ADVANCE_TO(CommentState);
650 }
651 } 594 }
652 END_STATE() 595 END_STATE()
653 596
654 HTML_BEGIN_STATE(CommentEndDashState) { 597 HTML_BEGIN_STATE(CommentEnd1State) {
655 if (cc == '-') 598 if (cc == '-')
656 HTML_ADVANCE_TO(CommentEndState); 599 HTML_ADVANCE_TO(CommentEnd2State);
657 else if (cc == kEndOfFileMarker) { 600 else
658 parseError();
659 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
660 } else {
661 m_token->appendToComment('-');
662 m_token->appendToComment(cc);
663 HTML_ADVANCE_TO(CommentState); 601 HTML_ADVANCE_TO(CommentState);
664 }
665 } 602 }
666 END_STATE() 603 END_STATE()
667 604
668 HTML_BEGIN_STATE(CommentEndState) { 605 HTML_BEGIN_STATE(CommentEnd2State) {
669 if (cc == '>') 606 if (cc == '-')
670 return emitAndResumeIn(source, HTMLTokenizer::DataState); 607 HTML_ADVANCE_TO(CommentEnd2State);
671 else if (cc == '!') { 608 else if (cc == '>')
672 parseError(); 609 HTML_ADVANCE_TO(DataState);
673 HTML_ADVANCE_TO(CommentEndBangState); 610 else
674 } else if (cc == '-') {
675 parseError();
676 m_token->appendToComment('-');
677 HTML_ADVANCE_TO(CommentEndState);
678 } else if (cc == kEndOfFileMarker) {
679 parseError();
680 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
681 } else {
682 parseError();
683 m_token->appendToComment('-');
684 m_token->appendToComment('-');
685 m_token->appendToComment(cc);
686 HTML_ADVANCE_TO(CommentState); 611 HTML_ADVANCE_TO(CommentState);
687 }
688 } 612 }
689 END_STATE() 613 END_STATE()
690
691 HTML_BEGIN_STATE(CommentEndBangState) {
692 if (cc == '-') {
693 m_token->appendToComment('-');
694 m_token->appendToComment('-');
695 m_token->appendToComment('!');
696 HTML_ADVANCE_TO(CommentEndDashState);
697 } else if (cc == '>')
698 return emitAndResumeIn(source, HTMLTokenizer::DataState);
699 else if (cc == kEndOfFileMarker) {
700 parseError();
701 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);
702 } else {
703 m_token->appendToComment('-');
704 m_token->appendToComment('-');
705 m_token->appendToComment('!');
706 m_token->appendToComment(cc);
707 HTML_ADVANCE_TO(CommentState);
708 }
709 }
710 END_STATE()
711
712 } 614 }
713 615
714 ASSERT_NOT_REACHED(); 616 ASSERT_NOT_REACHED();
715 return false; 617 return false;
716 } 618 }
717 619
718 inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString) 620 inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString)
719 { 621 {
720 return vectorEqualsString(m_temporaryBuffer, expectedString); 622 return vectorEqualsString(m_temporaryBuffer, expectedString);
721 } 623 }
(...skipping 18 matching lines...) Expand all
740 642
741 return true; 643 return true;
742 } 644 }
743 645
744 inline void HTMLTokenizer::parseError() 646 inline void HTMLTokenizer::parseError()
745 { 647 {
746 notImplemented(); 648 notImplemented();
747 } 649 }
748 650
749 } 651 }
OLD NEW
« no previous file with comments | « sky/engine/core/html/parser/HTMLTokenizer.h ('k') | sky/engine/core/html/parser/HTMLTreeBuilder.cpp » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698