sky/engine/core/html/parser/HTMLTokenizer.cpp - Issue 682893002: Parse comments according to parsing.md

Side by Side Diff: sky/engine/core/html/parser/HTMLTokenizer.cpp

Issue 682893002: Parse comments according to parsing.md (Closed) Base URL: git@github.com:domokit/mojo.git@master

Patch Set: Created 6 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 /*	1 /*

2 * Copyright (C) 2008 Apple Inc. All Rights Reserved.	2 * Copyright (C) 2008 Apple Inc. All Rights Reserved.

3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/	3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/

4 * Copyright (C) 2010 Google, Inc. All Rights Reserved.	4 * Copyright (C) 2010 Google, Inc. All Rights Reserved.

5 *	5 *

6 * Redistribution and use in source and binary forms, with or without	6 * Redistribution and use in source and binary forms, with or without

7 * modification, are permitted provided that the following conditions	7 * modification, are permitted provided that the following conditions

8 * are met:	8 * are met:

9 * 1. Redistributions of source code must retain the above copyright	9 * 1. Redistributions of source code must retain the above copyright

10 * notice, this list of conditions and the following disclaimer.	10 * notice, this list of conditions and the following disclaimer.

(...skipping 217 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
228 return emitEndOfFile(source);	228 return emitEndOfFile(source);

229 else {	229 else {

230 bufferCharacter(cc);	230 bufferCharacter(cc);

231 HTML_ADVANCE_TO(RAWTEXTState);	231 HTML_ADVANCE_TO(RAWTEXTState);

232 }	232 }

233 }	233 }

234 END_STATE()	234 END_STATE()

235	235

236 HTML_BEGIN_STATE(TagOpenState) {	236 HTML_BEGIN_STATE(TagOpenState) {

237 if (cc == '!')	237 if (cc == '!')

238 HTML_ADVANCE_TO(MarkupDeclarationOpenState);	238 HTML_ADVANCE_TO(CommentStart1State);

239 else if (cc == '/')	239 else if (cc == '/')

240 HTML_ADVANCE_TO(EndTagOpenState);	240 HTML_ADVANCE_TO(CloseTagState);

241 else if (isASCIIUpper(cc)) {	241 else if (isASCIIUpper(cc)) {

242 m_token->beginStartTag(toLowerCase(cc));	242 m_token->beginStartTag(toLowerCase(cc));

243 HTML_ADVANCE_TO(TagNameState);	243 HTML_ADVANCE_TO(TagNameState);

244 } else if (isASCIILower(cc)) {	244 } else if (isASCIILower(cc)) {

245 m_token->beginStartTag(cc);	245 m_token->beginStartTag(cc);

246 HTML_ADVANCE_TO(TagNameState);	246 HTML_ADVANCE_TO(TagNameState);

247 } else if (cc == '?') {

248 parseError();

249 // The spec consumes the current character before switching

250 // to the bogus comment state, but it's easier to implement

251 // if we reconsume the current character.

252 HTML_RECONSUME_IN(BogusCommentState);

253 } else {	247 } else {

254 parseError();	248 parseError();

255 bufferCharacter('<');	249 bufferCharacter('<');

256 HTML_RECONSUME_IN(DataState);	250 HTML_RECONSUME_IN(DataState);

257 }	251 }

258 }	252 }

259 END_STATE()	253 END_STATE()

260	254

261 HTML_BEGIN_STATE(EndTagOpenState) {	255 HTML_BEGIN_STATE(CloseTagState) {

262 if (isASCIIUpper(cc)) {	256 if (isASCIIUpper(cc)) {

263 m_token->beginEndTag(static_cast<LChar>(toLowerCase(cc)));	257 m_token->beginEndTag(static_cast<LChar>(toLowerCase(cc)));

264 m_appropriateEndTagName.clear();	258 m_appropriateEndTagName.clear();

265 HTML_ADVANCE_TO(TagNameState);	259 HTML_ADVANCE_TO(TagNameState);

266 } else if (isASCIILower(cc)) {	260 } else if (isASCIILower(cc)) {

267 m_token->beginEndTag(static_cast<LChar>(cc));	261 m_token->beginEndTag(static_cast<LChar>(cc));

268 m_appropriateEndTagName.clear();	262 m_appropriateEndTagName.clear();

269 HTML_ADVANCE_TO(TagNameState);	263 HTML_ADVANCE_TO(TagNameState);

270 } else if (cc == '>') {	264 } else if (cc == '>') {

271 parseError();	265 bufferCharacter('<');

	266 bufferCharacter('/');

	267 bufferCharacter('>');

272 HTML_ADVANCE_TO(DataState);	268 HTML_ADVANCE_TO(DataState);

273 } else if (cc == kEndOfFileMarker) {	269 } else {

274 parseError();

275 bufferCharacter('<');	270 bufferCharacter('<');

276 bufferCharacter('/');	271 bufferCharacter('/');

277 HTML_RECONSUME_IN(DataState);	272 HTML_RECONSUME_IN(DataState);

278 } else {

279 parseError();

280 HTML_RECONSUME_IN(BogusCommentState);

281 }	273 }

282 }	274 }

283 END_STATE()	275 END_STATE()

284	276

285 HTML_BEGIN_STATE(TagNameState) {	277 HTML_BEGIN_STATE(TagNameState) {

286 if (isTokenizerWhitespace(cc))	278 if (isTokenizerWhitespace(cc))

287 HTML_ADVANCE_TO(BeforeAttributeNameState);	279 HTML_ADVANCE_TO(BeforeAttributeNameState);

288 else if (cc == '/')	280 else if (cc == '/')

289 HTML_ADVANCE_TO(SelfClosingStartTagState);	281 HTML_ADVANCE_TO(SelfClosingStartTagState);

290 else if (cc == '>')	282 else if (cc == '>')

(...skipping 273 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
564 } else if (cc == kEndOfFileMarker) {	556 } else if (cc == kEndOfFileMarker) {

565 parseError();	557 parseError();

566 HTML_RECONSUME_IN(DataState);	558 HTML_RECONSUME_IN(DataState);

567 } else {	559 } else {

568 parseError();	560 parseError();

569 HTML_RECONSUME_IN(BeforeAttributeNameState);	561 HTML_RECONSUME_IN(BeforeAttributeNameState);

570 }	562 }

571 }	563 }

572 END_STATE()	564 END_STATE()

573	565

574 HTML_BEGIN_STATE(BogusCommentState) {	566 HTML_BEGIN_STATE(CommentStart1State) {

575 m_token->beginComment();	567 if (cc == '-') {

576 HTML_RECONSUME_IN(ContinueBogusCommentState);	568 HTML_ADVANCE_TO(CommentStart2State);

577 }	569 } else {

578 END_STATE()	570 bufferCharacter('<');

579	571 bufferCharacter('!');

580 HTML_BEGIN_STATE(ContinueBogusCommentState) {	572 HTML_RECONSUME_IN(DataState);

581 if (cc == '>')

582 return emitAndResumeIn(source, HTMLTokenizer::DataState);

583 else if (cc == kEndOfFileMarker)

584 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);

585 else {

586 m_token->appendToComment(cc);

587 HTML_ADVANCE_TO(ContinueBogusCommentState);

588 }	573 }

589 }	574 }

590 END_STATE()	575 END_STATE()

591	576

592 HTML_BEGIN_STATE(MarkupDeclarationOpenState) {	577 HTML_BEGIN_STATE(CommentStart2State) {

593 if (cc == '-') {	578 if (cc == '-') {

594 SegmentedString::LookAheadResult result = source.lookAhead(HTMLTokenizerNames::dashDash);	579 HTML_ADVANCE_TO(CommentState);

595 if (result == SegmentedString::DidMatch) {

596 source.advanceAndASSERT('-');

597 source.advanceAndASSERT('-');

598 m_token->beginComment();

599 HTML_SWITCH_TO(CommentStartState);

600 } else if (result == SegmentedString::NotEnoughCharacters)

601 return haveBufferedCharacterToken();

602 }

603 parseError();

604 HTML_RECONSUME_IN(BogusCommentState);

605 }

606 END_STATE()

607

608 HTML_BEGIN_STATE(CommentStartState) {

609 if (cc == '-')

610 HTML_ADVANCE_TO(CommentStartDashState);

611 else if (cc == '>') {

612 parseError();

613 return emitAndResumeIn(source, HTMLTokenizer::DataState);

614 } else if (cc == kEndOfFileMarker) {

615 parseError();

616 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);

617 } else {	580 } else {

618 m_token->appendToComment(cc);	581 bufferCharacter('<');

619 HTML_ADVANCE_TO(CommentState);	582 bufferCharacter('!');

620 }	583 bufferCharacter('-');

621 }	584 HTML_RECONSUME_IN(DataState);

622 END_STATE()

623

624 HTML_BEGIN_STATE(CommentStartDashState) {

625 if (cc == '-')

626 HTML_ADVANCE_TO(CommentEndState);

627 else if (cc == '>') {

628 parseError();

629 return emitAndResumeIn(source, HTMLTokenizer::DataState);

630 } else if (cc == kEndOfFileMarker) {

631 parseError();

632 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);

633 } else {

634 m_token->appendToComment('-');

635 m_token->appendToComment(cc);

636 HTML_ADVANCE_TO(CommentState);

637 }	585 }

638 }	586 }

639 END_STATE()	587 END_STATE()

640	588

641 HTML_BEGIN_STATE(CommentState) {	589 HTML_BEGIN_STATE(CommentState) {

642 if (cc == '-')	590 if (cc == '-')

643 HTML_ADVANCE_TO(CommentEndDashState);	591 HTML_ADVANCE_TO(CommentEnd1State);

644 else if (cc == kEndOfFileMarker) {	592 else

645 parseError();

646 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);

647 } else {

648 m_token->appendToComment(cc);

649 HTML_ADVANCE_TO(CommentState);	593 HTML_ADVANCE_TO(CommentState);

650 }

651 }	594 }

652 END_STATE()	595 END_STATE()

653	596

654 HTML_BEGIN_STATE(CommentEndDashState) {	597 HTML_BEGIN_STATE(CommentEnd1State) {

655 if (cc == '-')	598 if (cc == '-')

656 HTML_ADVANCE_TO(CommentEndState);	599 HTML_ADVANCE_TO(CommentEnd2State);

657 else if (cc == kEndOfFileMarker) {	600 else

658 parseError();

659 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);

660 } else {

661 m_token->appendToComment('-');

662 m_token->appendToComment(cc);

663 HTML_ADVANCE_TO(CommentState);	601 HTML_ADVANCE_TO(CommentState);

664 }

665 }	602 }

666 END_STATE()	603 END_STATE()

667	604

668 HTML_BEGIN_STATE(CommentEndState) {	605 HTML_BEGIN_STATE(CommentEnd2State) {

669 if (cc == '>')	606 if (cc == '-')

670 return emitAndResumeIn(source, HTMLTokenizer::DataState);	607 HTML_ADVANCE_TO(CommentEnd2State);

671 else if (cc == '!') {	608 else if (cc == '>')

672 parseError();	609 HTML_ADVANCE_TO(DataState);

673 HTML_ADVANCE_TO(CommentEndBangState);	610 else

674 } else if (cc == '-') {

675 parseError();

676 m_token->appendToComment('-');

677 HTML_ADVANCE_TO(CommentEndState);

678 } else if (cc == kEndOfFileMarker) {

679 parseError();

680 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);

681 } else {

682 parseError();

683 m_token->appendToComment('-');

684 m_token->appendToComment('-');

685 m_token->appendToComment(cc);

686 HTML_ADVANCE_TO(CommentState);	611 HTML_ADVANCE_TO(CommentState);

687 }

688 }	612 }

689 END_STATE()	613 END_STATE()

690

691 HTML_BEGIN_STATE(CommentEndBangState) {

692 if (cc == '-') {

693 m_token->appendToComment('-');

694 m_token->appendToComment('-');

695 m_token->appendToComment('!');

696 HTML_ADVANCE_TO(CommentEndDashState);

697 } else if (cc == '>')

698 return emitAndResumeIn(source, HTMLTokenizer::DataState);

699 else if (cc == kEndOfFileMarker) {

700 parseError();

701 return emitAndReconsumeIn(source, HTMLTokenizer::DataState);

702 } else {

703 m_token->appendToComment('-');

704 m_token->appendToComment('-');

705 m_token->appendToComment('!');

706 m_token->appendToComment(cc);

707 HTML_ADVANCE_TO(CommentState);

708 }

709 }

710 END_STATE()

711

712 }	614 }

713	615

714 ASSERT_NOT_REACHED();	616 ASSERT_NOT_REACHED();

715 return false;	617 return false;

716 }	618 }

717	619

718 inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString)	620 inline bool HTMLTokenizer::temporaryBufferIs(const String& expectedString)

719 {	621 {

720 return vectorEqualsString(m_temporaryBuffer, expectedString);	622 return vectorEqualsString(m_temporaryBuffer, expectedString);

721 }	623 }

(...skipping 18 matching lines...) Expand all Loading...
740	642

741 return true;	643 return true;

742 }	644 }

743	645

744 inline void HTMLTokenizer::parseError()	646 inline void HTMLTokenizer::parseError()

745 {	647 {

746 notImplemented();	648 notImplemented();

747 }	649 }

748	650

749 }	651 }

OLD	NEW

« no previous file with comments | « sky/engine/core/html/parser/HTMLTokenizer.h ('k') | sky/engine/core/html/parser/HTMLTreeBuilder.cpp » ('j') | no next file with comments »