src/scanner-base.h - Issue 7558017: Simpler (and a bit faster) keyword matcher

Side by Side Diff: src/scanner-base.h

Issue 7558017: Simpler (and a bit faster) keyword matcher (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: Review fixes Created 9 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 524 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
535	535

536 // Whether there is a line terminator whitespace character after	536 // Whether there is a line terminator whitespace character after

537 // the current token, and before the next. Does not count newlines	537 // the current token, and before the next. Does not count newlines

538 // inside multiline comments.	538 // inside multiline comments.

539 bool has_line_terminator_before_next_;	539 bool has_line_terminator_before_next_;

540 // Whether there is a multi-line comment that contains a	540 // Whether there is a multi-line comment that contains a

541 // line-terminator after the current token, and before the next.	541 // line-terminator after the current token, and before the next.

542 bool has_multiline_comment_before_next_;	542 bool has_multiline_comment_before_next_;

543 };	543 };

544	544

545

546 // ----------------------------------------------------------------------------

547 // Keyword matching state machine.

548

549 class KeywordMatcher {

550 // Incrementally recognize keywords.

551 //

552 // We distinguish between normal future reserved words and words that are

553 // considered to be future reserved words only in strict mode as required by

554 // ECMA-262 7.6.1.2.

555 //

556 // Recognized as keywords:

557 // break, case, catch, const*, continue, debugger, default, delete, do,

558 // else, finally, false, for, function, if, in, instanceof, new, null,

559 // return, switch, this, throw, true, try, typeof, var, void, while, with.

560 //

561 // Recognized as Future Reserved Keywords:

562 // class, enum, export, extends, import, super.

563 //

564 // Recognized as Future Reserved Keywords (strict mode only):

565 // implements, interface, let, package, private, protected, public,

566 // static, yield.

567 //

568 // *: Actually a "future reserved keyword". It's the only one we are

569 // recognizing outside of ES5 strict mode, the remaining are allowed

570 // as identifiers.

571 //

572 public:

573 KeywordMatcher()

574 : state_(INITIAL),

575 token_(Token::IDENTIFIER),

576 keyword_(NULL),

577 counter_(0),

578 keyword_token_(Token::ILLEGAL) {}

579

580 Token::Value token() { return token_; }

581

582 inline bool AddChar(unibrow::uchar input) {

583 if (state_ != UNMATCHABLE) {

584 Step(input);

585 }

586 return state_ != UNMATCHABLE;

587 }

588

589 void Fail() {

590 token_ = Token::IDENTIFIER;

591 state_ = UNMATCHABLE;

592 }

593

594 private:

595 enum State {

596 UNMATCHABLE,

597 INITIAL,

598 KEYWORD_PREFIX,

599 KEYWORD_MATCHED,

600 C,

601 CA,

602 CO,

603 CON,

604 D,

605 DE,

606 E,

607 EX,

608 F,

609 I,

610 IM,

611 IMP,

612 IN,

613 N,

614 P,

615 PR,

616 S,

617 T,

618 TH,

619 TR,

620 V,

621 W

622 };

623

624 struct FirstState {

625 const char* keyword;

626 State state;

627 Token::Value token;

628 };

629

630 // Range of possible first characters of a keyword.

631 static const unsigned int kFirstCharRangeMin = 'b';

632 static const unsigned int kFirstCharRangeMax = 'y';

633 static const unsigned int kFirstCharRangeLength =

634 kFirstCharRangeMax - kFirstCharRangeMin + 1;

635 // State map for first keyword character range.

636 static const FirstState first_states_[kFirstCharRangeLength];

637

638 // If input equals keyword's character at position, continue matching keyword

639 // from that position.

640 inline bool MatchKeywordStart(unibrow::uchar input,

641 const char* keyword,

642 int position,

643 Token::Value token_if_match) {

644 if (input != static_cast<unibrow::uchar>(keyword[position])) {

645 return false;

646 }

647 state_ = KEYWORD_PREFIX;

648 this->keyword_ = keyword;

649 this->counter_ = position + 1;

650 this->keyword_token_ = token_if_match;

651 return true;

652 }

653

654 // If input equals match character, transition to new state and return true.

655 inline bool MatchState(unibrow::uchar input, char match, State new_state) {

656 if (input != static_cast<unibrow::uchar>(match)) {

657 return false;

658 }

659 state_ = new_state;

660 return true;

661 }

662

663 inline bool MatchKeyword(unibrow::uchar input,

664 char match,

665 State new_state,

666 Token::Value keyword_token) {

667 if (input != static_cast<unibrow::uchar>(match)) {

668 return false;

669 }

670 state_ = new_state;

671 token_ = keyword_token;

672 return true;

673 }

674

675 void Step(unibrow::uchar input);

676

677 // Current state.

678 State state_;

679 // Token for currently added characters.

680 Token::Value token_;

681

682 // Matching a specific keyword string (there is only one possible valid

683 // keyword with the current prefix).

684 const char* keyword_;

685 int counter_;

686 Token::Value keyword_token_;

687 };

688

689

690 } } // namespace v8::internal	545 } } // namespace v8::internal

691	546

692 #endif // V8_SCANNER_BASE_H_	547 #endif // V8_SCANNER_BASE_H_

OLD	NEW

« no previous file with comments | « no previous file | src/scanner-base.cc » ('j') | no next file with comments »