Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(95)

Side by Side Diff: src/scanner-base.h

Issue 7558017: Simpler (and a bit faster) keyword matcher (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Review fixes Created 9 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | src/scanner-base.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 524 matching lines...) Expand 10 before | Expand all | Expand 10 after
535 535
536 // Whether there is a line terminator whitespace character after 536 // Whether there is a line terminator whitespace character after
537 // the current token, and before the next. Does not count newlines 537 // the current token, and before the next. Does not count newlines
538 // inside multiline comments. 538 // inside multiline comments.
539 bool has_line_terminator_before_next_; 539 bool has_line_terminator_before_next_;
540 // Whether there is a multi-line comment that contains a 540 // Whether there is a multi-line comment that contains a
541 // line-terminator after the current token, and before the next. 541 // line-terminator after the current token, and before the next.
542 bool has_multiline_comment_before_next_; 542 bool has_multiline_comment_before_next_;
543 }; 543 };
544 544
545
546 // ----------------------------------------------------------------------------
547 // Keyword matching state machine.
548
549 class KeywordMatcher {
550 // Incrementally recognizes keywords, one character per AddChar() call.
551 //
552 // We distinguish between normal future reserved words and words that are
553 // considered to be future reserved words only in strict mode, as required by
554 // ECMA-262 7.6.1.2.
555 //
556 // Recognized as keywords:
557 // break, case, catch, const*, continue, debugger, default, delete, do,
558 // else, finally, false, for, function, if, in, instanceof, new, null,
559 // return, switch, this, throw, true, try, typeof, var, void, while, with.
560 //
561 // Recognized as Future Reserved Keywords:
562 // class, enum, export, extends, import, super.
563 //
564 // Recognized as Future Reserved Keywords (strict mode only):
565 // implements, interface, let, package, private, protected, public,
566 // static, yield.
567 //
568 // *: Actually a "future reserved keyword". It is the only one we
569 // recognize outside of ES5 strict mode; the remaining ones are allowed
570 // as identifiers.
571 //
572 public:
573 KeywordMatcher()
574 : state_(INITIAL),
575 token_(Token::IDENTIFIER),
576 keyword_(NULL),
577 counter_(0),
578 keyword_token_(Token::ILLEGAL) {}
579
580 Token::Value token() { return token_; }  // Token for the characters added so far.
581
582 inline bool AddChar(unibrow::uchar input) {  // Feeds one character to the matcher.
583 if (state_ != UNMATCHABLE) {  // Step() is skipped once the matcher is UNMATCHABLE.
584 Step(input);
585 }
586 return state_ != UNMATCHABLE;  // True while the state is not UNMATCHABLE.
587 }
588
589 void Fail() {  // Gives up matching: token becomes IDENTIFIER, state UNMATCHABLE.
590 token_ = Token::IDENTIFIER;
591 state_ = UNMATCHABLE;
592 }
593
594 private:
595 enum State {
596 UNMATCHABLE,  // AddChar() no longer calls Step() in this state.
597 INITIAL,  // State set by the constructor.
598 KEYWORD_PREFIX,  // Set by MatchKeywordStart(); keyword_/counter_ track one keyword.
599 KEYWORD_MATCHED,
600 C,  // NOTE(review): the letter-named states appear to name the prefix read so far - confirm in Step().
601 CA,
602 CO,
603 CON,
604 D,
605 DE,
606 E,
607 EX,
608 F,
609 I,
610 IM,
611 IMP,
612 IN,
613 N,
614 P,
615 PR,
616 S,
617 T,
618 TH,
619 TR,
620 V,
621 W
622 };
623
624 struct FirstState {  // Element type of the first_states_ table below.
625 const char* keyword;
626 State state;
627 Token::Value token;
628 };
629
630 // Range of possible first characters of a keyword.
631 static const unsigned int kFirstCharRangeMin = 'b';
632 static const unsigned int kFirstCharRangeMax = 'y';
633 static const unsigned int kFirstCharRangeLength =
634 kFirstCharRangeMax - kFirstCharRangeMin + 1;
635 // State map for first keyword character range (one entry per character in the range).
636 static const FirstState first_states_[kFirstCharRangeLength];
637
638 // If input equals keyword's character at position, continue matching keyword
639 // from that position. Returns true on a match, false (no state change) otherwise.
640 inline bool MatchKeywordStart(unibrow::uchar input,
641 const char* keyword,
642 int position,
643 Token::Value token_if_match) {
644 if (input != static_cast<unibrow::uchar>(keyword[position])) {
645 return false;
646 }
647 state_ = KEYWORD_PREFIX;
648 this->keyword_ = keyword;
649 this->counter_ = position + 1;  // One past the character that was just matched.
650 this->keyword_token_ = token_if_match;
651 return true;
652 }
653
654 // If input equals match character, transition to new state and return true.
655 inline bool MatchState(unibrow::uchar input, char match, State new_state) {
656 if (input != static_cast<unibrow::uchar>(match)) {
657 return false;
658 }
659 state_ = new_state;
660 return true;
661 }
662
663 inline bool MatchKeyword(unibrow::uchar input,  // Like MatchState(), but also sets token_ to keyword_token.
664 char match,
665 State new_state,
666 Token::Value keyword_token) {
667 if (input != static_cast<unibrow::uchar>(match)) {
668 return false;
669 }
670 state_ = new_state;
671 token_ = keyword_token;
672 return true;
673 }
674
675 void Step(unibrow::uchar input);  // Called by AddChar() for each character; defined outside this class body.
676
677 // Current state.
678 State state_;
679 // Token for currently added characters.
680 Token::Value token_;
681
682 // Matching a specific keyword string (there is only one possible valid
683 // keyword with the current prefix).
684 const char* keyword_;
685 int counter_;  // Set to position + 1 by MatchKeywordStart(); presumably the next index in keyword_ to compare.
686 Token::Value keyword_token_;  // The token_if_match value recorded by MatchKeywordStart().
687 };
688
689
690 } } // namespace v8::internal 545 } } // namespace v8::internal
691 546
692 #endif // V8_SCANNER_BASE_H_ 547 #endif // V8_SCANNER_BASE_H_
OLDNEW
« no previous file with comments | « no previous file | src/scanner-base.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698