Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(354)

Side by Side Diff: src/scanner-base.cc

Issue 7558017: Simpler (and a bit faster) keyword matcher (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: Review fixes Created 9 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/scanner-base.h ('k') | src/token.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 644 matching lines...) Expand 10 before | Expand all | Expand 10 after
655 if (c0_ != 'u') return unibrow::Utf8::kBadChar; 655 if (c0_ != 'u') return unibrow::Utf8::kBadChar;
656 Advance(); 656 Advance();
657 uc32 c = ScanHexEscape('u', 4); 657 uc32 c = ScanHexEscape('u', 4);
658 // We do not allow a unicode escape sequence to start another 658 // We do not allow a unicode escape sequence to start another
659 // unicode escape sequence. 659 // unicode escape sequence.
660 if (c == '\\') return unibrow::Utf8::kBadChar; 660 if (c == '\\') return unibrow::Utf8::kBadChar;
661 return c; 661 return c;
662 } 662 }
663 663
664 664
665 // ----------------------------------------------------------------------------
666 // Keyword Matcher
667
668 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \
669 KEYWORD_GROUP('b') \
670 KEYWORD("break", BREAK) \
671 KEYWORD_GROUP('c') \
672 KEYWORD("case", CASE) \
673 KEYWORD("catch", CATCH) \
674 KEYWORD("class", FUTURE_RESERVED_WORD) \
675 KEYWORD("const", CONST) \
676 KEYWORD("continue", CONTINUE) \
677 KEYWORD_GROUP('d') \
678 KEYWORD("debugger", DEBUGGER) \
679 KEYWORD("default", DEFAULT) \
680 KEYWORD("delete", DELETE) \
681 KEYWORD("do", DO) \
682 KEYWORD_GROUP('e') \
683 KEYWORD("else", ELSE) \
684 KEYWORD("enum", FUTURE_RESERVED_WORD) \
685 KEYWORD("export", FUTURE_RESERVED_WORD) \
686 KEYWORD("extends", FUTURE_RESERVED_WORD) \
687 KEYWORD_GROUP('f') \
688 KEYWORD("false", FALSE_LITERAL) \
689 KEYWORD("finally", FINALLY) \
690 KEYWORD("for", FOR) \
691 KEYWORD("function", FUNCTION) \
692 KEYWORD_GROUP('i') \
693 KEYWORD("if", IF) \
694 KEYWORD("implements", FUTURE_STRICT_RESERVED_WORD) \
695 KEYWORD("import", FUTURE_RESERVED_WORD) \
696 KEYWORD("in", IN) \
697 KEYWORD("instanceof", INSTANCEOF) \
698 KEYWORD("interface", FUTURE_STRICT_RESERVED_WORD) \
699 KEYWORD_GROUP('l') \
700 KEYWORD("let", FUTURE_STRICT_RESERVED_WORD) \
701 KEYWORD_GROUP('n') \
702 KEYWORD("new", NEW) \
703 KEYWORD("null", NULL_LITERAL) \
704 KEYWORD_GROUP('p') \
705 KEYWORD("package", FUTURE_STRICT_RESERVED_WORD) \
706 KEYWORD("private", FUTURE_STRICT_RESERVED_WORD) \
707 KEYWORD("protected", FUTURE_STRICT_RESERVED_WORD) \
708 KEYWORD("public", FUTURE_STRICT_RESERVED_WORD) \
709 KEYWORD_GROUP('r') \
710 KEYWORD("return", RETURN) \
711 KEYWORD_GROUP('s') \
712 KEYWORD("static", FUTURE_STRICT_RESERVED_WORD) \
713 KEYWORD("super", FUTURE_RESERVED_WORD) \
714 KEYWORD("switch", SWITCH) \
715 KEYWORD_GROUP('t') \
716 KEYWORD("this", THIS) \
717 KEYWORD("throw", THROW) \
718 KEYWORD("true", TRUE_LITERAL) \
719 KEYWORD("try", TRY) \
720 KEYWORD("typeof", TYPEOF) \
721 KEYWORD_GROUP('v') \
722 KEYWORD("var", VAR) \
723 KEYWORD("void", VOID) \
724 KEYWORD_GROUP('w') \
725 KEYWORD("while", WHILE) \
726 KEYWORD("with", WITH) \
727 KEYWORD_GROUP('y') \
728 KEYWORD("yield", FUTURE_STRICT_RESERVED_WORD)
729
730
731 static Token::Value KeywordOrIdentifierToken(const char* input,
732 int input_length) {
733 ASSERT(input_length >= 1);
734 const int kMinLength = 2;
735 const int kMaxLength = 10;
736 if (input_length < kMinLength || input_length > kMaxLength) {
737 return Token::IDENTIFIER;
738 }
739 switch (input[0]) {
740 default:
741 #define KEYWORD_GROUP_CASE(ch) \
742 break; \
743 case ch:
744 #define KEYWORD(keyword, token) \
745 { \
746 /* 'keyword' is a char array, so sizeof(keyword) is */ \
747 /* strlen(keyword) plus 1 for the NUL char. */ \
748 const int keyword_length = sizeof(keyword) - 1; \
749 STATIC_ASSERT(keyword_length >= kMinLength); \
750 STATIC_ASSERT(keyword_length <= kMaxLength); \
751 if (input_length == keyword_length && \
752 input[1] == keyword[1] && \
753 (keyword_length <= 2 || input[2] == keyword[2]) && \
754 (keyword_length <= 3 || input[3] == keyword[3]) && \
755 (keyword_length <= 4 || input[4] == keyword[4]) && \
756 (keyword_length <= 5 || input[5] == keyword[5]) && \
757 (keyword_length <= 6 || input[6] == keyword[6]) && \
758 (keyword_length <= 7 || input[7] == keyword[7]) && \
759 (keyword_length <= 8 || input[8] == keyword[8]) && \
760 (keyword_length <= 9 || input[9] == keyword[9])) { \
761 return Token::token; \
762 } \
763 }
764 KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD)
765 }
766 return Token::IDENTIFIER;
767 }
768
769
665 Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() { 770 Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() {
666 ASSERT(unicode_cache_->IsIdentifierStart(c0_)); 771 ASSERT(unicode_cache_->IsIdentifierStart(c0_));
667 LiteralScope literal(this); 772 LiteralScope literal(this);
668 KeywordMatcher keyword_match;
669 // Scan identifier start character. 773 // Scan identifier start character.
670 if (c0_ == '\\') { 774 if (c0_ == '\\') {
671 uc32 c = ScanIdentifierUnicodeEscape(); 775 uc32 c = ScanIdentifierUnicodeEscape();
672 // Only allow legal identifier start characters. 776 // Only allow legal identifier start characters.
673 if (!unicode_cache_->IsIdentifierStart(c)) return Token::ILLEGAL; 777 if (!unicode_cache_->IsIdentifierStart(c)) return Token::ILLEGAL;
674 AddLiteralChar(c); 778 AddLiteralChar(c);
675 return ScanIdentifierSuffix(&literal); 779 return ScanIdentifierSuffix(&literal);
676 } 780 }
677 781
678 uc32 first_char = c0_; 782 uc32 first_char = c0_;
679 Advance(); 783 Advance();
680 AddLiteralChar(first_char); 784 AddLiteralChar(first_char);
681 if (!keyword_match.AddChar(first_char)) {
682 return ScanIdentifierSuffix(&literal);
683 }
684 785
685 // Scan the rest of the identifier characters. 786 // Scan the rest of the identifier characters.
686 while (unicode_cache_->IsIdentifierPart(c0_)) { 787 while (unicode_cache_->IsIdentifierPart(c0_)) {
687 if (c0_ != '\\') { 788 if (c0_ != '\\') {
688 uc32 next_char = c0_; 789 uc32 next_char = c0_;
689 Advance(); 790 Advance();
690 AddLiteralChar(next_char); 791 AddLiteralChar(next_char);
691 if (keyword_match.AddChar(next_char)) continue; 792 continue;
692 } 793 }
693 // Fallthrough if no loner able to complete keyword. 794 // Fallthrough if no longer able to complete keyword.
694 return ScanIdentifierSuffix(&literal); 795 return ScanIdentifierSuffix(&literal);
695 } 796 }
797
696 literal.Complete(); 798 literal.Complete();
697 799
698 return keyword_match.token(); 800 if (next_.literal_chars->is_ascii()) {
801 Vector<const char> chars = next_.literal_chars->ascii_literal();
802 return KeywordOrIdentifierToken(chars.start(), chars.length());
803 }
804
805 return Token::IDENTIFIER;
699 } 806 }
700 807
701 808
702 Token::Value JavaScriptScanner::ScanIdentifierSuffix(LiteralScope* literal) { 809 Token::Value JavaScriptScanner::ScanIdentifierSuffix(LiteralScope* literal) {
703 // Scan the rest of the identifier characters. 810 // Scan the rest of the identifier characters.
704 while (unicode_cache_->IsIdentifierPart(c0_)) { 811 while (unicode_cache_->IsIdentifierPart(c0_)) {
705 if (c0_ == '\\') { 812 if (c0_ == '\\') {
706 uc32 c = ScanIdentifierUnicodeEscape(); 813 uc32 c = ScanIdentifierUnicodeEscape();
707 // Only allow legal identifier part characters. 814 // Only allow legal identifier part characters.
708 if (!unicode_cache_->IsIdentifierPart(c)) return Token::ILLEGAL; 815 if (!unicode_cache_->IsIdentifierPart(c)) return Token::ILLEGAL;
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after
778 } 885 }
779 } 886 }
780 AddLiteralCharAdvance(); 887 AddLiteralCharAdvance();
781 } 888 }
782 literal.Complete(); 889 literal.Complete();
783 890
784 next_.location.end_pos = source_pos() - 1; 891 next_.location.end_pos = source_pos() - 1;
785 return true; 892 return true;
786 } 893 }
787 894
788 // ----------------------------------------------------------------------------
789 // Keyword Matcher
790
791 const KeywordMatcher::FirstState KeywordMatcher::first_states_[] = {
792 { "break", KEYWORD_PREFIX, Token::BREAK },
793 { NULL, C, Token::ILLEGAL },
794 { NULL, D, Token::ILLEGAL },
795 { NULL, E, Token::ILLEGAL },
796 { NULL, F, Token::ILLEGAL },
797 { NULL, UNMATCHABLE, Token::ILLEGAL },
798 { NULL, UNMATCHABLE, Token::ILLEGAL },
799 { NULL, I, Token::ILLEGAL },
800 { NULL, UNMATCHABLE, Token::ILLEGAL },
801 { NULL, UNMATCHABLE, Token::ILLEGAL },
802 { "let", KEYWORD_PREFIX, Token::FUTURE_STRICT_RESERVED_WORD },
803 { NULL, UNMATCHABLE, Token::ILLEGAL },
804 { NULL, N, Token::ILLEGAL },
805 { NULL, UNMATCHABLE, Token::ILLEGAL },
806 { NULL, P, Token::ILLEGAL },
807 { NULL, UNMATCHABLE, Token::ILLEGAL },
808 { "return", KEYWORD_PREFIX, Token::RETURN },
809 { NULL, S, Token::ILLEGAL },
810 { NULL, T, Token::ILLEGAL },
811 { NULL, UNMATCHABLE, Token::ILLEGAL },
812 { NULL, V, Token::ILLEGAL },
813 { NULL, W, Token::ILLEGAL },
814 { NULL, UNMATCHABLE, Token::ILLEGAL },
815 { "yield", KEYWORD_PREFIX, Token::FUTURE_STRICT_RESERVED_WORD }
816 };
817
818
819 void KeywordMatcher::Step(unibrow::uchar input) {
820 switch (state_) {
821 case INITIAL: {
822 // matching the first character is the only state with significant fanout.
823 // Match only lower-case letters in range 'b'..'y'.
824 unsigned int offset = input - kFirstCharRangeMin;
825 if (offset < kFirstCharRangeLength) {
826 state_ = first_states_[offset].state;
827 if (state_ == KEYWORD_PREFIX) {
828 keyword_ = first_states_[offset].keyword;
829 counter_ = 1;
830 keyword_token_ = first_states_[offset].token;
831 }
832 return;
833 }
834 break;
835 }
836 case KEYWORD_PREFIX:
837 if (static_cast<unibrow::uchar>(keyword_[counter_]) == input) {
838 counter_++;
839 if (keyword_[counter_] == '\0') {
840 state_ = KEYWORD_MATCHED;
841 token_ = keyword_token_;
842 }
843 return;
844 }
845 break;
846 case KEYWORD_MATCHED:
847 token_ = Token::IDENTIFIER;
848 break;
849 case C:
850 if (MatchState(input, 'a', CA)) return;
851 if (MatchKeywordStart(input, "class", 1,
852 Token::FUTURE_RESERVED_WORD)) return;
853 if (MatchState(input, 'o', CO)) return;
854 break;
855 case CA:
856 if (MatchKeywordStart(input, "case", 2, Token::CASE)) return;
857 if (MatchKeywordStart(input, "catch", 2, Token::CATCH)) return;
858 break;
859 case CO:
860 if (MatchState(input, 'n', CON)) return;
861 break;
862 case CON:
863 if (MatchKeywordStart(input, "const", 3, Token::CONST)) return;
864 if (MatchKeywordStart(input, "continue", 3, Token::CONTINUE)) return;
865 break;
866 case D:
867 if (MatchState(input, 'e', DE)) return;
868 if (MatchKeyword(input, 'o', KEYWORD_MATCHED, Token::DO)) return;
869 break;
870 case DE:
871 if (MatchKeywordStart(input, "debugger", 2, Token::DEBUGGER)) return;
872 if (MatchKeywordStart(input, "default", 2, Token::DEFAULT)) return;
873 if (MatchKeywordStart(input, "delete", 2, Token::DELETE)) return;
874 break;
875 case E:
876 if (MatchKeywordStart(input, "else", 1, Token::ELSE)) return;
877 if (MatchKeywordStart(input, "enum", 1,
878 Token::FUTURE_RESERVED_WORD)) return;
879 if (MatchState(input, 'x', EX)) return;
880 break;
881 case EX:
882 if (MatchKeywordStart(input, "export", 2,
883 Token::FUTURE_RESERVED_WORD)) return;
884 if (MatchKeywordStart(input, "extends", 2,
885 Token::FUTURE_RESERVED_WORD)) return;
886 break;
887 case F:
888 if (MatchKeywordStart(input, "false", 1, Token::FALSE_LITERAL)) return;
889 if (MatchKeywordStart(input, "finally", 1, Token::FINALLY)) return;
890 if (MatchKeywordStart(input, "for", 1, Token::FOR)) return;
891 if (MatchKeywordStart(input, "function", 1, Token::FUNCTION)) return;
892 break;
893 case I:
894 if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF)) return;
895 if (MatchState(input, 'm', IM)) return;
896 if (MatchKeyword(input, 'n', IN, Token::IN)) return;
897 break;
898 case IM:
899 if (MatchState(input, 'p', IMP)) return;
900 break;
901 case IMP:
902 if (MatchKeywordStart(input, "implements", 3,
903 Token::FUTURE_STRICT_RESERVED_WORD )) return;
904 if (MatchKeywordStart(input, "import", 3,
905 Token::FUTURE_RESERVED_WORD)) return;
906 break;
907 case IN:
908 token_ = Token::IDENTIFIER;
909 if (MatchKeywordStart(input, "interface", 2,
910 Token::FUTURE_STRICT_RESERVED_WORD)) return;
911 if (MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) return;
912 break;
913 case N:
914 if (MatchKeywordStart(input, "new", 1, Token::NEW)) return;
915 if (MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL)) return;
916 break;
917 case P:
918 if (MatchKeywordStart(input, "package", 1,
919 Token::FUTURE_STRICT_RESERVED_WORD)) return;
920 if (MatchState(input, 'r', PR)) return;
921 if (MatchKeywordStart(input, "public", 1,
922 Token::FUTURE_STRICT_RESERVED_WORD)) return;
923 break;
924 case PR:
925 if (MatchKeywordStart(input, "private", 2,
926 Token::FUTURE_STRICT_RESERVED_WORD)) return;
927 if (MatchKeywordStart(input, "protected", 2,
928 Token::FUTURE_STRICT_RESERVED_WORD)) return;
929 break;
930 case S:
931 if (MatchKeywordStart(input, "static", 1,
932 Token::FUTURE_STRICT_RESERVED_WORD)) return;
933 if (MatchKeywordStart(input, "super", 1,
934 Token::FUTURE_RESERVED_WORD)) return;
935 if (MatchKeywordStart(input, "switch", 1,
936 Token::SWITCH)) return;
937 break;
938 case T:
939 if (MatchState(input, 'h', TH)) return;
940 if (MatchState(input, 'r', TR)) return;
941 if (MatchKeywordStart(input, "typeof", 1, Token::TYPEOF)) return;
942 break;
943 case TH:
944 if (MatchKeywordStart(input, "this", 2, Token::THIS)) return;
945 if (MatchKeywordStart(input, "throw", 2, Token::THROW)) return;
946 break;
947 case TR:
948 if (MatchKeywordStart(input, "true", 2, Token::TRUE_LITERAL)) return;
949 if (MatchKeyword(input, 'y', KEYWORD_MATCHED, Token::TRY)) return;
950 break;
951 case V:
952 if (MatchKeywordStart(input, "var", 1, Token::VAR)) return;
953 if (MatchKeywordStart(input, "void", 1, Token::VOID)) return;
954 break;
955 case W:
956 if (MatchKeywordStart(input, "while", 1, Token::WHILE)) return;
957 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return;
958 break;
959 case UNMATCHABLE:
960 break;
961 }
962 // On fallthrough, it's a failure.
963 state_ = UNMATCHABLE;
964 }
965
966 } } // namespace v8::internal 895 } } // namespace v8::internal
OLD | NEW
« no previous file with comments | « src/scanner-base.h ('k') | src/token.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698