OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 644 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
655 if (c0_ != 'u') return unibrow::Utf8::kBadChar; | 655 if (c0_ != 'u') return unibrow::Utf8::kBadChar; |
656 Advance(); | 656 Advance(); |
657 uc32 c = ScanHexEscape('u', 4); | 657 uc32 c = ScanHexEscape('u', 4); |
658 // We do not allow a unicode escape sequence to start another | 658 // We do not allow a unicode escape sequence to start another |
659 // unicode escape sequence. | 659 // unicode escape sequence. |
660 if (c == '\\') return unibrow::Utf8::kBadChar; | 660 if (c == '\\') return unibrow::Utf8::kBadChar; |
661 return c; | 661 return c; |
662 } | 662 } |
663 | 663 |
664 | 664 |
| 665 // ---------------------------------------------------------------------------- |
| 666 // Keyword Matcher |
| 667 |
| 668 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \ |
| 669 KEYWORD_GROUP('b') \ |
| 670 KEYWORD("break", BREAK) \ |
| 671 KEYWORD_GROUP('c') \ |
| 672 KEYWORD("case", CASE) \ |
| 673 KEYWORD("catch", CATCH) \ |
| 674 KEYWORD("class", FUTURE_RESERVED_WORD) \ |
| 675 KEYWORD("const", CONST) \ |
| 676 KEYWORD("continue", CONTINUE) \ |
| 677 KEYWORD_GROUP('d') \ |
| 678 KEYWORD("debugger", DEBUGGER) \ |
| 679 KEYWORD("default", DEFAULT) \ |
| 680 KEYWORD("delete", DELETE) \ |
| 681 KEYWORD("do", DO) \ |
| 682 KEYWORD_GROUP('e') \ |
| 683 KEYWORD("else", ELSE) \ |
| 684 KEYWORD("enum", FUTURE_RESERVED_WORD) \ |
| 685 KEYWORD("export", FUTURE_RESERVED_WORD) \ |
| 686 KEYWORD("extends", FUTURE_RESERVED_WORD) \ |
| 687 KEYWORD_GROUP('f') \ |
| 688 KEYWORD("false", FALSE_LITERAL) \ |
| 689 KEYWORD("finally", FINALLY) \ |
| 690 KEYWORD("for", FOR) \ |
| 691 KEYWORD("function", FUNCTION) \ |
| 692 KEYWORD_GROUP('i') \ |
| 693 KEYWORD("if", IF) \ |
| 694 KEYWORD("implements", FUTURE_STRICT_RESERVED_WORD) \ |
| 695 KEYWORD("import", FUTURE_RESERVED_WORD) \ |
| 696 KEYWORD("in", IN) \ |
| 697 KEYWORD("instanceof", INSTANCEOF) \ |
| 698 KEYWORD("interface", FUTURE_STRICT_RESERVED_WORD) \ |
| 699 KEYWORD_GROUP('l') \ |
| 700 KEYWORD("let", FUTURE_STRICT_RESERVED_WORD) \ |
| 701 KEYWORD_GROUP('n') \ |
| 702 KEYWORD("new", NEW) \ |
| 703 KEYWORD("null", NULL_LITERAL) \ |
| 704 KEYWORD_GROUP('p') \ |
| 705 KEYWORD("package", FUTURE_STRICT_RESERVED_WORD) \ |
| 706 KEYWORD("private", FUTURE_STRICT_RESERVED_WORD) \ |
| 707 KEYWORD("protected", FUTURE_STRICT_RESERVED_WORD) \ |
| 708 KEYWORD("public", FUTURE_STRICT_RESERVED_WORD) \ |
| 709 KEYWORD_GROUP('r') \ |
| 710 KEYWORD("return", RETURN) \ |
| 711 KEYWORD_GROUP('s') \ |
| 712 KEYWORD("static", FUTURE_STRICT_RESERVED_WORD) \ |
| 713 KEYWORD("super", FUTURE_RESERVED_WORD) \ |
| 714 KEYWORD("switch", SWITCH) \ |
| 715 KEYWORD_GROUP('t') \ |
| 716 KEYWORD("this", THIS) \ |
| 717 KEYWORD("throw", THROW) \ |
| 718 KEYWORD("true", TRUE_LITERAL) \ |
| 719 KEYWORD("try", TRY) \ |
| 720 KEYWORD("typeof", TYPEOF) \ |
| 721 KEYWORD_GROUP('v') \ |
| 722 KEYWORD("var", VAR) \ |
| 723 KEYWORD("void", VOID) \ |
| 724 KEYWORD_GROUP('w') \ |
| 725 KEYWORD("while", WHILE) \ |
| 726 KEYWORD("with", WITH) \ |
| 727 KEYWORD_GROUP('y') \ |
| 728 KEYWORD("yield", FUTURE_STRICT_RESERVED_WORD) |
| 729 |
| 730 |
| 731 static Token::Value KeywordOrIdentifierToken(const char* input, |
| 732 int input_length) { |
| 733 ASSERT(input_length >= 1); |
| 734 const int kMinLength = 2; |
| 735 const int kMaxLength = 10; |
| 736 if (input_length < kMinLength || input_length > kMaxLength) { |
| 737 return Token::IDENTIFIER; |
| 738 } |
| 739 switch (input[0]) { |
| 740 default: |
| 741 #define KEYWORD_GROUP_CASE(ch) \ |
| 742 break; \ |
| 743 case ch: |
| 744 #define KEYWORD(keyword, token) \ |
| 745 { \ |
| 746 /* 'keyword' is a char array, so sizeof(keyword) is */ \ |
| 747 /* strlen(keyword) plus 1 for the NUL char. */ \ |
| 748 const int keyword_length = sizeof(keyword) - 1; \ |
| 749 STATIC_ASSERT(keyword_length >= kMinLength); \ |
| 750 STATIC_ASSERT(keyword_length <= kMaxLength); \ |
| 751 if (input_length == keyword_length && \ |
| 752 input[1] == keyword[1] && \ |
| 753 (keyword_length <= 2 || input[2] == keyword[2]) && \ |
| 754 (keyword_length <= 3 || input[3] == keyword[3]) && \ |
| 755 (keyword_length <= 4 || input[4] == keyword[4]) && \ |
| 756 (keyword_length <= 5 || input[5] == keyword[5]) && \ |
| 757 (keyword_length <= 6 || input[6] == keyword[6]) && \ |
| 758 (keyword_length <= 7 || input[7] == keyword[7]) && \ |
| 759 (keyword_length <= 8 || input[8] == keyword[8]) && \ |
| 760 (keyword_length <= 9 || input[9] == keyword[9])) { \ |
| 761 return Token::token; \ |
| 762 } \ |
| 763 } |
| 764 KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD) |
| 765 } |
| 766 return Token::IDENTIFIER; |
| 767 } |
| 768 |
| 769 |
665 Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() { | 770 Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() { |
666 ASSERT(unicode_cache_->IsIdentifierStart(c0_)); | 771 ASSERT(unicode_cache_->IsIdentifierStart(c0_)); |
667 LiteralScope literal(this); | 772 LiteralScope literal(this); |
668 KeywordMatcher keyword_match; | |
669 // Scan identifier start character. | 773 // Scan identifier start character. |
670 if (c0_ == '\\') { | 774 if (c0_ == '\\') { |
671 uc32 c = ScanIdentifierUnicodeEscape(); | 775 uc32 c = ScanIdentifierUnicodeEscape(); |
672 // Only allow legal identifier start characters. | 776 // Only allow legal identifier start characters. |
673 if (!unicode_cache_->IsIdentifierStart(c)) return Token::ILLEGAL; | 777 if (!unicode_cache_->IsIdentifierStart(c)) return Token::ILLEGAL; |
674 AddLiteralChar(c); | 778 AddLiteralChar(c); |
675 return ScanIdentifierSuffix(&literal); | 779 return ScanIdentifierSuffix(&literal); |
676 } | 780 } |
677 | 781 |
678 uc32 first_char = c0_; | 782 uc32 first_char = c0_; |
679 Advance(); | 783 Advance(); |
680 AddLiteralChar(first_char); | 784 AddLiteralChar(first_char); |
681 if (!keyword_match.AddChar(first_char)) { | |
682 return ScanIdentifierSuffix(&literal); | |
683 } | |
684 | 785 |
685 // Scan the rest of the identifier characters. | 786 // Scan the rest of the identifier characters. |
686 while (unicode_cache_->IsIdentifierPart(c0_)) { | 787 while (unicode_cache_->IsIdentifierPart(c0_)) { |
687 if (c0_ != '\\') { | 788 if (c0_ != '\\') { |
688 uc32 next_char = c0_; | 789 uc32 next_char = c0_; |
689 Advance(); | 790 Advance(); |
690 AddLiteralChar(next_char); | 791 AddLiteralChar(next_char); |
691 if (keyword_match.AddChar(next_char)) continue; | 792 continue; |
692 } | 793 } |
693 // Fallthrough if no loner able to complete keyword. | 794 // Fallthrough if no longer able to complete keyword. |
694 return ScanIdentifierSuffix(&literal); | 795 return ScanIdentifierSuffix(&literal); |
695 } | 796 } |
| 797 |
696 literal.Complete(); | 798 literal.Complete(); |
697 | 799 |
698 return keyword_match.token(); | 800 if (next_.literal_chars->is_ascii()) { |
| 801 Vector<const char> chars = next_.literal_chars->ascii_literal(); |
| 802 return KeywordOrIdentifierToken(chars.start(), chars.length()); |
| 803 } |
| 804 |
| 805 return Token::IDENTIFIER; |
699 } | 806 } |
700 | 807 |
701 | 808 |
702 Token::Value JavaScriptScanner::ScanIdentifierSuffix(LiteralScope* literal) { | 809 Token::Value JavaScriptScanner::ScanIdentifierSuffix(LiteralScope* literal) { |
703 // Scan the rest of the identifier characters. | 810 // Scan the rest of the identifier characters. |
704 while (unicode_cache_->IsIdentifierPart(c0_)) { | 811 while (unicode_cache_->IsIdentifierPart(c0_)) { |
705 if (c0_ == '\\') { | 812 if (c0_ == '\\') { |
706 uc32 c = ScanIdentifierUnicodeEscape(); | 813 uc32 c = ScanIdentifierUnicodeEscape(); |
707 // Only allow legal identifier part characters. | 814 // Only allow legal identifier part characters. |
708 if (!unicode_cache_->IsIdentifierPart(c)) return Token::ILLEGAL; | 815 if (!unicode_cache_->IsIdentifierPart(c)) return Token::ILLEGAL; |
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
778 } | 885 } |
779 } | 886 } |
780 AddLiteralCharAdvance(); | 887 AddLiteralCharAdvance(); |
781 } | 888 } |
782 literal.Complete(); | 889 literal.Complete(); |
783 | 890 |
784 next_.location.end_pos = source_pos() - 1; | 891 next_.location.end_pos = source_pos() - 1; |
785 return true; | 892 return true; |
786 } | 893 } |
787 | 894 |
788 // ---------------------------------------------------------------------------- | |
789 // Keyword Matcher | |
790 | |
791 const KeywordMatcher::FirstState KeywordMatcher::first_states_[] = { | |
792 { "break", KEYWORD_PREFIX, Token::BREAK }, | |
793 { NULL, C, Token::ILLEGAL }, | |
794 { NULL, D, Token::ILLEGAL }, | |
795 { NULL, E, Token::ILLEGAL }, | |
796 { NULL, F, Token::ILLEGAL }, | |
797 { NULL, UNMATCHABLE, Token::ILLEGAL }, | |
798 { NULL, UNMATCHABLE, Token::ILLEGAL }, | |
799 { NULL, I, Token::ILLEGAL }, | |
800 { NULL, UNMATCHABLE, Token::ILLEGAL }, | |
801 { NULL, UNMATCHABLE, Token::ILLEGAL }, | |
802 { "let", KEYWORD_PREFIX, Token::FUTURE_STRICT_RESERVED_WORD }, | |
803 { NULL, UNMATCHABLE, Token::ILLEGAL }, | |
804 { NULL, N, Token::ILLEGAL }, | |
805 { NULL, UNMATCHABLE, Token::ILLEGAL }, | |
806 { NULL, P, Token::ILLEGAL }, | |
807 { NULL, UNMATCHABLE, Token::ILLEGAL }, | |
808 { "return", KEYWORD_PREFIX, Token::RETURN }, | |
809 { NULL, S, Token::ILLEGAL }, | |
810 { NULL, T, Token::ILLEGAL }, | |
811 { NULL, UNMATCHABLE, Token::ILLEGAL }, | |
812 { NULL, V, Token::ILLEGAL }, | |
813 { NULL, W, Token::ILLEGAL }, | |
814 { NULL, UNMATCHABLE, Token::ILLEGAL }, | |
815 { "yield", KEYWORD_PREFIX, Token::FUTURE_STRICT_RESERVED_WORD } | |
816 }; | |
817 | |
818 | |
819 void KeywordMatcher::Step(unibrow::uchar input) { | |
820 switch (state_) { | |
821 case INITIAL: { | |
822 // matching the first character is the only state with significant fanout. | |
823 // Match only lower-case letters in range 'b'..'y'. | |
824 unsigned int offset = input - kFirstCharRangeMin; | |
825 if (offset < kFirstCharRangeLength) { | |
826 state_ = first_states_[offset].state; | |
827 if (state_ == KEYWORD_PREFIX) { | |
828 keyword_ = first_states_[offset].keyword; | |
829 counter_ = 1; | |
830 keyword_token_ = first_states_[offset].token; | |
831 } | |
832 return; | |
833 } | |
834 break; | |
835 } | |
836 case KEYWORD_PREFIX: | |
837 if (static_cast<unibrow::uchar>(keyword_[counter_]) == input) { | |
838 counter_++; | |
839 if (keyword_[counter_] == '\0') { | |
840 state_ = KEYWORD_MATCHED; | |
841 token_ = keyword_token_; | |
842 } | |
843 return; | |
844 } | |
845 break; | |
846 case KEYWORD_MATCHED: | |
847 token_ = Token::IDENTIFIER; | |
848 break; | |
849 case C: | |
850 if (MatchState(input, 'a', CA)) return; | |
851 if (MatchKeywordStart(input, "class", 1, | |
852 Token::FUTURE_RESERVED_WORD)) return; | |
853 if (MatchState(input, 'o', CO)) return; | |
854 break; | |
855 case CA: | |
856 if (MatchKeywordStart(input, "case", 2, Token::CASE)) return; | |
857 if (MatchKeywordStart(input, "catch", 2, Token::CATCH)) return; | |
858 break; | |
859 case CO: | |
860 if (MatchState(input, 'n', CON)) return; | |
861 break; | |
862 case CON: | |
863 if (MatchKeywordStart(input, "const", 3, Token::CONST)) return; | |
864 if (MatchKeywordStart(input, "continue", 3, Token::CONTINUE)) return; | |
865 break; | |
866 case D: | |
867 if (MatchState(input, 'e', DE)) return; | |
868 if (MatchKeyword(input, 'o', KEYWORD_MATCHED, Token::DO)) return; | |
869 break; | |
870 case DE: | |
871 if (MatchKeywordStart(input, "debugger", 2, Token::DEBUGGER)) return; | |
872 if (MatchKeywordStart(input, "default", 2, Token::DEFAULT)) return; | |
873 if (MatchKeywordStart(input, "delete", 2, Token::DELETE)) return; | |
874 break; | |
875 case E: | |
876 if (MatchKeywordStart(input, "else", 1, Token::ELSE)) return; | |
877 if (MatchKeywordStart(input, "enum", 1, | |
878 Token::FUTURE_RESERVED_WORD)) return; | |
879 if (MatchState(input, 'x', EX)) return; | |
880 break; | |
881 case EX: | |
882 if (MatchKeywordStart(input, "export", 2, | |
883 Token::FUTURE_RESERVED_WORD)) return; | |
884 if (MatchKeywordStart(input, "extends", 2, | |
885 Token::FUTURE_RESERVED_WORD)) return; | |
886 break; | |
887 case F: | |
888 if (MatchKeywordStart(input, "false", 1, Token::FALSE_LITERAL)) return; | |
889 if (MatchKeywordStart(input, "finally", 1, Token::FINALLY)) return; | |
890 if (MatchKeywordStart(input, "for", 1, Token::FOR)) return; | |
891 if (MatchKeywordStart(input, "function", 1, Token::FUNCTION)) return; | |
892 break; | |
893 case I: | |
894 if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF)) return; | |
895 if (MatchState(input, 'm', IM)) return; | |
896 if (MatchKeyword(input, 'n', IN, Token::IN)) return; | |
897 break; | |
898 case IM: | |
899 if (MatchState(input, 'p', IMP)) return; | |
900 break; | |
901 case IMP: | |
902 if (MatchKeywordStart(input, "implements", 3, | |
903 Token::FUTURE_STRICT_RESERVED_WORD )) return; | |
904 if (MatchKeywordStart(input, "import", 3, | |
905 Token::FUTURE_RESERVED_WORD)) return; | |
906 break; | |
907 case IN: | |
908 token_ = Token::IDENTIFIER; | |
909 if (MatchKeywordStart(input, "interface", 2, | |
910 Token::FUTURE_STRICT_RESERVED_WORD)) return; | |
911 if (MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) return; | |
912 break; | |
913 case N: | |
914 if (MatchKeywordStart(input, "new", 1, Token::NEW)) return; | |
915 if (MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL)) return; | |
916 break; | |
917 case P: | |
918 if (MatchKeywordStart(input, "package", 1, | |
919 Token::FUTURE_STRICT_RESERVED_WORD)) return; | |
920 if (MatchState(input, 'r', PR)) return; | |
921 if (MatchKeywordStart(input, "public", 1, | |
922 Token::FUTURE_STRICT_RESERVED_WORD)) return; | |
923 break; | |
924 case PR: | |
925 if (MatchKeywordStart(input, "private", 2, | |
926 Token::FUTURE_STRICT_RESERVED_WORD)) return; | |
927 if (MatchKeywordStart(input, "protected", 2, | |
928 Token::FUTURE_STRICT_RESERVED_WORD)) return; | |
929 break; | |
930 case S: | |
931 if (MatchKeywordStart(input, "static", 1, | |
932 Token::FUTURE_STRICT_RESERVED_WORD)) return; | |
933 if (MatchKeywordStart(input, "super", 1, | |
934 Token::FUTURE_RESERVED_WORD)) return; | |
935 if (MatchKeywordStart(input, "switch", 1, | |
936 Token::SWITCH)) return; | |
937 break; | |
938 case T: | |
939 if (MatchState(input, 'h', TH)) return; | |
940 if (MatchState(input, 'r', TR)) return; | |
941 if (MatchKeywordStart(input, "typeof", 1, Token::TYPEOF)) return; | |
942 break; | |
943 case TH: | |
944 if (MatchKeywordStart(input, "this", 2, Token::THIS)) return; | |
945 if (MatchKeywordStart(input, "throw", 2, Token::THROW)) return; | |
946 break; | |
947 case TR: | |
948 if (MatchKeywordStart(input, "true", 2, Token::TRUE_LITERAL)) return; | |
949 if (MatchKeyword(input, 'y', KEYWORD_MATCHED, Token::TRY)) return; | |
950 break; | |
951 case V: | |
952 if (MatchKeywordStart(input, "var", 1, Token::VAR)) return; | |
953 if (MatchKeywordStart(input, "void", 1, Token::VOID)) return; | |
954 break; | |
955 case W: | |
956 if (MatchKeywordStart(input, "while", 1, Token::WHILE)) return; | |
957 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return; | |
958 break; | |
959 case UNMATCHABLE: | |
960 break; | |
961 } | |
962 // On fallthrough, it's a failure. | |
963 state_ = UNMATCHABLE; | |
964 } | |
965 | |
966 } } // namespace v8::internal | 895 } } // namespace v8::internal |
OLD | NEW |