| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 644 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 655 if (c0_ != 'u') return unibrow::Utf8::kBadChar; | 655 if (c0_ != 'u') return unibrow::Utf8::kBadChar; |
| 656 Advance(); | 656 Advance(); |
| 657 uc32 c = ScanHexEscape('u', 4); | 657 uc32 c = ScanHexEscape('u', 4); |
| 658 // We do not allow a unicode escape sequence to start another | 658 // We do not allow a unicode escape sequence to start another |
| 659 // unicode escape sequence. | 659 // unicode escape sequence. |
| 660 if (c == '\\') return unibrow::Utf8::kBadChar; | 660 if (c == '\\') return unibrow::Utf8::kBadChar; |
| 661 return c; | 661 return c; |
| 662 } | 662 } |
| 663 | 663 |
| 664 | 664 |
| 665 // ---------------------------------------------------------------------------- |
| 666 // Keyword Matcher |
| 667 |
| 668 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \ |
| 669 KEYWORD_GROUP('b') \ |
| 670 KEYWORD("break", BREAK) \ |
| 671 KEYWORD_GROUP('c') \ |
| 672 KEYWORD("case", CASE) \ |
| 673 KEYWORD("catch", CATCH) \ |
| 674 KEYWORD("class", FUTURE_RESERVED_WORD) \ |
| 675 KEYWORD("const", CONST) \ |
| 676 KEYWORD("continue", CONTINUE) \ |
| 677 KEYWORD_GROUP('d') \ |
| 678 KEYWORD("debugger", DEBUGGER) \ |
| 679 KEYWORD("default", DEFAULT) \ |
| 680 KEYWORD("delete", DELETE) \ |
| 681 KEYWORD("do", DO) \ |
| 682 KEYWORD_GROUP('e') \ |
| 683 KEYWORD("else", ELSE) \ |
| 684 KEYWORD("enum", FUTURE_RESERVED_WORD) \ |
| 685 KEYWORD("export", FUTURE_RESERVED_WORD) \ |
| 686 KEYWORD("extends", FUTURE_RESERVED_WORD) \ |
| 687 KEYWORD_GROUP('f') \ |
| 688 KEYWORD("false", FALSE_LITERAL) \ |
| 689 KEYWORD("finally", FINALLY) \ |
| 690 KEYWORD("for", FOR) \ |
| 691 KEYWORD("function", FUNCTION) \ |
| 692 KEYWORD_GROUP('i') \ |
| 693 KEYWORD("if", IF) \ |
| 694 KEYWORD("implements", FUTURE_STRICT_RESERVED_WORD) \ |
| 695 KEYWORD("import", FUTURE_RESERVED_WORD) \ |
| 696 KEYWORD("in", IN) \ |
| 697 KEYWORD("instanceof", INSTANCEOF) \ |
| 698 KEYWORD("interface", FUTURE_STRICT_RESERVED_WORD) \ |
| 699 KEYWORD_GROUP('l') \ |
| 700 KEYWORD("let", FUTURE_STRICT_RESERVED_WORD) \ |
| 701 KEYWORD_GROUP('n') \ |
| 702 KEYWORD("new", NEW) \ |
| 703 KEYWORD("null", NULL_LITERAL) \ |
| 704 KEYWORD_GROUP('p') \ |
| 705 KEYWORD("package", FUTURE_STRICT_RESERVED_WORD) \ |
| 706 KEYWORD("private", FUTURE_STRICT_RESERVED_WORD) \ |
| 707 KEYWORD("protected", FUTURE_STRICT_RESERVED_WORD) \ |
| 708 KEYWORD("public", FUTURE_STRICT_RESERVED_WORD) \ |
| 709 KEYWORD_GROUP('r') \ |
| 710 KEYWORD("return", RETURN) \ |
| 711 KEYWORD_GROUP('s') \ |
| 712 KEYWORD("static", FUTURE_STRICT_RESERVED_WORD) \ |
| 713 KEYWORD("super", FUTURE_RESERVED_WORD) \ |
| 714 KEYWORD("switch", SWITCH) \ |
| 715 KEYWORD_GROUP('t') \ |
| 716 KEYWORD("this", THIS) \ |
| 717 KEYWORD("throw", THROW) \ |
| 718 KEYWORD("true", TRUE_LITERAL) \ |
| 719 KEYWORD("try", TRY) \ |
| 720 KEYWORD("typeof", TYPEOF) \ |
| 721 KEYWORD_GROUP('v') \ |
| 722 KEYWORD("var", VAR) \ |
| 723 KEYWORD("void", VOID) \ |
| 724 KEYWORD_GROUP('w') \ |
| 725 KEYWORD("while", WHILE) \ |
| 726 KEYWORD("with", WITH) \ |
| 727 KEYWORD_GROUP('y') \ |
| 728 KEYWORD("yield", FUTURE_STRICT_RESERVED_WORD) |
| 729 |
| 730 |
| 731 static Token::Value KeywordOrIdentifierToken(const char* input, |
| 732 int input_length) { |
| 733 ASSERT(input_length >= 1); |
| 734 const int kMinLength = 2; |
| 735 const int kMaxLength = 10; |
| 736 if (input_length < kMinLength || input_length > kMaxLength) { |
| 737 return Token::IDENTIFIER; |
| 738 } |
| 739 switch (input[0]) { |
| 740 default: |
| 741 #define KEYWORD_GROUP_CASE(ch) \ |
| 742 break; \ |
| 743 case ch: |
| 744 #define KEYWORD(keyword, token) \ |
| 745 { \ |
| 746 /* 'keyword' is a char array, so sizeof(keyword) is */ \ |
| 747 /* strlen(keyword) plus 1 for the NUL char. */ \ |
| 748 const int keyword_length = sizeof(keyword) - 1; \ |
| 749 STATIC_ASSERT(keyword_length >= kMinLength); \ |
| 750 STATIC_ASSERT(keyword_length <= kMaxLength); \ |
| 751 if (input_length == keyword_length && \ |
| 752 input[1] == keyword[1] && \ |
| 753 (keyword_length <= 2 || input[2] == keyword[2]) && \ |
| 754 (keyword_length <= 3 || input[3] == keyword[3]) && \ |
| 755 (keyword_length <= 4 || input[4] == keyword[4]) && \ |
| 756 (keyword_length <= 5 || input[5] == keyword[5]) && \ |
| 757 (keyword_length <= 6 || input[6] == keyword[6]) && \ |
| 758 (keyword_length <= 7 || input[7] == keyword[7]) && \ |
| 759 (keyword_length <= 8 || input[8] == keyword[8]) && \ |
| 760 (keyword_length <= 9 || input[9] == keyword[9])) { \ |
| 761 return Token::token; \ |
| 762 } \ |
| 763 } |
| 764 KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD) |
| 765 } |
| 766 return Token::IDENTIFIER; |
| 767 } |
| 768 |
| 769 |
| 665 Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() { | 770 Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() { |
| 666 ASSERT(unicode_cache_->IsIdentifierStart(c0_)); | 771 ASSERT(unicode_cache_->IsIdentifierStart(c0_)); |
| 667 LiteralScope literal(this); | 772 LiteralScope literal(this); |
| 668 KeywordMatcher keyword_match; | |
| 669 // Scan identifier start character. | 773 // Scan identifier start character. |
| 670 if (c0_ == '\\') { | 774 if (c0_ == '\\') { |
| 671 uc32 c = ScanIdentifierUnicodeEscape(); | 775 uc32 c = ScanIdentifierUnicodeEscape(); |
| 672 // Only allow legal identifier start characters. | 776 // Only allow legal identifier start characters. |
| 673 if (!unicode_cache_->IsIdentifierStart(c)) return Token::ILLEGAL; | 777 if (!unicode_cache_->IsIdentifierStart(c)) return Token::ILLEGAL; |
| 674 AddLiteralChar(c); | 778 AddLiteralChar(c); |
| 675 return ScanIdentifierSuffix(&literal); | 779 return ScanIdentifierSuffix(&literal); |
| 676 } | 780 } |
| 677 | 781 |
| 678 uc32 first_char = c0_; | 782 uc32 first_char = c0_; |
| 679 Advance(); | 783 Advance(); |
| 680 AddLiteralChar(first_char); | 784 AddLiteralChar(first_char); |
| 681 if (!keyword_match.AddChar(first_char)) { | |
| 682 return ScanIdentifierSuffix(&literal); | |
| 683 } | |
| 684 | 785 |
| 685 // Scan the rest of the identifier characters. | 786 // Scan the rest of the identifier characters. |
| 686 while (unicode_cache_->IsIdentifierPart(c0_)) { | 787 while (unicode_cache_->IsIdentifierPart(c0_)) { |
| 687 if (c0_ != '\\') { | 788 if (c0_ != '\\') { |
| 688 uc32 next_char = c0_; | 789 uc32 next_char = c0_; |
| 689 Advance(); | 790 Advance(); |
| 690 AddLiteralChar(next_char); | 791 AddLiteralChar(next_char); |
| 691 if (keyword_match.AddChar(next_char)) continue; | 792 continue; |
| 692 } | 793 } |
| 693 // Fallthrough if no loner able to complete keyword. | 794 // Fallthrough if no longer able to complete keyword. |
| 694 return ScanIdentifierSuffix(&literal); | 795 return ScanIdentifierSuffix(&literal); |
| 695 } | 796 } |
| 797 |
| 696 literal.Complete(); | 798 literal.Complete(); |
| 697 | 799 |
| 698 return keyword_match.token(); | 800 if (next_.literal_chars->is_ascii()) { |
| 801 Vector<const char> chars = next_.literal_chars->ascii_literal(); |
| 802 return KeywordOrIdentifierToken(chars.start(), chars.length()); |
| 803 } |
| 804 |
| 805 return Token::IDENTIFIER; |
| 699 } | 806 } |
| 700 | 807 |
| 701 | 808 |
| 702 Token::Value JavaScriptScanner::ScanIdentifierSuffix(LiteralScope* literal) { | 809 Token::Value JavaScriptScanner::ScanIdentifierSuffix(LiteralScope* literal) { |
| 703 // Scan the rest of the identifier characters. | 810 // Scan the rest of the identifier characters. |
| 704 while (unicode_cache_->IsIdentifierPart(c0_)) { | 811 while (unicode_cache_->IsIdentifierPart(c0_)) { |
| 705 if (c0_ == '\\') { | 812 if (c0_ == '\\') { |
| 706 uc32 c = ScanIdentifierUnicodeEscape(); | 813 uc32 c = ScanIdentifierUnicodeEscape(); |
| 707 // Only allow legal identifier part characters. | 814 // Only allow legal identifier part characters. |
| 708 if (!unicode_cache_->IsIdentifierPart(c)) return Token::ILLEGAL; | 815 if (!unicode_cache_->IsIdentifierPart(c)) return Token::ILLEGAL; |
| (...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 778 } | 885 } |
| 779 } | 886 } |
| 780 AddLiteralCharAdvance(); | 887 AddLiteralCharAdvance(); |
| 781 } | 888 } |
| 782 literal.Complete(); | 889 literal.Complete(); |
| 783 | 890 |
| 784 next_.location.end_pos = source_pos() - 1; | 891 next_.location.end_pos = source_pos() - 1; |
| 785 return true; | 892 return true; |
| 786 } | 893 } |
| 787 | 894 |
| 788 // ---------------------------------------------------------------------------- | |
| 789 // Keyword Matcher | |
| 790 | |
| 791 const KeywordMatcher::FirstState KeywordMatcher::first_states_[] = { | |
| 792 { "break", KEYWORD_PREFIX, Token::BREAK }, | |
| 793 { NULL, C, Token::ILLEGAL }, | |
| 794 { NULL, D, Token::ILLEGAL }, | |
| 795 { NULL, E, Token::ILLEGAL }, | |
| 796 { NULL, F, Token::ILLEGAL }, | |
| 797 { NULL, UNMATCHABLE, Token::ILLEGAL }, | |
| 798 { NULL, UNMATCHABLE, Token::ILLEGAL }, | |
| 799 { NULL, I, Token::ILLEGAL }, | |
| 800 { NULL, UNMATCHABLE, Token::ILLEGAL }, | |
| 801 { NULL, UNMATCHABLE, Token::ILLEGAL }, | |
| 802 { "let", KEYWORD_PREFIX, Token::FUTURE_STRICT_RESERVED_WORD }, | |
| 803 { NULL, UNMATCHABLE, Token::ILLEGAL }, | |
| 804 { NULL, N, Token::ILLEGAL }, | |
| 805 { NULL, UNMATCHABLE, Token::ILLEGAL }, | |
| 806 { NULL, P, Token::ILLEGAL }, | |
| 807 { NULL, UNMATCHABLE, Token::ILLEGAL }, | |
| 808 { "return", KEYWORD_PREFIX, Token::RETURN }, | |
| 809 { NULL, S, Token::ILLEGAL }, | |
| 810 { NULL, T, Token::ILLEGAL }, | |
| 811 { NULL, UNMATCHABLE, Token::ILLEGAL }, | |
| 812 { NULL, V, Token::ILLEGAL }, | |
| 813 { NULL, W, Token::ILLEGAL }, | |
| 814 { NULL, UNMATCHABLE, Token::ILLEGAL }, | |
| 815 { "yield", KEYWORD_PREFIX, Token::FUTURE_STRICT_RESERVED_WORD } | |
| 816 }; | |
| 817 | |
| 818 | |
| 819 void KeywordMatcher::Step(unibrow::uchar input) { | |
| 820 switch (state_) { | |
| 821 case INITIAL: { | |
| 822 // matching the first character is the only state with significant fanout. | |
| 823 // Match only lower-case letters in range 'b'..'y'. | |
| 824 unsigned int offset = input - kFirstCharRangeMin; | |
| 825 if (offset < kFirstCharRangeLength) { | |
| 826 state_ = first_states_[offset].state; | |
| 827 if (state_ == KEYWORD_PREFIX) { | |
| 828 keyword_ = first_states_[offset].keyword; | |
| 829 counter_ = 1; | |
| 830 keyword_token_ = first_states_[offset].token; | |
| 831 } | |
| 832 return; | |
| 833 } | |
| 834 break; | |
| 835 } | |
| 836 case KEYWORD_PREFIX: | |
| 837 if (static_cast<unibrow::uchar>(keyword_[counter_]) == input) { | |
| 838 counter_++; | |
| 839 if (keyword_[counter_] == '\0') { | |
| 840 state_ = KEYWORD_MATCHED; | |
| 841 token_ = keyword_token_; | |
| 842 } | |
| 843 return; | |
| 844 } | |
| 845 break; | |
| 846 case KEYWORD_MATCHED: | |
| 847 token_ = Token::IDENTIFIER; | |
| 848 break; | |
| 849 case C: | |
| 850 if (MatchState(input, 'a', CA)) return; | |
| 851 if (MatchKeywordStart(input, "class", 1, | |
| 852 Token::FUTURE_RESERVED_WORD)) return; | |
| 853 if (MatchState(input, 'o', CO)) return; | |
| 854 break; | |
| 855 case CA: | |
| 856 if (MatchKeywordStart(input, "case", 2, Token::CASE)) return; | |
| 857 if (MatchKeywordStart(input, "catch", 2, Token::CATCH)) return; | |
| 858 break; | |
| 859 case CO: | |
| 860 if (MatchState(input, 'n', CON)) return; | |
| 861 break; | |
| 862 case CON: | |
| 863 if (MatchKeywordStart(input, "const", 3, Token::CONST)) return; | |
| 864 if (MatchKeywordStart(input, "continue", 3, Token::CONTINUE)) return; | |
| 865 break; | |
| 866 case D: | |
| 867 if (MatchState(input, 'e', DE)) return; | |
| 868 if (MatchKeyword(input, 'o', KEYWORD_MATCHED, Token::DO)) return; | |
| 869 break; | |
| 870 case DE: | |
| 871 if (MatchKeywordStart(input, "debugger", 2, Token::DEBUGGER)) return; | |
| 872 if (MatchKeywordStart(input, "default", 2, Token::DEFAULT)) return; | |
| 873 if (MatchKeywordStart(input, "delete", 2, Token::DELETE)) return; | |
| 874 break; | |
| 875 case E: | |
| 876 if (MatchKeywordStart(input, "else", 1, Token::ELSE)) return; | |
| 877 if (MatchKeywordStart(input, "enum", 1, | |
| 878 Token::FUTURE_RESERVED_WORD)) return; | |
| 879 if (MatchState(input, 'x', EX)) return; | |
| 880 break; | |
| 881 case EX: | |
| 882 if (MatchKeywordStart(input, "export", 2, | |
| 883 Token::FUTURE_RESERVED_WORD)) return; | |
| 884 if (MatchKeywordStart(input, "extends", 2, | |
| 885 Token::FUTURE_RESERVED_WORD)) return; | |
| 886 break; | |
| 887 case F: | |
| 888 if (MatchKeywordStart(input, "false", 1, Token::FALSE_LITERAL)) return; | |
| 889 if (MatchKeywordStart(input, "finally", 1, Token::FINALLY)) return; | |
| 890 if (MatchKeywordStart(input, "for", 1, Token::FOR)) return; | |
| 891 if (MatchKeywordStart(input, "function", 1, Token::FUNCTION)) return; | |
| 892 break; | |
| 893 case I: | |
| 894 if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF)) return; | |
| 895 if (MatchState(input, 'm', IM)) return; | |
| 896 if (MatchKeyword(input, 'n', IN, Token::IN)) return; | |
| 897 break; | |
| 898 case IM: | |
| 899 if (MatchState(input, 'p', IMP)) return; | |
| 900 break; | |
| 901 case IMP: | |
| 902 if (MatchKeywordStart(input, "implements", 3, | |
| 903 Token::FUTURE_STRICT_RESERVED_WORD )) return; | |
| 904 if (MatchKeywordStart(input, "import", 3, | |
| 905 Token::FUTURE_RESERVED_WORD)) return; | |
| 906 break; | |
| 907 case IN: | |
| 908 token_ = Token::IDENTIFIER; | |
| 909 if (MatchKeywordStart(input, "interface", 2, | |
| 910 Token::FUTURE_STRICT_RESERVED_WORD)) return; | |
| 911 if (MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) return; | |
| 912 break; | |
| 913 case N: | |
| 914 if (MatchKeywordStart(input, "new", 1, Token::NEW)) return; | |
| 915 if (MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL)) return; | |
| 916 break; | |
| 917 case P: | |
| 918 if (MatchKeywordStart(input, "package", 1, | |
| 919 Token::FUTURE_STRICT_RESERVED_WORD)) return; | |
| 920 if (MatchState(input, 'r', PR)) return; | |
| 921 if (MatchKeywordStart(input, "public", 1, | |
| 922 Token::FUTURE_STRICT_RESERVED_WORD)) return; | |
| 923 break; | |
| 924 case PR: | |
| 925 if (MatchKeywordStart(input, "private", 2, | |
| 926 Token::FUTURE_STRICT_RESERVED_WORD)) return; | |
| 927 if (MatchKeywordStart(input, "protected", 2, | |
| 928 Token::FUTURE_STRICT_RESERVED_WORD)) return; | |
| 929 break; | |
| 930 case S: | |
| 931 if (MatchKeywordStart(input, "static", 1, | |
| 932 Token::FUTURE_STRICT_RESERVED_WORD)) return; | |
| 933 if (MatchKeywordStart(input, "super", 1, | |
| 934 Token::FUTURE_RESERVED_WORD)) return; | |
| 935 if (MatchKeywordStart(input, "switch", 1, | |
| 936 Token::SWITCH)) return; | |
| 937 break; | |
| 938 case T: | |
| 939 if (MatchState(input, 'h', TH)) return; | |
| 940 if (MatchState(input, 'r', TR)) return; | |
| 941 if (MatchKeywordStart(input, "typeof", 1, Token::TYPEOF)) return; | |
| 942 break; | |
| 943 case TH: | |
| 944 if (MatchKeywordStart(input, "this", 2, Token::THIS)) return; | |
| 945 if (MatchKeywordStart(input, "throw", 2, Token::THROW)) return; | |
| 946 break; | |
| 947 case TR: | |
| 948 if (MatchKeywordStart(input, "true", 2, Token::TRUE_LITERAL)) return; | |
| 949 if (MatchKeyword(input, 'y', KEYWORD_MATCHED, Token::TRY)) return; | |
| 950 break; | |
| 951 case V: | |
| 952 if (MatchKeywordStart(input, "var", 1, Token::VAR)) return; | |
| 953 if (MatchKeywordStart(input, "void", 1, Token::VOID)) return; | |
| 954 break; | |
| 955 case W: | |
| 956 if (MatchKeywordStart(input, "while", 1, Token::WHILE)) return; | |
| 957 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return; | |
| 958 break; | |
| 959 case UNMATCHABLE: | |
| 960 break; | |
| 961 } | |
| 962 // On fallthrough, it's a failure. | |
| 963 state_ = UNMATCHABLE; | |
| 964 } | |
| 965 | |
| 966 } } // namespace v8::internal | 895 } } // namespace v8::internal |
| OLD | NEW |