| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 23 matching lines...) Expand all Loading... |
| 34 namespace v8 { | 34 namespace v8 { |
| 35 namespace internal { | 35 namespace internal { |
| 36 | 36 |
| 37 // ---------------------------------------------------------------------------- | 37 // ---------------------------------------------------------------------------- |
| 38 // Scanner | 38 // Scanner |
| 39 | 39 |
| 40 Scanner::Scanner(UnicodeCache* unicode_cache) | 40 Scanner::Scanner(UnicodeCache* unicode_cache) |
| 41 : unicode_cache_(unicode_cache) { } | 41 : unicode_cache_(unicode_cache) { } |
| 42 | 42 |
| 43 | 43 |
| 44 uc32 Scanner::ScanHexEscape(uc32 c, int length) { | 44 uc32 Scanner::ScanHexNumber(int expected_length) { |
| 45 ASSERT(length <= 4); // prevent overflow | 45 ASSERT(expected_length <= 4); // prevent overflow |
| 46 | 46 |
| 47 uc32 digits[4]; | 47 uc32 digits[4] = { 0, 0, 0, 0 }; |
| 48 uc32 x = 0; | 48 uc32 x = 0; |
| 49 for (int i = 0; i < length; i++) { | 49 for (int i = 0; i < expected_length; i++) { |
| 50 digits[i] = c0_; | 50 digits[i] = c0_; |
| 51 int d = HexValue(c0_); | 51 int d = HexValue(c0_); |
| 52 if (d < 0) { | 52 if (d < 0) { |
| 53 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes | 53 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes |
| 54 // should be illegal, but other JS VMs just return the | 54 // should be illegal, but other JS VMs just return the |
| 55 // non-escaped version of the original character. | 55 // non-escaped version of the original character. |
| 56 | 56 |
| 57 // Push back digits read, except the last one (in c0_). | 57 // Push back digits that we have advanced past. |
| 58 for (int j = i-1; j >= 0; j--) { | 58 for (int j = i-1; j >= 0; j--) { |
| 59 PushBack(digits[j]); | 59 PushBack(digits[j]); |
| 60 } | 60 } |
| 61 // Notice: No handling of error - treat it as "\u"->"u". | 61 return -1; |
| 62 return c; | |
| 63 } | 62 } |
| 64 x = x * 16 + d; | 63 x = x * 16 + d; |
| 65 Advance(); | 64 Advance(); |
| 66 } | 65 } |
| 67 | 66 |
| 68 return x; | 67 return x; |
| 69 } | 68 } |
| 70 | 69 |
| 71 | 70 |
| 72 | 71 |
| 73 // ---------------------------------------------------------------------------- | 72 // ---------------------------------------------------------------------------- |
| 74 // JavaScriptScanner | 73 // JavaScriptScanner |
| 75 | 74 |
| 76 JavaScriptScanner::JavaScriptScanner(UnicodeCache* scanner_contants) | 75 JavaScriptScanner::JavaScriptScanner(UnicodeCache* scanner_contants) |
| 77 : Scanner(scanner_contants), octal_pos_(Location::invalid()) { } | 76 : Scanner(scanner_contants), |
| 77 octal_pos_(Location::invalid()), |
| 78 harmony_block_scoping_(false) { } |
| 78 | 79 |
| 79 | 80 |
| 80 void JavaScriptScanner::Initialize(UC16CharacterStream* source) { | 81 void JavaScriptScanner::Initialize(UC16CharacterStream* source) { |
| 81 source_ = source; | 82 source_ = source; |
| 82 // Need to capture identifiers in order to recognize "get" and "set" | 83 // Need to capture identifiers in order to recognize "get" and "set" |
| 83 // in object literals. | 84 // in object literals. |
| 84 Init(); | 85 Init(); |
| 85 // Skip initial whitespace allowing HTML comment ends just like | 86 // Skip initial whitespace allowing HTML comment ends just like |
| 86 // after a newline and scan first token. | 87 // after a newline and scan first token. |
| 87 has_line_terminator_before_next_ = true; | 88 has_line_terminator_before_next_ = true; |
| 88 SkipWhiteSpace(); | 89 SkipWhiteSpace(); |
| 89 Scan(); | 90 Scan(); |
| 90 } | 91 } |
| 91 | 92 |
| 92 | 93 |
| 93 // Ensure that tokens can be stored in a byte. | 94 // Ensure that tokens can be stored in a byte. |
| 94 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); | 95 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); |
| 95 | 96 |
| 96 // Table of one-character tokens, by character (0x00..0x7f only). | 97 // Table of one-character tokens, by character (0x00..0x7f only). |
| 97 static byte one_char_tokens[] = { | 98 static const byte one_char_tokens[] = { |
| 98 Token::ILLEGAL, | 99 Token::ILLEGAL, |
| 99 Token::ILLEGAL, | 100 Token::ILLEGAL, |
| 100 Token::ILLEGAL, | 101 Token::ILLEGAL, |
| 101 Token::ILLEGAL, | 102 Token::ILLEGAL, |
| 102 Token::ILLEGAL, | 103 Token::ILLEGAL, |
| 103 Token::ILLEGAL, | 104 Token::ILLEGAL, |
| 104 Token::ILLEGAL, | 105 Token::ILLEGAL, |
| 105 Token::ILLEGAL, | 106 Token::ILLEGAL, |
| 106 Token::ILLEGAL, | 107 Token::ILLEGAL, |
| 107 Token::ILLEGAL, | 108 Token::ILLEGAL, |
| (...skipping 523 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 631 | 632 |
| 632 switch (c) { | 633 switch (c) { |
| 633 case '\'': // fall through | 634 case '\'': // fall through |
| 634 case '"' : // fall through | 635 case '"' : // fall through |
| 635 case '\\': break; | 636 case '\\': break; |
| 636 case 'b' : c = '\b'; break; | 637 case 'b' : c = '\b'; break; |
| 637 case 'f' : c = '\f'; break; | 638 case 'f' : c = '\f'; break; |
| 638 case 'n' : c = '\n'; break; | 639 case 'n' : c = '\n'; break; |
| 639 case 'r' : c = '\r'; break; | 640 case 'r' : c = '\r'; break; |
| 640 case 't' : c = '\t'; break; | 641 case 't' : c = '\t'; break; |
| 641 case 'u' : c = ScanHexEscape(c, 4); break; | 642 case 'u' : { |
| 643 c = ScanHexNumber(4); |
| 644 if (c < 0) c = 'u'; |
| 645 break; |
| 646 } |
| 642 case 'v' : c = '\v'; break; | 647 case 'v' : c = '\v'; break; |
| 643 case 'x' : c = ScanHexEscape(c, 2); break; | 648 case 'x' : { |
| 649 c = ScanHexNumber(2); |
| 650 if (c < 0) c = 'x'; |
| 651 break; |
| 652 } |
| 644 case '0' : // fall through | 653 case '0' : // fall through |
| 645 case '1' : // fall through | 654 case '1' : // fall through |
| 646 case '2' : // fall through | 655 case '2' : // fall through |
| 647 case '3' : // fall through | 656 case '3' : // fall through |
| 648 case '4' : // fall through | 657 case '4' : // fall through |
| 649 case '5' : // fall through | 658 case '5' : // fall through |
| 650 case '6' : // fall through | 659 case '6' : // fall through |
| 651 case '7' : c = ScanOctalEscape(c, 2); break; | 660 case '7' : c = ScanOctalEscape(c, 2); break; |
| 652 } | 661 } |
| 653 | 662 |
| (...skipping 139 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 793 return Token::ILLEGAL; | 802 return Token::ILLEGAL; |
| 794 | 803 |
| 795 literal.Complete(); | 804 literal.Complete(); |
| 796 | 805 |
| 797 return Token::NUMBER; | 806 return Token::NUMBER; |
| 798 } | 807 } |
| 799 | 808 |
| 800 | 809 |
| 801 uc32 JavaScriptScanner::ScanIdentifierUnicodeEscape() { | 810 uc32 JavaScriptScanner::ScanIdentifierUnicodeEscape() { |
| 802 Advance(); | 811 Advance(); |
| 803 if (c0_ != 'u') return unibrow::Utf8::kBadChar; | 812 if (c0_ != 'u') return -1; |
| 804 Advance(); | 813 Advance(); |
| 805 uc32 c = ScanHexEscape('u', 4); | 814 uc32 result = ScanHexNumber(4); |
| 806 // We do not allow a unicode escape sequence to start another | 815 if (result < 0) PushBack('u'); |
| 807 // unicode escape sequence. | 816 return result; |
| 808 if (c == '\\') return unibrow::Utf8::kBadChar; | 817 } |
| 809 return c; | 818 |
| 819 |
| 820 // ---------------------------------------------------------------------------- |
| 821 // Keyword Matcher |
| 822 |
| 823 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \ |
| 824 KEYWORD_GROUP('b') \ |
| 825 KEYWORD("break", Token::BREAK) \ |
| 826 KEYWORD_GROUP('c') \ |
| 827 KEYWORD("case", Token::CASE) \ |
| 828 KEYWORD("catch", Token::CATCH) \ |
| 829 KEYWORD("class", Token::FUTURE_RESERVED_WORD) \ |
| 830 KEYWORD("const", Token::CONST) \ |
| 831 KEYWORD("continue", Token::CONTINUE) \ |
| 832 KEYWORD_GROUP('d') \ |
| 833 KEYWORD("debugger", Token::DEBUGGER) \ |
| 834 KEYWORD("default", Token::DEFAULT) \ |
| 835 KEYWORD("delete", Token::DELETE) \ |
| 836 KEYWORD("do", Token::DO) \ |
| 837 KEYWORD_GROUP('e') \ |
| 838 KEYWORD("else", Token::ELSE) \ |
| 839 KEYWORD("enum", Token::FUTURE_RESERVED_WORD) \ |
| 840 KEYWORD("export", Token::FUTURE_RESERVED_WORD) \ |
| 841 KEYWORD("extends", Token::FUTURE_RESERVED_WORD) \ |
| 842 KEYWORD_GROUP('f') \ |
| 843 KEYWORD("false", Token::FALSE_LITERAL) \ |
| 844 KEYWORD("finally", Token::FINALLY) \ |
| 845 KEYWORD("for", Token::FOR) \ |
| 846 KEYWORD("function", Token::FUNCTION) \ |
| 847 KEYWORD_GROUP('i') \ |
| 848 KEYWORD("if", Token::IF) \ |
| 849 KEYWORD("implements", Token::FUTURE_STRICT_RESERVED_WORD) \ |
| 850 KEYWORD("import", Token::FUTURE_RESERVED_WORD) \ |
| 851 KEYWORD("in", Token::IN) \ |
| 852 KEYWORD("instanceof", Token::INSTANCEOF) \ |
| 853 KEYWORD("interface", Token::FUTURE_STRICT_RESERVED_WORD) \ |
| 854 KEYWORD_GROUP('l') \ |
| 855 KEYWORD("let", harmony_block_scoping \ |
| 856 ? Token::LET : Token::FUTURE_STRICT_RESERVED_WORD) \ |
| 857 KEYWORD_GROUP('n') \ |
| 858 KEYWORD("new", Token::NEW) \ |
| 859 KEYWORD("null", Token::NULL_LITERAL) \ |
| 860 KEYWORD_GROUP('p') \ |
| 861 KEYWORD("package", Token::FUTURE_STRICT_RESERVED_WORD) \ |
| 862 KEYWORD("private", Token::FUTURE_STRICT_RESERVED_WORD) \ |
| 863 KEYWORD("protected", Token::FUTURE_STRICT_RESERVED_WORD) \ |
| 864 KEYWORD("public", Token::FUTURE_STRICT_RESERVED_WORD) \ |
| 865 KEYWORD_GROUP('r') \ |
| 866 KEYWORD("return", Token::RETURN) \ |
| 867 KEYWORD_GROUP('s') \ |
| 868 KEYWORD("static", Token::FUTURE_STRICT_RESERVED_WORD) \ |
| 869 KEYWORD("super", Token::FUTURE_RESERVED_WORD) \ |
| 870 KEYWORD("switch", Token::SWITCH) \ |
| 871 KEYWORD_GROUP('t') \ |
| 872 KEYWORD("this", Token::THIS) \ |
| 873 KEYWORD("throw", Token::THROW) \ |
| 874 KEYWORD("true", Token::TRUE_LITERAL) \ |
| 875 KEYWORD("try", Token::TRY) \ |
| 876 KEYWORD("typeof", Token::TYPEOF) \ |
| 877 KEYWORD_GROUP('v') \ |
| 878 KEYWORD("var", Token::VAR) \ |
| 879 KEYWORD("void", Token::VOID) \ |
| 880 KEYWORD_GROUP('w') \ |
| 881 KEYWORD("while", Token::WHILE) \ |
| 882 KEYWORD("with", Token::WITH) \ |
| 883 KEYWORD_GROUP('y') \ |
| 884 KEYWORD("yield", Token::FUTURE_STRICT_RESERVED_WORD) |
| 885 |
| 886 |
| 887 static Token::Value KeywordOrIdentifierToken(const char* input, |
| 888 int input_length, |
| 889 bool harmony_block_scoping) { |
| 890 ASSERT(input_length >= 1); |
| 891 const int kMinLength = 2; |
| 892 const int kMaxLength = 10; |
| 893 if (input_length < kMinLength || input_length > kMaxLength) { |
| 894 return Token::IDENTIFIER; |
| 895 } |
| 896 switch (input[0]) { |
| 897 default: |
| 898 #define KEYWORD_GROUP_CASE(ch) \ |
| 899 break; \ |
| 900 case ch: |
| 901 #define KEYWORD(keyword, token) \ |
| 902 { \ |
| 903 /* 'keyword' is a char array, so sizeof(keyword) is */ \ |
| 904 /* strlen(keyword) plus 1 for the NUL char. */ \ |
| 905 const int keyword_length = sizeof(keyword) - 1; \ |
| 906 STATIC_ASSERT(keyword_length >= kMinLength); \ |
| 907 STATIC_ASSERT(keyword_length <= kMaxLength); \ |
| 908 if (input_length == keyword_length && \ |
| 909 input[1] == keyword[1] && \ |
| 910 (keyword_length <= 2 || input[2] == keyword[2]) && \ |
| 911 (keyword_length <= 3 || input[3] == keyword[3]) && \ |
| 912 (keyword_length <= 4 || input[4] == keyword[4]) && \ |
| 913 (keyword_length <= 5 || input[5] == keyword[5]) && \ |
| 914 (keyword_length <= 6 || input[6] == keyword[6]) && \ |
| 915 (keyword_length <= 7 || input[7] == keyword[7]) && \ |
| 916 (keyword_length <= 8 || input[8] == keyword[8]) && \ |
| 917 (keyword_length <= 9 || input[9] == keyword[9])) { \ |
| 918 return token; \ |
| 919 } \ |
| 920 } |
| 921 KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD) |
| 922 } |
| 923 return Token::IDENTIFIER; |
| 810 } | 924 } |
| 811 | 925 |
| 812 | 926 |
| 813 Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() { | 927 Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() { |
| 814 ASSERT(unicode_cache_->IsIdentifierStart(c0_)); | 928 ASSERT(unicode_cache_->IsIdentifierStart(c0_)); |
| 815 LiteralScope literal(this); | 929 LiteralScope literal(this); |
| 816 KeywordMatcher keyword_match; | |
| 817 // Scan identifier start character. | 930 // Scan identifier start character. |
| 818 if (c0_ == '\\') { | 931 if (c0_ == '\\') { |
| 819 uc32 c = ScanIdentifierUnicodeEscape(); | 932 uc32 c = ScanIdentifierUnicodeEscape(); |
| 820 // Only allow legal identifier start characters. | 933 // Only allow legal identifier start characters. |
| 821 if (!unicode_cache_->IsIdentifierStart(c)) return Token::ILLEGAL; | 934 if (c < 0 || |
| 935 c == '\\' || // No recursive escapes. |
| 936 !unicode_cache_->IsIdentifierStart(c)) { |
| 937 return Token::ILLEGAL; |
| 938 } |
| 822 AddLiteralChar(c); | 939 AddLiteralChar(c); |
| 823 return ScanIdentifierSuffix(&literal); | 940 return ScanIdentifierSuffix(&literal); |
| 824 } | 941 } |
| 825 | 942 |
| 826 uc32 first_char = c0_; | 943 uc32 first_char = c0_; |
| 827 Advance(); | 944 Advance(); |
| 828 AddLiteralChar(first_char); | 945 AddLiteralChar(first_char); |
| 829 if (!keyword_match.AddChar(first_char)) { | |
| 830 return ScanIdentifierSuffix(&literal); | |
| 831 } | |
| 832 | 946 |
| 833 // Scan the rest of the identifier characters. | 947 // Scan the rest of the identifier characters. |
| 834 while (unicode_cache_->IsIdentifierPart(c0_)) { | 948 while (unicode_cache_->IsIdentifierPart(c0_)) { |
| 835 if (c0_ != '\\') { | 949 if (c0_ != '\\') { |
| 836 uc32 next_char = c0_; | 950 uc32 next_char = c0_; |
| 837 Advance(); | 951 Advance(); |
| 838 AddLiteralChar(next_char); | 952 AddLiteralChar(next_char); |
| 839 if (keyword_match.AddChar(next_char)) continue; | 953 continue; |
| 840 } | 954 } |
| 841 // Fallthrough if no loner able to complete keyword. | 955 // Fallthrough if no longer able to complete keyword. |
| 842 return ScanIdentifierSuffix(&literal); | 956 return ScanIdentifierSuffix(&literal); |
| 843 } | 957 } |
| 958 |
| 844 literal.Complete(); | 959 literal.Complete(); |
| 845 | 960 |
| 846 return keyword_match.token(); | 961 if (next_.literal_chars->is_ascii()) { |
| 962 Vector<const char> chars = next_.literal_chars->ascii_literal(); |
| 963 return KeywordOrIdentifierToken(chars.start(), |
| 964 chars.length(), |
| 965 harmony_block_scoping_); |
| 966 } |
| 967 |
| 968 return Token::IDENTIFIER; |
| 847 } | 969 } |
| 848 | 970 |
| 849 | 971 |
| 850 Token::Value JavaScriptScanner::ScanIdentifierSuffix(LiteralScope* literal) { | 972 Token::Value JavaScriptScanner::ScanIdentifierSuffix(LiteralScope* literal) { |
| 851 // Scan the rest of the identifier characters. | 973 // Scan the rest of the identifier characters. |
| 852 while (unicode_cache_->IsIdentifierPart(c0_)) { | 974 while (unicode_cache_->IsIdentifierPart(c0_)) { |
| 853 if (c0_ == '\\') { | 975 if (c0_ == '\\') { |
| 854 uc32 c = ScanIdentifierUnicodeEscape(); | 976 uc32 c = ScanIdentifierUnicodeEscape(); |
| 855 // Only allow legal identifier part characters. | 977 // Only allow legal identifier part characters. |
| 856 if (!unicode_cache_->IsIdentifierPart(c)) return Token::ILLEGAL; | 978 if (c < 0 || |
| 979 c == '\\' || |
| 980 !unicode_cache_->IsIdentifierPart(c)) { |
| 981 return Token::ILLEGAL; |
| 982 } |
| 857 AddLiteralChar(c); | 983 AddLiteralChar(c); |
| 858 } else { | 984 } else { |
| 859 AddLiteralChar(c0_); | 985 AddLiteralChar(c0_); |
| 860 Advance(); | 986 Advance(); |
| 861 } | 987 } |
| 862 } | 988 } |
| 863 literal->Complete(); | 989 literal->Complete(); |
| 864 | 990 |
| 865 return Token::IDENTIFIER; | 991 return Token::IDENTIFIER; |
| 866 } | 992 } |
| 867 | 993 |
| 868 | 994 |
| 869 bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) { | 995 bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) { |
| 870 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags | 996 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags |
| 871 bool in_character_class = false; | 997 bool in_character_class = false; |
| 872 | 998 |
| 873 // Previous token is either '/' or '/=', in the second case, the | 999 // Previous token is either '/' or '/=', in the second case, the |
| 874 // pattern starts at =. | 1000 // pattern starts at =. |
| 875 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); | 1001 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); |
| 876 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); | 1002 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); |
| 877 | 1003 |
| 878 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, | 1004 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, |
| 879 // the scanner should pass uninterpreted bodies to the RegExp | 1005 // the scanner should pass uninterpreted bodies to the RegExp |
| 880 // constructor. | 1006 // constructor. |
| 881 LiteralScope literal(this); | 1007 LiteralScope literal(this); |
| 882 if (seen_equal) | 1008 if (seen_equal) { |
| 883 AddLiteralChar('='); | 1009 AddLiteralChar('='); |
| 1010 } |
| 884 | 1011 |
| 885 while (c0_ != '/' || in_character_class) { | 1012 while (c0_ != '/' || in_character_class) { |
| 886 if (unicode_cache_->IsLineTerminator(c0_) || c0_ < 0) return false; | 1013 if (unicode_cache_->IsLineTerminator(c0_) || c0_ < 0) return false; |
| 887 if (c0_ == '\\') { // Escape sequence. | 1014 if (c0_ == '\\') { // Escape sequence. |
| 888 AddLiteralCharAdvance(); | 1015 AddLiteralCharAdvance(); |
| 889 if (unicode_cache_->IsLineTerminator(c0_) || c0_ < 0) return false; | 1016 if (unicode_cache_->IsLineTerminator(c0_) || c0_ < 0) return false; |
| 890 AddLiteralCharAdvance(); | 1017 AddLiteralCharAdvance(); |
| 891 // If the escape allows more characters, i.e., \x??, \u????, or \c?, | 1018 // If the escape allows more characters, i.e., \x??, \u????, or \c?, |
| 892 // only "safe" characters are allowed (letters, digits, underscore), | 1019 // only "safe" characters are allowed (letters, digits, underscore), |
| 893 // otherwise the escape isn't valid and the invalid character has | 1020 // otherwise the escape isn't valid and the invalid character has |
| (...skipping 11 matching lines...) Expand all Loading... |
| 905 } | 1032 } |
| 906 } | 1033 } |
| 907 Advance(); // consume '/' | 1034 Advance(); // consume '/' |
| 908 | 1035 |
| 909 literal.Complete(); | 1036 literal.Complete(); |
| 910 | 1037 |
| 911 return true; | 1038 return true; |
| 912 } | 1039 } |
| 913 | 1040 |
| 914 | 1041 |
| 1042 bool JavaScriptScanner::ScanLiteralUnicodeEscape() { |
| 1043 ASSERT(c0_ == '\\'); |
| 1044 uc32 chars_read[6] = {'\\', 'u', 0, 0, 0, 0}; |
| 1045 Advance(); |
| 1046 int i = 1; |
| 1047 if (c0_ == 'u') { |
| 1048 i++; |
| 1049 while (i < 6) { |
| 1050 Advance(); |
| 1051 if (!IsHexDigit(c0_)) break; |
| 1052 chars_read[i] = c0_; |
| 1053 i++; |
| 1054 } |
| 1055 } |
| 1056 if (i < 6) { |
| 1057 // Incomplete escape. Undo all advances and return false. |
| 1058 while (i > 0) { |
| 1059 i--; |
| 1060 PushBack(chars_read[i]); |
| 1061 } |
| 1062 return false; |
| 1063 } |
| 1064 // Complete escape. Add all chars to current literal buffer. |
| 1065 for (int i = 0; i < 6; i++) { |
| 1066 AddLiteralChar(chars_read[i]); |
| 1067 } |
| 1068 return true; |
| 1069 } |
| 1070 |
| 1071 |
| 915 bool JavaScriptScanner::ScanRegExpFlags() { | 1072 bool JavaScriptScanner::ScanRegExpFlags() { |
| 916 // Scan regular expression flags. | 1073 // Scan regular expression flags. |
| 917 LiteralScope literal(this); | 1074 LiteralScope literal(this); |
| 918 while (unicode_cache_->IsIdentifierPart(c0_)) { | 1075 while (unicode_cache_->IsIdentifierPart(c0_)) { |
| 919 if (c0_ == '\\') { | 1076 if (c0_ != '\\') { |
| 920 uc32 c = ScanIdentifierUnicodeEscape(); | 1077 AddLiteralCharAdvance(); |
| 921 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) { | 1078 } else { |
| 922 // We allow any escaped character, unlike the restriction on | 1079 if (!ScanLiteralUnicodeEscape()) { |
| 923 // IdentifierPart when it is used to build an IdentifierName. | 1080 break; |
| 924 AddLiteralChar(c); | |
| 925 continue; | |
| 926 } | 1081 } |
| 927 } | 1082 } |
| 928 AddLiteralCharAdvance(); | |
| 929 } | 1083 } |
| 930 literal.Complete(); | 1084 literal.Complete(); |
| 931 | 1085 |
| 932 next_.location.end_pos = source_pos() - 1; | 1086 next_.location.end_pos = source_pos() - 1; |
| 933 return true; | 1087 return true; |
| 934 } | 1088 } |
| 935 | 1089 |
| 936 // ---------------------------------------------------------------------------- | |
| 937 // Keyword Matcher | |
| 938 | |
| 939 const KeywordMatcher::FirstState KeywordMatcher::first_states_[] = { | |
| 940 { "break", KEYWORD_PREFIX, Token::BREAK }, | |
| 941 { NULL, C, Token::ILLEGAL }, | |
| 942 { NULL, D, Token::ILLEGAL }, | |
| 943 { NULL, E, Token::ILLEGAL }, | |
| 944 { NULL, F, Token::ILLEGAL }, | |
| 945 { NULL, UNMATCHABLE, Token::ILLEGAL }, | |
| 946 { NULL, UNMATCHABLE, Token::ILLEGAL }, | |
| 947 { NULL, I, Token::ILLEGAL }, | |
| 948 { NULL, UNMATCHABLE, Token::ILLEGAL }, | |
| 949 { NULL, UNMATCHABLE, Token::ILLEGAL }, | |
| 950 { "let", KEYWORD_PREFIX, Token::FUTURE_STRICT_RESERVED_WORD }, | |
| 951 { NULL, UNMATCHABLE, Token::ILLEGAL }, | |
| 952 { NULL, N, Token::ILLEGAL }, | |
| 953 { NULL, UNMATCHABLE, Token::ILLEGAL }, | |
| 954 { NULL, P, Token::ILLEGAL }, | |
| 955 { NULL, UNMATCHABLE, Token::ILLEGAL }, | |
| 956 { "return", KEYWORD_PREFIX, Token::RETURN }, | |
| 957 { NULL, S, Token::ILLEGAL }, | |
| 958 { NULL, T, Token::ILLEGAL }, | |
| 959 { NULL, UNMATCHABLE, Token::ILLEGAL }, | |
| 960 { NULL, V, Token::ILLEGAL }, | |
| 961 { NULL, W, Token::ILLEGAL }, | |
| 962 { NULL, UNMATCHABLE, Token::ILLEGAL }, | |
| 963 { "yield", KEYWORD_PREFIX, Token::FUTURE_STRICT_RESERVED_WORD } | |
| 964 }; | |
| 965 | |
| 966 | |
| 967 void KeywordMatcher::Step(unibrow::uchar input) { | |
| 968 switch (state_) { | |
| 969 case INITIAL: { | |
| 970 // matching the first character is the only state with significant fanout. | |
| 971 // Match only lower-case letters in range 'b'..'y'. | |
| 972 unsigned int offset = input - kFirstCharRangeMin; | |
| 973 if (offset < kFirstCharRangeLength) { | |
| 974 state_ = first_states_[offset].state; | |
| 975 if (state_ == KEYWORD_PREFIX) { | |
| 976 keyword_ = first_states_[offset].keyword; | |
| 977 counter_ = 1; | |
| 978 keyword_token_ = first_states_[offset].token; | |
| 979 } | |
| 980 return; | |
| 981 } | |
| 982 break; | |
| 983 } | |
| 984 case KEYWORD_PREFIX: | |
| 985 if (static_cast<unibrow::uchar>(keyword_[counter_]) == input) { | |
| 986 counter_++; | |
| 987 if (keyword_[counter_] == '\0') { | |
| 988 state_ = KEYWORD_MATCHED; | |
| 989 token_ = keyword_token_; | |
| 990 } | |
| 991 return; | |
| 992 } | |
| 993 break; | |
| 994 case KEYWORD_MATCHED: | |
| 995 token_ = Token::IDENTIFIER; | |
| 996 break; | |
| 997 case C: | |
| 998 if (MatchState(input, 'a', CA)) return; | |
| 999 if (MatchKeywordStart(input, "class", 1, | |
| 1000 Token::FUTURE_RESERVED_WORD)) return; | |
| 1001 if (MatchState(input, 'o', CO)) return; | |
| 1002 break; | |
| 1003 case CA: | |
| 1004 if (MatchKeywordStart(input, "case", 2, Token::CASE)) return; | |
| 1005 if (MatchKeywordStart(input, "catch", 2, Token::CATCH)) return; | |
| 1006 break; | |
| 1007 case CO: | |
| 1008 if (MatchState(input, 'n', CON)) return; | |
| 1009 break; | |
| 1010 case CON: | |
| 1011 if (MatchKeywordStart(input, "const", 3, Token::CONST)) return; | |
| 1012 if (MatchKeywordStart(input, "continue", 3, Token::CONTINUE)) return; | |
| 1013 break; | |
| 1014 case D: | |
| 1015 if (MatchState(input, 'e', DE)) return; | |
| 1016 if (MatchKeyword(input, 'o', KEYWORD_MATCHED, Token::DO)) return; | |
| 1017 break; | |
| 1018 case DE: | |
| 1019 if (MatchKeywordStart(input, "debugger", 2, Token::DEBUGGER)) return; | |
| 1020 if (MatchKeywordStart(input, "default", 2, Token::DEFAULT)) return; | |
| 1021 if (MatchKeywordStart(input, "delete", 2, Token::DELETE)) return; | |
| 1022 break; | |
| 1023 case E: | |
| 1024 if (MatchKeywordStart(input, "else", 1, Token::ELSE)) return; | |
| 1025 if (MatchKeywordStart(input, "enum", 1, | |
| 1026 Token::FUTURE_RESERVED_WORD)) return; | |
| 1027 if (MatchState(input, 'x', EX)) return; | |
| 1028 break; | |
| 1029 case EX: | |
| 1030 if (MatchKeywordStart(input, "export", 2, | |
| 1031 Token::FUTURE_RESERVED_WORD)) return; | |
| 1032 if (MatchKeywordStart(input, "extends", 2, | |
| 1033 Token::FUTURE_RESERVED_WORD)) return; | |
| 1034 break; | |
| 1035 case F: | |
| 1036 if (MatchKeywordStart(input, "false", 1, Token::FALSE_LITERAL)) return; | |
| 1037 if (MatchKeywordStart(input, "finally", 1, Token::FINALLY)) return; | |
| 1038 if (MatchKeywordStart(input, "for", 1, Token::FOR)) return; | |
| 1039 if (MatchKeywordStart(input, "function", 1, Token::FUNCTION)) return; | |
| 1040 break; | |
| 1041 case I: | |
| 1042 if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF)) return; | |
| 1043 if (MatchState(input, 'm', IM)) return; | |
| 1044 if (MatchKeyword(input, 'n', IN, Token::IN)) return; | |
| 1045 break; | |
| 1046 case IM: | |
| 1047 if (MatchState(input, 'p', IMP)) return; | |
| 1048 break; | |
| 1049 case IMP: | |
| 1050 if (MatchKeywordStart(input, "implements", 3, | |
| 1051 Token::FUTURE_STRICT_RESERVED_WORD )) return; | |
| 1052 if (MatchKeywordStart(input, "import", 3, | |
| 1053 Token::FUTURE_RESERVED_WORD)) return; | |
| 1054 break; | |
| 1055 case IN: | |
| 1056 token_ = Token::IDENTIFIER; | |
| 1057 if (MatchKeywordStart(input, "interface", 2, | |
| 1058 Token::FUTURE_STRICT_RESERVED_WORD)) return; | |
| 1059 if (MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) return; | |
| 1060 break; | |
| 1061 case N: | |
| 1062 if (MatchKeywordStart(input, "new", 1, Token::NEW)) return; | |
| 1063 if (MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL)) return; | |
| 1064 break; | |
| 1065 case P: | |
| 1066 if (MatchKeywordStart(input, "package", 1, | |
| 1067 Token::FUTURE_STRICT_RESERVED_WORD)) return; | |
| 1068 if (MatchState(input, 'r', PR)) return; | |
| 1069 if (MatchKeywordStart(input, "public", 1, | |
| 1070 Token::FUTURE_STRICT_RESERVED_WORD)) return; | |
| 1071 break; | |
| 1072 case PR: | |
| 1073 if (MatchKeywordStart(input, "private", 2, | |
| 1074 Token::FUTURE_STRICT_RESERVED_WORD)) return; | |
| 1075 if (MatchKeywordStart(input, "protected", 2, | |
| 1076 Token::FUTURE_STRICT_RESERVED_WORD)) return; | |
| 1077 break; | |
| 1078 case S: | |
| 1079 if (MatchKeywordStart(input, "static", 1, | |
| 1080 Token::FUTURE_STRICT_RESERVED_WORD)) return; | |
| 1081 if (MatchKeywordStart(input, "super", 1, | |
| 1082 Token::FUTURE_RESERVED_WORD)) return; | |
| 1083 if (MatchKeywordStart(input, "switch", 1, | |
| 1084 Token::SWITCH)) return; | |
| 1085 break; | |
| 1086 case T: | |
| 1087 if (MatchState(input, 'h', TH)) return; | |
| 1088 if (MatchState(input, 'r', TR)) return; | |
| 1089 if (MatchKeywordStart(input, "typeof", 1, Token::TYPEOF)) return; | |
| 1090 break; | |
| 1091 case TH: | |
| 1092 if (MatchKeywordStart(input, "this", 2, Token::THIS)) return; | |
| 1093 if (MatchKeywordStart(input, "throw", 2, Token::THROW)) return; | |
| 1094 break; | |
| 1095 case TR: | |
| 1096 if (MatchKeywordStart(input, "true", 2, Token::TRUE_LITERAL)) return; | |
| 1097 if (MatchKeyword(input, 'y', KEYWORD_MATCHED, Token::TRY)) return; | |
| 1098 break; | |
| 1099 case V: | |
| 1100 if (MatchKeywordStart(input, "var", 1, Token::VAR)) return; | |
| 1101 if (MatchKeywordStart(input, "void", 1, Token::VOID)) return; | |
| 1102 break; | |
| 1103 case W: | |
| 1104 if (MatchKeywordStart(input, "while", 1, Token::WHILE)) return; | |
| 1105 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return; | |
| 1106 break; | |
| 1107 case UNMATCHABLE: | |
| 1108 break; | |
| 1109 } | |
| 1110 // On fallthrough, it's a failure. | |
| 1111 state_ = UNMATCHABLE; | |
| 1112 } | |
| 1113 | |
| 1114 } } // namespace v8::internal | 1090 } } // namespace v8::internal |
| OLD | NEW |