| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 59 matching lines...) |
| 70 | 70 |
| 71 | 71 |
| 72 | 72 |
| 73 // ---------------------------------------------------------------------------- | 73 // ---------------------------------------------------------------------------- |
| 74 // JavaScriptScanner | 74 // JavaScriptScanner |
| 75 | 75 |
| 76 JavaScriptScanner::JavaScriptScanner(UnicodeCache* scanner_contants) | 76 JavaScriptScanner::JavaScriptScanner(UnicodeCache* scanner_contants) |
| 77 : Scanner(scanner_contants), octal_pos_(Location::invalid()) { } | 77 : Scanner(scanner_contants), octal_pos_(Location::invalid()) { } |
| 78 | 78 |
| 79 | 79 |
| 80 void JavaScriptScanner::Initialize(UC16CharacterStream* source) { |
| 81 source_ = source; |
| 82 // Need to capture identifiers in order to recognize "get" and "set" |
| 83 // in object literals. |
| 84 Init(); |
| 85 // Treat the start of the input like the position right after a newline, |
| 86 // so HTML comment ends are allowed; then skip whitespace, scan first token. |
| 87 has_line_terminator_before_next_ = true; |
| 88 SkipWhiteSpace(); |
| 89 Scan(); |
| 90 } |
| 91 |
| 80 Token::Value JavaScriptScanner::Next() { | 92 Token::Value JavaScriptScanner::Next() { |
| 81 current_ = next_; | 93 current_ = next_; |
| 82 has_line_terminator_before_next_ = false; | 94 has_line_terminator_before_next_ = false; |
| 95 has_multiline_comment_before_next_ = false; |
| 83 Scan(); | 96 Scan(); |
| 84 return current_.token; | 97 return current_.token; |
| 85 } | 98 } |
| 86 | 99 |
| 87 | 100 |
| 88 static inline bool IsByteOrderMark(uc32 c) { | 101 static inline bool IsByteOrderMark(uc32 c) { |
| 89 // The Unicode value U+FFFE is guaranteed never to be assigned as a | 102 // The Unicode value U+FFFE is guaranteed never to be assigned as a |
| 90 // Unicode character; this implies that in a Unicode context the | 103 // Unicode character; this implies that in a Unicode context the |
| 91 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF | 104 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF |
| 92 // character expressed in little-endian byte order (since it could | 105 // character expressed in little-endian byte order (since it could |
| (...skipping 44 matching lines...) |
| 137 } | 150 } |
| 138 | 151 |
| 139 | 152 |
| 140 Token::Value JavaScriptScanner::SkipSingleLineComment() { | 153 Token::Value JavaScriptScanner::SkipSingleLineComment() { |
| 141 Advance(); | 154 Advance(); |
| 142 | 155 |
| 143 // The line terminator at the end of the line is not considered | 156 // The line terminator at the end of the line is not considered |
| 144 // to be part of the single-line comment; it is recognized | 157 // to be part of the single-line comment; it is recognized |
| 145 // separately by the lexical grammar and becomes part of the | 158 // separately by the lexical grammar and becomes part of the |
| 146 // stream of input elements for the syntactic grammar (see | 159 // stream of input elements for the syntactic grammar (see |
| 147 // ECMA-262, section 7.4, page 12). | 160 // ECMA-262, section 7.4). |
| 148 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) { | 161 while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) { |
| 149 Advance(); | 162 Advance(); |
| 150 } | 163 } |
| 151 | 164 |
| 152 return Token::WHITESPACE; | 165 return Token::WHITESPACE; |
| 153 } | 166 } |
| 154 | 167 |
| 155 | 168 |
| 156 Token::Value JavaScriptScanner::SkipMultiLineComment() { | 169 Token::Value JavaScriptScanner::SkipMultiLineComment() { |
| 157 ASSERT(c0_ == '*'); | 170 ASSERT(c0_ == '*'); |
| 158 Advance(); | 171 Advance(); |
| 159 | 172 |
| 160 while (c0_ >= 0) { | 173 while (c0_ >= 0) { |
| 161 char ch = c0_; | 174 uc32 ch = c0_;  // Keep the full code unit; a char would truncate it. |
| 162 Advance(); | 175 Advance(); |
| 176 if (unicode_cache_->IsLineTerminator(ch)) { |
| 177 // Following ECMA-262, section 7.4, a comment containing |
| 178 // a newline will make the comment count as a line-terminator. |
| 179 has_multiline_comment_before_next_ = true; |
| 180 } |
| 163 // If we have reached the end of the multi-line comment, we | 181 // If we have reached the end of the multi-line comment, we |
| 164 // consume the '/' and insert a whitespace. This way all | 182 // consume the '/' and insert a whitespace. This way all |
| 165 // multi-line comments are treated as whitespace - even the ones | 183 // multi-line comments are treated as whitespace. |
| 166 // containing line terminators. This contradicts ECMA-262, section | |
| 167 // 7.4, page 12, that says that multi-line comments containing | |
| 168 // line terminators should be treated as a line terminator, but it | |
| 169 // matches the behaviour of SpiderMonkey and KJS. | |
| 170 if (ch == '*' && c0_ == '/') { | 184 if (ch == '*' && c0_ == '/') { |
| 171 c0_ = ' '; | 185 c0_ = ' '; |
| 172 return Token::WHITESPACE; | 186 return Token::WHITESPACE; |
| 173 } | 187 } |
| 174 } | 188 } |
| 175 | 189 |
| 176 // Unterminated multi-line comment. | 190 // Unterminated multi-line comment. |
| 177 return Token::ILLEGAL; | 191 return Token::ILLEGAL; |
| 178 } | 192 } |
| 179 | 193 |
| (...skipping 261 matching lines...) |
| 441 ASSERT_EQ(next_.location.end_pos, current_pos); | 455 ASSERT_EQ(next_.location.end_pos, current_pos); |
| 442 // Positions inside the lookahead token aren't supported. | 456 // Positions inside the lookahead token aren't supported. |
| 443 ASSERT(pos >= current_pos); | 457 ASSERT(pos >= current_pos); |
| 444 if (pos != current_pos) { | 458 if (pos != current_pos) { |
| 445 source_->SeekForward(pos - source_->pos()); | 459 source_->SeekForward(pos - source_->pos()); |
| 446 Advance(); | 460 Advance(); |
| 447 // This function is only called to seek to the location | 461 // This function is only called to seek to the location |
| 448 // of the end of a function (at the "}" token). It doesn't matter | 462 // of the end of a function (at the "}" token). It doesn't matter |
| 449 // whether there was a line terminator in the part we skip. | 463 // whether there was a line terminator in the part we skip. |
| 450 has_line_terminator_before_next_ = false; | 464 has_line_terminator_before_next_ = false; |
| 465 has_multiline_comment_before_next_ = false; |
| 451 } | 466 } |
| 452 Scan(); | 467 Scan(); |
| 453 } | 468 } |
| 454 | 469 |
| 455 | 470 |
| 456 void JavaScriptScanner::ScanEscape() { | 471 void JavaScriptScanner::ScanEscape() { |
| 457 uc32 c = c0_; | 472 uc32 c = c0_; |
| 458 Advance(); | 473 Advance(); |
| 459 | 474 |
| 460 // Skip escaped newlines. | 475 // Skip escaped newlines. |
| (...skipping 316 matching lines...) |
| 777 { "break", KEYWORD_PREFIX, Token::BREAK }, | 792 { "break", KEYWORD_PREFIX, Token::BREAK }, |
| 778 { NULL, C, Token::ILLEGAL }, | 793 { NULL, C, Token::ILLEGAL }, |
| 779 { NULL, D, Token::ILLEGAL }, | 794 { NULL, D, Token::ILLEGAL }, |
| 780 { NULL, E, Token::ILLEGAL }, | 795 { NULL, E, Token::ILLEGAL }, |
| 781 { NULL, F, Token::ILLEGAL }, | 796 { NULL, F, Token::ILLEGAL }, |
| 782 { NULL, UNMATCHABLE, Token::ILLEGAL }, | 797 { NULL, UNMATCHABLE, Token::ILLEGAL }, |
| 783 { NULL, UNMATCHABLE, Token::ILLEGAL }, | 798 { NULL, UNMATCHABLE, Token::ILLEGAL }, |
| 784 { NULL, I, Token::ILLEGAL }, | 799 { NULL, I, Token::ILLEGAL }, |
| 785 { NULL, UNMATCHABLE, Token::ILLEGAL }, | 800 { NULL, UNMATCHABLE, Token::ILLEGAL }, |
| 786 { NULL, UNMATCHABLE, Token::ILLEGAL }, | 801 { NULL, UNMATCHABLE, Token::ILLEGAL }, |
| 787 { "let", KEYWORD_PREFIX, Token::FUTURE_RESERVED_WORD }, | 802 { "let", KEYWORD_PREFIX, Token::FUTURE_STRICT_RESERVED_WORD }, |
| 788 { NULL, UNMATCHABLE, Token::ILLEGAL }, | 803 { NULL, UNMATCHABLE, Token::ILLEGAL }, |
| 789 { NULL, N, Token::ILLEGAL }, | 804 { NULL, N, Token::ILLEGAL }, |
| 790 { NULL, UNMATCHABLE, Token::ILLEGAL }, | 805 { NULL, UNMATCHABLE, Token::ILLEGAL }, |
| 791 { NULL, P, Token::ILLEGAL }, | 806 { NULL, P, Token::ILLEGAL }, |
| 792 { NULL, UNMATCHABLE, Token::ILLEGAL }, | 807 { NULL, UNMATCHABLE, Token::ILLEGAL }, |
| 793 { "return", KEYWORD_PREFIX, Token::RETURN }, | 808 { "return", KEYWORD_PREFIX, Token::RETURN }, |
| 794 { NULL, S, Token::ILLEGAL }, | 809 { NULL, S, Token::ILLEGAL }, |
| 795 { NULL, T, Token::ILLEGAL }, | 810 { NULL, T, Token::ILLEGAL }, |
| 796 { NULL, UNMATCHABLE, Token::ILLEGAL }, | 811 { NULL, UNMATCHABLE, Token::ILLEGAL }, |
| 797 { NULL, V, Token::ILLEGAL }, | 812 { NULL, V, Token::ILLEGAL }, |
| 798 { NULL, W, Token::ILLEGAL }, | 813 { NULL, W, Token::ILLEGAL }, |
| 799 { NULL, UNMATCHABLE, Token::ILLEGAL }, | 814 { NULL, UNMATCHABLE, Token::ILLEGAL }, |
| 800 { "yield", KEYWORD_PREFIX, Token::FUTURE_RESERVED_WORD } | 815 { "yield", KEYWORD_PREFIX, Token::FUTURE_STRICT_RESERVED_WORD } |
| 801 }; | 816 }; |
| 802 | 817 |
| 803 | 818 |
| 804 void KeywordMatcher::Step(unibrow::uchar input) { | 819 void KeywordMatcher::Step(unibrow::uchar input) { |
| 805 switch (state_) { | 820 switch (state_) { |
| 806 case INITIAL: { | 821 case INITIAL: { |
| 807 // matching the first character is the only state with significant fanout. | 822 // matching the first character is the only state with significant fanout. |
| 808 // Match only lower-case letters in range 'b'..'y'. | 823 // Match only lower-case letters in range 'b'..'y'. |
| 809 unsigned int offset = input - kFirstCharRangeMin; | 824 unsigned int offset = input - kFirstCharRangeMin; |
| 810 if (offset < kFirstCharRangeLength) { | 825 if (offset < kFirstCharRangeLength) { |
| (...skipping 16 matching lines...) |
| 827 } | 842 } |
| 828 return; | 843 return; |
| 829 } | 844 } |
| 830 break; | 845 break; |
| 831 case KEYWORD_MATCHED: | 846 case KEYWORD_MATCHED: |
| 832 token_ = Token::IDENTIFIER; | 847 token_ = Token::IDENTIFIER; |
| 833 break; | 848 break; |
| 834 case C: | 849 case C: |
| 835 if (MatchState(input, 'a', CA)) return; | 850 if (MatchState(input, 'a', CA)) return; |
| 836 if (MatchKeywordStart(input, "class", 1, | 851 if (MatchKeywordStart(input, "class", 1, |
| 837 Token::FUTURE_RESERVED_WORD)) return; | 852 Token::FUTURE_RESERVED_WORD)) return; |
| 838 if (MatchState(input, 'o', CO)) return; | 853 if (MatchState(input, 'o', CO)) return; |
| 839 break; | 854 break; |
| 840 case CA: | 855 case CA: |
| 841 if (MatchKeywordStart(input, "case", 2, Token::CASE)) return; | 856 if (MatchKeywordStart(input, "case", 2, Token::CASE)) return; |
| 842 if (MatchKeywordStart(input, "catch", 2, Token::CATCH)) return; | 857 if (MatchKeywordStart(input, "catch", 2, Token::CATCH)) return; |
| 843 break; | 858 break; |
| 844 case CO: | 859 case CO: |
| 845 if (MatchState(input, 'n', CON)) return; | 860 if (MatchState(input, 'n', CON)) return; |
| 846 break; | 861 break; |
| 847 case CON: | 862 case CON: |
| 848 if (MatchKeywordStart(input, "const", 3, Token::CONST)) return; | 863 if (MatchKeywordStart(input, "const", 3, Token::CONST)) return; |
| 849 if (MatchKeywordStart(input, "continue", 3, Token::CONTINUE)) return; | 864 if (MatchKeywordStart(input, "continue", 3, Token::CONTINUE)) return; |
| 850 break; | 865 break; |
| 851 case D: | 866 case D: |
| 852 if (MatchState(input, 'e', DE)) return; | 867 if (MatchState(input, 'e', DE)) return; |
| 853 if (MatchKeyword(input, 'o', KEYWORD_MATCHED, Token::DO)) return; | 868 if (MatchKeyword(input, 'o', KEYWORD_MATCHED, Token::DO)) return; |
| 854 break; | 869 break; |
| 855 case DE: | 870 case DE: |
| 856 if (MatchKeywordStart(input, "debugger", 2, Token::DEBUGGER)) return; | 871 if (MatchKeywordStart(input, "debugger", 2, Token::DEBUGGER)) return; |
| 857 if (MatchKeywordStart(input, "default", 2, Token::DEFAULT)) return; | 872 if (MatchKeywordStart(input, "default", 2, Token::DEFAULT)) return; |
| 858 if (MatchKeywordStart(input, "delete", 2, Token::DELETE)) return; | 873 if (MatchKeywordStart(input, "delete", 2, Token::DELETE)) return; |
| 859 break; | 874 break; |
| 860 case E: | 875 case E: |
| 861 if (MatchKeywordStart(input, "else", 1, Token::ELSE)) return; | 876 if (MatchKeywordStart(input, "else", 1, Token::ELSE)) return; |
| 862 if (MatchKeywordStart(input, "enum", 1, | 877 if (MatchKeywordStart(input, "enum", 1, |
| 863 Token::FUTURE_RESERVED_WORD)) return; | 878 Token::FUTURE_RESERVED_WORD)) return; |
| 864 if (MatchState(input, 'x', EX)) return; | 879 if (MatchState(input, 'x', EX)) return; |
| 865 break; | 880 break; |
| 866 case EX: | 881 case EX: |
| 867 if (MatchKeywordStart(input, "export", 2, | 882 if (MatchKeywordStart(input, "export", 2, |
| 868 Token::FUTURE_RESERVED_WORD)) return; | 883 Token::FUTURE_RESERVED_WORD)) return; |
| 869 if (MatchKeywordStart(input, "extends", 2, | 884 if (MatchKeywordStart(input, "extends", 2, |
| 870 Token::FUTURE_RESERVED_WORD)) return; | 885 Token::FUTURE_RESERVED_WORD)) return; |
| 871 break; | 886 break; |
| 872 case F: | 887 case F: |
| 873 if (MatchKeywordStart(input, "false", 1, Token::FALSE_LITERAL)) return; | 888 if (MatchKeywordStart(input, "false", 1, Token::FALSE_LITERAL)) return; |
| 874 if (MatchKeywordStart(input, "finally", 1, Token::FINALLY)) return; | 889 if (MatchKeywordStart(input, "finally", 1, Token::FINALLY)) return; |
| 875 if (MatchKeywordStart(input, "for", 1, Token::FOR)) return; | 890 if (MatchKeywordStart(input, "for", 1, Token::FOR)) return; |
| 876 if (MatchKeywordStart(input, "function", 1, Token::FUNCTION)) return; | 891 if (MatchKeywordStart(input, "function", 1, Token::FUNCTION)) return; |
| 877 break; | 892 break; |
| 878 case I: | 893 case I: |
| 879 if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF)) return; | 894 if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF)) return; |
| 880 if (MatchState(input, 'm', IM)) return; | 895 if (MatchState(input, 'm', IM)) return; |
| 881 if (MatchKeyword(input, 'n', IN, Token::IN)) return; | 896 if (MatchKeyword(input, 'n', IN, Token::IN)) return; |
| 882 break; | 897 break; |
| 883 case IM: | 898 case IM: |
| 884 if (MatchState(input, 'p', IMP)) return; | 899 if (MatchState(input, 'p', IMP)) return; |
| 885 break; | 900 break; |
| 886 case IMP: | 901 case IMP: |
| 887 if (MatchKeywordStart(input, "implements", 3, | 902 if (MatchKeywordStart(input, "implements", 3, |
| 888 Token::FUTURE_RESERVED_WORD )) return; | 903 Token::FUTURE_STRICT_RESERVED_WORD )) return; |
| 889 if (MatchKeywordStart(input, "import", 3, | 904 if (MatchKeywordStart(input, "import", 3, |
| 890 Token::FUTURE_RESERVED_WORD)) return; | 905 Token::FUTURE_RESERVED_WORD)) return; |
| 891 break; | 906 break; |
| 892 case IN: | 907 case IN: |
| 893 token_ = Token::IDENTIFIER; | 908 token_ = Token::IDENTIFIER; |
| 894 if (MatchKeywordStart(input, "interface", 2, | 909 if (MatchKeywordStart(input, "interface", 2, |
| 895 Token::FUTURE_RESERVED_WORD)) return; | 910 Token::FUTURE_STRICT_RESERVED_WORD)) return; |
| 896 if (MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) return; | 911 if (MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) return; |
| 897 break; | 912 break; |
| 898 case N: | 913 case N: |
| 899 if (MatchKeywordStart(input, "native", 1, Token::NATIVE)) return; | |
| 900 if (MatchKeywordStart(input, "new", 1, Token::NEW)) return; | 914 if (MatchKeywordStart(input, "new", 1, Token::NEW)) return; |
| 901 if (MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL)) return; | 915 if (MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL)) return; |
| 902 break; | 916 break; |
| 903 case P: | 917 case P: |
| 904 if (MatchKeywordStart(input, "package", 1, | 918 if (MatchKeywordStart(input, "package", 1, |
| 905 Token::FUTURE_RESERVED_WORD)) return; | 919 Token::FUTURE_STRICT_RESERVED_WORD)) return; |
| 906 if (MatchState(input, 'r', PR)) return; | 920 if (MatchState(input, 'r', PR)) return; |
| 907 if (MatchKeywordStart(input, "public", 1, | 921 if (MatchKeywordStart(input, "public", 1, |
| 908 Token::FUTURE_RESERVED_WORD)) return; | 922 Token::FUTURE_STRICT_RESERVED_WORD)) return; |
| 909 break; | 923 break; |
| 910 case PR: | 924 case PR: |
| 911 if (MatchKeywordStart(input, "private", 2, | 925 if (MatchKeywordStart(input, "private", 2, |
| 912 Token::FUTURE_RESERVED_WORD)) return; | 926 Token::FUTURE_STRICT_RESERVED_WORD)) return; |
| 913 if (MatchKeywordStart(input, "protected", 2, | 927 if (MatchKeywordStart(input, "protected", 2, |
| 914 Token::FUTURE_RESERVED_WORD)) return; | 928 Token::FUTURE_STRICT_RESERVED_WORD)) return; |
| 915 break; | 929 break; |
| 916 case S: | 930 case S: |
| 917 if (MatchKeywordStart(input, "static", 1, | 931 if (MatchKeywordStart(input, "static", 1, |
| 918 Token::FUTURE_RESERVED_WORD)) return; | 932 Token::FUTURE_STRICT_RESERVED_WORD)) return; |
| 919 if (MatchKeywordStart(input, "super", 1, | 933 if (MatchKeywordStart(input, "super", 1, |
| 920 Token::FUTURE_RESERVED_WORD)) return; | 934 Token::FUTURE_RESERVED_WORD)) return; |
| 921 if (MatchKeywordStart(input, "switch", 1, | 935 if (MatchKeywordStart(input, "switch", 1, |
| 922 Token::SWITCH)) return; | 936 Token::SWITCH)) return; |
| 923 break; | 937 break; |
| 924 case T: | 938 case T: |
| 925 if (MatchState(input, 'h', TH)) return; | 939 if (MatchState(input, 'h', TH)) return; |
| 926 if (MatchState(input, 'r', TR)) return; | 940 if (MatchState(input, 'r', TR)) return; |
| 927 if (MatchKeywordStart(input, "typeof", 1, Token::TYPEOF)) return; | 941 if (MatchKeywordStart(input, "typeof", 1, Token::TYPEOF)) return; |
| 928 break; | 942 break; |
| 929 case TH: | 943 case TH: |
| 930 if (MatchKeywordStart(input, "this", 2, Token::THIS)) return; | 944 if (MatchKeywordStart(input, "this", 2, Token::THIS)) return; |
| 931 if (MatchKeywordStart(input, "throw", 2, Token::THROW)) return; | 945 if (MatchKeywordStart(input, "throw", 2, Token::THROW)) return; |
| 932 break; | 946 break; |
| (...skipping 10 matching lines...) |
| 943 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return; | 957 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return; |
| 944 break; | 958 break; |
| 945 case UNMATCHABLE: | 959 case UNMATCHABLE: |
| 946 break; | 960 break; |
| 947 } | 961 } |
| 948 // On fallthrough, it's a failure. | 962 // On fallthrough, it's a failure. |
| 949 state_ = UNMATCHABLE; | 963 state_ = UNMATCHABLE; |
| 950 } | 964 } |
| 951 | 965 |
| 952 } } // namespace v8::internal | 966 } } // namespace v8::internal |
| OLD | NEW |