OLD | NEW |
1 // Copyright 2010 the V8 project authors. All rights reserved. | 1 // Copyright 2010 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 12 matching lines...) Expand all Loading... |
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
27 | 27 |
28 #include "v8.h" | 28 #include "v8.h" |
29 | 29 |
30 #include "ast.h" | 30 #include "ast.h" |
31 #include "handles.h" | 31 #include "handles.h" |
32 #include "scanner.h" | 32 #include "scanner.h" |
| 33 #include "unicode-inl.h" |
33 | 34 |
34 namespace v8 { | 35 namespace v8 { |
35 namespace internal { | 36 namespace internal { |
36 | 37 |
37 // ---------------------------------------------------------------------------- | 38 // ---------------------------------------------------------------------------- |
38 // Character predicates | |
39 | |
40 | |
41 unibrow::Predicate<IdentifierStart, 128> Scanner::kIsIdentifierStart; | |
42 unibrow::Predicate<IdentifierPart, 128> Scanner::kIsIdentifierPart; | |
43 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator; | |
44 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace; | |
45 | |
46 | |
47 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_; | |
48 | |
49 | |
50 // ---------------------------------------------------------------------------- | |
51 // UTF8Buffer | 39 // UTF8Buffer |
52 | 40 |
53 UTF8Buffer::UTF8Buffer() : buffer_(kInitialCapacity) { } | 41 UTF8Buffer::UTF8Buffer() : buffer_(kInitialCapacity) { } |
54 | 42 |
55 | 43 |
56 UTF8Buffer::~UTF8Buffer() {} | 44 UTF8Buffer::~UTF8Buffer() {} |
57 | 45 |
58 | 46 |
59 void UTF8Buffer::AddCharSlow(uc32 c) { | 47 void UTF8Buffer::AddCharSlow(uc32 c) { |
60 ASSERT(static_cast<unsigned>(c) > unibrow::Utf8::kMaxOneByteChar); | 48 ASSERT(static_cast<unsigned>(c) > unibrow::Utf8::kMaxOneByteChar); |
(...skipping 290 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
351 return source_pos() != start_position; | 339 return source_pos() != start_position; |
352 } | 340 } |
353 | 341 |
354 | 342 |
355 bool Scanner::SkipJavaScriptWhiteSpace() { | 343 bool Scanner::SkipJavaScriptWhiteSpace() { |
356 int start_position = source_pos(); | 344 int start_position = source_pos(); |
357 | 345 |
358 while (true) { | 346 while (true) { |
359 // We treat byte-order marks (BOMs) as whitespace for better | 347 // We treat byte-order marks (BOMs) as whitespace for better |
360 // compatibility with Spidermonkey and other JavaScript engines. | 348 // compatibility with Spidermonkey and other JavaScript engines. |
361 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { | 349 while (ScannerConstants::kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { |
362 // IsWhiteSpace() includes line terminators! | 350 // IsWhiteSpace() includes line terminators! |
363 if (kIsLineTerminator.get(c0_)) { | 351 if (ScannerConstants::kIsLineTerminator.get(c0_)) { |
364 // Ignore line terminators, but remember them. This is necessary | 352 // Ignore line terminators, but remember them. This is necessary |
365 // for automatic semicolon insertion. | 353 // for automatic semicolon insertion. |
366 has_line_terminator_before_next_ = true; | 354 has_line_terminator_before_next_ = true; |
367 } | 355 } |
368 Advance(); | 356 Advance(); |
369 } | 357 } |
370 | 358 |
371 // If there is an HTML comment end '-->' at the beginning of a | 359 // If there is an HTML comment end '-->' at the beginning of a |
372 // line (with only whitespace in front of it), we treat the rest | 360 // line (with only whitespace in front of it), we treat the rest |
373 // of the line as a comment. This is in line with the way | 361 // of the line as a comment. This is in line with the way |
(...skipping 19 matching lines...) Expand all Loading... |
393 | 381 |
394 | 382 |
395 Token::Value Scanner::SkipSingleLineComment() { | 383 Token::Value Scanner::SkipSingleLineComment() { |
396 Advance(); | 384 Advance(); |
397 | 385 |
398 // The line terminator at the end of the line is not considered | 386 // The line terminator at the end of the line is not considered |
399 // to be part of the single-line comment; it is recognized | 387 // to be part of the single-line comment; it is recognized |
400 // separately by the lexical grammar and becomes part of the | 388 // separately by the lexical grammar and becomes part of the |
401 // stream of input elements for the syntactic grammar (see | 389 // stream of input elements for the syntactic grammar (see |
402 // ECMA-262, section 7.4, page 12). | 390 // ECMA-262, section 7.4, page 12). |
403 while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) { | 391 while (c0_ >= 0 && !ScannerConstants::kIsLineTerminator.get(c0_)) { |
404 Advance(); | 392 Advance(); |
405 } | 393 } |
406 | 394 |
407 return Token::WHITESPACE; | 395 return Token::WHITESPACE; |
408 } | 396 } |
409 | 397 |
410 | 398 |
411 Token::Value Scanner::SkipMultiLineComment() { | 399 Token::Value Scanner::SkipMultiLineComment() { |
412 ASSERT(c0_ == '*'); | 400 ASSERT(c0_ == '*'); |
413 Advance(); | 401 Advance(); |
(...skipping 210 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
624 | 612 |
625 | 613 |
626 Token::Value Scanner::ScanJsonIdentifier(const char* text, | 614 Token::Value Scanner::ScanJsonIdentifier(const char* text, |
627 Token::Value token) { | 615 Token::Value token) { |
628 LiteralScope literal(this); | 616 LiteralScope literal(this); |
629 while (*text != '\0') { | 617 while (*text != '\0') { |
630 if (c0_ != *text) return Token::ILLEGAL; | 618 if (c0_ != *text) return Token::ILLEGAL; |
631 Advance(); | 619 Advance(); |
632 text++; | 620 text++; |
633 } | 621 } |
634 if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL; | 622 if (ScannerConstants::kIsIdentifierPart.get(c0_)) return Token::ILLEGAL; |
635 literal.Complete(); | 623 literal.Complete(); |
636 return token; | 624 return token; |
637 } | 625 } |
638 | 626 |
639 | 627 |
640 void Scanner::ScanJavaScript() { | 628 void Scanner::ScanJavaScript() { |
641 next_.literal_chars = Vector<const char>(); | 629 next_.literal_chars = Vector<const char>(); |
642 Token::Value token; | 630 Token::Value token; |
643 do { | 631 do { |
644 // Remember the position of the next token | 632 // Remember the position of the next token |
(...skipping 202 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
847 | 835 |
848 case '?': | 836 case '?': |
849 token = Select(Token::CONDITIONAL); | 837 token = Select(Token::CONDITIONAL); |
850 break; | 838 break; |
851 | 839 |
852 case '~': | 840 case '~': |
853 token = Select(Token::BIT_NOT); | 841 token = Select(Token::BIT_NOT); |
854 break; | 842 break; |
855 | 843 |
856 default: | 844 default: |
857 if (kIsIdentifierStart.get(c0_)) { | 845 if (ScannerConstants::kIsIdentifierStart.get(c0_)) { |
858 token = ScanIdentifier(); | 846 token = ScanIdentifier(); |
859 } else if (IsDecimalDigit(c0_)) { | 847 } else if (IsDecimalDigit(c0_)) { |
860 token = ScanNumber(false); | 848 token = ScanNumber(false); |
861 } else if (SkipWhiteSpace()) { | 849 } else if (SkipWhiteSpace()) { |
862 token = Token::WHITESPACE; | 850 token = Token::WHITESPACE; |
863 } else if (c0_ < 0) { | 851 } else if (c0_ < 0) { |
864 token = Token::EOS; | 852 token = Token::EOS; |
865 } else { | 853 } else { |
866 token = Select(Token::ILLEGAL); | 854 token = Select(Token::ILLEGAL); |
867 } | 855 } |
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
930 } | 918 } |
931 return x; | 919 return x; |
932 } | 920 } |
933 | 921 |
934 | 922 |
935 void Scanner::ScanEscape() { | 923 void Scanner::ScanEscape() { |
936 uc32 c = c0_; | 924 uc32 c = c0_; |
937 Advance(); | 925 Advance(); |
938 | 926 |
939 // Skip escaped newlines. | 927 // Skip escaped newlines. |
940 if (kIsLineTerminator.get(c)) { | 928 if (ScannerConstants::kIsLineTerminator.get(c)) { |
941 // Allow CR+LF newlines in multiline string literals. | 929 // Allow CR+LF newlines in multiline string literals. |
942 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); | 930 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); |
943 // Allow LF+CR newlines in multiline string literals. | 931 // Allow LF+CR newlines in multiline string literals. |
944 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); | 932 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); |
945 return; | 933 return; |
946 } | 934 } |
947 | 935 |
948 switch (c) { | 936 switch (c) { |
949 case '\'': // fall through | 937 case '\'': // fall through |
950 case '"' : // fall through | 938 case '"' : // fall through |
(...skipping 21 matching lines...) Expand all Loading... |
972 // as non-escaped characters by JS VMs. | 960 // as non-escaped characters by JS VMs. |
973 AddChar(c); | 961 AddChar(c); |
974 } | 962 } |
975 | 963 |
976 | 964 |
977 Token::Value Scanner::ScanString() { | 965 Token::Value Scanner::ScanString() { |
978 uc32 quote = c0_; | 966 uc32 quote = c0_; |
979 Advance(); // consume quote | 967 Advance(); // consume quote |
980 | 968 |
981 LiteralScope literal(this); | 969 LiteralScope literal(this); |
982 while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) { | 970 while (c0_ != quote && c0_ >= 0 |
| 971 && !ScannerConstants::kIsLineTerminator.get(c0_)) { |
983 uc32 c = c0_; | 972 uc32 c = c0_; |
984 Advance(); | 973 Advance(); |
985 if (c == '\\') { | 974 if (c == '\\') { |
986 if (c0_ < 0) return Token::ILLEGAL; | 975 if (c0_ < 0) return Token::ILLEGAL; |
987 ScanEscape(); | 976 ScanEscape(); |
988 } else { | 977 } else { |
989 AddChar(c); | 978 AddChar(c); |
990 } | 979 } |
991 } | 980 } |
992 if (c0_ != quote) return Token::ILLEGAL; | 981 if (c0_ != quote) return Token::ILLEGAL; |
(...skipping 92 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1085 // we must have at least one decimal digit after 'e'/'E' | 1074 // we must have at least one decimal digit after 'e'/'E' |
1086 return Token::ILLEGAL; | 1075 return Token::ILLEGAL; |
1087 } | 1076 } |
1088 ScanDecimalDigits(); | 1077 ScanDecimalDigits(); |
1089 } | 1078 } |
1090 | 1079 |
1091 // The source character immediately following a numeric literal must | 1080 // The source character immediately following a numeric literal must |
1092 // not be an identifier start or a decimal digit; see ECMA-262 | 1081 // not be an identifier start or a decimal digit; see ECMA-262 |
1093 // section 7.8.3, page 17 (note that we read only one decimal digit | 1082 // section 7.8.3, page 17 (note that we read only one decimal digit |
1094 // if the value is 0). | 1083 // if the value is 0). |
1095 if (IsDecimalDigit(c0_) || kIsIdentifierStart.get(c0_)) | 1084 if (IsDecimalDigit(c0_) || ScannerConstants::kIsIdentifierStart.get(c0_)) |
1096 return Token::ILLEGAL; | 1085 return Token::ILLEGAL; |
1097 | 1086 |
1098 literal.Complete(); | 1087 literal.Complete(); |
1099 | 1088 |
1100 return Token::NUMBER; | 1089 return Token::NUMBER; |
1101 } | 1090 } |
1102 | 1091 |
1103 | 1092 |
1104 uc32 Scanner::ScanIdentifierUnicodeEscape() { | 1093 uc32 Scanner::ScanIdentifierUnicodeEscape() { |
1105 Advance(); | 1094 Advance(); |
1106 if (c0_ != 'u') return unibrow::Utf8::kBadChar; | 1095 if (c0_ != 'u') return unibrow::Utf8::kBadChar; |
1107 Advance(); | 1096 Advance(); |
1108 uc32 c = ScanHexEscape('u', 4); | 1097 uc32 c = ScanHexEscape('u', 4); |
1109 // We do not allow a unicode escape sequence to start another | 1098 // We do not allow a unicode escape sequence to start another |
1110 // unicode escape sequence. | 1099 // unicode escape sequence. |
1111 if (c == '\\') return unibrow::Utf8::kBadChar; | 1100 if (c == '\\') return unibrow::Utf8::kBadChar; |
1112 return c; | 1101 return c; |
1113 } | 1102 } |
1114 | 1103 |
1115 | 1104 |
1116 Token::Value Scanner::ScanIdentifier() { | 1105 Token::Value Scanner::ScanIdentifier() { |
1117 ASSERT(kIsIdentifierStart.get(c0_)); | 1106 ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_)); |
1118 | 1107 |
1119 LiteralScope literal(this); | 1108 LiteralScope literal(this); |
1120 KeywordMatcher keyword_match; | 1109 KeywordMatcher keyword_match; |
1121 | 1110 |
1122 // Scan identifier start character. | 1111 // Scan identifier start character. |
1123 if (c0_ == '\\') { | 1112 if (c0_ == '\\') { |
1124 uc32 c = ScanIdentifierUnicodeEscape(); | 1113 uc32 c = ScanIdentifierUnicodeEscape(); |
1125 // Only allow legal identifier start characters. | 1114 // Only allow legal identifier start characters. |
1126 if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL; | 1115 if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL; |
1127 AddChar(c); | 1116 AddChar(c); |
1128 keyword_match.Fail(); | 1117 keyword_match.Fail(); |
1129 } else { | 1118 } else { |
1130 AddChar(c0_); | 1119 AddChar(c0_); |
1131 keyword_match.AddChar(c0_); | 1120 keyword_match.AddChar(c0_); |
1132 Advance(); | 1121 Advance(); |
1133 } | 1122 } |
1134 | 1123 |
1135 // Scan the rest of the identifier characters. | 1124 // Scan the rest of the identifier characters. |
1136 while (kIsIdentifierPart.get(c0_)) { | 1125 while (ScannerConstants::kIsIdentifierPart.get(c0_)) { |
1137 if (c0_ == '\\') { | 1126 if (c0_ == '\\') { |
1138 uc32 c = ScanIdentifierUnicodeEscape(); | 1127 uc32 c = ScanIdentifierUnicodeEscape(); |
1139 // Only allow legal identifier part characters. | 1128 // Only allow legal identifier part characters. |
1140 if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL; | 1129 if (!ScannerConstants::kIsIdentifierPart.get(c)) return Token::ILLEGAL; |
1141 AddChar(c); | 1130 AddChar(c); |
1142 keyword_match.Fail(); | 1131 keyword_match.Fail(); |
1143 } else { | 1132 } else { |
1144 AddChar(c0_); | 1133 AddChar(c0_); |
1145 keyword_match.AddChar(c0_); | 1134 keyword_match.AddChar(c0_); |
1146 Advance(); | 1135 Advance(); |
1147 } | 1136 } |
1148 } | 1137 } |
1149 literal.Complete(); | 1138 literal.Complete(); |
1150 | 1139 |
1151 return keyword_match.token(); | 1140 return keyword_match.token(); |
1152 } | 1141 } |
1153 | 1142 |
1154 | 1143 |
1155 | 1144 |
1156 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) { | |
1157 // Checks whether the buffer contains an identifier (no escape). | |
1158 if (!buffer->has_more()) return false; | |
1159 if (!kIsIdentifierStart.get(buffer->GetNext())) return false; | |
1160 while (buffer->has_more()) { | |
1161 if (!kIsIdentifierPart.get(buffer->GetNext())) return false; | |
1162 } | |
1163 return true; | |
1164 } | |
1165 | |
1166 | |
1167 bool Scanner::ScanRegExpPattern(bool seen_equal) { | 1145 bool Scanner::ScanRegExpPattern(bool seen_equal) { |
1168 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags | 1146 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags |
1169 bool in_character_class = false; | 1147 bool in_character_class = false; |
1170 | 1148 |
1171 // Previous token is either '/' or '/=', in the second case, the | 1149 // Previous token is either '/' or '/=', in the second case, the |
1172 // pattern starts at =. | 1150 // pattern starts at =. |
1173 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); | 1151 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); |
1174 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); | 1152 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); |
1175 | 1153 |
1176 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, | 1154 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, |
1177 // the scanner should pass uninterpreted bodies to the RegExp | 1155 // the scanner should pass uninterpreted bodies to the RegExp |
1178 // constructor. | 1156 // constructor. |
1179 LiteralScope literal(this); | 1157 LiteralScope literal(this); |
1180 if (seen_equal) | 1158 if (seen_equal) |
1181 AddChar('='); | 1159 AddChar('='); |
1182 | 1160 |
1183 while (c0_ != '/' || in_character_class) { | 1161 while (c0_ != '/' || in_character_class) { |
1184 if (kIsLineTerminator.get(c0_) || c0_ < 0) return false; | 1162 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false; |
1185 if (c0_ == '\\') { // escaped character | 1163 if (c0_ == '\\') { // escaped character |
1186 AddCharAdvance(); | 1164 AddCharAdvance(); |
1187 if (kIsLineTerminator.get(c0_) || c0_ < 0) return false; | 1165 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false; |
1188 AddCharAdvance(); | 1166 AddCharAdvance(); |
1189 } else { // unescaped character | 1167 } else { // unescaped character |
1190 if (c0_ == '[') in_character_class = true; | 1168 if (c0_ == '[') in_character_class = true; |
1191 if (c0_ == ']') in_character_class = false; | 1169 if (c0_ == ']') in_character_class = false; |
1192 AddCharAdvance(); | 1170 AddCharAdvance(); |
1193 } | 1171 } |
1194 } | 1172 } |
1195 Advance(); // consume '/' | 1173 Advance(); // consume '/' |
1196 | 1174 |
1197 literal.Complete(); | 1175 literal.Complete(); |
1198 | 1176 |
1199 return true; | 1177 return true; |
1200 } | 1178 } |
1201 | 1179 |
1202 bool Scanner::ScanRegExpFlags() { | 1180 bool Scanner::ScanRegExpFlags() { |
1203 // Scan regular expression flags. | 1181 // Scan regular expression flags. |
1204 LiteralScope literal(this); | 1182 LiteralScope literal(this); |
1205 while (kIsIdentifierPart.get(c0_)) { | 1183 while (ScannerConstants::kIsIdentifierPart.get(c0_)) { |
1206 if (c0_ == '\\') { | 1184 if (c0_ == '\\') { |
1207 uc32 c = ScanIdentifierUnicodeEscape(); | 1185 uc32 c = ScanIdentifierUnicodeEscape(); |
1208 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) { | 1186 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) { |
1209 // We allow any escaped character, unlike the restriction on | 1187 // We allow any escaped character, unlike the restriction on |
1210 // IdentifierPart when it is used to build an IdentifierName. | 1188 // IdentifierPart when it is used to build an IdentifierName. |
1211 AddChar(c); | 1189 AddChar(c); |
1212 continue; | 1190 continue; |
1213 } | 1191 } |
1214 } | 1192 } |
1215 AddCharAdvance(); | 1193 AddCharAdvance(); |
1216 } | 1194 } |
1217 literal.Complete(); | 1195 literal.Complete(); |
1218 | 1196 |
1219 next_.location.end_pos = source_pos() - 1; | 1197 next_.location.end_pos = source_pos() - 1; |
1220 return true; | 1198 return true; |
1221 } | 1199 } |
1222 | 1200 |
1223 } } // namespace v8::internal | 1201 } } // namespace v8::internal |
OLD | NEW |