OLD | NEW |
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 19 matching lines...) Expand all Loading... |
30 #include "ast.h" | 30 #include "ast.h" |
31 #include "scanner.h" | 31 #include "scanner.h" |
32 | 32 |
33 namespace v8 { | 33 namespace v8 { |
34 namespace internal { | 34 namespace internal { |
35 | 35 |
36 // ---------------------------------------------------------------------------- | 36 // ---------------------------------------------------------------------------- |
37 // Character predicates | 37 // Character predicates |
38 | 38 |
39 | 39 |
40 unibrow::Predicate<IdentifierStart, 128> Scanner::kIsIdentifierStart; | 40 ScannerData::ScannerData() {} |
41 unibrow::Predicate<IdentifierPart, 128> Scanner::kIsIdentifierPart; | |
42 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator; | |
43 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace; | |
44 | |
45 | |
46 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_; | |
47 | |
48 | 41 |
49 // ---------------------------------------------------------------------------- | 42 // ---------------------------------------------------------------------------- |
50 // UTF8Buffer | 43 // UTF8Buffer |
51 | 44 |
52 UTF8Buffer::UTF8Buffer() : data_(NULL), limit_(NULL) { } | 45 UTF8Buffer::UTF8Buffer() : data_(NULL), limit_(NULL) { } |
53 | 46 |
54 | 47 |
55 UTF8Buffer::~UTF8Buffer() { | 48 UTF8Buffer::~UTF8Buffer() { |
56 if (data_ != NULL) DeleteArray(data_); | 49 if (data_ != NULL) DeleteArray(data_); |
57 } | 50 } |
(...skipping 258 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
316 UNREACHABLE(); | 309 UNREACHABLE(); |
317 } | 310 } |
318 // On fallthrough, it's a failure. | 311 // On fallthrough, it's a failure. |
319 state_ = UNMATCHABLE; | 312 state_ = UNMATCHABLE; |
320 } | 313 } |
321 | 314 |
322 | 315 |
323 // ---------------------------------------------------------------------------- | 316 // ---------------------------------------------------------------------------- |
324 // Scanner | 317 // Scanner |
325 | 318 |
326 Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) { } | 319 Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre), |
| 320 scanner_data_(v8_context()->scanner_data_) { |
| 321 } |
327 | 322 |
328 | 323 |
329 void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream, | 324 void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream, |
330 int position) { | 325 int position) { |
331 // Initialize the source buffer. | 326 // Initialize the source buffer. |
332 if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) { | 327 if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) { |
333 two_byte_string_buffer_.Initialize( | 328 two_byte_string_buffer_.Initialize( |
334 Handle<ExternalTwoByteString>::cast(source)); | 329 Handle<ExternalTwoByteString>::cast(source)); |
335 source_ = &two_byte_string_buffer_; | 330 source_ = &two_byte_string_buffer_; |
336 } else { | 331 } else { |
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
410 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF | 405 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF |
411 // character expressed in little-endian byte order (since it could | 406 // character expressed in little-endian byte order (since it could |
412 // not be a U+FFFE character expressed in big-endian byte | 407 // not be a U+FFFE character expressed in big-endian byte |
413 // order). Nevertheless, we check for it to be compatible with | 408 // order). Nevertheless, we check for it to be compatible with |
414 // Spidermonkey. | 409 // Spidermonkey. |
415 return c == 0xFEFF || c == 0xFFFE; | 410 return c == 0xFEFF || c == 0xFFFE; |
416 } | 411 } |
417 | 412 |
418 | 413 |
419 bool Scanner::SkipWhiteSpace() { | 414 bool Scanner::SkipWhiteSpace() { |
| 415 unibrow::Predicate<unibrow::WhiteSpace, 128>& kIsWhiteSpace = |
| 416 scanner_data_.kIsWhiteSpace_; |
| 417 unibrow::Predicate<unibrow::LineTerminator, 128>& kIsLineTerminator = |
| 418 scanner_data_.kIsLineTerminator_; |
420 int start_position = source_pos(); | 419 int start_position = source_pos(); |
421 | 420 |
422 while (true) { | 421 while (true) { |
423 // We treat byte-order marks (BOMs) as whitespace for better | 422 // We treat byte-order marks (BOMs) as whitespace for better |
424 // compatibility with Spidermonkey and other JavaScript engines. | 423 // compatibility with Spidermonkey and other JavaScript engines. |
425 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { | 424 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { |
426 // IsWhiteSpace() includes line terminators! | 425 // IsWhiteSpace() includes line terminators! |
427 if (kIsLineTerminator.get(c0_)) { | 426 if (kIsLineTerminator.get(c0_)) { |
428 // Ignore line terminators, but remember them. This is necessary | 427 // Ignore line terminators, but remember them. This is necessary |
429 // for automatic semicolon insertion. | 428 // for automatic semicolon insertion. |
(...skipping 22 matching lines...) Expand all Loading... |
452 } | 451 } |
453 // Return whether or not we skipped any characters. | 452 // Return whether or not we skipped any characters. |
454 return source_pos() != start_position; | 453 return source_pos() != start_position; |
455 } | 454 } |
456 } | 455 } |
457 | 456 |
458 | 457 |
459 Token::Value Scanner::SkipSingleLineComment() { | 458 Token::Value Scanner::SkipSingleLineComment() { |
460 Advance(); | 459 Advance(); |
461 | 460 |
| 461 unibrow::Predicate<unibrow::LineTerminator, 128>& kIsLineTerminator = |
| 462 scanner_data_.kIsLineTerminator_; |
462 // The line terminator at the end of the line is not considered | 463 // The line terminator at the end of the line is not considered |
463 // to be part of the single-line comment; it is recognized | 464 // to be part of the single-line comment; it is recognized |
464 // separately by the lexical grammar and becomes part of the | 465 // separately by the lexical grammar and becomes part of the |
465 // stream of input elements for the syntactic grammar (see | 466 // stream of input elements for the syntactic grammar (see |
466 // ECMA-262, section 7.4, page 12). | 467 // ECMA-262, section 7.4, page 12). |
467 while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) { | 468 while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) { |
468 Advance(); | 469 Advance(); |
469 } | 470 } |
470 | 471 |
471 return Token::WHITESPACE; | 472 return Token::WHITESPACE; |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
506 if (c0_ == '-') return SkipSingleLineComment(); | 507 if (c0_ == '-') return SkipSingleLineComment(); |
507 PushBack('-'); // undo Advance() | 508 PushBack('-'); // undo Advance() |
508 } | 509 } |
509 PushBack('!'); // undo Advance() | 510 PushBack('!'); // undo Advance() |
510 ASSERT(c0_ == '!'); | 511 ASSERT(c0_ == '!'); |
511 return Token::LT; | 512 return Token::LT; |
512 } | 513 } |
513 | 514 |
514 | 515 |
515 void Scanner::Scan() { | 516 void Scanner::Scan() { |
| 517 unibrow::Predicate<IdentifierPart, 128>& kIsIdentifierPart = |
| 518 scanner_data_.kIsIdentifierPart_; |
| 519 unibrow::Predicate<IdentifierStart, 128>& kIsIdentifierStart = |
| 520 scanner_data_.kIsIdentifierStart_; |
| 521 |
516 next_.literal_buffer = NULL; | 522 next_.literal_buffer = NULL; |
517 Token::Value token; | 523 Token::Value token; |
518 has_line_terminator_before_next_ = false; | 524 has_line_terminator_before_next_ = false; |
519 do { | 525 do { |
520 // Remember the position of the next token | 526 // Remember the position of the next token |
521 next_.location.beg_pos = source_pos(); | 527 next_.location.beg_pos = source_pos(); |
522 | 528 |
523 switch (c0_) { | 529 switch (c0_) { |
524 case ' ': | 530 case ' ': |
525 case '\t': | 531 case '\t': |
(...skipping 276 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
802 } | 808 } |
803 return x; | 809 return x; |
804 } | 810 } |
805 | 811 |
806 | 812 |
807 void Scanner::ScanEscape() { | 813 void Scanner::ScanEscape() { |
808 uc32 c = c0_; | 814 uc32 c = c0_; |
809 Advance(); | 815 Advance(); |
810 | 816 |
811 // Skip escaped newlines. | 817 // Skip escaped newlines. |
812 if (kIsLineTerminator.get(c)) { | 818 if (scanner_data_.kIsLineTerminator_.get(c)) { |
813 // Allow CR+LF newlines in multiline string literals. | 819 // Allow CR+LF newlines in multiline string literals. |
814 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); | 820 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); |
815 // Allow LF+CR newlines in multiline string literals. | 821 // Allow LF+CR newlines in multiline string literals. |
816 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); | 822 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); |
817 return; | 823 return; |
818 } | 824 } |
819 | 825 |
820 switch (c) { | 826 switch (c) { |
821 case '\'': // fall through | 827 case '\'': // fall through |
822 case '"' : // fall through | 828 case '"' : // fall through |
(...skipping 21 matching lines...) Expand all Loading... |
844 // as non-escaped characters by JS VMs. | 850 // as non-escaped characters by JS VMs. |
845 AddChar(c); | 851 AddChar(c); |
846 } | 852 } |
847 | 853 |
848 | 854 |
849 Token::Value Scanner::ScanString() { | 855 Token::Value Scanner::ScanString() { |
850 uc32 quote = c0_; | 856 uc32 quote = c0_; |
851 Advance(); // consume quote | 857 Advance(); // consume quote |
852 | 858 |
853 StartLiteral(); | 859 StartLiteral(); |
| 860 unibrow::Predicate<unibrow::LineTerminator, 128>& kIsLineTerminator = |
| 861 scanner_data_.kIsLineTerminator_; |
854 while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) { | 862 while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) { |
855 uc32 c = c0_; | 863 uc32 c = c0_; |
856 Advance(); | 864 Advance(); |
857 if (c == '\\') { | 865 if (c == '\\') { |
858 if (c0_ < 0) return Token::ILLEGAL; | 866 if (c0_ < 0) return Token::ILLEGAL; |
859 ScanEscape(); | 867 ScanEscape(); |
860 } else { | 868 } else { |
861 AddChar(c); | 869 AddChar(c); |
862 } | 870 } |
863 } | 871 } |
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
958 // we must have at least one decimal digit after 'e'/'E' | 966 // we must have at least one decimal digit after 'e'/'E' |
959 return Token::ILLEGAL; | 967 return Token::ILLEGAL; |
960 ScanDecimalDigits(); | 968 ScanDecimalDigits(); |
961 } | 969 } |
962 TerminateLiteral(); | 970 TerminateLiteral(); |
963 | 971 |
964 // The source character immediately following a numeric literal must | 972 // The source character immediately following a numeric literal must |
965 // not be an identifier start or a decimal digit; see ECMA-262 | 973 // not be an identifier start or a decimal digit; see ECMA-262 |
966 // section 7.8.3, page 17 (note that we read only one decimal digit | 974 // section 7.8.3, page 17 (note that we read only one decimal digit |
967 // if the value is 0). | 975 // if the value is 0). |
968 if (IsDecimalDigit(c0_) || kIsIdentifierStart.get(c0_)) | 976 if (IsDecimalDigit(c0_) || |
| 977 scanner_data_.kIsIdentifierStart_.get(c0_)) |
969 return Token::ILLEGAL; | 978 return Token::ILLEGAL; |
970 | 979 |
971 return Token::NUMBER; | 980 return Token::NUMBER; |
972 } | 981 } |
973 | 982 |
974 | 983 |
975 uc32 Scanner::ScanIdentifierUnicodeEscape() { | 984 uc32 Scanner::ScanIdentifierUnicodeEscape() { |
976 Advance(); | 985 Advance(); |
977 if (c0_ != 'u') return unibrow::Utf8::kBadChar; | 986 if (c0_ != 'u') return unibrow::Utf8::kBadChar; |
978 Advance(); | 987 Advance(); |
979 uc32 c = ScanHexEscape('u', 4); | 988 uc32 c = ScanHexEscape('u', 4); |
980 // We do not allow a unicode escape sequence to start another | 989 // We do not allow a unicode escape sequence to start another |
981 // unicode escape sequence. | 990 // unicode escape sequence. |
982 if (c == '\\') return unibrow::Utf8::kBadChar; | 991 if (c == '\\') return unibrow::Utf8::kBadChar; |
983 return c; | 992 return c; |
984 } | 993 } |
985 | 994 |
986 | 995 |
987 Token::Value Scanner::ScanIdentifier() { | 996 Token::Value Scanner::ScanIdentifier() { |
988 ASSERT(kIsIdentifierStart.get(c0_)); | 997 unibrow::Predicate<IdentifierPart, 128>& kIsIdentifierPart = |
| 998 scanner_data_.kIsIdentifierPart_; |
| 999 |
| 1000 ASSERT(scanner_data_.kIsIdentifierStart_.get(c0_)); |
989 | 1001 |
990 StartLiteral(); | 1002 StartLiteral(); |
991 KeywordMatcher keyword_match; | 1003 KeywordMatcher keyword_match; |
992 | 1004 |
993 // Scan identifier start character. | 1005 // Scan identifier start character. |
994 if (c0_ == '\\') { | 1006 if (c0_ == '\\') { |
995 uc32 c = ScanIdentifierUnicodeEscape(); | 1007 uc32 c = ScanIdentifierUnicodeEscape(); |
996 // Only allow legal identifier start characters. | 1008 // Only allow legal identifier start characters. |
997 if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL; | 1009 if (!scanner_data_.kIsIdentifierStart_.get(c)) return Token::ILLEGAL; |
998 AddChar(c); | 1010 AddChar(c); |
999 keyword_match.Fail(); | 1011 keyword_match.Fail(); |
1000 } else { | 1012 } else { |
1001 AddChar(c0_); | 1013 AddChar(c0_); |
1002 keyword_match.AddChar(c0_); | 1014 keyword_match.AddChar(c0_); |
1003 Advance(); | 1015 Advance(); |
1004 } | 1016 } |
1005 | 1017 |
1006 // Scan the rest of the identifier characters. | 1018 // Scan the rest of the identifier characters. |
1007 while (kIsIdentifierPart.get(c0_)) { | 1019 while (kIsIdentifierPart.get(c0_)) { |
(...skipping 10 matching lines...) Expand all Loading... |
1018 } | 1030 } |
1019 } | 1031 } |
1020 TerminateLiteral(); | 1032 TerminateLiteral(); |
1021 | 1033 |
1022 return keyword_match.token(); | 1034 return keyword_match.token(); |
1023 } | 1035 } |
1024 | 1036 |
1025 | 1037 |
1026 | 1038 |
1027 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) { | 1039 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) { |
| 1040 ScannerData& scanner_data = v8_context()->scanner_data_; |
| 1041 unibrow::Predicate<IdentifierPart, 128>& kIsIdentifierPart = |
| 1042 scanner_data.kIsIdentifierPart_; |
| 1043 |
1028 // Checks whether the buffer contains an identifier (no escape). | 1044 // Checks whether the buffer contains an identifier (no escape). |
1029 if (!buffer->has_more()) return false; | 1045 if (!buffer->has_more()) return false; |
1030 if (!kIsIdentifierStart.get(buffer->GetNext())) return false; | 1046 if (!scanner_data.kIsIdentifierStart_.get(buffer->GetNext())) return false; |
1031 while (buffer->has_more()) { | 1047 while (buffer->has_more()) { |
1032 if (!kIsIdentifierPart.get(buffer->GetNext())) return false; | 1048 if (!kIsIdentifierPart.get(buffer->GetNext())) return false; |
1033 } | 1049 } |
1034 return true; | 1050 return true; |
1035 } | 1051 } |
1036 | 1052 |
1037 | 1053 |
1038 bool Scanner::ScanRegExpPattern(bool seen_equal) { | 1054 bool Scanner::ScanRegExpPattern(bool seen_equal) { |
1039 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags | 1055 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags |
1040 bool in_character_class = false; | 1056 bool in_character_class = false; |
1041 | 1057 |
1042 // Previous token is either '/' or '/=', in the second case, the | 1058 // Previous token is either '/' or '/=', in the second case, the |
1043 // pattern starts at =. | 1059 // pattern starts at =. |
1044 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); | 1060 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); |
1045 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); | 1061 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); |
1046 | 1062 |
1047 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, | 1063 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, |
1048 // the scanner should pass uninterpreted bodies to the RegExp | 1064 // the scanner should pass uninterpreted bodies to the RegExp |
1049 // constructor. | 1065 // constructor. |
1050 StartLiteral(); | 1066 StartLiteral(); |
1051 if (seen_equal) | 1067 if (seen_equal) |
1052 AddChar('='); | 1068 AddChar('='); |
1053 | 1069 |
| 1070 unibrow::Predicate<unibrow::LineTerminator, 128>& kIsLineTerminator = |
| 1071 scanner_data_.kIsLineTerminator_; |
| 1072 |
1054 while (c0_ != '/' || in_character_class) { | 1073 while (c0_ != '/' || in_character_class) { |
1055 if (kIsLineTerminator.get(c0_) || c0_ < 0) | 1074 if (kIsLineTerminator.get(c0_) || c0_ < 0) |
1056 return false; | 1075 return false; |
1057 if (c0_ == '\\') { // escaped character | 1076 if (c0_ == '\\') { // escaped character |
1058 AddCharAdvance(); | 1077 AddCharAdvance(); |
1059 if (kIsLineTerminator.get(c0_) || c0_ < 0) | 1078 if (kIsLineTerminator.get(c0_) || c0_ < 0) |
1060 return false; | 1079 return false; |
1061 AddCharAdvance(); | 1080 AddCharAdvance(); |
1062 } else { // unescaped character | 1081 } else { // unescaped character |
1063 if (c0_ == '[') | 1082 if (c0_ == '[') |
1064 in_character_class = true; | 1083 in_character_class = true; |
1065 if (c0_ == ']') | 1084 if (c0_ == ']') |
1066 in_character_class = false; | 1085 in_character_class = false; |
1067 AddCharAdvance(); | 1086 AddCharAdvance(); |
1068 } | 1087 } |
1069 } | 1088 } |
1070 Advance(); // consume '/' | 1089 Advance(); // consume '/' |
1071 | 1090 |
1072 TerminateLiteral(); | 1091 TerminateLiteral(); |
1073 | 1092 |
1074 return true; | 1093 return true; |
1075 } | 1094 } |
1076 | 1095 |
1077 bool Scanner::ScanRegExpFlags() { | 1096 bool Scanner::ScanRegExpFlags() { |
1078 // Scan regular expression flags. | 1097 // Scan regular expression flags. |
1079 StartLiteral(); | 1098 StartLiteral(); |
| 1099 unibrow::Predicate<IdentifierPart, 128>& kIsIdentifierPart = |
| 1100 scanner_data_.kIsIdentifierPart_; |
| 1101 |
1080 while (kIsIdentifierPart.get(c0_)) { | 1102 while (kIsIdentifierPart.get(c0_)) { |
1081 if (c0_ == '\\') { | 1103 if (c0_ == '\\') { |
1082 uc32 c = ScanIdentifierUnicodeEscape(); | 1104 uc32 c = ScanIdentifierUnicodeEscape(); |
1083 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) { | 1105 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) { |
1084 // We allow any escaped character, unlike the restriction on | 1106 // We allow any escaped character, unlike the restriction on |
1085 // IdentifierPart when it is used to build an IdentifierName. | 1107 // IdentifierPart when it is used to build an IdentifierName. |
1086 AddChar(c); | 1108 AddChar(c); |
1087 continue; | 1109 continue; |
1088 } | 1110 } |
1089 } | 1111 } |
1090 AddCharAdvance(); | 1112 AddCharAdvance(); |
1091 } | 1113 } |
1092 TerminateLiteral(); | 1114 TerminateLiteral(); |
1093 | 1115 |
1094 next_.location.end_pos = source_pos() - 1; | 1116 next_.location.end_pos = source_pos() - 1; |
1095 return true; | 1117 return true; |
1096 } | 1118 } |
1097 | 1119 |
1098 } } // namespace v8::internal | 1120 } } // namespace v8::internal |
OLD | NEW |