OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
6 | 6 |
7 #include <stdint.h> | 7 #include <stdint.h> |
8 | 8 |
9 #include <cmath> | 9 #include <cmath> |
10 | 10 |
(...skipping 227 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
238 // Spidermonkey. | 238 // Spidermonkey. |
239 return c == 0xFFFE; | 239 return c == 0xFFFE; |
240 } | 240 } |
241 | 241 |
242 | 242 |
243 bool Scanner::SkipWhiteSpace() { | 243 bool Scanner::SkipWhiteSpace() { |
244 int start_position = source_pos(); | 244 int start_position = source_pos(); |
245 | 245 |
246 while (true) { | 246 while (true) { |
247 while (true) { | 247 while (true) { |
| 248 // The unicode cache accepts unsigned inputs. |
| 249 if (c0_ < 0) break; |
248 // Advance as long as character is a WhiteSpace or LineTerminator. | 250 // Advance as long as character is a WhiteSpace or LineTerminator. |
249 // Remember if the latter is the case. | 251 // Remember if the latter is the case. |
250 if (unicode_cache_->IsLineTerminator(c0_)) { | 252 if (unicode_cache_->IsLineTerminator(c0_)) { |
251 has_line_terminator_before_next_ = true; | 253 has_line_terminator_before_next_ = true; |
252 } else if (!unicode_cache_->IsWhiteSpace(c0_) && | 254 } else if (!unicode_cache_->IsWhiteSpace(c0_) && |
253 !IsLittleEndianByteOrderMark(c0_)) { | 255 !IsLittleEndianByteOrderMark(c0_)) { |
254 break; | 256 break; |
255 } | 257 } |
256 Advance(); | 258 Advance(); |
257 } | 259 } |
(...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
358 } | 360 } |
359 | 361 |
360 | 362 |
361 Token::Value Scanner::SkipMultiLineComment() { | 363 Token::Value Scanner::SkipMultiLineComment() { |
362 DCHECK(c0_ == '*'); | 364 DCHECK(c0_ == '*'); |
363 Advance(); | 365 Advance(); |
364 | 366 |
365 while (c0_ >= 0) { | 367 while (c0_ >= 0) { |
366 uc32 ch = c0_; | 368 uc32 ch = c0_; |
367 Advance(); | 369 Advance(); |
368 if (unicode_cache_->IsLineTerminator(ch)) { | 370 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(ch)) { |
369 // Following ECMA-262, section 7.4, a comment containing | 371 // Following ECMA-262, section 7.4, a comment containing |
370 // a newline will make the comment count as a line-terminator. | 372 // a newline will make the comment count as a line-terminator. |
371 has_multiline_comment_before_next_ = true; | 373 has_multiline_comment_before_next_ = true; |
372 } | 374 } |
373 // If we have reached the end of the multi-line comment, we | 375 // If we have reached the end of the multi-line comment, we |
374 // consume the '/' and insert a whitespace. This way all | 376 // consume the '/' and insert a whitespace. This way all |
375 // multi-line comments are treated as whitespace. | 377 // multi-line comments are treated as whitespace. |
376 if (ch == '*' && c0_ == '/') { | 378 if (ch == '*' && c0_ == '/') { |
377 c0_ = ' '; | 379 c0_ = ' '; |
378 return Token::WHITESPACE; | 380 return Token::WHITESPACE; |
(...skipping 239 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
618 | 620 |
619 case '?': | 621 case '?': |
620 token = Select(Token::CONDITIONAL); | 622 token = Select(Token::CONDITIONAL); |
621 break; | 623 break; |
622 | 624 |
623 case '~': | 625 case '~': |
624 token = Select(Token::BIT_NOT); | 626 token = Select(Token::BIT_NOT); |
625 break; | 627 break; |
626 | 628 |
627 default: | 629 default: |
628 if (unicode_cache_->IsIdentifierStart(c0_)) { | 630 if (c0_ < 0) { |
| 631 token = Token::EOS; |
| 632 } else if (unicode_cache_->IsIdentifierStart(c0_)) { |
629 token = ScanIdentifierOrKeyword(); | 633 token = ScanIdentifierOrKeyword(); |
630 } else if (IsDecimalDigit(c0_)) { | 634 } else if (IsDecimalDigit(c0_)) { |
631 token = ScanNumber(false); | 635 token = ScanNumber(false); |
632 } else if (SkipWhiteSpace()) { | 636 } else if (SkipWhiteSpace()) { |
633 token = Token::WHITESPACE; | 637 token = Token::WHITESPACE; |
634 } else if (c0_ < 0) { | |
635 token = Token::EOS; | |
636 } else { | 638 } else { |
637 token = Select(Token::ILLEGAL); | 639 token = Select(Token::ILLEGAL); |
638 } | 640 } |
639 break; | 641 break; |
640 } | 642 } |
641 | 643 |
642 // Continue scanning for tokens as long as we're just skipping | 644 // Continue scanning for tokens as long as we're just skipping |
643 // whitespace. | 645 // whitespace. |
644 } while (token == Token::WHITESPACE); | 646 } while (token == Token::WHITESPACE); |
645 | 647 |
(...skipping 21 matching lines...) Expand all Loading... |
667 } | 669 } |
668 Scan(); | 670 Scan(); |
669 } | 671 } |
670 | 672 |
671 | 673 |
672 bool Scanner::ScanEscape() { | 674 bool Scanner::ScanEscape() { |
673 uc32 c = c0_; | 675 uc32 c = c0_; |
674 Advance(); | 676 Advance(); |
675 | 677 |
676 // Skip escaped newlines. | 678 // Skip escaped newlines. |
677 if (unicode_cache_->IsLineTerminator(c)) { | 679 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) { |
678 // Allow CR+LF newlines in multiline string literals. | 680 // Allow CR+LF newlines in multiline string literals. |
679 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); | 681 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); |
680 // Allow LF+CR newlines in multiline string literals. | 682 // Allow LF+CR newlines in multiline string literals. |
681 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); | 683 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); |
682 return true; | 684 return true; |
683 } | 685 } |
684 | 686 |
685 switch (c) { | 687 switch (c) { |
686 case '\'': // fall through | 688 case '\'': // fall through |
687 case '"' : // fall through | 689 case '"' : // fall through |
(...skipping 176 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
864 // we must have at least one decimal digit after 'e'/'E' | 866 // we must have at least one decimal digit after 'e'/'E' |
865 return Token::ILLEGAL; | 867 return Token::ILLEGAL; |
866 } | 868 } |
867 ScanDecimalDigits(); | 869 ScanDecimalDigits(); |
868 } | 870 } |
869 | 871 |
870 // The source character immediately following a numeric literal must | 872 // The source character immediately following a numeric literal must |
871 // not be an identifier start or a decimal digit; see ECMA-262 | 873 // not be an identifier start or a decimal digit; see ECMA-262 |
872 // section 7.8.3, page 17 (note that we read only one decimal digit | 874 // section 7.8.3, page 17 (note that we read only one decimal digit |
873 // if the value is 0). | 875 // if the value is 0). |
874 if (IsDecimalDigit(c0_) || unicode_cache_->IsIdentifierStart(c0_)) | 876 if (IsDecimalDigit(c0_) || |
| 877 (c0_ >= 0 && unicode_cache_->IsIdentifierStart(c0_))) |
875 return Token::ILLEGAL; | 878 return Token::ILLEGAL; |
876 | 879 |
877 literal.Complete(); | 880 literal.Complete(); |
878 | 881 |
879 return Token::NUMBER; | 882 return Token::NUMBER; |
880 } | 883 } |
881 | 884 |
882 | 885 |
883 uc32 Scanner::ScanIdentifierUnicodeEscape() { | 886 uc32 Scanner::ScanIdentifierUnicodeEscape() { |
884 Advance(); | 887 Advance(); |
(...skipping 147 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1032 } | 1035 } |
1033 AddLiteralChar(c); | 1036 AddLiteralChar(c); |
1034 return ScanIdentifierSuffix(&literal); | 1037 return ScanIdentifierSuffix(&literal); |
1035 } | 1038 } |
1036 | 1039 |
1037 uc32 first_char = c0_; | 1040 uc32 first_char = c0_; |
1038 Advance(); | 1041 Advance(); |
1039 AddLiteralChar(first_char); | 1042 AddLiteralChar(first_char); |
1040 | 1043 |
1041 // Scan the rest of the identifier characters. | 1044 // Scan the rest of the identifier characters. |
1042 while (unicode_cache_->IsIdentifierPart(c0_)) { | 1045 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { |
1043 if (c0_ != '\\') { | 1046 if (c0_ != '\\') { |
1044 uc32 next_char = c0_; | 1047 uc32 next_char = c0_; |
1045 Advance(); | 1048 Advance(); |
1046 AddLiteralChar(next_char); | 1049 AddLiteralChar(next_char); |
1047 continue; | 1050 continue; |
1048 } | 1051 } |
1049 // Fallthrough if no longer able to complete keyword. | 1052 // Fallthrough if no longer able to complete keyword. |
1050 return ScanIdentifierSuffix(&literal); | 1053 return ScanIdentifierSuffix(&literal); |
1051 } | 1054 } |
1052 | 1055 |
1053 literal.Complete(); | 1056 literal.Complete(); |
1054 | 1057 |
1055 if (next_.literal_chars->is_one_byte()) { | 1058 if (next_.literal_chars->is_one_byte()) { |
1056 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal(); | 1059 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal(); |
1057 return KeywordOrIdentifierToken(chars.start(), | 1060 return KeywordOrIdentifierToken(chars.start(), |
1058 chars.length(), | 1061 chars.length(), |
1059 harmony_scoping_, | 1062 harmony_scoping_, |
1060 harmony_modules_, | 1063 harmony_modules_, |
1061 harmony_classes_); | 1064 harmony_classes_); |
1062 } | 1065 } |
1063 | 1066 |
1064 return Token::IDENTIFIER; | 1067 return Token::IDENTIFIER; |
1065 } | 1068 } |
1066 | 1069 |
1067 | 1070 |
1068 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal) { | 1071 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal) { |
1069 // Scan the rest of the identifier characters. | 1072 // Scan the rest of the identifier characters. |
1070 while (unicode_cache_->IsIdentifierPart(c0_)) { | 1073 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { |
1071 if (c0_ == '\\') { | 1074 if (c0_ == '\\') { |
1072 uc32 c = ScanIdentifierUnicodeEscape(); | 1075 uc32 c = ScanIdentifierUnicodeEscape(); |
1073 // Only allow legal identifier part characters. | 1076 // Only allow legal identifier part characters. |
1074 if (c < 0 || | 1077 if (c < 0 || |
1075 c == '\\' || | 1078 c == '\\' || |
1076 !unicode_cache_->IsIdentifierPart(c)) { | 1079 !unicode_cache_->IsIdentifierPart(c)) { |
1077 return Token::ILLEGAL; | 1080 return Token::ILLEGAL; |
1078 } | 1081 } |
1079 AddLiteralChar(c); | 1082 AddLiteralChar(c); |
1080 } else { | 1083 } else { |
(...skipping 18 matching lines...) Expand all Loading... |
1099 | 1102 |
1100 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, | 1103 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, |
1101 // the scanner should pass uninterpreted bodies to the RegExp | 1104 // the scanner should pass uninterpreted bodies to the RegExp |
1102 // constructor. | 1105 // constructor. |
1103 LiteralScope literal(this); | 1106 LiteralScope literal(this); |
1104 if (seen_equal) { | 1107 if (seen_equal) { |
1105 AddLiteralChar('='); | 1108 AddLiteralChar('='); |
1106 } | 1109 } |
1107 | 1110 |
1108 while (c0_ != '/' || in_character_class) { | 1111 while (c0_ != '/' || in_character_class) { |
1109 if (unicode_cache_->IsLineTerminator(c0_) || c0_ < 0) return false; | 1112 if (c0_ < 0 || unicode_cache_->IsLineTerminator(c0_)) return false; |
1110 if (c0_ == '\\') { // Escape sequence. | 1113 if (c0_ == '\\') { // Escape sequence. |
1111 AddLiteralCharAdvance(); | 1114 AddLiteralCharAdvance(); |
1112 if (unicode_cache_->IsLineTerminator(c0_) || c0_ < 0) return false; | 1115 if (c0_ < 0 || unicode_cache_->IsLineTerminator(c0_)) return false; |
1113 AddLiteralCharAdvance(); | 1116 AddLiteralCharAdvance(); |
1114 // If the escape allows more characters, i.e., \x??, \u????, or \c?, | 1117 // If the escape allows more characters, i.e., \x??, \u????, or \c?, |
1115 // only "safe" characters are allowed (letters, digits, underscore), | 1118 // only "safe" characters are allowed (letters, digits, underscore), |
1116 // otherwise the escape isn't valid and the invalid character has | 1119 // otherwise the escape isn't valid and the invalid character has |
1117 // its normal meaning. I.e., we can just continue scanning without | 1120 // its normal meaning. I.e., we can just continue scanning without |
1118 // worrying whether the following characters are part of the escape | 1121 // worrying whether the following characters are part of the escape |
1119 // or not, since any '/', '\\' or '[' is guaranteed to not be part | 1122 // or not, since any '/', '\\' or '[' is guaranteed to not be part |
1120 // of the escape sequence. | 1123 // of the escape sequence. |
1121 | 1124 |
1122 // TODO(896): At some point, parse RegExps more thoroughly to capture | 1125 // TODO(896): At some point, parse RegExps more thoroughly to capture |
(...skipping 26 matching lines...) Expand all Loading... |
1149 ++hex_digits_read; | 1152 ++hex_digits_read; |
1150 } | 1153 } |
1151 } | 1154 } |
1152 return hex_digits_read == 4; | 1155 return hex_digits_read == 4; |
1153 } | 1156 } |
1154 | 1157 |
1155 | 1158 |
1156 bool Scanner::ScanRegExpFlags() { | 1159 bool Scanner::ScanRegExpFlags() { |
1157 // Scan regular expression flags. | 1160 // Scan regular expression flags. |
1158 LiteralScope literal(this); | 1161 LiteralScope literal(this); |
1159 while (unicode_cache_->IsIdentifierPart(c0_)) { | 1162 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { |
1160 if (c0_ != '\\') { | 1163 if (c0_ != '\\') { |
1161 AddLiteralCharAdvance(); | 1164 AddLiteralCharAdvance(); |
1162 } else { | 1165 } else { |
1163 if (!ScanLiteralUnicodeEscape()) { | 1166 if (!ScanLiteralUnicodeEscape()) { |
1164 return false; | 1167 return false; |
1165 } | 1168 } |
1166 Advance(); | 1169 Advance(); |
1167 } | 1170 } |
1168 } | 1171 } |
1169 literal.Complete(); | 1172 literal.Complete(); |
(...skipping 174 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1344 } | 1347 } |
1345 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); | 1348 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); |
1346 } | 1349 } |
1347 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); | 1350 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); |
1348 | 1351 |
1349 backing_store_.AddBlock(bytes); | 1352 backing_store_.AddBlock(bytes); |
1350 return backing_store_.EndSequence().start(); | 1353 return backing_store_.EndSequence().start(); |
1351 } | 1354 } |
1352 | 1355 |
1353 } } // namespace v8::internal | 1356 } } // namespace v8::internal |
OLD | NEW |