| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
| 6 | 6 |
| 7 #include <cmath> | 7 #include <cmath> |
| 8 | 8 |
| 9 #include "src/v8.h" | 9 #include "src/v8.h" |
| 10 | 10 |
| (...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 46 Init(); | 46 Init(); |
| 47 // Skip initial whitespace allowing HTML comment ends just like | 47 // Skip initial whitespace allowing HTML comment ends just like |
| 48 // after a newline and scan first token. | 48 // after a newline and scan first token. |
| 49 has_line_terminator_before_next_ = true; | 49 has_line_terminator_before_next_ = true; |
| 50 SkipWhiteSpace(); | 50 SkipWhiteSpace(); |
| 51 Scan(); | 51 Scan(); |
| 52 } | 52 } |
| 53 | 53 |
| 54 | 54 |
| 55 uc32 Scanner::ScanHexNumber(int expected_length) { | 55 uc32 Scanner::ScanHexNumber(int expected_length) { |
| 56 ASSERT(expected_length <= 4); // prevent overflow | 56 DCHECK(expected_length <= 4); // prevent overflow |
| 57 | 57 |
| 58 uc32 digits[4] = { 0, 0, 0, 0 }; | 58 uc32 digits[4] = { 0, 0, 0, 0 }; |
| 59 uc32 x = 0; | 59 uc32 x = 0; |
| 60 for (int i = 0; i < expected_length; i++) { | 60 for (int i = 0; i < expected_length; i++) { |
| 61 digits[i] = c0_; | 61 digits[i] = c0_; |
| 62 int d = HexValue(c0_); | 62 int d = HexValue(c0_); |
| 63 if (d < 0) { | 63 if (d < 0) { |
| 64 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes | 64 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes |
| 65 // should be illegal, but other JS VMs just return the | 65 // should be illegal, but other JS VMs just return the |
| 66 // non-escaped version of the original character. | 66 // non-escaped version of the original character. |
| (...skipping 293 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 360 if (!unicode_cache_->IsWhiteSpace(c0_)) { | 360 if (!unicode_cache_->IsWhiteSpace(c0_)) { |
| 361 value->Reset(); | 361 value->Reset(); |
| 362 break; | 362 break; |
| 363 } | 363 } |
| 364 Advance(); | 364 Advance(); |
| 365 } | 365 } |
| 366 } | 366 } |
| 367 | 367 |
| 368 | 368 |
| 369 Token::Value Scanner::SkipMultiLineComment() { | 369 Token::Value Scanner::SkipMultiLineComment() { |
| 370 ASSERT(c0_ == '*'); | 370 DCHECK(c0_ == '*'); |
| 371 Advance(); | 371 Advance(); |
| 372 | 372 |
| 373 while (c0_ >= 0) { | 373 while (c0_ >= 0) { |
| 374 uc32 ch = c0_; | 374 uc32 ch = c0_; |
| 375 Advance(); | 375 Advance(); |
| 376 if (unicode_cache_->IsLineTerminator(ch)) { | 376 if (unicode_cache_->IsLineTerminator(ch)) { |
| 377 // Following ECMA-262, section 7.4, a comment containing | 377 // Following ECMA-262, section 7.4, a comment containing |
| 378 // a newline will make the comment count as a line-terminator. | 378 // a newline will make the comment count as a line-terminator. |
| 379 has_multiline_comment_before_next_ = true; | 379 has_multiline_comment_before_next_ = true; |
| 380 } | 380 } |
| 381 // If we have reached the end of the multi-line comment, we | 381 // If we have reached the end of the multi-line comment, we |
| 382 // consume the '/' and insert a whitespace. This way all | 382 // consume the '/' and insert a whitespace. This way all |
| 383 // multi-line comments are treated as whitespace. | 383 // multi-line comments are treated as whitespace. |
| 384 if (ch == '*' && c0_ == '/') { | 384 if (ch == '*' && c0_ == '/') { |
| 385 c0_ = ' '; | 385 c0_ = ' '; |
| 386 return Token::WHITESPACE; | 386 return Token::WHITESPACE; |
| 387 } | 387 } |
| 388 } | 388 } |
| 389 | 389 |
| 390 // Unterminated multi-line comment. | 390 // Unterminated multi-line comment. |
| 391 return Token::ILLEGAL; | 391 return Token::ILLEGAL; |
| 392 } | 392 } |
| 393 | 393 |
| 394 | 394 |
| 395 Token::Value Scanner::ScanHtmlComment() { | 395 Token::Value Scanner::ScanHtmlComment() { |
| 396 // Check for <!-- comments. | 396 // Check for <!-- comments. |
| 397 ASSERT(c0_ == '!'); | 397 DCHECK(c0_ == '!'); |
| 398 Advance(); | 398 Advance(); |
| 399 if (c0_ == '-') { | 399 if (c0_ == '-') { |
| 400 Advance(); | 400 Advance(); |
| 401 if (c0_ == '-') return SkipSingleLineComment(); | 401 if (c0_ == '-') return SkipSingleLineComment(); |
| 402 PushBack('-'); // undo Advance() | 402 PushBack('-'); // undo Advance() |
| 403 } | 403 } |
| 404 PushBack('!'); // undo Advance() | 404 PushBack('!'); // undo Advance() |
| 405 ASSERT(c0_ == '!'); | 405 DCHECK(c0_ == '!'); |
| 406 return Token::LT; | 406 return Token::LT; |
| 407 } | 407 } |
| 408 | 408 |
| 409 | 409 |
| 410 void Scanner::Scan() { | 410 void Scanner::Scan() { |
| 411 next_.literal_chars = NULL; | 411 next_.literal_chars = NULL; |
| 412 Token::Value token; | 412 Token::Value token; |
| 413 do { | 413 do { |
| 414 // Remember the position of the next token | 414 // Remember the position of the next token |
| 415 next_.location.beg_pos = source_pos(); | 415 next_.location.beg_pos = source_pos(); |
| (...skipping 238 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 654 next_.location.end_pos = source_pos(); | 654 next_.location.end_pos = source_pos(); |
| 655 next_.token = token; | 655 next_.token = token; |
| 656 } | 656 } |
| 657 | 657 |
| 658 | 658 |
| 659 void Scanner::SeekForward(int pos) { | 659 void Scanner::SeekForward(int pos) { |
| 660 // After this call, we will have the token at the given position as | 660 // After this call, we will have the token at the given position as |
| 661 // the "next" token. The "current" token will be invalid. | 661 // the "next" token. The "current" token will be invalid. |
| 662 if (pos == next_.location.beg_pos) return; | 662 if (pos == next_.location.beg_pos) return; |
| 663 int current_pos = source_pos(); | 663 int current_pos = source_pos(); |
| 664 ASSERT_EQ(next_.location.end_pos, current_pos); | 664 DCHECK_EQ(next_.location.end_pos, current_pos); |
| 665 // Positions inside the lookahead token aren't supported. | 665 // Positions inside the lookahead token aren't supported. |
| 666 ASSERT(pos >= current_pos); | 666 DCHECK(pos >= current_pos); |
| 667 if (pos != current_pos) { | 667 if (pos != current_pos) { |
| 668 source_->SeekForward(pos - source_->pos()); | 668 source_->SeekForward(pos - source_->pos()); |
| 669 Advance(); | 669 Advance(); |
| 670 // This function is only called to seek to the location | 670 // This function is only called to seek to the location |
| 671 // of the end of a function (at the "}" token). It doesn't matter | 671 // of the end of a function (at the "}" token). It doesn't matter |
| 672 // whether there was a line terminator in the part we skip. | 672 // whether there was a line terminator in the part we skip. |
| 673 has_line_terminator_before_next_ = false; | 673 has_line_terminator_before_next_ = false; |
| 674 has_multiline_comment_before_next_ = false; | 674 has_multiline_comment_before_next_ = false; |
| 675 } | 675 } |
| 676 Scan(); | 676 Scan(); |
| (...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 776 } | 776 } |
| 777 | 777 |
| 778 | 778 |
| 779 void Scanner::ScanDecimalDigits() { | 779 void Scanner::ScanDecimalDigits() { |
| 780 while (IsDecimalDigit(c0_)) | 780 while (IsDecimalDigit(c0_)) |
| 781 AddLiteralCharAdvance(); | 781 AddLiteralCharAdvance(); |
| 782 } | 782 } |
| 783 | 783 |
| 784 | 784 |
| 785 Token::Value Scanner::ScanNumber(bool seen_period) { | 785 Token::Value Scanner::ScanNumber(bool seen_period) { |
| 786 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction | 786 DCHECK(IsDecimalDigit(c0_)); // the first digit of the number or the fraction |
| 787 | 787 |
| 788 enum { DECIMAL, HEX, OCTAL, IMPLICIT_OCTAL, BINARY } kind = DECIMAL; | 788 enum { DECIMAL, HEX, OCTAL, IMPLICIT_OCTAL, BINARY } kind = DECIMAL; |
| 789 | 789 |
| 790 LiteralScope literal(this); | 790 LiteralScope literal(this); |
| 791 if (seen_period) { | 791 if (seen_period) { |
| 792 // we have already seen a decimal point of the float | 792 // we have already seen a decimal point of the float |
| 793 AddLiteralChar('.'); | 793 AddLiteralChar('.'); |
| 794 ScanDecimalDigits(); // we know we have at least one digit | 794 ScanDecimalDigits(); // we know we have at least one digit |
| 795 | 795 |
| 796 } else { | 796 } else { |
| (...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 855 ScanDecimalDigits(); // optional | 855 ScanDecimalDigits(); // optional |
| 856 if (c0_ == '.') { | 856 if (c0_ == '.') { |
| 857 AddLiteralCharAdvance(); | 857 AddLiteralCharAdvance(); |
| 858 ScanDecimalDigits(); // optional | 858 ScanDecimalDigits(); // optional |
| 859 } | 859 } |
| 860 } | 860 } |
| 861 } | 861 } |
| 862 | 862 |
| 863 // scan exponent, if any | 863 // scan exponent, if any |
| 864 if (c0_ == 'e' || c0_ == 'E') { | 864 if (c0_ == 'e' || c0_ == 'E') { |
| 865 ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number | 865 DCHECK(kind != HEX); // 'e'/'E' must be scanned as part of the hex number |
| 866 if (kind != DECIMAL) return Token::ILLEGAL; | 866 if (kind != DECIMAL) return Token::ILLEGAL; |
| 867 // scan exponent | 867 // scan exponent |
| 868 AddLiteralCharAdvance(); | 868 AddLiteralCharAdvance(); |
| 869 if (c0_ == '+' || c0_ == '-') | 869 if (c0_ == '+' || c0_ == '-') |
| 870 AddLiteralCharAdvance(); | 870 AddLiteralCharAdvance(); |
| 871 if (!IsDecimalDigit(c0_)) { | 871 if (!IsDecimalDigit(c0_)) { |
| 872 // we must have at least one decimal digit after 'e'/'E' | 872 // we must have at least one decimal digit after 'e'/'E' |
| 873 return Token::ILLEGAL; | 873 return Token::ILLEGAL; |
| 874 } | 874 } |
| 875 ScanDecimalDigits(); | 875 ScanDecimalDigits(); |
| (...skipping 88 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 964 KEYWORD("while", Token::WHILE) \ | 964 KEYWORD("while", Token::WHILE) \ |
| 965 KEYWORD("with", Token::WITH) \ | 965 KEYWORD("with", Token::WITH) \ |
| 966 KEYWORD_GROUP('y') \ | 966 KEYWORD_GROUP('y') \ |
| 967 KEYWORD("yield", Token::YIELD) | 967 KEYWORD("yield", Token::YIELD) |
| 968 | 968 |
| 969 | 969 |
| 970 static Token::Value KeywordOrIdentifierToken(const uint8_t* input, | 970 static Token::Value KeywordOrIdentifierToken(const uint8_t* input, |
| 971 int input_length, | 971 int input_length, |
| 972 bool harmony_scoping, | 972 bool harmony_scoping, |
| 973 bool harmony_modules) { | 973 bool harmony_modules) { |
| 974 ASSERT(input_length >= 1); | 974 DCHECK(input_length >= 1); |
| 975 const int kMinLength = 2; | 975 const int kMinLength = 2; |
| 976 const int kMaxLength = 10; | 976 const int kMaxLength = 10; |
| 977 if (input_length < kMinLength || input_length > kMaxLength) { | 977 if (input_length < kMinLength || input_length > kMaxLength) { |
| 978 return Token::IDENTIFIER; | 978 return Token::IDENTIFIER; |
| 979 } | 979 } |
| 980 switch (input[0]) { | 980 switch (input[0]) { |
| 981 default: | 981 default: |
| 982 #define KEYWORD_GROUP_CASE(ch) \ | 982 #define KEYWORD_GROUP_CASE(ch) \ |
| 983 break; \ | 983 break; \ |
| 984 case ch: | 984 case ch: |
| (...skipping 27 matching lines...) Expand all Loading... |
| 1012 const AstRawString* string) const { | 1012 const AstRawString* string) const { |
| 1013 // Keywords are always 1-byte strings. | 1013 // Keywords are always 1-byte strings. |
| 1014 return string->is_one_byte() && | 1014 return string->is_one_byte() && |
| 1015 Token::FUTURE_STRICT_RESERVED_WORD == | 1015 Token::FUTURE_STRICT_RESERVED_WORD == |
| 1016 KeywordOrIdentifierToken(string->raw_data(), string->length(), | 1016 KeywordOrIdentifierToken(string->raw_data(), string->length(), |
| 1017 harmony_scoping_, harmony_modules_); | 1017 harmony_scoping_, harmony_modules_); |
| 1018 } | 1018 } |
| 1019 | 1019 |
| 1020 | 1020 |
| 1021 Token::Value Scanner::ScanIdentifierOrKeyword() { | 1021 Token::Value Scanner::ScanIdentifierOrKeyword() { |
| 1022 ASSERT(unicode_cache_->IsIdentifierStart(c0_)); | 1022 DCHECK(unicode_cache_->IsIdentifierStart(c0_)); |
| 1023 LiteralScope literal(this); | 1023 LiteralScope literal(this); |
| 1024 // Scan identifier start character. | 1024 // Scan identifier start character. |
| 1025 if (c0_ == '\\') { | 1025 if (c0_ == '\\') { |
| 1026 uc32 c = ScanIdentifierUnicodeEscape(); | 1026 uc32 c = ScanIdentifierUnicodeEscape(); |
| 1027 // Only allow legal identifier start characters. | 1027 // Only allow legal identifier start characters. |
| 1028 if (c < 0 || | 1028 if (c < 0 || |
| 1029 c == '\\' || // No recursive escapes. | 1029 c == '\\' || // No recursive escapes. |
| 1030 !unicode_cache_->IsIdentifierStart(c)) { | 1030 !unicode_cache_->IsIdentifierStart(c)) { |
| 1031 return Token::ILLEGAL; | 1031 return Token::ILLEGAL; |
| 1032 } | 1032 } |
| (...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1128 } | 1128 } |
| 1129 Advance(); // consume '/' | 1129 Advance(); // consume '/' |
| 1130 | 1130 |
| 1131 literal.Complete(); | 1131 literal.Complete(); |
| 1132 | 1132 |
| 1133 return true; | 1133 return true; |
| 1134 } | 1134 } |
| 1135 | 1135 |
| 1136 | 1136 |
| 1137 bool Scanner::ScanLiteralUnicodeEscape() { | 1137 bool Scanner::ScanLiteralUnicodeEscape() { |
| 1138 ASSERT(c0_ == '\\'); | 1138 DCHECK(c0_ == '\\'); |
| 1139 uc32 chars_read[6] = {'\\', 'u', 0, 0, 0, 0}; | 1139 uc32 chars_read[6] = {'\\', 'u', 0, 0, 0, 0}; |
| 1140 Advance(); | 1140 Advance(); |
| 1141 int i = 1; | 1141 int i = 1; |
| 1142 if (c0_ == 'u') { | 1142 if (c0_ == 'u') { |
| 1143 i++; | 1143 i++; |
| 1144 while (i < 6) { | 1144 while (i < 6) { |
| 1145 Advance(); | 1145 Advance(); |
| 1146 if (!IsHexDigit(c0_)) break; | 1146 if (!IsHexDigit(c0_)) break; |
| 1147 chars_read[i] = c0_; | 1147 chars_read[i] = c0_; |
| 1148 i++; | 1148 i++; |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1194 | 1194 |
| 1195 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) { | 1195 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) { |
| 1196 if (is_next_literal_one_byte()) { | 1196 if (is_next_literal_one_byte()) { |
| 1197 return ast_value_factory->GetOneByteString(next_literal_one_byte_string()); | 1197 return ast_value_factory->GetOneByteString(next_literal_one_byte_string()); |
| 1198 } | 1198 } |
| 1199 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string()); | 1199 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string()); |
| 1200 } | 1200 } |
| 1201 | 1201 |
| 1202 | 1202 |
| 1203 double Scanner::DoubleValue() { | 1203 double Scanner::DoubleValue() { |
| 1204 ASSERT(is_literal_one_byte()); | 1204 DCHECK(is_literal_one_byte()); |
| 1205 return StringToDouble( | 1205 return StringToDouble( |
| 1206 unicode_cache_, | 1206 unicode_cache_, |
| 1207 literal_one_byte_string(), | 1207 literal_one_byte_string(), |
| 1208 ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY); | 1208 ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY); |
| 1209 } | 1209 } |
| 1210 | 1210 |
| 1211 | 1211 |
| 1212 int Scanner::FindNumber(DuplicateFinder* finder, int value) { | 1212 int Scanner::FindNumber(DuplicateFinder* finder, int value) { |
| 1213 return finder->AddNumber(literal_one_byte_string(), value); | 1213 return finder->AddNumber(literal_one_byte_string(), value); |
| 1214 } | 1214 } |
| (...skipping 24 matching lines...) Expand all Loading... |
| 1239 byte* encoding = BackupKey(key, is_one_byte); | 1239 byte* encoding = BackupKey(key, is_one_byte); |
| 1240 HashMap::Entry* entry = map_.Lookup(encoding, hash, true); | 1240 HashMap::Entry* entry = map_.Lookup(encoding, hash, true); |
| 1241 int old_value = static_cast<int>(reinterpret_cast<intptr_t>(entry->value)); | 1241 int old_value = static_cast<int>(reinterpret_cast<intptr_t>(entry->value)); |
| 1242 entry->value = | 1242 entry->value = |
| 1243 reinterpret_cast<void*>(static_cast<intptr_t>(value | old_value)); | 1243 reinterpret_cast<void*>(static_cast<intptr_t>(value | old_value)); |
| 1244 return old_value; | 1244 return old_value; |
| 1245 } | 1245 } |
| 1246 | 1246 |
| 1247 | 1247 |
| 1248 int DuplicateFinder::AddNumber(Vector<const uint8_t> key, int value) { | 1248 int DuplicateFinder::AddNumber(Vector<const uint8_t> key, int value) { |
| 1249 ASSERT(key.length() > 0); | 1249 DCHECK(key.length() > 0); |
| 1250 // Quick check for already being in canonical form. | 1250 // Quick check for already being in canonical form. |
| 1251 if (IsNumberCanonical(key)) { | 1251 if (IsNumberCanonical(key)) { |
| 1252 return AddOneByteSymbol(key, value); | 1252 return AddOneByteSymbol(key, value); |
| 1253 } | 1253 } |
| 1254 | 1254 |
| 1255 int flags = ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY; | 1255 int flags = ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY; |
| 1256 double double_value = StringToDouble( | 1256 double double_value = StringToDouble( |
| 1257 unicode_constants_, key, flags, 0.0); | 1257 unicode_constants_, key, flags, 0.0); |
| 1258 int length; | 1258 int length; |
| 1259 const char* string; | 1259 const char* string; |
| (...skipping 95 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1355 } | 1355 } |
| 1356 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); | 1356 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); |
| 1357 } | 1357 } |
| 1358 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); | 1358 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); |
| 1359 | 1359 |
| 1360 backing_store_.AddBlock(bytes); | 1360 backing_store_.AddBlock(bytes); |
| 1361 return backing_store_.EndSequence().start(); | 1361 return backing_store_.EndSequence().start(); |
| 1362 } | 1362 } |
| 1363 | 1363 |
| 1364 } } // namespace v8::internal | 1364 } } // namespace v8::internal |
| OLD | NEW |