| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
| 6 | 6 |
| 7 #include <stdint.h> | 7 #include <stdint.h> |
| 8 | 8 |
| 9 #include <cmath> | 9 #include <cmath> |
| 10 | 10 |
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 47 // in object literals. | 47 // in object literals. |
| 48 Init(); | 48 Init(); |
| 49 // Skip initial whitespace allowing HTML comment ends just like | 49 // Skip initial whitespace allowing HTML comment ends just like |
| 50 // after a newline and scan first token. | 50 // after a newline and scan first token. |
| 51 has_line_terminator_before_next_ = true; | 51 has_line_terminator_before_next_ = true; |
| 52 SkipWhiteSpace(); | 52 SkipWhiteSpace(); |
| 53 Scan(); | 53 Scan(); |
| 54 } | 54 } |
| 55 | 55 |
| 56 | 56 |
| 57 uc32 Scanner::ScanHexNumber(int expected_length) { | 57 uc32 Scanner::ScanHexNumber(int expected_length, bool recordRaw) { |
| 58 DCHECK(expected_length <= 4); // prevent overflow | 58 DCHECK(expected_length <= 4); // prevent overflow |
| 59 | 59 |
| 60 uc32 digits[4] = { 0, 0, 0, 0 }; | 60 uc32 digits[4] = { 0, 0, 0, 0 }; |
| 61 uc32 x = 0; | 61 uc32 x = 0; |
| 62 for (int i = 0; i < expected_length; i++) { | 62 for (int i = 0; i < expected_length; i++) { |
| 63 digits[i] = c0_; | 63 digits[i] = c0_; |
| 64 int d = HexValue(c0_); | 64 int d = HexValue(c0_); |
| 65 if (d < 0) { | 65 if (d < 0) { |
| 66 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes | 66 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes |
| 67 // should be illegal, but other JS VMs just return the | 67 // should be illegal, but other JS VMs just return the |
| 68 // non-escaped version of the original character. | 68 // non-escaped version of the original character. |
| 69 | 69 |
| 70 // Push back digits that we have advanced past. | 70 // Push back digits that we have advanced past. |
| 71 for (int j = i-1; j >= 0; j--) { | 71 for (int j = i-1; j >= 0; j--) { |
| 72 PushBack(digits[j]); | 72 PushBack(digits[j]); |
| 73 } | 73 } |
| 74 return -1; | 74 return -1; |
| 75 } | 75 } |
| 76 x = x * 16 + d; | 76 x = x * 16 + d; |
| 77 if (recordRaw) { |
| 78 AddRawLiteralChar(c0_); |
| 79 } |
| 77 Advance(); | 80 Advance(); |
| 78 } | 81 } |
| 79 | 82 |
| 80 return x; | 83 return x; |
| 81 } | 84 } |
| 82 | 85 |
| 83 | 86 |
| 84 // Ensure that tokens can be stored in a byte. | 87 // Ensure that tokens can be stored in a byte. |
| 85 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); | 88 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); |
| 86 | 89 |
| (...skipping 317 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 404 PushBack('-'); // undo Advance() | 407 PushBack('-'); // undo Advance() |
| 405 } | 408 } |
| 406 PushBack('!'); // undo Advance() | 409 PushBack('!'); // undo Advance() |
| 407 DCHECK(c0_ == '!'); | 410 DCHECK(c0_ == '!'); |
| 408 return Token::LT; | 411 return Token::LT; |
| 409 } | 412 } |
| 410 | 413 |
| 411 | 414 |
| 412 void Scanner::Scan() { | 415 void Scanner::Scan() { |
| 413 next_.literal_chars = NULL; | 416 next_.literal_chars = NULL; |
| 417 next_.raw_literal_chars = NULL; |
| 414 Token::Value token; | 418 Token::Value token; |
| 419 |
| 415 do { | 420 do { |
| 416 // Remember the position of the next token | 421 // Remember the position of the next token |
| 417 next_.location.beg_pos = source_pos(); | 422 next_.location.beg_pos = source_pos(); |
| 418 | 423 |
| 419 switch (c0_) { | 424 switch (c0_) { |
| 420 case ' ': | 425 case ' ': |
| 421 case '\t': | 426 case '\t': |
| 422 Advance(); | 427 Advance(); |
| 423 token = Token::WHITESPACE; | 428 token = Token::WHITESPACE; |
| 424 break; | 429 break; |
| (...skipping 202 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 627 break; | 632 break; |
| 628 | 633 |
| 629 case '?': | 634 case '?': |
| 630 token = Select(Token::CONDITIONAL); | 635 token = Select(Token::CONDITIONAL); |
| 631 break; | 636 break; |
| 632 | 637 |
| 633 case '~': | 638 case '~': |
| 634 token = Select(Token::BIT_NOT); | 639 token = Select(Token::BIT_NOT); |
| 635 break; | 640 break; |
| 636 | 641 |
| 642 case '`': |
| 643 if (HarmonyTemplates()) { |
| 644 token = ScanTemplateSpan(); |
| 645 break; |
| 646 } |
| 647 |
| 637 default: | 648 default: |
| 638 if (unicode_cache_->IsIdentifierStart(c0_)) { | 649 if (unicode_cache_->IsIdentifierStart(c0_)) { |
| 639 token = ScanIdentifierOrKeyword(); | 650 token = ScanIdentifierOrKeyword(); |
| 640 } else if (IsDecimalDigit(c0_)) { | 651 } else if (IsDecimalDigit(c0_)) { |
| 641 token = ScanNumber(false); | 652 token = ScanNumber(false); |
| 642 } else if (SkipWhiteSpace()) { | 653 } else if (SkipWhiteSpace()) { |
| 643 token = Token::WHITESPACE; | 654 token = Token::WHITESPACE; |
| 644 } else if (c0_ < 0) { | 655 } else if (c0_ < 0) { |
| 645 token = Token::EOS; | 656 token = Token::EOS; |
| 646 } else { | 657 } else { |
| (...skipping 25 matching lines...) Expand all Loading... |
| 672 // This function is only called to seek to the location | 683 // This function is only called to seek to the location |
| 673 // of the end of a function (at the "}" token). It doesn't matter | 684 // of the end of a function (at the "}" token). It doesn't matter |
| 674 // whether there was a line terminator in the part we skip. | 685 // whether there was a line terminator in the part we skip. |
| 675 has_line_terminator_before_next_ = false; | 686 has_line_terminator_before_next_ = false; |
| 676 has_multiline_comment_before_next_ = false; | 687 has_multiline_comment_before_next_ = false; |
| 677 } | 688 } |
| 678 Scan(); | 689 Scan(); |
| 679 } | 690 } |
| 680 | 691 |
| 681 | 692 |
| 682 bool Scanner::ScanEscape() { | 693 bool Scanner::ScanEscape(bool recordRaw) { |
| 683 uc32 c = c0_; | 694 uc32 c = c0_; |
| 695 uc32 rc = c; |
| 696 bool singleCharEscape = true; |
| 684 Advance(); | 697 Advance(); |
| 685 | 698 |
| 686 // Skip escaped newlines. | 699 // Skip escaped newlines. |
| 687 if (unicode_cache_->IsLineTerminator(c)) { | 700 if (unicode_cache_->IsLineTerminator(c)) { |
| 688 // Allow CR+LF newlines in multiline string literals. | 701 // Allow CR+LF newlines in multiline string literals. |
| 689 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); | 702 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); |
| 690 // Allow LF+CR newlines in multiline string literals. | 703 // Allow LF+CR newlines in multiline string literals. |
| 691 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); | 704 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); |
| 692 return true; | 705 return true; |
| 693 } | 706 } |
| 694 | 707 |
| 695 switch (c) { | 708 switch (c) { |
| 696 case '\'': // fall through | 709 case '\'': // fall through |
| 697 case '"' : // fall through | 710 case '"' : // fall through |
| 698 case '\\': break; | 711 case '\\': break; |
| 699 case 'b' : c = '\b'; break; | 712 case 'b' : c = '\b'; break; |
| 700 case 'f' : c = '\f'; break; | 713 case 'f' : c = '\f'; break; |
| 701 case 'n' : c = '\n'; break; | 714 case 'n' : c = '\n'; break; |
| 702 case 'r' : c = '\r'; break; | 715 case 'r' : c = '\r'; break; |
| 703 case 't' : c = '\t'; break; | 716 case 't' : c = '\t'; break; |
| 704 case 'u' : { | 717 case 'u' : { |
| 705 c = ScanHexNumber(4); | 718 if (recordRaw) AddRawLiteralChar('u'); |
| 719 singleCharEscape = false; |
| 720 c = ScanHexNumber(4, recordRaw); |
| 706 if (c < 0) return false; | 721 if (c < 0) return false; |
| 707 break; | 722 break; |
| 708 } | 723 } |
| 709 case 'v' : c = '\v'; break; | 724 case 'v' : c = '\v'; break; |
| 710 case 'x' : { | 725 case 'x' : { |
| 711 c = ScanHexNumber(2); | 726 if (recordRaw) AddRawLiteralChar('x'); |
| 727 singleCharEscape = false; |
| 728 c = ScanHexNumber(2, recordRaw); |
| 712 if (c < 0) return false; | 729 if (c < 0) return false; |
| 713 break; | 730 break; |
| 714 } | 731 } |
| 715 case '0' : // fall through | 732 case '0' : // fall through |
| 716 case '1' : // fall through | 733 case '1' : // fall through |
| 717 case '2' : // fall through | 734 case '2' : // fall through |
| 718 case '3' : // fall through | 735 case '3' : // fall through |
| 719 case '4' : // fall through | 736 case '4' : // fall through |
| 720 case '5' : // fall through | 737 case '5' : // fall through |
| 721 case '6' : // fall through | 738 case '6' : // fall through |
| 722 case '7' : c = ScanOctalEscape(c, 2); break; | 739 case '7': |
| 740 singleCharEscape = false; |
| 741 c = ScanOctalEscape(c, 2, recordRaw); |
| 742 break; |
| 723 } | 743 } |
| 724 | 744 |
| 725 // According to ECMA-262, section 7.8.4, characters not covered by the | 745 // According to ECMA-262, section 7.8.4, characters not covered by the |
| 726 // above cases should be illegal, but they are commonly handled as | 746 // above cases should be illegal, but they are commonly handled as |
| 727 // non-escaped characters by JS VMs. | 747 // non-escaped characters by JS VMs. |
| 748 if (singleCharEscape && recordRaw) AddRawLiteralChar(rc); |
| 728 AddLiteralChar(c); | 749 AddLiteralChar(c); |
| 729 return true; | 750 return true; |
| 730 } | 751 } |
| 731 | 752 |
| 732 | 753 |
| 733 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of | 754 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of |
| 734 // ECMA-262. Other JS VMs support them. | 755 // ECMA-262. Other JS VMs support them. |
| 735 uc32 Scanner::ScanOctalEscape(uc32 c, int length) { | 756 uc32 Scanner::ScanOctalEscape(uc32 c, int length, bool recordRaw) { |
| 736 uc32 x = c - '0'; | 757 uc32 x = c - '0'; |
| 737 int i = 0; | 758 int i = 0; |
| 738 for (; i < length; i++) { | 759 for (; i < length; i++) { |
| 739 int d = c0_ - '0'; | 760 int d = c0_ - '0'; |
| 740 if (d < 0 || d > 7) break; | 761 if (d < 0 || d > 7) break; |
| 741 int nx = x * 8 + d; | 762 int nx = x * 8 + d; |
| 742 if (nx >= 256) break; | 763 if (nx >= 256) break; |
| 743 x = nx; | 764 x = nx; |
| 765 if (recordRaw) { |
| 766 AddRawLiteralChar(c0_); |
| 767 } |
| 744 Advance(); | 768 Advance(); |
| 745 } | 769 } |
| 746 // Anything except '\0' is an octal escape sequence, illegal in strict mode. | 770 // Anything except '\0' is an octal escape sequence, illegal in strict mode. |
| 747 // Remember the position of octal escape sequences so that an error | 771 // Remember the position of octal escape sequences so that an error |
| 748 // can be reported later (in strict mode). | 772 // can be reported later (in strict mode). |
| 749 // We don't report the error immediately, because the octal escape can | 773 // We don't report the error immediately, because the octal escape can |
| 750 // occur before the "use strict" directive. | 774 // occur before the "use strict" directive. |
| 751 if (c != '0' || i > 0) { | 775 if (c != '0' || i > 0) { |
| 752 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1); | 776 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1); |
| 753 } | 777 } |
| (...skipping 17 matching lines...) Expand all Loading... |
| 771 } | 795 } |
| 772 } | 796 } |
| 773 if (c0_ != quote) return Token::ILLEGAL; | 797 if (c0_ != quote) return Token::ILLEGAL; |
| 774 literal.Complete(); | 798 literal.Complete(); |
| 775 | 799 |
| 776 Advance(); // consume quote | 800 Advance(); // consume quote |
| 777 return Token::STRING; | 801 return Token::STRING; |
| 778 } | 802 } |
| 779 | 803 |
| 780 | 804 |
| 805 Token::Value Scanner::ScanTemplateSpan() { |
| 806 if (next_.token == Token::RBRACE) { |
| 807 PushBack('}'); |
| 808 } |
| 809 next_.location.beg_pos = source_pos(); |
| 810 Token::Value result = Token::ILLEGAL; |
| 811 DCHECK(c0_ == '`' || c0_ == '}'); |
| 812 Advance(); // Consume ` or } |
| 813 |
| 814 LiteralScope literal(this); |
| 815 while (true) { |
| 816 uc32 c = c0_; |
| 817 Advance(); |
| 818 if (c == '`') { |
| 819 result = Token::TEMPLATE_TAIL; |
| 820 break; |
| 821 } else if (c == '$' && c0_ == '{') { |
| 822 Advance(); // Consume '{' |
| 823 result = Token::TEMPLATE_SPAN; |
| 824 break; |
| 825 } else if (c == '\\') { |
| 826 AddRawLiteralChar('\\'); |
| 827 if (unicode_cache_->IsLineTerminator(c0_)) { |
| 828 // The TV of LineContinuation :: \ LineTerminatorSequence is the empty |
| 829 // code unit sequence. |
| 830 do { |
| 831 uc32 lastChar = c0_; |
| 832 Advance(); |
| 833 if (lastChar == '\r' && c0_ == '\n') Advance(); |
| 834 AddRawLiteralChar('\n'); |
| 835 } while (unicode_cache_->IsLineTerminator(c0_)); |
| 836 } else if (c0_ == '0') { |
| 837 Advance(); |
| 838 AddRawLiteralChar('0'); |
| 839 AddLiteralChar('0'); |
| 840 } else { |
| 841 ScanEscape(true); |
| 842 } |
| 843 } else if (c < 0) { |
| 844 // Unterminated template literal |
| 845 PushBack(c); |
| 846 break; |
| 847 } else { |
| 848 // The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A. |
| 849 // The TRV of LineTerminatorSequence :: <CR><LF> is the sequence |
| 850 // consisting of the CV 0x000A. |
| 851 if (c == '\r') { |
| 852 if (c0_ == '\n') Advance(); |
| 853 c = '\n'; |
| 854 } |
| 855 AddLiteralChar(c); |
| 856 AddRawLiteralChar(c); |
| 857 } |
| 858 } |
| 859 literal.Complete(); |
| 860 next_.location.end_pos = source_pos(); |
| 861 next_.token = result; |
| 862 return result; |
| 863 } |
| 864 |
| 865 |
| 781 void Scanner::ScanDecimalDigits() { | 866 void Scanner::ScanDecimalDigits() { |
| 782 while (IsDecimalDigit(c0_)) | 867 while (IsDecimalDigit(c0_)) |
| 783 AddLiteralCharAdvance(); | 868 AddLiteralCharAdvance(); |
| 784 } | 869 } |
| 785 | 870 |
| 786 | 871 |
| 787 Token::Value Scanner::ScanNumber(bool seen_period) { | 872 Token::Value Scanner::ScanNumber(bool seen_period) { |
| 788 DCHECK(IsDecimalDigit(c0_)); // the first digit of the number or the fraction | 873 DCHECK(IsDecimalDigit(c0_)); // the first digit of the number or the fraction |
| 789 | 874 |
| 790 enum { DECIMAL, HEX, OCTAL, IMPLICIT_OCTAL, BINARY } kind = DECIMAL; | 875 enum { DECIMAL, HEX, OCTAL, IMPLICIT_OCTAL, BINARY } kind = DECIMAL; |
| (...skipping 403 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1194 | 1279 |
| 1195 | 1280 |
| 1196 const AstRawString* Scanner::CurrentSymbol(AstValueFactory* ast_value_factory) { | 1281 const AstRawString* Scanner::CurrentSymbol(AstValueFactory* ast_value_factory) { |
| 1197 if (is_literal_one_byte()) { | 1282 if (is_literal_one_byte()) { |
| 1198 return ast_value_factory->GetOneByteString(literal_one_byte_string()); | 1283 return ast_value_factory->GetOneByteString(literal_one_byte_string()); |
| 1199 } | 1284 } |
| 1200 return ast_value_factory->GetTwoByteString(literal_two_byte_string()); | 1285 return ast_value_factory->GetTwoByteString(literal_two_byte_string()); |
| 1201 } | 1286 } |
| 1202 | 1287 |
| 1203 | 1288 |
| 1289 const AstRawString* Scanner::CurrentRawSymbol( |
| 1290 AstValueFactory* ast_value_factory) { |
| 1291 if (is_raw_one_byte()) { |
| 1292 return ast_value_factory->GetOneByteString(raw_one_byte_string()); |
| 1293 } |
| 1294 return ast_value_factory->GetTwoByteString(raw_two_byte_string()); |
| 1295 } |
| 1296 |
| 1297 |
| 1204 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) { | 1298 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) { |
| 1205 if (is_next_literal_one_byte()) { | 1299 if (is_next_literal_one_byte()) { |
| 1206 return ast_value_factory->GetOneByteString(next_literal_one_byte_string()); | 1300 return ast_value_factory->GetOneByteString(next_literal_one_byte_string()); |
| 1207 } | 1301 } |
| 1208 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string()); | 1302 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string()); |
| 1209 } | 1303 } |
| 1210 | 1304 |
| 1211 | 1305 |
| 1306 const AstRawString* Scanner::NextRawSymbol(AstValueFactory* ast_value_factory) { |
| 1307 if (is_next_raw_one_byte()) { |
| 1308 return ast_value_factory->GetOneByteString(next_raw_one_byte_string()); |
| 1309 } |
| 1310 return ast_value_factory->GetTwoByteString(next_raw_two_byte_string()); |
| 1311 } |
| 1312 |
| 1313 |
| 1212 double Scanner::DoubleValue() { | 1314 double Scanner::DoubleValue() { |
| 1213 DCHECK(is_literal_one_byte()); | 1315 DCHECK(is_literal_one_byte()); |
| 1214 return StringToDouble( | 1316 return StringToDouble( |
| 1215 unicode_cache_, | 1317 unicode_cache_, |
| 1216 literal_one_byte_string(), | 1318 literal_one_byte_string(), |
| 1217 ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY); | 1319 ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY); |
| 1218 } | 1320 } |
| 1219 | 1321 |
| 1220 | 1322 |
| 1221 int Scanner::FindNumber(DuplicateFinder* finder, int value) { | 1323 int Scanner::FindNumber(DuplicateFinder* finder, int value) { |
| (...skipping 142 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1364 } | 1466 } |
| 1365 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); | 1467 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); |
| 1366 } | 1468 } |
| 1367 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); | 1469 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); |
| 1368 | 1470 |
| 1369 backing_store_.AddBlock(bytes); | 1471 backing_store_.AddBlock(bytes); |
| 1370 return backing_store_.EndSequence().start(); | 1472 return backing_store_.EndSequence().start(); |
| 1371 } | 1473 } |
| 1372 | 1474 |
| 1373 } } // namespace v8::internal | 1475 } } // namespace v8::internal |
| OLD | NEW |