Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. | 
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be | 
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. | 
| 4 | 4 | 
| 5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. | 
| 6 | 6 | 
| 7 #include <stdint.h> | 7 #include <stdint.h> | 
| 8 | 8 | 
| 9 #include <cmath> | 9 #include <cmath> | 
| 10 | 10 | 
| (...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 47 // in object literals. | 47 // in object literals. | 
| 48 Init(); | 48 Init(); | 
| 49 // Skip initial whitespace allowing HTML comment ends just like | 49 // Skip initial whitespace allowing HTML comment ends just like | 
| 50 // after a newline and scan first token. | 50 // after a newline and scan first token. | 
| 51 has_line_terminator_before_next_ = true; | 51 has_line_terminator_before_next_ = true; | 
| 52 SkipWhiteSpace(); | 52 SkipWhiteSpace(); | 
| 53 Scan(); | 53 Scan(); | 
| 54 } | 54 } | 
| 55 | 55 | 
| 56 | 56 | 
| 57 uc32 Scanner::ScanHexNumber(int expected_length) { | 57 uc32 Scanner::ScanHexNumber(int expected_length, bool recordRaw) { | 
| 58 DCHECK(expected_length <= 4); // prevent overflow | 58 DCHECK(expected_length <= 4); // prevent overflow | 
| 59 | 59 | 
| 60 uc32 x = 0; | 60 uc32 x = 0; | 
| 61 for (int i = 0; i < expected_length; i++) { | 61 for (int i = 0; i < expected_length; i++) { | 
| 62 int d = HexValue(c0_); | 62 int d = HexValue(c0_); | 
| 63 if (d < 0) { | 63 if (d < 0) { | 
| 64 return -1; | 64 return -1; | 
| 65 } | 65 } | 
| 66 x = x * 16 + d; | 66 x = x * 16 + d; | 
| 67 if (recordRaw) { | |
| 68 AddRawLiteralChar(c0_); | |
| 69 } | |
| 67 Advance(); | 70 Advance(); | 
| 68 } | 71 } | 
| 69 | 72 | 
| 70 return x; | 73 return x; | 
| 71 } | 74 } | 
| 72 | 75 | 
| 73 | 76 | 
| 74 // Ensure that tokens can be stored in a byte. | 77 // Ensure that tokens can be stored in a byte. | 
| 75 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); | 78 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); | 
| 76 | 79 | 
| (...skipping 319 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 396 PushBack('-'); // undo Advance() | 399 PushBack('-'); // undo Advance() | 
| 397 } | 400 } | 
| 398 PushBack('!'); // undo Advance() | 401 PushBack('!'); // undo Advance() | 
| 399 DCHECK(c0_ == '!'); | 402 DCHECK(c0_ == '!'); | 
| 400 return Token::LT; | 403 return Token::LT; | 
| 401 } | 404 } | 
| 402 | 405 | 
| 403 | 406 | 
| 404 void Scanner::Scan() { | 407 void Scanner::Scan() { | 
| 405 next_.literal_chars = NULL; | 408 next_.literal_chars = NULL; | 
| 409 next_.raw_literal_chars = NULL; | |
| 406 Token::Value token; | 410 Token::Value token; | 
| 411 | |
| 407 do { | 412 do { | 
| 408 // Remember the position of the next token | 413 // Remember the position of the next token | 
| 409 next_.location.beg_pos = source_pos(); | 414 next_.location.beg_pos = source_pos(); | 
| 410 | 415 | 
| 411 switch (c0_) { | 416 switch (c0_) { | 
| 412 case ' ': | 417 case ' ': | 
| 413 case '\t': | 418 case '\t': | 
| 414 Advance(); | 419 Advance(); | 
| 415 token = Token::WHITESPACE; | 420 token = Token::WHITESPACE; | 
| 416 break; | 421 break; | 
| (...skipping 202 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 619 break; | 624 break; | 
| 620 | 625 | 
| 621 case '?': | 626 case '?': | 
| 622 token = Select(Token::CONDITIONAL); | 627 token = Select(Token::CONDITIONAL); | 
| 623 break; | 628 break; | 
| 624 | 629 | 
| 625 case '~': | 630 case '~': | 
| 626 token = Select(Token::BIT_NOT); | 631 token = Select(Token::BIT_NOT); | 
| 627 break; | 632 break; | 
| 628 | 633 | 
| 634 case '`': | |
| 635 if (HarmonyTemplates()) { | |
| 636 token = ScanTemplateSpan(); | |
| 637 break; | |
| 638 } | |
| 639 | |
| 629 default: | 640 default: | 
| 630 if (c0_ < 0) { | 641 if (c0_ < 0) { | 
| 631 token = Token::EOS; | 642 token = Token::EOS; | 
| 632 } else if (unicode_cache_->IsIdentifierStart(c0_)) { | 643 } else if (unicode_cache_->IsIdentifierStart(c0_)) { | 
| 633 token = ScanIdentifierOrKeyword(); | 644 token = ScanIdentifierOrKeyword(); | 
| 634 } else if (IsDecimalDigit(c0_)) { | 645 } else if (IsDecimalDigit(c0_)) { | 
| 635 token = ScanNumber(false); | 646 token = ScanNumber(false); | 
| 636 } else if (SkipWhiteSpace()) { | 647 } else if (SkipWhiteSpace()) { | 
| 637 token = Token::WHITESPACE; | 648 token = Token::WHITESPACE; | 
| 638 } else { | 649 } else { | 
| (...skipping 25 matching lines...) Expand all Loading... | |
| 664 // This function is only called to seek to the location | 675 // This function is only called to seek to the location | 
| 665 // of the end of a function (at the "}" token). It doesn't matter | 676 // of the end of a function (at the "}" token). It doesn't matter | 
| 666 // whether there was a line terminator in the part we skip. | 677 // whether there was a line terminator in the part we skip. | 
| 667 has_line_terminator_before_next_ = false; | 678 has_line_terminator_before_next_ = false; | 
| 668 has_multiline_comment_before_next_ = false; | 679 has_multiline_comment_before_next_ = false; | 
| 669 } | 680 } | 
| 670 Scan(); | 681 Scan(); | 
| 671 } | 682 } | 
| 672 | 683 | 
| 673 | 684 | 
| 674 bool Scanner::ScanEscape() { | 685 bool Scanner::ScanEscape(bool recordRaw) { | 
| 675 uc32 c = c0_; | 686 uc32 c = c0_; | 
| 687 uc32 rc = c; | |
| 688 bool singleCharEscape = true; | |
| 676 Advance(); | 689 Advance(); | 
| 677 | 690 | 
| 678 // Skip escaped newlines. | 691 // Skip escaped newlines. | 
| 679 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) { | 692 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) { | 
| 680 // Allow CR+LF newlines in multiline string literals. | 693 // Allow CR+LF newlines in multiline string literals. | 
| 681 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); | 694 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); | 
| 682 // Allow LF+CR newlines in multiline string literals. | 695 // Allow LF+CR newlines in multiline string literals. | 
| 683 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); | 696 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); | 
| 684 return true; | 697 return true; | 
| 685 } | 698 } | 
| 686 | 699 | 
| 687 switch (c) { | 700 switch (c) { | 
| 688 case '\'': // fall through | 701 case '\'': // fall through | 
| 689 case '"' : // fall through | 702 case '"' : // fall through | 
| 690 case '\\': break; | 703 case '\\': break; | 
| 691 case 'b' : c = '\b'; break; | 704 case 'b' : c = '\b'; break; | 
| 692 case 'f' : c = '\f'; break; | 705 case 'f' : c = '\f'; break; | 
| 693 case 'n' : c = '\n'; break; | 706 case 'n' : c = '\n'; break; | 
| 694 case 'r' : c = '\r'; break; | 707 case 'r' : c = '\r'; break; | 
| 695 case 't' : c = '\t'; break; | 708 case 't' : c = '\t'; break; | 
| 696 case 'u' : { | 709 case 'u' : { | 
| 697 c = ScanHexNumber(4); | 710 if (recordRaw) AddRawLiteralChar('u'); | 
| 711 singleCharEscape = false; | |
| 712 c = ScanHexNumber(4, recordRaw); | |
| 698 if (c < 0) return false; | 713 if (c < 0) return false; | 
| 699 break; | 714 break; | 
| 700 } | 715 } | 
| 701 case 'v' : c = '\v'; break; | 716 case 'v' : c = '\v'; break; | 
| 702 case 'x' : { | 717 case 'x' : { | 
| 703 c = ScanHexNumber(2); | 718 if (recordRaw) AddRawLiteralChar('x'); | 
| 719 singleCharEscape = false; | |
| 720 c = ScanHexNumber(2, recordRaw); | |
| 704 if (c < 0) return false; | 721 if (c < 0) return false; | 
| 705 break; | 722 break; | 
| 706 } | 723 } | 
| 707 case '0' : // fall through | 724 case '0' : // fall through | 
| 708 case '1' : // fall through | 725 case '1' : // fall through | 
| 709 case '2' : // fall through | 726 case '2' : // fall through | 
| 710 case '3' : // fall through | 727 case '3' : // fall through | 
| 711 case '4' : // fall through | 728 case '4' : // fall through | 
| 712 case '5' : // fall through | 729 case '5' : // fall through | 
| 713 case '6' : // fall through | 730 case '6' : // fall through | 
| 714 case '7' : c = ScanOctalEscape(c, 2); break; | 731 case '7': | 
| 732 singleCharEscape = false; | |
| 733 c = ScanOctalEscape(c, 2, recordRaw); | |
| 734 break; | |
| 715 } | 735 } | 
| 716 | 736 | 
| 717 // According to ECMA-262, section 7.8.4, characters not covered by the | 737 // According to ECMA-262, section 7.8.4, characters not covered by the | 
| 718 // above cases should be illegal, but they are commonly handled as | 738 // above cases should be illegal, but they are commonly handled as | 
| 719 // non-escaped characters by JS VMs. | 739 // non-escaped characters by JS VMs. | 
| 740 if (singleCharEscape && recordRaw) AddRawLiteralChar(rc); | |
| 720 AddLiteralChar(c); | 741 AddLiteralChar(c); | 
| 721 return true; | 742 return true; | 
| 722 } | 743 } | 
| 723 | 744 | 
| 724 | 745 | 
| 725 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of | 746 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of | 
| 726 // ECMA-262. Other JS VMs support them. | 747 // ECMA-262. Other JS VMs support them. | 
| 727 uc32 Scanner::ScanOctalEscape(uc32 c, int length) { | 748 uc32 Scanner::ScanOctalEscape(uc32 c, int length, bool recordRaw) { | 
| 728 uc32 x = c - '0'; | 749 uc32 x = c - '0'; | 
| 729 int i = 0; | 750 int i = 0; | 
| 730 for (; i < length; i++) { | 751 for (; i < length; i++) { | 
| 731 int d = c0_ - '0'; | 752 int d = c0_ - '0'; | 
| 732 if (d < 0 || d > 7) break; | 753 if (d < 0 || d > 7) break; | 
| 733 int nx = x * 8 + d; | 754 int nx = x * 8 + d; | 
| 734 if (nx >= 256) break; | 755 if (nx >= 256) break; | 
| 735 x = nx; | 756 x = nx; | 
| 757 if (recordRaw) { | |
| 758 AddRawLiteralChar(c0_); | |
| 759 } | |
| 736 Advance(); | 760 Advance(); | 
| 737 } | 761 } | 
| 738 // Anything except '\0' is an octal escape sequence, illegal in strict mode. | 762 // Anything except '\0' is an octal escape sequence, illegal in strict mode. | 
| 739 // Remember the position of octal escape sequences so that an error | 763 // Remember the position of octal escape sequences so that an error | 
| 740 // can be reported later (in strict mode). | 764 // can be reported later (in strict mode). | 
| 741 // We don't report the error immediately, because the octal escape can | 765 // We don't report the error immediately, because the octal escape can | 
| 742 // occur before the "use strict" directive. | 766 // occur before the "use strict" directive. | 
| 743 if (c != '0' || i > 0) { | 767 if (c != '0' || i > 0) { | 
| 744 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1); | 768 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1); | 
| 745 } | 769 } | 
| (...skipping 17 matching lines...) Expand all Loading... | |
| 763 } | 787 } | 
| 764 } | 788 } | 
| 765 if (c0_ != quote) return Token::ILLEGAL; | 789 if (c0_ != quote) return Token::ILLEGAL; | 
| 766 literal.Complete(); | 790 literal.Complete(); | 
| 767 | 791 | 
| 768 Advance(); // consume quote | 792 Advance(); // consume quote | 
| 769 return Token::STRING; | 793 return Token::STRING; | 
| 770 } | 794 } | 
| 771 | 795 | 
| 772 | 796 | 
| 797 Token::Value Scanner::ScanTemplateSpan() { | |
| 798 // When scanning a TemplateSpan, we are looking for the following construct: | |
| 799 // TEMPLATE_SPAN :: | |
| 800 // ` LiteralChars* ${ | |
| 801 // | } LiteralChars* ${ | |
| 802 // | |
| 803 // TEMPLATE_TAIL :: | |
| 804 // ` LiteralChars* ` | |
| 805 // | } LiteralChars* ` | |
| 806 // | |
| 807 // A TEMPLATE_SPAN should always be followed by an Expression, while a | |
| 808 // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be | |
| 809 // followed by an Expression. | |
| 810 // | |
| 811 // raw_literal_chars_ represents TRV or the raw value of the template span, | |
| 812 // per the spec, while literal_chars_ represents TV or the cooked value of | |
| 813 // the template span. | |
| 814 // | |
| 815 // TODO(caitp): Do not store a separate literal buffer for the span TRV. | |
| 816 // | |
| 817 | |
| 818 if (next_.token == Token::RBRACE) { | |
| 819 PushBack('}'); | |
| 
 
marja
2014/11/11 09:47:24
Hmm, when does this happen and why do we PushBack?
 
caitp (gmail)
2014/11/11 13:59:29
After parsing an expression, the scanner ends up w
 
marja
2014/11/11 15:01:56
But after this, we anyway do Advance() right away.
 
 | |
| 820 } | |
| 821 next_.location.beg_pos = source_pos(); | |
| 822 Token::Value result = Token::ILLEGAL; | |
| 823 DCHECK(c0_ == '`' || c0_ == '}'); | |
| 824 Advance(); // Consume ` or } | |
| 825 | |
| 826 LiteralScope literal(this); | |
| 827 while (true) { | |
| 828 uc32 c = c0_; | |
| 829 Advance(); | |
| 830 if (c == '`') { | |
| 831 result = Token::TEMPLATE_TAIL; | |
| 832 break; | |
| 833 } else if (c == '$' && c0_ == '{') { | |
| 834 Advance(); // Consume '{' | |
| 835 result = Token::TEMPLATE_SPAN; | |
| 836 break; | |
| 837 } else if (c == '\\') { | |
| 838 AddRawLiteralChar('\\'); | |
| 839 if (unicode_cache_->IsLineTerminator(c0_)) { | |
| 840 // The TV of LineContinuation :: \ LineTerminatorSequence is the empty | |
| 841 // code unit sequence. | |
| 842 do { | |
| 843 uc32 lastChar = c0_; | |
| 844 Advance(); | |
| 845 if (lastChar == '\r' && c0_ == '\n') Advance(); | |
| 846 AddRawLiteralChar('\n'); | |
| 847 } while (unicode_cache_->IsLineTerminator(c0_)); | |
| 848 } else if (c0_ == '0') { | |
| 849 Advance(); | |
| 850 AddRawLiteralChar('0'); | |
| 851 AddLiteralChar('0'); | |
| 852 } else { | |
| 853 ScanEscape(true); | |
| 854 } | |
| 855 } else if (c < 0) { | |
| 856 // Unterminated template literal | |
| 857 PushBack(c); | |
| 858 break; | |
| 859 } else { | |
| 860 // The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A. | |
| 861 // The TRV of LineTerminatorSequence :: <CR><LF> is the sequence | |
| 862 // consisting of the CV 0x000A. | |
| 863 if (c == '\r') { | |
| 864 if (c0_ == '\n') Advance(); | |
| 865 c = '\n'; | |
| 866 } | |
| 867 AddLiteralChar(c); | |
| 868 AddRawLiteralChar(c); | |
| 869 } | |
| 870 } | |
| 871 literal.Complete(); | |
| 872 next_.location.end_pos = source_pos(); | |
| 873 next_.token = result; | |
| 874 return result; | |
| 875 } | |
| 876 | |
| 877 | |
| 773 void Scanner::ScanDecimalDigits() { | 878 void Scanner::ScanDecimalDigits() { | 
| 774 while (IsDecimalDigit(c0_)) | 879 while (IsDecimalDigit(c0_)) | 
| 775 AddLiteralCharAdvance(); | 880 AddLiteralCharAdvance(); | 
| 776 } | 881 } | 
| 777 | 882 | 
| 778 | 883 | 
| 779 Token::Value Scanner::ScanNumber(bool seen_period) { | 884 Token::Value Scanner::ScanNumber(bool seen_period) { | 
| 780 DCHECK(IsDecimalDigit(c0_)); // the first digit of the number or the fraction | 885 DCHECK(IsDecimalDigit(c0_)); // the first digit of the number or the fraction | 
| 781 | 886 | 
| 782 enum { DECIMAL, HEX, OCTAL, IMPLICIT_OCTAL, BINARY } kind = DECIMAL; | 887 enum { DECIMAL, HEX, OCTAL, IMPLICIT_OCTAL, BINARY } kind = DECIMAL; | 
| (...skipping 373 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1156 | 1261 | 
| 1157 | 1262 | 
| 1158 const AstRawString* Scanner::CurrentSymbol(AstValueFactory* ast_value_factory) { | 1263 const AstRawString* Scanner::CurrentSymbol(AstValueFactory* ast_value_factory) { | 
| 1159 if (is_literal_one_byte()) { | 1264 if (is_literal_one_byte()) { | 
| 1160 return ast_value_factory->GetOneByteString(literal_one_byte_string()); | 1265 return ast_value_factory->GetOneByteString(literal_one_byte_string()); | 
| 1161 } | 1266 } | 
| 1162 return ast_value_factory->GetTwoByteString(literal_two_byte_string()); | 1267 return ast_value_factory->GetTwoByteString(literal_two_byte_string()); | 
| 1163 } | 1268 } | 
| 1164 | 1269 | 
| 1165 | 1270 | 
| 1271 const AstRawString* Scanner::CurrentRawSymbol( | |
| 1272 AstValueFactory* ast_value_factory) { | |
| 1273 if (is_raw_one_byte()) { | |
| 1274 return ast_value_factory->GetOneByteString(raw_one_byte_string()); | |
| 1275 } | |
| 1276 return ast_value_factory->GetTwoByteString(raw_two_byte_string()); | |
| 1277 } | |
| 1278 | |
| 1279 | |
| 1166 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) { | 1280 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) { | 
| 1167 if (is_next_literal_one_byte()) { | 1281 if (is_next_literal_one_byte()) { | 
| 1168 return ast_value_factory->GetOneByteString(next_literal_one_byte_string()); | 1282 return ast_value_factory->GetOneByteString(next_literal_one_byte_string()); | 
| 1169 } | 1283 } | 
| 1170 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string()); | 1284 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string()); | 
| 1171 } | 1285 } | 
| 1172 | 1286 | 
| 1173 | 1287 | 
| 1288 const AstRawString* Scanner::NextRawSymbol(AstValueFactory* ast_value_factory) { | |
| 1289 if (is_next_raw_one_byte()) { | |
| 1290 return ast_value_factory->GetOneByteString(next_raw_one_byte_string()); | |
| 1291 } | |
| 1292 return ast_value_factory->GetTwoByteString(next_raw_two_byte_string()); | |
| 1293 } | |
| 1294 | |
| 1295 | |
| 1174 double Scanner::DoubleValue() { | 1296 double Scanner::DoubleValue() { | 
| 1175 DCHECK(is_literal_one_byte()); | 1297 DCHECK(is_literal_one_byte()); | 
| 1176 return StringToDouble( | 1298 return StringToDouble( | 
| 1177 unicode_cache_, | 1299 unicode_cache_, | 
| 1178 literal_one_byte_string(), | 1300 literal_one_byte_string(), | 
| 1179 ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY); | 1301 ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY); | 
| 1180 } | 1302 } | 
| 1181 | 1303 | 
| 1182 | 1304 | 
| 1183 int Scanner::FindNumber(DuplicateFinder* finder, int value) { | 1305 int Scanner::FindNumber(DuplicateFinder* finder, int value) { | 
| (...skipping 142 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1326 } | 1448 } | 
| 1327 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); | 1449 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); | 
| 1328 } | 1450 } | 
| 1329 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); | 1451 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); | 
| 1330 | 1452 | 
| 1331 backing_store_.AddBlock(bytes); | 1453 backing_store_.AddBlock(bytes); | 
| 1332 return backing_store_.EndSequence().start(); | 1454 return backing_store_.EndSequence().start(); | 
| 1333 } | 1455 } | 
| 1334 | 1456 | 
| 1335 } } // namespace v8::internal | 1457 } } // namespace v8::internal | 
| OLD | NEW |