OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
6 | 6 |
7 #include <stdint.h> | 7 #include <stdint.h> |
8 | 8 |
9 #include <cmath> | 9 #include <cmath> |
10 | 10 |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
47 // in object literals. | 47 // in object literals. |
48 Init(); | 48 Init(); |
49 // Skip initial whitespace allowing HTML comment ends just like | 49 // Skip initial whitespace allowing HTML comment ends just like |
50 // after a newline and scan first token. | 50 // after a newline and scan first token. |
51 has_line_terminator_before_next_ = true; | 51 has_line_terminator_before_next_ = true; |
52 SkipWhiteSpace(); | 52 SkipWhiteSpace(); |
53 Scan(); | 53 Scan(); |
54 } | 54 } |
55 | 55 |
56 | 56 |
// Scans expected_length hexadecimal digits starting at the current character
// (c0_) and returns their combined value, accumulated as x = x * 16 + d.
// If a non-hex character is met before expected_length digits were read, the
// digits consumed so far are pushed back in reverse order and -1 is returned.
// NEW column: adds recordRaw; when true, every accepted digit is also appended
// through AddRawLiteralChar() before the scanner advances.
// NOTE(review): on the failure path the consumed digits are pushed back, but
// raw chars appended in earlier iterations are not undone here - confirm that
// callers discard the raw literal when -1 is returned.
57 uc32 Scanner::ScanHexNumber(int expected_length) { | 57 uc32 Scanner::ScanHexNumber(int expected_length, bool recordRaw) { |
58 DCHECK(expected_length <= 4); // prevent overflow | 58 DCHECK(expected_length <= 4); // prevent overflow |
59 | 59 |
// digits remembers each character read so it can be pushed back on failure.
60 uc32 digits[4] = { 0, 0, 0, 0 }; | 60 uc32 digits[4] = { 0, 0, 0, 0 }; |
61 uc32 x = 0; | 61 uc32 x = 0; |
62 for (int i = 0; i < expected_length; i++) { | 62 for (int i = 0; i < expected_length; i++) { |
63 digits[i] = c0_; | 63 digits[i] = c0_; |
64 int d = HexValue(c0_); | 64 int d = HexValue(c0_); |
65 if (d < 0) { | 65 if (d < 0) { |
66 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes | 66 // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes |
67 // should be illegal, but other JS VMs just return the | 67 // should be illegal, but other JS VMs just return the |
68 // non-escaped version of the original character. | 68 // non-escaped version of the original character. |
69 | 69 |
70 // Push back digits that we have advanced past. | 70 // Push back digits that we have advanced past. |
71 for (int j = i-1; j >= 0; j--) { | 71 for (int j = i-1; j >= 0; j--) { |
72 PushBack(digits[j]); | 72 PushBack(digits[j]); |
73 } | 73 } |
74 return -1; | 74 return -1; |
75 } | 75 } |
76 x = x * 16 + d; | 76 x = x * 16 + d; |
| 77 if (recordRaw) { |
| 78 AddRawLiteralChar(c0_); |
| 79 } |
77 Advance(); | 80 Advance(); |
78 } | 81 } |
79 | 82 |
80 return x; | 83 return x; |
81 } | 84 } |
82 | 85 |
83 | 86 |
84 // Ensure that tokens can be stored in a byte. | 87 // Ensure that tokens can be stored in a byte. |
85 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); | 88 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); |
86 | 89 |
(...skipping 123 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
210 Token::ILLEGAL, | 213 Token::ILLEGAL, |
211 Token::ILLEGAL, | 214 Token::ILLEGAL, |
212 Token::LBRACE, // 0x7b | 215 Token::LBRACE, // 0x7b |
213 Token::ILLEGAL, | 216 Token::ILLEGAL, |
214 Token::RBRACE, // 0x7d | 217 Token::RBRACE, // 0x7d |
215 Token::BIT_NOT, // 0x7e | 218 Token::BIT_NOT, // 0x7e |
216 Token::ILLEGAL | 219 Token::ILLEGAL |
217 }; | 220 }; |
218 | 221 |
219 | 222 |
// Promotes the look-ahead token (next_) to current_, clears the
// line-terminator and multiline-comment flags, scans the following token into
// next_, and returns current_.token.
// Fast path: when c0_, cast to unsigned, is at most 0x7f and one_char_tokens
// maps it to something other than Token::ILLEGAL, next_ is filled in directly
// with a one-character location [pos, pos + 1) and Scan() is not called.
// NEW column: takes a Mode. The fast path is skipped in TemplateLiteral mode
// and the mode is forwarded to Scan().
220 Token::Value Scanner::Next() { | 223 Token::Value Scanner::Next(Mode mode) { |
221 current_ = next_; | 224 current_ = next_; |
222 has_line_terminator_before_next_ = false; | 225 has_line_terminator_before_next_ = false; |
223 has_multiline_comment_before_next_ = false; | 226 has_multiline_comment_before_next_ = false; |
224 if (static_cast<unsigned>(c0_) <= 0x7f) { | 227 if (mode != TemplateLiteral && static_cast<unsigned>(c0_) <= 0x7f) { |
225 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]); | 228 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]); |
226 if (token != Token::ILLEGAL) { | 229 if (token != Token::ILLEGAL) { |
227 int pos = source_pos(); | 230 int pos = source_pos(); |
228 next_.token = token; | 231 next_.token = token; |
229 next_.location.beg_pos = pos; | 232 next_.location.beg_pos = pos; |
230 next_.location.end_pos = pos + 1; | 233 next_.location.end_pos = pos + 1; |
231 Advance(); | 234 Advance(); |
232 return current_.token; | 235 return current_.token; |
233 } | 236 } |
234 } | 237 } |
235 Scan(); | 238 Scan(mode); |
// NEW column only: if the token that just became current is RBRACE while in
// TemplateLiteral mode, Next() is invoked again without an argument, i.e. with
// whatever default mode the declaration supplies (not visible in this file
// view), and that call's result is returned instead.
| 239 if (mode == TemplateLiteral && current_.token == Token::RBRACE) { |
| 240 // The current token is now invalid |
| 241 return Next(); |
| 242 } |
236 return current_.token; | 243 return current_.token; |
237 } | 244 } |
238 | 245 |
239 | 246 |
240 // TODO(yangguo): check whether this is actually necessary. | 247 // TODO(yangguo): check whether this is actually necessary. |
241 static inline bool IsLittleEndianByteOrderMark(uc32 c) { | 248 static inline bool IsLittleEndianByteOrderMark(uc32 c) { |
242 // The Unicode value U+FFFE is guaranteed never to be assigned as a | 249 // The Unicode value U+FFFE is guaranteed never to be assigned as a |
243 // Unicode character; this implies that in a Unicode context the | 250 // Unicode character; this implies that in a Unicode context the |
244 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF | 251 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF |
245 // character expressed in little-endian byte order (since it could | 252 // character expressed in little-endian byte order (since it could |
(...skipping 156 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
402 Advance(); | 409 Advance(); |
403 if (c0_ == '-') return SkipSingleLineComment(); | 410 if (c0_ == '-') return SkipSingleLineComment(); |
404 PushBack('-'); // undo Advance() | 411 PushBack('-'); // undo Advance() |
405 } | 412 } |
406 PushBack('!'); // undo Advance() | 413 PushBack('!'); // undo Advance() |
407 DCHECK(c0_ == '!'); | 414 DCHECK(c0_ == '!'); |
408 return Token::LT; | 415 return Token::LT; |
409 } | 416 } |
410 | 417 |
411 | 418 |
412 void Scanner::Scan() { | 419 void Scanner::Scan(Scanner::Mode mode) { |
413 next_.literal_chars = NULL; | 420 next_.literal_chars = NULL; |
| 421 next_.raw_literal_chars = NULL; |
414 Token::Value token; | 422 Token::Value token; |
| 423 |
| 424 if (mode == TemplateLiteral) { |
| 425 CHECK(HarmonyTemplates()); |
| 426 |
| 427 // If we have an RBRACE next, PushBack a `}`, this should be the start of a |
| 428 // TemplateMiddle span |
| 429 if (peek() == Token::RBRACE) { |
| 430 PushBack('}'); |
| 431 } |
| 432 |
| 433 token = ScanTemplateSpan(); |
| 434 next_.location.end_pos = source_pos(); |
| 435 next_.token = token; |
| 436 return; |
| 437 } |
| 438 |
415 do { | 439 do { |
416 // Remember the position of the next token | 440 // Remember the position of the next token |
417 next_.location.beg_pos = source_pos(); | 441 next_.location.beg_pos = source_pos(); |
418 | 442 |
419 switch (c0_) { | 443 switch (c0_) { |
420 case ' ': | 444 case ' ': |
421 case '\t': | 445 case '\t': |
422 Advance(); | 446 Advance(); |
423 token = Token::WHITESPACE; | 447 token = Token::WHITESPACE; |
424 break; | 448 break; |
(...skipping 202 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
627 break; | 651 break; |
628 | 652 |
629 case '?': | 653 case '?': |
630 token = Select(Token::CONDITIONAL); | 654 token = Select(Token::CONDITIONAL); |
631 break; | 655 break; |
632 | 656 |
633 case '~': | 657 case '~': |
634 token = Select(Token::BIT_NOT); | 658 token = Select(Token::BIT_NOT); |
635 break; | 659 break; |
636 | 660 |
| 661 case '`': |
| 662 if (HarmonyTemplates()) { |
| 663 token = ScanTemplateSpan(); |
| 664 break; |
| 665 } |
| 666 |
637 default: | 667 default: |
638 if (unicode_cache_->IsIdentifierStart(c0_)) { | 668 if (unicode_cache_->IsIdentifierStart(c0_)) { |
639 token = ScanIdentifierOrKeyword(); | 669 token = ScanIdentifierOrKeyword(); |
640 } else if (IsDecimalDigit(c0_)) { | 670 } else if (IsDecimalDigit(c0_)) { |
641 token = ScanNumber(false); | 671 token = ScanNumber(false); |
642 } else if (SkipWhiteSpace()) { | 672 } else if (SkipWhiteSpace()) { |
643 token = Token::WHITESPACE; | 673 token = Token::WHITESPACE; |
644 } else if (c0_ < 0) { | 674 } else if (c0_ < 0) { |
645 token = Token::EOS; | 675 token = Token::EOS; |
646 } else { | 676 } else { |
(...skipping 25 matching lines...) Expand all Loading... |
672 // This function is only called to seek to the location | 702 // This function is only called to seek to the location |
673 // of the end of a function (at the "}" token). It doesn't matter | 703 // of the end of a function (at the "}" token). It doesn't matter |
674 // whether there was a line terminator in the part we skip. | 704 // whether there was a line terminator in the part we skip. |
675 has_line_terminator_before_next_ = false; | 705 has_line_terminator_before_next_ = false; |
676 has_multiline_comment_before_next_ = false; | 706 has_multiline_comment_before_next_ = false; |
677 } | 707 } |
678 Scan(); | 708 Scan(); |
679 } | 709 } |
680 | 710 |
681 | 711 |
// Processes the escape character held in c0_ on entry: consumes it, translates
// it to the character it denotes and appends the result with AddLiteralChar().
// Returns false only when a 'u' or 'x' escape fails in ScanHexNumber() (result
// below zero); every other path returns true.
// An escaped line terminator (including CR+LF and LF+CR pairs) is consumed and
// contributes no literal character. Characters not listed in the switch are
// appended unchanged.
// NEW column: adds recordRaw, which is only forwarded to ScanHexNumber() and
// ScanOctalEscape().
// NOTE(review): this function never calls AddRawLiteralChar() itself, so with
// recordRaw set only the hex/octal digits reach the raw literal, not the
// escape character c - confirm that is intended.
682 bool Scanner::ScanEscape() { | 712 bool Scanner::ScanEscape(bool recordRaw) { |
683 uc32 c = c0_; | 713 uc32 c = c0_; |
684 Advance(); | 714 Advance(); |
685 | 715 |
686 // Skip escaped newlines. | 716 // Skip escaped newlines. |
687 if (unicode_cache_->IsLineTerminator(c)) { | 717 if (unicode_cache_->IsLineTerminator(c)) { |
688 // Allow CR+LF newlines in multiline string literals. | 718 // Allow CR+LF newlines in multiline string literals. |
689 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); | 719 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); |
690 // Allow LF+CR newlines in multiline string literals. | 720 // Allow LF+CR newlines in multiline string literals. |
691 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); | 721 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); |
692 return true; | 722 return true; |
693 } | 723 } |
694 | 724 |
695 switch (c) { | 725 switch (c) { |
696 case '\'': // fall through | 726 case '\'': // fall through |
697 case '"' : // fall through | 727 case '"' : // fall through |
698 case '\\': break; | 728 case '\\': break; |
699 case 'b' : c = '\b'; break; | 729 case 'b' : c = '\b'; break; |
700 case 'f' : c = '\f'; break; | 730 case 'f' : c = '\f'; break; |
701 case 'n' : c = '\n'; break; | 731 case 'n' : c = '\n'; break; |
702 case 'r' : c = '\r'; break; | 732 case 'r' : c = '\r'; break; |
703 case 't' : c = '\t'; break; | 733 case 't' : c = '\t'; break; |
704 case 'u' : { | 734 case 'u' : { |
705 c = ScanHexNumber(4); | 735 c = ScanHexNumber(4, recordRaw); |
706 if (c < 0) return false; | 736 if (c < 0) return false; |
707 break; | 737 break; |
708 } | 738 } |
709 case 'v' : c = '\v'; break; | 739 case 'v' : c = '\v'; break; |
710 case 'x' : { | 740 case 'x' : { |
711 c = ScanHexNumber(2); | 741 c = ScanHexNumber(2, recordRaw); |
712 if (c < 0) return false; | 742 if (c < 0) return false; |
713 break; | 743 break; |
714 } | 744 } |
715 case '0' : // fall through | 745 case '0' : // fall through |
716 case '1' : // fall through | 746 case '1' : // fall through |
717 case '2' : // fall through | 747 case '2' : // fall through |
718 case '3' : // fall through | 748 case '3' : // fall through |
719 case '4' : // fall through | 749 case '4' : // fall through |
720 case '5' : // fall through | 750 case '5' : // fall through |
721 case '6' : // fall through | 751 case '6' : // fall through |
722 case '7' : c = ScanOctalEscape(c, 2); break; | 752 case '7': |
| 753 c = ScanOctalEscape(c, 2, recordRaw); |
| 754 break; |
723 } | 755 } |
724 | 756 |
725 // According to ECMA-262, section 7.8.4, characters not covered by the | 757 // According to ECMA-262, section 7.8.4, characters not covered by the |
726 // above cases should be illegal, but they are commonly handled as | 758 // above cases should be illegal, but they are commonly handled as |
727 // non-escaped characters by JS VMs. | 759 // non-escaped characters by JS VMs. |
728 AddLiteralChar(c); | 760 AddLiteralChar(c); |
729 return true; | 761 return true; |
730 } | 762 } |
731 | 763 |
732 | 764 |
733 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of | 765 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of |
734 // ECMA-262. Other JS VMs support them. | 766 // ECMA-262. Other JS VMs support them. |
735 uc32 Scanner::ScanOctalEscape(uc32 c, int length) { | 767 uc32 Scanner::ScanOctalEscape(uc32 c, int length, bool recordRaw) { |
736 uc32 x = c - '0'; | 768 uc32 x = c - '0'; |
737 int i = 0; | 769 int i = 0; |
738 for (; i < length; i++) { | 770 for (; i < length; i++) { |
739 int d = c0_ - '0'; | 771 int d = c0_ - '0'; |
740 if (d < 0 || d > 7) break; | 772 if (d < 0 || d > 7) break; |
741 int nx = x * 8 + d; | 773 int nx = x * 8 + d; |
742 if (nx >= 256) break; | 774 if (nx >= 256) break; |
743 x = nx; | 775 x = nx; |
| 776 if (recordRaw) { |
| 777 AddRawLiteralChar(c0_); |
| 778 } |
744 Advance(); | 779 Advance(); |
745 } | 780 } |
746 // Anything except '\0' is an octal escape sequence, illegal in strict mode. | 781 // Anything except '\0' is an octal escape sequence, illegal in strict mode. |
747 // Remember the position of octal escape sequences so that an error | 782 // Remember the position of octal escape sequences so that an error |
748 // can be reported later (in strict mode). | 783 // can be reported later (in strict mode). |
749 // We don't report the error immediately, because the octal escape can | 784 // We don't report the error immediately, because the octal escape can |
750 // occur before the "use strict" directive. | 785 // occur before the "use strict" directive. |
751 if (c != '0' || i > 0) { | 786 if (c != '0' || i > 0) { |
752 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1); | 787 octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1); |
753 } | 788 } |
(...skipping 17 matching lines...) Expand all Loading... |
771 } | 806 } |
772 } | 807 } |
773 if (c0_ != quote) return Token::ILLEGAL; | 808 if (c0_ != quote) return Token::ILLEGAL; |
774 literal.Complete(); | 809 literal.Complete(); |
775 | 810 |
776 Advance(); // consume quote | 811 Advance(); // consume quote |
777 return Token::STRING; | 812 return Token::STRING; |
778 } | 813 } |
779 | 814 |
780 | 815 |
// (NEW column only.) Scans one piece of a template literal. On entry c0_ must
// be '`' or '}' (DCHECKed); that character is consumed and characters are then
// collected inside a LiteralScope until one of the following is seen:
//   - a closing '`'        -> returns Token::TEMPLATE_TAIL
//   - the sequence "${"    -> returns Token::TEMPLATE_SPAN ('{' is consumed)
//   - a negative character -> pushes it back and returns Token::ILLEGAL
// Ordinary characters, including a '$' that is not followed by '{', are
// appended to both the literal and the raw literal.
| 816 Token::Value Scanner::ScanTemplateSpan() { |
| 817 DCHECK(c0_ == '`' || c0_ == '}'); |
| 818 Advance(); // Consume ` or } |
| 819 LiteralScope literal(this); |
| 820 while (true) { |
| 821 uc32 c = c0_; |
| 822 Advance(); |
| 823 if (c == '`') { |
| 824 literal.Complete(); |
| 825 return Token::TEMPLATE_TAIL; |
| 826 } else if (c == '$' && c0_ == '{') { |
| 827 Advance(); // Consume '{' |
| 828 literal.Complete(); |
| 829 return Token::TEMPLATE_SPAN; |
| 830 } else if (c == '\\') { |
| 831 if (unicode_cache_->IsLineTerminator(c0_)) { |
| 832 // The TV of LineContinuation :: \ LineTerminatorSequence is the empty |
| 833 // code unit sequence. |
// Each line terminator (CR followed by LF counts as one) adds a single '\n'
// to the raw literal and nothing to the cooked literal.
// NOTE(review): the loop also consumes any further line terminators that
// directly follow the first one, and the backslash itself is never appended
// to the raw literal - confirm both against the intended TV/TRV semantics.
| 834 do { |
| 835 uc32 lastChar = c0_; |
| 836 Advance(); |
| 837 if (lastChar == '\r' && c0_ == '\n') Advance(); |
| 838 AddRawLiteralChar('\n'); |
| 839 } while (unicode_cache_->IsLineTerminator(c0_)); |
| 840 } else { |
// NOTE(review): ScanEscape() is called without an argument, so recordRaw takes
// whatever default the declaration supplies (not visible in this file view),
// and its bool result is ignored, so a failed hex escape is not reported here.
| 841 ScanEscape(); |
| 842 } |
| 843 } else if (c < 0) { |
| 844 // Unterminated template literal |
| 845 literal.Complete(); |
| 846 PushBack(c); |
| 847 return Token::ILLEGAL; |
| 848 } else { |
| 849 AddLiteralChar(c); |
| 850 AddRawLiteralChar(c); |
| 851 } |
| 852 } |
| 853 } |
| 854 |
| 855 |
// Calls AddLiteralCharAdvance() repeatedly for as long as the current
// character c0_ is a decimal digit; does nothing if it is not one on entry.
781 void Scanner::ScanDecimalDigits() { | 856 void Scanner::ScanDecimalDigits() { |
782 while (IsDecimalDigit(c0_)) | 857 while (IsDecimalDigit(c0_)) |
783 AddLiteralCharAdvance(); | 858 AddLiteralCharAdvance(); |
784 } | 859 } |
785 | 860 |
786 | 861 |
787 Token::Value Scanner::ScanNumber(bool seen_period) { | 862 Token::Value Scanner::ScanNumber(bool seen_period) { |
788 DCHECK(IsDecimalDigit(c0_)); // the first digit of the number or the fraction | 863 DCHECK(IsDecimalDigit(c0_)); // the first digit of the number or the fraction |
789 | 864 |
790 enum { DECIMAL, HEX, OCTAL, IMPLICIT_OCTAL, BINARY } kind = DECIMAL; | 865 enum { DECIMAL, HEX, OCTAL, IMPLICIT_OCTAL, BINARY } kind = DECIMAL; |
(...skipping 403 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1194 | 1269 |
1195 | 1270 |
// Returns an AstRawString obtained from ast_value_factory for the scanner's
// literal: GetOneByteString(literal_one_byte_string()) when
// is_literal_one_byte() holds, GetTwoByteString(literal_two_byte_string())
// otherwise.
1196 const AstRawString* Scanner::CurrentSymbol(AstValueFactory* ast_value_factory) { | 1271 const AstRawString* Scanner::CurrentSymbol(AstValueFactory* ast_value_factory) { |
1197 if (is_literal_one_byte()) { | 1272 if (is_literal_one_byte()) { |
1198 return ast_value_factory->GetOneByteString(literal_one_byte_string()); | 1273 return ast_value_factory->GetOneByteString(literal_one_byte_string()); |
1199 } | 1274 } |
1200 return ast_value_factory->GetTwoByteString(literal_two_byte_string()); | 1275 return ast_value_factory->GetTwoByteString(literal_two_byte_string()); |
1201 } | 1276 } |
1202 | 1277 |
1203 | 1278 |
// (NEW column only.) Raw-literal counterpart of CurrentSymbol(): returns
// GetOneByteString(raw_one_byte_string()) from ast_value_factory when
// is_raw_one_byte() holds, GetTwoByteString(raw_two_byte_string()) otherwise.
| 1279 const AstRawString* Scanner::CurrentRawSymbol( |
| 1280 AstValueFactory* ast_value_factory) { |
| 1281 if (is_raw_one_byte()) { |
| 1282 return ast_value_factory->GetOneByteString(raw_one_byte_string()); |
| 1283 } |
| 1284 return ast_value_factory->GetTwoByteString(raw_two_byte_string()); |
| 1285 } |
| 1286 |
| 1287 |
// Returns an AstRawString obtained from ast_value_factory for the look-ahead
// literal: GetOneByteString(next_literal_one_byte_string()) when
// is_next_literal_one_byte() holds,
// GetTwoByteString(next_literal_two_byte_string()) otherwise.
1204 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) { | 1288 const AstRawString* Scanner::NextSymbol(AstValueFactory* ast_value_factory) { |
1205 if (is_next_literal_one_byte()) { | 1289 if (is_next_literal_one_byte()) { |
1206 return ast_value_factory->GetOneByteString(next_literal_one_byte_string()); | 1290 return ast_value_factory->GetOneByteString(next_literal_one_byte_string()); |
1207 } | 1291 } |
1208 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string()); | 1292 return ast_value_factory->GetTwoByteString(next_literal_two_byte_string()); |
1209 } | 1293 } |
1210 | 1294 |
1211 | 1295 |
// (NEW column only.) Raw-literal counterpart of NextSymbol(): returns
// GetOneByteString(next_raw_one_byte_string()) from ast_value_factory when
// is_next_raw_one_byte() holds, GetTwoByteString(next_raw_two_byte_string())
// otherwise.
| 1296 const AstRawString* Scanner::NextRawSymbol(AstValueFactory* ast_value_factory) { |
| 1297 if (is_next_raw_one_byte()) { |
| 1298 return ast_value_factory->GetOneByteString(next_raw_one_byte_string()); |
| 1299 } |
| 1300 return ast_value_factory->GetTwoByteString(next_raw_two_byte_string()); |
| 1301 } |
| 1302 |
| 1303 |
// Converts literal_one_byte_string() to a double with StringToDouble(),
// passing unicode_cache_ and flags that allow hex, octal, implicit-octal and
// binary notations. DCHECKs that the literal is one-byte.
1212 double Scanner::DoubleValue() { | 1304 double Scanner::DoubleValue() { |
1213 DCHECK(is_literal_one_byte()); | 1305 DCHECK(is_literal_one_byte()); |
1214 return StringToDouble( | 1306 return StringToDouble( |
1215 unicode_cache_, | 1307 unicode_cache_, |
1216 literal_one_byte_string(), | 1308 literal_one_byte_string(), |
1217 ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY); | 1309 ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY); |
1218 } | 1310 } |
1219 | 1311 |
1220 | 1312 |
1221 int Scanner::FindNumber(DuplicateFinder* finder, int value) { | 1313 int Scanner::FindNumber(DuplicateFinder* finder, int value) { |
(...skipping 142 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1364 } | 1456 } |
1365 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); | 1457 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); |
1366 } | 1458 } |
1367 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); | 1459 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); |
1368 | 1460 |
1369 backing_store_.AddBlock(bytes); | 1461 backing_store_.AddBlock(bytes); |
1370 return backing_store_.EndSequence().start(); | 1462 return backing_store_.EndSequence().start(); |
1371 } | 1463 } |
1372 | 1464 |
1373 } } // namespace v8::internal | 1465 } } // namespace v8::internal |
OLD | NEW |