OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
6 | 6 |
7 #include "src/parsing/scanner.h" | 7 #include "src/parsing/scanner.h" |
8 | 8 |
9 #include <stdint.h> | 9 #include <stdint.h> |
10 | 10 |
11 #include <cmath> | 11 #include <cmath> |
12 | 12 |
13 #include "src/ast/ast-value-factory.h" | 13 #include "src/ast/ast-value-factory.h" |
14 #include "src/char-predicates-inl.h" | 14 #include "src/char-predicates-inl.h" |
15 #include "src/conversions-inl.h" | 15 #include "src/conversions-inl.h" |
16 #include "src/list-inl.h" | 16 #include "src/list-inl.h" |
17 #include "src/parsing/duplicate-finder.h" // For Scanner::FindSymbol | 17 #include "src/parsing/duplicate-finder.h" // For Scanner::FindSymbol |
18 | 18 |
19 namespace v8 { | 19 namespace v8 { |
20 namespace internal { | 20 namespace internal { |
21 | 21 |
22 // Scoped helper for saving & restoring scanner error state. | 22 class Scanner::ErrorState { |
23 // This is used for tagged template literals, in which normally forbidden | |
24 // escape sequences are allowed. | |
25 class ErrorState { | |
26 public: | 23 public: |
27 ErrorState(MessageTemplate::Template* message_stack, | 24 ErrorState(MessageTemplate::Template* message_stack, |
28 Scanner::Location* location_stack) | 25 Scanner::Location* location_stack) |
29 : message_stack_(message_stack), | 26 : message_stack_(message_stack), |
30 old_message_(*message_stack), | 27 old_message_(*message_stack), |
31 location_stack_(location_stack), | 28 location_stack_(location_stack), |
32 old_location_(*location_stack) { | 29 old_location_(*location_stack) { |
33 *message_stack_ = MessageTemplate::kNone; | 30 *message_stack_ = MessageTemplate::kNone; |
34 *location_stack_ = Scanner::Location::invalid(); | 31 *location_stack_ = Location::invalid(); |
35 } | 32 } |
36 | 33 |
37 ~ErrorState() { | 34 ~ErrorState() { |
38 *message_stack_ = old_message_; | 35 *message_stack_ = old_message_; |
39 *location_stack_ = old_location_; | 36 *location_stack_ = old_location_; |
40 } | 37 } |
41 | 38 |
42 void MoveErrorTo(MessageTemplate::Template* message_dest, | 39 void MoveErrorTo(TokenDesc* dest) { |
43 Scanner::Location* location_dest) { | |
44 if (*message_stack_ == MessageTemplate::kNone) { | 40 if (*message_stack_ == MessageTemplate::kNone) { |
45 return; | 41 return; |
46 } | 42 } |
47 if (*message_dest == MessageTemplate::kNone) { | 43 if (dest->invalid_template_escape_message == MessageTemplate::kNone) { |
48 *message_dest = *message_stack_; | 44 dest->invalid_template_escape_message = *message_stack_; |
49 *location_dest = *location_stack_; | 45 dest->invalid_template_escape_location = *location_stack_; |
50 } | 46 } |
51 *message_stack_ = MessageTemplate::kNone; | 47 *message_stack_ = MessageTemplate::kNone; |
52 *location_stack_ = Scanner::Location::invalid(); | 48 *location_stack_ = Location::invalid(); |
53 } | 49 } |
54 | 50 |
55 private: | 51 private: |
56 MessageTemplate::Template* const message_stack_; | 52 MessageTemplate::Template* const message_stack_; |
57 MessageTemplate::Template const old_message_; | 53 MessageTemplate::Template const old_message_; |
58 Scanner::Location* const location_stack_; | 54 Scanner::Location* const location_stack_; |
59 Scanner::Location const old_location_; | 55 Scanner::Location const old_location_; |
60 }; | 56 }; |
61 | 57 |
62 Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const { | 58 Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const { |
(...skipping 327 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
390 has_multiline_comment_before_next_ = false; | 386 has_multiline_comment_before_next_ = false; |
391 if (static_cast<unsigned>(c0_) <= 0x7f) { | 387 if (static_cast<unsigned>(c0_) <= 0x7f) { |
392 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]); | 388 Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]); |
393 if (token != Token::ILLEGAL) { | 389 if (token != Token::ILLEGAL) { |
394 int pos = source_pos(); | 390 int pos = source_pos(); |
395 next_.token = token; | 391 next_.token = token; |
396 next_.location.beg_pos = pos; | 392 next_.location.beg_pos = pos; |
397 next_.location.end_pos = pos + 1; | 393 next_.location.end_pos = pos + 1; |
398 next_.literal_chars = nullptr; | 394 next_.literal_chars = nullptr; |
399 next_.raw_literal_chars = nullptr; | 395 next_.raw_literal_chars = nullptr; |
| 396 next_.invalid_template_escape_message = MessageTemplate::kNone; |
400 Advance(); | 397 Advance(); |
401 return current_.token; | 398 return current_.token; |
402 } | 399 } |
403 } | 400 } |
404 Scan(); | 401 Scan(); |
405 return current_.token; | 402 return current_.token; |
406 } | 403 } |
407 | 404 |
408 | 405 |
409 Token::Value Scanner::PeekAhead() { | 406 Token::Value Scanner::PeekAhead() { |
(...skipping 192 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
602 return Token::LT; | 599 return Token::LT; |
603 } | 600 } |
604 | 601 |
605 found_html_comment_ = true; | 602 found_html_comment_ = true; |
606 return SkipSingleLineComment(); | 603 return SkipSingleLineComment(); |
607 } | 604 } |
608 | 605 |
609 void Scanner::Scan() { | 606 void Scanner::Scan() { |
610 next_.literal_chars = NULL; | 607 next_.literal_chars = NULL; |
611 next_.raw_literal_chars = NULL; | 608 next_.raw_literal_chars = NULL; |
| 609 next_.invalid_template_escape_message = MessageTemplate::kNone; |
612 Token::Value token; | 610 Token::Value token; |
613 do { | 611 do { |
614 // Remember the position of the next token | 612 // Remember the position of the next token |
615 next_.location.beg_pos = source_pos(); | 613 next_.location.beg_pos = source_pos(); |
616 | 614 |
617 switch (c0_) { | 615 switch (c0_) { |
618 case ' ': | 616 case ' ': |
619 case '\t': | 617 case '\t': |
620 Advance(); | 618 Advance(); |
621 token = Token::WHITESPACE; | 619 token = Token::WHITESPACE; |
(...skipping 260 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
882 } | 880 } |
883 | 881 |
884 #ifdef DEBUG | 882 #ifdef DEBUG |
885 void Scanner::SanityCheckTokenDesc(const TokenDesc& token) const { | 883 void Scanner::SanityCheckTokenDesc(const TokenDesc& token) const { |
886 // Most tokens should not have literal_chars or even raw_literal_chars. | 884 // Most tokens should not have literal_chars or even raw_literal_chars. |
887 // The rules are: | 885 // The rules are: |
888 // - UNINITIALIZED: we don't care. | 886 // - UNINITIALIZED: we don't care. |
889 // - TEMPLATE_*: need both literal + raw literal chars. | 887 // - TEMPLATE_*: need both literal + raw literal chars. |
890 // - IDENTIFIERS, STRINGS, etc.: need a literal, but no raw literal. | 888 // - IDENTIFIERS, STRINGS, etc.: need a literal, but no raw literal. |
891 // - all others: should have neither. | 889 // - all others: should have neither. |
| 890 // Furthermore, only TEMPLATE_* tokens can have an |
| 891 // invalid_template_escape_message. |
892 | 892 |
893 switch (token.token) { | 893 switch (token.token) { |
894 case Token::UNINITIALIZED: | 894 case Token::UNINITIALIZED: |
895 // token.literal_chars & other members might be garbage. That's ok. | 895 // token.literal_chars & other members might be garbage. That's ok. |
896 break; | 896 break; |
897 case Token::TEMPLATE_SPAN: | 897 case Token::TEMPLATE_SPAN: |
898 case Token::TEMPLATE_TAIL: | 898 case Token::TEMPLATE_TAIL: |
899 DCHECK_NOT_NULL(token.raw_literal_chars); | 899 DCHECK_NOT_NULL(token.raw_literal_chars); |
900 DCHECK_NOT_NULL(token.literal_chars); | 900 DCHECK_NOT_NULL(token.literal_chars); |
901 break; | 901 break; |
902 case Token::ESCAPED_KEYWORD: | 902 case Token::ESCAPED_KEYWORD: |
903 case Token::ESCAPED_STRICT_RESERVED_WORD: | 903 case Token::ESCAPED_STRICT_RESERVED_WORD: |
904 case Token::FUTURE_STRICT_RESERVED_WORD: | 904 case Token::FUTURE_STRICT_RESERVED_WORD: |
905 case Token::IDENTIFIER: | 905 case Token::IDENTIFIER: |
906 case Token::NUMBER: | 906 case Token::NUMBER: |
907 case Token::REGEXP_LITERAL: | 907 case Token::REGEXP_LITERAL: |
908 case Token::SMI: | 908 case Token::SMI: |
909 case Token::STRING: | 909 case Token::STRING: |
910 DCHECK_NOT_NULL(token.literal_chars); | 910 DCHECK_NOT_NULL(token.literal_chars); |
911 DCHECK_NULL(token.raw_literal_chars); | 911 DCHECK_NULL(token.raw_literal_chars); |
| 912 DCHECK_EQ(token.invalid_template_escape_message, MessageTemplate::kNone); |
912 break; | 913 break; |
913 default: | 914 default: |
914 DCHECK_NULL(token.literal_chars); | 915 DCHECK_NULL(token.literal_chars); |
915 DCHECK_NULL(token.raw_literal_chars); | 916 DCHECK_NULL(token.raw_literal_chars); |
| 917 DCHECK_EQ(token.invalid_template_escape_message, MessageTemplate::kNone); |
916 break; | 918 break; |
917 } | 919 } |
918 } | 920 } |
919 #endif // DEBUG | 921 #endif // DEBUG |
920 | 922 |
921 void Scanner::SeekForward(int pos) { | 923 void Scanner::SeekForward(int pos) { |
922 // After this call, we will have the token at the given position as | 924 // After this call, we will have the token at the given position as |
923 // the "next" token. The "current" token will be invalid. | 925 // the "next" token. The "current" token will be invalid. |
924 if (pos == next_.location.beg_pos) return; | 926 if (pos == next_.location.beg_pos) return; |
925 int current_pos = source_pos(); | 927 int current_pos = source_pos(); |
(...skipping 184 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1110 } else { | 1112 } else { |
1111 AddRawLiteralChar('\n'); | 1113 AddRawLiteralChar('\n'); |
1112 } | 1114 } |
1113 } | 1115 } |
1114 } else { | 1116 } else { |
1115 bool success = ScanEscape<capture_raw, in_template_literal>(); | 1117 bool success = ScanEscape<capture_raw, in_template_literal>(); |
1116 USE(success); | 1118 USE(success); |
1117 DCHECK_EQ(!success, has_error()); | 1119 DCHECK_EQ(!success, has_error()); |
1118 // For templates, invalid escape sequence checking is handled in the | 1120 // For templates, invalid escape sequence checking is handled in the |
1119 // parser. | 1121 // parser. |
1120 scanner_error_state.MoveErrorTo(&invalid_template_escape_message_, | 1122 scanner_error_state.MoveErrorTo(&next_); |
1121 &invalid_template_escape_location_); | 1123 octal_error_state.MoveErrorTo(&next_); |
1122 octal_error_state.MoveErrorTo(&invalid_template_escape_message_, | |
1123 &invalid_template_escape_location_); | |
1124 } | 1124 } |
1125 } else if (c < 0) { | 1125 } else if (c < 0) { |
1126 // Unterminated template literal | 1126 // Unterminated template literal |
1127 PushBack(c); | 1127 PushBack(c); |
1128 break; | 1128 break; |
1129 } else { | 1129 } else { |
1130 // The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A. | 1130 // The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A. |
1131 // The TRV of LineTerminatorSequence :: <CR><LF> is the sequence | 1131 // The TRV of LineTerminatorSequence :: <CR><LF> is the sequence |
1132 // consisting of the CV 0x000A. | 1132 // consisting of the CV 0x000A. |
1133 if (c == '\r') { | 1133 if (c == '\r') { |
(...skipping 595 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1729 } | 1729 } |
1730 | 1730 |
1731 void Scanner::SeekNext(size_t position) { | 1731 void Scanner::SeekNext(size_t position) { |
1732 // Use with care: This cleanly resets most, but not all scanner state. | 1732 // Use with care: This cleanly resets most, but not all scanner state. |
1733 // TODO(vogelheim): Fix this, or at least DCHECK the relevant conditions. | 1733 // TODO(vogelheim): Fix this, or at least DCHECK the relevant conditions. |
1734 | 1734 |
1735 // To re-scan from a given character position, we need to: | 1735 // To re-scan from a given character position, we need to: |
1736 // 1, Reset the current_, next_ and next_next_ tokens | 1736 // 1, Reset the current_, next_ and next_next_ tokens |
1737 // (next_ + next_next_ will be overwritten by Next(), | 1737 // (next_ + next_next_ will be overwritten by Next(), |
1738 // current_ will remain unchanged, so overwrite it fully.) | 1738 // current_ will remain unchanged, so overwrite it fully.) |
1739 current_ = {{0, 0}, nullptr, nullptr, 0, Token::UNINITIALIZED}; | 1739 current_ = { |
| 1740 {0, 0}, nullptr, nullptr, 0, Token::UNINITIALIZED, MessageTemplate::kNone, |
| 1741 {0, 0}}; |
1740 next_.token = Token::UNINITIALIZED; | 1742 next_.token = Token::UNINITIALIZED; |
1741 next_next_.token = Token::UNINITIALIZED; | 1743 next_next_.token = Token::UNINITIALIZED; |
1742 // 2, reset the source to the desired position, | 1744 // 2, reset the source to the desired position, |
1743 source_->Seek(position); | 1745 source_->Seek(position); |
1744 // 3, re-scan, by scanning the look-ahead char + 1 token (next_). | 1746 // 3, re-scan, by scanning the look-ahead char + 1 token (next_). |
1745 c0_ = source_->Advance(); | 1747 c0_ = source_->Advance(); |
1746 Next(); | 1748 Next(); |
1747 DCHECK_EQ(next_.location.beg_pos, static_cast<int>(position)); | 1749 DCHECK_EQ(next_.location.beg_pos, static_cast<int>(position)); |
1748 } | 1750 } |
1749 | 1751 |
1750 } // namespace internal | 1752 } // namespace internal |
1751 } // namespace v8 | 1753 } // namespace v8 |
OLD | NEW |