OLD | NEW |
---|---|
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
6 | 6 |
7 #include "src/parsing/scanner.h" | 7 #include "src/parsing/scanner.h" |
8 | 8 |
9 #include <stdint.h> | 9 #include <stdint.h> |
10 | 10 |
(...skipping 44 matching lines...) | |
55 // Need to capture identifiers in order to recognize "get" and "set" | 55 // Need to capture identifiers in order to recognize "get" and "set" |
56 // in object literals. | 56 // in object literals. |
57 Init(); | 57 Init(); |
58 // Skip initial whitespace allowing HTML comment ends just like | 58 // Skip initial whitespace allowing HTML comment ends just like |
59 // after a newline and scan first token. | 59 // after a newline and scan first token. |
60 has_line_terminator_before_next_ = true; | 60 has_line_terminator_before_next_ = true; |
61 SkipWhiteSpace(); | 61 SkipWhiteSpace(); |
62 Scan(); | 62 Scan(); |
63 } | 63 } |
64 | 64 |
65 | 65 template <bool capture_raw, bool unicode> |
66 template <bool capture_raw> | |
67 uc32 Scanner::ScanHexNumber(int expected_length) { | 66 uc32 Scanner::ScanHexNumber(int expected_length) { |
68 DCHECK(expected_length <= 4); // prevent overflow | 67 DCHECK(expected_length <= 4); // prevent overflow |
69 | 68 |
| 69 int begin = source_pos() - 2; |
70 uc32 x = 0; | 70 uc32 x = 0; |
71 for (int i = 0; i < expected_length; i++) { | 71 for (int i = 0; i < expected_length; i++) { |
72 int d = HexValue(c0_); | 72 int d = HexValue(c0_); |
73 if (d < 0) { | 73 if (d < 0) { |
| 74 ReportScannerError(Location(begin, begin + expected_length + 2), |
| 75 unicode |
| 76 ? MessageTemplate::kInvalidUnicodeEscapeSequence |
| 77 : MessageTemplate::kInvalidHexEscapeSequence); |
74 return -1; | 78 return -1; |
75 } | 79 } |
76 x = x * 16 + d; | 80 x = x * 16 + d; |
77 Advance<capture_raw>(); | 81 Advance<capture_raw>(); |
78 } | 82 } |
79 | 83 |
80 return x; | 84 return x; |
81 } | 85 } |
82 | 86 |
83 | |
84 template <bool capture_raw> | 87 template <bool capture_raw> |
85 uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value) { | 88 uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value) { |
86 uc32 x = 0; | 89 uc32 x = 0; |
87 int d = HexValue(c0_); | 90 int d = HexValue(c0_); |
88 if (d < 0) { | 91 int nof_digits = 0; |
| 92 while (d >= 0) { |
| 93 x = x * 16 + d; |
| 94 if (x > max_value) { |
| 95 return -1;
adamk
2016/03/21 19:13:56
Seems like this is the right place to report the g… [comment truncated in source]
caitp (gmail)
2016/03/21 19:40:05
Done.
|
| 96 } |
| 97 Advance<capture_raw>(); |
| 98 d = HexValue(c0_); |
| 99 nof_digits++; |
| 100 } |
| 101 |
| 102 if (!nof_digits || (c0_ != '}')) {
adamk
2016/03/21 19:13:56
I'd leave the closing brace check in the caller, s… [comment truncated in source]
caitp (gmail)
2016/03/21 19:40:05
Done.
|
| 103 ReportScannerError(source_pos(), |
| 104 MessageTemplate::kInvalidUnicodeEscapeSequence); |
89 return -1; | 105 return -1; |
90 } | 106 } |
91 while (d >= 0) { | 107 |
92 x = x * 16 + d; | |
93 if (x > max_value) return -1; | |
94 Advance<capture_raw>(); | |
95 d = HexValue(c0_); | |
96 } | |
97 return x; | 108 return x; |
98 } | 109 } |
99 | 110 |
100 | 111 |
101 // Ensure that tokens can be stored in a byte. | 112 // Ensure that tokens can be stored in a byte. |
102 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); | 113 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); |
103 | 114 |
104 // Table of one-character tokens, by character (0x00..0x7f only). | 115 // Table of one-character tokens, by character (0x00..0x7f only). |
105 static const byte one_char_tokens[] = { | 116 static const byte one_char_tokens[] = { |
106 Token::ILLEGAL, | 117 Token::ILLEGAL, |
(...skipping 742 matching lines...) | |
849 if (c == '\\') break; | 860 if (c == '\\') break; |
850 Advance<false, false>(); | 861 Advance<false, false>(); |
851 AddLiteralChar(c); | 862 AddLiteralChar(c); |
852 } | 863 } |
853 | 864 |
854 while (c0_ != quote && c0_ >= 0 | 865 while (c0_ != quote && c0_ >= 0 |
855 && !unicode_cache_->IsLineTerminator(c0_)) { | 866 && !unicode_cache_->IsLineTerminator(c0_)) { |
856 uc32 c = c0_; | 867 uc32 c = c0_; |
857 Advance(); | 868 Advance(); |
858 if (c == '\\') { | 869 if (c == '\\') { |
859 if (c0_ < 0 || !ScanEscape<false, false>()) return Token::ILLEGAL; | 870 if (c0_ < 0 || !ScanEscape<false, false>()) { |
| 871 return Token::ILLEGAL; |
| 872 } |
860 } else { | 873 } else { |
861 AddLiteralChar(c); | 874 AddLiteralChar(c); |
862 } | 875 } |
863 } | 876 } |
864 if (c0_ != quote) return Token::ILLEGAL; | 877 if (c0_ != quote) return Token::ILLEGAL; |
865 literal.Complete(); | 878 literal.Complete(); |
866 | 879 |
867 Advance(); // consume quote | 880 Advance(); // consume quote |
868 return Token::STRING; | 881 return Token::STRING; |
869 } | 882 } |
(...skipping 11 matching lines...) | |
881 // | 894 // |
882 // A TEMPLATE_SPAN should always be followed by an Expression, while a | 895 // A TEMPLATE_SPAN should always be followed by an Expression, while a |
883 // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be | 896 // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be |
884 // followed by an Expression. | 897 // followed by an Expression. |
885 | 898 |
886 Token::Value result = Token::TEMPLATE_SPAN; | 899 Token::Value result = Token::TEMPLATE_SPAN; |
887 LiteralScope literal(this); | 900 LiteralScope literal(this); |
888 StartRawLiteral(); | 901 StartRawLiteral(); |
889 const bool capture_raw = true; | 902 const bool capture_raw = true; |
890 const bool in_template_literal = true; | 903 const bool in_template_literal = true; |
891 | |
892 while (true) { | 904 while (true) { |
893 uc32 c = c0_; | 905 uc32 c = c0_; |
894 Advance<capture_raw>(); | 906 Advance<capture_raw>(); |
895 if (c == '`') { | 907 if (c == '`') { |
896 result = Token::TEMPLATE_TAIL; | 908 result = Token::TEMPLATE_TAIL; |
897 ReduceRawLiteralLength(1); | 909 ReduceRawLiteralLength(1); |
898 break; | 910 break; |
899 } else if (c == '$' && c0_ == '{') { | 911 } else if (c == '$' && c0_ == '{') { |
900 Advance<capture_raw>(); // Consume '{' | 912 Advance<capture_raw>(); // Consume '{' |
901 ReduceRawLiteralLength(2); | 913 ReduceRawLiteralLength(2); |
(...skipping 199 matching lines...) | |
1101 Advance(); | 1113 Advance(); |
1102 return ScanUnicodeEscape<false>(); | 1114 return ScanUnicodeEscape<false>(); |
1103 } | 1115 } |
1104 | 1116 |
1105 | 1117 |
1106 template <bool capture_raw> | 1118 template <bool capture_raw> |
1107 uc32 Scanner::ScanUnicodeEscape() { | 1119 uc32 Scanner::ScanUnicodeEscape() { |
1108 // Accept both \uxxxx and \u{xxxxxx}. In the latter case, the number of | 1120 // Accept both \uxxxx and \u{xxxxxx}. In the latter case, the number of |
1109 // hex digits between { } is arbitrary. \ and u have already been read. | 1121 // hex digits between { } is arbitrary. \ and u have already been read. |
1110 if (c0_ == '{') { | 1122 if (c0_ == '{') { |
| 1123 int begin = source_pos() - 2; |
1111 Advance<capture_raw>(); | 1124 Advance<capture_raw>(); |
1112 uc32 cp = ScanUnlimitedLengthHexNumber<capture_raw>(0x10ffff); | 1125 uc32 cp = ScanUnlimitedLengthHexNumber<capture_raw>(0x10ffff); |
1113 if (cp < 0) { | 1126 if (cp < 0) { |
1114 return -1; | 1127 ReportScannerError(Location(begin, source_pos() + 1), |
1115 } | 1128 MessageTemplate::kUndefinedUnicodeCodePoint); |
1116 if (c0_ != '}') { | |
1117 return -1; | 1129 return -1; |
1118 } | 1130 } |
1119 Advance<capture_raw>(); | 1131 Advance<capture_raw>(); |
1120 return cp; | 1132 return cp; |
1121 } | 1133 } |
1122 return ScanHexNumber<capture_raw>(4); | 1134 const bool unicode = true; |
| 1135 return ScanHexNumber<capture_raw, unicode>(4); |
1123 } | 1136 } |
1124 | 1137 |
1125 | 1138 |
1126 // ---------------------------------------------------------------------------- | 1139 // ---------------------------------------------------------------------------- |
1127 // Keyword Matcher | 1140 // Keyword Matcher |
1128 | 1141 |
1129 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \ | 1142 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \ |
1130 KEYWORD_GROUP('b') \ | 1143 KEYWORD_GROUP('b') \ |
1131 KEYWORD("break", Token::BREAK) \ | 1144 KEYWORD("break", Token::BREAK) \ |
1132 KEYWORD_GROUP('c') \ | 1145 KEYWORD_GROUP('c') \ |
(...skipping 546 matching lines...) | |
1679 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); | 1692 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); |
1680 } | 1693 } |
1681 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); | 1694 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); |
1682 | 1695 |
1683 backing_store_.AddBlock(bytes); | 1696 backing_store_.AddBlock(bytes); |
1684 return backing_store_.EndSequence().start(); | 1697 return backing_store_.EndSequence().start(); |
1685 } | 1698 } |
1686 | 1699 |
1687 } // namespace internal | 1700 } // namespace internal |
1688 } // namespace v8 | 1701 } // namespace v8 |
OLD | NEW |