OLD | NEW |
---|---|
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
6 | 6 |
7 #include "src/parsing/scanner.h" | 7 #include "src/parsing/scanner.h" |
8 | 8 |
9 #include <stdint.h> | 9 #include <stdint.h> |
10 | 10 |
(...skipping 42 matching lines...) | |
53 // Need to capture identifiers in order to recognize "get" and "set" | 53 // Need to capture identifiers in order to recognize "get" and "set" |
54 // in object literals. | 54 // in object literals. |
55 Init(); | 55 Init(); |
56 // Skip initial whitespace allowing HTML comment ends just like | 56 // Skip initial whitespace allowing HTML comment ends just like |
57 // after a newline and scan first token. | 57 // after a newline and scan first token. |
58 has_line_terminator_before_next_ = true; | 58 has_line_terminator_before_next_ = true; |
59 SkipWhiteSpace(); | 59 SkipWhiteSpace(); |
60 Scan(); | 60 Scan(); |
61 } | 61 } |
62 | 62 |
63 | 63 template <bool capture_raw, bool unicode> |
64 template <bool capture_raw> | |
65 uc32 Scanner::ScanHexNumber(int expected_length) { | 64 uc32 Scanner::ScanHexNumber(int expected_length) { |
66 DCHECK(expected_length <= 4); // prevent overflow | 65 DCHECK(expected_length <= 4); // prevent overflow |
67 | 66 |
67 int begin = source_pos() - 2; | |
68 uc32 x = 0; | 68 uc32 x = 0; |
69 for (int i = 0; i < expected_length; i++) { | 69 for (int i = 0; i < expected_length; i++) { |
70 int d = HexValue(c0_); | 70 int d = HexValue(c0_); |
71 if (d < 0) { | 71 if (d < 0) { |
72 ReportScannerError(unicode | |
73 ? MessageTemplate::kInvalidUnicodeEscapeSequence | |
74 : MessageTemplate::kInvalidHexEscapeSequence, | |
75 begin, begin + expected_length + 2); | |
72 return -1; | 76 return -1; |
73 } | 77 } |
74 x = x * 16 + d; | 78 x = x * 16 + d; |
75 Advance<capture_raw>(); | 79 Advance<capture_raw>(); |
76 } | 80 } |
77 | 81 |
78 return x; | 82 return x; |
79 } | 83 } |
80 | 84 |
81 | |
82 template <bool capture_raw> | 85 template <bool capture_raw> |
83 uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value) { | 86 uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value, bool& bad_codepoint) { |
adamk
2016/03/18 18:21:55
The right type here is bool* in V8
caitp (gmail)
2016/03/18 19:00:36
don't you think references are better for prevent
adamk
2016/03/18 19:09:57
I think bool args are dangerous in C++ no matter w
caitp (gmail)
2016/03/21 16:15:03
Since the early return was put back, there isn't m
| |
84 uc32 x = 0; | 87 uc32 x = 0; |
85 int d = HexValue(c0_); | 88 int d = HexValue(c0_); |
86 if (d < 0) { | 89 |
87 return -1; | |
adamk
2016/03/18 18:21:55
Losing this and the below early return makes me a
caitp (gmail)
2016/03/18 19:00:36
the idea is, if you get hex characters until the }
adamk
2016/03/18 19:09:57
Yeah, I'd just prefer a minimal change if the loca
caitp (gmail)
2016/03/21 16:15:03
Done.
| |
88 } | |
89 while (d >= 0) { | 90 while (d >= 0) { |
90 x = x * 16 + d; | 91 x = x * 16 + d; |
91 if (x > max_value) return -1; | 92 if (x > max_value) { |
93 bad_codepoint = true; | |
94 } | |
92 Advance<capture_raw>(); | 95 Advance<capture_raw>(); |
93 d = HexValue(c0_); | 96 d = HexValue(c0_); |
94 } | 97 } |
98 | |
99 if (d < 0 && (c0_ != '}')) { | |
100 ReportScannerError(x != 0 ? MessageTemplate::kUnclosedUnicodeEscapeSequence | |
101 : MessageTemplate::kInvalidUnicodeEscapeSequence, | |
102 source_pos()); | |
103 return -1; | |
104 } | |
105 | |
95 return x; | 106 return x; |
96 } | 107 } |
97 | 108 |
98 | 109 |
99 // Ensure that tokens can be stored in a byte. | 110 // Ensure that tokens can be stored in a byte. |
100 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); | 111 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); |
101 | 112 |
102 // Table of one-character tokens, by character (0x00..0x7f only). | 113 // Table of one-character tokens, by character (0x00..0x7f only). |
103 static const byte one_char_tokens[] = { | 114 static const byte one_char_tokens[] = { |
104 Token::ILLEGAL, | 115 Token::ILLEGAL, |
(...skipping 735 matching lines...) | |
840 if (c == '\\') break; | 851 if (c == '\\') break; |
841 Advance<false, false>(); | 852 Advance<false, false>(); |
842 AddLiteralChar(c); | 853 AddLiteralChar(c); |
843 } | 854 } |
844 | 855 |
845 while (c0_ != quote && c0_ >= 0 | 856 while (c0_ != quote && c0_ >= 0 |
846 && !unicode_cache_->IsLineTerminator(c0_)) { | 857 && !unicode_cache_->IsLineTerminator(c0_)) { |
847 uc32 c = c0_; | 858 uc32 c = c0_; |
848 Advance(); | 859 Advance(); |
849 if (c == '\\') { | 860 if (c == '\\') { |
850 if (c0_ < 0 || !ScanEscape<false, false>()) return Token::ILLEGAL; | 861 if (c0_ < 0 || !ScanEscape<false, false>()) { |
862 return Token::ILLEGAL; | |
863 } | |
851 } else { | 864 } else { |
852 AddLiteralChar(c); | 865 AddLiteralChar(c); |
853 } | 866 } |
854 } | 867 } |
855 if (c0_ != quote) return Token::ILLEGAL; | 868 if (c0_ != quote) return Token::ILLEGAL; |
856 literal.Complete(); | 869 literal.Complete(); |
857 | 870 |
858 Advance(); // consume quote | 871 Advance(); // consume quote |
859 return Token::STRING; | 872 return Token::STRING; |
860 } | 873 } |
(...skipping 11 matching lines...) | |
872 // | 885 // |
873 // A TEMPLATE_SPAN should always be followed by an Expression, while a | 886 // A TEMPLATE_SPAN should always be followed by an Expression, while a |
874 // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be | 887 // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be |
875 // followed by an Expression. | 888 // followed by an Expression. |
876 | 889 |
877 Token::Value result = Token::TEMPLATE_SPAN; | 890 Token::Value result = Token::TEMPLATE_SPAN; |
878 LiteralScope literal(this); | 891 LiteralScope literal(this); |
879 StartRawLiteral(); | 892 StartRawLiteral(); |
880 const bool capture_raw = true; | 893 const bool capture_raw = true; |
881 const bool in_template_literal = true; | 894 const bool in_template_literal = true; |
882 | |
883 while (true) { | 895 while (true) { |
884 uc32 c = c0_; | 896 uc32 c = c0_; |
885 Advance<capture_raw>(); | 897 Advance<capture_raw>(); |
886 if (c == '`') { | 898 if (c == '`') { |
887 result = Token::TEMPLATE_TAIL; | 899 result = Token::TEMPLATE_TAIL; |
888 ReduceRawLiteralLength(1); | 900 ReduceRawLiteralLength(1); |
889 break; | 901 break; |
890 } else if (c == '$' && c0_ == '{') { | 902 } else if (c == '$' && c0_ == '{') { |
891 Advance<capture_raw>(); // Consume '{' | 903 Advance<capture_raw>(); // Consume '{' |
892 ReduceRawLiteralLength(2); | 904 ReduceRawLiteralLength(2); |
(...skipping 199 matching lines...) | |
1092 Advance(); | 1104 Advance(); |
1093 return ScanUnicodeEscape<false>(); | 1105 return ScanUnicodeEscape<false>(); |
1094 } | 1106 } |
1095 | 1107 |
1096 | 1108 |
1097 template <bool capture_raw> | 1109 template <bool capture_raw> |
1098 uc32 Scanner::ScanUnicodeEscape() { | 1110 uc32 Scanner::ScanUnicodeEscape() { |
1099 // Accept both \uxxxx and \u{xxxxxx}. In the latter case, the number of | 1111 // Accept both \uxxxx and \u{xxxxxx}. In the latter case, the number of |
1100 // hex digits between { } is arbitrary. \ and u have already been read. | 1112 // hex digits between { } is arbitrary. \ and u have already been read. |
1101 if (c0_ == '{') { | 1113 if (c0_ == '{') { |
1114 int begin = source_pos() - 2; | |
1115 bool bad_codepoint = false; | |
1102 Advance<capture_raw>(); | 1116 Advance<capture_raw>(); |
1103 uc32 cp = ScanUnlimitedLengthHexNumber<capture_raw>(0x10ffff); | 1117 uc32 cp = |
1104 if (cp < 0) { | 1118 ScanUnlimitedLengthHexNumber<capture_raw>(0x10ffff, bad_codepoint); |
1105 return -1; | 1119 if (cp < 0 || bad_codepoint) { |
1106 } | 1120 ReportScannerError(MessageTemplate::kUndefinedUnicodeCodePoint, begin, |
1107 if (c0_ != '}') { | 1121 source_pos() + 1); |
1108 return -1; | 1122 return -1; |
1109 } | 1123 } |
1110 Advance<capture_raw>(); | 1124 Advance<capture_raw>(); |
1111 return cp; | 1125 return cp; |
1112 } | 1126 } |
1113 return ScanHexNumber<capture_raw>(4); | 1127 const bool unicode = true; |
1128 return ScanHexNumber<capture_raw, unicode>(4); | |
1114 } | 1129 } |
1115 | 1130 |
1116 | 1131 |
1117 // ---------------------------------------------------------------------------- | 1132 // ---------------------------------------------------------------------------- |
1118 // Keyword Matcher | 1133 // Keyword Matcher |
1119 | 1134 |
1120 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \ | 1135 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \ |
1121 KEYWORD_GROUP('b') \ | 1136 KEYWORD_GROUP('b') \ |
1122 KEYWORD("break", Token::BREAK) \ | 1137 KEYWORD("break", Token::BREAK) \ |
1123 KEYWORD_GROUP('c') \ | 1138 KEYWORD_GROUP('c') \ |
(...skipping 546 matching lines...) | |
1670 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); | 1685 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); |
1671 } | 1686 } |
1672 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); | 1687 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); |
1673 | 1688 |
1674 backing_store_.AddBlock(bytes); | 1689 backing_store_.AddBlock(bytes); |
1675 return backing_store_.EndSequence().start(); | 1690 return backing_store_.EndSequence().start(); |
1676 } | 1691 } |
1677 | 1692 |
1678 } // namespace internal | 1693 } // namespace internal |
1679 } // namespace v8 | 1694 } // namespace v8 |
OLD | NEW |