Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
| 6 | 6 |
| 7 #include "src/parsing/scanner.h" | 7 #include "src/parsing/scanner.h" |
| 8 | 8 |
| 9 #include <stdint.h> | 9 #include <stdint.h> |
| 10 | 10 |
| (...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 53 // Need to capture identifiers in order to recognize "get" and "set" | 53 // Need to capture identifiers in order to recognize "get" and "set" |
| 54 // in object literals. | 54 // in object literals. |
| 55 Init(); | 55 Init(); |
| 56 // Skip initial whitespace allowing HTML comment ends just like | 56 // Skip initial whitespace allowing HTML comment ends just like |
| 57 // after a newline and scan first token. | 57 // after a newline and scan first token. |
| 58 has_line_terminator_before_next_ = true; | 58 has_line_terminator_before_next_ = true; |
| 59 SkipWhiteSpace(); | 59 SkipWhiteSpace(); |
| 60 Scan(); | 60 Scan(); |
| 61 } | 61 } |
| 62 | 62 |
| 63 | 63 template <bool capture_raw, bool unicode> |
| 64 template <bool capture_raw> | |
| 65 uc32 Scanner::ScanHexNumber(int expected_length) { | 64 uc32 Scanner::ScanHexNumber(int expected_length) { |
| 66 DCHECK(expected_length <= 4); // prevent overflow | 65 DCHECK(expected_length <= 4); // prevent overflow |
| 67 | 66 |
| 67 int begin = source_pos() - 2; | |
| 68 uc32 x = 0; | 68 uc32 x = 0; |
| 69 for (int i = 0; i < expected_length; i++) { | 69 for (int i = 0; i < expected_length; i++) { |
| 70 int d = HexValue(c0_); | 70 int d = HexValue(c0_); |
| 71 if (d < 0) { | 71 if (d < 0) { |
| 72 ReportScannerError(unicode | |
| 73 ? MessageTemplate::kInvalidUnicodeEscapeSequence | |
| 74 : MessageTemplate::kInvalidHexEscapeSequence, | |
| 75 begin, begin + expected_length + 2); | |
| 72 return -1; | 76 return -1; |
| 73 } | 77 } |
| 74 x = x * 16 + d; | 78 x = x * 16 + d; |
| 75 Advance<capture_raw>(); | 79 Advance<capture_raw>(); |
| 76 } | 80 } |
| 77 | 81 |
| 78 return x; | 82 return x; |
| 79 } | 83 } |
| 80 | 84 |
| 81 | |
| 82 template <bool capture_raw> | 85 template <bool capture_raw> |
| 83 uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value) { | 86 uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value, bool& bad_codepoint) { |
|
adamk
2016/03/18 18:21:55
The right type here is bool* in V8
caitp (gmail)
2016/03/18 19:00:36
don't you think references are better for prevent
adamk
2016/03/18 19:09:57
I think bool args are dangerous in C++ no matter w
caitp (gmail)
2016/03/21 16:15:03
Since the early return was put back, there isn't m
| |
| 84 uc32 x = 0; | 87 uc32 x = 0; |
| 85 int d = HexValue(c0_); | 88 int d = HexValue(c0_); |
| 86 if (d < 0) { | 89 |
| 87 return -1; | |
|
adamk
2016/03/18 18:21:55
Losing this and the below early return makes me a
caitp (gmail)
2016/03/18 19:00:36
the idea is, if you get hex characters until the }
adamk
2016/03/18 19:09:57
Yeah, I'd just prefer a minimal change if the loca
caitp (gmail)
2016/03/21 16:15:03
Done.
| |
| 88 } | |
| 89 while (d >= 0) { | 90 while (d >= 0) { |
| 90 x = x * 16 + d; | 91 x = x * 16 + d; |
| 91 if (x > max_value) return -1; | 92 if (x > max_value) { |
| 93 bad_codepoint = true; | |
| 94 } | |
| 92 Advance<capture_raw>(); | 95 Advance<capture_raw>(); |
| 93 d = HexValue(c0_); | 96 d = HexValue(c0_); |
| 94 } | 97 } |
| 98 | |
| 99 if (d < 0 && (c0_ != '}')) { | |
| 100 ReportScannerError(x != 0 ? MessageTemplate::kUnclosedUnicodeEscapeSequence | |
| 101 : MessageTemplate::kInvalidUnicodeEscapeSequence, | |
| 102 source_pos()); | |
| 103 return -1; | |
| 104 } | |
| 105 | |
| 95 return x; | 106 return x; |
| 96 } | 107 } |
| 97 | 108 |
| 98 | 109 |
| 99 // Ensure that tokens can be stored in a byte. | 110 // Ensure that tokens can be stored in a byte. |
| 100 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); | 111 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); |
| 101 | 112 |
| 102 // Table of one-character tokens, by character (0x00..0x7f only). | 113 // Table of one-character tokens, by character (0x00..0x7f only). |
| 103 static const byte one_char_tokens[] = { | 114 static const byte one_char_tokens[] = { |
| 104 Token::ILLEGAL, | 115 Token::ILLEGAL, |
| (...skipping 735 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 840 if (c == '\\') break; | 851 if (c == '\\') break; |
| 841 Advance<false, false>(); | 852 Advance<false, false>(); |
| 842 AddLiteralChar(c); | 853 AddLiteralChar(c); |
| 843 } | 854 } |
| 844 | 855 |
| 845 while (c0_ != quote && c0_ >= 0 | 856 while (c0_ != quote && c0_ >= 0 |
| 846 && !unicode_cache_->IsLineTerminator(c0_)) { | 857 && !unicode_cache_->IsLineTerminator(c0_)) { |
| 847 uc32 c = c0_; | 858 uc32 c = c0_; |
| 848 Advance(); | 859 Advance(); |
| 849 if (c == '\\') { | 860 if (c == '\\') { |
| 850 if (c0_ < 0 || !ScanEscape<false, false>()) return Token::ILLEGAL; | 861 if (c0_ < 0 || !ScanEscape<false, false>()) { |
| 862 return Token::ILLEGAL; | |
| 863 } | |
| 851 } else { | 864 } else { |
| 852 AddLiteralChar(c); | 865 AddLiteralChar(c); |
| 853 } | 866 } |
| 854 } | 867 } |
| 855 if (c0_ != quote) return Token::ILLEGAL; | 868 if (c0_ != quote) return Token::ILLEGAL; |
| 856 literal.Complete(); | 869 literal.Complete(); |
| 857 | 870 |
| 858 Advance(); // consume quote | 871 Advance(); // consume quote |
| 859 return Token::STRING; | 872 return Token::STRING; |
| 860 } | 873 } |
| (...skipping 11 matching lines...) Expand all Loading... | |
| 872 // | 885 // |
| 873 // A TEMPLATE_SPAN should always be followed by an Expression, while a | 886 // A TEMPLATE_SPAN should always be followed by an Expression, while a |
| 874 // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be | 887 // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be |
| 875 // followed by an Expression. | 888 // followed by an Expression. |
| 876 | 889 |
| 877 Token::Value result = Token::TEMPLATE_SPAN; | 890 Token::Value result = Token::TEMPLATE_SPAN; |
| 878 LiteralScope literal(this); | 891 LiteralScope literal(this); |
| 879 StartRawLiteral(); | 892 StartRawLiteral(); |
| 880 const bool capture_raw = true; | 893 const bool capture_raw = true; |
| 881 const bool in_template_literal = true; | 894 const bool in_template_literal = true; |
| 882 | |
| 883 while (true) { | 895 while (true) { |
| 884 uc32 c = c0_; | 896 uc32 c = c0_; |
| 885 Advance<capture_raw>(); | 897 Advance<capture_raw>(); |
| 886 if (c == '`') { | 898 if (c == '`') { |
| 887 result = Token::TEMPLATE_TAIL; | 899 result = Token::TEMPLATE_TAIL; |
| 888 ReduceRawLiteralLength(1); | 900 ReduceRawLiteralLength(1); |
| 889 break; | 901 break; |
| 890 } else if (c == '$' && c0_ == '{') { | 902 } else if (c == '$' && c0_ == '{') { |
| 891 Advance<capture_raw>(); // Consume '{' | 903 Advance<capture_raw>(); // Consume '{' |
| 892 ReduceRawLiteralLength(2); | 904 ReduceRawLiteralLength(2); |
| (...skipping 199 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1092 Advance(); | 1104 Advance(); |
| 1093 return ScanUnicodeEscape<false>(); | 1105 return ScanUnicodeEscape<false>(); |
| 1094 } | 1106 } |
| 1095 | 1107 |
| 1096 | 1108 |
| 1097 template <bool capture_raw> | 1109 template <bool capture_raw> |
| 1098 uc32 Scanner::ScanUnicodeEscape() { | 1110 uc32 Scanner::ScanUnicodeEscape() { |
| 1099 // Accept both \uxxxx and \u{xxxxxx}. In the latter case, the number of | 1111 // Accept both \uxxxx and \u{xxxxxx}. In the latter case, the number of |
| 1100 // hex digits between { } is arbitrary. \ and u have already been read. | 1112 // hex digits between { } is arbitrary. \ and u have already been read. |
| 1101 if (c0_ == '{') { | 1113 if (c0_ == '{') { |
| 1114 int begin = source_pos() - 2; | |
| 1115 bool bad_codepoint = false; | |
| 1102 Advance<capture_raw>(); | 1116 Advance<capture_raw>(); |
| 1103 uc32 cp = ScanUnlimitedLengthHexNumber<capture_raw>(0x10ffff); | 1117 uc32 cp = |
| 1104 if (cp < 0) { | 1118 ScanUnlimitedLengthHexNumber<capture_raw>(0x10ffff, bad_codepoint); |
| 1105 return -1; | 1119 if (cp < 0 || bad_codepoint) { |
| 1106 } | 1120 ReportScannerError(MessageTemplate::kUndefinedUnicodeCodePoint, begin, |
| 1107 if (c0_ != '}') { | 1121 source_pos() + 1); |
| 1108 return -1; | 1122 return -1; |
| 1109 } | 1123 } |
| 1110 Advance<capture_raw>(); | 1124 Advance<capture_raw>(); |
| 1111 return cp; | 1125 return cp; |
| 1112 } | 1126 } |
| 1113 return ScanHexNumber<capture_raw>(4); | 1127 const bool unicode = true; |
| 1128 return ScanHexNumber<capture_raw, unicode>(4); | |
| 1114 } | 1129 } |
| 1115 | 1130 |
| 1116 | 1131 |
| 1117 // ---------------------------------------------------------------------------- | 1132 // ---------------------------------------------------------------------------- |
| 1118 // Keyword Matcher | 1133 // Keyword Matcher |
| 1119 | 1134 |
| 1120 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \ | 1135 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \ |
| 1121 KEYWORD_GROUP('b') \ | 1136 KEYWORD_GROUP('b') \ |
| 1122 KEYWORD("break", Token::BREAK) \ | 1137 KEYWORD("break", Token::BREAK) \ |
| 1123 KEYWORD_GROUP('c') \ | 1138 KEYWORD_GROUP('c') \ |
| (...skipping 546 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 1670 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); | 1685 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); |
| 1671 } | 1686 } |
| 1672 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); | 1687 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); |
| 1673 | 1688 |
| 1674 backing_store_.AddBlock(bytes); | 1689 backing_store_.AddBlock(bytes); |
| 1675 return backing_store_.EndSequence().start(); | 1690 return backing_store_.EndSequence().start(); |
| 1676 } | 1691 } |
| 1677 | 1692 |
| 1678 } // namespace internal | 1693 } // namespace internal |
| 1679 } // namespace v8 | 1694 } // namespace v8 |
| OLD | NEW |