 Chromium Code Reviews
 Chromium Code Reviews Issue 1793913002:
  [parser] implement error reporting for Scanner errors  (Closed) 
  Base URL: https://chromium.googlesource.com/v8/v8.git@master
    
  
    Issue 1793913002:
  [parser] implement error reporting for Scanner errors  (Closed) 
  Base URL: https://chromium.googlesource.com/v8/v8.git@master

| OLD | NEW | 
|---|---|
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. | 
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be | 
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. | 
| 4 | 4 | 
| 5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. | 
| 6 | 6 | 
| 7 #include "src/parsing/scanner.h" | 7 #include "src/parsing/scanner.h" | 
| 8 | 8 | 
| 9 #include <stdint.h> | 9 #include <stdint.h> | 
| 10 | 10 | 
| (...skipping 44 matching lines...) | | 
| 55 // Need to capture identifiers in order to recognize "get" and "set" | 55 // Need to capture identifiers in order to recognize "get" and "set" | 
| 56 // in object literals. | 56 // in object literals. | 
| 57 Init(); | 57 Init(); | 
| 58 // Skip initial whitespace allowing HTML comment ends just like | 58 // Skip initial whitespace allowing HTML comment ends just like | 
| 59 // after a newline and scan first token. | 59 // after a newline and scan first token. | 
| 60 has_line_terminator_before_next_ = true; | 60 has_line_terminator_before_next_ = true; | 
| 61 SkipWhiteSpace(); | 61 SkipWhiteSpace(); | 
| 62 Scan(); | 62 Scan(); | 
| 63 } | 63 } | 
| 64 | 64 | 
| 65 | 65 template <bool capture_raw, bool unicode> | 
| 66 template <bool capture_raw> | |
| 67 uc32 Scanner::ScanHexNumber(int expected_length) { | 66 uc32 Scanner::ScanHexNumber(int expected_length) { | 
| 68 DCHECK(expected_length <= 4); // prevent overflow | 67 DCHECK(expected_length <= 4); // prevent overflow | 
| 69 | 68 | 
| 69 int begin = source_pos() - 2; | |
| 70 uc32 x = 0; | 70 uc32 x = 0; | 
| 71 for (int i = 0; i < expected_length; i++) { | 71 for (int i = 0; i < expected_length; i++) { | 
| 72 int d = HexValue(c0_); | 72 int d = HexValue(c0_); | 
| 73 if (d < 0) { | 73 if (d < 0) { | 
| 74 ReportScannerError(Location(begin, begin + expected_length + 2), | |
| 75 unicode | |
| 76 ? MessageTemplate::kInvalidUnicodeEscapeSequence | |
| 77 : MessageTemplate::kInvalidHexEscapeSequence); | |
| 74 return -1; | 78 return -1; | 
| 75 } | 79 } | 
| 76 x = x * 16 + d; | 80 x = x * 16 + d; | 
| 77 Advance<capture_raw>(); | 81 Advance<capture_raw>(); | 
| 78 } | 82 } | 
| 79 | 83 | 
| 80 return x; | 84 return x; | 
| 81 } | 85 } | 
| 82 | 86 | 
| 83 | |
| 84 template <bool capture_raw> | 87 template <bool capture_raw> | 
| 85 uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value) { | 88 uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value, int beg_pos) { | 
| 86 uc32 x = 0; | 89 uc32 x = 0; | 
| 87 int d = HexValue(c0_); | 90 int d = HexValue(c0_); | 
| 88 if (d < 0) { | |
| 
adamk
2016/03/21 19:44:31
I think you still need this early return...
 
caitp (gmail)
2016/03/21 19:51:23
you're correct, `\u{}` parses successfully with th…
 | |
| 89 return -1; | |
| 90 } | |
| 91 while (d >= 0) { | 91 while (d >= 0) { | 
| 92 x = x * 16 + d; | 92 x = x * 16 + d; | 
| 93 if (x > max_value) return -1; | 93 if (x > max_value) { | 
| 94 ReportScannerError(Location(beg_pos, source_pos() + 1), | |
| 95 MessageTemplate::kUndefinedUnicodeCodePoint); | |
| 96 return -1; | |
| 97 } | |
| 94 Advance<capture_raw>(); | 98 Advance<capture_raw>(); | 
| 95 d = HexValue(c0_); | 99 d = HexValue(c0_); | 
| 96 } | 100 } | 
| 101 | |
| 97 return x; | 102 return x; | 
| 98 } | 103 } | 
| 99 | 104 | 
| 100 | 105 | 
| 101 // Ensure that tokens can be stored in a byte. | 106 // Ensure that tokens can be stored in a byte. | 
| 102 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); | 107 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); | 
| 103 | 108 | 
| 104 // Table of one-character tokens, by character (0x00..0x7f only). | 109 // Table of one-character tokens, by character (0x00..0x7f only). | 
| 105 static const byte one_char_tokens[] = { | 110 static const byte one_char_tokens[] = { | 
| 106 Token::ILLEGAL, | 111 Token::ILLEGAL, | 
| (...skipping 742 matching lines...) | | 
| 849 if (c == '\\') break; | 854 if (c == '\\') break; | 
| 850 Advance<false, false>(); | 855 Advance<false, false>(); | 
| 851 AddLiteralChar(c); | 856 AddLiteralChar(c); | 
| 852 } | 857 } | 
| 853 | 858 | 
| 854 while (c0_ != quote && c0_ >= 0 | 859 while (c0_ != quote && c0_ >= 0 | 
| 855 && !unicode_cache_->IsLineTerminator(c0_)) { | 860 && !unicode_cache_->IsLineTerminator(c0_)) { | 
| 856 uc32 c = c0_; | 861 uc32 c = c0_; | 
| 857 Advance(); | 862 Advance(); | 
| 858 if (c == '\\') { | 863 if (c == '\\') { | 
| 859 if (c0_ < 0 || !ScanEscape<false, false>()) return Token::ILLEGAL; | 864 if (c0_ < 0 || !ScanEscape<false, false>()) { | 
| 865 return Token::ILLEGAL; | |
| 866 } | |
| 860 } else { | 867 } else { | 
| 861 AddLiteralChar(c); | 868 AddLiteralChar(c); | 
| 862 } | 869 } | 
| 863 } | 870 } | 
| 864 if (c0_ != quote) return Token::ILLEGAL; | 871 if (c0_ != quote) return Token::ILLEGAL; | 
| 865 literal.Complete(); | 872 literal.Complete(); | 
| 866 | 873 | 
| 867 Advance(); // consume quote | 874 Advance(); // consume quote | 
| 868 return Token::STRING; | 875 return Token::STRING; | 
| 869 } | 876 } | 
| (...skipping 11 matching lines...) | | 
| 881 // | 888 // | 
| 882 // A TEMPLATE_SPAN should always be followed by an Expression, while a | 889 // A TEMPLATE_SPAN should always be followed by an Expression, while a | 
| 883 // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be | 890 // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be | 
| 884 // followed by an Expression. | 891 // followed by an Expression. | 
| 885 | 892 | 
| 886 Token::Value result = Token::TEMPLATE_SPAN; | 893 Token::Value result = Token::TEMPLATE_SPAN; | 
| 887 LiteralScope literal(this); | 894 LiteralScope literal(this); | 
| 888 StartRawLiteral(); | 895 StartRawLiteral(); | 
| 889 const bool capture_raw = true; | 896 const bool capture_raw = true; | 
| 890 const bool in_template_literal = true; | 897 const bool in_template_literal = true; | 
| 891 | |
| 892 while (true) { | 898 while (true) { | 
| 893 uc32 c = c0_; | 899 uc32 c = c0_; | 
| 894 Advance<capture_raw>(); | 900 Advance<capture_raw>(); | 
| 895 if (c == '`') { | 901 if (c == '`') { | 
| 896 result = Token::TEMPLATE_TAIL; | 902 result = Token::TEMPLATE_TAIL; | 
| 897 ReduceRawLiteralLength(1); | 903 ReduceRawLiteralLength(1); | 
| 898 break; | 904 break; | 
| 899 } else if (c == '$' && c0_ == '{') { | 905 } else if (c == '$' && c0_ == '{') { | 
| 900 Advance<capture_raw>(); // Consume '{' | 906 Advance<capture_raw>(); // Consume '{' | 
| 901 ReduceRawLiteralLength(2); | 907 ReduceRawLiteralLength(2); | 
| (...skipping 199 matching lines...) | | 
| 1101 Advance(); | 1107 Advance(); | 
| 1102 return ScanUnicodeEscape<false>(); | 1108 return ScanUnicodeEscape<false>(); | 
| 1103 } | 1109 } | 
| 1104 | 1110 | 
| 1105 | 1111 | 
| 1106 template <bool capture_raw> | 1112 template <bool capture_raw> | 
| 1107 uc32 Scanner::ScanUnicodeEscape() { | 1113 uc32 Scanner::ScanUnicodeEscape() { | 
| 1108 // Accept both \uxxxx and \u{xxxxxx}. In the latter case, the number of | 1114 // Accept both \uxxxx and \u{xxxxxx}. In the latter case, the number of | 
| 1109 // hex digits between { } is arbitrary. \ and u have already been read. | 1115 // hex digits between { } is arbitrary. \ and u have already been read. | 
| 1110 if (c0_ == '{') { | 1116 if (c0_ == '{') { | 
| 1117 int begin = source_pos() - 2; | |
| 1111 Advance<capture_raw>(); | 1118 Advance<capture_raw>(); | 
| 1112 uc32 cp = ScanUnlimitedLengthHexNumber<capture_raw>(0x10ffff); | 1119 uc32 cp = ScanUnlimitedLengthHexNumber<capture_raw>(0x10ffff, begin); | 
| 1113 if (cp < 0) { | 1120 if (cp < 0) { | 
| 
adamk
2016/03/21 19:44:31
...and then report an error here, right?
 
caitp (gmail)
2016/03/21 19:51:23
Done.
 | |
| 1114 return -1; | 1121 return -1; | 
| 1115 } | 1122 } | 
| 1116 if (c0_ != '}') { | 1123 if (c0_ != '}') { | 
| 1124 ReportScannerError(source_pos(), | |
| 1125 MessageTemplate::kInvalidUnicodeEscapeSequence); | |
| 1117 return -1; | 1126 return -1; | 
| 1118 } | 1127 } | 
| 1119 Advance<capture_raw>(); | 1128 Advance<capture_raw>(); | 
| 1120 return cp; | 1129 return cp; | 
| 1121 } | 1130 } | 
| 1122 return ScanHexNumber<capture_raw>(4); | 1131 const bool unicode = true; | 
| 1132 return ScanHexNumber<capture_raw, unicode>(4); | |
| 1123 } | 1133 } | 
| 1124 | 1134 | 
| 1125 | 1135 | 
| 1126 // ---------------------------------------------------------------------------- | 1136 // ---------------------------------------------------------------------------- | 
| 1127 // Keyword Matcher | 1137 // Keyword Matcher | 
| 1128 | 1138 | 
| 1129 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \ | 1139 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \ | 
| 1130 KEYWORD_GROUP('b') \ | 1140 KEYWORD_GROUP('b') \ | 
| 1131 KEYWORD("break", Token::BREAK) \ | 1141 KEYWORD("break", Token::BREAK) \ | 
| 1132 KEYWORD_GROUP('c') \ | 1142 KEYWORD_GROUP('c') \ | 
| (...skipping 546 matching lines...) | | 
| 1679 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); | 1689 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); | 
| 1680 } | 1690 } | 
| 1681 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); | 1691 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); | 
| 1682 | 1692 | 
| 1683 backing_store_.AddBlock(bytes); | 1693 backing_store_.AddBlock(bytes); | 
| 1684 return backing_store_.EndSequence().start(); | 1694 return backing_store_.EndSequence().start(); | 
| 1685 } | 1695 } | 
| 1686 | 1696 | 
| 1687 } // namespace internal | 1697 } // namespace internal | 
| 1688 } // namespace v8 | 1698 } // namespace v8 | 
| OLD | NEW |