Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(88)

Side by Side Diff: src/parsing/scanner.cc

Issue 1793913002: [parser] implement error reporting for Scanner errors (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Remove "unclosed unicode escape sequence" message Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/parsing/scanner.h ('k') | test/message/regress/regress-4829-1.out » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Features shared by parsing and pre-parsing scanners. 5 // Features shared by parsing and pre-parsing scanners.
6 6
7 #include "src/parsing/scanner.h" 7 #include "src/parsing/scanner.h"
8 8
9 #include <stdint.h> 9 #include <stdint.h>
10 10
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
55 // Need to capture identifiers in order to recognize "get" and "set" 55 // Need to capture identifiers in order to recognize "get" and "set"
56 // in object literals. 56 // in object literals.
57 Init(); 57 Init();
58 // Skip initial whitespace allowing HTML comment ends just like 58 // Skip initial whitespace allowing HTML comment ends just like
59 // after a newline and scan first token. 59 // after a newline and scan first token.
60 has_line_terminator_before_next_ = true; 60 has_line_terminator_before_next_ = true;
61 SkipWhiteSpace(); 61 SkipWhiteSpace();
62 Scan(); 62 Scan();
63 } 63 }
64 64
65 65 template <bool capture_raw, bool unicode>
66 template <bool capture_raw>
67 uc32 Scanner::ScanHexNumber(int expected_length) { 66 uc32 Scanner::ScanHexNumber(int expected_length) {
68 DCHECK(expected_length <= 4); // prevent overflow 67 DCHECK(expected_length <= 4); // prevent overflow
69 68
69 int begin = source_pos() - 2;
70 uc32 x = 0; 70 uc32 x = 0;
71 for (int i = 0; i < expected_length; i++) { 71 for (int i = 0; i < expected_length; i++) {
72 int d = HexValue(c0_); 72 int d = HexValue(c0_);
73 if (d < 0) { 73 if (d < 0) {
74 ReportScannerError(Location(begin, begin + expected_length + 2),
75 unicode
76 ? MessageTemplate::kInvalidUnicodeEscapeSequence
77 : MessageTemplate::kInvalidHexEscapeSequence);
74 return -1; 78 return -1;
75 } 79 }
76 x = x * 16 + d; 80 x = x * 16 + d;
77 Advance<capture_raw>(); 81 Advance<capture_raw>();
78 } 82 }
79 83
80 return x; 84 return x;
81 } 85 }
82 86
83
84 template <bool capture_raw> 87 template <bool capture_raw>
85 uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value) { 88 uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value) {
86 uc32 x = 0; 89 uc32 x = 0;
87 int d = HexValue(c0_); 90 int d = HexValue(c0_);
88 if (d < 0) { 91 int nof_digits = 0;
92 while (d >= 0) {
93 x = x * 16 + d;
94 if (x > max_value) {
95 return -1;
adamk 2016/03/21 19:13:56 Seems like this is the right place to report the g…
caitp (gmail) 2016/03/21 19:40:05 Done.
96 }
97 Advance<capture_raw>();
98 d = HexValue(c0_);
99 nof_digits++;
100 }
101
102 if (!nof_digits || (c0_ != '}')) {
adamk 2016/03/21 19:13:56 I'd leave the closing brace check in the caller, s…
caitp (gmail) 2016/03/21 19:40:05 Done.
103 ReportScannerError(source_pos(),
104 MessageTemplate::kInvalidUnicodeEscapeSequence);
89 return -1; 105 return -1;
90 } 106 }
91 while (d >= 0) { 107
92 x = x * 16 + d;
93 if (x > max_value) return -1;
94 Advance<capture_raw>();
95 d = HexValue(c0_);
96 }
97 return x; 108 return x;
98 } 109 }
99 110
100 111
101 // Ensure that tokens can be stored in a byte. 112 // Ensure that tokens can be stored in a byte.
102 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); 113 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);
103 114
104 // Table of one-character tokens, by character (0x00..0x7f only). 115 // Table of one-character tokens, by character (0x00..0x7f only).
105 static const byte one_char_tokens[] = { 116 static const byte one_char_tokens[] = {
106 Token::ILLEGAL, 117 Token::ILLEGAL,
(...skipping 742 matching lines...) Expand 10 before | Expand all | Expand 10 after
849 if (c == '\\') break; 860 if (c == '\\') break;
850 Advance<false, false>(); 861 Advance<false, false>();
851 AddLiteralChar(c); 862 AddLiteralChar(c);
852 } 863 }
853 864
854 while (c0_ != quote && c0_ >= 0 865 while (c0_ != quote && c0_ >= 0
855 && !unicode_cache_->IsLineTerminator(c0_)) { 866 && !unicode_cache_->IsLineTerminator(c0_)) {
856 uc32 c = c0_; 867 uc32 c = c0_;
857 Advance(); 868 Advance();
858 if (c == '\\') { 869 if (c == '\\') {
859 if (c0_ < 0 || !ScanEscape<false, false>()) return Token::ILLEGAL; 870 if (c0_ < 0 || !ScanEscape<false, false>()) {
871 return Token::ILLEGAL;
872 }
860 } else { 873 } else {
861 AddLiteralChar(c); 874 AddLiteralChar(c);
862 } 875 }
863 } 876 }
864 if (c0_ != quote) return Token::ILLEGAL; 877 if (c0_ != quote) return Token::ILLEGAL;
865 literal.Complete(); 878 literal.Complete();
866 879
867 Advance(); // consume quote 880 Advance(); // consume quote
868 return Token::STRING; 881 return Token::STRING;
869 } 882 }
(...skipping 11 matching lines...) Expand all
881 // 894 //
882 // A TEMPLATE_SPAN should always be followed by an Expression, while a 895 // A TEMPLATE_SPAN should always be followed by an Expression, while a
883 // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be 896 // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be
884 // followed by an Expression. 897 // followed by an Expression.
885 898
886 Token::Value result = Token::TEMPLATE_SPAN; 899 Token::Value result = Token::TEMPLATE_SPAN;
887 LiteralScope literal(this); 900 LiteralScope literal(this);
888 StartRawLiteral(); 901 StartRawLiteral();
889 const bool capture_raw = true; 902 const bool capture_raw = true;
890 const bool in_template_literal = true; 903 const bool in_template_literal = true;
891
892 while (true) { 904 while (true) {
893 uc32 c = c0_; 905 uc32 c = c0_;
894 Advance<capture_raw>(); 906 Advance<capture_raw>();
895 if (c == '`') { 907 if (c == '`') {
896 result = Token::TEMPLATE_TAIL; 908 result = Token::TEMPLATE_TAIL;
897 ReduceRawLiteralLength(1); 909 ReduceRawLiteralLength(1);
898 break; 910 break;
899 } else if (c == '$' && c0_ == '{') { 911 } else if (c == '$' && c0_ == '{') {
900 Advance<capture_raw>(); // Consume '{' 912 Advance<capture_raw>(); // Consume '{'
901 ReduceRawLiteralLength(2); 913 ReduceRawLiteralLength(2);
(...skipping 199 matching lines...) Expand 10 before | Expand all | Expand 10 after
1101 Advance(); 1113 Advance();
1102 return ScanUnicodeEscape<false>(); 1114 return ScanUnicodeEscape<false>();
1103 } 1115 }
1104 1116
1105 1117
1106 template <bool capture_raw> 1118 template <bool capture_raw>
1107 uc32 Scanner::ScanUnicodeEscape() { 1119 uc32 Scanner::ScanUnicodeEscape() {
1108 // Accept both \uxxxx and \u{xxxxxx}. In the latter case, the number of 1120 // Accept both \uxxxx and \u{xxxxxx}. In the latter case, the number of
1109 // hex digits between { } is arbitrary. \ and u have already been read. 1121 // hex digits between { } is arbitrary. \ and u have already been read.
1110 if (c0_ == '{') { 1122 if (c0_ == '{') {
1123 int begin = source_pos() - 2;
1111 Advance<capture_raw>(); 1124 Advance<capture_raw>();
1112 uc32 cp = ScanUnlimitedLengthHexNumber<capture_raw>(0x10ffff); 1125 uc32 cp = ScanUnlimitedLengthHexNumber<capture_raw>(0x10ffff);
1113 if (cp < 0) { 1126 if (cp < 0) {
1114 return -1; 1127 ReportScannerError(Location(begin, source_pos() + 1),
1115 } 1128 MessageTemplate::kUndefinedUnicodeCodePoint);
1116 if (c0_ != '}') {
1117 return -1; 1129 return -1;
1118 } 1130 }
1119 Advance<capture_raw>(); 1131 Advance<capture_raw>();
1120 return cp; 1132 return cp;
1121 } 1133 }
1122 return ScanHexNumber<capture_raw>(4); 1134 const bool unicode = true;
1135 return ScanHexNumber<capture_raw, unicode>(4);
1123 } 1136 }
1124 1137
1125 1138
1126 // ---------------------------------------------------------------------------- 1139 // ----------------------------------------------------------------------------
1127 // Keyword Matcher 1140 // Keyword Matcher
1128 1141
1129 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \ 1142 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \
1130 KEYWORD_GROUP('b') \ 1143 KEYWORD_GROUP('b') \
1131 KEYWORD("break", Token::BREAK) \ 1144 KEYWORD("break", Token::BREAK) \
1132 KEYWORD_GROUP('c') \ 1145 KEYWORD_GROUP('c') \
(...skipping 546 matching lines...) Expand 10 before | Expand all | Expand 10 after
1679 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); 1692 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));
1680 } 1693 }
1681 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); 1694 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));
1682 1695
1683 backing_store_.AddBlock(bytes); 1696 backing_store_.AddBlock(bytes);
1684 return backing_store_.EndSequence().start(); 1697 return backing_store_.EndSequence().start();
1685 } 1698 }
1686 1699
1687 } // namespace internal 1700 } // namespace internal
1688 } // namespace v8 1701 } // namespace v8
OLD | NEW
« no previous file with comments | « src/parsing/scanner.h ('k') | test/message/regress/regress-4829-1.out » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698