Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(198)

Side by Side Diff: src/parsing/scanner.cc

Issue 1793913002: [parser] implement error reporting for Scanner errors (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: rebased Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Features shared by parsing and pre-parsing scanners. 5 // Features shared by parsing and pre-parsing scanners.
6 6
7 #include "src/parsing/scanner.h" 7 #include "src/parsing/scanner.h"
8 8
9 #include <stdint.h> 9 #include <stdint.h>
10 10
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
53 // Need to capture identifiers in order to recognize "get" and "set" 53 // Need to capture identifiers in order to recognize "get" and "set"
54 // in object literals. 54 // in object literals.
55 Init(); 55 Init();
56 // Skip initial whitespace allowing HTML comment ends just like 56 // Skip initial whitespace allowing HTML comment ends just like
57 // after a newline and scan first token. 57 // after a newline and scan first token.
58 has_line_terminator_before_next_ = true; 58 has_line_terminator_before_next_ = true;
59 SkipWhiteSpace(); 59 SkipWhiteSpace();
60 Scan(); 60 Scan();
61 } 61 }
62 62
63 63 template <bool capture_raw, bool unicode>
64 template <bool capture_raw>
65 uc32 Scanner::ScanHexNumber(int expected_length) { 64 uc32 Scanner::ScanHexNumber(int expected_length) {
66 DCHECK(expected_length <= 4); // prevent overflow 65 DCHECK(expected_length <= 4); // prevent overflow
67 66
67 int begin = source_pos() - 2;
68 uc32 x = 0; 68 uc32 x = 0;
69 for (int i = 0; i < expected_length; i++) { 69 for (int i = 0; i < expected_length; i++) {
70 int d = HexValue(c0_); 70 int d = HexValue(c0_);
71 if (d < 0) { 71 if (d < 0) {
72 ReportScannerError(unicode
73 ? MessageTemplate::kInvalidUnicodeEscapeSequence
74 : MessageTemplate::kInvalidHexEscapeSequence,
75 begin, begin + expected_length + 2);
72 return -1; 76 return -1;
73 } 77 }
74 x = x * 16 + d; 78 x = x * 16 + d;
75 Advance<capture_raw>(); 79 Advance<capture_raw>();
76 } 80 }
77 81
78 return x; 82 return x;
79 } 83 }
80 84
81
82 template <bool capture_raw> 85 template <bool capture_raw>
83 uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value) { 86 uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value, bool& bad_codepoint) {
adamk 2016/03/18 18:21:55 The right type here is bool* in V8
caitp (gmail) 2016/03/18 19:00:36 don't you think references are better for prevent
adamk 2016/03/18 19:09:57 I think bool args are dangerous in C++ no matter w
caitp (gmail) 2016/03/21 16:15:03 Since the early return was put back, there isn't m
84 uc32 x = 0; 87 uc32 x = 0;
85 int d = HexValue(c0_); 88 int d = HexValue(c0_);
86 if (d < 0) { 89
87 return -1;
adamk 2016/03/18 18:21:55 Losing this and the below early return makes me a
caitp (gmail) 2016/03/18 19:00:36 the idea is, if you get hex characters until the }
adamk 2016/03/18 19:09:57 Yeah, I'd just prefer a minimal change if the loca
caitp (gmail) 2016/03/21 16:15:03 Done.
88 }
89 while (d >= 0) { 90 while (d >= 0) {
90 x = x * 16 + d; 91 x = x * 16 + d;
91 if (x > max_value) return -1; 92 if (x > max_value) {
93 bad_codepoint = true;
94 }
92 Advance<capture_raw>(); 95 Advance<capture_raw>();
93 d = HexValue(c0_); 96 d = HexValue(c0_);
94 } 97 }
98
99 if (d < 0 && (c0_ != '}')) {
100 ReportScannerError(x != 0 ? MessageTemplate::kUnclosedUnicodeEscapeSequence
101 : MessageTemplate::kInvalidUnicodeEscapeSequence,
102 source_pos());
103 return -1;
104 }
105
95 return x; 106 return x;
96 } 107 }
97 108
98 109
99 // Ensure that tokens can be stored in a byte. 110 // Ensure that tokens can be stored in a byte.
100 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); 111 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);
101 112
102 // Table of one-character tokens, by character (0x00..0x7f only). 113 // Table of one-character tokens, by character (0x00..0x7f only).
103 static const byte one_char_tokens[] = { 114 static const byte one_char_tokens[] = {
104 Token::ILLEGAL, 115 Token::ILLEGAL,
(...skipping 735 matching lines...) Expand 10 before | Expand all | Expand 10 after
840 if (c == '\\') break; 851 if (c == '\\') break;
841 Advance<false, false>(); 852 Advance<false, false>();
842 AddLiteralChar(c); 853 AddLiteralChar(c);
843 } 854 }
844 855
845 while (c0_ != quote && c0_ >= 0 856 while (c0_ != quote && c0_ >= 0
846 && !unicode_cache_->IsLineTerminator(c0_)) { 857 && !unicode_cache_->IsLineTerminator(c0_)) {
847 uc32 c = c0_; 858 uc32 c = c0_;
848 Advance(); 859 Advance();
849 if (c == '\\') { 860 if (c == '\\') {
850 if (c0_ < 0 || !ScanEscape<false, false>()) return Token::ILLEGAL; 861 if (c0_ < 0 || !ScanEscape<false, false>()) {
862 return Token::ILLEGAL;
863 }
851 } else { 864 } else {
852 AddLiteralChar(c); 865 AddLiteralChar(c);
853 } 866 }
854 } 867 }
855 if (c0_ != quote) return Token::ILLEGAL; 868 if (c0_ != quote) return Token::ILLEGAL;
856 literal.Complete(); 869 literal.Complete();
857 870
858 Advance(); // consume quote 871 Advance(); // consume quote
859 return Token::STRING; 872 return Token::STRING;
860 } 873 }
(...skipping 11 matching lines...) Expand all
872 // 885 //
873 // A TEMPLATE_SPAN should always be followed by an Expression, while a 886 // A TEMPLATE_SPAN should always be followed by an Expression, while a
874 // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be 887 // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be
875 // followed by an Expression. 888 // followed by an Expression.
876 889
877 Token::Value result = Token::TEMPLATE_SPAN; 890 Token::Value result = Token::TEMPLATE_SPAN;
878 LiteralScope literal(this); 891 LiteralScope literal(this);
879 StartRawLiteral(); 892 StartRawLiteral();
880 const bool capture_raw = true; 893 const bool capture_raw = true;
881 const bool in_template_literal = true; 894 const bool in_template_literal = true;
882
883 while (true) { 895 while (true) {
884 uc32 c = c0_; 896 uc32 c = c0_;
885 Advance<capture_raw>(); 897 Advance<capture_raw>();
886 if (c == '`') { 898 if (c == '`') {
887 result = Token::TEMPLATE_TAIL; 899 result = Token::TEMPLATE_TAIL;
888 ReduceRawLiteralLength(1); 900 ReduceRawLiteralLength(1);
889 break; 901 break;
890 } else if (c == '$' && c0_ == '{') { 902 } else if (c == '$' && c0_ == '{') {
891 Advance<capture_raw>(); // Consume '{' 903 Advance<capture_raw>(); // Consume '{'
892 ReduceRawLiteralLength(2); 904 ReduceRawLiteralLength(2);
(...skipping 199 matching lines...) Expand 10 before | Expand all | Expand 10 after
1092 Advance(); 1104 Advance();
1093 return ScanUnicodeEscape<false>(); 1105 return ScanUnicodeEscape<false>();
1094 } 1106 }
1095 1107
1096 1108
1097 template <bool capture_raw> 1109 template <bool capture_raw>
1098 uc32 Scanner::ScanUnicodeEscape() { 1110 uc32 Scanner::ScanUnicodeEscape() {
1099 // Accept both \uxxxx and \u{xxxxxx}. In the latter case, the number of 1111 // Accept both \uxxxx and \u{xxxxxx}. In the latter case, the number of
1100 // hex digits between { } is arbitrary. \ and u have already been read. 1112 // hex digits between { } is arbitrary. \ and u have already been read.
1101 if (c0_ == '{') { 1113 if (c0_ == '{') {
1114 int begin = source_pos() - 2;
1115 bool bad_codepoint = false;
1102 Advance<capture_raw>(); 1116 Advance<capture_raw>();
1103 uc32 cp = ScanUnlimitedLengthHexNumber<capture_raw>(0x10ffff); 1117 uc32 cp =
1104 if (cp < 0) { 1118 ScanUnlimitedLengthHexNumber<capture_raw>(0x10ffff, bad_codepoint);
1105 return -1; 1119 if (cp < 0 || bad_codepoint) {
1106 } 1120 ReportScannerError(MessageTemplate::kUndefinedUnicodeCodePoint, begin,
1107 if (c0_ != '}') { 1121 source_pos() + 1);
1108 return -1; 1122 return -1;
1109 } 1123 }
1110 Advance<capture_raw>(); 1124 Advance<capture_raw>();
1111 return cp; 1125 return cp;
1112 } 1126 }
1113 return ScanHexNumber<capture_raw>(4); 1127 const bool unicode = true;
1128 return ScanHexNumber<capture_raw, unicode>(4);
1114 } 1129 }
1115 1130
1116 1131
1117 // ---------------------------------------------------------------------------- 1132 // ----------------------------------------------------------------------------
1118 // Keyword Matcher 1133 // Keyword Matcher
1119 1134
1120 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \ 1135 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \
1121 KEYWORD_GROUP('b') \ 1136 KEYWORD_GROUP('b') \
1122 KEYWORD("break", Token::BREAK) \ 1137 KEYWORD("break", Token::BREAK) \
1123 KEYWORD_GROUP('c') \ 1138 KEYWORD_GROUP('c') \
(...skipping 546 matching lines...) Expand 10 before | Expand all | Expand 10 after
1670 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); 1685 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));
1671 } 1686 }
1672 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); 1687 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));
1673 1688
1674 backing_store_.AddBlock(bytes); 1689 backing_store_.AddBlock(bytes);
1675 return backing_store_.EndSequence().start(); 1690 return backing_store_.EndSequence().start();
1676 } 1691 }
1677 1692
1678 } // namespace internal 1693 } // namespace internal
1679 } // namespace v8 1694 } // namespace v8
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698