src/parsing/scanner.cc - Issue 1793913002: [parser] implement error reporting for Scanner errors

Side by Side Diff: src/parsing/scanner.cc

Issue 1793913002: [parser] implement error reporting for Scanner errors (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: rebased Created 4 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Features shared by parsing and pre-parsing scanners.	5 // Features shared by parsing and pre-parsing scanners.

6	6

7 #include "src/parsing/scanner.h"	7 #include "src/parsing/scanner.h"

8	8

9 #include <stdint.h>	9 #include <stdint.h>

10	10

(...skipping 42 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
53 // Need to capture identifiers in order to recognize "get" and "set"	53 // Need to capture identifiers in order to recognize "get" and "set"

54 // in object literals.	54 // in object literals.

55 Init();	55 Init();

56 // Skip initial whitespace allowing HTML comment ends just like	56 // Skip initial whitespace allowing HTML comment ends just like

57 // after a newline and scan first token.	57 // after a newline and scan first token.

58 has_line_terminator_before_next_ = true;	58 has_line_terminator_before_next_ = true;

59 SkipWhiteSpace();	59 SkipWhiteSpace();

60 Scan();	60 Scan();

61 }	61 }

62	62

63	63 template <bool capture_raw, bool unicode>

64 template <bool capture_raw>

65 uc32 Scanner::ScanHexNumber(int expected_length) {	64 uc32 Scanner::ScanHexNumber(int expected_length) {

66 DCHECK(expected_length <= 4); // prevent overflow	65 DCHECK(expected_length <= 4); // prevent overflow

67	66

	67 int begin = source_pos() - 2;

68 uc32 x = 0;	68 uc32 x = 0;

69 for (int i = 0; i < expected_length; i++) {	69 for (int i = 0; i < expected_length; i++) {

70 int d = HexValue(c0_);	70 int d = HexValue(c0_);

71 if (d < 0) {	71 if (d < 0) {

	72 ReportScannerError(unicode

	73 ? MessageTemplate::kInvalidUnicodeEscapeSequence

	74 : MessageTemplate::kInvalidHexEscapeSequence,

	75 begin, begin + expected_length + 2);

72 return -1;	76 return -1;

73 }	77 }

74 x = x * 16 + d;	78 x = x * 16 + d;

75 Advance<capture_raw>();	79 Advance<capture_raw>();

76 }	80 }

77	81

78 return x;	82 return x;

79 }	83 }

80	84

81

82 template <bool capture_raw>	85 template <bool capture_raw>

83 uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value) {	86 uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value, bool& bad_codepoint) {
	adamk 2016/03/18 18:21:55 The right type here is bool* in V8 The right type here is bool* in V8 caitp (gmail) 2016/03/18 19:00:36 don't you think references are better for prevent Show quoted text On 2016/03/18 18:21:55, adamk wrote: > The right type here is bool* in V8 don't you think references are better for preventing accidents here? anyway, will do adamk 2016/03/18 19:09:57 I think bool args are dangerous in C++ no matter w Show quoted text On 2016/03/18 19:00:36, caitp wrote: > On 2016/03/18 18:21:55, adamk wrote: > > The right type here is bool* in V8 > > don't you think references are better for preventing accidents here? anyway, > will do I think bool args are dangerous in C++ no matter what you do :) As for reference vs pointer, V8 follows Google style and uses pointers for outparams (Blink inherited WebKit style, which is the reason it uses references). caitp (gmail) 2016/03/21 16:15:03 Since the early return was put back, there isn't m Show quoted text On 2016/03/18 19:09:57, adamk wrote: > On 2016/03/18 19:00:36, caitp wrote: > > On 2016/03/18 18:21:55, adamk wrote: > > > The right type here is bool* in V8 > > > > don't you think references are better for preventing accidents here? anyway, > > will do > > I think bool args are dangerous in C++ no matter what you do :) > > As for reference vs pointer, V8 follows Google style and uses pointers for > outparams (Blink inherited WebKit style, which is the reason it uses > references). Since the early return was put back, there isn't much value in the parameter anymore anyways, so it's gone
84 uc32 x = 0;	87 uc32 x = 0;

85 int d = HexValue(c0_);	88 int d = HexValue(c0_);

86 if (d < 0) {	89

87 return -1;
adamk 2016/03/18 18:21:55 Losing this and the below early return makes me a Losing this and the below early return makes me a little nervous. Why don't you just want to return here and below and handle the error-reporting in the caller, since you already have error-reporting there? Is it so that your reported location is wider? That seems odd to me, given that the problem happens as soon as you hit a non-hex character. Scanning after that is pretty meaningless. caitp (gmail) 2016/03/18 19:00:36 the idea is, if you get hex characters until the } Show quoted text On 2016/03/18 18:21:55, adamk wrote: > Losing this and the below early return makes me a little nervous. Why don't you > just want to return here and below and handle the error-reporting in the caller, > since you already have error-reporting there? Is it so that your reported > location is wider? That seems odd to me, given that the problem happens as soon > as you hit a non-hex character. Scanning after that is pretty meaningless. the idea is, if you get hex characters until the }, you can say that it's an invalid codepoint, and if you fail to get a hex digit or closing brace, you can report the unclosed error. I'd like to get the chromium devtools using the location data to clarify the problem, and in this case it will point right to the invalid codepoint. it's not worth much without location data being used, though. I can remove it until after that change is made, I guess. adamk 2016/03/18 19:09:57 Yeah, I'd just prefer a minimal change if the loca Show quoted text On 2016/03/18 19:00:36, caitp wrote: > On 2016/03/18 18:21:55, adamk wrote: > > Losing this and the below early return makes me a little nervous. Why don't > you > > just want to return here and below and handle the error-reporting in the > caller, > > since you already have error-reporting there? Is it so that your reported > > location is wider? That seems odd to me, given that the problem happens as > soon > > as you hit a non-hex character. 
Scanning after that is pretty meaningless. > > the idea is, if you get hex characters until the }, you can say that it's an > invalid codepoint, and if you fail to get a hex digit or closing brace, you can > report the unclosed error. I'd like to get the chromium devtools using the > location data to clarify the problem, and in this case it will point right to > the invalid codepoint. > > it's not worth much without location data being used, though. I can remove it > until after that change is made, I guess. Yeah, I'd just prefer a minimal change if the location data isn't being used; the improved message seems like the main point. caitp (gmail) 2016/03/21 16:15:03 Done. Show quoted text On 2016/03/18 19:09:57, adamk wrote: > On 2016/03/18 19:00:36, caitp wrote: > > On 2016/03/18 18:21:55, adamk wrote: > > > Losing this and the below early return makes me a little nervous. Why don't > > you > > > just want to return here and below and handle the error-reporting in the > > caller, > > > since you already have error-reporting there? Is it so that your reported > > > location is wider? That seems odd to me, given that the problem happens as > > soon > > > as you hit a non-hex character. Scanning after that is pretty meaningless. > > > > the idea is, if you get hex characters until the }, you can say that it's an > > invalid codepoint, and if you fail to get a hex digit or closing brace, you > can > > report the unclosed error. I'd like to get the chromium devtools using the > > location data to clarify the problem, and in this case it will point right to > > the invalid codepoint. > > > > it's not worth much without location data being used, though. I can remove it > > until after that change is made, I guess. > > Yeah, I'd just prefer a minimal change if the location data isn't being used; > the improved message seems like the main point. Done.
88 }

89 while (d >= 0) {	90 while (d >= 0) {

90 x = x * 16 + d;	91 x = x * 16 + d;

91 if (x > max_value) return -1;	92 if (x > max_value) {

	93 bad_codepoint = true;

	94 }

92 Advance<capture_raw>();	95 Advance<capture_raw>();

93 d = HexValue(c0_);	96 d = HexValue(c0_);

94 }	97 }

	98

	99 if (d < 0 && (c0_ != '}')) {

	100 ReportScannerError(x != 0 ? MessageTemplate::kUnclosedUnicodeEscapeSequence

	101 : MessageTemplate::kInvalidUnicodeEscapeSequence,

	102 source_pos());

	103 return -1;

	104 }

	105

95 return x;	106 return x;

96 }	107 }

97	108

98	109

99 // Ensure that tokens can be stored in a byte.	110 // Ensure that tokens can be stored in a byte.

100 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);	111 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);

101	112

102 // Table of one-character tokens, by character (0x00..0x7f only).	113 // Table of one-character tokens, by character (0x00..0x7f only).

103 static const byte one_char_tokens[] = {	114 static const byte one_char_tokens[] = {

104 Token::ILLEGAL,	115 Token::ILLEGAL,

(...skipping 735 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
840 if (c == '\\') break;	851 if (c == '\\') break;

841 Advance<false, false>();	852 Advance<false, false>();

842 AddLiteralChar(c);	853 AddLiteralChar(c);

843 }	854 }

844	855

845 while (c0_ != quote && c0_ >= 0	856 while (c0_ != quote && c0_ >= 0

846 && !unicode_cache_->IsLineTerminator(c0_)) {	857 && !unicode_cache_->IsLineTerminator(c0_)) {

847 uc32 c = c0_;	858 uc32 c = c0_;

848 Advance();	859 Advance();

849 if (c == '\\') {	860 if (c == '\\') {

850 if (c0_ < 0 \|\| !ScanEscape<false, false>()) return Token::ILLEGAL;	861 if (c0_ < 0 \|\| !ScanEscape<false, false>()) {

	862 return Token::ILLEGAL;

	863 }

851 } else {	864 } else {

852 AddLiteralChar(c);	865 AddLiteralChar(c);

853 }	866 }

854 }	867 }

855 if (c0_ != quote) return Token::ILLEGAL;	868 if (c0_ != quote) return Token::ILLEGAL;

856 literal.Complete();	869 literal.Complete();

857	870

858 Advance(); // consume quote	871 Advance(); // consume quote

859 return Token::STRING;	872 return Token::STRING;

860 }	873 }

(...skipping 11 matching lines...) Expand all Loading...
872 //	885 //

873 // A TEMPLATE_SPAN should always be followed by an Expression, while a	886 // A TEMPLATE_SPAN should always be followed by an Expression, while a

874 // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be	887 // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be

875 // followed by an Expression.	888 // followed by an Expression.

876	889

877 Token::Value result = Token::TEMPLATE_SPAN;	890 Token::Value result = Token::TEMPLATE_SPAN;

878 LiteralScope literal(this);	891 LiteralScope literal(this);

879 StartRawLiteral();	892 StartRawLiteral();

880 const bool capture_raw = true;	893 const bool capture_raw = true;

881 const bool in_template_literal = true;	894 const bool in_template_literal = true;

882

883 while (true) {	895 while (true) {

884 uc32 c = c0_;	896 uc32 c = c0_;

885 Advance<capture_raw>();	897 Advance<capture_raw>();

886 if (c == '`') {	898 if (c == '`') {

887 result = Token::TEMPLATE_TAIL;	899 result = Token::TEMPLATE_TAIL;

888 ReduceRawLiteralLength(1);	900 ReduceRawLiteralLength(1);

889 break;	901 break;

890 } else if (c == '$' && c0_ == '{') {	902 } else if (c == '$' && c0_ == '{') {

891 Advance<capture_raw>(); // Consume '{'	903 Advance<capture_raw>(); // Consume '{'

892 ReduceRawLiteralLength(2);	904 ReduceRawLiteralLength(2);

(...skipping 199 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1092 Advance();	1104 Advance();

1093 return ScanUnicodeEscape<false>();	1105 return ScanUnicodeEscape<false>();

1094 }	1106 }

1095	1107

1096	1108

1097 template <bool capture_raw>	1109 template <bool capture_raw>

1098 uc32 Scanner::ScanUnicodeEscape() {	1110 uc32 Scanner::ScanUnicodeEscape() {

1099 // Accept both \uxxxx and \u{xxxxxx}. In the latter case, the number of	1111 // Accept both \uxxxx and \u{xxxxxx}. In the latter case, the number of

1100 // hex digits between { } is arbitrary. \ and u have already been read.	1112 // hex digits between { } is arbitrary. \ and u have already been read.

1101 if (c0_ == '{') {	1113 if (c0_ == '{') {

	1114 int begin = source_pos() - 2;

	1115 bool bad_codepoint = false;

1102 Advance<capture_raw>();	1116 Advance<capture_raw>();

1103 uc32 cp = ScanUnlimitedLengthHexNumber<capture_raw>(0x10ffff);	1117 uc32 cp =

1104 if (cp < 0) {	1118 ScanUnlimitedLengthHexNumber<capture_raw>(0x10ffff, bad_codepoint);

1105 return -1;	1119 if (cp < 0 \|\| bad_codepoint) {

1106 }	1120 ReportScannerError(MessageTemplate::kUndefinedUnicodeCodePoint, begin,

1107 if (c0_ != '}') {	1121 source_pos() + 1);

1108 return -1;	1122 return -1;

1109 }	1123 }

1110 Advance<capture_raw>();	1124 Advance<capture_raw>();

1111 return cp;	1125 return cp;

1112 }	1126 }

1113 return ScanHexNumber<capture_raw>(4);	1127 const bool unicode = true;

	1128 return ScanHexNumber<capture_raw, unicode>(4);

1114 }	1129 }

1115	1130

1116	1131

1117 // ----------------------------------------------------------------------------	1132 // ----------------------------------------------------------------------------

1118 // Keyword Matcher	1133 // Keyword Matcher

1119	1134

1120 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \	1135 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \

1121 KEYWORD_GROUP('b') \	1136 KEYWORD_GROUP('b') \

1122 KEYWORD("break", Token::BREAK) \	1137 KEYWORD("break", Token::BREAK) \

1123 KEYWORD_GROUP('c') \	1138 KEYWORD_GROUP('c') \

(...skipping 546 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1670 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) \| 0x80u));	1685 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) \| 0x80u));

1671 }	1686 }

1672 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));	1687 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));

1673	1688

1674 backing_store_.AddBlock(bytes);	1689 backing_store_.AddBlock(bytes);

1675 return backing_store_.EndSequence().start();	1690 return backing_store_.EndSequence().start();

1676 }	1691 }

1677	1692

1678 } // namespace internal	1693 } // namespace internal

1679 } // namespace v8	1694 } // namespace v8

OLD	NEW

« src/parsing/scanner.h ('K') | « src/parsing/scanner.h ('k') | test/message/regress/regress-4829-1.out » ('j') | test/message/unicode-escape-invalid.out » ('J')