src/parsing/scanner.cc - Issue 1793913002: [parser] implement error reporting for Scanner errors

Side by Side Diff: src/parsing/scanner.cc

Issue 1793913002: [parser] implement error reporting for Scanner errors (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: Remove "unclosed unicode escape sequence" message Created 4 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Features shared by parsing and pre-parsing scanners.	5 // Features shared by parsing and pre-parsing scanners.

6	6

7 #include "src/parsing/scanner.h"	7 #include "src/parsing/scanner.h"

8	8

9 #include <stdint.h>	9 #include <stdint.h>

10	10

(...skipping 44 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
55 // Need to capture identifiers in order to recognize "get" and "set"	55 // Need to capture identifiers in order to recognize "get" and "set"

56 // in object literals.	56 // in object literals.

57 Init();	57 Init();

58 // Skip initial whitespace allowing HTML comment ends just like	58 // Skip initial whitespace allowing HTML comment ends just like

59 // after a newline and scan first token.	59 // after a newline and scan first token.

60 has_line_terminator_before_next_ = true;	60 has_line_terminator_before_next_ = true;

61 SkipWhiteSpace();	61 SkipWhiteSpace();

62 Scan();	62 Scan();

63 }	63 }

64	64

65	65 template <bool capture_raw, bool unicode>

66 template <bool capture_raw>

67 uc32 Scanner::ScanHexNumber(int expected_length) {	66 uc32 Scanner::ScanHexNumber(int expected_length) {

68 DCHECK(expected_length <= 4); // prevent overflow	67 DCHECK(expected_length <= 4); // prevent overflow

69	68

	69 int begin = source_pos() - 2;

70 uc32 x = 0;	70 uc32 x = 0;

71 for (int i = 0; i < expected_length; i++) {	71 for (int i = 0; i < expected_length; i++) {

72 int d = HexValue(c0_);	72 int d = HexValue(c0_);

73 if (d < 0) {	73 if (d < 0) {

	74 ReportScannerError(Location(begin, begin + expected_length + 2),

	75 unicode

	76 ? MessageTemplate::kInvalidUnicodeEscapeSequence

	77 : MessageTemplate::kInvalidHexEscapeSequence);

74 return -1;	78 return -1;

75 }	79 }

76 x = x * 16 + d;	80 x = x * 16 + d;

77 Advance<capture_raw>();	81 Advance<capture_raw>();

78 }	82 }

79	83

80 return x;	84 return x;

81 }	85 }

82	86

83

84 template <bool capture_raw>	87 template <bool capture_raw>

85 uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value) {	88 uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value) {

86 uc32 x = 0;	89 uc32 x = 0;

87 int d = HexValue(c0_);	90 int d = HexValue(c0_);

88 if (d < 0) {	91 int nof_digits = 0;

	92 while (d >= 0) {

	93 x = x * 16 + d;

	94 if (x > max_value) {

	95 return -1;
	adamk 2016/03/21 19:13:56 Seems like this is the right place to report the going-over-the-max error, since that's the constraint this method implements. caitp (gmail) 2016/03/21 19:40:05 On 2016/03/21 19:13:56, adamk wrote: > Seems like this is the right place to report the going-over-the-max error, since that's the constraint this method implements. Done.
	96 }

	97 Advance<capture_raw>();

	98 d = HexValue(c0_);

	99 nof_digits++;

	100 }

	101

	102 if (!nof_digits \|\| (c0_ != '}')) {
	adamk 2016/03/21 19:13:56 I'd leave the closing brace check in the caller, since it has the opening brace check. And the InvalidUnicodeEscapeSequence can also live in the caller, once the "max value" check is reported here. That'll make the code shape exactly the same as it was before the addition of error messages. caitp (gmail) 2016/03/21 19:40:05 On 2016/03/21 19:13:56, adamk wrote: > I'd leave the closing brace check in the caller, since it has the opening brace check. > > And the InvalidUnicodeEscapeSequence can also live in the caller, once the "max value" check is reported here. That'll make the code shape exactly the same as it was before the addition of error messages. Done.
	103 ReportScannerError(source_pos(),

	104 MessageTemplate::kInvalidUnicodeEscapeSequence);

89 return -1;	105 return -1;

90 }	106 }

91 while (d >= 0) {	107

92 x = x * 16 + d;

93 if (x > max_value) return -1;

94 Advance<capture_raw>();

95 d = HexValue(c0_);

96 }

97 return x;	108 return x;

98 }	109 }

99	110

100	111

101 // Ensure that tokens can be stored in a byte.	112 // Ensure that tokens can be stored in a byte.

102 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);	113 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);

103	114

104 // Table of one-character tokens, by character (0x00..0x7f only).	115 // Table of one-character tokens, by character (0x00..0x7f only).

105 static const byte one_char_tokens[] = {	116 static const byte one_char_tokens[] = {

106 Token::ILLEGAL,	117 Token::ILLEGAL,

(...skipping 742 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
849 if (c == '\\') break;	860 if (c == '\\') break;

850 Advance<false, false>();	861 Advance<false, false>();

851 AddLiteralChar(c);	862 AddLiteralChar(c);

852 }	863 }

853	864

854 while (c0_ != quote && c0_ >= 0	865 while (c0_ != quote && c0_ >= 0

855 && !unicode_cache_->IsLineTerminator(c0_)) {	866 && !unicode_cache_->IsLineTerminator(c0_)) {

856 uc32 c = c0_;	867 uc32 c = c0_;

857 Advance();	868 Advance();

858 if (c == '\\') {	869 if (c == '\\') {

859 if (c0_ < 0 \|\| !ScanEscape<false, false>()) return Token::ILLEGAL;	870 if (c0_ < 0 \|\| !ScanEscape<false, false>()) {

	871 return Token::ILLEGAL;

	872 }

860 } else {	873 } else {

861 AddLiteralChar(c);	874 AddLiteralChar(c);

862 }	875 }

863 }	876 }

864 if (c0_ != quote) return Token::ILLEGAL;	877 if (c0_ != quote) return Token::ILLEGAL;

865 literal.Complete();	878 literal.Complete();

866	879

867 Advance(); // consume quote	880 Advance(); // consume quote

868 return Token::STRING;	881 return Token::STRING;

869 }	882 }

(...skipping 11 matching lines...) Expand all Loading...
881 //	894 //

882 // A TEMPLATE_SPAN should always be followed by an Expression, while a	895 // A TEMPLATE_SPAN should always be followed by an Expression, while a

883 // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be	896 // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be

884 // followed by an Expression.	897 // followed by an Expression.

885	898

886 Token::Value result = Token::TEMPLATE_SPAN;	899 Token::Value result = Token::TEMPLATE_SPAN;

887 LiteralScope literal(this);	900 LiteralScope literal(this);

888 StartRawLiteral();	901 StartRawLiteral();

889 const bool capture_raw = true;	902 const bool capture_raw = true;

890 const bool in_template_literal = true;	903 const bool in_template_literal = true;

891

892 while (true) {	904 while (true) {

893 uc32 c = c0_;	905 uc32 c = c0_;

894 Advance<capture_raw>();	906 Advance<capture_raw>();

895 if (c == '`') {	907 if (c == '`') {

896 result = Token::TEMPLATE_TAIL;	908 result = Token::TEMPLATE_TAIL;

897 ReduceRawLiteralLength(1);	909 ReduceRawLiteralLength(1);

898 break;	910 break;

899 } else if (c == '$' && c0_ == '{') {	911 } else if (c == '$' && c0_ == '{') {

900 Advance<capture_raw>(); // Consume '{'	912 Advance<capture_raw>(); // Consume '{'

901 ReduceRawLiteralLength(2);	913 ReduceRawLiteralLength(2);

(...skipping 199 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1101 Advance();	1113 Advance();

1102 return ScanUnicodeEscape<false>();	1114 return ScanUnicodeEscape<false>();

1103 }	1115 }

1104	1116

1105	1117

1106 template <bool capture_raw>	1118 template <bool capture_raw>

1107 uc32 Scanner::ScanUnicodeEscape() {	1119 uc32 Scanner::ScanUnicodeEscape() {

1108 // Accept both \uxxxx and \u{xxxxxx}. In the latter case, the number of	1120 // Accept both \uxxxx and \u{xxxxxx}. In the latter case, the number of

1109 // hex digits between { } is arbitrary. \ and u have already been read.	1121 // hex digits between { } is arbitrary. \ and u have already been read.

1110 if (c0_ == '{') {	1122 if (c0_ == '{') {

	1123 int begin = source_pos() - 2;

1111 Advance<capture_raw>();	1124 Advance<capture_raw>();

1112 uc32 cp = ScanUnlimitedLengthHexNumber<capture_raw>(0x10ffff);	1125 uc32 cp = ScanUnlimitedLengthHexNumber<capture_raw>(0x10ffff);

1113 if (cp < 0) {	1126 if (cp < 0) {

1114 return -1;	1127 ReportScannerError(Location(begin, source_pos() + 1),

1115 }	1128 MessageTemplate::kUndefinedUnicodeCodePoint);

1116 if (c0_ != '}') {

1117 return -1;	1129 return -1;

1118 }	1130 }

1119 Advance<capture_raw>();	1131 Advance<capture_raw>();

1120 return cp;	1132 return cp;

1121 }	1133 }

1122 return ScanHexNumber<capture_raw>(4);	1134 const bool unicode = true;

	1135 return ScanHexNumber<capture_raw, unicode>(4);

1123 }	1136 }

1124	1137

1125	1138

1126 // ----------------------------------------------------------------------------	1139 // ----------------------------------------------------------------------------

1127 // Keyword Matcher	1140 // Keyword Matcher

1128	1141

1129 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \	1142 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \

1130 KEYWORD_GROUP('b') \	1143 KEYWORD_GROUP('b') \

1131 KEYWORD("break", Token::BREAK) \	1144 KEYWORD("break", Token::BREAK) \

1132 KEYWORD_GROUP('c') \	1145 KEYWORD_GROUP('c') \

(...skipping 546 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1679 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) \| 0x80u));	1692 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) \| 0x80u));

1680 }	1693 }

1681 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));	1694 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));

1682	1695

1683 backing_store_.AddBlock(bytes);	1696 backing_store_.AddBlock(bytes);

1684 return backing_store_.EndSequence().start();	1697 return backing_store_.EndSequence().start();

1685 }	1698 }

1686	1699

1687 } // namespace internal	1700 } // namespace internal

1688 } // namespace v8	1701 } // namespace v8

OLD	NEW

« no previous file with comments | « src/parsing/scanner.h ('k') | test/message/regress/regress-4829-1.out » ('j') | no next file with comments »