src/parsing/scanner.cc - Issue 1793913002: [parser] implement error reporting for Scanner errors

Side by Side Diff: src/parsing/scanner.cc

Issue 1793913002: [parser] implement error reporting for Scanner errors (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: shape changing lines of code into new lines of code Created 4 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Features shared by parsing and pre-parsing scanners.	5 // Features shared by parsing and pre-parsing scanners.

6	6

7 #include "src/parsing/scanner.h"	7 #include "src/parsing/scanner.h"

8	8

9 #include <stdint.h>	9 #include <stdint.h>

10	10

(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
55 // Need to capture identifiers in order to recognize "get" and "set"	55 // Need to capture identifiers in order to recognize "get" and "set"

56 // in object literals.	56 // in object literals.

57 Init();	57 Init();

58 // Skip initial whitespace allowing HTML comment ends just like	58 // Skip initial whitespace allowing HTML comment ends just like

59 // after a newline and scan first token.	59 // after a newline and scan first token.

60 has_line_terminator_before_next_ = true;	60 has_line_terminator_before_next_ = true;

61 SkipWhiteSpace();	61 SkipWhiteSpace();

62 Scan();	62 Scan();

63 }	63 }

64	64

65	65 template <bool capture_raw, bool unicode>

66 template <bool capture_raw>

67 uc32 Scanner::ScanHexNumber(int expected_length) {	66 uc32 Scanner::ScanHexNumber(int expected_length) {

68 DCHECK(expected_length <= 4); // prevent overflow	67 DCHECK(expected_length <= 4); // prevent overflow

69	68

	69 int begin = source_pos() - 2;

70 uc32 x = 0;	70 uc32 x = 0;

71 for (int i = 0; i < expected_length; i++) {	71 for (int i = 0; i < expected_length; i++) {

72 int d = HexValue(c0_);	72 int d = HexValue(c0_);

73 if (d < 0) {	73 if (d < 0) {

	74 ReportScannerError(Location(begin, begin + expected_length + 2),

	75 unicode

	76 ? MessageTemplate::kInvalidUnicodeEscapeSequence

	77 : MessageTemplate::kInvalidHexEscapeSequence);

74 return -1;	78 return -1;

75 }	79 }

76 x = x * 16 + d;	80 x = x * 16 + d;

77 Advance<capture_raw>();	81 Advance<capture_raw>();

78 }	82 }

79	83

80 return x;	84 return x;

81 }	85 }

82	86

83

84 template <bool capture_raw>	87 template <bool capture_raw>

85 uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value) {	88 uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value, int beg_pos) {

86 uc32 x = 0;	89 uc32 x = 0;

87 int d = HexValue(c0_);	90 int d = HexValue(c0_);

88 if (d < 0) {
adamk 2016/03/21 19:44:31: I think you still need this early return...
caitp (gmail) 2016/03/21 19:51:23: On 2016/03/21 19:44:31, adamk wrote: > I think you still need this early return... You're correct, `\u{}` parses successfully with this :(
89 return -1;

90 }

91 while (d >= 0) {	91 while (d >= 0) {

92 x = x * 16 + d;	92 x = x * 16 + d;

93 if (x > max_value) return -1;	93 if (x > max_value) {

	94 ReportScannerError(Location(beg_pos, source_pos() + 1),

	95 MessageTemplate::kUndefinedUnicodeCodePoint);

	96 return -1;

	97 }

94 Advance<capture_raw>();	98 Advance<capture_raw>();

95 d = HexValue(c0_);	99 d = HexValue(c0_);

96 }	100 }

	101

97 return x;	102 return x;

98 }	103 }

99	104

100	105

101 // Ensure that tokens can be stored in a byte.	106 // Ensure that tokens can be stored in a byte.

102 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);	107 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);

103	108

104 // Table of one-character tokens, by character (0x00..0x7f only).	109 // Table of one-character tokens, by character (0x00..0x7f only).

105 static const byte one_char_tokens[] = {	110 static const byte one_char_tokens[] = {

106 Token::ILLEGAL,	111 Token::ILLEGAL,

(...skipping 742 matching lines...) Expand 10 before | Expand all | Expand 10 after
849 if (c == '\\') break;	854 if (c == '\\') break;

850 Advance<false, false>();	855 Advance<false, false>();

851 AddLiteralChar(c);	856 AddLiteralChar(c);

852 }	857 }

853	858

854 while (c0_ != quote && c0_ >= 0	859 while (c0_ != quote && c0_ >= 0

855 && !unicode_cache_->IsLineTerminator(c0_)) {	860 && !unicode_cache_->IsLineTerminator(c0_)) {

856 uc32 c = c0_;	861 uc32 c = c0_;

857 Advance();	862 Advance();

858 if (c == '\\') {	863 if (c == '\\') {

859 if (c0_ < 0 || !ScanEscape<false, false>()) return Token::ILLEGAL;	864 if (c0_ < 0 || !ScanEscape<false, false>()) {

	865 return Token::ILLEGAL;

	866 }

860 } else {	867 } else {

861 AddLiteralChar(c);	868 AddLiteralChar(c);

862 }	869 }

863 }	870 }

864 if (c0_ != quote) return Token::ILLEGAL;	871 if (c0_ != quote) return Token::ILLEGAL;

865 literal.Complete();	872 literal.Complete();

866	873

867 Advance(); // consume quote	874 Advance(); // consume quote

868 return Token::STRING;	875 return Token::STRING;

869 }	876 }

(...skipping 11 matching lines...) Expand all
881 //	888 //

882 // A TEMPLATE_SPAN should always be followed by an Expression, while a	889 // A TEMPLATE_SPAN should always be followed by an Expression, while a

883 // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be	890 // TEMPLATE_TAIL terminates a TemplateLiteral and does not need to be

884 // followed by an Expression.	891 // followed by an Expression.

885	892

886 Token::Value result = Token::TEMPLATE_SPAN;	893 Token::Value result = Token::TEMPLATE_SPAN;

887 LiteralScope literal(this);	894 LiteralScope literal(this);

888 StartRawLiteral();	895 StartRawLiteral();

889 const bool capture_raw = true;	896 const bool capture_raw = true;

890 const bool in_template_literal = true;	897 const bool in_template_literal = true;

891

892 while (true) {	898 while (true) {

893 uc32 c = c0_;	899 uc32 c = c0_;

894 Advance<capture_raw>();	900 Advance<capture_raw>();

895 if (c == '`') {	901 if (c == '`') {

896 result = Token::TEMPLATE_TAIL;	902 result = Token::TEMPLATE_TAIL;

897 ReduceRawLiteralLength(1);	903 ReduceRawLiteralLength(1);

898 break;	904 break;

899 } else if (c == '$' && c0_ == '{') {	905 } else if (c == '$' && c0_ == '{') {

900 Advance<capture_raw>(); // Consume '{'	906 Advance<capture_raw>(); // Consume '{'

901 ReduceRawLiteralLength(2);	907 ReduceRawLiteralLength(2);

(...skipping 199 matching lines...) Expand 10 before | Expand all | Expand 10 after
1101 Advance();	1107 Advance();

1102 return ScanUnicodeEscape<false>();	1108 return ScanUnicodeEscape<false>();

1103 }	1109 }

1104	1110

1105	1111

1106 template <bool capture_raw>	1112 template <bool capture_raw>

1107 uc32 Scanner::ScanUnicodeEscape() {	1113 uc32 Scanner::ScanUnicodeEscape() {

1108 // Accept both \uxxxx and \u{xxxxxx}. In the latter case, the number of	1114 // Accept both \uxxxx and \u{xxxxxx}. In the latter case, the number of

1109 // hex digits between { } is arbitrary. \ and u have already been read.	1115 // hex digits between { } is arbitrary. \ and u have already been read.

1110 if (c0_ == '{') {	1116 if (c0_ == '{') {

	1117 int begin = source_pos() - 2;

1111 Advance<capture_raw>();	1118 Advance<capture_raw>();

1112 uc32 cp = ScanUnlimitedLengthHexNumber<capture_raw>(0x10ffff);	1119 uc32 cp = ScanUnlimitedLengthHexNumber<capture_raw>(0x10ffff, begin);

1113 if (cp < 0) {	1120 if (cp < 0) {
	adamk 2016/03/21 19:44:31: ...and then report an error here, right?
	caitp (gmail) 2016/03/21 19:51:23: On 2016/03/21 19:44:31, adamk wrote: > ...and then report an error here, right? Done.
1114 return -1;	1121 return -1;

1115 }	1122 }

1116 if (c0_ != '}') {	1123 if (c0_ != '}') {

	1124 ReportScannerError(source_pos(),

	1125 MessageTemplate::kInvalidUnicodeEscapeSequence);

1117 return -1;	1126 return -1;

1118 }	1127 }

1119 Advance<capture_raw>();	1128 Advance<capture_raw>();

1120 return cp;	1129 return cp;

1121 }	1130 }

1122 return ScanHexNumber<capture_raw>(4);	1131 const bool unicode = true;

	1132 return ScanHexNumber<capture_raw, unicode>(4);

1123 }	1133 }

1124	1134

1125	1135

1126 // ----------------------------------------------------------------------------	1136 // ----------------------------------------------------------------------------

1127 // Keyword Matcher	1137 // Keyword Matcher

1128	1138

1129 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \	1139 #define KEYWORDS(KEYWORD_GROUP, KEYWORD) \

1130 KEYWORD_GROUP('b') \	1140 KEYWORD_GROUP('b') \

1131 KEYWORD("break", Token::BREAK) \	1141 KEYWORD("break", Token::BREAK) \

1132 KEYWORD_GROUP('c') \	1142 KEYWORD_GROUP('c') \

(...skipping 546 matching lines...) Expand 10 before | Expand all | Expand 10 after
1679 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));	1689 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));

1680 }	1690 }

1681 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));	1691 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));

1682	1692

1683 backing_store_.AddBlock(bytes);	1693 backing_store_.AddBlock(bytes);

1684 return backing_store_.EndSequence().start();	1694 return backing_store_.EndSequence().start();

1685 }	1695 }

1686	1696

1687 } // namespace internal	1697 } // namespace internal

1688 } // namespace v8	1698 } // namespace v8

OLD	NEW

« no previous file with comments | « src/parsing/scanner.h ('k') | test/message/regress/regress-4829-1.out » ('j') | no next file with comments »