src/scanner.cc - Issue 5026005: Move static scanner fields to scanner-base.h

Side by Side Diff: src/scanner.cc

Issue 5026005: Move static scanner fields to scanner-base.h (Closed)

Patch Set: Created 10 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2010 the V8 project authors. All rights reserved.	1 // Copyright 2010 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 12 matching lines...) Expand all Loading...
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY	23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT	24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE	25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.	26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

27	27

28 #include "v8.h"	28 #include "v8.h"

29	29

30 #include "ast.h"	30 #include "ast.h"

31 #include "handles.h"	31 #include "handles.h"

32 #include "scanner.h"	32 #include "scanner.h"

	33 #include "unicode-inl.h"

33	34

34 namespace v8 {	35 namespace v8 {

35 namespace internal {	36 namespace internal {

36	37

37 // ----------------------------------------------------------------------------	38 // ----------------------------------------------------------------------------

38 // Character predicates

39

40

41 unibrow::Predicate<IdentifierStart, 128> Scanner::kIsIdentifierStart;

42 unibrow::Predicate<IdentifierPart, 128> Scanner::kIsIdentifierPart;

43 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator;

44 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace;

45

46

47 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_;

48

49

50 // ----------------------------------------------------------------------------

51 // UTF8Buffer	39 // UTF8Buffer

52	40

53 UTF8Buffer::UTF8Buffer() : buffer_(kInitialCapacity) { }	41 UTF8Buffer::UTF8Buffer() : buffer_(kInitialCapacity) { }

54	42

55	43

56 UTF8Buffer::~UTF8Buffer() {}	44 UTF8Buffer::~UTF8Buffer() {}

57	45

58	46

59 void UTF8Buffer::AddCharSlow(uc32 c) {	47 void UTF8Buffer::AddCharSlow(uc32 c) {

60 ASSERT(static_cast<unsigned>(c) > unibrow::Utf8::kMaxOneByteChar);	48 ASSERT(static_cast<unsigned>(c) > unibrow::Utf8::kMaxOneByteChar);

(...skipping 290 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
351 return source_pos() != start_position;	339 return source_pos() != start_position;

352 }	340 }

353	341

354	342

355 bool Scanner::SkipJavaScriptWhiteSpace() {	343 bool Scanner::SkipJavaScriptWhiteSpace() {

356 int start_position = source_pos();	344 int start_position = source_pos();

357	345

358 while (true) {	346 while (true) {

359 // We treat byte-order marks (BOMs) as whitespace for better	347 // We treat byte-order marks (BOMs) as whitespace for better

360 // compatibility with Spidermonkey and other JavaScript engines.	348 // compatibility with Spidermonkey and other JavaScript engines.

361 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) {	349 while (ScannerConstants::kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) {

362 // IsWhiteSpace() includes line terminators!	350 // IsWhiteSpace() includes line terminators!

363 if (kIsLineTerminator.get(c0_)) {	351 if (ScannerConstants::kIsLineTerminator.get(c0_)) {

364 // Ignore line terminators, but remember them. This is necessary	352 // Ignore line terminators, but remember them. This is necessary

365 // for automatic semicolon insertion.	353 // for automatic semicolon insertion.

366 has_line_terminator_before_next_ = true;	354 has_line_terminator_before_next_ = true;

367 }	355 }

368 Advance();	356 Advance();

369 }	357 }

370	358

371 // If there is an HTML comment end '-->' at the beginning of a	359 // If there is an HTML comment end '-->' at the beginning of a

372 // line (with only whitespace in front of it), we treat the rest	360 // line (with only whitespace in front of it), we treat the rest

373 // of the line as a comment. This is in line with the way	361 // of the line as a comment. This is in line with the way

(...skipping 19 matching lines...) Expand all Loading...
393	381

394	382

395 Token::Value Scanner::SkipSingleLineComment() {	383 Token::Value Scanner::SkipSingleLineComment() {

396 Advance();	384 Advance();

397	385

398 // The line terminator at the end of the line is not considered	386 // The line terminator at the end of the line is not considered

399 // to be part of the single-line comment; it is recognized	387 // to be part of the single-line comment; it is recognized

400 // separately by the lexical grammar and becomes part of the	388 // separately by the lexical grammar and becomes part of the

401 // stream of input elements for the syntactic grammar (see	389 // stream of input elements for the syntactic grammar (see

402 // ECMA-262, section 7.4, page 12).	390 // ECMA-262, section 7.4, page 12).

403 while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) {	391 while (c0_ >= 0 && !ScannerConstants::kIsLineTerminator.get(c0_)) {

404 Advance();	392 Advance();

405 }	393 }

406	394

407 return Token::WHITESPACE;	395 return Token::WHITESPACE;

408 }	396 }

409	397

410	398

411 Token::Value Scanner::SkipMultiLineComment() {	399 Token::Value Scanner::SkipMultiLineComment() {

412 ASSERT(c0_ == '*');	400 ASSERT(c0_ == '*');

413 Advance();	401 Advance();

(...skipping 210 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
624	612

625	613

626 Token::Value Scanner::ScanJsonIdentifier(const char* text,	614 Token::Value Scanner::ScanJsonIdentifier(const char* text,

627 Token::Value token) {	615 Token::Value token) {

628 LiteralScope literal(this);	616 LiteralScope literal(this);

629 while (*text != '\0') {	617 while (*text != '\0') {

630 if (c0_ != *text) return Token::ILLEGAL;	618 if (c0_ != *text) return Token::ILLEGAL;

631 Advance();	619 Advance();

632 text++;	620 text++;

633 }	621 }

634 if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL;	622 if (ScannerConstants::kIsIdentifierPart.get(c0_)) return Token::ILLEGAL;

635 literal.Complete();	623 literal.Complete();

636 return token;	624 return token;

637 }	625 }

638	626

639	627

640 void Scanner::ScanJavaScript() {	628 void Scanner::ScanJavaScript() {

641 next_.literal_chars = Vector<const char>();	629 next_.literal_chars = Vector<const char>();

642 Token::Value token;	630 Token::Value token;

643 do {	631 do {

644 // Remember the position of the next token	632 // Remember the position of the next token

(...skipping 202 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
847	835

848 case '?':	836 case '?':

849 token = Select(Token::CONDITIONAL);	837 token = Select(Token::CONDITIONAL);

850 break;	838 break;

851	839

852 case '~':	840 case '~':

853 token = Select(Token::BIT_NOT);	841 token = Select(Token::BIT_NOT);

854 break;	842 break;

855	843

856 default:	844 default:

857 if (kIsIdentifierStart.get(c0_)) {	845 if (ScannerConstants::kIsIdentifierStart.get(c0_)) {

858 token = ScanIdentifier();	846 token = ScanIdentifier();

859 } else if (IsDecimalDigit(c0_)) {	847 } else if (IsDecimalDigit(c0_)) {

860 token = ScanNumber(false);	848 token = ScanNumber(false);

861 } else if (SkipWhiteSpace()) {	849 } else if (SkipWhiteSpace()) {

862 token = Token::WHITESPACE;	850 token = Token::WHITESPACE;

863 } else if (c0_ < 0) {	851 } else if (c0_ < 0) {

864 token = Token::EOS;	852 token = Token::EOS;

865 } else {	853 } else {

866 token = Select(Token::ILLEGAL);	854 token = Select(Token::ILLEGAL);

867 }	855 }

(...skipping 62 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
930 }	918 }

931 return x;	919 return x;

932 }	920 }

933	921

934	922

935 void Scanner::ScanEscape() {	923 void Scanner::ScanEscape() {

936 uc32 c = c0_;	924 uc32 c = c0_;

937 Advance();	925 Advance();

938	926

939 // Skip escaped newlines.	927 // Skip escaped newlines.

940 if (kIsLineTerminator.get(c)) {	928 if (ScannerConstants::kIsLineTerminator.get(c)) {

941 // Allow CR+LF newlines in multiline string literals.	929 // Allow CR+LF newlines in multiline string literals.

942 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();	930 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();

943 // Allow LF+CR newlines in multiline string literals.	931 // Allow LF+CR newlines in multiline string literals.

944 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();	932 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();

945 return;	933 return;

946 }	934 }

947	935

948 switch (c) {	936 switch (c) {

949 case '\'': // fall through	937 case '\'': // fall through

950 case '"' : // fall through	938 case '"' : // fall through

(...skipping 21 matching lines...) Expand all Loading...
972 // as non-escaped characters by JS VMs.	960 // as non-escaped characters by JS VMs.

973 AddChar(c);	961 AddChar(c);

974 }	962 }

975	963

976	964

977 Token::Value Scanner::ScanString() {	965 Token::Value Scanner::ScanString() {

978 uc32 quote = c0_;	966 uc32 quote = c0_;

979 Advance(); // consume quote	967 Advance(); // consume quote

980	968

981 LiteralScope literal(this);	969 LiteralScope literal(this);

982 while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) {	970 while (c0_ != quote && c0_ >= 0

	971 && !ScannerConstants::kIsLineTerminator.get(c0_)) {

983 uc32 c = c0_;	972 uc32 c = c0_;

984 Advance();	973 Advance();

985 if (c == '\\') {	974 if (c == '\\') {

986 if (c0_ < 0) return Token::ILLEGAL;	975 if (c0_ < 0) return Token::ILLEGAL;

987 ScanEscape();	976 ScanEscape();

988 } else {	977 } else {

989 AddChar(c);	978 AddChar(c);

990 }	979 }

991 }	980 }

992 if (c0_ != quote) return Token::ILLEGAL;	981 if (c0_ != quote) return Token::ILLEGAL;

(...skipping 92 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1085 // we must have at least one decimal digit after 'e'/'E'	1074 // we must have at least one decimal digit after 'e'/'E'

1086 return Token::ILLEGAL;	1075 return Token::ILLEGAL;

1087 }	1076 }

1088 ScanDecimalDigits();	1077 ScanDecimalDigits();

1089 }	1078 }

1090	1079

1091 // The source character immediately following a numeric literal must	1080 // The source character immediately following a numeric literal must

1092 // not be an identifier start or a decimal digit; see ECMA-262	1081 // not be an identifier start or a decimal digit; see ECMA-262

1093 // section 7.8.3, page 17 (note that we read only one decimal digit	1082 // section 7.8.3, page 17 (note that we read only one decimal digit

1094 // if the value is 0).	1083 // if the value is 0).

1095 if (IsDecimalDigit(c0_) || kIsIdentifierStart.get(c0_))	1084 if (IsDecimalDigit(c0_) || ScannerConstants::kIsIdentifierStart.get(c0_))

1096 return Token::ILLEGAL;	1085 return Token::ILLEGAL;

1097	1086

1098 literal.Complete();	1087 literal.Complete();

1099	1088

1100 return Token::NUMBER;	1089 return Token::NUMBER;

1101 }	1090 }

1102	1091

1103	1092

1104 uc32 Scanner::ScanIdentifierUnicodeEscape() {	1093 uc32 Scanner::ScanIdentifierUnicodeEscape() {

1105 Advance();	1094 Advance();

1106 if (c0_ != 'u') return unibrow::Utf8::kBadChar;	1095 if (c0_ != 'u') return unibrow::Utf8::kBadChar;

1107 Advance();	1096 Advance();

1108 uc32 c = ScanHexEscape('u', 4);	1097 uc32 c = ScanHexEscape('u', 4);

1109 // We do not allow a unicode escape sequence to start another	1098 // We do not allow a unicode escape sequence to start another

1110 // unicode escape sequence.	1099 // unicode escape sequence.

1111 if (c == '\\') return unibrow::Utf8::kBadChar;	1100 if (c == '\\') return unibrow::Utf8::kBadChar;

1112 return c;	1101 return c;

1113 }	1102 }

1114	1103

1115	1104

1116 Token::Value Scanner::ScanIdentifier() {	1105 Token::Value Scanner::ScanIdentifier() {

1117 ASSERT(kIsIdentifierStart.get(c0_));	1106 ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_));

1118	1107

1119 LiteralScope literal(this);	1108 LiteralScope literal(this);

1120 KeywordMatcher keyword_match;	1109 KeywordMatcher keyword_match;

1121	1110

1122 // Scan identifier start character.	1111 // Scan identifier start character.

1123 if (c0_ == '\\') {	1112 if (c0_ == '\\') {

1124 uc32 c = ScanIdentifierUnicodeEscape();	1113 uc32 c = ScanIdentifierUnicodeEscape();

1125 // Only allow legal identifier start characters.	1114 // Only allow legal identifier start characters.

1126 if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL;	1115 if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL;

1127 AddChar(c);	1116 AddChar(c);

1128 keyword_match.Fail();	1117 keyword_match.Fail();

1129 } else {	1118 } else {

1130 AddChar(c0_);	1119 AddChar(c0_);

1131 keyword_match.AddChar(c0_);	1120 keyword_match.AddChar(c0_);

1132 Advance();	1121 Advance();

1133 }	1122 }

1134	1123

1135 // Scan the rest of the identifier characters.	1124 // Scan the rest of the identifier characters.

1136 while (kIsIdentifierPart.get(c0_)) {	1125 while (ScannerConstants::kIsIdentifierPart.get(c0_)) {

1137 if (c0_ == '\\') {	1126 if (c0_ == '\\') {

1138 uc32 c = ScanIdentifierUnicodeEscape();	1127 uc32 c = ScanIdentifierUnicodeEscape();

1139 // Only allow legal identifier part characters.	1128 // Only allow legal identifier part characters.

1140 if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL;	1129 if (!ScannerConstants::kIsIdentifierPart.get(c)) return Token::ILLEGAL;

1141 AddChar(c);	1130 AddChar(c);

1142 keyword_match.Fail();	1131 keyword_match.Fail();

1143 } else {	1132 } else {

1144 AddChar(c0_);	1133 AddChar(c0_);

1145 keyword_match.AddChar(c0_);	1134 keyword_match.AddChar(c0_);

1146 Advance();	1135 Advance();

1147 }	1136 }

1148 }	1137 }

1149 literal.Complete();	1138 literal.Complete();

1150	1139

1151 return keyword_match.token();	1140 return keyword_match.token();

1152 }	1141 }

1153	1142

1154	1143

1155	1144

1156 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) {

1157 // Checks whether the buffer contains an identifier (no escape).

1158 if (!buffer->has_more()) return false;

1159 if (!kIsIdentifierStart.get(buffer->GetNext())) return false;

1160 while (buffer->has_more()) {

1161 if (!kIsIdentifierPart.get(buffer->GetNext())) return false;

1162 }

1163 return true;

1164 }

1165

1166

1167 bool Scanner::ScanRegExpPattern(bool seen_equal) {	1145 bool Scanner::ScanRegExpPattern(bool seen_equal) {

1168 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags	1146 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags

1169 bool in_character_class = false;	1147 bool in_character_class = false;

1170	1148

1171 // Previous token is either '/' or '/=', in the second case, the	1149 // Previous token is either '/' or '/=', in the second case, the

1172 // pattern starts at =.	1150 // pattern starts at =.

1173 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);	1151 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);

1174 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);	1152 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);

1175	1153

1176 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,	1154 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,

1177 // the scanner should pass uninterpreted bodies to the RegExp	1155 // the scanner should pass uninterpreted bodies to the RegExp

1178 // constructor.	1156 // constructor.

1179 LiteralScope literal(this);	1157 LiteralScope literal(this);

1180 if (seen_equal)	1158 if (seen_equal)

1181 AddChar('=');	1159 AddChar('=');

1182	1160

1183 while (c0_ != '/' || in_character_class) {	1161 while (c0_ != '/' || in_character_class) {

1184 if (kIsLineTerminator.get(c0_) || c0_ < 0) return false;	1162 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;

1185 if (c0_ == '\\') { // escaped character	1163 if (c0_ == '\\') { // escaped character

1186 AddCharAdvance();	1164 AddCharAdvance();

1187 if (kIsLineTerminator.get(c0_) || c0_ < 0) return false;	1165 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;

1188 AddCharAdvance();	1166 AddCharAdvance();

1189 } else { // unescaped character	1167 } else { // unescaped character

1190 if (c0_ == '[') in_character_class = true;	1168 if (c0_ == '[') in_character_class = true;

1191 if (c0_ == ']') in_character_class = false;	1169 if (c0_ == ']') in_character_class = false;

1192 AddCharAdvance();	1170 AddCharAdvance();

1193 }	1171 }

1194 }	1172 }

1195 Advance(); // consume '/'	1173 Advance(); // consume '/'

1196	1174

1197 literal.Complete();	1175 literal.Complete();

1198	1176

1199 return true;	1177 return true;

1200 }	1178 }

1201	1179

1202 bool Scanner::ScanRegExpFlags() {	1180 bool Scanner::ScanRegExpFlags() {

1203 // Scan regular expression flags.	1181 // Scan regular expression flags.

1204 LiteralScope literal(this);	1182 LiteralScope literal(this);

1205 while (kIsIdentifierPart.get(c0_)) {	1183 while (ScannerConstants::kIsIdentifierPart.get(c0_)) {

1206 if (c0_ == '\\') {	1184 if (c0_ == '\\') {

1207 uc32 c = ScanIdentifierUnicodeEscape();	1185 uc32 c = ScanIdentifierUnicodeEscape();

1208 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {	1186 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {

1209 // We allow any escaped character, unlike the restriction on	1187 // We allow any escaped character, unlike the restriction on

1210 // IdentifierPart when it is used to build an IdentifierName.	1188 // IdentifierPart when it is used to build an IdentifierName.

1211 AddChar(c);	1189 AddChar(c);

1212 continue;	1190 continue;

1213 }	1191 }

1214 }	1192 }

1215 AddCharAdvance();	1193 AddCharAdvance();

1216 }	1194 }

1217 literal.Complete();	1195 literal.Complete();

1218	1196

1219 next_.location.end_pos = source_pos() - 1;	1197 next_.location.end_pos = source_pos() - 1;

1220 return true;	1198 return true;

1221 }	1199 }

1222	1200

1223 } } // namespace v8::internal	1201 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/scanner.h ('k') | src/scanner-base.h » ('j') | no next file with comments »