src/scanner.cc - Issue 2831016: [Isolates] Static mutable data of Scanner class moved to ScannerCharacterClasses / Isolate.

Side by Side Diff: src/scanner.cc

Issue 2831016: [Isolates] Static mutable data of Scanner class moved to ScannerCharacterClasses / Isolate. (Closed)

Patch Set: . Created 10 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved.	1 // Copyright 2006-2008 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 16 matching lines...) Expand all Loading...
27	27

28 #include "v8.h"	28 #include "v8.h"

29	29

30 #include "ast.h"	30 #include "ast.h"

31 #include "handles.h"	31 #include "handles.h"

32 #include "scanner.h"	32 #include "scanner.h"

33	33

34 namespace v8 {	34 namespace v8 {

35 namespace internal {	35 namespace internal {

36	36

37 // ----------------------------------------------------------------------------

38 // Character predicates

39

40

41 unibrow::Predicate<IdentifierStart, 128> Scanner::kIsIdentifierStart;

42 unibrow::Predicate<IdentifierPart, 128> Scanner::kIsIdentifierPart;

43 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator;

44 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace;

45

46

47 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_;

48

49	37

50 // ----------------------------------------------------------------------------	38 // ----------------------------------------------------------------------------

51 // UTF8Buffer	39 // UTF8Buffer

52	40

53 UTF8Buffer::UTF8Buffer() : data_(NULL), limit_(NULL) { }	41 UTF8Buffer::UTF8Buffer() : data_(NULL), limit_(NULL) { }

54	42

55	43

56 UTF8Buffer::~UTF8Buffer() {	44 UTF8Buffer::~UTF8Buffer() {

57 if (data_ != NULL) DeleteArray(data_);	45 if (data_ != NULL) DeleteArray(data_);

58 }	46 }

(...skipping 269 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
328 }	316 }

329 // On fallthrough, it's a failure.	317 // On fallthrough, it's a failure.

330 state_ = UNMATCHABLE;	318 state_ = UNMATCHABLE;

331 }	319 }

332	320

333	321

334 // ----------------------------------------------------------------------------	322 // ----------------------------------------------------------------------------

335 // Scanner	323 // Scanner

336	324

337 Scanner::Scanner(ParserMode pre)	325 Scanner::Scanner(ParserMode pre)

338 : stack_overflow_(false), is_pre_parsing_(pre == PREPARSE) { }	326 : stack_overflow_(false), is_pre_parsing_(pre == PREPARSE),

	327 character_classes_(Isolate::Current()->

	328 scanner_character_classes()) { }

339	329

340	330

341 void Scanner::Initialize(Handle<String> source,	331 void Scanner::Initialize(Handle<String> source,

342 ParserLanguage language) {	332 ParserLanguage language) {

343 safe_string_input_buffer_.Reset(source.location());	333 safe_string_input_buffer_.Reset(source.location());

344 Init(source, &safe_string_input_buffer_, 0, source->length(), language);	334 Init(source, &safe_string_input_buffer_, 0, source->length(), language);

345 }	335 }

346	336

347	337

348 void Scanner::Initialize(Handle<String> source,	338 void Scanner::Initialize(Handle<String> source,

(...skipping 121 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
470 return source_pos() != start_position;	460 return source_pos() != start_position;

471 }	461 }

472	462

473	463

474 bool Scanner::SkipJavaScriptWhiteSpace() {	464 bool Scanner::SkipJavaScriptWhiteSpace() {

475 int start_position = source_pos();	465 int start_position = source_pos();

476	466

477 while (true) {	467 while (true) {

478 // We treat byte-order marks (BOMs) as whitespace for better	468 // We treat byte-order marks (BOMs) as whitespace for better

479 // compatibility with Spidermonkey and other JavaScript engines.	469 // compatibility with Spidermonkey and other JavaScript engines.

480 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) {	470 while (character_classes_->is_white_space_.get(c0_) ||

	471 IsByteOrderMark(c0_)) {

481 // IsWhiteSpace() includes line terminators!	472 // IsWhiteSpace() includes line terminators!

482 if (kIsLineTerminator.get(c0_)) {	473 if (character_classes_->is_line_terminator_.get(c0_)) {

483 // Ignore line terminators, but remember them. This is necessary	474 // Ignore line terminators, but remember them. This is necessary

484 // for automatic semicolon insertion.	475 // for automatic semicolon insertion.

485 has_line_terminator_before_next_ = true;	476 has_line_terminator_before_next_ = true;

486 }	477 }

487 Advance();	478 Advance();

488 }	479 }

489	480

490 // If there is an HTML comment end '-->' at the beginning of a	481 // If there is an HTML comment end '-->' at the beginning of a

491 // line (with only whitespace in front of it), we treat the rest	482 // line (with only whitespace in front of it), we treat the rest

492 // of the line as a comment. This is in line with the way	483 // of the line as a comment. This is in line with the way

(...skipping 19 matching lines...) Expand all Loading...
512	503

513	504

514 Token::Value Scanner::SkipSingleLineComment() {	505 Token::Value Scanner::SkipSingleLineComment() {

515 Advance();	506 Advance();

516	507

517 // The line terminator at the end of the line is not considered	508 // The line terminator at the end of the line is not considered

518 // to be part of the single-line comment; it is recognized	509 // to be part of the single-line comment; it is recognized

519 // separately by the lexical grammar and becomes part of the	510 // separately by the lexical grammar and becomes part of the

520 // stream of input elements for the syntactic grammar (see	511 // stream of input elements for the syntactic grammar (see

521 // ECMA-262, section 7.4, page 12).	512 // ECMA-262, section 7.4, page 12).

522 while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) {	513 while (c0_ >= 0 &&

	514 !character_classes_->is_line_terminator_.get(c0_)) {

523 Advance();	515 Advance();

524 }	516 }

525	517

526 return Token::WHITESPACE;	518 return Token::WHITESPACE;

527 }	519 }

528	520

529	521

530 Token::Value Scanner::SkipMultiLineComment() {	522 Token::Value Scanner::SkipMultiLineComment() {

531 ASSERT(c0_ == '*');	523 ASSERT(c0_ == '*');

532 Advance();	524 Advance();

(...skipping 208 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
741	733

742	734

743 Token::Value Scanner::ScanJsonIdentifier(const char* text,	735 Token::Value Scanner::ScanJsonIdentifier(const char* text,

744 Token::Value token) {	736 Token::Value token) {

745 StartLiteral();	737 StartLiteral();

746 while (*text != '\0') {	738 while (*text != '\0') {

747 if (c0_ != *text) return Token::ILLEGAL;	739 if (c0_ != *text) return Token::ILLEGAL;

748 Advance();	740 Advance();

749 text++;	741 text++;

750 }	742 }

751 if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL;	743 if (character_classes_->is_identifier_part_.get(c0_))

	744 return Token::ILLEGAL;

752 TerminateLiteral();	745 TerminateLiteral();

753 return token;	746 return token;

754 }	747 }

755	748

756	749

757 void Scanner::ScanJavaScript() {	750 void Scanner::ScanJavaScript() {

758 next_.literal_buffer = NULL;	751 next_.literal_buffer = NULL;

759 Token::Value token;	752 Token::Value token;

760 has_line_terminator_before_next_ = false;	753 has_line_terminator_before_next_ = false;

761 do {	754 do {

(...skipping 203 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
965	958

966 case '?':	959 case '?':

967 token = Select(Token::CONDITIONAL);	960 token = Select(Token::CONDITIONAL);

968 break;	961 break;

969	962

970 case '~':	963 case '~':

971 token = Select(Token::BIT_NOT);	964 token = Select(Token::BIT_NOT);

972 break;	965 break;

973	966

974 default:	967 default:

975 if (kIsIdentifierStart.get(c0_)) {	968 if (character_classes_->is_identifier_start_.get(c0_)) {

976 token = ScanIdentifier();	969 token = ScanIdentifier();

977 } else if (IsDecimalDigit(c0_)) {	970 } else if (IsDecimalDigit(c0_)) {

978 token = ScanNumber(false);	971 token = ScanNumber(false);

979 } else if (SkipWhiteSpace()) {	972 } else if (SkipWhiteSpace()) {

980 token = Token::WHITESPACE;	973 token = Token::WHITESPACE;

981 } else if (c0_ < 0) {	974 } else if (c0_ < 0) {

982 token = Token::EOS;	975 token = Token::EOS;

983 } else {	976 } else {

984 token = Select(Token::ILLEGAL);	977 token = Select(Token::ILLEGAL);

985 }	978 }

(...skipping 58 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1044 }	1037 }

1045 return x;	1038 return x;

1046 }	1039 }

1047	1040

1048	1041

1049 void Scanner::ScanEscape() {	1042 void Scanner::ScanEscape() {

1050 uc32 c = c0_;	1043 uc32 c = c0_;

1051 Advance();	1044 Advance();

1052	1045

1053 // Skip escaped newlines.	1046 // Skip escaped newlines.

1054 if (kIsLineTerminator.get(c)) {	1047 if (character_classes_->is_line_terminator_.get(c)) {

1055 // Allow CR+LF newlines in multiline string literals.	1048 // Allow CR+LF newlines in multiline string literals.

1056 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();	1049 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();

1057 // Allow LF+CR newlines in multiline string literals.	1050 // Allow LF+CR newlines in multiline string literals.

1058 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();	1051 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();

1059 return;	1052 return;

1060 }	1053 }

1061	1054

1062 switch (c) {	1055 switch (c) {

1063 case '\'': // fall through	1056 case '\'': // fall through

1064 case '"' : // fall through	1057 case '"' : // fall through

(...skipping 21 matching lines...) Expand all Loading...
1086 // as non-escaped characters by JS VMs.	1079 // as non-escaped characters by JS VMs.

1087 AddChar(c);	1080 AddChar(c);

1088 }	1081 }

1089	1082

1090	1083

1091 Token::Value Scanner::ScanString() {	1084 Token::Value Scanner::ScanString() {

1092 uc32 quote = c0_;	1085 uc32 quote = c0_;

1093 Advance(); // consume quote	1086 Advance(); // consume quote

1094	1087

1095 StartLiteral();	1088 StartLiteral();

1096 while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) {	1089 while (c0_ != quote && c0_ >= 0 &&

	1090 !character_classes_->is_line_terminator_.get(c0_)) {

1097 uc32 c = c0_;	1091 uc32 c = c0_;

1098 Advance();	1092 Advance();

1099 if (c == '\\') {	1093 if (c == '\\') {

1100 if (c0_ < 0) return Token::ILLEGAL;	1094 if (c0_ < 0) return Token::ILLEGAL;

1101 ScanEscape();	1095 ScanEscape();

1102 } else {	1096 } else {

1103 AddChar(c);	1097 AddChar(c);

1104 }	1098 }

1105 }	1099 }

1106 if (c0_ != quote) {	1100 if (c0_ != quote) {

(...skipping 93 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1200 // we must have at least one decimal digit after 'e'/'E'	1194 // we must have at least one decimal digit after 'e'/'E'

1201 return Token::ILLEGAL;	1195 return Token::ILLEGAL;

1202 ScanDecimalDigits();	1196 ScanDecimalDigits();

1203 }	1197 }

1204 TerminateLiteral();	1198 TerminateLiteral();

1205	1199

1206 // The source character immediately following a numeric literal must	1200 // The source character immediately following a numeric literal must

1207 // not be an identifier start or a decimal digit; see ECMA-262	1201 // not be an identifier start or a decimal digit; see ECMA-262

1208 // section 7.8.3, page 17 (note that we read only one decimal digit	1202 // section 7.8.3, page 17 (note that we read only one decimal digit

1209 // if the value is 0).	1203 // if the value is 0).

1210 if (IsDecimalDigit(c0_) || kIsIdentifierStart.get(c0_))	1204 if (IsDecimalDigit(c0_) ||

	1205 character_classes_->is_identifier_start_.get(c0_))

1211 return Token::ILLEGAL;	1206 return Token::ILLEGAL;

1212	1207

1213 return Token::NUMBER;	1208 return Token::NUMBER;

1214 }	1209 }

1215	1210

1216	1211

1217 uc32 Scanner::ScanIdentifierUnicodeEscape() {	1212 uc32 Scanner::ScanIdentifierUnicodeEscape() {

1218 Advance();	1213 Advance();

1219 if (c0_ != 'u') return unibrow::Utf8::kBadChar;	1214 if (c0_ != 'u') return unibrow::Utf8::kBadChar;

1220 Advance();	1215 Advance();

1221 uc32 c = ScanHexEscape('u', 4);	1216 uc32 c = ScanHexEscape('u', 4);

1222 // We do not allow a unicode escape sequence to start another	1217 // We do not allow a unicode escape sequence to start another

1223 // unicode escape sequence.	1218 // unicode escape sequence.

1224 if (c == '\\') return unibrow::Utf8::kBadChar;	1219 if (c == '\\') return unibrow::Utf8::kBadChar;

1225 return c;	1220 return c;

1226 }	1221 }

1227	1222

1228	1223

1229 Token::Value Scanner::ScanIdentifier() {	1224 Token::Value Scanner::ScanIdentifier() {

1230 ASSERT(kIsIdentifierStart.get(c0_));	1225 ASSERT(character_classes_->is_identifier_start_.get(c0_));

1231	1226

1232 StartLiteral();	1227 StartLiteral();

1233 KeywordMatcher keyword_match;	1228 KeywordMatcher keyword_match;

1234	1229

1235 // Scan identifier start character.	1230 // Scan identifier start character.

1236 if (c0_ == '\\') {	1231 if (c0_ == '\\') {

1237 uc32 c = ScanIdentifierUnicodeEscape();	1232 uc32 c = ScanIdentifierUnicodeEscape();

1238 // Only allow legal identifier start characters.	1233 // Only allow legal identifier start characters.

1239 if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL;	1234 if (!character_classes_->is_identifier_start_.get(c))

	1235 return Token::ILLEGAL;

1240 AddChar(c);	1236 AddChar(c);

1241 keyword_match.Fail();	1237 keyword_match.Fail();

1242 } else {	1238 } else {

1243 AddChar(c0_);	1239 AddChar(c0_);

1244 keyword_match.AddChar(c0_);	1240 keyword_match.AddChar(c0_);

1245 Advance();	1241 Advance();

1246 }	1242 }

1247	1243

1248 // Scan the rest of the identifier characters.	1244 // Scan the rest of the identifier characters.

1249 while (kIsIdentifierPart.get(c0_)) {	1245 while (character_classes_->is_identifier_part_.get(c0_)) {

1250 if (c0_ == '\\') {	1246 if (c0_ == '\\') {

1251 uc32 c = ScanIdentifierUnicodeEscape();	1247 uc32 c = ScanIdentifierUnicodeEscape();

1252 // Only allow legal identifier part characters.	1248 // Only allow legal identifier part characters.

1253 if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL;	1249 if (!character_classes_->is_identifier_part_.get(c))

	1250 return Token::ILLEGAL;

1254 AddChar(c);	1251 AddChar(c);

1255 keyword_match.Fail();	1252 keyword_match.Fail();

1256 } else {	1253 } else {

1257 AddChar(c0_);	1254 AddChar(c0_);

1258 keyword_match.AddChar(c0_);	1255 keyword_match.AddChar(c0_);

1259 Advance();	1256 Advance();

1260 }	1257 }

1261 }	1258 }

1262 TerminateLiteral();	1259 TerminateLiteral();

1263	1260

1264 return keyword_match.token();	1261 return keyword_match.token();

1265 }	1262 }

1266	1263

1267	1264

1268	1265

1269 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) {	1266 bool ScannerCharacterClasses::IsIdentifier(unibrow::CharacterStream* buffer) {

1270 // Checks whether the buffer contains an identifier (no escape).	1267 // Checks whether the buffer contains an identifier (no escape).

1271 if (!buffer->has_more()) return false;	1268 if (!buffer->has_more()) return false;

1272 if (!kIsIdentifierStart.get(buffer->GetNext())) return false;	1269 if (!is_identifier_start_.get(buffer->GetNext())) return false;

1273 while (buffer->has_more()) {	1270 while (buffer->has_more()) {

1274 if (!kIsIdentifierPart.get(buffer->GetNext())) return false;	1271 if (!is_identifier_part_.get(buffer->GetNext())) return false;

1275 }	1272 }

1276 return true;	1273 return true;

1277 }	1274 }

1278	1275

1279	1276

1280 bool Scanner::ScanRegExpPattern(bool seen_equal) {	1277 bool Scanner::ScanRegExpPattern(bool seen_equal) {

1281 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags	1278 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags

1282 bool in_character_class = false;	1279 bool in_character_class = false;

1283	1280

1284 // Previous token is either '/' or '/=', in the second case, the	1281 // Previous token is either '/' or '/=', in the second case, the

1285 // pattern starts at =.	1282 // pattern starts at =.

1286 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);	1283 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);

1287 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);	1284 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);

1288	1285

1289 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,	1286 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,

1290 // the scanner should pass uninterpreted bodies to the RegExp	1287 // the scanner should pass uninterpreted bodies to the RegExp

1291 // constructor.	1288 // constructor.

1292 StartLiteral();	1289 StartLiteral();

1293 if (seen_equal)	1290 if (seen_equal)

1294 AddChar('=');	1291 AddChar('=');

1295	1292

1296 while (c0_ != '/' || in_character_class) {	1293 while (c0_ != '/' || in_character_class) {

1297 if (kIsLineTerminator.get(c0_) || c0_ < 0)	1294 if (character_classes_->is_line_terminator_.get(c0_) || c0_ < 0)

1298 return false;	1295 return false;

1299 if (c0_ == '\\') { // escaped character	1296 if (c0_ == '\\') { // escaped character

1300 AddCharAdvance();	1297 AddCharAdvance();

1301 if (kIsLineTerminator.get(c0_) || c0_ < 0)	1298 if (character_classes_->is_line_terminator_.get(c0_) || c0_ < 0)

1302 return false;	1299 return false;

1303 AddCharAdvance();	1300 AddCharAdvance();

1304 } else { // unescaped character	1301 } else { // unescaped character

1305 if (c0_ == '[')	1302 if (c0_ == '[')

1306 in_character_class = true;	1303 in_character_class = true;

1307 if (c0_ == ']')	1304 if (c0_ == ']')

1308 in_character_class = false;	1305 in_character_class = false;

1309 AddCharAdvance();	1306 AddCharAdvance();

1310 }	1307 }

1311 }	1308 }

1312 Advance(); // consume '/'	1309 Advance(); // consume '/'

1313	1310

1314 TerminateLiteral();	1311 TerminateLiteral();

1315	1312

1316 return true;	1313 return true;

1317 }	1314 }

1318	1315

1319 bool Scanner::ScanRegExpFlags() {	1316 bool Scanner::ScanRegExpFlags() {

1320 // Scan regular expression flags.	1317 // Scan regular expression flags.

1321 StartLiteral();	1318 StartLiteral();

1322 while (kIsIdentifierPart.get(c0_)) {	1319 while (character_classes_->is_identifier_part_.get(c0_)) {

1323 if (c0_ == '\\') {	1320 if (c0_ == '\\') {

1324 uc32 c = ScanIdentifierUnicodeEscape();	1321 uc32 c = ScanIdentifierUnicodeEscape();

1325 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {	1322 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {

1326 // We allow any escaped character, unlike the restriction on	1323 // We allow any escaped character, unlike the restriction on

1327 // IdentifierPart when it is used to build an IdentifierName.	1324 // IdentifierPart when it is used to build an IdentifierName.

1328 AddChar(c);	1325 AddChar(c);

1329 continue;	1326 continue;

1330 }	1327 }

1331 }	1328 }

1332 AddCharAdvance();	1329 AddCharAdvance();

1333 }	1330 }

1334 TerminateLiteral();	1331 TerminateLiteral();

1335	1332

1336 next_.location.end_pos = source_pos() - 1;	1333 next_.location.end_pos = source_pos() - 1;

1337 return true;	1334 return true;

1338 }	1335 }

1339	1336

1340 } } // namespace v8::internal	1337 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/scanner.h ('k') | no next file » | no next file with comments »