Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(74)

Side by Side Diff: src/scanner.cc

Issue 2831016: [Isolates] Static mutable data of Scanner class moved to ScannerCharacterClasses / Isolate. (Closed)
Patch Set: . Created 10 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/scanner.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 16 matching lines...) Expand all
27 27
28 #include "v8.h" 28 #include "v8.h"
29 29
30 #include "ast.h" 30 #include "ast.h"
31 #include "handles.h" 31 #include "handles.h"
32 #include "scanner.h" 32 #include "scanner.h"
33 33
34 namespace v8 { 34 namespace v8 {
35 namespace internal { 35 namespace internal {
36 36
37 // ----------------------------------------------------------------------------
38 // Character predicates
39
40
41 unibrow::Predicate<IdentifierStart, 128> Scanner::kIsIdentifierStart;
42 unibrow::Predicate<IdentifierPart, 128> Scanner::kIsIdentifierPart;
43 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator;
44 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace;
45
46
47 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_;
48
49 37
50 // ---------------------------------------------------------------------------- 38 // ----------------------------------------------------------------------------
51 // UTF8Buffer 39 // UTF8Buffer
52 40
53 UTF8Buffer::UTF8Buffer() : data_(NULL), limit_(NULL) { } 41 UTF8Buffer::UTF8Buffer() : data_(NULL), limit_(NULL) { }
54 42
55 43
56 UTF8Buffer::~UTF8Buffer() { 44 UTF8Buffer::~UTF8Buffer() {
57 if (data_ != NULL) DeleteArray(data_); 45 if (data_ != NULL) DeleteArray(data_);
58 } 46 }
(...skipping 269 matching lines...) Expand 10 before | Expand all | Expand 10 after
328 } 316 }
329 // On fallthrough, it's a failure. 317 // On fallthrough, it's a failure.
330 state_ = UNMATCHABLE; 318 state_ = UNMATCHABLE;
331 } 319 }
332 320
333 321
334 // ---------------------------------------------------------------------------- 322 // ----------------------------------------------------------------------------
335 // Scanner 323 // Scanner
336 324
337 Scanner::Scanner(ParserMode pre) 325 Scanner::Scanner(ParserMode pre)
338 : stack_overflow_(false), is_pre_parsing_(pre == PREPARSE) { } 326 : stack_overflow_(false), is_pre_parsing_(pre == PREPARSE),
327 character_classes_(Isolate::Current()->
328 scanner_character_classes()) { }
339 329
340 330
341 void Scanner::Initialize(Handle<String> source, 331 void Scanner::Initialize(Handle<String> source,
342 ParserLanguage language) { 332 ParserLanguage language) {
343 safe_string_input_buffer_.Reset(source.location()); 333 safe_string_input_buffer_.Reset(source.location());
344 Init(source, &safe_string_input_buffer_, 0, source->length(), language); 334 Init(source, &safe_string_input_buffer_, 0, source->length(), language);
345 } 335 }
346 336
347 337
348 void Scanner::Initialize(Handle<String> source, 338 void Scanner::Initialize(Handle<String> source,
(...skipping 121 matching lines...) Expand 10 before | Expand all | Expand 10 after
470 return source_pos() != start_position; 460 return source_pos() != start_position;
471 } 461 }
472 462
473 463
474 bool Scanner::SkipJavaScriptWhiteSpace() { 464 bool Scanner::SkipJavaScriptWhiteSpace() {
475 int start_position = source_pos(); 465 int start_position = source_pos();
476 466
477 while (true) { 467 while (true) {
478 // We treat byte-order marks (BOMs) as whitespace for better 468 // We treat byte-order marks (BOMs) as whitespace for better
479 // compatibility with Spidermonkey and other JavaScript engines. 469 // compatibility with Spidermonkey and other JavaScript engines.
480 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { 470 while (character_classes_->is_white_space_.get(c0_) ||
471 IsByteOrderMark(c0_)) {
481 // IsWhiteSpace() includes line terminators! 472 // IsWhiteSpace() includes line terminators!
482 if (kIsLineTerminator.get(c0_)) { 473 if (character_classes_->is_line_terminator_.get(c0_)) {
483 // Ignore line terminators, but remember them. This is necessary 474 // Ignore line terminators, but remember them. This is necessary
484 // for automatic semicolon insertion. 475 // for automatic semicolon insertion.
485 has_line_terminator_before_next_ = true; 476 has_line_terminator_before_next_ = true;
486 } 477 }
487 Advance(); 478 Advance();
488 } 479 }
489 480
490 // If there is an HTML comment end '-->' at the beginning of a 481 // If there is an HTML comment end '-->' at the beginning of a
491 // line (with only whitespace in front of it), we treat the rest 482 // line (with only whitespace in front of it), we treat the rest
492 // of the line as a comment. This is in line with the way 483 // of the line as a comment. This is in line with the way
(...skipping 19 matching lines...) Expand all
512 503
513 504
514 Token::Value Scanner::SkipSingleLineComment() { 505 Token::Value Scanner::SkipSingleLineComment() {
515 Advance(); 506 Advance();
516 507
517 // The line terminator at the end of the line is not considered 508 // The line terminator at the end of the line is not considered
518 // to be part of the single-line comment; it is recognized 509 // to be part of the single-line comment; it is recognized
519 // separately by the lexical grammar and becomes part of the 510 // separately by the lexical grammar and becomes part of the
520 // stream of input elements for the syntactic grammar (see 511 // stream of input elements for the syntactic grammar (see
521 // ECMA-262, section 7.4, page 12). 512 // ECMA-262, section 7.4, page 12).
522 while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) { 513 while (c0_ >= 0 &&
514 !character_classes_->is_line_terminator_.get(c0_)) {
523 Advance(); 515 Advance();
524 } 516 }
525 517
526 return Token::WHITESPACE; 518 return Token::WHITESPACE;
527 } 519 }
528 520
529 521
530 Token::Value Scanner::SkipMultiLineComment() { 522 Token::Value Scanner::SkipMultiLineComment() {
531 ASSERT(c0_ == '*'); 523 ASSERT(c0_ == '*');
532 Advance(); 524 Advance();
(...skipping 208 matching lines...) Expand 10 before | Expand all | Expand 10 after
741 733
742 734
743 Token::Value Scanner::ScanJsonIdentifier(const char* text, 735 Token::Value Scanner::ScanJsonIdentifier(const char* text,
744 Token::Value token) { 736 Token::Value token) {
745 StartLiteral(); 737 StartLiteral();
746 while (*text != '\0') { 738 while (*text != '\0') {
747 if (c0_ != *text) return Token::ILLEGAL; 739 if (c0_ != *text) return Token::ILLEGAL;
748 Advance(); 740 Advance();
749 text++; 741 text++;
750 } 742 }
751 if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL; 743 if (character_classes_->is_identifier_part_.get(c0_))
744 return Token::ILLEGAL;
752 TerminateLiteral(); 745 TerminateLiteral();
753 return token; 746 return token;
754 } 747 }
755 748
756 749
757 void Scanner::ScanJavaScript() { 750 void Scanner::ScanJavaScript() {
758 next_.literal_buffer = NULL; 751 next_.literal_buffer = NULL;
759 Token::Value token; 752 Token::Value token;
760 has_line_terminator_before_next_ = false; 753 has_line_terminator_before_next_ = false;
761 do { 754 do {
(...skipping 203 matching lines...) Expand 10 before | Expand all | Expand 10 after
965 958
966 case '?': 959 case '?':
967 token = Select(Token::CONDITIONAL); 960 token = Select(Token::CONDITIONAL);
968 break; 961 break;
969 962
970 case '~': 963 case '~':
971 token = Select(Token::BIT_NOT); 964 token = Select(Token::BIT_NOT);
972 break; 965 break;
973 966
974 default: 967 default:
975 if (kIsIdentifierStart.get(c0_)) { 968 if (character_classes_->is_identifier_start_.get(c0_)) {
976 token = ScanIdentifier(); 969 token = ScanIdentifier();
977 } else if (IsDecimalDigit(c0_)) { 970 } else if (IsDecimalDigit(c0_)) {
978 token = ScanNumber(false); 971 token = ScanNumber(false);
979 } else if (SkipWhiteSpace()) { 972 } else if (SkipWhiteSpace()) {
980 token = Token::WHITESPACE; 973 token = Token::WHITESPACE;
981 } else if (c0_ < 0) { 974 } else if (c0_ < 0) {
982 token = Token::EOS; 975 token = Token::EOS;
983 } else { 976 } else {
984 token = Select(Token::ILLEGAL); 977 token = Select(Token::ILLEGAL);
985 } 978 }
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after
1044 } 1037 }
1045 return x; 1038 return x;
1046 } 1039 }
1047 1040
1048 1041
1049 void Scanner::ScanEscape() { 1042 void Scanner::ScanEscape() {
1050 uc32 c = c0_; 1043 uc32 c = c0_;
1051 Advance(); 1044 Advance();
1052 1045
1053 // Skip escaped newlines. 1046 // Skip escaped newlines.
1054 if (kIsLineTerminator.get(c)) { 1047 if (character_classes_->is_line_terminator_.get(c)) {
1055 // Allow CR+LF newlines in multiline string literals. 1048 // Allow CR+LF newlines in multiline string literals.
1056 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); 1049 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();
1057 // Allow LF+CR newlines in multiline string literals. 1050 // Allow LF+CR newlines in multiline string literals.
1058 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); 1051 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();
1059 return; 1052 return;
1060 } 1053 }
1061 1054
1062 switch (c) { 1055 switch (c) {
1063 case '\'': // fall through 1056 case '\'': // fall through
1064 case '"' : // fall through 1057 case '"' : // fall through
(...skipping 21 matching lines...) Expand all
1086 // as non-escaped characters by JS VMs. 1079 // as non-escaped characters by JS VMs.
1087 AddChar(c); 1080 AddChar(c);
1088 } 1081 }
1089 1082
1090 1083
1091 Token::Value Scanner::ScanString() { 1084 Token::Value Scanner::ScanString() {
1092 uc32 quote = c0_; 1085 uc32 quote = c0_;
1093 Advance(); // consume quote 1086 Advance(); // consume quote
1094 1087
1095 StartLiteral(); 1088 StartLiteral();
1096 while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) { 1089 while (c0_ != quote && c0_ >= 0 &&
1090 !character_classes_->is_line_terminator_.get(c0_)) {
1097 uc32 c = c0_; 1091 uc32 c = c0_;
1098 Advance(); 1092 Advance();
1099 if (c == '\\') { 1093 if (c == '\\') {
1100 if (c0_ < 0) return Token::ILLEGAL; 1094 if (c0_ < 0) return Token::ILLEGAL;
1101 ScanEscape(); 1095 ScanEscape();
1102 } else { 1096 } else {
1103 AddChar(c); 1097 AddChar(c);
1104 } 1098 }
1105 } 1099 }
1106 if (c0_ != quote) { 1100 if (c0_ != quote) {
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after
1200 // we must have at least one decimal digit after 'e'/'E' 1194 // we must have at least one decimal digit after 'e'/'E'
1201 return Token::ILLEGAL; 1195 return Token::ILLEGAL;
1202 ScanDecimalDigits(); 1196 ScanDecimalDigits();
1203 } 1197 }
1204 TerminateLiteral(); 1198 TerminateLiteral();
1205 1199
1206 // The source character immediately following a numeric literal must 1200 // The source character immediately following a numeric literal must
1207 // not be an identifier start or a decimal digit; see ECMA-262 1201 // not be an identifier start or a decimal digit; see ECMA-262
1208 // section 7.8.3, page 17 (note that we read only one decimal digit 1202 // section 7.8.3, page 17 (note that we read only one decimal digit
1209 // if the value is 0). 1203 // if the value is 0).
1210 if (IsDecimalDigit(c0_) || kIsIdentifierStart.get(c0_)) 1204 if (IsDecimalDigit(c0_) ||
1205 character_classes_->is_identifier_start_.get(c0_))
1211 return Token::ILLEGAL; 1206 return Token::ILLEGAL;
1212 1207
1213 return Token::NUMBER; 1208 return Token::NUMBER;
1214 } 1209 }
1215 1210
1216 1211
1217 uc32 Scanner::ScanIdentifierUnicodeEscape() { 1212 uc32 Scanner::ScanIdentifierUnicodeEscape() {
1218 Advance(); 1213 Advance();
1219 if (c0_ != 'u') return unibrow::Utf8::kBadChar; 1214 if (c0_ != 'u') return unibrow::Utf8::kBadChar;
1220 Advance(); 1215 Advance();
1221 uc32 c = ScanHexEscape('u', 4); 1216 uc32 c = ScanHexEscape('u', 4);
1222 // We do not allow a unicode escape sequence to start another 1217 // We do not allow a unicode escape sequence to start another
1223 // unicode escape sequence. 1218 // unicode escape sequence.
1224 if (c == '\\') return unibrow::Utf8::kBadChar; 1219 if (c == '\\') return unibrow::Utf8::kBadChar;
1225 return c; 1220 return c;
1226 } 1221 }
1227 1222
1228 1223
1229 Token::Value Scanner::ScanIdentifier() { 1224 Token::Value Scanner::ScanIdentifier() {
1230 ASSERT(kIsIdentifierStart.get(c0_)); 1225 ASSERT(character_classes_->is_identifier_start_.get(c0_));
1231 1226
1232 StartLiteral(); 1227 StartLiteral();
1233 KeywordMatcher keyword_match; 1228 KeywordMatcher keyword_match;
1234 1229
1235 // Scan identifier start character. 1230 // Scan identifier start character.
1236 if (c0_ == '\\') { 1231 if (c0_ == '\\') {
1237 uc32 c = ScanIdentifierUnicodeEscape(); 1232 uc32 c = ScanIdentifierUnicodeEscape();
1238 // Only allow legal identifier start characters. 1233 // Only allow legal identifier start characters.
1239 if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL; 1234 if (!character_classes_->is_identifier_start_.get(c))
1235 return Token::ILLEGAL;
1240 AddChar(c); 1236 AddChar(c);
1241 keyword_match.Fail(); 1237 keyword_match.Fail();
1242 } else { 1238 } else {
1243 AddChar(c0_); 1239 AddChar(c0_);
1244 keyword_match.AddChar(c0_); 1240 keyword_match.AddChar(c0_);
1245 Advance(); 1241 Advance();
1246 } 1242 }
1247 1243
1248 // Scan the rest of the identifier characters. 1244 // Scan the rest of the identifier characters.
1249 while (kIsIdentifierPart.get(c0_)) { 1245 while (character_classes_->is_identifier_part_.get(c0_)) {
1250 if (c0_ == '\\') { 1246 if (c0_ == '\\') {
1251 uc32 c = ScanIdentifierUnicodeEscape(); 1247 uc32 c = ScanIdentifierUnicodeEscape();
1252 // Only allow legal identifier part characters. 1248 // Only allow legal identifier part characters.
1253 if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL; 1249 if (!character_classes_->is_identifier_part_.get(c))
1250 return Token::ILLEGAL;
1254 AddChar(c); 1251 AddChar(c);
1255 keyword_match.Fail(); 1252 keyword_match.Fail();
1256 } else { 1253 } else {
1257 AddChar(c0_); 1254 AddChar(c0_);
1258 keyword_match.AddChar(c0_); 1255 keyword_match.AddChar(c0_);
1259 Advance(); 1256 Advance();
1260 } 1257 }
1261 } 1258 }
1262 TerminateLiteral(); 1259 TerminateLiteral();
1263 1260
1264 return keyword_match.token(); 1261 return keyword_match.token();
1265 } 1262 }
1266 1263
1267 1264
1268 1265
1269 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) { 1266 bool ScannerCharacterClasses::IsIdentifier(unibrow::CharacterStream* buffer) {
1270 // Checks whether the buffer contains an identifier (no escape). 1267 // Checks whether the buffer contains an identifier (no escape).
1271 if (!buffer->has_more()) return false; 1268 if (!buffer->has_more()) return false;
1272 if (!kIsIdentifierStart.get(buffer->GetNext())) return false; 1269 if (!is_identifier_start_.get(buffer->GetNext())) return false;
1273 while (buffer->has_more()) { 1270 while (buffer->has_more()) {
1274 if (!kIsIdentifierPart.get(buffer->GetNext())) return false; 1271 if (!is_identifier_part_.get(buffer->GetNext())) return false;
1275 } 1272 }
1276 return true; 1273 return true;
1277 } 1274 }
1278 1275
1279 1276
1280 bool Scanner::ScanRegExpPattern(bool seen_equal) { 1277 bool Scanner::ScanRegExpPattern(bool seen_equal) {
1281 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags 1278 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
1282 bool in_character_class = false; 1279 bool in_character_class = false;
1283 1280
1284 // Previous token is either '/' or '/=', in the second case, the 1281 // Previous token is either '/' or '/=', in the second case, the
1285 // pattern starts at =. 1282 // pattern starts at =.
1286 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); 1283 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);
1287 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); 1284 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);
1288 1285
1289 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, 1286 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
1290 // the scanner should pass uninterpreted bodies to the RegExp 1287 // the scanner should pass uninterpreted bodies to the RegExp
1291 // constructor. 1288 // constructor.
1292 StartLiteral(); 1289 StartLiteral();
1293 if (seen_equal) 1290 if (seen_equal)
1294 AddChar('='); 1291 AddChar('=');
1295 1292
1296 while (c0_ != '/' || in_character_class) { 1293 while (c0_ != '/' || in_character_class) {
1297 if (kIsLineTerminator.get(c0_) || c0_ < 0) 1294 if (character_classes_->is_line_terminator_.get(c0_) || c0_ < 0)
1298 return false; 1295 return false;
1299 if (c0_ == '\\') { // escaped character 1296 if (c0_ == '\\') { // escaped character
1300 AddCharAdvance(); 1297 AddCharAdvance();
1301 if (kIsLineTerminator.get(c0_) || c0_ < 0) 1298 if (character_classes_->is_line_terminator_.get(c0_) || c0_ < 0)
1302 return false; 1299 return false;
1303 AddCharAdvance(); 1300 AddCharAdvance();
1304 } else { // unescaped character 1301 } else { // unescaped character
1305 if (c0_ == '[') 1302 if (c0_ == '[')
1306 in_character_class = true; 1303 in_character_class = true;
1307 if (c0_ == ']') 1304 if (c0_ == ']')
1308 in_character_class = false; 1305 in_character_class = false;
1309 AddCharAdvance(); 1306 AddCharAdvance();
1310 } 1307 }
1311 } 1308 }
1312 Advance(); // consume '/' 1309 Advance(); // consume '/'
1313 1310
1314 TerminateLiteral(); 1311 TerminateLiteral();
1315 1312
1316 return true; 1313 return true;
1317 } 1314 }
1318 1315
1319 bool Scanner::ScanRegExpFlags() { 1316 bool Scanner::ScanRegExpFlags() {
1320 // Scan regular expression flags. 1317 // Scan regular expression flags.
1321 StartLiteral(); 1318 StartLiteral();
1322 while (kIsIdentifierPart.get(c0_)) { 1319 while (character_classes_->is_identifier_part_.get(c0_)) {
1323 if (c0_ == '\\') { 1320 if (c0_ == '\\') {
1324 uc32 c = ScanIdentifierUnicodeEscape(); 1321 uc32 c = ScanIdentifierUnicodeEscape();
1325 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) { 1322 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {
1326 // We allow any escaped character, unlike the restriction on 1323 // We allow any escaped character, unlike the restriction on
1327 // IdentifierPart when it is used to build an IdentifierName. 1324 // IdentifierPart when it is used to build an IdentifierName.
1328 AddChar(c); 1325 AddChar(c);
1329 continue; 1326 continue;
1330 } 1327 }
1331 } 1328 }
1332 AddCharAdvance(); 1329 AddCharAdvance();
1333 } 1330 }
1334 TerminateLiteral(); 1331 TerminateLiteral();
1335 1332
1336 next_.location.end_pos = source_pos() - 1; 1333 next_.location.end_pos = source_pos() - 1;
1337 return true; 1334 return true;
1338 } 1335 }
1339 1336
1340 } } // namespace v8::internal 1337 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « src/scanner.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698