Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(51)

Side by Side Diff: src/scanner.cc

Issue 435003: Patch for allowing several V8 instances in process:... (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: '' Created 11 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/scanner.h ('k') | src/scopeinfo.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 19 matching lines...) Expand all
30 #include "ast.h" 30 #include "ast.h"
31 #include "scanner.h" 31 #include "scanner.h"
32 32
33 namespace v8 { 33 namespace v8 {
34 namespace internal { 34 namespace internal {
35 35
36 // ---------------------------------------------------------------------------- 36 // ----------------------------------------------------------------------------
37 // Character predicates 37 // Character predicates
38 38
39 39
40 unibrow::Predicate<IdentifierStart, 128> Scanner::kIsIdentifierStart; 40 ScannerData::ScannerData() {}
41 unibrow::Predicate<IdentifierPart, 128> Scanner::kIsIdentifierPart;
42 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator;
43 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace;
44
45
46 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_;
47
48 41
49 // ---------------------------------------------------------------------------- 42 // ----------------------------------------------------------------------------
50 // UTF8Buffer 43 // UTF8Buffer
51 44
52 UTF8Buffer::UTF8Buffer() : data_(NULL), limit_(NULL) { } 45 UTF8Buffer::UTF8Buffer() : data_(NULL), limit_(NULL) { }
53 46
54 47
55 UTF8Buffer::~UTF8Buffer() { 48 UTF8Buffer::~UTF8Buffer() {
56 if (data_ != NULL) DeleteArray(data_); 49 if (data_ != NULL) DeleteArray(data_);
57 } 50 }
(...skipping 258 matching lines...) Expand 10 before | Expand all | Expand 10 after
316 UNREACHABLE(); 309 UNREACHABLE();
317 } 310 }
318 // On fallthrough, it's a failure. 311 // On fallthrough, it's a failure.
319 state_ = UNMATCHABLE; 312 state_ = UNMATCHABLE;
320 } 313 }
321 314
322 315
323 // ---------------------------------------------------------------------------- 316 // ----------------------------------------------------------------------------
324 // Scanner 317 // Scanner
325 318
326 Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) { } 319 Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre),
320 scanner_data_(v8_context()->scanner_data_) {
321 }
327 322
328 323
329 void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream, 324 void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream,
330 int position) { 325 int position) {
331 // Initialize the source buffer. 326 // Initialize the source buffer.
332 if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) { 327 if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) {
333 two_byte_string_buffer_.Initialize( 328 two_byte_string_buffer_.Initialize(
334 Handle<ExternalTwoByteString>::cast(source)); 329 Handle<ExternalTwoByteString>::cast(source));
335 source_ = &two_byte_string_buffer_; 330 source_ = &two_byte_string_buffer_;
336 } else { 331 } else {
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after
410 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF 405 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF
411 // character expressed in little-endian byte order (since it could 406 // character expressed in little-endian byte order (since it could
412 // not be a U+FFFE character expressed in big-endian byte 407 // not be a U+FFFE character expressed in big-endian byte
413 // order). Nevertheless, we check for it to be compatible with 408 // order). Nevertheless, we check for it to be compatible with
414 // Spidermonkey. 409 // Spidermonkey.
415 return c == 0xFEFF || c == 0xFFFE; 410 return c == 0xFEFF || c == 0xFFFE;
416 } 411 }
417 412
418 413
419 bool Scanner::SkipWhiteSpace() { 414 bool Scanner::SkipWhiteSpace() {
415 unibrow::Predicate<unibrow::WhiteSpace, 128>& kIsWhiteSpace =
416 scanner_data_.kIsWhiteSpace_;
417 unibrow::Predicate<unibrow::LineTerminator, 128>& kIsLineTerminator =
418 scanner_data_.kIsLineTerminator_;
420 int start_position = source_pos(); 419 int start_position = source_pos();
421 420
422 while (true) { 421 while (true) {
423 // We treat byte-order marks (BOMs) as whitespace for better 422 // We treat byte-order marks (BOMs) as whitespace for better
424 // compatibility with Spidermonkey and other JavaScript engines. 423 // compatibility with Spidermonkey and other JavaScript engines.
425 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) { 424 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) {
426 // IsWhiteSpace() includes line terminators! 425 // IsWhiteSpace() includes line terminators!
427 if (kIsLineTerminator.get(c0_)) { 426 if (kIsLineTerminator.get(c0_)) {
428 // Ignore line terminators, but remember them. This is necessary 427 // Ignore line terminators, but remember them. This is necessary
429 // for automatic semicolon insertion. 428 // for automatic semicolon insertion.
(...skipping 22 matching lines...) Expand all
452 } 451 }
453 // Return whether or not we skipped any characters. 452 // Return whether or not we skipped any characters.
454 return source_pos() != start_position; 453 return source_pos() != start_position;
455 } 454 }
456 } 455 }
457 456
458 457
459 Token::Value Scanner::SkipSingleLineComment() { 458 Token::Value Scanner::SkipSingleLineComment() {
460 Advance(); 459 Advance();
461 460
461 unibrow::Predicate<unibrow::LineTerminator, 128>& kIsLineTerminator =
462 scanner_data_.kIsLineTerminator_;
462 // The line terminator at the end of the line is not considered 463 // The line terminator at the end of the line is not considered
463 // to be part of the single-line comment; it is recognized 464 // to be part of the single-line comment; it is recognized
464 // separately by the lexical grammar and becomes part of the 465 // separately by the lexical grammar and becomes part of the
465 // stream of input elements for the syntactic grammar (see 466 // stream of input elements for the syntactic grammar (see
466 // ECMA-262, section 7.4, page 12). 467 // ECMA-262, section 7.4, page 12).
467 while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) { 468 while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) {
468 Advance(); 469 Advance();
469 } 470 }
470 471
471 return Token::WHITESPACE; 472 return Token::WHITESPACE;
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
506 if (c0_ == '-') return SkipSingleLineComment(); 507 if (c0_ == '-') return SkipSingleLineComment();
507 PushBack('-'); // undo Advance() 508 PushBack('-'); // undo Advance()
508 } 509 }
509 PushBack('!'); // undo Advance() 510 PushBack('!'); // undo Advance()
510 ASSERT(c0_ == '!'); 511 ASSERT(c0_ == '!');
511 return Token::LT; 512 return Token::LT;
512 } 513 }
513 514
514 515
515 void Scanner::Scan() { 516 void Scanner::Scan() {
517 unibrow::Predicate<IdentifierPart, 128>& kIsIdentifierPart =
518 scanner_data_.kIsIdentifierPart_;
519 unibrow::Predicate<IdentifierStart, 128>& kIsIdentifierStart =
520 scanner_data_.kIsIdentifierStart_;
521
516 next_.literal_buffer = NULL; 522 next_.literal_buffer = NULL;
517 Token::Value token; 523 Token::Value token;
518 has_line_terminator_before_next_ = false; 524 has_line_terminator_before_next_ = false;
519 do { 525 do {
520 // Remember the position of the next token 526 // Remember the position of the next token
521 next_.location.beg_pos = source_pos(); 527 next_.location.beg_pos = source_pos();
522 528
523 switch (c0_) { 529 switch (c0_) {
524 case ' ': 530 case ' ':
525 case '\t': 531 case '\t':
(...skipping 276 matching lines...) Expand 10 before | Expand all | Expand 10 after
802 } 808 }
803 return x; 809 return x;
804 } 810 }
805 811
806 812
807 void Scanner::ScanEscape() { 813 void Scanner::ScanEscape() {
808 uc32 c = c0_; 814 uc32 c = c0_;
809 Advance(); 815 Advance();
810 816
811 // Skip escaped newlines. 817 // Skip escaped newlines.
812 if (kIsLineTerminator.get(c)) { 818 if (scanner_data_.kIsLineTerminator_.get(c)) {
813 // Allow CR+LF newlines in multiline string literals. 819 // Allow CR+LF newlines in multiline string literals.
814 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); 820 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();
815 // Allow LF+CR newlines in multiline string literals. 821 // Allow LF+CR newlines in multiline string literals.
816 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); 822 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();
817 return; 823 return;
818 } 824 }
819 825
820 switch (c) { 826 switch (c) {
821 case '\'': // fall through 827 case '\'': // fall through
822 case '"' : // fall through 828 case '"' : // fall through
(...skipping 21 matching lines...) Expand all
844 // as non-escaped characters by JS VMs. 850 // as non-escaped characters by JS VMs.
845 AddChar(c); 851 AddChar(c);
846 } 852 }
847 853
848 854
849 Token::Value Scanner::ScanString() { 855 Token::Value Scanner::ScanString() {
850 uc32 quote = c0_; 856 uc32 quote = c0_;
851 Advance(); // consume quote 857 Advance(); // consume quote
852 858
853 StartLiteral(); 859 StartLiteral();
860 unibrow::Predicate<unibrow::LineTerminator, 128>& kIsLineTerminator =
861 scanner_data_.kIsLineTerminator_;
854 while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) { 862 while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) {
855 uc32 c = c0_; 863 uc32 c = c0_;
856 Advance(); 864 Advance();
857 if (c == '\\') { 865 if (c == '\\') {
858 if (c0_ < 0) return Token::ILLEGAL; 866 if (c0_ < 0) return Token::ILLEGAL;
859 ScanEscape(); 867 ScanEscape();
860 } else { 868 } else {
861 AddChar(c); 869 AddChar(c);
862 } 870 }
863 } 871 }
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after
958 // we must have at least one decimal digit after 'e'/'E' 966 // we must have at least one decimal digit after 'e'/'E'
959 return Token::ILLEGAL; 967 return Token::ILLEGAL;
960 ScanDecimalDigits(); 968 ScanDecimalDigits();
961 } 969 }
962 TerminateLiteral(); 970 TerminateLiteral();
963 971
964 // The source character immediately following a numeric literal must 972 // The source character immediately following a numeric literal must
965 // not be an identifier start or a decimal digit; see ECMA-262 973 // not be an identifier start or a decimal digit; see ECMA-262
966 // section 7.8.3, page 17 (note that we read only one decimal digit 974 // section 7.8.3, page 17 (note that we read only one decimal digit
967 // if the value is 0). 975 // if the value is 0).
968 if (IsDecimalDigit(c0_) || kIsIdentifierStart.get(c0_)) 976 if (IsDecimalDigit(c0_) ||
977 scanner_data_.kIsIdentifierStart_.get(c0_))
969 return Token::ILLEGAL; 978 return Token::ILLEGAL;
970 979
971 return Token::NUMBER; 980 return Token::NUMBER;
972 } 981 }
973 982
974 983
975 uc32 Scanner::ScanIdentifierUnicodeEscape() { 984 uc32 Scanner::ScanIdentifierUnicodeEscape() {
976 Advance(); 985 Advance();
977 if (c0_ != 'u') return unibrow::Utf8::kBadChar; 986 if (c0_ != 'u') return unibrow::Utf8::kBadChar;
978 Advance(); 987 Advance();
979 uc32 c = ScanHexEscape('u', 4); 988 uc32 c = ScanHexEscape('u', 4);
980 // We do not allow a unicode escape sequence to start another 989 // We do not allow a unicode escape sequence to start another
981 // unicode escape sequence. 990 // unicode escape sequence.
982 if (c == '\\') return unibrow::Utf8::kBadChar; 991 if (c == '\\') return unibrow::Utf8::kBadChar;
983 return c; 992 return c;
984 } 993 }
985 994
986 995
987 Token::Value Scanner::ScanIdentifier() { 996 Token::Value Scanner::ScanIdentifier() {
988 ASSERT(kIsIdentifierStart.get(c0_)); 997 unibrow::Predicate<IdentifierPart, 128>& kIsIdentifierPart =
998 scanner_data_.kIsIdentifierPart_;
999
1000 ASSERT(scanner_data_.kIsIdentifierStart_.get(c0_));
989 1001
990 StartLiteral(); 1002 StartLiteral();
991 KeywordMatcher keyword_match; 1003 KeywordMatcher keyword_match;
992 1004
993 // Scan identifier start character. 1005 // Scan identifier start character.
994 if (c0_ == '\\') { 1006 if (c0_ == '\\') {
995 uc32 c = ScanIdentifierUnicodeEscape(); 1007 uc32 c = ScanIdentifierUnicodeEscape();
996 // Only allow legal identifier start characters. 1008 // Only allow legal identifier start characters.
997 if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL; 1009 if (!scanner_data_.kIsIdentifierStart_.get(c)) return Token::ILLEGAL;
998 AddChar(c); 1010 AddChar(c);
999 keyword_match.Fail(); 1011 keyword_match.Fail();
1000 } else { 1012 } else {
1001 AddChar(c0_); 1013 AddChar(c0_);
1002 keyword_match.AddChar(c0_); 1014 keyword_match.AddChar(c0_);
1003 Advance(); 1015 Advance();
1004 } 1016 }
1005 1017
1006 // Scan the rest of the identifier characters. 1018 // Scan the rest of the identifier characters.
1007 while (kIsIdentifierPart.get(c0_)) { 1019 while (kIsIdentifierPart.get(c0_)) {
(...skipping 10 matching lines...) Expand all
1018 } 1030 }
1019 } 1031 }
1020 TerminateLiteral(); 1032 TerminateLiteral();
1021 1033
1022 return keyword_match.token(); 1034 return keyword_match.token();
1023 } 1035 }
1024 1036
1025 1037
1026 1038
1027 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) { 1039 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) {
1040 ScannerData& scanner_data = v8_context()->scanner_data_;
1041 unibrow::Predicate<IdentifierPart, 128>& kIsIdentifierPart =
1042 scanner_data.kIsIdentifierPart_;
1043
1028 // Checks whether the buffer contains an identifier (no escape). 1044 // Checks whether the buffer contains an identifier (no escape).
1029 if (!buffer->has_more()) return false; 1045 if (!buffer->has_more()) return false;
1030 if (!kIsIdentifierStart.get(buffer->GetNext())) return false; 1046 if (!scanner_data.kIsIdentifierStart_.get(buffer->GetNext())) return false;
1031 while (buffer->has_more()) { 1047 while (buffer->has_more()) {
1032 if (!kIsIdentifierPart.get(buffer->GetNext())) return false; 1048 if (!kIsIdentifierPart.get(buffer->GetNext())) return false;
1033 } 1049 }
1034 return true; 1050 return true;
1035 } 1051 }
1036 1052
1037 1053
1038 bool Scanner::ScanRegExpPattern(bool seen_equal) { 1054 bool Scanner::ScanRegExpPattern(bool seen_equal) {
1039 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags 1055 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
1040 bool in_character_class = false; 1056 bool in_character_class = false;
1041 1057
1042 // Previous token is either '/' or '/=', in the second case, the 1058 // Previous token is either '/' or '/=', in the second case, the
1043 // pattern starts at =. 1059 // pattern starts at =.
1044 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); 1060 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);
1045 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); 1061 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);
1046 1062
1047 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, 1063 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
1048 // the scanner should pass uninterpreted bodies to the RegExp 1064 // the scanner should pass uninterpreted bodies to the RegExp
1049 // constructor. 1065 // constructor.
1050 StartLiteral(); 1066 StartLiteral();
1051 if (seen_equal) 1067 if (seen_equal)
1052 AddChar('='); 1068 AddChar('=');
1053 1069
1070 unibrow::Predicate<unibrow::LineTerminator, 128>& kIsLineTerminator =
1071 scanner_data_.kIsLineTerminator_;
1072
1054 while (c0_ != '/' || in_character_class) { 1073 while (c0_ != '/' || in_character_class) {
1055 if (kIsLineTerminator.get(c0_) || c0_ < 0) 1074 if (kIsLineTerminator.get(c0_) || c0_ < 0)
1056 return false; 1075 return false;
1057 if (c0_ == '\\') { // escaped character 1076 if (c0_ == '\\') { // escaped character
1058 AddCharAdvance(); 1077 AddCharAdvance();
1059 if (kIsLineTerminator.get(c0_) || c0_ < 0) 1078 if (kIsLineTerminator.get(c0_) || c0_ < 0)
1060 return false; 1079 return false;
1061 AddCharAdvance(); 1080 AddCharAdvance();
1062 } else { // unescaped character 1081 } else { // unescaped character
1063 if (c0_ == '[') 1082 if (c0_ == '[')
1064 in_character_class = true; 1083 in_character_class = true;
1065 if (c0_ == ']') 1084 if (c0_ == ']')
1066 in_character_class = false; 1085 in_character_class = false;
1067 AddCharAdvance(); 1086 AddCharAdvance();
1068 } 1087 }
1069 } 1088 }
1070 Advance(); // consume '/' 1089 Advance(); // consume '/'
1071 1090
1072 TerminateLiteral(); 1091 TerminateLiteral();
1073 1092
1074 return true; 1093 return true;
1075 } 1094 }
1076 1095
1077 bool Scanner::ScanRegExpFlags() { 1096 bool Scanner::ScanRegExpFlags() {
1078 // Scan regular expression flags. 1097 // Scan regular expression flags.
1079 StartLiteral(); 1098 StartLiteral();
1099 unibrow::Predicate<IdentifierPart, 128>& kIsIdentifierPart =
1100 scanner_data_.kIsIdentifierPart_;
1101
1080 while (kIsIdentifierPart.get(c0_)) { 1102 while (kIsIdentifierPart.get(c0_)) {
1081 if (c0_ == '\\') { 1103 if (c0_ == '\\') {
1082 uc32 c = ScanIdentifierUnicodeEscape(); 1104 uc32 c = ScanIdentifierUnicodeEscape();
1083 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) { 1105 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {
1084 // We allow any escaped character, unlike the restriction on 1106 // We allow any escaped character, unlike the restriction on
1085 // IdentifierPart when it is used to build an IdentifierName. 1107 // IdentifierPart when it is used to build an IdentifierName.
1086 AddChar(c); 1108 AddChar(c);
1087 continue; 1109 continue;
1088 } 1110 }
1089 } 1111 }
1090 AddCharAdvance(); 1112 AddCharAdvance();
1091 } 1113 }
1092 TerminateLiteral(); 1114 TerminateLiteral();
1093 1115
1094 next_.location.end_pos = source_pos() - 1; 1116 next_.location.end_pos = source_pos() - 1;
1095 return true; 1117 return true;
1096 } 1118 }
1097 1119
1098 } } // namespace v8::internal 1120 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « src/scanner.h ('k') | src/scopeinfo.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698