src/scanner.cc - Issue 435003: Patch for allowing several V8 instances in process:...

Side by Side Diff: src/scanner.cc

Issue 435003: Patch for allowing several V8 instances in process:... (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/

Patch Set: '' Created 11 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved.	1 // Copyright 2006-2008 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 19 matching lines...) Expand all Loading...
30 #include "ast.h"	30 #include "ast.h"

31 #include "scanner.h"	31 #include "scanner.h"

32	32

33 namespace v8 {	33 namespace v8 {

34 namespace internal {	34 namespace internal {

35	35

36 // ----------------------------------------------------------------------------	36 // ----------------------------------------------------------------------------

37 // Character predicates	37 // Character predicates

38	38

39	39

40 unibrow::Predicate<IdentifierStart, 128> Scanner::kIsIdentifierStart;	40 ScannerData::ScannerData() {}

41 unibrow::Predicate<IdentifierPart, 128> Scanner::kIsIdentifierPart;

42 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator;

43 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace;

44

45

46 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_;

47

48	41

49 // ----------------------------------------------------------------------------	42 // ----------------------------------------------------------------------------

50 // UTF8Buffer	43 // UTF8Buffer

51	44

52 UTF8Buffer::UTF8Buffer() : data_(NULL), limit_(NULL) { }	45 UTF8Buffer::UTF8Buffer() : data_(NULL), limit_(NULL) { }

53	46

54	47

55 UTF8Buffer::~UTF8Buffer() {	48 UTF8Buffer::~UTF8Buffer() {

56 if (data_ != NULL) DeleteArray(data_);	49 if (data_ != NULL) DeleteArray(data_);

57 }	50 }

(...skipping 258 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
316 UNREACHABLE();	309 UNREACHABLE();

317 }	310 }

318 // On fallthrough, it's a failure.	311 // On fallthrough, it's a failure.

319 state_ = UNMATCHABLE;	312 state_ = UNMATCHABLE;

320 }	313 }

321	314

322	315

323 // ----------------------------------------------------------------------------	316 // ----------------------------------------------------------------------------

324 // Scanner	317 // Scanner

325	318

326 Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) { }	319 Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre),

	320 scanner_data_(v8_context()->scanner_data_) {

	321 }

327	322

328	323

329 void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream,	324 void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream,

330 int position) {	325 int position) {

331 // Initialize the source buffer.	326 // Initialize the source buffer.

332 if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) {	327 if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) {

333 two_byte_string_buffer_.Initialize(	328 two_byte_string_buffer_.Initialize(

334 Handle<ExternalTwoByteString>::cast(source));	329 Handle<ExternalTwoByteString>::cast(source));

335 source_ = &two_byte_string_buffer_;	330 source_ = &two_byte_string_buffer_;

336 } else {	331 } else {

(...skipping 73 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
410 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF	405 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF

411 // character expressed in little-endian byte order (since it could	406 // character expressed in little-endian byte order (since it could

412 // not be a U+FFFE character expressed in big-endian byte	407 // not be a U+FFFE character expressed in big-endian byte

413 // order). Nevertheless, we check for it to be compatible with	408 // order). Nevertheless, we check for it to be compatible with

414 // Spidermonkey.	409 // Spidermonkey.

415 return c == 0xFEFF || c == 0xFFFE;	410 return c == 0xFEFF || c == 0xFFFE;

416 }	411 }

417	412

418	413

419 bool Scanner::SkipWhiteSpace() {	414 bool Scanner::SkipWhiteSpace() {

	415 unibrow::Predicate<unibrow::WhiteSpace, 128>& kIsWhiteSpace =

	416 scanner_data_.kIsWhiteSpace_;

	417 unibrow::Predicate<unibrow::LineTerminator, 128>& kIsLineTerminator =

	418 scanner_data_.kIsLineTerminator_;

420 int start_position = source_pos();	419 int start_position = source_pos();

421	420

422 while (true) {	421 while (true) {

423 // We treat byte-order marks (BOMs) as whitespace for better	422 // We treat byte-order marks (BOMs) as whitespace for better

424 // compatibility with Spidermonkey and other JavaScript engines.	423 // compatibility with Spidermonkey and other JavaScript engines.

425 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) {	424 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) {

426 // IsWhiteSpace() includes line terminators!	425 // IsWhiteSpace() includes line terminators!

427 if (kIsLineTerminator.get(c0_)) {	426 if (kIsLineTerminator.get(c0_)) {

428 // Ignore line terminators, but remember them. This is necessary	427 // Ignore line terminators, but remember them. This is necessary

429 // for automatic semicolon insertion.	428 // for automatic semicolon insertion.

(...skipping 22 matching lines...) Expand all Loading...
452 }	451 }

453 // Return whether or not we skipped any characters.	452 // Return whether or not we skipped any characters.

454 return source_pos() != start_position;	453 return source_pos() != start_position;

455 }	454 }

456 }	455 }

457	456

458	457

459 Token::Value Scanner::SkipSingleLineComment() {	458 Token::Value Scanner::SkipSingleLineComment() {

460 Advance();	459 Advance();

461	460

	461 unibrow::Predicate<unibrow::LineTerminator, 128>& kIsLineTerminator =

	462 scanner_data_.kIsLineTerminator_;

462 // The line terminator at the end of the line is not considered	463 // The line terminator at the end of the line is not considered

463 // to be part of the single-line comment; it is recognized	464 // to be part of the single-line comment; it is recognized

464 // separately by the lexical grammar and becomes part of the	465 // separately by the lexical grammar and becomes part of the

465 // stream of input elements for the syntactic grammar (see	466 // stream of input elements for the syntactic grammar (see

466 // ECMA-262, section 7.4, page 12).	467 // ECMA-262, section 7.4, page 12).

467 while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) {	468 while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) {

468 Advance();	469 Advance();

469 }	470 }

470	471

471 return Token::WHITESPACE;	472 return Token::WHITESPACE;

(...skipping 34 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
506 if (c0_ == '-') return SkipSingleLineComment();	507 if (c0_ == '-') return SkipSingleLineComment();

507 PushBack('-'); // undo Advance()	508 PushBack('-'); // undo Advance()

508 }	509 }

509 PushBack('!'); // undo Advance()	510 PushBack('!'); // undo Advance()

510 ASSERT(c0_ == '!');	511 ASSERT(c0_ == '!');

511 return Token::LT;	512 return Token::LT;

512 }	513 }

513	514

514	515

515 void Scanner::Scan() {	516 void Scanner::Scan() {

	517 unibrow::Predicate<IdentifierPart, 128>& kIsIdentifierPart =

	518 scanner_data_.kIsIdentifierPart_;

	519 unibrow::Predicate<IdentifierStart, 128>& kIsIdentifierStart =

	520 scanner_data_.kIsIdentifierStart_;

	521

516 next_.literal_buffer = NULL;	522 next_.literal_buffer = NULL;

517 Token::Value token;	523 Token::Value token;

518 has_line_terminator_before_next_ = false;	524 has_line_terminator_before_next_ = false;

519 do {	525 do {

520 // Remember the position of the next token	526 // Remember the position of the next token

521 next_.location.beg_pos = source_pos();	527 next_.location.beg_pos = source_pos();

522	528

523 switch (c0_) {	529 switch (c0_) {

524 case ' ':	530 case ' ':

525 case '\t':	531 case '\t':

(...skipping 276 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
802 }	808 }

803 return x;	809 return x;

804 }	810 }

805	811

806	812

807 void Scanner::ScanEscape() {	813 void Scanner::ScanEscape() {

808 uc32 c = c0_;	814 uc32 c = c0_;

809 Advance();	815 Advance();

810	816

811 // Skip escaped newlines.	817 // Skip escaped newlines.

812 if (kIsLineTerminator.get(c)) {	818 if (scanner_data_.kIsLineTerminator_.get(c)) {

813 // Allow CR+LF newlines in multiline string literals.	819 // Allow CR+LF newlines in multiline string literals.

814 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();	820 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();

815 // Allow LF+CR newlines in multiline string literals.	821 // Allow LF+CR newlines in multiline string literals.

816 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();	822 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();

817 return;	823 return;

818 }	824 }

819	825

820 switch (c) {	826 switch (c) {

821 case '\'': // fall through	827 case '\'': // fall through

822 case '"' : // fall through	828 case '"' : // fall through

(...skipping 21 matching lines...) Expand all Loading...
844 // as non-escaped characters by JS VMs.	850 // as non-escaped characters by JS VMs.

845 AddChar(c);	851 AddChar(c);

846 }	852 }

847	853

848	854

849 Token::Value Scanner::ScanString() {	855 Token::Value Scanner::ScanString() {

850 uc32 quote = c0_;	856 uc32 quote = c0_;

851 Advance(); // consume quote	857 Advance(); // consume quote

852	858

853 StartLiteral();	859 StartLiteral();

	860 unibrow::Predicate<unibrow::LineTerminator, 128>& kIsLineTerminator =

	861 scanner_data_.kIsLineTerminator_;

854 while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) {	862 while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) {

855 uc32 c = c0_;	863 uc32 c = c0_;

856 Advance();	864 Advance();

857 if (c == '\\') {	865 if (c == '\\') {

858 if (c0_ < 0) return Token::ILLEGAL;	866 if (c0_ < 0) return Token::ILLEGAL;

859 ScanEscape();	867 ScanEscape();

860 } else {	868 } else {

861 AddChar(c);	869 AddChar(c);

862 }	870 }

863 }	871 }

(...skipping 94 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
958 // we must have at least one decimal digit after 'e'/'E'	966 // we must have at least one decimal digit after 'e'/'E'

959 return Token::ILLEGAL;	967 return Token::ILLEGAL;

960 ScanDecimalDigits();	968 ScanDecimalDigits();

961 }	969 }

962 TerminateLiteral();	970 TerminateLiteral();

963	971

964 // The source character immediately following a numeric literal must	972 // The source character immediately following a numeric literal must

965 // not be an identifier start or a decimal digit; see ECMA-262	973 // not be an identifier start or a decimal digit; see ECMA-262

966 // section 7.8.3, page 17 (note that we read only one decimal digit	974 // section 7.8.3, page 17 (note that we read only one decimal digit

967 // if the value is 0).	975 // if the value is 0).

968 if (IsDecimalDigit(c0_) || kIsIdentifierStart.get(c0_))	976 if (IsDecimalDigit(c0_) ||

	977 scanner_data_.kIsIdentifierStart_.get(c0_))

969 return Token::ILLEGAL;	978 return Token::ILLEGAL;

970	979

971 return Token::NUMBER;	980 return Token::NUMBER;

972 }	981 }

973	982

974	983

975 uc32 Scanner::ScanIdentifierUnicodeEscape() {	984 uc32 Scanner::ScanIdentifierUnicodeEscape() {

976 Advance();	985 Advance();

977 if (c0_ != 'u') return unibrow::Utf8::kBadChar;	986 if (c0_ != 'u') return unibrow::Utf8::kBadChar;

978 Advance();	987 Advance();

979 uc32 c = ScanHexEscape('u', 4);	988 uc32 c = ScanHexEscape('u', 4);

980 // We do not allow a unicode escape sequence to start another	989 // We do not allow a unicode escape sequence to start another

981 // unicode escape sequence.	990 // unicode escape sequence.

982 if (c == '\\') return unibrow::Utf8::kBadChar;	991 if (c == '\\') return unibrow::Utf8::kBadChar;

983 return c;	992 return c;

984 }	993 }

985	994

986	995

987 Token::Value Scanner::ScanIdentifier() {	996 Token::Value Scanner::ScanIdentifier() {

988 ASSERT(kIsIdentifierStart.get(c0_));	997 unibrow::Predicate<IdentifierPart, 128>& kIsIdentifierPart =

	998 scanner_data_.kIsIdentifierPart_;

	999

	1000 ASSERT(scanner_data_.kIsIdentifierStart_.get(c0_));

989	1001

990 StartLiteral();	1002 StartLiteral();

991 KeywordMatcher keyword_match;	1003 KeywordMatcher keyword_match;

992	1004

993 // Scan identifier start character.	1005 // Scan identifier start character.

994 if (c0_ == '\\') {	1006 if (c0_ == '\\') {

995 uc32 c = ScanIdentifierUnicodeEscape();	1007 uc32 c = ScanIdentifierUnicodeEscape();

996 // Only allow legal identifier start characters.	1008 // Only allow legal identifier start characters.

997 if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL;	1009 if (!scanner_data_.kIsIdentifierStart_.get(c)) return Token::ILLEGAL;

998 AddChar(c);	1010 AddChar(c);

999 keyword_match.Fail();	1011 keyword_match.Fail();

1000 } else {	1012 } else {

1001 AddChar(c0_);	1013 AddChar(c0_);

1002 keyword_match.AddChar(c0_);	1014 keyword_match.AddChar(c0_);

1003 Advance();	1015 Advance();

1004 }	1016 }

1005	1017

1006 // Scan the rest of the identifier characters.	1018 // Scan the rest of the identifier characters.

1007 while (kIsIdentifierPart.get(c0_)) {	1019 while (kIsIdentifierPart.get(c0_)) {

(...skipping 10 matching lines...) Expand all Loading...
1018 }	1030 }

1019 }	1031 }

1020 TerminateLiteral();	1032 TerminateLiteral();

1021	1033

1022 return keyword_match.token();	1034 return keyword_match.token();

1023 }	1035 }

1024	1036

1025	1037

1026	1038

1027 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) {	1039 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) {

	1040 ScannerData& scanner_data = v8_context()->scanner_data_;

	1041 unibrow::Predicate<IdentifierPart, 128>& kIsIdentifierPart =

	1042 scanner_data.kIsIdentifierPart_;

	1043

1028 // Checks whether the buffer contains an identifier (no escape).	1044 // Checks whether the buffer contains an identifier (no escape).

1029 if (!buffer->has_more()) return false;	1045 if (!buffer->has_more()) return false;

1030 if (!kIsIdentifierStart.get(buffer->GetNext())) return false;	1046 if (!scanner_data.kIsIdentifierStart_.get(buffer->GetNext())) return false;

1031 while (buffer->has_more()) {	1047 while (buffer->has_more()) {

1032 if (!kIsIdentifierPart.get(buffer->GetNext())) return false;	1048 if (!kIsIdentifierPart.get(buffer->GetNext())) return false;

1033 }	1049 }

1034 return true;	1050 return true;

1035 }	1051 }

1036	1052

1037	1053

1038 bool Scanner::ScanRegExpPattern(bool seen_equal) {	1054 bool Scanner::ScanRegExpPattern(bool seen_equal) {

1039 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags	1055 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags

1040 bool in_character_class = false;	1056 bool in_character_class = false;

1041	1057

1042 // Previous token is either '/' or '/=', in the second case, the	1058 // Previous token is either '/' or '/=', in the second case, the

1043 // pattern starts at =.	1059 // pattern starts at =.

1044 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);	1060 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);

1045 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);	1061 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);

1046	1062

1047 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,	1063 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,

1048 // the scanner should pass uninterpreted bodies to the RegExp	1064 // the scanner should pass uninterpreted bodies to the RegExp

1049 // constructor.	1065 // constructor.

1050 StartLiteral();	1066 StartLiteral();

1051 if (seen_equal)	1067 if (seen_equal)

1052 AddChar('=');	1068 AddChar('=');

1053	1069

	1070 unibrow::Predicate<unibrow::LineTerminator, 128>& kIsLineTerminator =

	1071 scanner_data_.kIsLineTerminator_;

	1072

1054 while (c0_ != '/' || in_character_class) {	1073 while (c0_ != '/' || in_character_class) {

1055 if (kIsLineTerminator.get(c0_) || c0_ < 0)	1074 if (kIsLineTerminator.get(c0_) || c0_ < 0)

1056 return false;	1075 return false;

1057 if (c0_ == '\\') { // escaped character	1076 if (c0_ == '\\') { // escaped character

1058 AddCharAdvance();	1077 AddCharAdvance();

1059 if (kIsLineTerminator.get(c0_) || c0_ < 0)	1078 if (kIsLineTerminator.get(c0_) || c0_ < 0)

1060 return false;	1079 return false;

1061 AddCharAdvance();	1080 AddCharAdvance();

1062 } else { // unescaped character	1081 } else { // unescaped character

1063 if (c0_ == '[')	1082 if (c0_ == '[')

1064 in_character_class = true;	1083 in_character_class = true;

1065 if (c0_ == ']')	1084 if (c0_ == ']')

1066 in_character_class = false;	1085 in_character_class = false;

1067 AddCharAdvance();	1086 AddCharAdvance();

1068 }	1087 }

1069 }	1088 }

1070 Advance(); // consume '/'	1089 Advance(); // consume '/'

1071	1090

1072 TerminateLiteral();	1091 TerminateLiteral();

1073	1092

1074 return true;	1093 return true;

1075 }	1094 }

1076	1095

1077 bool Scanner::ScanRegExpFlags() {	1096 bool Scanner::ScanRegExpFlags() {

1078 // Scan regular expression flags.	1097 // Scan regular expression flags.

1079 StartLiteral();	1098 StartLiteral();

	1099 unibrow::Predicate<IdentifierPart, 128>& kIsIdentifierPart =

	1100 scanner_data_.kIsIdentifierPart_;

	1101

1080 while (kIsIdentifierPart.get(c0_)) {	1102 while (kIsIdentifierPart.get(c0_)) {

1081 if (c0_ == '\\') {	1103 if (c0_ == '\\') {

1082 uc32 c = ScanIdentifierUnicodeEscape();	1104 uc32 c = ScanIdentifierUnicodeEscape();

1083 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {	1105 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {

1084 // We allow any escaped character, unlike the restriction on	1106 // We allow any escaped character, unlike the restriction on

1085 // IdentifierPart when it is used to build an IdentifierName.	1107 // IdentifierPart when it is used to build an IdentifierName.

1086 AddChar(c);	1108 AddChar(c);

1087 continue;	1109 continue;

1088 }	1110 }

1089 }	1111 }

1090 AddCharAdvance();	1112 AddCharAdvance();

1091 }	1113 }

1092 TerminateLiteral();	1114 TerminateLiteral();

1093	1115

1094 next_.location.end_pos = source_pos() - 1;	1116 next_.location.end_pos = source_pos() - 1;

1095 return true;	1117 return true;

1096 }	1118 }

1097	1119

1098 } } // namespace v8::internal	1120 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/scanner.h ('k') | src/scopeinfo.h » ('j') | no next file with comments »