src/scanner.cc - Issue 700963002: Replace C++ bitfields with our own BitFields

Side by Side Diff: src/scanner.cc

Issue 700963002: Replace C++ bitfields with our own BitFields (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: fixed AST node field sizes; more scanner fixes; undid hydrogen.h/cc changes Created 6 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Features shared by parsing and pre-parsing scanners.	5 // Features shared by parsing and pre-parsing scanners.

6	6

7 #include <stdint.h>	7 #include <stdint.h>

8	8

9 #include <cmath>	9 #include <cmath>

10	10

(...skipping 227 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
238 // Spidermonkey.	238 // Spidermonkey.

239 return c == 0xFFFE;	239 return c == 0xFFFE;

240 }	240 }

241	241

242	242

243 bool Scanner::SkipWhiteSpace() {	243 bool Scanner::SkipWhiteSpace() {

244 int start_position = source_pos();	244 int start_position = source_pos();

245	245

246 while (true) {	246 while (true) {

247 while (true) {	247 while (true) {

	248 // The unicode cache accepts unsigned inputs.

	249 if (c0_ < 0) break;

248 // Advance as long as character is a WhiteSpace or LineTerminator.	250 // Advance as long as character is a WhiteSpace or LineTerminator.

249 // Remember if the latter is the case.	251 // Remember if the latter is the case.

250 if (unicode_cache_->IsLineTerminator(c0_)) {	252 if (unicode_cache_->IsLineTerminator(c0_)) {

251 has_line_terminator_before_next_ = true;	253 has_line_terminator_before_next_ = true;

252 } else if (!unicode_cache_->IsWhiteSpace(c0_) &&	254 } else if (!unicode_cache_->IsWhiteSpace(c0_) &&

253 !IsLittleEndianByteOrderMark(c0_)) {	255 !IsLittleEndianByteOrderMark(c0_)) {

254 break;	256 break;

255 }	257 }

256 Advance();	258 Advance();

257 }	259 }

(...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
358 }	360 }

359	361

360	362

361 Token::Value Scanner::SkipMultiLineComment() {	363 Token::Value Scanner::SkipMultiLineComment() {

362 DCHECK(c0_ == '*');	364 DCHECK(c0_ == '*');

363 Advance();	365 Advance();

364	366

365 while (c0_ >= 0) {	367 while (c0_ >= 0) {

366 uc32 ch = c0_;	368 uc32 ch = c0_;

367 Advance();	369 Advance();

368 if (unicode_cache_->IsLineTerminator(ch)) {	370 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(ch)) {

369 // Following ECMA-262, section 7.4, a comment containing	371 // Following ECMA-262, section 7.4, a comment containing

370 // a newline will make the comment count as a line-terminator.	372 // a newline will make the comment count as a line-terminator.

371 has_multiline_comment_before_next_ = true;	373 has_multiline_comment_before_next_ = true;

372 }	374 }

373 // If we have reached the end of the multi-line comment, we	375 // If we have reached the end of the multi-line comment, we

374 // consume the '/' and insert a whitespace. This way all	376 // consume the '/' and insert a whitespace. This way all

375 // multi-line comments are treated as whitespace.	377 // multi-line comments are treated as whitespace.

376 if (ch == '*' && c0_ == '/') {	378 if (ch == '*' && c0_ == '/') {

377 c0_ = ' ';	379 c0_ = ' ';

378 return Token::WHITESPACE;	380 return Token::WHITESPACE;

(...skipping 239 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
618	620

619 case '?':	621 case '?':

620 token = Select(Token::CONDITIONAL);	622 token = Select(Token::CONDITIONAL);

621 break;	623 break;

622	624

623 case '~':	625 case '~':

624 token = Select(Token::BIT_NOT);	626 token = Select(Token::BIT_NOT);

625 break;	627 break;

626	628

627 default:	629 default:

628 if (unicode_cache_->IsIdentifierStart(c0_)) {	630 if (c0_ < 0) {

	631 token = Token::EOS;

	632 } else if (unicode_cache_->IsIdentifierStart(c0_)) {

629 token = ScanIdentifierOrKeyword();	633 token = ScanIdentifierOrKeyword();

630 } else if (IsDecimalDigit(c0_)) {	634 } else if (IsDecimalDigit(c0_)) {

631 token = ScanNumber(false);	635 token = ScanNumber(false);

632 } else if (SkipWhiteSpace()) {	636 } else if (SkipWhiteSpace()) {

633 token = Token::WHITESPACE;	637 token = Token::WHITESPACE;

634 } else if (c0_ < 0) {

635 token = Token::EOS;

636 } else {	638 } else {

637 token = Select(Token::ILLEGAL);	639 token = Select(Token::ILLEGAL);

638 }	640 }

639 break;	641 break;

640 }	642 }

641	643

642 // Continue scanning for tokens as long as we're just skipping	644 // Continue scanning for tokens as long as we're just skipping

643 // whitespace.	645 // whitespace.

644 } while (token == Token::WHITESPACE);	646 } while (token == Token::WHITESPACE);

645	647

(...skipping 21 matching lines...) Expand all Loading...
667 }	669 }

668 Scan();	670 Scan();

669 }	671 }

670	672

671	673

672 bool Scanner::ScanEscape() {	674 bool Scanner::ScanEscape() {

673 uc32 c = c0_;	675 uc32 c = c0_;

674 Advance();	676 Advance();

675	677

676 // Skip escaped newlines.	678 // Skip escaped newlines.

677 if (unicode_cache_->IsLineTerminator(c)) {	679 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) {

678 // Allow CR+LF newlines in multiline string literals.	680 // Allow CR+LF newlines in multiline string literals.

679 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();	681 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();

680 // Allow LF+CR newlines in multiline string literals.	682 // Allow LF+CR newlines in multiline string literals.

681 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();	683 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();

682 return true;	684 return true;

683 }	685 }

684	686

685 switch (c) {	687 switch (c) {

686 case '\'': // fall through	688 case '\'': // fall through

687 case '"' : // fall through	689 case '"' : // fall through

(...skipping 176 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
864 // we must have at least one decimal digit after 'e'/'E'	866 // we must have at least one decimal digit after 'e'/'E'

865 return Token::ILLEGAL;	867 return Token::ILLEGAL;

866 }	868 }

867 ScanDecimalDigits();	869 ScanDecimalDigits();

868 }	870 }

869	871

870 // The source character immediately following a numeric literal must	872 // The source character immediately following a numeric literal must

871 // not be an identifier start or a decimal digit; see ECMA-262	873 // not be an identifier start or a decimal digit; see ECMA-262

872 // section 7.8.3, page 17 (note that we read only one decimal digit	874 // section 7.8.3, page 17 (note that we read only one decimal digit

873 // if the value is 0).	875 // if the value is 0).

874 if (IsDecimalDigit(c0_) || unicode_cache_->IsIdentifierStart(c0_))	876 if (IsDecimalDigit(c0_) ||

	877 (c0_ >= 0 && unicode_cache_->IsIdentifierStart(c0_)))

875 return Token::ILLEGAL;	878 return Token::ILLEGAL;

876	879

877 literal.Complete();	880 literal.Complete();

878	881

879 return Token::NUMBER;	882 return Token::NUMBER;

880 }	883 }

881	884

882	885

883 uc32 Scanner::ScanIdentifierUnicodeEscape() {	886 uc32 Scanner::ScanIdentifierUnicodeEscape() {

884 Advance();	887 Advance();

(...skipping 147 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1032 }	1035 }

1033 AddLiteralChar(c);	1036 AddLiteralChar(c);

1034 return ScanIdentifierSuffix(&literal);	1037 return ScanIdentifierSuffix(&literal);

1035 }	1038 }

1036	1039

1037 uc32 first_char = c0_;	1040 uc32 first_char = c0_;

1038 Advance();	1041 Advance();

1039 AddLiteralChar(first_char);	1042 AddLiteralChar(first_char);

1040	1043

1041 // Scan the rest of the identifier characters.	1044 // Scan the rest of the identifier characters.

1042 while (unicode_cache_->IsIdentifierPart(c0_)) {	1045 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) {

1043 if (c0_ != '\\') {	1046 if (c0_ != '\\') {

1044 uc32 next_char = c0_;	1047 uc32 next_char = c0_;

1045 Advance();	1048 Advance();

1046 AddLiteralChar(next_char);	1049 AddLiteralChar(next_char);

1047 continue;	1050 continue;

1048 }	1051 }

1049 // Fallthrough if no longer able to complete keyword.	1052 // Fallthrough if no longer able to complete keyword.

1050 return ScanIdentifierSuffix(&literal);	1053 return ScanIdentifierSuffix(&literal);

1051 }	1054 }

1052	1055

1053 literal.Complete();	1056 literal.Complete();

1054	1057

1055 if (next_.literal_chars->is_one_byte()) {	1058 if (next_.literal_chars->is_one_byte()) {

1056 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();	1059 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();

1057 return KeywordOrIdentifierToken(chars.start(),	1060 return KeywordOrIdentifierToken(chars.start(),

1058 chars.length(),	1061 chars.length(),

1059 harmony_scoping_,	1062 harmony_scoping_,

1060 harmony_modules_,	1063 harmony_modules_,

1061 harmony_classes_);	1064 harmony_classes_);

1062 }	1065 }

1063	1066

1064 return Token::IDENTIFIER;	1067 return Token::IDENTIFIER;

1065 }	1068 }

1066	1069

1067	1070

1068 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal) {	1071 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal) {

1069 // Scan the rest of the identifier characters.	1072 // Scan the rest of the identifier characters.

1070 while (unicode_cache_->IsIdentifierPart(c0_)) {	1073 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) {

1071 if (c0_ == '\\') {	1074 if (c0_ == '\\') {

1072 uc32 c = ScanIdentifierUnicodeEscape();	1075 uc32 c = ScanIdentifierUnicodeEscape();

1073 // Only allow legal identifier part characters.	1076 // Only allow legal identifier part characters.

1074 if (c < 0 ||	1077 if (c < 0 ||

1075 c == '\\' ||	1078 c == '\\' ||

1076 !unicode_cache_->IsIdentifierPart(c)) {	1079 !unicode_cache_->IsIdentifierPart(c)) {

1077 return Token::ILLEGAL;	1080 return Token::ILLEGAL;

1078 }	1081 }

1079 AddLiteralChar(c);	1082 AddLiteralChar(c);

1080 } else {	1083 } else {

(...skipping 18 matching lines...) Expand all Loading...
1099	1102

1100 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,	1103 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,

1101 // the scanner should pass uninterpreted bodies to the RegExp	1104 // the scanner should pass uninterpreted bodies to the RegExp

1102 // constructor.	1105 // constructor.

1103 LiteralScope literal(this);	1106 LiteralScope literal(this);

1104 if (seen_equal) {	1107 if (seen_equal) {

1105 AddLiteralChar('=');	1108 AddLiteralChar('=');

1106 }	1109 }

1107	1110

1108 while (c0_ != '/' || in_character_class) {	1111 while (c0_ != '/' || in_character_class) {

1109 if (unicode_cache_->IsLineTerminator(c0_) || c0_ < 0) return false;	1112 if (c0_ < 0 || unicode_cache_->IsLineTerminator(c0_)) return false;

1110 if (c0_ == '\\') { // Escape sequence.	1113 if (c0_ == '\\') { // Escape sequence.

1111 AddLiteralCharAdvance();	1114 AddLiteralCharAdvance();

1112 if (unicode_cache_->IsLineTerminator(c0_) || c0_ < 0) return false;	1115 if (c0_ < 0 || unicode_cache_->IsLineTerminator(c0_)) return false;

1113 AddLiteralCharAdvance();	1116 AddLiteralCharAdvance();

1114 // If the escape allows more characters, i.e., \x??, \u????, or \c?,	1117 // If the escape allows more characters, i.e., \x??, \u????, or \c?,

1115 // only "safe" characters are allowed (letters, digits, underscore),	1118 // only "safe" characters are allowed (letters, digits, underscore),

1116 // otherwise the escape isn't valid and the invalid character has	1119 // otherwise the escape isn't valid and the invalid character has

1117 // its normal meaning. I.e., we can just continue scanning without	1120 // its normal meaning. I.e., we can just continue scanning without

1118 // worrying whether the following characters are part of the escape	1121 // worrying whether the following characters are part of the escape

1119 // or not, since any '/', '\\' or '[' is guaranteed to not be part	1122 // or not, since any '/', '\\' or '[' is guaranteed to not be part

1120 // of the escape sequence.	1123 // of the escape sequence.

1121	1124

1122 // TODO(896): At some point, parse RegExps more throughly to capture	1125 // TODO(896): At some point, parse RegExps more throughly to capture

(...skipping 26 matching lines...) Expand all Loading...
1149 ++hex_digits_read;	1152 ++hex_digits_read;

1150 }	1153 }

1151 }	1154 }

1152 return hex_digits_read == 4;	1155 return hex_digits_read == 4;

1153 }	1156 }

1154	1157

1155	1158

1156 bool Scanner::ScanRegExpFlags() {	1159 bool Scanner::ScanRegExpFlags() {

1157 // Scan regular expression flags.	1160 // Scan regular expression flags.

1158 LiteralScope literal(this);	1161 LiteralScope literal(this);

1159 while (unicode_cache_->IsIdentifierPart(c0_)) {	1162 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) {

1160 if (c0_ != '\\') {	1163 if (c0_ != '\\') {

1161 AddLiteralCharAdvance();	1164 AddLiteralCharAdvance();

1162 } else {	1165 } else {

1163 if (!ScanLiteralUnicodeEscape()) {	1166 if (!ScanLiteralUnicodeEscape()) {

1164 return false;	1167 return false;

1165 }	1168 }

1166 Advance();	1169 Advance();

1167 }	1170 }

1168 }	1171 }

1169 literal.Complete();	1172 literal.Complete();

(...skipping 174 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1344 }	1347 }

1345 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));	1348 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));

1346 }	1349 }

1347 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));	1350 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));

1348	1351

1349 backing_store_.AddBlock(bytes);	1352 backing_store_.AddBlock(bytes);

1350 return backing_store_.EndSequence().start();	1353 return backing_store_.EndSequence().start();

1351 }	1354 }

1352	1355

1353 } } // namespace v8::internal	1356 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/runtime/runtime-array.cc ('k') | src/unicode.h » ('j') | no next file with comments »