Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(54)

Side by Side Diff: src/scanner.cc

Issue 700963002: Replace C++ bitfields with our own BitFields (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge
Patch Set: fixed AST node field sizes; more scanner fixes; undid hydrogen.h/cc changes. Created 6 years, 1 month ago.
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « src/runtime/runtime-array.cc ('k') | src/unicode.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Features shared by parsing and pre-parsing scanners. 5 // Features shared by parsing and pre-parsing scanners.
6 6
7 #include <stdint.h> 7 #include <stdint.h>
8 8
9 #include <cmath> 9 #include <cmath>
10 10
(...skipping 227 matching lines...) Expand 10 before | Expand all | Expand 10 after
238 // Spidermonkey. 238 // Spidermonkey.
239 return c == 0xFFFE; 239 return c == 0xFFFE;
240 } 240 }
241 241
242 242
243 bool Scanner::SkipWhiteSpace() { 243 bool Scanner::SkipWhiteSpace() {
244 int start_position = source_pos(); 244 int start_position = source_pos();
245 245
246 while (true) { 246 while (true) {
247 while (true) { 247 while (true) {
248 // The unicode cache accepts unsigned inputs.
249 if (c0_ < 0) break;
248 // Advance as long as character is a WhiteSpace or LineTerminator. 250 // Advance as long as character is a WhiteSpace or LineTerminator.
249 // Remember if the latter is the case. 251 // Remember if the latter is the case.
250 if (unicode_cache_->IsLineTerminator(c0_)) { 252 if (unicode_cache_->IsLineTerminator(c0_)) {
251 has_line_terminator_before_next_ = true; 253 has_line_terminator_before_next_ = true;
252 } else if (!unicode_cache_->IsWhiteSpace(c0_) && 254 } else if (!unicode_cache_->IsWhiteSpace(c0_) &&
253 !IsLittleEndianByteOrderMark(c0_)) { 255 !IsLittleEndianByteOrderMark(c0_)) {
254 break; 256 break;
255 } 257 }
256 Advance(); 258 Advance();
257 } 259 }
(...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after
358 } 360 }
359 361
360 362
361 Token::Value Scanner::SkipMultiLineComment() { 363 Token::Value Scanner::SkipMultiLineComment() {
362 DCHECK(c0_ == '*'); 364 DCHECK(c0_ == '*');
363 Advance(); 365 Advance();
364 366
365 while (c0_ >= 0) { 367 while (c0_ >= 0) {
366 uc32 ch = c0_; 368 uc32 ch = c0_;
367 Advance(); 369 Advance();
368 if (unicode_cache_->IsLineTerminator(ch)) { 370 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(ch)) {
369 // Following ECMA-262, section 7.4, a comment containing 371 // Following ECMA-262, section 7.4, a comment containing
370 // a newline will make the comment count as a line-terminator. 372 // a newline will make the comment count as a line-terminator.
371 has_multiline_comment_before_next_ = true; 373 has_multiline_comment_before_next_ = true;
372 } 374 }
373 // If we have reached the end of the multi-line comment, we 375 // If we have reached the end of the multi-line comment, we
374 // consume the '/' and insert a whitespace. This way all 376 // consume the '/' and insert a whitespace. This way all
375 // multi-line comments are treated as whitespace. 377 // multi-line comments are treated as whitespace.
376 if (ch == '*' && c0_ == '/') { 378 if (ch == '*' && c0_ == '/') {
377 c0_ = ' '; 379 c0_ = ' ';
378 return Token::WHITESPACE; 380 return Token::WHITESPACE;
(...skipping 239 matching lines...) Expand 10 before | Expand all | Expand 10 after
618 620
619 case '?': 621 case '?':
620 token = Select(Token::CONDITIONAL); 622 token = Select(Token::CONDITIONAL);
621 break; 623 break;
622 624
623 case '~': 625 case '~':
624 token = Select(Token::BIT_NOT); 626 token = Select(Token::BIT_NOT);
625 break; 627 break;
626 628
627 default: 629 default:
628 if (unicode_cache_->IsIdentifierStart(c0_)) { 630 if (c0_ < 0) {
631 token = Token::EOS;
632 } else if (unicode_cache_->IsIdentifierStart(c0_)) {
629 token = ScanIdentifierOrKeyword(); 633 token = ScanIdentifierOrKeyword();
630 } else if (IsDecimalDigit(c0_)) { 634 } else if (IsDecimalDigit(c0_)) {
631 token = ScanNumber(false); 635 token = ScanNumber(false);
632 } else if (SkipWhiteSpace()) { 636 } else if (SkipWhiteSpace()) {
633 token = Token::WHITESPACE; 637 token = Token::WHITESPACE;
634 } else if (c0_ < 0) {
635 token = Token::EOS;
636 } else { 638 } else {
637 token = Select(Token::ILLEGAL); 639 token = Select(Token::ILLEGAL);
638 } 640 }
639 break; 641 break;
640 } 642 }
641 643
642 // Continue scanning for tokens as long as we're just skipping 644 // Continue scanning for tokens as long as we're just skipping
643 // whitespace. 645 // whitespace.
644 } while (token == Token::WHITESPACE); 646 } while (token == Token::WHITESPACE);
645 647
(...skipping 21 matching lines...) Expand all
667 } 669 }
668 Scan(); 670 Scan();
669 } 671 }
670 672
671 673
672 bool Scanner::ScanEscape() { 674 bool Scanner::ScanEscape() {
673 uc32 c = c0_; 675 uc32 c = c0_;
674 Advance(); 676 Advance();
675 677
676 // Skip escaped newlines. 678 // Skip escaped newlines.
677 if (unicode_cache_->IsLineTerminator(c)) { 679 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) {
678 // Allow CR+LF newlines in multiline string literals. 680 // Allow CR+LF newlines in multiline string literals.
679 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); 681 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();
680 // Allow LF+CR newlines in multiline string literals. 682 // Allow LF+CR newlines in multiline string literals.
681 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); 683 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();
682 return true; 684 return true;
683 } 685 }
684 686
685 switch (c) { 687 switch (c) {
686 case '\'': // fall through 688 case '\'': // fall through
687 case '"' : // fall through 689 case '"' : // fall through
(...skipping 176 matching lines...) Expand 10 before | Expand all | Expand 10 after
864 // we must have at least one decimal digit after 'e'/'E' 866 // we must have at least one decimal digit after 'e'/'E'
865 return Token::ILLEGAL; 867 return Token::ILLEGAL;
866 } 868 }
867 ScanDecimalDigits(); 869 ScanDecimalDigits();
868 } 870 }
869 871
870 // The source character immediately following a numeric literal must 872 // The source character immediately following a numeric literal must
871 // not be an identifier start or a decimal digit; see ECMA-262 873 // not be an identifier start or a decimal digit; see ECMA-262
872 // section 7.8.3, page 17 (note that we read only one decimal digit 874 // section 7.8.3, page 17 (note that we read only one decimal digit
873 // if the value is 0). 875 // if the value is 0).
874 if (IsDecimalDigit(c0_) || unicode_cache_->IsIdentifierStart(c0_)) 876 if (IsDecimalDigit(c0_) ||
877 (c0_ >= 0 && unicode_cache_->IsIdentifierStart(c0_)))
875 return Token::ILLEGAL; 878 return Token::ILLEGAL;
876 879
877 literal.Complete(); 880 literal.Complete();
878 881
879 return Token::NUMBER; 882 return Token::NUMBER;
880 } 883 }
881 884
882 885
883 uc32 Scanner::ScanIdentifierUnicodeEscape() { 886 uc32 Scanner::ScanIdentifierUnicodeEscape() {
884 Advance(); 887 Advance();
(...skipping 147 matching lines...) Expand 10 before | Expand all | Expand 10 after
1032 } 1035 }
1033 AddLiteralChar(c); 1036 AddLiteralChar(c);
1034 return ScanIdentifierSuffix(&literal); 1037 return ScanIdentifierSuffix(&literal);
1035 } 1038 }
1036 1039
1037 uc32 first_char = c0_; 1040 uc32 first_char = c0_;
1038 Advance(); 1041 Advance();
1039 AddLiteralChar(first_char); 1042 AddLiteralChar(first_char);
1040 1043
1041 // Scan the rest of the identifier characters. 1044 // Scan the rest of the identifier characters.
1042 while (unicode_cache_->IsIdentifierPart(c0_)) { 1045 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) {
1043 if (c0_ != '\\') { 1046 if (c0_ != '\\') {
1044 uc32 next_char = c0_; 1047 uc32 next_char = c0_;
1045 Advance(); 1048 Advance();
1046 AddLiteralChar(next_char); 1049 AddLiteralChar(next_char);
1047 continue; 1050 continue;
1048 } 1051 }
1049 // Fallthrough if no longer able to complete keyword. 1052 // Fallthrough if no longer able to complete keyword.
1050 return ScanIdentifierSuffix(&literal); 1053 return ScanIdentifierSuffix(&literal);
1051 } 1054 }
1052 1055
1053 literal.Complete(); 1056 literal.Complete();
1054 1057
1055 if (next_.literal_chars->is_one_byte()) { 1058 if (next_.literal_chars->is_one_byte()) {
1056 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal(); 1059 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();
1057 return KeywordOrIdentifierToken(chars.start(), 1060 return KeywordOrIdentifierToken(chars.start(),
1058 chars.length(), 1061 chars.length(),
1059 harmony_scoping_, 1062 harmony_scoping_,
1060 harmony_modules_, 1063 harmony_modules_,
1061 harmony_classes_); 1064 harmony_classes_);
1062 } 1065 }
1063 1066
1064 return Token::IDENTIFIER; 1067 return Token::IDENTIFIER;
1065 } 1068 }
1066 1069
1067 1070
1068 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal) { 1071 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal) {
1069 // Scan the rest of the identifier characters. 1072 // Scan the rest of the identifier characters.
1070 while (unicode_cache_->IsIdentifierPart(c0_)) { 1073 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) {
1071 if (c0_ == '\\') { 1074 if (c0_ == '\\') {
1072 uc32 c = ScanIdentifierUnicodeEscape(); 1075 uc32 c = ScanIdentifierUnicodeEscape();
1073 // Only allow legal identifier part characters. 1076 // Only allow legal identifier part characters.
1074 if (c < 0 || 1077 if (c < 0 ||
1075 c == '\\' || 1078 c == '\\' ||
1076 !unicode_cache_->IsIdentifierPart(c)) { 1079 !unicode_cache_->IsIdentifierPart(c)) {
1077 return Token::ILLEGAL; 1080 return Token::ILLEGAL;
1078 } 1081 }
1079 AddLiteralChar(c); 1082 AddLiteralChar(c);
1080 } else { 1083 } else {
(...skipping 18 matching lines...) Expand all
1099 1102
1100 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, 1103 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
1101 // the scanner should pass uninterpreted bodies to the RegExp 1104 // the scanner should pass uninterpreted bodies to the RegExp
1102 // constructor. 1105 // constructor.
1103 LiteralScope literal(this); 1106 LiteralScope literal(this);
1104 if (seen_equal) { 1107 if (seen_equal) {
1105 AddLiteralChar('='); 1108 AddLiteralChar('=');
1106 } 1109 }
1107 1110
1108 while (c0_ != '/' || in_character_class) { 1111 while (c0_ != '/' || in_character_class) {
1109 if (unicode_cache_->IsLineTerminator(c0_) || c0_ < 0) return false; 1112 if (c0_ < 0 || unicode_cache_->IsLineTerminator(c0_)) return false;
1110 if (c0_ == '\\') { // Escape sequence. 1113 if (c0_ == '\\') { // Escape sequence.
1111 AddLiteralCharAdvance(); 1114 AddLiteralCharAdvance();
1112 if (unicode_cache_->IsLineTerminator(c0_) || c0_ < 0) return false; 1115 if (c0_ < 0 || unicode_cache_->IsLineTerminator(c0_)) return false;
1113 AddLiteralCharAdvance(); 1116 AddLiteralCharAdvance();
1114 // If the escape allows more characters, i.e., \x??, \u????, or \c?, 1117 // If the escape allows more characters, i.e., \x??, \u????, or \c?,
1115 // only "safe" characters are allowed (letters, digits, underscore), 1118 // only "safe" characters are allowed (letters, digits, underscore),
1116 // otherwise the escape isn't valid and the invalid character has 1119 // otherwise the escape isn't valid and the invalid character has
1117 // its normal meaning. I.e., we can just continue scanning without 1120 // its normal meaning. I.e., we can just continue scanning without
1118 // worrying whether the following characters are part of the escape 1121 // worrying whether the following characters are part of the escape
1119 // or not, since any '/', '\\' or '[' is guaranteed to not be part 1122 // or not, since any '/', '\\' or '[' is guaranteed to not be part
1120 // of the escape sequence. 1123 // of the escape sequence.
1121 1124
1122 // TODO(896): At some point, parse RegExps more thoroughly to capture 1125 // TODO(896): At some point, parse RegExps more thoroughly to capture
(...skipping 26 matching lines...) Expand all
1149 ++hex_digits_read; 1152 ++hex_digits_read;
1150 } 1153 }
1151 } 1154 }
1152 return hex_digits_read == 4; 1155 return hex_digits_read == 4;
1153 } 1156 }
1154 1157
1155 1158
1156 bool Scanner::ScanRegExpFlags() { 1159 bool Scanner::ScanRegExpFlags() {
1157 // Scan regular expression flags. 1160 // Scan regular expression flags.
1158 LiteralScope literal(this); 1161 LiteralScope literal(this);
1159 while (unicode_cache_->IsIdentifierPart(c0_)) { 1162 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) {
1160 if (c0_ != '\\') { 1163 if (c0_ != '\\') {
1161 AddLiteralCharAdvance(); 1164 AddLiteralCharAdvance();
1162 } else { 1165 } else {
1163 if (!ScanLiteralUnicodeEscape()) { 1166 if (!ScanLiteralUnicodeEscape()) {
1164 return false; 1167 return false;
1165 } 1168 }
1166 Advance(); 1169 Advance();
1167 } 1170 }
1168 } 1171 }
1169 literal.Complete(); 1172 literal.Complete();
(...skipping 174 matching lines...) Expand 10 before | Expand all | Expand 10 after
1344 } 1347 }
1345 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); 1348 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));
1346 } 1349 }
1347 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); 1350 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));
1348 1351
1349 backing_store_.AddBlock(bytes); 1352 backing_store_.AddBlock(bytes);
1350 return backing_store_.EndSequence().start(); 1353 return backing_store_.EndSequence().start();
1351 } 1354 }
1352 1355
1353 } } // namespace v8::internal 1356 } } // namespace v8::internal
OLD | NEW
« no previous file with comments | « src/runtime/runtime-array.cc ('k') | src/unicode.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698