| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
| 6 | 6 |
| 7 #include <stdint.h> | 7 #include <stdint.h> |
| 8 | 8 |
| 9 #include <cmath> | 9 #include <cmath> |
| 10 | 10 |
| (...skipping 227 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 238 // Spidermonkey. | 238 // Spidermonkey. |
| 239 return c == 0xFFFE; | 239 return c == 0xFFFE; |
| 240 } | 240 } |
| 241 | 241 |
| 242 | 242 |
| 243 bool Scanner::SkipWhiteSpace() { | 243 bool Scanner::SkipWhiteSpace() { |
| 244 int start_position = source_pos(); | 244 int start_position = source_pos(); |
| 245 | 245 |
| 246 while (true) { | 246 while (true) { |
| 247 while (true) { | 247 while (true) { |
| 248 // The unicode cache accepts unsigned inputs. |
| 249 if (c0_ < 0) break; |
| 248 // Advance as long as character is a WhiteSpace or LineTerminator. | 250 // Advance as long as character is a WhiteSpace or LineTerminator. |
| 249 // Remember if the latter is the case. | 251 // Remember if the latter is the case. |
| 250 if (unicode_cache_->IsLineTerminator(c0_)) { | 252 if (unicode_cache_->IsLineTerminator(c0_)) { |
| 251 has_line_terminator_before_next_ = true; | 253 has_line_terminator_before_next_ = true; |
| 252 } else if (!unicode_cache_->IsWhiteSpace(c0_) && | 254 } else if (!unicode_cache_->IsWhiteSpace(c0_) && |
| 253 !IsLittleEndianByteOrderMark(c0_)) { | 255 !IsLittleEndianByteOrderMark(c0_)) { |
| 254 break; | 256 break; |
| 255 } | 257 } |
| 256 Advance(); | 258 Advance(); |
| 257 } | 259 } |
| (...skipping 100 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 358 } | 360 } |
| 359 | 361 |
| 360 | 362 |
| 361 Token::Value Scanner::SkipMultiLineComment() { | 363 Token::Value Scanner::SkipMultiLineComment() { |
| 362 DCHECK(c0_ == '*'); | 364 DCHECK(c0_ == '*'); |
| 363 Advance(); | 365 Advance(); |
| 364 | 366 |
| 365 while (c0_ >= 0) { | 367 while (c0_ >= 0) { |
| 366 uc32 ch = c0_; | 368 uc32 ch = c0_; |
| 367 Advance(); | 369 Advance(); |
| 368 if (unicode_cache_->IsLineTerminator(ch)) { | 370 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(ch)) { |
| 369 // Following ECMA-262, section 7.4, a comment containing | 371 // Following ECMA-262, section 7.4, a comment containing |
| 370 // a newline will make the comment count as a line-terminator. | 372 // a newline will make the comment count as a line-terminator. |
| 371 has_multiline_comment_before_next_ = true; | 373 has_multiline_comment_before_next_ = true; |
| 372 } | 374 } |
| 373 // If we have reached the end of the multi-line comment, we | 375 // If we have reached the end of the multi-line comment, we |
| 374 // consume the '/' and insert a whitespace. This way all | 376 // consume the '/' and insert a whitespace. This way all |
| 375 // multi-line comments are treated as whitespace. | 377 // multi-line comments are treated as whitespace. |
| 376 if (ch == '*' && c0_ == '/') { | 378 if (ch == '*' && c0_ == '/') { |
| 377 c0_ = ' '; | 379 c0_ = ' '; |
| 378 return Token::WHITESPACE; | 380 return Token::WHITESPACE; |
| (...skipping 239 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 618 | 620 |
| 619 case '?': | 621 case '?': |
| 620 token = Select(Token::CONDITIONAL); | 622 token = Select(Token::CONDITIONAL); |
| 621 break; | 623 break; |
| 622 | 624 |
| 623 case '~': | 625 case '~': |
| 624 token = Select(Token::BIT_NOT); | 626 token = Select(Token::BIT_NOT); |
| 625 break; | 627 break; |
| 626 | 628 |
| 627 default: | 629 default: |
| 628 if (unicode_cache_->IsIdentifierStart(c0_)) { | 630 if (c0_ < 0) { |
| 631 token = Token::EOS; |
| 632 } else if (unicode_cache_->IsIdentifierStart(c0_)) { |
| 629 token = ScanIdentifierOrKeyword(); | 633 token = ScanIdentifierOrKeyword(); |
| 630 } else if (IsDecimalDigit(c0_)) { | 634 } else if (IsDecimalDigit(c0_)) { |
| 631 token = ScanNumber(false); | 635 token = ScanNumber(false); |
| 632 } else if (SkipWhiteSpace()) { | 636 } else if (SkipWhiteSpace()) { |
| 633 token = Token::WHITESPACE; | 637 token = Token::WHITESPACE; |
| 634 } else if (c0_ < 0) { | |
| 635 token = Token::EOS; | |
| 636 } else { | 638 } else { |
| 637 token = Select(Token::ILLEGAL); | 639 token = Select(Token::ILLEGAL); |
| 638 } | 640 } |
| 639 break; | 641 break; |
| 640 } | 642 } |
| 641 | 643 |
| 642 // Continue scanning for tokens as long as we're just skipping | 644 // Continue scanning for tokens as long as we're just skipping |
| 643 // whitespace. | 645 // whitespace. |
| 644 } while (token == Token::WHITESPACE); | 646 } while (token == Token::WHITESPACE); |
| 645 | 647 |
| (...skipping 21 matching lines...) Expand all Loading... |
| 667 } | 669 } |
| 668 Scan(); | 670 Scan(); |
| 669 } | 671 } |
| 670 | 672 |
| 671 | 673 |
| 672 bool Scanner::ScanEscape() { | 674 bool Scanner::ScanEscape() { |
| 673 uc32 c = c0_; | 675 uc32 c = c0_; |
| 674 Advance(); | 676 Advance(); |
| 675 | 677 |
| 676 // Skip escaped newlines. | 678 // Skip escaped newlines. |
| 677 if (unicode_cache_->IsLineTerminator(c)) { | 679 if (c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) { |
| 678 // Allow CR+LF newlines in multiline string literals. | 680 // Allow CR+LF newlines in multiline string literals. |
| 679 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); | 681 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); |
| 680 // Allow LF+CR newlines in multiline string literals. | 682 // Allow LF+CR newlines in multiline string literals. |
| 681 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); | 683 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); |
| 682 return true; | 684 return true; |
| 683 } | 685 } |
| 684 | 686 |
| 685 switch (c) { | 687 switch (c) { |
| 686 case '\'': // fall through | 688 case '\'': // fall through |
| 687 case '"' : // fall through | 689 case '"' : // fall through |
| (...skipping 176 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 864 // we must have at least one decimal digit after 'e'/'E' | 866 // we must have at least one decimal digit after 'e'/'E' |
| 865 return Token::ILLEGAL; | 867 return Token::ILLEGAL; |
| 866 } | 868 } |
| 867 ScanDecimalDigits(); | 869 ScanDecimalDigits(); |
| 868 } | 870 } |
| 869 | 871 |
| 870 // The source character immediately following a numeric literal must | 872 // The source character immediately following a numeric literal must |
| 871 // not be an identifier start or a decimal digit; see ECMA-262 | 873 // not be an identifier start or a decimal digit; see ECMA-262 |
| 872 // section 7.8.3, page 17 (note that we read only one decimal digit | 874 // section 7.8.3, page 17 (note that we read only one decimal digit |
| 873 // if the value is 0). | 875 // if the value is 0). |
| 874 if (IsDecimalDigit(c0_) || unicode_cache_->IsIdentifierStart(c0_)) | 876 if (IsDecimalDigit(c0_) || |
| 877 (c0_ >= 0 && unicode_cache_->IsIdentifierStart(c0_))) |
| 875 return Token::ILLEGAL; | 878 return Token::ILLEGAL; |
| 876 | 879 |
| 877 literal.Complete(); | 880 literal.Complete(); |
| 878 | 881 |
| 879 return Token::NUMBER; | 882 return Token::NUMBER; |
| 880 } | 883 } |
| 881 | 884 |
| 882 | 885 |
| 883 uc32 Scanner::ScanIdentifierUnicodeEscape() { | 886 uc32 Scanner::ScanIdentifierUnicodeEscape() { |
| 884 Advance(); | 887 Advance(); |
| (...skipping 147 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1032 } | 1035 } |
| 1033 AddLiteralChar(c); | 1036 AddLiteralChar(c); |
| 1034 return ScanIdentifierSuffix(&literal); | 1037 return ScanIdentifierSuffix(&literal); |
| 1035 } | 1038 } |
| 1036 | 1039 |
| 1037 uc32 first_char = c0_; | 1040 uc32 first_char = c0_; |
| 1038 Advance(); | 1041 Advance(); |
| 1039 AddLiteralChar(first_char); | 1042 AddLiteralChar(first_char); |
| 1040 | 1043 |
| 1041 // Scan the rest of the identifier characters. | 1044 // Scan the rest of the identifier characters. |
| 1042 while (unicode_cache_->IsIdentifierPart(c0_)) { | 1045 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { |
| 1043 if (c0_ != '\\') { | 1046 if (c0_ != '\\') { |
| 1044 uc32 next_char = c0_; | 1047 uc32 next_char = c0_; |
| 1045 Advance(); | 1048 Advance(); |
| 1046 AddLiteralChar(next_char); | 1049 AddLiteralChar(next_char); |
| 1047 continue; | 1050 continue; |
| 1048 } | 1051 } |
| 1049 // Fallthrough if no longer able to complete keyword. | 1052 // Fallthrough if no longer able to complete keyword. |
| 1050 return ScanIdentifierSuffix(&literal); | 1053 return ScanIdentifierSuffix(&literal); |
| 1051 } | 1054 } |
| 1052 | 1055 |
| 1053 literal.Complete(); | 1056 literal.Complete(); |
| 1054 | 1057 |
| 1055 if (next_.literal_chars->is_one_byte()) { | 1058 if (next_.literal_chars->is_one_byte()) { |
| 1056 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal(); | 1059 Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal(); |
| 1057 return KeywordOrIdentifierToken(chars.start(), | 1060 return KeywordOrIdentifierToken(chars.start(), |
| 1058 chars.length(), | 1061 chars.length(), |
| 1059 harmony_scoping_, | 1062 harmony_scoping_, |
| 1060 harmony_modules_, | 1063 harmony_modules_, |
| 1061 harmony_classes_); | 1064 harmony_classes_); |
| 1062 } | 1065 } |
| 1063 | 1066 |
| 1064 return Token::IDENTIFIER; | 1067 return Token::IDENTIFIER; |
| 1065 } | 1068 } |
| 1066 | 1069 |
| 1067 | 1070 |
| 1068 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal) { | 1071 Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal) { |
| 1069 // Scan the rest of the identifier characters. | 1072 // Scan the rest of the identifier characters. |
| 1070 while (unicode_cache_->IsIdentifierPart(c0_)) { | 1073 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { |
| 1071 if (c0_ == '\\') { | 1074 if (c0_ == '\\') { |
| 1072 uc32 c = ScanIdentifierUnicodeEscape(); | 1075 uc32 c = ScanIdentifierUnicodeEscape(); |
| 1073 // Only allow legal identifier part characters. | 1076 // Only allow legal identifier part characters. |
| 1074 if (c < 0 || | 1077 if (c < 0 || |
| 1075 c == '\\' || | 1078 c == '\\' || |
| 1076 !unicode_cache_->IsIdentifierPart(c)) { | 1079 !unicode_cache_->IsIdentifierPart(c)) { |
| 1077 return Token::ILLEGAL; | 1080 return Token::ILLEGAL; |
| 1078 } | 1081 } |
| 1079 AddLiteralChar(c); | 1082 AddLiteralChar(c); |
| 1080 } else { | 1083 } else { |
| (...skipping 18 matching lines...) Expand all Loading... |
| 1099 | 1102 |
| 1100 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, | 1103 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, |
| 1101 // the scanner should pass uninterpreted bodies to the RegExp | 1104 // the scanner should pass uninterpreted bodies to the RegExp |
| 1102 // constructor. | 1105 // constructor. |
| 1103 LiteralScope literal(this); | 1106 LiteralScope literal(this); |
| 1104 if (seen_equal) { | 1107 if (seen_equal) { |
| 1105 AddLiteralChar('='); | 1108 AddLiteralChar('='); |
| 1106 } | 1109 } |
| 1107 | 1110 |
| 1108 while (c0_ != '/' || in_character_class) { | 1111 while (c0_ != '/' || in_character_class) { |
| 1109 if (unicode_cache_->IsLineTerminator(c0_) || c0_ < 0) return false; | 1112 if (c0_ < 0 || unicode_cache_->IsLineTerminator(c0_)) return false; |
| 1110 if (c0_ == '\\') { // Escape sequence. | 1113 if (c0_ == '\\') { // Escape sequence. |
| 1111 AddLiteralCharAdvance(); | 1114 AddLiteralCharAdvance(); |
| 1112 if (unicode_cache_->IsLineTerminator(c0_) || c0_ < 0) return false; | 1115 if (c0_ < 0 || unicode_cache_->IsLineTerminator(c0_)) return false; |
| 1113 AddLiteralCharAdvance(); | 1116 AddLiteralCharAdvance(); |
| 1114 // If the escape allows more characters, i.e., \x??, \u????, or \c?, | 1117 // If the escape allows more characters, i.e., \x??, \u????, or \c?, |
| 1115 // only "safe" characters are allowed (letters, digits, underscore), | 1118 // only "safe" characters are allowed (letters, digits, underscore), |
| 1116 // otherwise the escape isn't valid and the invalid character has | 1119 // otherwise the escape isn't valid and the invalid character has |
| 1117 // its normal meaning. I.e., we can just continue scanning without | 1120 // its normal meaning. I.e., we can just continue scanning without |
| 1118 // worrying whether the following characters are part of the escape | 1121 // worrying whether the following characters are part of the escape |
| 1119 // or not, since any '/', '\\' or '[' is guaranteed to not be part | 1122 // or not, since any '/', '\\' or '[' is guaranteed to not be part |
| 1120 // of the escape sequence. | 1123 // of the escape sequence. |
| 1121 | 1124 |
| 1122 // TODO(896): At some point, parse RegExps more thoroughly to capture | 1125 // TODO(896): At some point, parse RegExps more thoroughly to capture |
| (...skipping 26 matching lines...) Expand all Loading... |
| 1149 ++hex_digits_read; | 1152 ++hex_digits_read; |
| 1150 } | 1153 } |
| 1151 } | 1154 } |
| 1152 return hex_digits_read == 4; | 1155 return hex_digits_read == 4; |
| 1153 } | 1156 } |
| 1154 | 1157 |
| 1155 | 1158 |
| 1156 bool Scanner::ScanRegExpFlags() { | 1159 bool Scanner::ScanRegExpFlags() { |
| 1157 // Scan regular expression flags. | 1160 // Scan regular expression flags. |
| 1158 LiteralScope literal(this); | 1161 LiteralScope literal(this); |
| 1159 while (unicode_cache_->IsIdentifierPart(c0_)) { | 1162 while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { |
| 1160 if (c0_ != '\\') { | 1163 if (c0_ != '\\') { |
| 1161 AddLiteralCharAdvance(); | 1164 AddLiteralCharAdvance(); |
| 1162 } else { | 1165 } else { |
| 1163 if (!ScanLiteralUnicodeEscape()) { | 1166 if (!ScanLiteralUnicodeEscape()) { |
| 1164 return false; | 1167 return false; |
| 1165 } | 1168 } |
| 1166 Advance(); | 1169 Advance(); |
| 1167 } | 1170 } |
| 1168 } | 1171 } |
| 1169 literal.Complete(); | 1172 literal.Complete(); |
| (...skipping 174 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1344 } | 1347 } |
| 1345 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); | 1348 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); |
| 1346 } | 1349 } |
| 1347 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); | 1350 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); |
| 1348 | 1351 |
| 1349 backing_store_.AddBlock(bytes); | 1352 backing_store_.AddBlock(bytes); |
| 1350 return backing_store_.EndSequence().start(); | 1353 return backing_store_.EndSequence().start(); |
| 1351 } | 1354 } |
| 1352 | 1355 |
| 1353 } } // namespace v8::internal | 1356 } } // namespace v8::internal |
| OLD | NEW |