| OLD | NEW |
| 1 // Copyright 2010 the V8 project authors. All rights reserved. | 1 // Copyright 2010 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 23 matching lines...) Expand all Loading... |
| 34 some classes (NativeAllocationChecker) are moved into isolate.h | 34 some classes (NativeAllocationChecker) are moved into isolate.h |
| 35 #include "../include/v8stdint.h" | 35 #include "../include/v8stdint.h" |
| 36 */ | 36 */ |
| 37 #include "scanner-base.h" | 37 #include "scanner-base.h" |
| 38 #include "char-predicates-inl.h" | 38 #include "char-predicates-inl.h" |
| 39 | 39 |
| 40 namespace v8 { | 40 namespace v8 { |
| 41 namespace internal { | 41 namespace internal { |
| 42 | 42 |
| 43 // ---------------------------------------------------------------------------- | 43 // ---------------------------------------------------------------------------- |
| 44 // LiteralCollector | |
| 45 | |
| 46 LiteralCollector::LiteralCollector() | |
| 47 : buffer_(kInitialCapacity), recording_(false) { } | |
| 48 | |
| 49 | |
| 50 LiteralCollector::~LiteralCollector() {} | |
| 51 | |
| 52 | |
| 53 void LiteralCollector::AddCharSlow(uc32 c) { | |
| 54 ASSERT(static_cast<unsigned>(c) > unibrow::Utf8::kMaxOneByteChar); | |
| 55 int length = unibrow::Utf8::Length(c); | |
| 56 Vector<char> block = buffer_.AddBlock(length, '\0'); | |
| 57 #ifdef DEBUG | |
| 58 int written_length = unibrow::Utf8::Encode(block.start(), c); | |
| 59 CHECK_EQ(length, written_length); | |
| 60 #else | |
| 61 unibrow::Utf8::Encode(block.start(), c); | |
| 62 #endif | |
| 63 } | |
| 64 | |
| 65 // Compound predicates. | 44 // Compound predicates. |
| 66 | 45 |
| 67 bool ScannerConstants::IsIdentifier(unibrow::CharacterStream* buffer) { | 46 bool ScannerConstants::IsIdentifier(unibrow::CharacterStream* buffer) { |
| 68 // Checks whether the buffer contains an identifier (no escape). | 47 // Checks whether the buffer contains an identifier (no escape). |
| 69 if (!buffer->has_more()) return false; | 48 if (!buffer->has_more()) return false; |
| 70 if (!kIsIdentifierStart.get(buffer->GetNext())) { | 49 if (!kIsIdentifierStart.get(buffer->GetNext())) { |
| 71 return false; | 50 return false; |
| 72 } | 51 } |
| 73 while (buffer->has_more()) { | 52 while (buffer->has_more()) { |
| 74 if (!kIsIdentifierPart.get(buffer->GetNext())) { | 53 if (!kIsIdentifierPart.get(buffer->GetNext())) { |
| (...skipping 171 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 246 if (c0_ == '-') return SkipSingleLineComment(); | 225 if (c0_ == '-') return SkipSingleLineComment(); |
| 247 PushBack('-'); // undo Advance() | 226 PushBack('-'); // undo Advance() |
| 248 } | 227 } |
| 249 PushBack('!'); // undo Advance() | 228 PushBack('!'); // undo Advance() |
| 250 ASSERT(c0_ == '!'); | 229 ASSERT(c0_ == '!'); |
| 251 return Token::LT; | 230 return Token::LT; |
| 252 } | 231 } |
| 253 | 232 |
| 254 | 233 |
| 255 void JavaScriptScanner::Scan() { | 234 void JavaScriptScanner::Scan() { |
| 256 next_.literal_chars = Vector<const char>(); | 235 next_.literal_chars = NULL; |
| 257 Token::Value token; | 236 Token::Value token; |
| 258 do { | 237 do { |
| 259 // Remember the position of the next token | 238 // Remember the position of the next token |
| 260 next_.location.beg_pos = source_pos(); | 239 next_.location.beg_pos = source_pos(); |
| 261 | 240 |
| 262 switch (c0_) { | 241 switch (c0_) { |
| 263 case ' ': | 242 case ' ': |
| 264 case '\t': | 243 case '\t': |
| 265 Advance(); | 244 Advance(); |
| 266 token = Token::WHITESPACE; | 245 token = Token::WHITESPACE; |
| (...skipping 284 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 551 // should be illegal, but they are commonly handled | 530 // should be illegal, but they are commonly handled |
| 552 // as non-escaped characters by JS VMs. | 531 // as non-escaped characters by JS VMs. |
| 553 AddLiteralChar(c); | 532 AddLiteralChar(c); |
| 554 } | 533 } |
| 555 | 534 |
| 556 | 535 |
| 557 Token::Value JavaScriptScanner::ScanString() { | 536 Token::Value JavaScriptScanner::ScanString() { |
| 558 uc32 quote = c0_; | 537 uc32 quote = c0_; |
| 559 Advance(); // consume quote | 538 Advance(); // consume quote |
| 560 | 539 |
| 561 LiteralScope literal(this, kLiteralString); | 540 LiteralScope literal(this); |
| 562 while (c0_ != quote && c0_ >= 0 | 541 while (c0_ != quote && c0_ >= 0 |
| 563 && !scanner_constants_->IsLineTerminator(c0_)) { | 542 && !scanner_constants_->IsLineTerminator(c0_)) { |
| 564 uc32 c = c0_; | 543 uc32 c = c0_; |
| 565 Advance(); | 544 Advance(); |
| 566 if (c == '\\') { | 545 if (c == '\\') { |
| 567 if (c0_ < 0) return Token::ILLEGAL; | 546 if (c0_ < 0) return Token::ILLEGAL; |
| 568 ScanEscape(); | 547 ScanEscape(); |
| 569 } else { | 548 } else { |
| 570 AddLiteralChar(c); | 549 AddLiteralChar(c); |
| 571 } | 550 } |
| (...skipping 10 matching lines...) Expand all Loading... |
| 582 while (IsDecimalDigit(c0_)) | 561 while (IsDecimalDigit(c0_)) |
| 583 AddLiteralCharAdvance(); | 562 AddLiteralCharAdvance(); |
| 584 } | 563 } |
| 585 | 564 |
| 586 | 565 |
| 587 Token::Value JavaScriptScanner::ScanNumber(bool seen_period) { | 566 Token::Value JavaScriptScanner::ScanNumber(bool seen_period) { |
| 588 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction | 567 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction |
| 589 | 568 |
| 590 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL; | 569 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL; |
| 591 | 570 |
| 592 LiteralScope literal(this, kLiteralNumber); | 571 LiteralScope literal(this); |
| 593 if (seen_period) { | 572 if (seen_period) { |
| 594 // we have already seen a decimal point of the float | 573 // we have already seen a decimal point of the float |
| 595 AddLiteralChar('.'); | 574 AddLiteralChar('.'); |
| 596 ScanDecimalDigits(); // we know we have at least one digit | 575 ScanDecimalDigits(); // we know we have at least one digit |
| 597 | 576 |
| 598 } else { | 577 } else { |
| 599 // if the first character is '0' we must check for octals and hex | 578 // if the first character is '0' we must check for octals and hex |
| 600 if (c0_ == '0') { | 579 if (c0_ == '0') { |
| 601 AddLiteralCharAdvance(); | 580 AddLiteralCharAdvance(); |
| 602 | 581 |
| (...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 671 uc32 c = ScanHexEscape('u', 4); | 650 uc32 c = ScanHexEscape('u', 4); |
| 672 // We do not allow a unicode escape sequence to start another | 651 // We do not allow a unicode escape sequence to start another |
| 673 // unicode escape sequence. | 652 // unicode escape sequence. |
| 674 if (c == '\\') return unibrow::Utf8::kBadChar; | 653 if (c == '\\') return unibrow::Utf8::kBadChar; |
| 675 return c; | 654 return c; |
| 676 } | 655 } |
| 677 | 656 |
| 678 | 657 |
| 679 Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() { | 658 Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() { |
| 680 ASSERT(scanner_constants_->IsIdentifierStart(c0_)); | 659 ASSERT(scanner_constants_->IsIdentifierStart(c0_)); |
| 681 LiteralScope literal(this, kLiteralIdentifier); | 660 LiteralScope literal(this); |
| 682 KeywordMatcher keyword_match; | 661 KeywordMatcher keyword_match; |
| 683 // Scan identifier start character. | 662 // Scan identifier start character. |
| 684 if (c0_ == '\\') { | 663 if (c0_ == '\\') { |
| 685 uc32 c = ScanIdentifierUnicodeEscape(); | 664 uc32 c = ScanIdentifierUnicodeEscape(); |
| 686 // Only allow legal identifier start characters. | 665 // Only allow legal identifier start characters. |
| 687 if (!scanner_constants_->IsIdentifierStart(c)) return Token::ILLEGAL; | 666 if (!scanner_constants_->IsIdentifierStart(c)) return Token::ILLEGAL; |
| 688 AddLiteralChar(c); | 667 AddLiteralChar(c); |
| 689 return ScanIdentifierSuffix(&literal); | 668 return ScanIdentifierSuffix(&literal); |
| 690 } | 669 } |
| 691 | 670 |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 737 bool in_character_class = false; | 716 bool in_character_class = false; |
| 738 | 717 |
| 739 // Previous token is either '/' or '/=', in the second case, the | 718 // Previous token is either '/' or '/=', in the second case, the |
| 740 // pattern starts at =. | 719 // pattern starts at =. |
| 741 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); | 720 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); |
| 742 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); | 721 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); |
| 743 | 722 |
| 744 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, | 723 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, |
| 745 // the scanner should pass uninterpreted bodies to the RegExp | 724 // the scanner should pass uninterpreted bodies to the RegExp |
| 746 // constructor. | 725 // constructor. |
| 747 LiteralScope literal(this, kLiteralRegExp); | 726 LiteralScope literal(this); |
| 748 if (seen_equal) | 727 if (seen_equal) |
| 749 AddLiteralChar('='); | 728 AddLiteralChar('='); |
| 750 | 729 |
| 751 while (c0_ != '/' || in_character_class) { | 730 while (c0_ != '/' || in_character_class) { |
| 752 if (scanner_constants_->IsLineTerminator(c0_) || c0_ < 0) return false; | 731 if (scanner_constants_->IsLineTerminator(c0_) || c0_ < 0) return false; |
| 753 if (c0_ == '\\') { // escaped character | 732 if (c0_ == '\\') { // Escape sequence. |
| 754 AddLiteralCharAdvance(); | 733 AddLiteralCharAdvance(); |
| 755 if (scanner_constants_->IsLineTerminator(c0_) || c0_ < 0) return false; | 734 if (scanner_constants_->IsLineTerminator(c0_) || c0_ < 0) return false; |
| 756 AddLiteralCharAdvance(); | 735 AddLiteralCharAdvance(); |
| 757 } else { // unescaped character | 736 // If the escape allows more characters, i.e., \x??, \u????, or \c?, |
| 737 // only "safe" characters are allowed (letters, digits, underscore), |
| 738 // otherwise the escape isn't valid and the invalid character has |
| 739 // its normal meaning. I.e., we can just continue scanning without |
| 740 // worrying whether the following characters are part of the escape |
| 741 // or not, since any '/', '\\' or '[' is guaranteed to not be part |
| 742 // of the escape sequence. |
| 743 } else { // Unescaped character. |
| 758 if (c0_ == '[') in_character_class = true; | 744 if (c0_ == '[') in_character_class = true; |
| 759 if (c0_ == ']') in_character_class = false; | 745 if (c0_ == ']') in_character_class = false; |
| 760 AddLiteralCharAdvance(); | 746 AddLiteralCharAdvance(); |
| 761 } | 747 } |
| 762 } | 748 } |
| 763 Advance(); // consume '/' | 749 Advance(); // consume '/' |
| 764 | 750 |
| 765 literal.Complete(); | 751 literal.Complete(); |
| 766 | 752 |
| 767 return true; | 753 return true; |
| 768 } | 754 } |
| 769 | 755 |
| 770 | 756 |
| 771 bool JavaScriptScanner::ScanRegExpFlags() { | 757 bool JavaScriptScanner::ScanRegExpFlags() { |
| 772 // Scan regular expression flags. | 758 // Scan regular expression flags. |
| 773 LiteralScope literal(this, kLiteralRegExpFlags); | 759 LiteralScope literal(this); |
| 774 while (scanner_constants_->IsIdentifierPart(c0_)) { | 760 while (scanner_constants_->IsIdentifierPart(c0_)) { |
| 775 if (c0_ == '\\') { | 761 if (c0_ == '\\') { |
| 776 uc32 c = ScanIdentifierUnicodeEscape(); | 762 uc32 c = ScanIdentifierUnicodeEscape(); |
| 777 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) { | 763 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) { |
| 778 // We allow any escaped character, unlike the restriction on | 764 // We allow any escaped character, unlike the restriction on |
| 779 // IdentifierPart when it is used to build an IdentifierName. | 765 // IdentifierPart when it is used to build an IdentifierName. |
| 780 AddLiteralChar(c); | 766 AddLiteralChar(c); |
| 781 continue; | 767 continue; |
| 782 } | 768 } |
| 783 } | 769 } |
| (...skipping 129 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 913 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return; | 899 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return; |
| 914 break; | 900 break; |
| 915 case UNMATCHABLE: | 901 case UNMATCHABLE: |
| 916 break; | 902 break; |
| 917 } | 903 } |
| 918 // On fallthrough, it's a failure. | 904 // On fallthrough, it's a failure. |
| 919 state_ = UNMATCHABLE; | 905 state_ = UNMATCHABLE; |
| 920 } | 906 } |
| 921 | 907 |
| 922 } } // namespace v8::internal | 908 } } // namespace v8::internal |
| OLD | NEW |