| OLD | NEW |
| 1 // Copyright 2010 the V8 project authors. All rights reserved. | 1 // Copyright 2010 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 17 matching lines...) Expand all Loading... |
| 28 // Features shared by parsing and pre-parsing scanners. | 28 // Features shared by parsing and pre-parsing scanners. |
| 29 | 29 |
| 30 #include "../include/v8stdint.h" | 30 #include "../include/v8stdint.h" |
| 31 #include "scanner-base.h" | 31 #include "scanner-base.h" |
| 32 #include "char-predicates-inl.h" | 32 #include "char-predicates-inl.h" |
| 33 | 33 |
| 34 namespace v8 { | 34 namespace v8 { |
| 35 namespace internal { | 35 namespace internal { |
| 36 | 36 |
| 37 // ---------------------------------------------------------------------------- | 37 // ---------------------------------------------------------------------------- |
| 38 // LiteralCollector | |
| 39 | |
| 40 LiteralCollector::LiteralCollector() | |
| 41 : buffer_(kInitialCapacity), recording_(false) { } | |
| 42 | |
| 43 | |
| 44 LiteralCollector::~LiteralCollector() {} | |
| 45 | |
| 46 | |
| 47 void LiteralCollector::AddCharSlow(uc32 c) { | |
| 48 ASSERT(static_cast<unsigned>(c) > unibrow::Utf8::kMaxOneByteChar); | |
| 49 int length = unibrow::Utf8::Length(c); | |
| 50 Vector<char> block = buffer_.AddBlock(length, '\0'); | |
| 51 #ifdef DEBUG | |
| 52 int written_length = unibrow::Utf8::Encode(block.start(), c); | |
| 53 CHECK_EQ(length, written_length); | |
| 54 #else | |
| 55 unibrow::Utf8::Encode(block.start(), c); | |
| 56 #endif | |
| 57 } | |
| 58 | |
| 59 // ---------------------------------------------------------------------------- | |
| 60 // Character predicates | 38 // Character predicates |
| 61 | 39 |
| 62 unibrow::Predicate<IdentifierStart, 128> ScannerConstants::kIsIdentifierStart; | 40 unibrow::Predicate<IdentifierStart, 128> ScannerConstants::kIsIdentifierStart; |
| 63 unibrow::Predicate<IdentifierPart, 128> ScannerConstants::kIsIdentifierPart; | 41 unibrow::Predicate<IdentifierPart, 128> ScannerConstants::kIsIdentifierPart; |
| 64 unibrow::Predicate<unibrow::WhiteSpace, 128> ScannerConstants::kIsWhiteSpace; | 42 unibrow::Predicate<unibrow::WhiteSpace, 128> ScannerConstants::kIsWhiteSpace; |
| 65 unibrow::Predicate<unibrow::LineTerminator, 128> | 43 unibrow::Predicate<unibrow::LineTerminator, 128> |
| 66 ScannerConstants::kIsLineTerminator; | 44 ScannerConstants::kIsLineTerminator; |
| 67 | 45 |
| 68 StaticResource<ScannerConstants::Utf8Decoder> ScannerConstants::utf8_decoder_; | 46 StaticResource<ScannerConstants::Utf8Decoder> ScannerConstants::utf8_decoder_; |
| 69 | 47 |
| (...skipping 179 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 249 if (c0_ == '-') return SkipSingleLineComment(); | 227 if (c0_ == '-') return SkipSingleLineComment(); |
| 250 PushBack('-'); // undo Advance() | 228 PushBack('-'); // undo Advance() |
| 251 } | 229 } |
| 252 PushBack('!'); // undo Advance() | 230 PushBack('!'); // undo Advance() |
| 253 ASSERT(c0_ == '!'); | 231 ASSERT(c0_ == '!'); |
| 254 return Token::LT; | 232 return Token::LT; |
| 255 } | 233 } |
| 256 | 234 |
| 257 | 235 |
| 258 void JavaScriptScanner::Scan() { | 236 void JavaScriptScanner::Scan() { |
| 259 next_.literal_chars = Vector<const char>(); | 237 next_.literal_chars = NULL; |
| 260 Token::Value token; | 238 Token::Value token; |
| 261 do { | 239 do { |
| 262 // Remember the position of the next token | 240 // Remember the position of the next token |
| 263 next_.location.beg_pos = source_pos(); | 241 next_.location.beg_pos = source_pos(); |
| 264 | 242 |
| 265 switch (c0_) { | 243 switch (c0_) { |
| 266 case ' ': | 244 case ' ': |
| 267 case '\t': | 245 case '\t': |
| 268 Advance(); | 246 Advance(); |
| 269 token = Token::WHITESPACE; | 247 token = Token::WHITESPACE; |
| (...skipping 284 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 554 // should be illegal, but they are commonly handled | 532 // should be illegal, but they are commonly handled |
| 555 // as non-escaped characters by JS VMs. | 533 // as non-escaped characters by JS VMs. |
| 556 AddLiteralChar(c); | 534 AddLiteralChar(c); |
| 557 } | 535 } |
| 558 | 536 |
| 559 | 537 |
| 560 Token::Value JavaScriptScanner::ScanString() { | 538 Token::Value JavaScriptScanner::ScanString() { |
| 561 uc32 quote = c0_; | 539 uc32 quote = c0_; |
| 562 Advance(); // consume quote | 540 Advance(); // consume quote |
| 563 | 541 |
| 564 LiteralScope literal(this, kLiteralString); | 542 LiteralScope literal(this); |
| 565 while (c0_ != quote && c0_ >= 0 | 543 while (c0_ != quote && c0_ >= 0 |
| 566 && !ScannerConstants::kIsLineTerminator.get(c0_)) { | 544 && !ScannerConstants::kIsLineTerminator.get(c0_)) { |
| 567 uc32 c = c0_; | 545 uc32 c = c0_; |
| 568 Advance(); | 546 Advance(); |
| 569 if (c == '\\') { | 547 if (c == '\\') { |
| 570 if (c0_ < 0) return Token::ILLEGAL; | 548 if (c0_ < 0) return Token::ILLEGAL; |
| 571 ScanEscape(); | 549 ScanEscape(); |
| 572 } else { | 550 } else { |
| 573 AddLiteralChar(c); | 551 AddLiteralChar(c); |
| 574 } | 552 } |
| (...skipping 10 matching lines...) Expand all Loading... |
| 585 while (IsDecimalDigit(c0_)) | 563 while (IsDecimalDigit(c0_)) |
| 586 AddLiteralCharAdvance(); | 564 AddLiteralCharAdvance(); |
| 587 } | 565 } |
| 588 | 566 |
| 589 | 567 |
| 590 Token::Value JavaScriptScanner::ScanNumber(bool seen_period) { | 568 Token::Value JavaScriptScanner::ScanNumber(bool seen_period) { |
| 591 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction | 569 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction |
| 592 | 570 |
| 593 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL; | 571 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL; |
| 594 | 572 |
| 595 LiteralScope literal(this, kLiteralNumber); | 573 LiteralScope literal(this); |
| 596 if (seen_period) { | 574 if (seen_period) { |
| 597 // we have already seen a decimal point of the float | 575 // we have already seen a decimal point of the float |
| 598 AddLiteralChar('.'); | 576 AddLiteralChar('.'); |
| 599 ScanDecimalDigits(); // we know we have at least one digit | 577 ScanDecimalDigits(); // we know we have at least one digit |
| 600 | 578 |
| 601 } else { | 579 } else { |
| 602 // if the first character is '0' we must check for octals and hex | 580 // if the first character is '0' we must check for octals and hex |
| 603 if (c0_ == '0') { | 581 if (c0_ == '0') { |
| 604 AddLiteralCharAdvance(); | 582 AddLiteralCharAdvance(); |
| 605 | 583 |
| (...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 674 uc32 c = ScanHexEscape('u', 4); | 652 uc32 c = ScanHexEscape('u', 4); |
| 675 // We do not allow a unicode escape sequence to start another | 653 // We do not allow a unicode escape sequence to start another |
| 676 // unicode escape sequence. | 654 // unicode escape sequence. |
| 677 if (c == '\\') return unibrow::Utf8::kBadChar; | 655 if (c == '\\') return unibrow::Utf8::kBadChar; |
| 678 return c; | 656 return c; |
| 679 } | 657 } |
| 680 | 658 |
| 681 | 659 |
| 682 Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() { | 660 Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() { |
| 683 ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_)); | 661 ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_)); |
| 684 LiteralScope literal(this, kLiteralIdentifier); | 662 LiteralScope literal(this); |
| 685 KeywordMatcher keyword_match; | 663 KeywordMatcher keyword_match; |
| 686 // Scan identifier start character. | 664 // Scan identifier start character. |
| 687 if (c0_ == '\\') { | 665 if (c0_ == '\\') { |
| 688 uc32 c = ScanIdentifierUnicodeEscape(); | 666 uc32 c = ScanIdentifierUnicodeEscape(); |
| 689 // Only allow legal identifier start characters. | 667 // Only allow legal identifier start characters. |
| 690 if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL; | 668 if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL; |
| 691 AddLiteralChar(c); | 669 AddLiteralChar(c); |
| 692 return ScanIdentifierSuffix(&literal); | 670 return ScanIdentifierSuffix(&literal); |
| 693 } | 671 } |
| 694 | 672 |
| (...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 740 bool in_character_class = false; | 718 bool in_character_class = false; |
| 741 | 719 |
| 742 // Previous token is either '/' or '/=', in the second case, the | 720 // Previous token is either '/' or '/=', in the second case, the |
| 743 // pattern starts at =. | 721 // pattern starts at =. |
| 744 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); | 722 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); |
| 745 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); | 723 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); |
| 746 | 724 |
| 747 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, | 725 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, |
| 748 // the scanner should pass uninterpreted bodies to the RegExp | 726 // the scanner should pass uninterpreted bodies to the RegExp |
| 749 // constructor. | 727 // constructor. |
| 750 LiteralScope literal(this, kLiteralRegExp); | 728 LiteralScope literal(this); |
| 751 if (seen_equal) | 729 if (seen_equal) |
| 752 AddLiteralChar('='); | 730 AddLiteralChar('='); |
| 753 | 731 |
| 754 while (c0_ != '/' || in_character_class) { | 732 while (c0_ != '/' || in_character_class) { |
| 755 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false; | 733 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false; |
| 756 if (c0_ == '\\') { // escaped character | 734 if (c0_ == '\\') { // escaped character |
| 757 AddLiteralCharAdvance(); | 735 AddLiteralCharAdvance(); |
| 758 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false; | 736 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false; |
| 759 AddLiteralCharAdvance(); | 737 AddLiteralCharAdvance(); |
| 760 } else { // unescaped character | 738 } else { // unescaped character |
| 761 if (c0_ == '[') in_character_class = true; | 739 if (c0_ == '[') in_character_class = true; |
| 762 if (c0_ == ']') in_character_class = false; | 740 if (c0_ == ']') in_character_class = false; |
| 763 AddLiteralCharAdvance(); | 741 AddLiteralCharAdvance(); |
| 764 } | 742 } |
| 765 } | 743 } |
| 766 Advance(); // consume '/' | 744 Advance(); // consume '/' |
| 767 | 745 |
| 768 literal.Complete(); | 746 literal.Complete(); |
| 769 | 747 |
| 770 return true; | 748 return true; |
| 771 } | 749 } |
| 772 | 750 |
| 773 | 751 |
| 774 bool JavaScriptScanner::ScanRegExpFlags() { | 752 bool JavaScriptScanner::ScanRegExpFlags() { |
| 775 // Scan regular expression flags. | 753 // Scan regular expression flags. |
| 776 LiteralScope literal(this, kLiteralRegExpFlags); | 754 LiteralScope literal(this); |
| 777 while (ScannerConstants::kIsIdentifierPart.get(c0_)) { | 755 while (ScannerConstants::kIsIdentifierPart.get(c0_)) { |
| 778 if (c0_ == '\\') { | 756 if (c0_ == '\\') { |
| 779 uc32 c = ScanIdentifierUnicodeEscape(); | 757 uc32 c = ScanIdentifierUnicodeEscape(); |
| 780 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) { | 758 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) { |
| 781 // We allow any escaped character, unlike the restriction on | 759 // We allow any escaped character, unlike the restriction on |
| 782 // IdentifierPart when it is used to build an IdentifierName. | 760 // IdentifierPart when it is used to build an IdentifierName. |
| 783 AddLiteralChar(c); | 761 AddLiteralChar(c); |
| 784 continue; | 762 continue; |
| 785 } | 763 } |
| 786 } | 764 } |
| (...skipping 129 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 916 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return; | 894 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return; |
| 917 break; | 895 break; |
| 918 case UNMATCHABLE: | 896 case UNMATCHABLE: |
| 919 break; | 897 break; |
| 920 } | 898 } |
| 921 // On fallthrough, it's a failure. | 899 // On fallthrough, it's a failure. |
| 922 state_ = UNMATCHABLE; | 900 state_ = UNMATCHABLE; |
| 923 } | 901 } |
| 924 | 902 |
| 925 } } // namespace v8::internal | 903 } } // namespace v8::internal |
| OLD | NEW |