OLD | NEW |
1 // Copyright 2010 the V8 project authors. All rights reserved. | 1 // Copyright 2010 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 17 matching lines...) Expand all Loading... |
28 // Features shared by parsing and pre-parsing scanners. | 28 // Features shared by parsing and pre-parsing scanners. |
29 | 29 |
30 #include "../include/v8stdint.h" | 30 #include "../include/v8stdint.h" |
31 #include "scanner-base.h" | 31 #include "scanner-base.h" |
32 #include "char-predicates-inl.h" | 32 #include "char-predicates-inl.h" |
33 | 33 |
34 namespace v8 { | 34 namespace v8 { |
35 namespace internal { | 35 namespace internal { |
36 | 36 |
37 // ---------------------------------------------------------------------------- | 37 // ---------------------------------------------------------------------------- |
38 // LiteralCollector | |
39 | |
// Constructor (appears only in the OLD column of this diff, i.e. it is
// removed by the change). Initializes buffer_ with kInitialCapacity and
// starts with recording_ set to false.
40 LiteralCollector::LiteralCollector() | |
41 : buffer_(kInitialCapacity), recording_(false) { } | |
42 | |
43 | |
// Destructor with an empty body (OLD column only; removed by the change).
44 LiteralCollector::~LiteralCollector() {} | |
45 | |
46 | |
// Appends the UTF-8 encoding of |c| to buffer_ (OLD column only; removed by
// the change). The ASSERT requires |c| to be above
// unibrow::Utf8::kMaxOneByteChar.
// NOTE(review): presumably one-byte characters take a separate fast path
// defined outside this view -- confirm in the header.
47 void LiteralCollector::AddCharSlow(uc32 c) { | |
48 ASSERT(static_cast<unsigned>(c) > unibrow::Utf8::kMaxOneByteChar); | |
// Reserve exactly as many bytes as the encoding of |c| needs, filled
// with '\0'.
49 int length = unibrow::Utf8::Length(c); | |
50 Vector<char> block = buffer_.AddBlock(length, '\0'); | |
// Debug builds additionally check that Encode() wrote exactly the number of
// bytes that Length() predicted; other builds just encode into the block.
51 #ifdef DEBUG | |
52 int written_length = unibrow::Utf8::Encode(block.start(), c); | |
53 CHECK_EQ(length, written_length); | |
54 #else | |
55 unibrow::Utf8::Encode(block.start(), c); | |
56 #endif | |
57 } | |
58 | |
59 // ---------------------------------------------------------------------------- | |
60 // Character predicates | 38 // Character predicates |
61 | 39 |
// Out-of-class definitions of the static members of ScannerConstants: the
// character-class predicates (identifier start/part, white space, line
// terminator) and the UTF-8 decoder resource. They are present in both the
// OLD and NEW columns; only the line numbers differ.
// NOTE(review): the second template argument (128) is presumably a cache
// size -- confirm against the unibrow::Predicate declaration.
62 unibrow::Predicate<IdentifierStart, 128> ScannerConstants::kIsIdentifierStart; | 40 unibrow::Predicate<IdentifierStart, 128> ScannerConstants::kIsIdentifierStart; |
63 unibrow::Predicate<IdentifierPart, 128> ScannerConstants::kIsIdentifierPart; | 41 unibrow::Predicate<IdentifierPart, 128> ScannerConstants::kIsIdentifierPart; |
64 unibrow::Predicate<unibrow::WhiteSpace, 128> ScannerConstants::kIsWhiteSpace; | 42 unibrow::Predicate<unibrow::WhiteSpace, 128> ScannerConstants::kIsWhiteSpace; |
65 unibrow::Predicate<unibrow::LineTerminator, 128> | 43 unibrow::Predicate<unibrow::LineTerminator, 128> |
66 ScannerConstants::kIsLineTerminator; | 44 ScannerConstants::kIsLineTerminator; |
67 | 45 |
68 StaticResource<ScannerConstants::Utf8Decoder> ScannerConstants::utf8_decoder_; | 46 StaticResource<ScannerConstants::Utf8Decoder> ScannerConstants::utf8_decoder_; |
69 | 47 |
(...skipping 179 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
249 if (c0_ == '-') return SkipSingleLineComment(); | 227 if (c0_ == '-') return SkipSingleLineComment(); |
250 PushBack('-'); // undo Advance() | 228 PushBack('-'); // undo Advance() |
251 } | 229 } |
252 PushBack('!'); // undo Advance() | 230 PushBack('!'); // undo Advance() |
253 ASSERT(c0_ == '!'); | 231 ASSERT(c0_ == '!'); |
254 return Token::LT; | 232 return Token::LT; |
255 } | 233 } |
256 | 234 |
257 | 235 |
258 void JavaScriptScanner::Scan() { | 236 void JavaScriptScanner::Scan() { |
259 next_.literal_chars = Vector<const char>(); | 237 next_.literal_chars = NULL; |
260 Token::Value token; | 238 Token::Value token; |
261 do { | 239 do { |
262 // Remember the position of the next token | 240 // Remember the position of the next token |
263 next_.location.beg_pos = source_pos(); | 241 next_.location.beg_pos = source_pos(); |
264 | 242 |
265 switch (c0_) { | 243 switch (c0_) { |
266 case ' ': | 244 case ' ': |
267 case '\t': | 245 case '\t': |
268 Advance(); | 246 Advance(); |
269 token = Token::WHITESPACE; | 247 token = Token::WHITESPACE; |
(...skipping 284 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
554 // should be illegal, but they are commonly handled | 532 // should be illegal, but they are commonly handled |
555 // as non-escaped characters by JS VMs. | 533 // as non-escaped characters by JS VMs. |
556 AddLiteralChar(c); | 534 AddLiteralChar(c); |
557 } | 535 } |
558 | 536 |
559 | 537 |
560 Token::Value JavaScriptScanner::ScanString() { | 538 Token::Value JavaScriptScanner::ScanString() { |
561 uc32 quote = c0_; | 539 uc32 quote = c0_; |
562 Advance(); // consume quote | 540 Advance(); // consume quote |
563 | 541 |
564 LiteralScope literal(this, kLiteralString); | 542 LiteralScope literal(this); |
565 while (c0_ != quote && c0_ >= 0 | 543 while (c0_ != quote && c0_ >= 0 |
566 && !ScannerConstants::kIsLineTerminator.get(c0_)) { | 544 && !ScannerConstants::kIsLineTerminator.get(c0_)) { |
567 uc32 c = c0_; | 545 uc32 c = c0_; |
568 Advance(); | 546 Advance(); |
569 if (c == '\\') { | 547 if (c == '\\') { |
570 if (c0_ < 0) return Token::ILLEGAL; | 548 if (c0_ < 0) return Token::ILLEGAL; |
571 ScanEscape(); | 549 ScanEscape(); |
572 } else { | 550 } else { |
573 AddLiteralChar(c); | 551 AddLiteralChar(c); |
574 } | 552 } |
(...skipping 10 matching lines...) Expand all Loading... |
585 while (IsDecimalDigit(c0_)) | 563 while (IsDecimalDigit(c0_)) |
586 AddLiteralCharAdvance(); | 564 AddLiteralCharAdvance(); |
587 } | 565 } |
588 | 566 |
589 | 567 |
590 Token::Value JavaScriptScanner::ScanNumber(bool seen_period) { | 568 Token::Value JavaScriptScanner::ScanNumber(bool seen_period) { |
591 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction | 569 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction |
592 | 570 |
593 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL; | 571 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL; |
594 | 572 |
595 LiteralScope literal(this, kLiteralNumber); | 573 LiteralScope literal(this); |
596 if (seen_period) { | 574 if (seen_period) { |
597 // we have already seen a decimal point of the float | 575 // we have already seen a decimal point of the float |
598 AddLiteralChar('.'); | 576 AddLiteralChar('.'); |
599 ScanDecimalDigits(); // we know we have at least one digit | 577 ScanDecimalDigits(); // we know we have at least one digit |
600 | 578 |
601 } else { | 579 } else { |
602 // if the first character is '0' we must check for octals and hex | 580 // if the first character is '0' we must check for octals and hex |
603 if (c0_ == '0') { | 581 if (c0_ == '0') { |
604 AddLiteralCharAdvance(); | 582 AddLiteralCharAdvance(); |
605 | 583 |
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
674 uc32 c = ScanHexEscape('u', 4); | 652 uc32 c = ScanHexEscape('u', 4); |
675 // We do not allow a unicode escape sequence to start another | 653 // We do not allow a unicode escape sequence to start another |
676 // unicode escape sequence. | 654 // unicode escape sequence. |
677 if (c == '\\') return unibrow::Utf8::kBadChar; | 655 if (c == '\\') return unibrow::Utf8::kBadChar; |
678 return c; | 656 return c; |
679 } | 657 } |
680 | 658 |
681 | 659 |
682 Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() { | 660 Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() { |
683 ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_)); | 661 ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_)); |
684 LiteralScope literal(this, kLiteralIdentifier); | 662 LiteralScope literal(this); |
685 KeywordMatcher keyword_match; | 663 KeywordMatcher keyword_match; |
686 // Scan identifier start character. | 664 // Scan identifier start character. |
687 if (c0_ == '\\') { | 665 if (c0_ == '\\') { |
688 uc32 c = ScanIdentifierUnicodeEscape(); | 666 uc32 c = ScanIdentifierUnicodeEscape(); |
689 // Only allow legal identifier start characters. | 667 // Only allow legal identifier start characters. |
690 if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL; | 668 if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL; |
691 AddLiteralChar(c); | 669 AddLiteralChar(c); |
692 return ScanIdentifierSuffix(&literal); | 670 return ScanIdentifierSuffix(&literal); |
693 } | 671 } |
694 | 672 |
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
740 bool in_character_class = false; | 718 bool in_character_class = false; |
741 | 719 |
742 // Previous token is either '/' or '/=', in the second case, the | 720 // Previous token is either '/' or '/=', in the second case, the |
743 // pattern starts at =. | 721 // pattern starts at =. |
744 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); | 722 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); |
745 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); | 723 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); |
746 | 724 |
747 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, | 725 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, |
748 // the scanner should pass uninterpreted bodies to the RegExp | 726 // the scanner should pass uninterpreted bodies to the RegExp |
749 // constructor. | 727 // constructor. |
750 LiteralScope literal(this, kLiteralRegExp); | 728 LiteralScope literal(this); |
751 if (seen_equal) | 729 if (seen_equal) |
752 AddLiteralChar('='); | 730 AddLiteralChar('='); |
753 | 731 |
754 while (c0_ != '/' || in_character_class) { | 732 while (c0_ != '/' || in_character_class) { |
755 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false; | 733 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false; |
756 if (c0_ == '\\') { // escaped character | 734 if (c0_ == '\\') { // escaped character |
757 AddLiteralCharAdvance(); | 735 AddLiteralCharAdvance(); |
758 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false; | 736 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false; |
759 AddLiteralCharAdvance(); | 737 AddLiteralCharAdvance(); |
760 } else { // unescaped character | 738 } else { // unescaped character |
761 if (c0_ == '[') in_character_class = true; | 739 if (c0_ == '[') in_character_class = true; |
762 if (c0_ == ']') in_character_class = false; | 740 if (c0_ == ']') in_character_class = false; |
763 AddLiteralCharAdvance(); | 741 AddLiteralCharAdvance(); |
764 } | 742 } |
765 } | 743 } |
766 Advance(); // consume '/' | 744 Advance(); // consume '/' |
767 | 745 |
768 literal.Complete(); | 746 literal.Complete(); |
769 | 747 |
770 return true; | 748 return true; |
771 } | 749 } |
772 | 750 |
773 | 751 |
774 bool JavaScriptScanner::ScanRegExpFlags() { | 752 bool JavaScriptScanner::ScanRegExpFlags() { |
775 // Scan regular expression flags. | 753 // Scan regular expression flags. |
776 LiteralScope literal(this, kLiteralRegExpFlags); | 754 LiteralScope literal(this); |
777 while (ScannerConstants::kIsIdentifierPart.get(c0_)) { | 755 while (ScannerConstants::kIsIdentifierPart.get(c0_)) { |
778 if (c0_ == '\\') { | 756 if (c0_ == '\\') { |
779 uc32 c = ScanIdentifierUnicodeEscape(); | 757 uc32 c = ScanIdentifierUnicodeEscape(); |
780 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) { | 758 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) { |
781 // We allow any escaped character, unlike the restriction on | 759 // We allow any escaped character, unlike the restriction on |
782 // IdentifierPart when it is used to build an IdentifierName. | 760 // IdentifierPart when it is used to build an IdentifierName. |
783 AddLiteralChar(c); | 761 AddLiteralChar(c); |
784 continue; | 762 continue; |
785 } | 763 } |
786 } | 764 } |
(...skipping 129 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
916 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return; | 894 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return; |
917 break; | 895 break; |
918 case UNMATCHABLE: | 896 case UNMATCHABLE: |
919 break; | 897 break; |
920 } | 898 } |
921 // On fallthrough, it's a failure. | 899 // On fallthrough, it's a failure. |
922 state_ = UNMATCHABLE; | 900 state_ = UNMATCHABLE; |
923 } | 901 } |
924 | 902 |
925 } } // namespace v8::internal | 903 } } // namespace v8::internal |
OLD | NEW |