Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(67)

Side by Side Diff: src/scanner-base.cc

Issue 5188009: Merge preparser Scanner with main JavaScript scanner. (Closed)
Patch Set: Address review. Fix thinko in keyword matcher. Created 10 years, 1 month ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/scanner-base.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2010 the V8 project authors. All rights reserved. 1 // Copyright 2010 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 462 matching lines...) Expand 10 before | Expand all | Expand 10 after
473 case '?': 473 case '?':
474 token = Select(Token::CONDITIONAL); 474 token = Select(Token::CONDITIONAL);
475 break; 475 break;
476 476
477 case '~': 477 case '~':
478 token = Select(Token::BIT_NOT); 478 token = Select(Token::BIT_NOT);
479 break; 479 break;
480 480
481 default: 481 default:
482 if (ScannerConstants::kIsIdentifierStart.get(c0_)) { 482 if (ScannerConstants::kIsIdentifierStart.get(c0_)) {
483 token = ScanIdentifier(); 483 token = ScanIdentifierOrKeyword();
484 } else if (IsDecimalDigit(c0_)) { 484 } else if (IsDecimalDigit(c0_)) {
485 token = ScanNumber(false); 485 token = ScanNumber(false);
486 } else if (SkipWhiteSpace()) { 486 } else if (SkipWhiteSpace()) {
487 token = Token::WHITESPACE; 487 token = Token::WHITESPACE;
488 } else if (c0_ < 0) { 488 } else if (c0_ < 0) {
489 token = Token::EOS; 489 token = Token::EOS;
490 } else { 490 } else {
491 token = Select(Token::ILLEGAL); 491 token = Select(Token::ILLEGAL);
492 } 492 }
493 break; 493 break;
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after
552 // should be illegal, but they are commonly handled 552 // should be illegal, but they are commonly handled
553 // as non-escaped characters by JS VMs. 553 // as non-escaped characters by JS VMs.
554 AddLiteralChar(c); 554 AddLiteralChar(c);
555 } 555 }
556 556
557 557
558 Token::Value JavaScriptScanner::ScanString() { 558 Token::Value JavaScriptScanner::ScanString() {
559 uc32 quote = c0_; 559 uc32 quote = c0_;
560 Advance(); // consume quote 560 Advance(); // consume quote
561 561
562 LiteralScope literal(this); 562 LiteralScope literal(this, kLiteralString);
563 while (c0_ != quote && c0_ >= 0 563 while (c0_ != quote && c0_ >= 0
564 && !ScannerConstants::kIsLineTerminator.get(c0_)) { 564 && !ScannerConstants::kIsLineTerminator.get(c0_)) {
565 uc32 c = c0_; 565 uc32 c = c0_;
566 Advance(); 566 Advance();
567 if (c == '\\') { 567 if (c == '\\') {
568 if (c0_ < 0) return Token::ILLEGAL; 568 if (c0_ < 0) return Token::ILLEGAL;
569 ScanEscape(); 569 ScanEscape();
570 } else { 570 } else {
571 AddLiteralChar(c); 571 AddLiteralChar(c);
572 } 572 }
(...skipping 10 matching lines...) Expand all
583 while (IsDecimalDigit(c0_)) 583 while (IsDecimalDigit(c0_))
584 AddLiteralCharAdvance(); 584 AddLiteralCharAdvance();
585 } 585 }
586 586
587 587
588 Token::Value JavaScriptScanner::ScanNumber(bool seen_period) { 588 Token::Value JavaScriptScanner::ScanNumber(bool seen_period) {
589 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction 589 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction
590 590
591 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL; 591 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL;
592 592
593 LiteralScope literal(this); 593 LiteralScope literal(this, kLiteralNumber);
594 if (seen_period) { 594 if (seen_period) {
595 // we have already seen a decimal point of the float 595 // we have already seen a decimal point of the float
596 AddLiteralChar('.'); 596 AddLiteralChar('.');
597 ScanDecimalDigits(); // we know we have at least one digit 597 ScanDecimalDigits(); // we know we have at least one digit
598 598
599 } else { 599 } else {
600 // if the first character is '0' we must check for octals and hex 600 // if the first character is '0' we must check for octals and hex
601 if (c0_ == '0') { 601 if (c0_ == '0') {
602 AddLiteralCharAdvance(); 602 AddLiteralCharAdvance();
603 603
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
670 if (c0_ != 'u') return unibrow::Utf8::kBadChar; 670 if (c0_ != 'u') return unibrow::Utf8::kBadChar;
671 Advance(); 671 Advance();
672 uc32 c = ScanHexEscape('u', 4); 672 uc32 c = ScanHexEscape('u', 4);
673 // We do not allow a unicode escape sequence to start another 673 // We do not allow a unicode escape sequence to start another
674 // unicode escape sequence. 674 // unicode escape sequence.
675 if (c == '\\') return unibrow::Utf8::kBadChar; 675 if (c == '\\') return unibrow::Utf8::kBadChar;
676 return c; 676 return c;
677 } 677 }
678 678
679 679
680 Token::Value JavaScriptScanner::ScanIdentifier() { 680 Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() {
681 ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_)); 681 ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_));
682 682 LiteralScope literal(this, kLiteralIdentifier);
683 LiteralScope literal(this);
684 KeywordMatcher keyword_match; 683 KeywordMatcher keyword_match;
685
686 // Scan identifier start character. 684 // Scan identifier start character.
687 if (c0_ == '\\') { 685 if (c0_ == '\\') {
688 uc32 c = ScanIdentifierUnicodeEscape(); 686 uc32 c = ScanIdentifierUnicodeEscape();
689 // Only allow legal identifier start characters. 687 // Only allow legal identifier start characters.
690 if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL; 688 if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL;
691 AddLiteralChar(c); 689 AddLiteralChar(c);
692 keyword_match.Fail(); 690 return ScanIdentifierSuffix(&literal);
693 } else { 691 }
694 AddLiteralChar(c0_); 692
695 keyword_match.AddChar(c0_); 693 uc32 first_char = c0_;
696 Advance(); 694 Advance();
695 AddLiteralChar(first_char);
696 if (!keyword_match.AddChar(first_char)) {
697 return ScanIdentifierSuffix(&literal);
697 } 698 }
698 699
699 // Scan the rest of the identifier characters. 700 // Scan the rest of the identifier characters.
700 while (ScannerConstants::kIsIdentifierPart.get(c0_)) { 701 while (ScannerConstants::kIsIdentifierPart.get(c0_)) {
702 if (c0_ != '\\') {
703 uc32 next_char = c0_;
704 Advance();
705 AddLiteralChar(next_char);
706 if (keyword_match.AddChar(next_char)) continue;
707 }
708 // Fallthrough if no longer able to complete keyword.
709 return ScanIdentifierSuffix(&literal);
710 }
711 literal.Complete();
712
713 return keyword_match.token();
714 }
715
716
717 Token::Value JavaScriptScanner::ScanIdentifierSuffix(LiteralScope* literal) {
718 // Scan the rest of the identifier characters.
719 while (ScannerConstants::kIsIdentifierPart.get(c0_)) {
701 if (c0_ == '\\') { 720 if (c0_ == '\\') {
702 uc32 c = ScanIdentifierUnicodeEscape(); 721 uc32 c = ScanIdentifierUnicodeEscape();
703 // Only allow legal identifier part characters. 722 // Only allow legal identifier part characters.
704 if (!ScannerConstants::kIsIdentifierPart.get(c)) return Token::ILLEGAL; 723 if (!ScannerConstants::kIsIdentifierPart.get(c)) return Token::ILLEGAL;
705 AddLiteralChar(c); 724 AddLiteralChar(c);
706 keyword_match.Fail();
707 } else { 725 } else {
708 AddLiteralChar(c0_); 726 AddLiteralChar(c0_);
709 keyword_match.AddChar(c0_);
710 Advance(); 727 Advance();
711 } 728 }
712 } 729 }
713 literal.Complete(); 730 literal->Complete();
714 731
715 return keyword_match.token(); 732 return Token::IDENTIFIER;
716 } 733 }
717 734
718 735
719
720 bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) { 736 bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) {
721 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags 737 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
722 bool in_character_class = false; 738 bool in_character_class = false;
723 739
724 // Previous token is either '/' or '/=', in the second case, the 740 // Previous token is either '/' or '/=', in the second case, the
725 // pattern starts at =. 741 // pattern starts at =.
726 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); 742 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);
727 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); 743 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);
728 744
729 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, 745 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
730 // the scanner should pass uninterpreted bodies to the RegExp 746 // the scanner should pass uninterpreted bodies to the RegExp
731 // constructor. 747 // constructor.
732 LiteralScope literal(this); 748 LiteralScope literal(this, kLiteralRegExp);
733 if (seen_equal) 749 if (seen_equal)
734 AddLiteralChar('='); 750 AddLiteralChar('=');
735 751
736 while (c0_ != '/' || in_character_class) { 752 while (c0_ != '/' || in_character_class) {
737 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false; 753 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;
738 if (c0_ == '\\') { // escaped character 754 if (c0_ == '\\') { // escaped character
739 AddLiteralCharAdvance(); 755 AddLiteralCharAdvance();
740 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false; 756 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;
741 AddLiteralCharAdvance(); 757 AddLiteralCharAdvance();
742 } else { // unescaped character 758 } else { // unescaped character
743 if (c0_ == '[') in_character_class = true; 759 if (c0_ == '[') in_character_class = true;
744 if (c0_ == ']') in_character_class = false; 760 if (c0_ == ']') in_character_class = false;
745 AddLiteralCharAdvance(); 761 AddLiteralCharAdvance();
746 } 762 }
747 } 763 }
748 Advance(); // consume '/' 764 Advance(); // consume '/'
749 765
750 literal.Complete(); 766 literal.Complete();
751 767
752 return true; 768 return true;
753 } 769 }
754 770
771
755 bool JavaScriptScanner::ScanRegExpFlags() { 772 bool JavaScriptScanner::ScanRegExpFlags() {
756 // Scan regular expression flags. 773 // Scan regular expression flags.
757 LiteralScope literal(this); 774 LiteralScope literal(this, kLiteralRegExpFlags);
758 while (ScannerConstants::kIsIdentifierPart.get(c0_)) { 775 while (ScannerConstants::kIsIdentifierPart.get(c0_)) {
759 if (c0_ == '\\') { 776 if (c0_ == '\\') {
760 uc32 c = ScanIdentifierUnicodeEscape(); 777 uc32 c = ScanIdentifierUnicodeEscape();
761 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) { 778 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {
762 // We allow any escaped character, unlike the restriction on 779 // We allow any escaped character, unlike the restriction on
763 // IdentifierPart when it is used to build an IdentifierName. 780 // IdentifierPart when it is used to build an IdentifierName.
764 AddLiteralChar(c); 781 AddLiteralChar(c);
765 continue; 782 continue;
766 } 783 }
767 } 784 }
(...skipping 93 matching lines...) Expand 10 before | Expand all | Expand 10 after
861 if (MatchKeywordStart(input, "finally", 1, Token::FINALLY)) return; 878 if (MatchKeywordStart(input, "finally", 1, Token::FINALLY)) return;
862 if (MatchKeywordStart(input, "for", 1, Token::FOR)) return; 879 if (MatchKeywordStart(input, "for", 1, Token::FOR)) return;
863 if (MatchKeywordStart(input, "function", 1, Token::FUNCTION)) return; 880 if (MatchKeywordStart(input, "function", 1, Token::FUNCTION)) return;
864 break; 881 break;
865 case I: 882 case I:
866 if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF)) return; 883 if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF)) return;
867 if (MatchKeyword(input, 'n', IN, Token::IN)) return; 884 if (MatchKeyword(input, 'n', IN, Token::IN)) return;
868 break; 885 break;
869 case IN: 886 case IN:
870 token_ = Token::IDENTIFIER; 887 token_ = Token::IDENTIFIER;
871 if (MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) { 888 if (MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) return;
872 return;
873 }
874 break; 889 break;
875 case N: 890 case N:
876 if (MatchKeywordStart(input, "native", 1, Token::NATIVE)) return; 891 if (MatchKeywordStart(input, "native", 1, Token::NATIVE)) return;
877 if (MatchKeywordStart(input, "new", 1, Token::NEW)) return; 892 if (MatchKeywordStart(input, "new", 1, Token::NEW)) return;
878 if (MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL)) return; 893 if (MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL)) return;
879 break; 894 break;
880 case T: 895 case T:
881 if (MatchState(input, 'h', TH)) return; 896 if (MatchState(input, 'h', TH)) return;
882 if (MatchState(input, 'r', TR)) return; 897 if (MatchState(input, 'r', TR)) return;
883 if (MatchKeywordStart(input, "typeof", 1, Token::TYPEOF)) return; 898 if (MatchKeywordStart(input, "typeof", 1, Token::TYPEOF)) return;
(...skipping 15 matching lines...) Expand all
899 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return; 914 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return;
900 break; 915 break;
901 case UNMATCHABLE: 916 case UNMATCHABLE:
902 break; 917 break;
903 } 918 }
904 // On fallthrough, it's a failure. 919 // On fallthrough, it's a failure.
905 state_ = UNMATCHABLE; 920 state_ = UNMATCHABLE;
906 } 921 }
907 922
908 } } // namespace v8::internal 923 } } // namespace v8::internal
OLD | NEW
« no previous file with comments | « src/scanner-base.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698