src/scanner-base.cc - Issue 5188009: Merge preparser Scanner with main JavaScript scanner.

Side by Side Diff: src/scanner-base.cc

Issue 5188009: Merge preparser Scanner with main JavaScript scanner. (Closed)

Patch Set: Address review. Fix thinko in keyword matcher. Created 10 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2010 the V8 project authors. All rights reserved.	1 // Copyright 2010 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 462 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
473 case '?':	473 case '?':

474 token = Select(Token::CONDITIONAL);	474 token = Select(Token::CONDITIONAL);

475 break;	475 break;

476	476

477 case '~':	477 case '~':

478 token = Select(Token::BIT_NOT);	478 token = Select(Token::BIT_NOT);

479 break;	479 break;

480	480

481 default:	481 default:

482 if (ScannerConstants::kIsIdentifierStart.get(c0_)) {	482 if (ScannerConstants::kIsIdentifierStart.get(c0_)) {

483 token = ScanIdentifier();	483 token = ScanIdentifierOrKeyword();

484 } else if (IsDecimalDigit(c0_)) {	484 } else if (IsDecimalDigit(c0_)) {

485 token = ScanNumber(false);	485 token = ScanNumber(false);

486 } else if (SkipWhiteSpace()) {	486 } else if (SkipWhiteSpace()) {

487 token = Token::WHITESPACE;	487 token = Token::WHITESPACE;

488 } else if (c0_ < 0) {	488 } else if (c0_ < 0) {

489 token = Token::EOS;	489 token = Token::EOS;

490 } else {	490 } else {

491 token = Select(Token::ILLEGAL);	491 token = Select(Token::ILLEGAL);

492 }	492 }

493 break;	493 break;

(...skipping 58 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
552 // should be illegal, but they are commonly handled	552 // should be illegal, but they are commonly handled

553 // as non-escaped characters by JS VMs.	553 // as non-escaped characters by JS VMs.

554 AddLiteralChar(c);	554 AddLiteralChar(c);

555 }	555 }

556	556

557	557

558 Token::Value JavaScriptScanner::ScanString() {	558 Token::Value JavaScriptScanner::ScanString() {

559 uc32 quote = c0_;	559 uc32 quote = c0_;

560 Advance(); // consume quote	560 Advance(); // consume quote

561	561

562 LiteralScope literal(this);	562 LiteralScope literal(this, kLiteralString);

563 while (c0_ != quote && c0_ >= 0	563 while (c0_ != quote && c0_ >= 0

564 && !ScannerConstants::kIsLineTerminator.get(c0_)) {	564 && !ScannerConstants::kIsLineTerminator.get(c0_)) {

565 uc32 c = c0_;	565 uc32 c = c0_;

566 Advance();	566 Advance();

567 if (c == '\\') {	567 if (c == '\\') {

568 if (c0_ < 0) return Token::ILLEGAL;	568 if (c0_ < 0) return Token::ILLEGAL;

569 ScanEscape();	569 ScanEscape();

570 } else {	570 } else {

571 AddLiteralChar(c);	571 AddLiteralChar(c);

572 }	572 }

(...skipping 10 matching lines...) Expand all Loading...
583 while (IsDecimalDigit(c0_))	583 while (IsDecimalDigit(c0_))

584 AddLiteralCharAdvance();	584 AddLiteralCharAdvance();

585 }	585 }

586	586

587	587

588 Token::Value JavaScriptScanner::ScanNumber(bool seen_period) {	588 Token::Value JavaScriptScanner::ScanNumber(bool seen_period) {

589 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction	589 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction

590	590

591 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL;	591 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL;

592	592

593 LiteralScope literal(this);	593 LiteralScope literal(this, kLiteralNumber);

594 if (seen_period) {	594 if (seen_period) {

595 // we have already seen a decimal point of the float	595 // we have already seen a decimal point of the float

596 AddLiteralChar('.');	596 AddLiteralChar('.');

597 ScanDecimalDigits(); // we know we have at least one digit	597 ScanDecimalDigits(); // we know we have at least one digit

598	598

599 } else {	599 } else {

600 // if the first character is '0' we must check for octals and hex	600 // if the first character is '0' we must check for octals and hex

601 if (c0_ == '0') {	601 if (c0_ == '0') {

602 AddLiteralCharAdvance();	602 AddLiteralCharAdvance();

603	603

(...skipping 66 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
670 if (c0_ != 'u') return unibrow::Utf8::kBadChar;	670 if (c0_ != 'u') return unibrow::Utf8::kBadChar;

671 Advance();	671 Advance();

672 uc32 c = ScanHexEscape('u', 4);	672 uc32 c = ScanHexEscape('u', 4);

673 // We do not allow a unicode escape sequence to start another	673 // We do not allow a unicode escape sequence to start another

674 // unicode escape sequence.	674 // unicode escape sequence.

675 if (c == '\\') return unibrow::Utf8::kBadChar;	675 if (c == '\\') return unibrow::Utf8::kBadChar;

676 return c;	676 return c;

677 }	677 }

678	678

679	679

680 Token::Value JavaScriptScanner::ScanIdentifier() {	680 Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() {

681 ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_));	681 ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_));

682	682 LiteralScope literal(this, kLiteralIdentifier);

683 LiteralScope literal(this);

684 KeywordMatcher keyword_match;	683 KeywordMatcher keyword_match;

685

686 // Scan identifier start character.	684 // Scan identifier start character.

687 if (c0_ == '\\') {	685 if (c0_ == '\\') {

688 uc32 c = ScanIdentifierUnicodeEscape();	686 uc32 c = ScanIdentifierUnicodeEscape();

689 // Only allow legal identifier start characters.	687 // Only allow legal identifier start characters.

690 if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL;	688 if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL;

691 AddLiteralChar(c);	689 AddLiteralChar(c);

692 keyword_match.Fail();	690 return ScanIdentifierSuffix(&literal);

693 } else {	691 }

694 AddLiteralChar(c0_);	692

695 keyword_match.AddChar(c0_);	693 uc32 first_char = c0_;

696 Advance();	694 Advance();

	695 AddLiteralChar(first_char);

	696 if (!keyword_match.AddChar(first_char)) {

	697 return ScanIdentifierSuffix(&literal);

697 }	698 }

698	699

699 // Scan the rest of the identifier characters.	700 // Scan the rest of the identifier characters.

700 while (ScannerConstants::kIsIdentifierPart.get(c0_)) {	701 while (ScannerConstants::kIsIdentifierPart.get(c0_)) {

	702 if (c0_ != '\\') {

	703 uc32 next_char = c0_;

	704 Advance();

	705 AddLiteralChar(next_char);

	706 if (keyword_match.AddChar(next_char)) continue;

	707 }

	708 // Fallthrough if no longer able to complete keyword.

	709 return ScanIdentifierSuffix(&literal);

	710 }

	711 literal.Complete();

	712

	713 return keyword_match.token();

	714 }

	715

	716

	717 Token::Value JavaScriptScanner::ScanIdentifierSuffix(LiteralScope* literal) {

	718 // Scan the rest of the identifier characters.

	719 while (ScannerConstants::kIsIdentifierPart.get(c0_)) {

701 if (c0_ == '\\') {	720 if (c0_ == '\\') {

702 uc32 c = ScanIdentifierUnicodeEscape();	721 uc32 c = ScanIdentifierUnicodeEscape();

703 // Only allow legal identifier part characters.	722 // Only allow legal identifier part characters.

704 if (!ScannerConstants::kIsIdentifierPart.get(c)) return Token::ILLEGAL;	723 if (!ScannerConstants::kIsIdentifierPart.get(c)) return Token::ILLEGAL;

705 AddLiteralChar(c);	724 AddLiteralChar(c);

706 keyword_match.Fail();

707 } else {	725 } else {

708 AddLiteralChar(c0_);	726 AddLiteralChar(c0_);

709 keyword_match.AddChar(c0_);

710 Advance();	727 Advance();

711 }	728 }

712 }	729 }

713 literal.Complete();	730 literal->Complete();

714	731

715 return keyword_match.token();	732 return Token::IDENTIFIER;

716 }	733 }

717	734

718	735

719

720 bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) {	736 bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) {

721 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags	737 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags

722 bool in_character_class = false;	738 bool in_character_class = false;

723	739

724 // Previous token is either '/' or '/=', in the second case, the	740 // Previous token is either '/' or '/=', in the second case, the

725 // pattern starts at =.	741 // pattern starts at =.

726 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);	742 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);

727 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);	743 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);

728	744

729 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,	745 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,

730 // the scanner should pass uninterpreted bodies to the RegExp	746 // the scanner should pass uninterpreted bodies to the RegExp

731 // constructor.	747 // constructor.

732 LiteralScope literal(this);	748 LiteralScope literal(this, kLiteralRegExp);

733 if (seen_equal)	749 if (seen_equal)

734 AddLiteralChar('=');	750 AddLiteralChar('=');

735	751

736 while (c0_ != '/' || in_character_class) {	752 while (c0_ != '/' || in_character_class) {

737 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;	753 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;

738 if (c0_ == '\\') { // escaped character	754 if (c0_ == '\\') { // escaped character

739 AddLiteralCharAdvance();	755 AddLiteralCharAdvance();

740 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;	756 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;

741 AddLiteralCharAdvance();	757 AddLiteralCharAdvance();

742 } else { // unescaped character	758 } else { // unescaped character

743 if (c0_ == '[') in_character_class = true;	759 if (c0_ == '[') in_character_class = true;

744 if (c0_ == ']') in_character_class = false;	760 if (c0_ == ']') in_character_class = false;

745 AddLiteralCharAdvance();	761 AddLiteralCharAdvance();

746 }	762 }

747 }	763 }

748 Advance(); // consume '/'	764 Advance(); // consume '/'

749	765

750 literal.Complete();	766 literal.Complete();

751	767

752 return true;	768 return true;

753 }	769 }

754	770

	771

755 bool JavaScriptScanner::ScanRegExpFlags() {	772 bool JavaScriptScanner::ScanRegExpFlags() {

756 // Scan regular expression flags.	773 // Scan regular expression flags.

757 LiteralScope literal(this);	774 LiteralScope literal(this, kLiteralRegExpFlags);

758 while (ScannerConstants::kIsIdentifierPart.get(c0_)) {	775 while (ScannerConstants::kIsIdentifierPart.get(c0_)) {

759 if (c0_ == '\\') {	776 if (c0_ == '\\') {

760 uc32 c = ScanIdentifierUnicodeEscape();	777 uc32 c = ScanIdentifierUnicodeEscape();

761 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {	778 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {

762 // We allow any escaped character, unlike the restriction on	779 // We allow any escaped character, unlike the restriction on

763 // IdentifierPart when it is used to build an IdentifierName.	780 // IdentifierPart when it is used to build an IdentifierName.

764 AddLiteralChar(c);	781 AddLiteralChar(c);

765 continue;	782 continue;

766 }	783 }

767 }	784 }

(...skipping 93 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
861 if (MatchKeywordStart(input, "finally", 1, Token::FINALLY)) return;	878 if (MatchKeywordStart(input, "finally", 1, Token::FINALLY)) return;

862 if (MatchKeywordStart(input, "for", 1, Token::FOR)) return;	879 if (MatchKeywordStart(input, "for", 1, Token::FOR)) return;

863 if (MatchKeywordStart(input, "function", 1, Token::FUNCTION)) return;	880 if (MatchKeywordStart(input, "function", 1, Token::FUNCTION)) return;

864 break;	881 break;

865 case I:	882 case I:

866 if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF)) return;	883 if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF)) return;

867 if (MatchKeyword(input, 'n', IN, Token::IN)) return;	884 if (MatchKeyword(input, 'n', IN, Token::IN)) return;

868 break;	885 break;

869 case IN:	886 case IN:

870 token_ = Token::IDENTIFIER;	887 token_ = Token::IDENTIFIER;

871 if (MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) {	888 if (MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) return;

872 return;

873 }

874 break;	889 break;

875 case N:	890 case N:

876 if (MatchKeywordStart(input, "native", 1, Token::NATIVE)) return;	891 if (MatchKeywordStart(input, "native", 1, Token::NATIVE)) return;

877 if (MatchKeywordStart(input, "new", 1, Token::NEW)) return;	892 if (MatchKeywordStart(input, "new", 1, Token::NEW)) return;

878 if (MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL)) return;	893 if (MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL)) return;

879 break;	894 break;

880 case T:	895 case T:

881 if (MatchState(input, 'h', TH)) return;	896 if (MatchState(input, 'h', TH)) return;

882 if (MatchState(input, 'r', TR)) return;	897 if (MatchState(input, 'r', TR)) return;

883 if (MatchKeywordStart(input, "typeof", 1, Token::TYPEOF)) return;	898 if (MatchKeywordStart(input, "typeof", 1, Token::TYPEOF)) return;

(...skipping 15 matching lines...) Expand all Loading...
899 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return;	914 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return;

900 break;	915 break;

901 case UNMATCHABLE:	916 case UNMATCHABLE:

902 break;	917 break;

903 }	918 }

904 // On fallthrough, it's a failure.	919 // On fallthrough, it's a failure.

905 state_ = UNMATCHABLE;	920 state_ = UNMATCHABLE;

906 }	921 }

907	922

908 } } // namespace v8::internal	923 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/scanner-base.h ('k') | no next file » | no next file with comments »