| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
| 3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
| 4 // met: | 4 // met: |
| 5 // | 5 // |
| 6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
| 7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
| 8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
| 9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
| 10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
| (...skipping 401 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 412 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { | 412 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { |
| 413 Advance(); | 413 Advance(); |
| 414 if (c0_ == next) { | 414 if (c0_ == next) { |
| 415 Advance(); | 415 Advance(); |
| 416 return then; | 416 return then; |
| 417 } else { | 417 } else { |
| 418 return else_; | 418 return else_; |
| 419 } | 419 } |
| 420 } | 420 } |
| 421 | 421 |
| 422 uc32 ScanHexEscape(uc32 c, int length); | 422 uc32 ScanHexNumber(int expected_length); |
| 423 | 423 |
| 424 // Return the current source position. | 424 // Return the current source position. |
| 425 int source_pos() { | 425 int source_pos() { |
| 426 return source_->pos() - kCharacterLookaheadBufferSize; | 426 return source_->pos() - kCharacterLookaheadBufferSize; |
| 427 } | 427 } |
| 428 | 428 |
| 429 UnicodeCache* unicode_cache_; | 429 UnicodeCache* unicode_cache_; |
| 430 | 430 |
| 431 // Buffers collecting literal strings, numbers, etc. | 431 // Buffers collecting literal strings, numbers, etc. |
| 432 LiteralBuffer literal_buffer1_; | 432 LiteralBuffer literal_buffer1_; |
| (...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 500 // Returns the location of the last seen octal literal | 500 // Returns the location of the last seen octal literal |
| 501 Location octal_position() const { return octal_pos_; } | 501 Location octal_position() const { return octal_pos_; } |
| 502 void clear_octal_position() { octal_pos_ = Location::invalid(); } | 502 void clear_octal_position() { octal_pos_ = Location::invalid(); } |
| 503 | 503 |
| 504 // Seek forward to the given position. This operation does not | 504 // Seek forward to the given position. This operation does not |
| 505 // work in general, for instance when there are pushed back | 505 // work in general, for instance when there are pushed back |
| 506 // characters, but works for seeking forward until simple delimiter | 506 // characters, but works for seeking forward until simple delimiter |
| 507 // tokens, which is what it is used for. | 507 // tokens, which is what it is used for. |
| 508 void SeekForward(int pos); | 508 void SeekForward(int pos); |
| 509 | 509 |
| 510 bool HarmonyBlockScoping() const { |
| 511 return harmony_block_scoping_; |
| 512 } |
| 513 void SetHarmonyBlockScoping(bool block_scoping) { |
| 514 harmony_block_scoping_ = block_scoping; |
| 515 } |
| 516 |
| 517 |
| 510 protected: | 518 protected: |
| 511 bool SkipWhiteSpace(); | 519 bool SkipWhiteSpace(); |
| 512 Token::Value SkipSingleLineComment(); | 520 Token::Value SkipSingleLineComment(); |
| 513 Token::Value SkipMultiLineComment(); | 521 Token::Value SkipMultiLineComment(); |
| 514 | 522 |
| 515 // Scans a single JavaScript token. | 523 // Scans a single JavaScript token. |
| 516 void Scan(); | 524 void Scan(); |
| 517 | 525 |
| 518 void ScanDecimalDigits(); | 526 void ScanDecimalDigits(); |
| 519 Token::Value ScanNumber(bool seen_period); | 527 Token::Value ScanNumber(bool seen_period); |
| 520 Token::Value ScanIdentifierOrKeyword(); | 528 Token::Value ScanIdentifierOrKeyword(); |
| 521 Token::Value ScanIdentifierSuffix(LiteralScope* literal); | 529 Token::Value ScanIdentifierSuffix(LiteralScope* literal); |
| 522 | 530 |
| 523 void ScanEscape(); | 531 void ScanEscape(); |
| 524 Token::Value ScanString(); | 532 Token::Value ScanString(); |
| 525 | 533 |
| 526 // Scans a possible HTML comment -- begins with '<!'. | 534 // Scans a possible HTML comment -- begins with '<!'. |
| 527 Token::Value ScanHtmlComment(); | 535 Token::Value ScanHtmlComment(); |
| 528 | 536 |
| 529 // Decodes a unicode escape-sequence which is part of an identifier. | 537 // Decodes a unicode escape-sequence which is part of an identifier. |
| 530 // If the escape sequence cannot be decoded the result is kBadChar. | 538 // If the escape sequence cannot be decoded the result is kBadChar. |
| 531 uc32 ScanIdentifierUnicodeEscape(); | 539 uc32 ScanIdentifierUnicodeEscape(); |
| 540 // Recognizes a unicode escape-sequence and adds its characters, |
| 541 // uninterpreted, to the current literal. Used for parsing RegExp |
| 542 // flags. |
| 543 bool ScanLiteralUnicodeEscape(); |
| 532 | 544 |
| 533 // Start position of the octal literal last scanned. | 545 // Start position of the octal literal last scanned. |
| 534 Location octal_pos_; | 546 Location octal_pos_; |
| 535 | 547 |
| 536 // Whether there is a line terminator whitespace character after | 548 // Whether there is a line terminator whitespace character after |
| 537 // the current token, and before the next. Does not count newlines | 549 // the current token, and before the next. Does not count newlines |
| 538 // inside multiline comments. | 550 // inside multiline comments. |
| 539 bool has_line_terminator_before_next_; | 551 bool has_line_terminator_before_next_; |
| 540 // Whether there is a multi-line comment that contains a | 552 // Whether there is a multi-line comment that contains a |
| 541 // line-terminator after the current token, and before the next. | 553 // line-terminator after the current token, and before the next. |
| 542 bool has_multiline_comment_before_next_; | 554 bool has_multiline_comment_before_next_; |
| 555 // Whether we scan 'let' as a keyword for harmony block scoped |
| 556 // let bindings. |
| 557 bool harmony_block_scoping_; |
| 543 }; | 558 }; |
| 544 | 559 |
| 545 | |
| 546 // ---------------------------------------------------------------------------- | |
| 547 // Keyword matching state machine. | |
| 548 | |
| 549 class KeywordMatcher { | |
| 550 // Incrementally recognize keywords. | |
| 551 // | |
| 552 // We distinguish between normal future reserved words and words that are | |
| 553 // considered to be future reserved words only in strict mode as required by | |
| 554 // ECMA-262 7.6.1.2. | |
| 555 // | |
| 556 // Recognized as keywords: | |
| 557 // break, case, catch, const*, continue, debugger, default, delete, do, | |
| 558 // else, finally, false, for, function, if, in, instanceof, new, null, | |
| 559 // return, switch, this, throw, true, try, typeof, var, void, while, with. | |
| 560 // | |
| 561 // Recognized as Future Reserved Keywords: | |
| 562 // class, enum, export, extends, import, super. | |
| 563 // | |
| 564 // Recognized as Future Reserved Keywords (strict mode only): | |
| 565 // implements, interface, let, package, private, protected, public, | |
| 566 // static, yield. | |
| 567 // | |
| 568 // *: Actually a "future reserved keyword". It's the only one we are | |
| 569 // recognizing outside of ES5 strict mode, the remaining are allowed | |
| 570 // as identifiers. | |
| 571 // | |
| 572 public: | |
| 573 KeywordMatcher() | |
| 574 : state_(INITIAL), | |
| 575 token_(Token::IDENTIFIER), | |
| 576 keyword_(NULL), | |
| 577 counter_(0), | |
| 578 keyword_token_(Token::ILLEGAL) {} | |
| 579 | |
| 580 Token::Value token() { return token_; } | |
| 581 | |
| 582 inline bool AddChar(unibrow::uchar input) { | |
| 583 if (state_ != UNMATCHABLE) { | |
| 584 Step(input); | |
| 585 } | |
| 586 return state_ != UNMATCHABLE; | |
| 587 } | |
| 588 | |
| 589 void Fail() { | |
| 590 token_ = Token::IDENTIFIER; | |
| 591 state_ = UNMATCHABLE; | |
| 592 } | |
| 593 | |
| 594 private: | |
| 595 enum State { | |
| 596 UNMATCHABLE, | |
| 597 INITIAL, | |
| 598 KEYWORD_PREFIX, | |
| 599 KEYWORD_MATCHED, | |
| 600 C, | |
| 601 CA, | |
| 602 CO, | |
| 603 CON, | |
| 604 D, | |
| 605 DE, | |
| 606 E, | |
| 607 EX, | |
| 608 F, | |
| 609 I, | |
| 610 IM, | |
| 611 IMP, | |
| 612 IN, | |
| 613 N, | |
| 614 P, | |
| 615 PR, | |
| 616 S, | |
| 617 T, | |
| 618 TH, | |
| 619 TR, | |
| 620 V, | |
| 621 W, | |
| 622 LAST_STATE = W | |
| 623 }; | |
| 624 | |
| 625 | |
| 626 STATIC_ASSERT(LAST_STATE <= 0xFF); | |
| 627 STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); | |
| 628 struct FirstState { | |
| 629 const char* keyword; | |
| 630 State state : 8; | |
| 631 Token::Value token : 8; | |
| 632 }; | |
| 633 | |
| 634 // Range of possible first characters of a keyword. | |
| 635 static const unsigned int kFirstCharRangeMin = 'b'; | |
| 636 static const unsigned int kFirstCharRangeMax = 'y'; | |
| 637 static const unsigned int kFirstCharRangeLength = | |
| 638 kFirstCharRangeMax - kFirstCharRangeMin + 1; | |
| 639 // State map for first keyword character range. | |
| 640 static const FirstState first_states_[kFirstCharRangeLength]; | |
| 641 | |
| 642 // If input equals keyword's character at position, continue matching keyword | |
| 643 // from that position. | |
| 644 inline bool MatchKeywordStart(unibrow::uchar input, | |
| 645 const char* keyword, | |
| 646 int position, | |
| 647 Token::Value token_if_match) { | |
| 648 if (input != static_cast<unibrow::uchar>(keyword[position])) { | |
| 649 return false; | |
| 650 } | |
| 651 state_ = KEYWORD_PREFIX; | |
| 652 this->keyword_ = keyword; | |
| 653 this->counter_ = position + 1; | |
| 654 this->keyword_token_ = token_if_match; | |
| 655 return true; | |
| 656 } | |
| 657 | |
| 658 // If input equals match character, transition to new state and return true. | |
| 659 inline bool MatchState(unibrow::uchar input, char match, State new_state) { | |
| 660 if (input != static_cast<unibrow::uchar>(match)) { | |
| 661 return false; | |
| 662 } | |
| 663 state_ = new_state; | |
| 664 return true; | |
| 665 } | |
| 666 | |
| 667 inline bool MatchKeyword(unibrow::uchar input, | |
| 668 char match, | |
| 669 State new_state, | |
| 670 Token::Value keyword_token) { | |
| 671 if (input != static_cast<unibrow::uchar>(match)) { | |
| 672 return false; | |
| 673 } | |
| 674 state_ = new_state; | |
| 675 token_ = keyword_token; | |
| 676 return true; | |
| 677 } | |
| 678 | |
| 679 void Step(unibrow::uchar input); | |
| 680 | |
| 681 // Current state. | |
| 682 State state_; | |
| 683 // Token for currently added characters. | |
| 684 Token::Value token_; | |
| 685 | |
| 686 // Matching a specific keyword string (there is only one possible valid | |
| 687 // keyword with the current prefix). | |
| 688 const char* keyword_; | |
| 689 int counter_; | |
| 690 Token::Value keyword_token_; | |
| 691 }; | |
| 692 | |
| 693 | |
| 694 } } // namespace v8::internal | 560 } } // namespace v8::internal |
| 695 | 561 |
| 696 #endif // V8_SCANNER_BASE_H_ | 562 #endif // V8_SCANNER_BASE_H_ |
| OLD | NEW |