src/scanner.h - Issue 160073006: Implement handling of arrow functions in the parser

Side by Side Diff: src/scanner.h

Issue 160073006: Implement handling of arrow functions in the parser (Closed) Base URL: https://v8.googlecode.com/svn/branches/bleeding_edge

Patch Set: Version with parsing code only, tests into test-parsing.cc Created 6 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Features shared by parsing and pre-parsing scanners.	5 // Features shared by parsing and pre-parsing scanners.

6	6

7 #ifndef V8_SCANNER_H_	7 #ifndef V8_SCANNER_H_

8 #define V8_SCANNER_H_	8 #define V8_SCANNER_H_

9	9

10 #include "src/allocation.h"	10 #include "src/allocation.h"

(...skipping 171 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
182 UnicodeCache* unicode_constants_;	182 UnicodeCache* unicode_constants_;

183 // Backing store used to store strings used as hashmap keys.	183 // Backing store used to store strings used as hashmap keys.

184 SequenceCollector<unsigned char> backing_store_;	184 SequenceCollector<unsigned char> backing_store_;

185 HashMap map_;	185 HashMap map_;

186 // Buffer used for string->number->canonical string conversions.	186 // Buffer used for string->number->canonical string conversions.

187 char number_buffer_[kBufferSize];	187 char number_buffer_[kBufferSize];

188 };	188 };

189	189

190	190

191 // ----------------------------------------------------------------------------	191 // ----------------------------------------------------------------------------

	192 // ParamListFinder discovers sequences of tokens which form a valid function

	193 // parameter list.

	194 class Scanner;

	195

	196 class ParamListFinder {

	197 public:

	198 explicit ParamListFinder(UnicodeCache* unicode_cache)

	199 : state_(Invalid)

	200 , start_pos_(-1)

	201 , identifier_pos_(-1)

	202 , duplicate_pos_(-1)

	203 , duplicate_finder_(unicode_cache) { }

	204

	205 V8_INLINE void Update(Scanner* scanner);

	206

	207 bool IsValid(int pos) const {

	208 return (state_ == Valid && start_pos_ == pos)

	209 || (identifier_pos_ == pos);

	210 }

	211

	212 bool HasDuplicateIdentifiers() const {

	213 return duplicate_pos_ > start_pos_;

	214 }

	215

	216 int FirstDuplicatePosition() const {

	217 return duplicate_pos_;

	218 }

	219

	220 private:

	221 enum State {

	222 Invalid,

	223 Valid,

	224 LeftParen,

	225 Identifier,

	226 Comma

	227 };

	228

	229 V8_INLINE void AddIdentifier(Scanner* scanner);

	230

	231 State state_;

	232 int start_pos_;

	233 int identifier_pos_;

	234 int duplicate_pos_;

	235 DuplicateFinder duplicate_finder_;

	236 };

	237

	238

	239 // ----------------------------------------------------------------------------

192 // LiteralBuffer - Collector of chars of literals.	240 // LiteralBuffer - Collector of chars of literals.

193	241

194 class LiteralBuffer {	242 class LiteralBuffer {

195 public:	243 public:

196 LiteralBuffer() : is_one_byte_(true), position_(0), backing_store_() { }	244 LiteralBuffer() : is_one_byte_(true), position_(0), backing_store_() { }

197	245

198 ~LiteralBuffer() {	246 ~LiteralBuffer() {

199 if (backing_store_.length() > 0) {	247 if (backing_store_.length() > 0) {

200 backing_store_.Dispose();	248 backing_store_.Dispose();

201 }	249 }

(...skipping 194 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
396 if (is_literal_one_byte() &&	444 if (is_literal_one_byte() &&

397 literal_length() == 3 &&	445 literal_length() == 3 &&

398 !literal_contains_escapes()) {	446 !literal_contains_escapes()) {

399 const char* token =	447 const char* token =

400 reinterpret_cast<const char*>(literal_one_byte_string().start());	448 reinterpret_cast<const char*>(literal_one_byte_string().start());

401 *is_get = strncmp(token, "get", 3) == 0;	449 *is_get = strncmp(token, "get", 3) == 0;

402 *is_set = !*is_get && strncmp(token, "set", 3) == 0;	450 *is_set = !*is_get && strncmp(token, "set", 3) == 0;

403 }	451 }

404 }	452 }

405	453

	454 ParamListFinder* parameter_list() {

	455 return &param_list_finder_;

	456 }

	457

406 int FindNumber(DuplicateFinder* finder, int value);	458 int FindNumber(DuplicateFinder* finder, int value);

407 int FindSymbol(DuplicateFinder* finder, int value);	459 int FindSymbol(DuplicateFinder* finder, int value);

408	460

409 UnicodeCache* unicode_cache() { return unicode_cache_; }	461 UnicodeCache* unicode_cache() { return unicode_cache_; }

410	462

411 // Returns the location of the last seen octal literal.	463 // Returns the location of the last seen octal literal.

412 Location octal_position() const { return octal_pos_; }	464 Location octal_position() const { return octal_pos_; }

413 void clear_octal_position() { octal_pos_ = Location::invalid(); }	465 void clear_octal_position() { octal_pos_ = Location::invalid(); }

414	466

415 // Seek forward to the given position. This operation does not	467 // Seek forward to the given position. This operation does not

(...skipping 28 matching lines...) Expand all Loading...
444 has_multiline_comment_before_next_;	496 has_multiline_comment_before_next_;

445 }	497 }

446	498

447 // Scans the input as a regular expression pattern, previous	499 // Scans the input as a regular expression pattern, previous

448 // character(s) must be /(=). Returns true if a pattern is scanned.	500 // character(s) must be /(=). Returns true if a pattern is scanned.

449 bool ScanRegExpPattern(bool seen_equal);	501 bool ScanRegExpPattern(bool seen_equal);

450 // Returns true if regexp flags are scanned (always since flags can	502 // Returns true if regexp flags are scanned (always since flags can

451 // be empty).	503 // be empty).

452 bool ScanRegExpFlags();	504 bool ScanRegExpFlags();

453	505

	506 bool IdentifierIsFutureStrictReserved(const AstString* string) const;

	507

454 private:	508 private:

455 // The current and look-ahead token.	509 // The current and look-ahead token.

456 struct TokenDesc {	510 struct TokenDesc {

457 Token::Value token;	511 Token::Value token;

458 Location location;	512 Location location;

459 LiteralBuffer* literal_chars;	513 LiteralBuffer* literal_chars;

460 };	514 };

461	515

462 static const int kCharacterLookaheadBufferSize = 1;	516 static const int kCharacterLookaheadBufferSize = 1;

463	517

(...skipping 135 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
599 int source_pos() {	653 int source_pos() {

600 return source_->pos() - kCharacterLookaheadBufferSize;	654 return source_->pos() - kCharacterLookaheadBufferSize;

601 }	655 }

602	656

603 UnicodeCache* unicode_cache_;	657 UnicodeCache* unicode_cache_;

604	658

605 // Buffers collecting literal strings, numbers, etc.	659 // Buffers collecting literal strings, numbers, etc.

606 LiteralBuffer literal_buffer1_;	660 LiteralBuffer literal_buffer1_;

607 LiteralBuffer literal_buffer2_;	661 LiteralBuffer literal_buffer2_;

608	662

	663 ParamListFinder param_list_finder_;

	664

609 TokenDesc current_; // desc for current token (as returned by Next())	665 TokenDesc current_; // desc for current token (as returned by Next())

610 TokenDesc next_; // desc for next token (one token look-ahead)	666 TokenDesc next_; // desc for next token (one token look-ahead)

611	667

612 // Input stream. Must be initialized to an Utf16CharacterStream.	668 // Input stream. Must be initialized to an Utf16CharacterStream.

613 Utf16CharacterStream* source_;	669 Utf16CharacterStream* source_;

614	670

615	671

616 // Start position of the octal literal last scanned.	672 // Start position of the octal literal last scanned.

617 Location octal_pos_;	673 Location octal_pos_;

618	674

619 // One Unicode character look-ahead; c0_ < 0 at the end of the input.	675 // One Unicode character look-ahead; c0_ < 0 at the end of the input.

620 uc32 c0_;	676 uc32 c0_;

621	677

622 // Whether there is a line terminator whitespace character after	678 // Whether there is a line terminator whitespace character after

623 // the current token, and before the next. Does not count newlines	679 // the current token, and before the next. Does not count newlines

624 // inside multiline comments.	680 // inside multiline comments.

625 bool has_line_terminator_before_next_;	681 bool has_line_terminator_before_next_;

626 // Whether there is a multi-line comment that contains a	682 // Whether there is a multi-line comment that contains a

627 // line-terminator after the current token, and before the next.	683 // line-terminator after the current token, and before the next.

628 bool has_multiline_comment_before_next_;	684 bool has_multiline_comment_before_next_;

629 // Whether we scan 'let' as a keyword for harmony block-scoped let bindings.	685 // Whether we scan 'let' as a keyword for harmony block-scoped let bindings.

630 bool harmony_scoping_;	686 bool harmony_scoping_;

631 // Whether we scan 'module', 'import', 'export' as keywords.	687 // Whether we scan 'module', 'import', 'export' as keywords.

632 bool harmony_modules_;	688 bool harmony_modules_;

633 // Whether we scan 0o777 and 0b111 as numbers.	689 // Whether we scan 0o777 and 0b111 as numbers.

634 bool harmony_numeric_literals_;	690 bool harmony_numeric_literals_;

635 };	691 };

636	692

	693

	694 void ParamListFinder::Update(Scanner* scanner) {
	marja 2014/06/17 11:47:38 So you have this state machine which is updated for each token when scanning, and then there is ParserTraits::ParameterListFromExpression too, for sort of getting the same data "after the fact". This looks pretty duplicated to me. Also, you're adding this to be executed after each token during scanning, and I'm worried about the overhead. Would it be feasible to 1) only do the parameter list dance if we have some indicator that we are inside the parameter list (seen "(", for example, and quit as soon as we realize it's not a valid parameter list) 2) do it either when reading the parameter list, or after, but not both? And I'm not sure if Scanner is the right place for the parameter list logic... What's the reason why the "after the fact" processing is not sufficient? Is it because of PreParser? Did you try out the "PreParserExpression keeps track of the "this is a comma-separated identifier name list" property" approach, and why didn't it fly? Btw, how much does this regress the scanning performance? There's this tool lexer-shell which you can use for finding out.
	695 const Token::Value token = scanner->current_token();

	696

	697 switch (token) {

	698 case Token::IDENTIFIER:

	699 case Token::YIELD:

	700 case Token::FUTURE_STRICT_RESERVED_WORD:

	701 identifier_pos_ = scanner->location().beg_pos;

	702 AddIdentifier(scanner);

	703 break;

	704 default:

	705 identifier_pos_ = -1;

	706 }

	707

	708 switch (state_) {

	709 case Valid:

	710 state_ = Invalid;

	711 start_pos_ = -1;

	712 // Fall-through.

	713 case Invalid:

	714 switch (token) {

	715 case Token::LPAREN:

	716 state_ = LeftParen;

	717 start_pos_ = scanner->location().beg_pos;

	718 break;

	719 default:

	720 // Stay in Invalid state.

	721 break;

	722 }

	723 break;

	724

	725 case LeftParen:

	726 switch (token) {

	727 case Token::LPAREN:

	728 start_pos_ = scanner->location().beg_pos;

	729 break;

	730 case Token::RPAREN:

	731 state_ = Valid;

	732 break;

	733 case Token::YIELD:

	734 case Token::IDENTIFIER:

	735 case Token::FUTURE_STRICT_RESERVED_WORD:

	736 state_ = Identifier;

	737 break;

	738 default:

	739 state_ = Invalid;

	740 }

	741 break;

	742

	743 case Identifier:

	744 switch (token) {

	745 case Token::RPAREN:

	746 state_ = Valid;

	747 break;

	748 case Token::COMMA:

	749 state_ = Comma;

	750 break;

	751 default:

	752 state_ = Invalid;

	753 }

	754 break;

	755

	756 case Comma:

	757 switch (token) {

	758 case Token::YIELD:

	759 case Token::IDENTIFIER:

	760 case Token::FUTURE_STRICT_RESERVED_WORD:

	761 state_ = Identifier;

	762 break;

	763 default:

	764 state_ = Invalid;

	765 }

	766 break;

	767 }

	768 }

	769

	770

	771 void ParamListFinder::AddIdentifier(Scanner* scanner) {

	772 // A duplicate has already been found.

	773 if (duplicate_pos_ >= start_pos_)

	774 return;

	775

	776 int prev_value = scanner->FindSymbol(&duplicate_finder_, 1);

	777 if (prev_value != 0) {

	778 duplicate_pos_ = scanner->location().beg_pos;

	779 }

	780 }

	781

637 } } // namespace v8::internal	782 } } // namespace v8::internal

638	783

639 #endif // V8_SCANNER_H_	784 #endif // V8_SCANNER_H_

OLD	NEW

« src/parser.cc ('K') | « src/preparser.h ('k') | src/scanner.cc » ('j') | src/scanner.cc » ('J')