src/scanner-base.h - Issue 5188009: Merge preparser Scanner with main JavaScript scanner.

Side by Side Diff: src/scanner-base.h

Issue 5188009: Merge preparser Scanner with main JavaScript scanner. (Closed)

Patch Set: Created 10 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2010 the V8 project authors. All rights reserved.	1 // Copyright 2010 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 309 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
320	320

321 // One Unicode character look-ahead; c0_ < 0 at the end of the input.	321 // One Unicode character look-ahead; c0_ < 0 at the end of the input.

322 uc32 c0_;	322 uc32 c0_;

323 };	323 };

324	324

325 // ----------------------------------------------------------------------------	325 // ----------------------------------------------------------------------------

326 // JavaScriptScanner - base logic for JavaScript scanning.	326 // JavaScriptScanner - base logic for JavaScript scanning.

327	327

328 class JavaScriptScanner : public Scanner {	328 class JavaScriptScanner : public Scanner {

329 public:	329 public:

	330

	331 enum LiteralType {

	332 kLiteralNumber = 1,

	333 kLiteralIdentifier = 2,

	334 kLiteralString = 4,

	335 kLiteralRegExp = 8,

	336 kLiteralRegExpFlags = 16

	337 };

	338

	339 // More specialized literal scope.
	Søren Thygesen Gjesse 2010/11/19 08:17:21 More specialized - in what sense? Lasse Reichstein 2010/11/19 08:51:24 Elaborated.
	340 class LiteralScope {

	341 public:

	342 LiteralScope(JavaScriptScanner* self, LiteralType type)

	343 : scanner_(self), complete_(false) {

	344 if (scanner_->RecordsLiteral(type)) {

	345 scanner_->StartLiteral();

	346 }

	347 }

	348 ~LiteralScope() {

	349 if (!complete_) scanner_->DropLiteral();

	350 }

	351 void Complete() {

	352 scanner_->TerminateLiteral();

	353 complete_ = true;

	354 }

	355

	356 private:

	357 JavaScriptScanner* scanner_;

	358 bool complete_;

	359 };

	360

330 JavaScriptScanner();	361 JavaScriptScanner();

331	362

332 // Returns the next token.	363 // Returns the next token.

333 Token::Value Next();	364 Token::Value Next();

334	365

335 // Returns true if there was a line terminator before the peek'ed token.	366 // Returns true if there was a line terminator before the peek'ed token.

336 bool has_line_terminator_before_next() const {	367 bool has_line_terminator_before_next() const {

337 return has_line_terminator_before_next_;	368 return has_line_terminator_before_next_;

338 }	369 }

339	370

340 // Scans the input as a regular expression pattern, previous	371 // Scans the input as a regular expression pattern, previous

341 // character(s) must be /(=). Returns true if a pattern is scanned.	372 // character(s) must be /(=). Returns true if a pattern is scanned.

342 bool ScanRegExpPattern(bool seen_equal);	373 bool ScanRegExpPattern(bool seen_equal);

343 // Returns true if regexp flags are scanned (always since flags can	374 // Returns true if regexp flags are scanned (always since flags can

344 // be empty).	375 // be empty).

345 bool ScanRegExpFlags();	376 bool ScanRegExpFlags();

346	377

347 // Tells whether the buffer contains an identifier (no escapes).	378 // Tells whether the buffer contains an identifier (no escapes).

348 // Used for checking if a property name is an identifier.	379 // Used for checking if a property name is an identifier.

349 static bool IsIdentifier(unibrow::CharacterStream* buffer);	380 static bool IsIdentifier(unibrow::CharacterStream* buffer);

350	381

351 // Seek forward to the given position. This operation does not	382 // Seek forward to the given position. This operation does not

352 // work in general, for instance when there are pushed back	383 // work in general, for instance when there are pushed back

353 // characters, but works for seeking forward until simple delimiter	384 // characters, but works for seeking forward until simple delimiter

354 // tokens, which is what it is used for.	385 // tokens, which is what it is used for.

355 void SeekForward(int pos);	386 void SeekForward(int pos);

356	387

	388 // Whether this scanner records the given literal type or not.

	389 bool RecordsLiteral(LiteralType type) {

	390 return (literal_flags_ & type) != 0;

	391 }

	392

357 protected:	393 protected:

358 bool SkipWhiteSpace();	394 bool SkipWhiteSpace();

359 Token::Value SkipSingleLineComment();	395 Token::Value SkipSingleLineComment();

360 Token::Value SkipMultiLineComment();	396 Token::Value SkipMultiLineComment();

361	397

362 // Scans a single JavaScript token.	398 // Scans a single JavaScript token.

363 void Scan();	399 void Scan();

364	400

365 void ScanDecimalDigits();	401 void ScanDecimalDigits();

366 Token::Value ScanNumber(bool seen_period);	402 Token::Value ScanNumber(bool seen_period);

367 Token::Value ScanIdentifier();	403 Token::Value ScanIdentifierOrKeyword();

	404 Token::Value ScanIdentifierSuffix(LiteralScope* literal);

368	405

369 void ScanEscape();	406 void ScanEscape();

370 Token::Value ScanString();	407 Token::Value ScanString();

371	408

372 // Scans a possible HTML comment -- begins with '<!'.	409 // Scans a possible HTML comment -- begins with '<!'.

373 Token::Value ScanHtmlComment();	410 Token::Value ScanHtmlComment();

374	411

375 // Decodes a unicode escape-sequence which is part of an identifier.	412 // Decodes a unicode escape-sequence which is part of an identifier.

376 // If the escape sequence cannot be decoded the result is kBadChar.	413 // If the escape sequence cannot be decoded the result is kBadChar.

377 uc32 ScanIdentifierUnicodeEscape();	414 uc32 ScanIdentifierUnicodeEscape();

378	415

	416 int literal_flags_;

379 bool has_line_terminator_before_next_;	417 bool has_line_terminator_before_next_;

380 };	418 };

381	419

382	420

383 // ----------------------------------------------------------------------------	421 // ----------------------------------------------------------------------------

384 // Keyword matching state machine.	422 // Keyword matching state machine.

385	423

386 class KeywordMatcher {	424 class KeywordMatcher {

387 // Incrementally recognize keywords.	425 // Incrementally recognize keywords.

388 //	426 //

389 // Recognized keywords:	427 // Recognized keywords:

390 // break case catch const* continue debugger* default delete do else	428 // break case catch const* continue debugger* default delete do else

391 // finally false for function if in instanceof native* new null	429 // finally false for function if in instanceof native* new null

392 // return switch this throw true try typeof var void while with	430 // return switch this throw true try typeof var void while with

393 //	431 //

394 // *: Actually "future reserved keywords". These are the only ones we	432 // *: Actually "future reserved keywords". These are the only ones we

395 // recognize, the remaining are allowed as identifiers.	433 // recognize, the remaining are allowed as identifiers.

396 // In ES5 strict mode, we should disallow all reserved keywords.	434 // In ES5 strict mode, we should disallow all reserved keywords.

397 public:	435 public:

398 KeywordMatcher()	436 KeywordMatcher()

399 : state_(INITIAL),	437 : state_(INITIAL),

400 token_(Token::IDENTIFIER),	438 token_(Token::IDENTIFIER),

401 keyword_(NULL),	439 keyword_(NULL),

402 counter_(0),	440 counter_(0),

403 keyword_token_(Token::ILLEGAL) {}	441 keyword_token_(Token::ILLEGAL) {}

404	442

405 Token::Value token() { return token_; }	443 Token::Value token() { return token_; }

406	444

407 inline void AddChar(unibrow::uchar input) {	445 inline bool AddChar(unibrow::uchar input) {

408 if (state_ != UNMATCHABLE) {	446 if (state_ != UNMATCHABLE) {

409 Step(input);	447 Step(input);

410 }	448 }

	449 return state_ != UNMATCHABLE;

411 }	450 }

412	451

413 void Fail() {	452 void Fail() {

414 token_ = Token::IDENTIFIER;	453 token_ = Token::IDENTIFIER;

415 state_ = UNMATCHABLE;	454 state_ = UNMATCHABLE;

416 }	455 }

417	456

418 private:	457 private:

419 enum State {	458 enum State {

420 UNMATCHABLE,	459 UNMATCHABLE,

(...skipping 30 matching lines...) Expand all Loading...
451 kFirstCharRangeMax - kFirstCharRangeMin + 1;	490 kFirstCharRangeMax - kFirstCharRangeMin + 1;

452 // State map for first keyword character range.	491 // State map for first keyword character range.

453 static FirstState first_states_[kFirstCharRangeLength];	492 static FirstState first_states_[kFirstCharRangeLength];

454	493

455 // If input equals keyword's character at position, continue matching keyword	494 // If input equals keyword's character at position, continue matching keyword

456 // from that position.	495 // from that position.

457 inline bool MatchKeywordStart(unibrow::uchar input,	496 inline bool MatchKeywordStart(unibrow::uchar input,

458 const char* keyword,	497 const char* keyword,

459 int position,	498 int position,

460 Token::Value token_if_match) {	499 Token::Value token_if_match) {

461 if (input == static_cast<unibrow::uchar>(keyword[position])) {	500 if (input != static_cast<unibrow::uchar>(keyword[position])) {

462 state_ = KEYWORD_PREFIX;	501 return false;

463 this->keyword_ = keyword;

464 this->counter_ = position + 1;

465 this->keyword_token_ = token_if_match;

466 return true;

467 }	502 }

468 return false;	503 state_ = KEYWORD_PREFIX;

	504 this->keyword_ = keyword;

	505 this->counter_ = position + 1;

	506 this->keyword_token_ = token_if_match;

	507 return true;

469 }	508 }

470	509

471 // If input equals match character, transition to new state and return true.	510 // If input equals match character, transition to new state and return true.

472 inline bool MatchState(unibrow::uchar input, char match, State new_state) {	511 inline bool MatchState(unibrow::uchar input, char match, State new_state) {

473 if (input == static_cast<unibrow::uchar>(match)) {	512 if (input != static_cast<unibrow::uchar>(match)) {

474 state_ = new_state;	513 return false;

475 return true;

476 }	514 }

477 return false;	515 state_ = new_state;

	516 return true;

478 }	517 }

479	518

480 inline bool MatchKeyword(unibrow::uchar input,	519 inline bool MatchKeyword(unibrow::uchar input,

481 char match,	520 char match,

482 State new_state,

483 Token::Value keyword_token) {	521 Token::Value keyword_token) {

484 if (input != static_cast<unibrow::uchar>(match)) {	522 if (input != static_cast<unibrow::uchar>(match)) {

485 return false;	523 return false;

486 }	524 }

487 state_ = new_state;	525 state_ = KEYWORD_MATCHED;

488 token_ = keyword_token;	526 token_ = keyword_token;

489 return true;	527 return true;

490 }	528 }

491	529

492 void Step(unibrow::uchar input);	530 void Step(unibrow::uchar input);

493	531

494 // Current state.	532 // Current state.

495 State state_;	533 State state_;

496 // Token for currently added characters.	534 // Token for currently added characters.

497 Token::Value token_;	535 Token::Value token_;

498	536

499 // Matching a specific keyword string (there is only one possible valid	537 // Matching a specific keyword string (there is only one possible valid

500 // keyword with the current prefix).	538 // keyword with the current prefix).

501 const char* keyword_;	539 const char* keyword_;

502 int counter_;	540 int counter_;

503 Token::Value keyword_token_;	541 Token::Value keyword_token_;

504 };	542 };

505	543

506	544

507 } } // namespace v8::internal	545 } } // namespace v8::internal

508	546

509 #endif // V8_SCANNER_BASE_H_	547 #endif // V8_SCANNER_BASE_H_

OLD	NEW

« src/scanner.h ('K') | « src/scanner.cc ('k') | src/scanner-base.cc » ('j') | no next file with comments »