src/scanner.h - Issue 768203002: Simplify template literal raw string creation

Side by Side Diff: src/scanner.h

Issue 768203002: Simplify template literal raw string creation (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: Add harmony unicode test Created 6 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Features shared by parsing and pre-parsing scanners.	5 // Features shared by parsing and pre-parsing scanners.

6	6

7 #ifndef V8_SCANNER_H_	7 #ifndef V8_SCANNER_H_

8 #define V8_SCANNER_H_	8 #define V8_SCANNER_H_

9	9

10 #include "src/allocation.h"	10 #include "src/allocation.h"

(...skipping 234 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
245 DCHECK(is_one_byte_);	245 DCHECK(is_one_byte_);

246 return Vector<const uint8_t>(	246 return Vector<const uint8_t>(

247 reinterpret_cast<const uint8_t*>(backing_store_.start()),	247 reinterpret_cast<const uint8_t*>(backing_store_.start()),

248 position_);	248 position_);

249 }	249 }

250	250

251 int length() const {	251 int length() const {

252 return is_one_byte_ ? position_ : (position_ >> 1);	252 return is_one_byte_ ? position_ : (position_ >> 1);

253 }	253 }

254	254

	255 void ReduceLength(int delta) {

	256 position_ -= delta * (is_one_byte_ ? kOneByteSize : kUC16Size);

	257 }

	258

255 void Reset() {	259 void Reset() {

256 position_ = 0;	260 position_ = 0;

257 is_one_byte_ = true;	261 is_one_byte_ = true;

258 }	262 }

259	263

260 Handle<String> Internalize(Isolate* isolate) const;	264 Handle<String> Internalize(Isolate* isolate) const;

261	265

262 private:	266 private:

263 static const int kInitialCapacity = 16;	267 static const int kInitialCapacity = 16;

264 static const int kGrowthFactory = 4;	268 static const int kGrowthFactory = 4;

(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
311	315

312 // ----------------------------------------------------------------------------	316 // ----------------------------------------------------------------------------

313 // JavaScript Scanner.	317 // JavaScript Scanner.

314	318

315 class Scanner {	319 class Scanner {

316 public:	320 public:

317 // Scoped helper for literal recording. Automatically drops the literal	321 // Scoped helper for literal recording. Automatically drops the literal

318 // if aborting the scanning before it's complete.	322 // if aborting the scanning before it's complete.

319 class LiteralScope {	323 class LiteralScope {

320 public:	324 public:

321 explicit LiteralScope(Scanner* self)	325 explicit LiteralScope(Scanner* self, bool capture_raw = false)

322 : scanner_(self), complete_(false) {	326 : scanner_(self), complete_(false) {

323 scanner_->StartLiteral();	327 scanner_->StartLiteral();

	328 if (capture_raw) scanner_->StartRawLiteral();

324 }	329 }

325 ~LiteralScope() {	330 ~LiteralScope() {

326 if (!complete_) scanner_->DropLiteral();	331 if (!complete_) scanner_->DropLiteral();

327 }	332 }

328 void Complete() {	333 void Complete() {

329 scanner_->TerminateLiteral();	334 scanner_->TerminateLiteral();

330 complete_ = true;	335 complete_ = true;

331 }	336 }

332	337

333 private:	338 private:

(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
385 DCHECK_NOT_NULL(current_.literal_chars);	390 DCHECK_NOT_NULL(current_.literal_chars);

386 return current_.literal_chars->is_contextual_keyword(keyword);	391 return current_.literal_chars->is_contextual_keyword(keyword);

387 }	392 }

388 bool is_next_contextual_keyword(Vector<const char> keyword) {	393 bool is_next_contextual_keyword(Vector<const char> keyword) {

389 DCHECK_NOT_NULL(next_.literal_chars);	394 DCHECK_NOT_NULL(next_.literal_chars);

390 return next_.literal_chars->is_contextual_keyword(keyword);	395 return next_.literal_chars->is_contextual_keyword(keyword);

391 }	396 }

392	397

393 const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory);	398 const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory);

394 const AstRawString* NextSymbol(AstValueFactory* ast_value_factory);	399 const AstRawString* NextSymbol(AstValueFactory* ast_value_factory);

	400 const AstRawString* CurrentRawSymbol(AstValueFactory* ast_value_factory);

395	401

396 double DoubleValue();	402 double DoubleValue();

397 bool LiteralMatches(const char* data, int length, bool allow_escapes = true) {	403 bool LiteralMatches(const char* data, int length, bool allow_escapes = true) {

398 if (is_literal_one_byte() &&	404 if (is_literal_one_byte() &&

399 literal_length() == length &&	405 literal_length() == length &&

400 (allow_escapes || !literal_contains_escapes())) {	406 (allow_escapes || !literal_contains_escapes())) {

401 const char* token =	407 const char* token =

402 reinterpret_cast<const char*>(literal_one_byte_string().start());	408 reinterpret_cast<const char*>(literal_one_byte_string().start());

403 return !strncmp(token, data, length);	409 return !strncmp(token, data, length);

404 }	410 }

(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
486 }	492 }

487	493

488 bool IdentifierIsFutureStrictReserved(const AstRawString* string) const;	494 bool IdentifierIsFutureStrictReserved(const AstRawString* string) const;

489	495

490 private:	496 private:

491 // The current and look-ahead token.	497 // The current and look-ahead token.

492 struct TokenDesc {	498 struct TokenDesc {

493 Token::Value token;	499 Token::Value token;

494 Location location;	500 Location location;

495 LiteralBuffer* literal_chars;	501 LiteralBuffer* literal_chars;

	502 LiteralBuffer* raw_literal_chars;

496 };	503 };

497	504

498 static const int kCharacterLookaheadBufferSize = 1;	505 static const int kCharacterLookaheadBufferSize = 1;

499	506

500 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.	507 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.

501 uc32 ScanOctalEscape(uc32 c, int length);	508 uc32 ScanOctalEscape(uc32 c, int length);

502	509

503 // Call this after setting source_ to the input.	510 // Call this after setting source_ to the input.

504 void Init() {	511 void Init() {

505 // Set c0_ (one character ahead)	512 // Set c0_ (one character ahead)

506 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1);	513 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1);

507 Advance();	514 Advance();

508 // Initialize current_ to not refer to a literal.	515 // Initialize current_ to not refer to a literal.

509 current_.literal_chars = NULL;	516 current_.literal_chars = NULL;

	517 current_.raw_literal_chars = NULL;

510 }	518 }

511	519

512 // Literal buffer support	520 // Literal buffer support

513 inline void StartLiteral() {	521 inline void StartLiteral() {

514 LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ?	522 LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ?

515 &literal_buffer2_ : &literal_buffer1_;	523 &literal_buffer2_ : &literal_buffer1_;

516 free_buffer->Reset();	524 free_buffer->Reset();

517 next_.literal_chars = free_buffer;	525 next_.literal_chars = free_buffer;

518 }	526 }

519	527

	528 inline void StartRawLiteral() {

	529 raw_literal_buffer_.Reset();

	530 next_.raw_literal_chars = &raw_literal_buffer_;

	531 capturing_raw_literal_ = true;

	532 }

	533

520 INLINE(void AddLiteralChar(uc32 c)) {	534 INLINE(void AddLiteralChar(uc32 c)) {

521 DCHECK_NOT_NULL(next_.literal_chars);	535 DCHECK_NOT_NULL(next_.literal_chars);

522 next_.literal_chars->AddChar(c);	536 next_.literal_chars->AddChar(c);

523 }	537 }

524	538

	539 INLINE(void AddRawLiteralChar(uc32 c)) {

	540 DCHECK(capturing_raw_literal_);

	541 DCHECK_NOT_NULL(next_.raw_literal_chars);

	542 next_.raw_literal_chars->AddChar(c);

	543 }

	544

	545 INLINE(void ReduceRawLiteralLength(int delta)) {

	546 DCHECK(capturing_raw_literal_);

	547 DCHECK_NOT_NULL(next_.raw_literal_chars);

	548 next_.raw_literal_chars->ReduceLength(delta);

	549 }

	550

525 // Complete scanning of a literal.	551 // Complete scanning of a literal.

526 inline void TerminateLiteral() {	552 inline void TerminateLiteral() { capturing_raw_literal_ = false; }

527 // Does nothing in the current implementation.

528 }

529	553

530 // Stops scanning of a literal and drop the collected characters,	554 // Stops scanning of a literal and drop the collected characters,

531 // e.g., due to an encountered error.	555 // e.g., due to an encountered error.

532 inline void DropLiteral() {	556 inline void DropLiteral() {

533 next_.literal_chars = NULL;	557 next_.literal_chars = NULL;

	558 next_.raw_literal_chars = NULL;

	559 capturing_raw_literal_ = false;

534 }	560 }

535	561

536 inline void AddLiteralCharAdvance() {	562 inline void AddLiteralCharAdvance() {

537 AddLiteralChar(c0_);	563 AddLiteralChar(c0_);

538 Advance();	564 Advance();

539 }	565 }

540	566

541 // Low-level scanning support.	567 // Low-level scanning support.

542 void Advance() {	568 void Advance() {

	569 if (capturing_raw_literal_) {

	570 AddRawLiteralChar(c0_);

	571 }

543 c0_ = source_->Advance();	572 c0_ = source_->Advance();

544 if (unibrow::Utf16::IsLeadSurrogate(c0_)) {	573 if (unibrow::Utf16::IsLeadSurrogate(c0_)) {

545 uc32 c1 = source_->Advance();	574 uc32 c1 = source_->Advance();

546 if (!unibrow::Utf16::IsTrailSurrogate(c1)) {	575 if (!unibrow::Utf16::IsTrailSurrogate(c1)) {

547 source_->PushBack(c1);	576 source_->PushBack(c1);

548 } else {	577 } else {

549 c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1);	578 c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1);

550 }	579 }

551 }	580 }

552 }	581 }

553	582

554 void PushBack(uc32 ch) {	583 void PushBack(uc32 ch) {

555 if (ch > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {	584 if (ch > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {

556 source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_));	585 source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_));

557 source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_));	586 source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_));

	587 if (capturing_raw_literal_) ReduceRawLiteralLength(2);

558 } else {	588 } else {

559 source_->PushBack(c0_);	589 source_->PushBack(c0_);

	590 if (capturing_raw_literal_) ReduceRawLiteralLength(1);

560 }	591 }

561 c0_ = ch;	592 c0_ = ch;

562 }	593 }

563	594

564 inline Token::Value Select(Token::Value tok) {	595 inline Token::Value Select(Token::Value tok) {

565 Advance();	596 Advance();

566 return tok;	597 return tok;

567 }	598 }

568	599

569 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) {	600 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) {

570 Advance();	601 Advance();

571 if (c0_ == next) {	602 if (c0_ == next) {

572 Advance();	603 Advance();

573 return then;	604 return then;

574 } else {	605 } else {

575 return else_;	606 return else_;

576 }	607 }

577 }	608 }

578	609

579 // Returns the literal string, if any, for the current token (the	610 // Returns the literal string, if any, for the current token (the

580 // token last returned by Next()). The string is 0-terminated.	611 // token last returned by Next()). The string is 0-terminated.

581 // Literal strings are collected for identifiers, strings, and	612 // Literal strings are collected for identifiers, strings, numbers as well

582 // numbers.	613 // as for template literals. For template literals we also collect the raw

	614 // form.

583 // These functions only give the correct result if the literal	615 // These functions only give the correct result if the literal

584 // was scanned between calls to StartLiteral() and TerminateLiteral().	616 // was scanned between calls to StartLiteral() and TerminateLiteral().

585 Vector<const uint8_t> literal_one_byte_string() {	617 Vector<const uint8_t> literal_one_byte_string() {

586 DCHECK_NOT_NULL(current_.literal_chars);	618 DCHECK_NOT_NULL(current_.literal_chars);

587 return current_.literal_chars->one_byte_literal();	619 return current_.literal_chars->one_byte_literal();

588 }	620 }

589 Vector<const uint16_t> literal_two_byte_string() {	621 Vector<const uint16_t> literal_two_byte_string() {

590 DCHECK_NOT_NULL(current_.literal_chars);	622 DCHECK_NOT_NULL(current_.literal_chars);

591 return current_.literal_chars->two_byte_literal();	623 return current_.literal_chars->two_byte_literal();

592 }	624 }

(...skipping 12 matching lines...) Expand all Loading...
605 return next_.literal_chars->one_byte_literal();	637 return next_.literal_chars->one_byte_literal();

606 }	638 }

607 Vector<const uint16_t> next_literal_two_byte_string() {	639 Vector<const uint16_t> next_literal_two_byte_string() {

608 DCHECK_NOT_NULL(next_.literal_chars);	640 DCHECK_NOT_NULL(next_.literal_chars);

609 return next_.literal_chars->two_byte_literal();	641 return next_.literal_chars->two_byte_literal();

610 }	642 }

611 bool is_next_literal_one_byte() {	643 bool is_next_literal_one_byte() {

612 DCHECK_NOT_NULL(next_.literal_chars);	644 DCHECK_NOT_NULL(next_.literal_chars);

613 return next_.literal_chars->is_one_byte();	645 return next_.literal_chars->is_one_byte();

614 }	646 }

615 int next_literal_length() const {	647 Vector<const uint8_t> raw_literal_one_byte_string() {

616 DCHECK_NOT_NULL(next_.literal_chars);	648 DCHECK_NOT_NULL(current_.raw_literal_chars);

617 return next_.literal_chars->length();	649 return current_.raw_literal_chars->one_byte_literal();

618 }	650 }

	651 Vector<const uint16_t> raw_literal_two_byte_string() {

	652 DCHECK_NOT_NULL(current_.raw_literal_chars);

	653 return current_.raw_literal_chars->two_byte_literal();

	654 }

	655 bool is_raw_literal_one_byte() {

	656 DCHECK_NOT_NULL(current_.raw_literal_chars);

	657 return current_.raw_literal_chars->is_one_byte();

	658 }

	659

619	660

620 uc32 ScanHexNumber(int expected_length);	661 uc32 ScanHexNumber(int expected_length);

621 // Scan a number of any length but not bigger than max_value. For example, the	662 // Scan a number of any length but not bigger than max_value. For example, the

622 // number can be 000000001, so it's very long in characters but its value is	663 // number can be 000000001, so it's very long in characters but its value is

623 // small.	664 // small.

624 uc32 ScanUnlimitedLengthHexNumber(int max_value);	665 uc32 ScanUnlimitedLengthHexNumber(int max_value);

625	666

626 // Scans a single JavaScript token.	667 // Scans a single JavaScript token.

627 void Scan();	668 void Scan();

628	669

(...skipping 30 matching lines...) Expand all Loading...
659 UnicodeCache* unicode_cache_;	700 UnicodeCache* unicode_cache_;

660	701

661 // Buffers collecting literal strings, numbers, etc.	702 // Buffers collecting literal strings, numbers, etc.

662 LiteralBuffer literal_buffer1_;	703 LiteralBuffer literal_buffer1_;

663 LiteralBuffer literal_buffer2_;	704 LiteralBuffer literal_buffer2_;

664	705

665 // Values parsed from magic comments.	706 // Values parsed from magic comments.

666 LiteralBuffer source_url_;	707 LiteralBuffer source_url_;

667 LiteralBuffer source_mapping_url_;	708 LiteralBuffer source_mapping_url_;

668	709

	710 // Buffer to store raw string values

	711 LiteralBuffer raw_literal_buffer_;

	712

	713 // We only need to capture the raw literal when we are scanning template

	714 // literal spans.

	715 bool capturing_raw_literal_;

	716

669 TokenDesc current_; // desc for current token (as returned by Next())	717 TokenDesc current_; // desc for current token (as returned by Next())

670 TokenDesc next_; // desc for next token (one token look-ahead)	718 TokenDesc next_; // desc for next token (one token look-ahead)

671	719

672 // Input stream. Must be initialized to an Utf16CharacterStream.	720 // Input stream. Must be initialized to an Utf16CharacterStream.

673 Utf16CharacterStream* source_;	721 Utf16CharacterStream* source_;

674	722

675	723

676 // Start position of the octal literal last scanned.	724 // Start position of the octal literal last scanned.

677 Location octal_pos_;	725 Location octal_pos_;

678	726

(...skipping 17 matching lines...) Expand all Loading...
696 bool harmony_classes_;	744 bool harmony_classes_;

697 // Whether we scan TEMPLATE_SPAN and TEMPLATE_TAIL	745 // Whether we scan TEMPLATE_SPAN and TEMPLATE_TAIL

698 bool harmony_templates_;	746 bool harmony_templates_;

699 // Whether we allow \u{xxxxx}.	747 // Whether we allow \u{xxxxx}.

700 bool harmony_unicode_;	748 bool harmony_unicode_;

701 };	749 };

702	750

703 } } // namespace v8::internal	751 } } // namespace v8::internal

704	752

705 #endif // V8_SCANNER_H_	753 #endif // V8_SCANNER_H_

OLD	NEW

« no previous file with comments | « src/parser.cc ('k') | src/scanner.cc » ('j') | no next file with comments »