src/scanner.h - Issue 768203002: Simplify template literal raw string creation

Side by Side Diff: src/scanner.h

Issue 768203002: Simplify template literal raw string creation (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: Add DCHECK in PushBack and remove some unused functions Created 6 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Features shared by parsing and pre-parsing scanners.	5 // Features shared by parsing and pre-parsing scanners.

6	6

7 #ifndef V8_SCANNER_H_	7 #ifndef V8_SCANNER_H_

8 #define V8_SCANNER_H_	8 #define V8_SCANNER_H_

9	9

10 #include "src/allocation.h"	10 #include "src/allocation.h"

(...skipping 234 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
245 DCHECK(is_one_byte_);	245 DCHECK(is_one_byte_);

246 return Vector<const uint8_t>(	246 return Vector<const uint8_t>(

247 reinterpret_cast<const uint8_t*>(backing_store_.start()),	247 reinterpret_cast<const uint8_t*>(backing_store_.start()),

248 position_);	248 position_);

249 }	249 }

250	250

251 int length() const {	251 int length() const {

252 return is_one_byte_ ? position_ : (position_ >> 1);	252 return is_one_byte_ ? position_ : (position_ >> 1);

253 }	253 }

254	254

	255 void ReduceLength(int delta) {

	256 position_ -= delta * (is_one_byte_ ? kOneByteSize : kUC16Size);

	257 }

	258

255 void Reset() {	259 void Reset() {

256 position_ = 0;	260 position_ = 0;

257 is_one_byte_ = true;	261 is_one_byte_ = true;

258 }	262 }

259	263

260 Handle<String> Internalize(Isolate* isolate) const;	264 Handle<String> Internalize(Isolate* isolate) const;

261	265

262 private:	266 private:

263 static const int kInitialCapacity = 16;	267 static const int kInitialCapacity = 16;

264 static const int kGrowthFactory = 4;	268 static const int kGrowthFactory = 4;

(...skipping 46 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
311	315

312 // ----------------------------------------------------------------------------	316 // ----------------------------------------------------------------------------

313 // JavaScript Scanner.	317 // JavaScript Scanner.

314	318

315 class Scanner {	319 class Scanner {

316 public:	320 public:

317 // Scoped helper for literal recording. Automatically drops the literal	321 // Scoped helper for literal recording. Automatically drops the literal

318 // if aborting the scanning before it's complete.	322 // if aborting the scanning before it's complete.

319 class LiteralScope {	323 class LiteralScope {

320 public:	324 public:

321 explicit LiteralScope(Scanner* self)	325 explicit LiteralScope(Scanner* self, bool capture_raw = false)

322 : scanner_(self), complete_(false) {	326 : scanner_(self), complete_(false) {

323 scanner_->StartLiteral();	327 scanner_->StartLiteral();

	328 if (capture_raw) scanner_->StartRawLiteral();

324 }	329 }

325 ~LiteralScope() {	330 ~LiteralScope() {

326 if (!complete_) scanner_->DropLiteral();	331 if (!complete_) scanner_->DropLiteral();

327 }	332 }

328 void Complete() {	333 void Complete() {

329 scanner_->TerminateLiteral();	334 scanner_->TerminateLiteral();

330 complete_ = true;	335 complete_ = true;

331 }	336 }

332	337

333 private:	338 private:

(...skipping 51 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
385 DCHECK_NOT_NULL(current_.literal_chars);	390 DCHECK_NOT_NULL(current_.literal_chars);

386 return current_.literal_chars->is_contextual_keyword(keyword);	391 return current_.literal_chars->is_contextual_keyword(keyword);

387 }	392 }

388 bool is_next_contextual_keyword(Vector<const char> keyword) {	393 bool is_next_contextual_keyword(Vector<const char> keyword) {

389 DCHECK_NOT_NULL(next_.literal_chars);	394 DCHECK_NOT_NULL(next_.literal_chars);

390 return next_.literal_chars->is_contextual_keyword(keyword);	395 return next_.literal_chars->is_contextual_keyword(keyword);

391 }	396 }

392	397

393 const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory);	398 const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory);

394 const AstRawString* NextSymbol(AstValueFactory* ast_value_factory);	399 const AstRawString* NextSymbol(AstValueFactory* ast_value_factory);

	400 const AstRawString* CurrentRawSymbol(AstValueFactory* ast_value_factory);

395	401

396 double DoubleValue();	402 double DoubleValue();

397 bool LiteralMatches(const char* data, int length, bool allow_escapes = true) {	403 bool LiteralMatches(const char* data, int length, bool allow_escapes = true) {

398 if (is_literal_one_byte() &&	404 if (is_literal_one_byte() &&

399 literal_length() == length &&	405 literal_length() == length &&

400 (allow_escapes || !literal_contains_escapes())) {	406 (allow_escapes || !literal_contains_escapes())) {

401 const char* token =	407 const char* token =

402 reinterpret_cast<const char*>(literal_one_byte_string().start());	408 reinterpret_cast<const char*>(literal_one_byte_string().start());

403 return !strncmp(token, data, length);	409 return !strncmp(token, data, length);

404 }	410 }

(...skipping 79 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
484 }	490 }

485	491

486 bool IdentifierIsFutureStrictReserved(const AstRawString* string) const;	492 bool IdentifierIsFutureStrictReserved(const AstRawString* string) const;

487	493

488 private:	494 private:

489 // The current and look-ahead token.	495 // The current and look-ahead token.

490 struct TokenDesc {	496 struct TokenDesc {

491 Token::Value token;	497 Token::Value token;

492 Location location;	498 Location location;

493 LiteralBuffer* literal_chars;	499 LiteralBuffer* literal_chars;

	500 LiteralBuffer* raw_literal_chars;

494 };	501 };

495	502

496 static const int kCharacterLookaheadBufferSize = 1;	503 static const int kCharacterLookaheadBufferSize = 1;

497	504

498 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.	505 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.

499 uc32 ScanOctalEscape(uc32 c, int length);	506 uc32 ScanOctalEscape(uc32 c, int length);

500	507

501 // Call this after setting source_ to the input.	508 // Call this after setting source_ to the input.

502 void Init() {	509 void Init() {

503 // Set c0_ (one character ahead)	510 // Set c0_ (one character ahead)

504 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1);	511 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1);

505 Advance();	512 Advance();

506 // Initialize current_ to not refer to a literal.	513 // Initialize current_ to not refer to a literal.

507 current_.literal_chars = NULL;	514 current_.literal_chars = NULL;

	515 current_.raw_literal_chars = NULL;

508 }	516 }

509	517

510 // Literal buffer support	518 // Literal buffer support

511 inline void StartLiteral() {	519 inline void StartLiteral() {

512 LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ?	520 LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ?

513 &literal_buffer2_ : &literal_buffer1_;	521 &literal_buffer2_ : &literal_buffer1_;

514 free_buffer->Reset();	522 free_buffer->Reset();

515 next_.literal_chars = free_buffer;	523 next_.literal_chars = free_buffer;

516 }	524 }

517	525

	526 inline void StartRawLiteral() {

	527 raw_literal_buffer_.Reset();

	528 next_.raw_literal_chars = &raw_literal_buffer_;

	529 capturing_raw_literal_ = true;

	530 }

	531

518 INLINE(void AddLiteralChar(uc32 c)) {	532 INLINE(void AddLiteralChar(uc32 c)) {

519 DCHECK_NOT_NULL(next_.literal_chars);	533 DCHECK_NOT_NULL(next_.literal_chars);

520 next_.literal_chars->AddChar(c);	534 next_.literal_chars->AddChar(c);

521 }	535 }

522	536

	537 INLINE(void AddRawLiteralChar(uc32 c)) {

	538 DCHECK(capturing_raw_literal_);

	539 DCHECK_NOT_NULL(next_.raw_literal_chars);

	540 next_.raw_literal_chars->AddChar(c);

	541 }

	542

	543 INLINE(void ReduceRawLiteralLength(int delta)) {

	544 DCHECK(capturing_raw_literal_);

	545 DCHECK_NOT_NULL(next_.raw_literal_chars);

	546 next_.raw_literal_chars->ReduceLength(delta);

	547 }

	548

523 // Complete scanning of a literal.	549 // Complete scanning of a literal.

524 inline void TerminateLiteral() {	550 inline void TerminateLiteral() { capturing_raw_literal_ = false; }

525 // Does nothing in the current implementation.

526 }

527	551

528 // Stops scanning of a literal and drop the collected characters,	552 // Stops scanning of a literal and drop the collected characters,

529 // e.g., due to an encountered error.	553 // e.g., due to an encountered error.

530 inline void DropLiteral() {	554 inline void DropLiteral() {

531 next_.literal_chars = NULL;	555 next_.literal_chars = NULL;

	556 next_.raw_literal_chars = NULL;

	557 capturing_raw_literal_ = false;

532 }	558 }

533	559

534 inline void AddLiteralCharAdvance() {	560 inline void AddLiteralCharAdvance() {

535 AddLiteralChar(c0_);	561 AddLiteralChar(c0_);

536 Advance();	562 Advance();

537 }	563 }

538	564

539 // Low-level scanning support.	565 // Low-level scanning support.

540 void Advance() {	566 void Advance() {

	567 if (capturing_raw_literal_) {

	568 AddRawLiteralChar(c0_);

	569 }

541 c0_ = source_->Advance();	570 c0_ = source_->Advance();

542 if (unibrow::Utf16::IsLeadSurrogate(c0_)) {	571 if (unibrow::Utf16::IsLeadSurrogate(c0_)) {

543 uc32 c1 = source_->Advance();	572 uc32 c1 = source_->Advance();

544 if (!unibrow::Utf16::IsTrailSurrogate(c1)) {	573 if (!unibrow::Utf16::IsTrailSurrogate(c1)) {

545 source_->PushBack(c1);	574 source_->PushBack(c1);

546 } else {	575 } else {

547 c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1);	576 c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1);

548 }	577 }

549 }	578 }

550 }	579 }

551	580

552 void PushBack(uc32 ch) {	581 void PushBack(uc32 ch) {

	582 DCHECK(ch < 0 || !capturing_raw_literal_);
	caitp (gmail) 2014/12/02 21:37:35 I feel like PushBack() is okay --- it's more just that you want to make sure PushBack() is closely followed by a ReduceRawLiteralLength() if raw literals are being captured, am I wrong about that?
553 if (ch > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {	583 if (ch > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {

554 source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_));	584 source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_));

555 source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_));	585 source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_));

556 } else {	586 } else {

557 source_->PushBack(c0_);	587 source_->PushBack(c0_);

558 }	588 }

559 c0_ = ch;	589 c0_ = ch;

560 }	590 }

561	591

562 inline Token::Value Select(Token::Value tok) {	592 inline Token::Value Select(Token::Value tok) {

563 Advance();	593 Advance();

564 return tok;	594 return tok;

565 }	595 }

566	596

567 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) {	597 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) {

568 Advance();	598 Advance();

569 if (c0_ == next) {	599 if (c0_ == next) {

570 Advance();	600 Advance();

571 return then;	601 return then;

572 } else {	602 } else {

573 return else_;	603 return else_;

574 }	604 }

575 }	605 }

576	606

577 // Returns the literal string, if any, for the current token (the	607 // Returns the literal string, if any, for the current token (the

578 // token last returned by Next()). The string is 0-terminated.	608 // token last returned by Next()). The string is 0-terminated.

579 // Literal strings are collected for identifiers, strings, and	609 // Literal strings are collected for identifiers, strings, numbers as well

580 // numbers.	610 // as for template literals. For template literals we also collect the raw

	611 // form.

581 // These functions only give the correct result if the literal	612 // These functions only give the correct result if the literal

582 // was scanned between calls to StartLiteral() and TerminateLiteral().	613 // was scanned between calls to StartLiteral() and TerminateLiteral().

583 Vector<const uint8_t> literal_one_byte_string() {	614 Vector<const uint8_t> literal_one_byte_string() {

584 DCHECK_NOT_NULL(current_.literal_chars);	615 DCHECK_NOT_NULL(current_.literal_chars);

585 return current_.literal_chars->one_byte_literal();	616 return current_.literal_chars->one_byte_literal();

586 }	617 }

587 Vector<const uint16_t> literal_two_byte_string() {	618 Vector<const uint16_t> literal_two_byte_string() {

588 DCHECK_NOT_NULL(current_.literal_chars);	619 DCHECK_NOT_NULL(current_.literal_chars);

589 return current_.literal_chars->two_byte_literal();	620 return current_.literal_chars->two_byte_literal();

590 }	621 }

(...skipping 12 matching lines...) Expand all Loading...
603 return next_.literal_chars->one_byte_literal();	634 return next_.literal_chars->one_byte_literal();

604 }	635 }

605 Vector<const uint16_t> next_literal_two_byte_string() {	636 Vector<const uint16_t> next_literal_two_byte_string() {

606 DCHECK_NOT_NULL(next_.literal_chars);	637 DCHECK_NOT_NULL(next_.literal_chars);

607 return next_.literal_chars->two_byte_literal();	638 return next_.literal_chars->two_byte_literal();

608 }	639 }

609 bool is_next_literal_one_byte() {	640 bool is_next_literal_one_byte() {

610 DCHECK_NOT_NULL(next_.literal_chars);	641 DCHECK_NOT_NULL(next_.literal_chars);

611 return next_.literal_chars->is_one_byte();	642 return next_.literal_chars->is_one_byte();

612 }	643 }

613 int next_literal_length() const {	644 Vector<const uint8_t> raw_literal_one_byte_string() {

614 DCHECK_NOT_NULL(next_.literal_chars);	645 DCHECK_NOT_NULL(current_.raw_literal_chars);

615 return next_.literal_chars->length();	646 return current_.raw_literal_chars->one_byte_literal();

616 }	647 }

	648 Vector<const uint16_t> raw_literal_two_byte_string() {

	649 DCHECK_NOT_NULL(current_.raw_literal_chars);

	650 return current_.raw_literal_chars->two_byte_literal();

	651 }

	652 bool is_raw_literal_one_byte() {

	653 DCHECK_NOT_NULL(current_.raw_literal_chars);

	654 return current_.raw_literal_chars->is_one_byte();

	655 }

	656

617	657

618 uc32 ScanHexNumber(int expected_length);	658 uc32 ScanHexNumber(int expected_length);

619	659

620 // Scans a single JavaScript token.	660 // Scans a single JavaScript token.

621 void Scan();	661 void Scan();

622	662

623 bool SkipWhiteSpace();	663 bool SkipWhiteSpace();

624 Token::Value SkipSingleLineComment();	664 Token::Value SkipSingleLineComment();

625 Token::Value SkipSourceURLComment();	665 Token::Value SkipSourceURLComment();

626 void TryToParseSourceURLComment();	666 void TryToParseSourceURLComment();

(...skipping 24 matching lines...) Expand all Loading...
651 UnicodeCache* unicode_cache_;	691 UnicodeCache* unicode_cache_;

652	692

653 // Buffers collecting literal strings, numbers, etc.	693 // Buffers collecting literal strings, numbers, etc.

654 LiteralBuffer literal_buffer1_;	694 LiteralBuffer literal_buffer1_;

655 LiteralBuffer literal_buffer2_;	695 LiteralBuffer literal_buffer2_;

656	696

657 // Values parsed from magic comments.	697 // Values parsed from magic comments.

658 LiteralBuffer source_url_;	698 LiteralBuffer source_url_;

659 LiteralBuffer source_mapping_url_;	699 LiteralBuffer source_mapping_url_;

660	700

	701 // Buffer to store raw string values

	702 LiteralBuffer raw_literal_buffer_;

	703

	704 // We only need to capture the raw literal when we are scanning template

	705 // literal spans.

	706 bool capturing_raw_literal_;

	707

661 TokenDesc current_; // desc for current token (as returned by Next())	708 TokenDesc current_; // desc for current token (as returned by Next())

662 TokenDesc next_; // desc for next token (one token look-ahead)	709 TokenDesc next_; // desc for next token (one token look-ahead)

663	710

664 // Input stream. Must be initialized to an Utf16CharacterStream.	711 // Input stream. Must be initialized to an Utf16CharacterStream.

665 Utf16CharacterStream* source_;	712 Utf16CharacterStream* source_;

666	713

667	714

668 // Start position of the octal literal last scanned.	715 // Start position of the octal literal last scanned.

669 Location octal_pos_;	716 Location octal_pos_;

670	717

(...skipping 15 matching lines...) Expand all Loading...
686 bool harmony_numeric_literals_;	733 bool harmony_numeric_literals_;

687 // Whether we scan 'class', 'extends', 'static' and 'super' as keywords.	734 // Whether we scan 'class', 'extends', 'static' and 'super' as keywords.

688 bool harmony_classes_;	735 bool harmony_classes_;

689 // Whether we scan TEMPLATE_SPAN and TEMPLATE_TAIL	736 // Whether we scan TEMPLATE_SPAN and TEMPLATE_TAIL

690 bool harmony_templates_;	737 bool harmony_templates_;

691 };	738 };

692	739

693 } } // namespace v8::internal	740 } } // namespace v8::internal

694	741

695 #endif // V8_SCANNER_H_	742 #endif // V8_SCANNER_H_

OLD	NEW

« no previous file with comments | « src/parser.cc ('k') | src/scanner.cc » ('j') | no next file with comments »