Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(19)

Side by Side Diff: src/scanner.h

Issue 768203002: Simplify template literal raw string creation (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Add harmony unicode test Created 6 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/parser.cc ('k') | src/scanner.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD NEW
1 // Copyright 2011 the V8 project authors. All rights reserved. 1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Features shared by parsing and pre-parsing scanners. 5 // Features shared by parsing and pre-parsing scanners.
6 6
7 #ifndef V8_SCANNER_H_ 7 #ifndef V8_SCANNER_H_
8 #define V8_SCANNER_H_ 8 #define V8_SCANNER_H_
9 9
10 #include "src/allocation.h" 10 #include "src/allocation.h"
(...skipping 234 matching lines...) Expand 10 before | Expand all | Expand 10 after
245 DCHECK(is_one_byte_); 245 DCHECK(is_one_byte_);
246 return Vector<const uint8_t>( 246 return Vector<const uint8_t>(
247 reinterpret_cast<const uint8_t*>(backing_store_.start()), 247 reinterpret_cast<const uint8_t*>(backing_store_.start()),
248 position_); 248 position_);
249 } 249 }
250 250
251 int length() const { 251 int length() const {
252 return is_one_byte_ ? position_ : (position_ >> 1); 252 return is_one_byte_ ? position_ : (position_ >> 1);
253 } 253 }
254 254
255 void ReduceLength(int delta) {
256 position_ -= delta * (is_one_byte_ ? kOneByteSize : kUC16Size);
257 }
258
255 void Reset() { 259 void Reset() {
256 position_ = 0; 260 position_ = 0;
257 is_one_byte_ = true; 261 is_one_byte_ = true;
258 } 262 }
259 263
260 Handle<String> Internalize(Isolate* isolate) const; 264 Handle<String> Internalize(Isolate* isolate) const;
261 265
262 private: 266 private:
263 static const int kInitialCapacity = 16; 267 static const int kInitialCapacity = 16;
264 static const int kGrowthFactory = 4; 268 static const int kGrowthFactory = 4;
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after
311 315
312 // ---------------------------------------------------------------------------- 316 // ----------------------------------------------------------------------------
313 // JavaScript Scanner. 317 // JavaScript Scanner.
314 318
315 class Scanner { 319 class Scanner {
316 public: 320 public:
317 // Scoped helper for literal recording. Automatically drops the literal 321 // Scoped helper for literal recording. Automatically drops the literal
318 // if aborting the scanning before it's complete. 322 // if aborting the scanning before it's complete.
319 class LiteralScope { 323 class LiteralScope {
320 public: 324 public:
321 explicit LiteralScope(Scanner* self) 325 explicit LiteralScope(Scanner* self, bool capture_raw = false)
322 : scanner_(self), complete_(false) { 326 : scanner_(self), complete_(false) {
323 scanner_->StartLiteral(); 327 scanner_->StartLiteral();
328 if (capture_raw) scanner_->StartRawLiteral();
324 } 329 }
325 ~LiteralScope() { 330 ~LiteralScope() {
326 if (!complete_) scanner_->DropLiteral(); 331 if (!complete_) scanner_->DropLiteral();
327 } 332 }
328 void Complete() { 333 void Complete() {
329 scanner_->TerminateLiteral(); 334 scanner_->TerminateLiteral();
330 complete_ = true; 335 complete_ = true;
331 } 336 }
332 337
333 private: 338 private:
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after
385 DCHECK_NOT_NULL(current_.literal_chars); 390 DCHECK_NOT_NULL(current_.literal_chars);
386 return current_.literal_chars->is_contextual_keyword(keyword); 391 return current_.literal_chars->is_contextual_keyword(keyword);
387 } 392 }
388 bool is_next_contextual_keyword(Vector<const char> keyword) { 393 bool is_next_contextual_keyword(Vector<const char> keyword) {
389 DCHECK_NOT_NULL(next_.literal_chars); 394 DCHECK_NOT_NULL(next_.literal_chars);
390 return next_.literal_chars->is_contextual_keyword(keyword); 395 return next_.literal_chars->is_contextual_keyword(keyword);
391 } 396 }
392 397
393 const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory); 398 const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory);
394 const AstRawString* NextSymbol(AstValueFactory* ast_value_factory); 399 const AstRawString* NextSymbol(AstValueFactory* ast_value_factory);
400 const AstRawString* CurrentRawSymbol(AstValueFactory* ast_value_factory);
395 401
396 double DoubleValue(); 402 double DoubleValue();
397 bool LiteralMatches(const char* data, int length, bool allow_escapes = true) { 403 bool LiteralMatches(const char* data, int length, bool allow_escapes = true) {
398 if (is_literal_one_byte() && 404 if (is_literal_one_byte() &&
399 literal_length() == length && 405 literal_length() == length &&
400 (allow_escapes || !literal_contains_escapes())) { 406 (allow_escapes || !literal_contains_escapes())) {
401 const char* token = 407 const char* token =
402 reinterpret_cast<const char*>(literal_one_byte_string().start()); 408 reinterpret_cast<const char*>(literal_one_byte_string().start());
403 return !strncmp(token, data, length); 409 return !strncmp(token, data, length);
404 } 410 }
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after
486 } 492 }
487 493
488 bool IdentifierIsFutureStrictReserved(const AstRawString* string) const; 494 bool IdentifierIsFutureStrictReserved(const AstRawString* string) const;
489 495
490 private: 496 private:
491 // The current and look-ahead token. 497 // The current and look-ahead token.
492 struct TokenDesc { 498 struct TokenDesc {
493 Token::Value token; 499 Token::Value token;
494 Location location; 500 Location location;
495 LiteralBuffer* literal_chars; 501 LiteralBuffer* literal_chars;
502 LiteralBuffer* raw_literal_chars;
496 }; 503 };
497 504
498 static const int kCharacterLookaheadBufferSize = 1; 505 static const int kCharacterLookaheadBufferSize = 1;
499 506
500 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. 507 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.
501 uc32 ScanOctalEscape(uc32 c, int length); 508 uc32 ScanOctalEscape(uc32 c, int length);
502 509
503 // Call this after setting source_ to the input. 510 // Call this after setting source_ to the input.
504 void Init() { 511 void Init() {
505 // Set c0_ (one character ahead) 512 // Set c0_ (one character ahead)
506 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); 513 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1);
507 Advance(); 514 Advance();
508 // Initialize current_ to not refer to a literal. 515 // Initialize current_ to not refer to a literal.
509 current_.literal_chars = NULL; 516 current_.literal_chars = NULL;
517 current_.raw_literal_chars = NULL;
510 } 518 }
511 519
512 // Literal buffer support 520 // Literal buffer support
513 inline void StartLiteral() { 521 inline void StartLiteral() {
514 LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ? 522 LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ?
515 &literal_buffer2_ : &literal_buffer1_; 523 &literal_buffer2_ : &literal_buffer1_;
516 free_buffer->Reset(); 524 free_buffer->Reset();
517 next_.literal_chars = free_buffer; 525 next_.literal_chars = free_buffer;
518 } 526 }
519 527
528 inline void StartRawLiteral() {
529 raw_literal_buffer_.Reset();
530 next_.raw_literal_chars = &raw_literal_buffer_;
531 capturing_raw_literal_ = true;
532 }
533
520 INLINE(void AddLiteralChar(uc32 c)) { 534 INLINE(void AddLiteralChar(uc32 c)) {
521 DCHECK_NOT_NULL(next_.literal_chars); 535 DCHECK_NOT_NULL(next_.literal_chars);
522 next_.literal_chars->AddChar(c); 536 next_.literal_chars->AddChar(c);
523 } 537 }
524 538
539 INLINE(void AddRawLiteralChar(uc32 c)) {
540 DCHECK(capturing_raw_literal_);
541 DCHECK_NOT_NULL(next_.raw_literal_chars);
542 next_.raw_literal_chars->AddChar(c);
543 }
544
545 INLINE(void ReduceRawLiteralLength(int delta)) {
546 DCHECK(capturing_raw_literal_);
547 DCHECK_NOT_NULL(next_.raw_literal_chars);
548 next_.raw_literal_chars->ReduceLength(delta);
549 }
550
525 // Complete scanning of a literal. 551 // Complete scanning of a literal.
526 inline void TerminateLiteral() { 552 inline void TerminateLiteral() { capturing_raw_literal_ = false; }
527 // Does nothing in the current implementation.
528 }
529 553
530 // Stops scanning of a literal and drops the collected characters, 554 // Stops scanning of a literal and drops the collected characters,
531 // e.g., due to an encountered error. 555 // e.g., due to an encountered error.
532 inline void DropLiteral() { 556 inline void DropLiteral() {
533 next_.literal_chars = NULL; 557 next_.literal_chars = NULL;
558 next_.raw_literal_chars = NULL;
559 capturing_raw_literal_ = false;
534 } 560 }
535 561
536 inline void AddLiteralCharAdvance() { 562 inline void AddLiteralCharAdvance() {
537 AddLiteralChar(c0_); 563 AddLiteralChar(c0_);
538 Advance(); 564 Advance();
539 } 565 }
540 566
541 // Low-level scanning support. 567 // Low-level scanning support.
542 void Advance() { 568 void Advance() {
569 if (capturing_raw_literal_) {
570 AddRawLiteralChar(c0_);
571 }
543 c0_ = source_->Advance(); 572 c0_ = source_->Advance();
544 if (unibrow::Utf16::IsLeadSurrogate(c0_)) { 573 if (unibrow::Utf16::IsLeadSurrogate(c0_)) {
545 uc32 c1 = source_->Advance(); 574 uc32 c1 = source_->Advance();
546 if (!unibrow::Utf16::IsTrailSurrogate(c1)) { 575 if (!unibrow::Utf16::IsTrailSurrogate(c1)) {
547 source_->PushBack(c1); 576 source_->PushBack(c1);
548 } else { 577 } else {
549 c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1); 578 c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1);
550 } 579 }
551 } 580 }
552 } 581 }
553 582
554 void PushBack(uc32 ch) { 583 void PushBack(uc32 ch) {
555 if (ch > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) { 584 if (ch > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
556 source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_)); 585 source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_));
557 source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_)); 586 source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_));
587 if (capturing_raw_literal_) ReduceRawLiteralLength(2);
558 } else { 588 } else {
559 source_->PushBack(c0_); 589 source_->PushBack(c0_);
590 if (capturing_raw_literal_) ReduceRawLiteralLength(1);
560 } 591 }
561 c0_ = ch; 592 c0_ = ch;
562 } 593 }
563 594
564 inline Token::Value Select(Token::Value tok) { 595 inline Token::Value Select(Token::Value tok) {
565 Advance(); 596 Advance();
566 return tok; 597 return tok;
567 } 598 }
568 599
569 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { 600 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) {
570 Advance(); 601 Advance();
571 if (c0_ == next) { 602 if (c0_ == next) {
572 Advance(); 603 Advance();
573 return then; 604 return then;
574 } else { 605 } else {
575 return else_; 606 return else_;
576 } 607 }
577 } 608 }
578 609
579 // Returns the literal string, if any, for the current token (the 610 // Returns the literal string, if any, for the current token (the
580 // token last returned by Next()). The string is 0-terminated. 611 // token last returned by Next()). The string is 0-terminated.
581 // Literal strings are collected for identifiers, strings, and 612 // Literal strings are collected for identifiers, strings, numbers as well
582 // numbers. 613 // as for template literals. For template literals we also collect the raw
614 // form.
583 // These functions only give the correct result if the literal 615 // These functions only give the correct result if the literal
584 // was scanned between calls to StartLiteral() and TerminateLiteral(). 616 // was scanned between calls to StartLiteral() and TerminateLiteral().
585 Vector<const uint8_t> literal_one_byte_string() { 617 Vector<const uint8_t> literal_one_byte_string() {
586 DCHECK_NOT_NULL(current_.literal_chars); 618 DCHECK_NOT_NULL(current_.literal_chars);
587 return current_.literal_chars->one_byte_literal(); 619 return current_.literal_chars->one_byte_literal();
588 } 620 }
589 Vector<const uint16_t> literal_two_byte_string() { 621 Vector<const uint16_t> literal_two_byte_string() {
590 DCHECK_NOT_NULL(current_.literal_chars); 622 DCHECK_NOT_NULL(current_.literal_chars);
591 return current_.literal_chars->two_byte_literal(); 623 return current_.literal_chars->two_byte_literal();
592 } 624 }
(...skipping 12 matching lines...) Expand all
605 return next_.literal_chars->one_byte_literal(); 637 return next_.literal_chars->one_byte_literal();
606 } 638 }
607 Vector<const uint16_t> next_literal_two_byte_string() { 639 Vector<const uint16_t> next_literal_two_byte_string() {
608 DCHECK_NOT_NULL(next_.literal_chars); 640 DCHECK_NOT_NULL(next_.literal_chars);
609 return next_.literal_chars->two_byte_literal(); 641 return next_.literal_chars->two_byte_literal();
610 } 642 }
611 bool is_next_literal_one_byte() { 643 bool is_next_literal_one_byte() {
612 DCHECK_NOT_NULL(next_.literal_chars); 644 DCHECK_NOT_NULL(next_.literal_chars);
613 return next_.literal_chars->is_one_byte(); 645 return next_.literal_chars->is_one_byte();
614 } 646 }
615 int next_literal_length() const { 647 Vector<const uint8_t> raw_literal_one_byte_string() {
616 DCHECK_NOT_NULL(next_.literal_chars); 648 DCHECK_NOT_NULL(current_.raw_literal_chars);
617 return next_.literal_chars->length(); 649 return current_.raw_literal_chars->one_byte_literal();
618 } 650 }
651 Vector<const uint16_t> raw_literal_two_byte_string() {
652 DCHECK_NOT_NULL(current_.raw_literal_chars);
653 return current_.raw_literal_chars->two_byte_literal();
654 }
655 bool is_raw_literal_one_byte() {
656 DCHECK_NOT_NULL(current_.raw_literal_chars);
657 return current_.raw_literal_chars->is_one_byte();
658 }
659
619 660
620 uc32 ScanHexNumber(int expected_length); 661 uc32 ScanHexNumber(int expected_length);
621 // Scan a number of any length but not bigger than max_value. For example, the 662 // Scan a number of any length but not bigger than max_value. For example, the
622 // number can be 000000001, so it's very long in characters but its value is 663 // number can be 000000001, so it's very long in characters but its value is
623 // small. 664 // small.
624 uc32 ScanUnlimitedLengthHexNumber(int max_value); 665 uc32 ScanUnlimitedLengthHexNumber(int max_value);
625 666
626 // Scans a single JavaScript token. 667 // Scans a single JavaScript token.
627 void Scan(); 668 void Scan();
628 669
(...skipping 30 matching lines...) Expand all
659 UnicodeCache* unicode_cache_; 700 UnicodeCache* unicode_cache_;
660 701
661 // Buffers collecting literal strings, numbers, etc. 702 // Buffers collecting literal strings, numbers, etc.
662 LiteralBuffer literal_buffer1_; 703 LiteralBuffer literal_buffer1_;
663 LiteralBuffer literal_buffer2_; 704 LiteralBuffer literal_buffer2_;
664 705
665 // Values parsed from magic comments. 706 // Values parsed from magic comments.
666 LiteralBuffer source_url_; 707 LiteralBuffer source_url_;
667 LiteralBuffer source_mapping_url_; 708 LiteralBuffer source_mapping_url_;
668 709
710 // Buffer to store raw string values
711 LiteralBuffer raw_literal_buffer_;
712
713 // We only need to capture the raw literal when we are scanning template
714 // literal spans.
715 bool capturing_raw_literal_;
716
669 TokenDesc current_; // desc for current token (as returned by Next()) 717 TokenDesc current_; // desc for current token (as returned by Next())
670 TokenDesc next_; // desc for next token (one token look-ahead) 718 TokenDesc next_; // desc for next token (one token look-ahead)
671 719
672 // Input stream. Must be initialized to an Utf16CharacterStream. 720 // Input stream. Must be initialized to an Utf16CharacterStream.
673 Utf16CharacterStream* source_; 721 Utf16CharacterStream* source_;
674 722
675 723
676 // Start position of the octal literal last scanned. 724 // Start position of the octal literal last scanned.
677 Location octal_pos_; 725 Location octal_pos_;
678 726
(...skipping 17 matching lines...) Expand all
696 bool harmony_classes_; 744 bool harmony_classes_;
697 // Whether we scan TEMPLATE_SPAN and TEMPLATE_TAIL 745 // Whether we scan TEMPLATE_SPAN and TEMPLATE_TAIL
698 bool harmony_templates_; 746 bool harmony_templates_;
699 // Whether we allow \u{xxxxx}. 747 // Whether we allow \u{xxxxx}.
700 bool harmony_unicode_; 748 bool harmony_unicode_;
701 }; 749 };
702 750
703 } } // namespace v8::internal 751 } } // namespace v8::internal
704 752
705 #endif // V8_SCANNER_H_ 753 #endif // V8_SCANNER_H_
OLD NEW
« no previous file with comments | « src/parser.cc ('k') | src/scanner.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698