OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
6 | 6 |
7 #ifndef V8_SCANNER_H_ | 7 #ifndef V8_SCANNER_H_ |
8 #define V8_SCANNER_H_ | 8 #define V8_SCANNER_H_ |
9 | 9 |
10 #include "src/allocation.h" | 10 #include "src/allocation.h" |
(...skipping 304 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
315 | 315 |
316 // ---------------------------------------------------------------------------- | 316 // ---------------------------------------------------------------------------- |
317 // JavaScript Scanner. | 317 // JavaScript Scanner. |
318 | 318 |
319 class Scanner { | 319 class Scanner { |
320 public: | 320 public: |
321 // Scoped helper for literal recording. Automatically drops the literal | 321 // Scoped helper for literal recording. Automatically drops the literal |
322 // if aborting the scanning before it's complete. | 322 // if aborting the scanning before it's complete. |
323 class LiteralScope { | 323 class LiteralScope { |
324 public: | 324 public: |
325 explicit LiteralScope(Scanner* self, bool capture_raw = false) | 325 explicit LiteralScope(Scanner* self) : scanner_(self), complete_(false) { |
326 : scanner_(self), complete_(false) { | |
327 scanner_->StartLiteral(); | 326 scanner_->StartLiteral(); |
328 if (capture_raw) scanner_->StartRawLiteral(); | |
329 } | 327 } |
330 ~LiteralScope() { | 328 ~LiteralScope() { |
331 if (!complete_) scanner_->DropLiteral(); | 329 if (!complete_) scanner_->DropLiteral(); |
332 } | 330 } |
333 void Complete() { | 331 void Complete() { |
334 scanner_->TerminateLiteral(); | |
335 complete_ = true; | 332 complete_ = true; |
336 } | 333 } |
337 | 334 |
338 private: | 335 private: |
339 Scanner* scanner_; | 336 Scanner* scanner_; |
340 bool complete_; | 337 bool complete_; |
341 }; | 338 }; |
342 | 339 |
343 // Representation of an interval of source positions. | 340 // Representation of an interval of source positions. |
344 struct Location { | 341 struct Location { |
(...skipping 154 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
499 struct TokenDesc { | 496 struct TokenDesc { |
500 Token::Value token; | 497 Token::Value token; |
501 Location location; | 498 Location location; |
502 LiteralBuffer* literal_chars; | 499 LiteralBuffer* literal_chars; |
503 LiteralBuffer* raw_literal_chars; | 500 LiteralBuffer* raw_literal_chars; |
504 }; | 501 }; |
505 | 502 |
506 static const int kCharacterLookaheadBufferSize = 1; | 503 static const int kCharacterLookaheadBufferSize = 1; |
507 | 504 |
508 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. | 505 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. |
| 506 template <bool capture_raw> |
509 uc32 ScanOctalEscape(uc32 c, int length); | 507 uc32 ScanOctalEscape(uc32 c, int length); |
510 | 508 |
511 // Call this after setting source_ to the input. | 509 // Call this after setting source_ to the input. |
512 void Init() { | 510 void Init() { |
513 // Set c0_ (one character ahead) | 511 // Set c0_ (one character ahead) |
514 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); | 512 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); |
515 Advance(); | 513 Advance(); |
516 // Initialize current_ to not refer to a literal. | 514 // Initialize current_ to not refer to a literal. |
517 current_.literal_chars = NULL; | 515 current_.literal_chars = NULL; |
518 current_.raw_literal_chars = NULL; | 516 current_.raw_literal_chars = NULL; |
519 } | 517 } |
520 | 518 |
521 // Literal buffer support | 519 // Literal buffer support |
522 inline void StartLiteral() { | 520 inline void StartLiteral() { |
523 LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ? | 521 LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ? |
524 &literal_buffer2_ : &literal_buffer1_; | 522 &literal_buffer2_ : &literal_buffer1_; |
525 free_buffer->Reset(); | 523 free_buffer->Reset(); |
526 next_.literal_chars = free_buffer; | 524 next_.literal_chars = free_buffer; |
527 } | 525 } |
528 | 526 |
529 inline void StartRawLiteral() { | 527 inline void StartRawLiteral() { |
530 raw_literal_buffer_.Reset(); | 528 raw_literal_buffer_.Reset(); |
531 next_.raw_literal_chars = &raw_literal_buffer_; | 529 next_.raw_literal_chars = &raw_literal_buffer_; |
532 capturing_raw_literal_ = true; | |
533 } | 530 } |
534 | 531 |
535 INLINE(void AddLiteralChar(uc32 c)) { | 532 INLINE(void AddLiteralChar(uc32 c)) { |
536 DCHECK_NOT_NULL(next_.literal_chars); | 533 DCHECK_NOT_NULL(next_.literal_chars); |
537 next_.literal_chars->AddChar(c); | 534 next_.literal_chars->AddChar(c); |
538 } | 535 } |
539 | 536 |
540 INLINE(void AddRawLiteralChar(uc32 c)) { | 537 INLINE(void AddRawLiteralChar(uc32 c)) { |
541 DCHECK(capturing_raw_literal_); | |
542 DCHECK_NOT_NULL(next_.raw_literal_chars); | 538 DCHECK_NOT_NULL(next_.raw_literal_chars); |
543 next_.raw_literal_chars->AddChar(c); | 539 next_.raw_literal_chars->AddChar(c); |
544 } | 540 } |
545 | 541 |
546 INLINE(void ReduceRawLiteralLength(int delta)) { | 542 INLINE(void ReduceRawLiteralLength(int delta)) { |
547 DCHECK(capturing_raw_literal_); | |
548 DCHECK_NOT_NULL(next_.raw_literal_chars); | 543 DCHECK_NOT_NULL(next_.raw_literal_chars); |
549 next_.raw_literal_chars->ReduceLength(delta); | 544 next_.raw_literal_chars->ReduceLength(delta); |
550 } | 545 } |
551 | 546 |
552 // Complete scanning of a literal. | |
553 inline void TerminateLiteral() { capturing_raw_literal_ = false; } | |
554 | |
555 // Stops scanning of a literal and drop the collected characters, | 547 // Stops scanning of a literal and drop the collected characters, |
556 // e.g., due to an encountered error. | 548 // e.g., due to an encountered error. |
557 inline void DropLiteral() { | 549 inline void DropLiteral() { |
558 next_.literal_chars = NULL; | 550 next_.literal_chars = NULL; |
559 next_.raw_literal_chars = NULL; | 551 next_.raw_literal_chars = NULL; |
560 capturing_raw_literal_ = false; | |
561 } | 552 } |
562 | 553 |
563 inline void AddLiteralCharAdvance() { | 554 inline void AddLiteralCharAdvance() { |
564 AddLiteralChar(c0_); | 555 AddLiteralChar(c0_); |
565 Advance(); | 556 Advance(); |
566 } | 557 } |
567 | 558 |
568 // Low-level scanning support. | 559 // Low-level scanning support. |
| 560 template <bool capture_raw = false> |
569 void Advance() { | 561 void Advance() { |
570 if (capturing_raw_literal_) { | 562 if (capture_raw) { |
571 AddRawLiteralChar(c0_); | 563 AddRawLiteralChar(c0_); |
572 } | 564 } |
573 c0_ = source_->Advance(); | 565 c0_ = source_->Advance(); |
574 if (unibrow::Utf16::IsLeadSurrogate(c0_)) { | 566 if (unibrow::Utf16::IsLeadSurrogate(c0_)) { |
575 uc32 c1 = source_->Advance(); | 567 uc32 c1 = source_->Advance(); |
576 if (!unibrow::Utf16::IsTrailSurrogate(c1)) { | 568 if (!unibrow::Utf16::IsTrailSurrogate(c1)) { |
577 source_->PushBack(c1); | 569 source_->PushBack(c1); |
578 } else { | 570 } else { |
579 c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1); | 571 c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1); |
580 } | 572 } |
581 } | 573 } |
582 } | 574 } |
583 | 575 |
584 void PushBack(uc32 ch) { | 576 void PushBack(uc32 ch) { |
585 if (ch > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) { | 577 if (ch > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) { |
586 source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_)); | 578 source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_)); |
587 source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_)); | 579 source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_)); |
588 if (capturing_raw_literal_) ReduceRawLiteralLength(2); | |
589 } else { | 580 } else { |
590 source_->PushBack(c0_); | 581 source_->PushBack(c0_); |
591 if (capturing_raw_literal_) ReduceRawLiteralLength(1); | |
592 } | 582 } |
593 c0_ = ch; | 583 c0_ = ch; |
594 } | 584 } |
595 | 585 |
596 inline Token::Value Select(Token::Value tok) { | 586 inline Token::Value Select(Token::Value tok) { |
597 Advance(); | 587 Advance(); |
598 return tok; | 588 return tok; |
599 } | 589 } |
600 | 590 |
601 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { | 591 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { |
602 Advance(); | 592 Advance(); |
603 if (c0_ == next) { | 593 if (c0_ == next) { |
604 Advance(); | 594 Advance(); |
605 return then; | 595 return then; |
606 } else { | 596 } else { |
607 return else_; | 597 return else_; |
608 } | 598 } |
609 } | 599 } |
610 | 600 |
611 // Returns the literal string, if any, for the current token (the | 601 // Returns the literal string, if any, for the current token (the |
612 // token last returned by Next()). The string is 0-terminated. | 602 // token last returned by Next()). The string is 0-terminated. |
613 // Literal strings are collected for identifiers, strings, numbers as well | 603 // Literal strings are collected for identifiers, strings, numbers as well |
614 // as for template literals. For template literals we also collect the raw | 604 // as for template literals. For template literals we also collect the raw |
615 // form. | 605 // form. |
616 // These functions only give the correct result if the literal | 606 // These functions only give the correct result if the literal was scanned |
617 // was scanned between calls to StartLiteral() and TerminateLiteral(). | 607 // when a LiteralScope object is alive. |
618 Vector<const uint8_t> literal_one_byte_string() { | 608 Vector<const uint8_t> literal_one_byte_string() { |
619 DCHECK_NOT_NULL(current_.literal_chars); | 609 DCHECK_NOT_NULL(current_.literal_chars); |
620 return current_.literal_chars->one_byte_literal(); | 610 return current_.literal_chars->one_byte_literal(); |
621 } | 611 } |
622 Vector<const uint16_t> literal_two_byte_string() { | 612 Vector<const uint16_t> literal_two_byte_string() { |
623 DCHECK_NOT_NULL(current_.literal_chars); | 613 DCHECK_NOT_NULL(current_.literal_chars); |
624 return current_.literal_chars->two_byte_literal(); | 614 return current_.literal_chars->two_byte_literal(); |
625 } | 615 } |
626 bool is_literal_one_byte() { | 616 bool is_literal_one_byte() { |
627 DCHECK_NOT_NULL(current_.literal_chars); | 617 DCHECK_NOT_NULL(current_.literal_chars); |
(...skipping 23 matching lines...) Expand all Loading... |
651 } | 641 } |
652 Vector<const uint16_t> raw_literal_two_byte_string() { | 642 Vector<const uint16_t> raw_literal_two_byte_string() { |
653 DCHECK_NOT_NULL(current_.raw_literal_chars); | 643 DCHECK_NOT_NULL(current_.raw_literal_chars); |
654 return current_.raw_literal_chars->two_byte_literal(); | 644 return current_.raw_literal_chars->two_byte_literal(); |
655 } | 645 } |
656 bool is_raw_literal_one_byte() { | 646 bool is_raw_literal_one_byte() { |
657 DCHECK_NOT_NULL(current_.raw_literal_chars); | 647 DCHECK_NOT_NULL(current_.raw_literal_chars); |
658 return current_.raw_literal_chars->is_one_byte(); | 648 return current_.raw_literal_chars->is_one_byte(); |
659 } | 649 } |
660 | 650 |
661 | 651 template <bool capture_raw> |
662 uc32 ScanHexNumber(int expected_length); | 652 uc32 ScanHexNumber(int expected_length); |
663 // Scan a number of any length but not bigger than max_value. For example, the | 653 // Scan a number of any length but not bigger than max_value. For example, the |
664 // number can be 000000001, so it's very long in characters but its value is | 654 // number can be 000000001, so it's very long in characters but its value is |
665 // small. | 655 // small. |
| 656 template <bool capture_raw> |
666 uc32 ScanUnlimitedLengthHexNumber(int max_value); | 657 uc32 ScanUnlimitedLengthHexNumber(int max_value); |
667 | 658 |
668 // Scans a single JavaScript token. | 659 // Scans a single JavaScript token. |
669 void Scan(); | 660 void Scan(); |
670 | 661 |
671 bool SkipWhiteSpace(); | 662 bool SkipWhiteSpace(); |
672 Token::Value SkipSingleLineComment(); | 663 Token::Value SkipSingleLineComment(); |
673 Token::Value SkipSourceURLComment(); | 664 Token::Value SkipSourceURLComment(); |
674 void TryToParseSourceURLComment(); | 665 void TryToParseSourceURLComment(); |
675 Token::Value SkipMultiLineComment(); | 666 Token::Value SkipMultiLineComment(); |
676 // Scans a possible HTML comment -- begins with '<!'. | 667 // Scans a possible HTML comment -- begins with '<!'. |
677 Token::Value ScanHtmlComment(); | 668 Token::Value ScanHtmlComment(); |
678 | 669 |
679 void ScanDecimalDigits(); | 670 void ScanDecimalDigits(); |
680 Token::Value ScanNumber(bool seen_period); | 671 Token::Value ScanNumber(bool seen_period); |
681 Token::Value ScanIdentifierOrKeyword(); | 672 Token::Value ScanIdentifierOrKeyword(); |
682 Token::Value ScanIdentifierSuffix(LiteralScope* literal); | 673 Token::Value ScanIdentifierSuffix(LiteralScope* literal); |
683 | 674 |
684 Token::Value ScanString(); | 675 Token::Value ScanString(); |
685 | 676 |
686 // Scans an escape-sequence which is part of a string and adds the | 677 // Scans an escape-sequence which is part of a string and adds the |
687 // decoded character to the current literal. Returns true if a pattern | 678 // decoded character to the current literal. Returns true if a pattern |
688 // is scanned. | 679 // is scanned. |
| 680 template <bool capture_raw> |
689 bool ScanEscape(); | 681 bool ScanEscape(); |
690 // Decodes a Unicode escape-sequence which is part of an identifier. | 682 // Decodes a Unicode escape-sequence which is part of an identifier. |
691 // If the escape sequence cannot be decoded the result is kBadChar. | 683 // If the escape sequence cannot be decoded the result is kBadChar. |
692 uc32 ScanIdentifierUnicodeEscape(); | 684 uc32 ScanIdentifierUnicodeEscape(); |
693 // Helper for the above functions. | 685 // Helper for the above functions. |
| 686 template <bool capture_raw> |
694 uc32 ScanUnicodeEscape(); | 687 uc32 ScanUnicodeEscape(); |
695 | 688 |
696 Token::Value ScanTemplateSpan(); | 689 Token::Value ScanTemplateSpan(); |
697 | 690 |
698 // Return the current source position. | 691 // Return the current source position. |
699 int source_pos() { | 692 int source_pos() { |
700 return source_->pos() - kCharacterLookaheadBufferSize; | 693 return source_->pos() - kCharacterLookaheadBufferSize; |
701 } | 694 } |
702 | 695 |
703 UnicodeCache* unicode_cache_; | 696 UnicodeCache* unicode_cache_; |
704 | 697 |
705 // Buffers collecting literal strings, numbers, etc. | 698 // Buffers collecting literal strings, numbers, etc. |
706 LiteralBuffer literal_buffer1_; | 699 LiteralBuffer literal_buffer1_; |
707 LiteralBuffer literal_buffer2_; | 700 LiteralBuffer literal_buffer2_; |
708 | 701 |
709 // Values parsed from magic comments. | 702 // Values parsed from magic comments. |
710 LiteralBuffer source_url_; | 703 LiteralBuffer source_url_; |
711 LiteralBuffer source_mapping_url_; | 704 LiteralBuffer source_mapping_url_; |
712 | 705 |
713 // Buffer to store raw string values | 706 // Buffer to store raw string values |
714 LiteralBuffer raw_literal_buffer_; | 707 LiteralBuffer raw_literal_buffer_; |
715 | 708 |
716 // We only need to capture the raw literal when we are scanning template | |
717 // literal spans. | |
718 bool capturing_raw_literal_; | |
719 | |
720 TokenDesc current_; // desc for current token (as returned by Next()) | 709 TokenDesc current_; // desc for current token (as returned by Next()) |
721 TokenDesc next_; // desc for next token (one token look-ahead) | 710 TokenDesc next_; // desc for next token (one token look-ahead) |
722 | 711 |
723 // Input stream. Must be initialized to an Utf16CharacterStream. | 712 // Input stream. Must be initialized to an Utf16CharacterStream. |
724 Utf16CharacterStream* source_; | 713 Utf16CharacterStream* source_; |
725 | 714 |
726 | 715 |
727 // Start position of the octal literal last scanned. | 716 // Start position of the octal literal last scanned. |
728 Location octal_pos_; | 717 Location octal_pos_; |
729 | 718 |
(...skipping 17 matching lines...) Expand all Loading... |
747 bool harmony_classes_; | 736 bool harmony_classes_; |
748 // Whether we scan TEMPLATE_SPAN and TEMPLATE_TAIL | 737 // Whether we scan TEMPLATE_SPAN and TEMPLATE_TAIL |
749 bool harmony_templates_; | 738 bool harmony_templates_; |
750 // Whether we allow \u{xxxxx}. | 739 // Whether we allow \u{xxxxx}. |
751 bool harmony_unicode_; | 740 bool harmony_unicode_; |
752 }; | 741 }; |
753 | 742 |
754 } } // namespace v8::internal | 743 } } // namespace v8::internal |
755 | 744 |
756 #endif // V8_SCANNER_H_ | 745 #endif // V8_SCANNER_H_ |
OLD | NEW |