Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
| 6 | 6 |
| 7 #ifndef V8_SCANNER_H_ | 7 #ifndef V8_SCANNER_H_ |
| 8 #define V8_SCANNER_H_ | 8 #define V8_SCANNER_H_ |
| 9 | 9 |
| 10 #include "src/allocation.h" | 10 #include "src/allocation.h" |
| (...skipping 304 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 315 | 315 |
| 316 // ---------------------------------------------------------------------------- | 316 // ---------------------------------------------------------------------------- |
| 317 // JavaScript Scanner. | 317 // JavaScript Scanner. |
| 318 | 318 |
| 319 class Scanner { | 319 class Scanner { |
| 320 public: | 320 public: |
| 321 // Scoped helper for literal recording. Automatically drops the literal | 321 // Scoped helper for literal recording. Automatically drops the literal |
| 322 // if aborting the scanning before it's complete. | 322 // if aborting the scanning before it's complete. |
| 323 class LiteralScope { | 323 class LiteralScope { |
| 324 public: | 324 public: |
| 325 explicit LiteralScope(Scanner* self, bool capture_raw = false) | 325 explicit LiteralScope(Scanner* self) : scanner_(self), complete_(false) { |
| 326 : scanner_(self), complete_(false) { | |
| 327 scanner_->StartLiteral(); | 326 scanner_->StartLiteral(); |
| 328 if (capture_raw) scanner_->StartRawLiteral(); | |
| 329 } | 327 } |
| 330 ~LiteralScope() { | 328 ~LiteralScope() { |
| 331 if (!complete_) scanner_->DropLiteral(); | 329 if (!complete_) scanner_->DropLiteral(); |
| 332 } | 330 } |
| 333 void Complete() { | 331 void Complete() { |
| 334 scanner_->TerminateLiteral(); | 332 scanner_->TerminateLiteral(); |
| 335 complete_ = true; | 333 complete_ = true; |
| 336 } | 334 } |
| 337 | 335 |
| 338 private: | 336 private: |
| (...skipping 160 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 499 struct TokenDesc { | 497 struct TokenDesc { |
| 500 Token::Value token; | 498 Token::Value token; |
| 501 Location location; | 499 Location location; |
| 502 LiteralBuffer* literal_chars; | 500 LiteralBuffer* literal_chars; |
| 503 LiteralBuffer* raw_literal_chars; | 501 LiteralBuffer* raw_literal_chars; |
| 504 }; | 502 }; |
| 505 | 503 |
| 506 static const int kCharacterLookaheadBufferSize = 1; | 504 static const int kCharacterLookaheadBufferSize = 1; |
| 507 | 505 |
| 508 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. | 506 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. |
| 507 template <bool capture_raw> | |
| 509 uc32 ScanOctalEscape(uc32 c, int length); | 508 uc32 ScanOctalEscape(uc32 c, int length); |
| 510 | 509 |
| 511 // Call this after setting source_ to the input. | 510 // Call this after setting source_ to the input. |
| 512 void Init() { | 511 void Init() { |
| 513 // Set c0_ (one character ahead) | 512 // Set c0_ (one character ahead) |
| 514 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); | 513 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); |
| 515 Advance(); | 514 Advance(); |
| 516 // Initialize current_ to not refer to a literal. | 515 // Initialize current_ to not refer to a literal. |
| 517 current_.literal_chars = NULL; | 516 current_.literal_chars = NULL; |
| 518 current_.raw_literal_chars = NULL; | 517 current_.raw_literal_chars = NULL; |
| 519 } | 518 } |
| 520 | 519 |
| 521 // Literal buffer support | 520 // Literal buffer support |
| 522 inline void StartLiteral() { | 521 inline void StartLiteral() { |
| 523 LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ? | 522 LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ? |
| 524 &literal_buffer2_ : &literal_buffer1_; | 523 &literal_buffer2_ : &literal_buffer1_; |
| 525 free_buffer->Reset(); | 524 free_buffer->Reset(); |
| 526 next_.literal_chars = free_buffer; | 525 next_.literal_chars = free_buffer; |
| 527 } | 526 } |
| 528 | 527 |
| 529 inline void StartRawLiteral() { | 528 inline void StartRawLiteral() { |
| 530 raw_literal_buffer_.Reset(); | 529 raw_literal_buffer_.Reset(); |
| 531 next_.raw_literal_chars = &raw_literal_buffer_; | 530 next_.raw_literal_chars = &raw_literal_buffer_; |
| 532 capturing_raw_literal_ = true; | |
| 533 } | 531 } |
| 534 | 532 |
| 535 INLINE(void AddLiteralChar(uc32 c)) { | 533 INLINE(void AddLiteralChar(uc32 c)) { |
| 536 DCHECK_NOT_NULL(next_.literal_chars); | 534 DCHECK_NOT_NULL(next_.literal_chars); |
| 537 next_.literal_chars->AddChar(c); | 535 next_.literal_chars->AddChar(c); |
| 538 } | 536 } |
| 539 | 537 |
| 540 INLINE(void AddRawLiteralChar(uc32 c)) { | 538 INLINE(void AddRawLiteralChar(uc32 c)) { |
| 541 DCHECK(capturing_raw_literal_); | |
| 542 DCHECK_NOT_NULL(next_.raw_literal_chars); | 539 DCHECK_NOT_NULL(next_.raw_literal_chars); |
| 543 next_.raw_literal_chars->AddChar(c); | 540 next_.raw_literal_chars->AddChar(c); |
| 544 } | 541 } |
| 545 | 542 |
| 546 INLINE(void ReduceRawLiteralLength(int delta)) { | 543 INLINE(void ReduceRawLiteralLength(int delta)) { |
| 547 DCHECK(capturing_raw_literal_); | |
| 548 DCHECK_NOT_NULL(next_.raw_literal_chars); | 544 DCHECK_NOT_NULL(next_.raw_literal_chars); |
| 549 next_.raw_literal_chars->ReduceLength(delta); | 545 next_.raw_literal_chars->ReduceLength(delta); |
| 550 } | 546 } |
| 551 | 547 |
| 552 // Complete scanning of a literal. | 548 // Complete scanning of a literal. |
| 553 inline void TerminateLiteral() { capturing_raw_literal_ = false; } | 549 inline void TerminateLiteral() { |
|
Review comment — Dmitry Lomov (no reviews), 2014/12/04 18:04:23: Nit: if it does nothing, why not remove it?
Reply — arv (Not doing code reviews), 2014/12/04 18:07:09: Old code... I guess at some point it did something.
| |
| 550 // Does nothing in the current implementation. | |
| 551 } | |
| 554 | 552 |
| 555 // Stops scanning of a literal and drop the collected characters, | 553 // Stops scanning of a literal and drop the collected characters, |
| 556 // e.g., due to an encountered error. | 554 // e.g., due to an encountered error. |
| 557 inline void DropLiteral() { | 555 inline void DropLiteral() { |
| 558 next_.literal_chars = NULL; | 556 next_.literal_chars = NULL; |
| 559 next_.raw_literal_chars = NULL; | 557 next_.raw_literal_chars = NULL; |
| 560 capturing_raw_literal_ = false; | |
| 561 } | 558 } |
| 562 | 559 |
| 563 inline void AddLiteralCharAdvance() { | 560 inline void AddLiteralCharAdvance() { |
| 564 AddLiteralChar(c0_); | 561 AddLiteralChar(c0_); |
| 565 Advance(); | 562 Advance(); |
| 566 } | 563 } |
| 567 | 564 |
| 568 // Low-level scanning support. | 565 // Low-level scanning support. |
| 566 template <bool capture_raw> | |
| 569 void Advance() { | 567 void Advance() { |
| 570 if (capturing_raw_literal_) { | 568 if (capture_raw) { |
| 571 AddRawLiteralChar(c0_); | 569 AddRawLiteralChar(c0_); |
| 572 } | 570 } |
| 573 c0_ = source_->Advance(); | 571 c0_ = source_->Advance(); |
| 574 if (unibrow::Utf16::IsLeadSurrogate(c0_)) { | 572 if (unibrow::Utf16::IsLeadSurrogate(c0_)) { |
| 575 uc32 c1 = source_->Advance(); | 573 uc32 c1 = source_->Advance(); |
| 576 if (!unibrow::Utf16::IsTrailSurrogate(c1)) { | 574 if (!unibrow::Utf16::IsTrailSurrogate(c1)) { |
| 577 source_->PushBack(c1); | 575 source_->PushBack(c1); |
| 578 } else { | 576 } else { |
| 579 c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1); | 577 c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1); |
| 580 } | 578 } |
| 581 } | 579 } |
| 582 } | 580 } |
| 583 | 581 |
| 582 void Advance() { Advance<false>(); } | |
| 583 | |
| 584 void PushBack(uc32 ch) { | 584 void PushBack(uc32 ch) { |
| 585 if (ch > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) { | 585 if (ch > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) { |
| 586 source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_)); | 586 source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_)); |
| 587 source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_)); | 587 source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_)); |
| 588 if (capturing_raw_literal_) ReduceRawLiteralLength(2); | |
| 589 } else { | 588 } else { |
| 590 source_->PushBack(c0_); | 589 source_->PushBack(c0_); |
| 591 if (capturing_raw_literal_) ReduceRawLiteralLength(1); | |
| 592 } | 590 } |
| 593 c0_ = ch; | 591 c0_ = ch; |
| 594 } | 592 } |
| 595 | 593 |
| 596 inline Token::Value Select(Token::Value tok) { | 594 inline Token::Value Select(Token::Value tok) { |
| 597 Advance(); | 595 Advance(); |
| 598 return tok; | 596 return tok; |
| 599 } | 597 } |
| 600 | 598 |
| 601 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { | 599 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { |
| (...skipping 49 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 651 } | 649 } |
| 652 Vector<const uint16_t> raw_literal_two_byte_string() { | 650 Vector<const uint16_t> raw_literal_two_byte_string() { |
| 653 DCHECK_NOT_NULL(current_.raw_literal_chars); | 651 DCHECK_NOT_NULL(current_.raw_literal_chars); |
| 654 return current_.raw_literal_chars->two_byte_literal(); | 652 return current_.raw_literal_chars->two_byte_literal(); |
| 655 } | 653 } |
| 656 bool is_raw_literal_one_byte() { | 654 bool is_raw_literal_one_byte() { |
| 657 DCHECK_NOT_NULL(current_.raw_literal_chars); | 655 DCHECK_NOT_NULL(current_.raw_literal_chars); |
| 658 return current_.raw_literal_chars->is_one_byte(); | 656 return current_.raw_literal_chars->is_one_byte(); |
| 659 } | 657 } |
| 660 | 658 |
| 661 | 659 template <bool capture_raw> |
| 662 uc32 ScanHexNumber(int expected_length); | 660 uc32 ScanHexNumber(int expected_length); |
| 663 // Scan a number of any length but not bigger than max_value. For example, the | 661 // Scan a number of any length but not bigger than max_value. For example, the |
| 664 // number can be 000000001, so it's very long in characters but its value is | 662 // number can be 000000001, so it's very long in characters but its value is |
| 665 // small. | 663 // small. |
| 664 template <bool capture_raw> | |
| 666 uc32 ScanUnlimitedLengthHexNumber(int max_value); | 665 uc32 ScanUnlimitedLengthHexNumber(int max_value); |
| 667 | 666 |
| 668 // Scans a single JavaScript token. | 667 // Scans a single JavaScript token. |
| 669 void Scan(); | 668 void Scan(); |
| 670 | 669 |
| 671 bool SkipWhiteSpace(); | 670 bool SkipWhiteSpace(); |
| 672 Token::Value SkipSingleLineComment(); | 671 Token::Value SkipSingleLineComment(); |
| 673 Token::Value SkipSourceURLComment(); | 672 Token::Value SkipSourceURLComment(); |
| 674 void TryToParseSourceURLComment(); | 673 void TryToParseSourceURLComment(); |
| 675 Token::Value SkipMultiLineComment(); | 674 Token::Value SkipMultiLineComment(); |
| 676 // Scans a possible HTML comment -- begins with '<!'. | 675 // Scans a possible HTML comment -- begins with '<!'. |
| 677 Token::Value ScanHtmlComment(); | 676 Token::Value ScanHtmlComment(); |
| 678 | 677 |
| 679 void ScanDecimalDigits(); | 678 void ScanDecimalDigits(); |
| 680 Token::Value ScanNumber(bool seen_period); | 679 Token::Value ScanNumber(bool seen_period); |
| 681 Token::Value ScanIdentifierOrKeyword(); | 680 Token::Value ScanIdentifierOrKeyword(); |
| 682 Token::Value ScanIdentifierSuffix(LiteralScope* literal); | 681 Token::Value ScanIdentifierSuffix(LiteralScope* literal); |
| 683 | 682 |
| 684 Token::Value ScanString(); | 683 Token::Value ScanString(); |
| 685 | 684 |
| 686 // Scans an escape-sequence which is part of a string and adds the | 685 // Scans an escape-sequence which is part of a string and adds the |
| 687 // decoded character to the current literal. Returns true if a pattern | 686 // decoded character to the current literal. Returns true if a pattern |
| 688 // is scanned. | 687 // is scanned. |
| 688 template <bool capture_raw> | |
| 689 bool ScanEscape(); | 689 bool ScanEscape(); |
| 690 // Decodes a Unicode escape-sequence which is part of an identifier. | 690 // Decodes a Unicode escape-sequence which is part of an identifier. |
| 691 // If the escape sequence cannot be decoded the result is kBadChar. | 691 // If the escape sequence cannot be decoded the result is kBadChar. |
| 692 uc32 ScanIdentifierUnicodeEscape(); | 692 uc32 ScanIdentifierUnicodeEscape(); |
| 693 // Helper for the above functions. | 693 // Helper for the above functions. |
| 694 template <bool capture_raw> | |
| 694 uc32 ScanUnicodeEscape(); | 695 uc32 ScanUnicodeEscape(); |
| 695 | 696 |
| 696 Token::Value ScanTemplateSpan(); | 697 Token::Value ScanTemplateSpan(); |
| 697 | 698 |
| 698 // Return the current source position. | 699 // Return the current source position. |
| 699 int source_pos() { | 700 int source_pos() { |
| 700 return source_->pos() - kCharacterLookaheadBufferSize; | 701 return source_->pos() - kCharacterLookaheadBufferSize; |
| 701 } | 702 } |
| 702 | 703 |
| 703 UnicodeCache* unicode_cache_; | 704 UnicodeCache* unicode_cache_; |
| 704 | 705 |
| 705 // Buffers collecting literal strings, numbers, etc. | 706 // Buffers collecting literal strings, numbers, etc. |
| 706 LiteralBuffer literal_buffer1_; | 707 LiteralBuffer literal_buffer1_; |
| 707 LiteralBuffer literal_buffer2_; | 708 LiteralBuffer literal_buffer2_; |
| 708 | 709 |
| 709 // Values parsed from magic comments. | 710 // Values parsed from magic comments. |
| 710 LiteralBuffer source_url_; | 711 LiteralBuffer source_url_; |
| 711 LiteralBuffer source_mapping_url_; | 712 LiteralBuffer source_mapping_url_; |
| 712 | 713 |
| 713 // Buffer to store raw string values | 714 // Buffer to store raw string values |
| 714 LiteralBuffer raw_literal_buffer_; | 715 LiteralBuffer raw_literal_buffer_; |
| 715 | 716 |
| 716 // We only need to capture the raw literal when we are scanning template | |
| 717 // literal spans. | |
| 718 bool capturing_raw_literal_; | |
| 719 | |
| 720 TokenDesc current_; // desc for current token (as returned by Next()) | 717 TokenDesc current_; // desc for current token (as returned by Next()) |
| 721 TokenDesc next_; // desc for next token (one token look-ahead) | 718 TokenDesc next_; // desc for next token (one token look-ahead) |
| 722 | 719 |
| 723 // Input stream. Must be initialized to an Utf16CharacterStream. | 720 // Input stream. Must be initialized to an Utf16CharacterStream. |
| 724 Utf16CharacterStream* source_; | 721 Utf16CharacterStream* source_; |
| 725 | 722 |
| 726 | 723 |
| 727 // Start position of the octal literal last scanned. | 724 // Start position of the octal literal last scanned. |
| 728 Location octal_pos_; | 725 Location octal_pos_; |
| 729 | 726 |
| (...skipping 17 matching lines...) Expand all Loading... | |
| 747 bool harmony_classes_; | 744 bool harmony_classes_; |
| 748 // Whether we scan TEMPLATE_SPAN and TEMPLATE_TAIL | 745 // Whether we scan TEMPLATE_SPAN and TEMPLATE_TAIL |
| 749 bool harmony_templates_; | 746 bool harmony_templates_; |
| 750 // Whether we allow \u{xxxxx}. | 747 // Whether we allow \u{xxxxx}. |
| 751 bool harmony_unicode_; | 748 bool harmony_unicode_; |
| 752 }; | 749 }; |
| 753 | 750 |
| 754 } } // namespace v8::internal | 751 } } // namespace v8::internal |
| 755 | 752 |
| 756 #endif // V8_SCANNER_H_ | 753 #endif // V8_SCANNER_H_ |
| OLD | NEW |