| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
| 6 | 6 |
| 7 #ifndef V8_SCANNER_H_ | 7 #ifndef V8_SCANNER_H_ |
| 8 #define V8_SCANNER_H_ | 8 #define V8_SCANNER_H_ |
| 9 | 9 |
| 10 #include "src/allocation.h" | 10 #include "src/allocation.h" |
| (...skipping 304 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 315 Vector<byte> backing_store_; | 315 Vector<byte> backing_store_; |
| 316 | 316 |
| 317 DISALLOW_COPY_AND_ASSIGN(LiteralBuffer); | 317 DISALLOW_COPY_AND_ASSIGN(LiteralBuffer); |
| 318 }; | 318 }; |
| 319 | 319 |
| 320 | 320 |
| 321 // ---------------------------------------------------------------------------- | 321 // ---------------------------------------------------------------------------- |
| 322 // JavaScript Scanner. | 322 // JavaScript Scanner. |
| 323 | 323 |
| 324 class Scanner { | 324 class Scanner { |
| 325 struct TokenDesc; |
| 326 |
| 325 public: | 327 public: |
| 328 static const int kMaxLookahead = 2; |
| 329 |
| 326 // Scoped helper for literal recording. Automatically drops the literal | 330 // Scoped helper for literal recording. Automatically drops the literal |
| 327 // if aborting the scanning before it's complete. | 331 // if aborting the scanning before it's complete. |
| 328 class LiteralScope { | 332 class LiteralScope { |
| 329 public: | 333 public: |
| 330 explicit LiteralScope(Scanner* self) : scanner_(self), complete_(false) { | 334 explicit LiteralScope(Scanner* self) : scanner_(self), complete_(false) { |
| 331 scanner_->StartLiteral(); | 335 next_ = scanner_->PeekTokenDesc(); |
| 336 scanner_->StartLiteral(next_); |
| 332 } | 337 } |
| 333 ~LiteralScope() { | 338 ~LiteralScope() { |
| 334 if (!complete_) scanner_->DropLiteral(); | 339 if (!complete_) scanner_->DropLiteral(next_); |
| 335 } | 340 } |
| 336 void Complete() { | 341 void Complete() { |
| 337 complete_ = true; | 342 complete_ = true; |
| 338 } | 343 } |
| 339 | 344 |
| 340 private: | 345 private: |
| 346 friend class Scanner; |
| 341 Scanner* scanner_; | 347 Scanner* scanner_; |
| 348 TokenDesc* next_; |
| 342 bool complete_; | 349 bool complete_; |
| 343 }; | 350 }; |
| 344 | 351 |
| 345 // Representation of an interval of source positions. | 352 // Representation of an interval of source positions. |
| 346 struct Location { | 353 struct Location { |
| 347 Location(int b, int e) : beg_pos(b), end_pos(e) { } | 354 Location(int b, int e) : beg_pos(b), end_pos(e) { } |
| 348 Location() : beg_pos(0), end_pos(0) { } | 355 Location() : beg_pos(0), end_pos(0) { } |
| 349 | 356 |
| 350 bool IsValid() const { | 357 bool IsValid() const { |
| 351 return beg_pos >= 0 && end_pos >= beg_pos; | 358 return beg_pos >= 0 && end_pos >= beg_pos; |
| (...skipping 16 matching lines...) Expand all Loading... |
| 368 Token::Value Next(); | 375 Token::Value Next(); |
| 369 // Returns the current token again. | 376 // Returns the current token again. |
| 370 Token::Value current_token() { return current_.token; } | 377 Token::Value current_token() { return current_.token; } |
| 371 // Returns the location information for the current token | 378 // Returns the location information for the current token |
| 372 // (the token last returned by Next()). | 379 // (the token last returned by Next()). |
| 373 Location location() const { return current_.location; } | 380 Location location() const { return current_.location; } |
| 374 | 381 |
| 375 // Similar functions for the upcoming token. | 382 // Similar functions for the upcoming token. |
| 376 | 383 |
| 377 // One token look-ahead (past the token returned by Next()). | 384 // One token look-ahead (past the token returned by Next()). |
| 378 Token::Value peek() const { return next_.token; } | 385 Token::Value peek() const { return next_[0].token; } |
| 386 Token::Value peek(int n); |
| 379 | 387 |
| 380 Location peek_location() const { return next_.location; } | 388 Location peek_location() const { return next_[0].location; } |
| 389 Location peek_location(int n); |
| 381 | 390 |
| 382 bool literal_contains_escapes() const { | 391 bool literal_contains_escapes() const { |
| 383 Location location = current_.location; | 392 Location location = current_.location; |
| 384 int source_length = (location.end_pos - location.beg_pos); | 393 int source_length = (location.end_pos - location.beg_pos); |
| 385 if (current_.token == Token::STRING) { | 394 if (current_.token == Token::STRING) { |
| 386 // Subtract delimiters. | 395 // Subtract delimiters. |
| 387 source_length -= 2; | 396 source_length -= 2; |
| 388 } | 397 } |
| 389 return current_.literal_chars->length() != source_length; | 398 return current_.literal_chars->length() != source_length; |
| 390 } | 399 } |
| 391 bool is_literal_contextual_keyword(Vector<const char> keyword) { | 400 bool is_literal_contextual_keyword(Vector<const char> keyword) { |
| 392 DCHECK_NOT_NULL(current_.literal_chars); | 401 DCHECK_NOT_NULL(current_.literal_chars); |
| 393 return current_.literal_chars->is_contextual_keyword(keyword); | 402 return current_.literal_chars->is_contextual_keyword(keyword); |
| 394 } | 403 } |
| 395 bool is_next_contextual_keyword(Vector<const char> keyword) { | 404 bool is_next_contextual_keyword(Vector<const char> keyword) { |
| 396 DCHECK_NOT_NULL(next_.literal_chars); | 405 DCHECK_NOT_NULL(next_[0].literal_chars); |
| 397 return next_.literal_chars->is_contextual_keyword(keyword); | 406 return next_[0].literal_chars->is_contextual_keyword(keyword); |
| 398 } | 407 } |
| 399 | 408 |
| 400 const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory); | 409 const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory); |
| 401 const AstRawString* NextSymbol(AstValueFactory* ast_value_factory); | 410 const AstRawString* NextSymbol(AstValueFactory* ast_value_factory); |
| 402 const AstRawString* CurrentRawSymbol(AstValueFactory* ast_value_factory); | 411 const AstRawString* CurrentRawSymbol(AstValueFactory* ast_value_factory); |
| 403 | 412 |
| 404 double DoubleValue(); | 413 double DoubleValue(); |
| 405 bool LiteralMatches(const char* data, int length, bool allow_escapes = true) { | 414 bool LiteralMatches(const char* data, int length, bool allow_escapes = true) { |
| 406 if (is_literal_one_byte() && | 415 if (is_literal_one_byte() && |
| 407 literal_length() == length && | 416 literal_length() == length && |
| (...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 518 void Init() { | 527 void Init() { |
| 519 // Set c0_ (one character ahead) | 528 // Set c0_ (one character ahead) |
| 520 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); | 529 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); |
| 521 Advance(); | 530 Advance(); |
| 522 // Initialize current_ to not refer to a literal. | 531 // Initialize current_ to not refer to a literal. |
| 523 current_.literal_chars = NULL; | 532 current_.literal_chars = NULL; |
| 524 current_.raw_literal_chars = NULL; | 533 current_.raw_literal_chars = NULL; |
| 525 } | 534 } |
| 526 | 535 |
| 527 // Literal buffer support | 536 // Literal buffer support |
| 528 inline void StartLiteral() { | 537 inline void StartLiteral(TokenDesc* next) { |
| 529 LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ? | 538 LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ? |
| 530 &literal_buffer2_ : &literal_buffer1_; | 539 &literal_buffer2_ : &literal_buffer1_; |
| 531 free_buffer->Reset(); | 540 free_buffer->Reset(); |
| 532 next_.literal_chars = free_buffer; | 541 next->literal_chars = free_buffer; |
| 533 } | 542 } |
| 534 | 543 |
| 535 inline void StartRawLiteral() { | 544 inline void StartRawLiteral() { |
| 545 TokenDesc* next = PeekTokenDesc(); |
| 536 raw_literal_buffer_.Reset(); | 546 raw_literal_buffer_.Reset(); |
| 537 next_.raw_literal_chars = &raw_literal_buffer_; | 547 next->raw_literal_chars = &raw_literal_buffer_; |
| 538 } | 548 } |
| 539 | 549 |
| 540 INLINE(void AddLiteralChar(uc32 c)) { | 550 INLINE(void AddLiteralChar(TokenDesc* next, uc32 c)) { |
| 541 DCHECK_NOT_NULL(next_.literal_chars); | 551 DCHECK_NOT_NULL(next->literal_chars); |
| 542 next_.literal_chars->AddChar(c); | 552 next->literal_chars->AddChar(c); |
| 543 } | 553 } |
| 544 | 554 |
| 545 INLINE(void AddRawLiteralChar(uc32 c)) { | 555 INLINE(void AddRawLiteralChar(TokenDesc* next, uc32 c)) { |
| 546 DCHECK_NOT_NULL(next_.raw_literal_chars); | 556 DCHECK_NOT_NULL(next->raw_literal_chars); |
| 547 next_.raw_literal_chars->AddChar(c); | 557 next->raw_literal_chars->AddChar(c); |
| 548 } | 558 } |
| 549 | 559 |
| 550 INLINE(void ReduceRawLiteralLength(int delta)) { | 560 INLINE(void ReduceRawLiteralLength(TokenDesc* next, int delta)) { |
| 551 DCHECK_NOT_NULL(next_.raw_literal_chars); | 561 DCHECK_NOT_NULL(next->raw_literal_chars); |
| 552 next_.raw_literal_chars->ReduceLength(delta); | 562 next->raw_literal_chars->ReduceLength(delta); |
| 553 } | 563 } |
| 554 | 564 |
| 555 // Stops scanning of a literal and drop the collected characters, | 565 // Stops scanning of a literal and drop the collected characters, |
| 556 // e.g., due to an encountered error. | 566 // e.g., due to an encountered error. |
| 557 inline void DropLiteral() { | 567 inline void DropLiteral(TokenDesc* next) { |
| 558 next_.literal_chars = NULL; | 568 next->literal_chars = NULL; |
| 559 next_.raw_literal_chars = NULL; | 569 next->raw_literal_chars = NULL; |
| 560 } | 570 } |
| 561 | 571 |
| 562 inline void AddLiteralCharAdvance() { | 572 inline void AddLiteralCharAdvance(TokenDesc* next) { |
| 563 AddLiteralChar(c0_); | 573 AddLiteralChar(next, c0_); |
| 564 Advance(); | 574 Advance(); |
| 565 } | 575 } |
| 566 | 576 |
| 567 // Low-level scanning support. | 577 // Low-level scanning support. |
| 568 template <bool capture_raw = false, bool check_surrogate = true> | 578 template <bool capture_raw = false, bool check_surrogate = true> |
| 569 void Advance() { | 579 void Advance() { |
| 570 if (capture_raw) { | 580 if (capture_raw) { |
| 571 AddRawLiteralChar(c0_); | 581 AddRawLiteralChar(PeekTokenDesc(), c0_); |
| 572 } | 582 } |
| 573 c0_ = source_->Advance(); | 583 c0_ = source_->Advance(); |
| 574 if (check_surrogate) HandleLeadSurrogate(); | 584 if (check_surrogate) HandleLeadSurrogate(); |
| 575 } | 585 } |
| 576 | 586 |
| 577 void HandleLeadSurrogate() { | 587 void HandleLeadSurrogate() { |
| 578 if (unibrow::Utf16::IsLeadSurrogate(c0_)) { | 588 if (unibrow::Utf16::IsLeadSurrogate(c0_)) { |
| 579 uc32 c1 = source_->Advance(); | 589 uc32 c1 = source_->Advance(); |
| 580 if (!unibrow::Utf16::IsTrailSurrogate(c1)) { | 590 if (!unibrow::Utf16::IsTrailSurrogate(c1)) { |
| 581 source_->PushBack(c1); | 591 source_->PushBack(c1); |
| (...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 629 DCHECK_NOT_NULL(current_.literal_chars); | 639 DCHECK_NOT_NULL(current_.literal_chars); |
| 630 return current_.literal_chars->is_one_byte(); | 640 return current_.literal_chars->is_one_byte(); |
| 631 } | 641 } |
| 632 int literal_length() const { | 642 int literal_length() const { |
| 633 DCHECK_NOT_NULL(current_.literal_chars); | 643 DCHECK_NOT_NULL(current_.literal_chars); |
| 634 return current_.literal_chars->length(); | 644 return current_.literal_chars->length(); |
| 635 } | 645 } |
| 636 // Returns the literal string for the next token (the token that | 646 // Returns the literal string for the next token (the token that |
| 637 // would be returned if Next() were called). | 647 // would be returned if Next() were called). |
| 638 Vector<const uint8_t> next_literal_one_byte_string() { | 648 Vector<const uint8_t> next_literal_one_byte_string() { |
| 639 DCHECK_NOT_NULL(next_.literal_chars); | 649 DCHECK_NOT_NULL(next_[0].literal_chars); |
| 640 return next_.literal_chars->one_byte_literal(); | 650 return next_[0].literal_chars->one_byte_literal(); |
| 641 } | 651 } |
| 642 Vector<const uint16_t> next_literal_two_byte_string() { | 652 Vector<const uint16_t> next_literal_two_byte_string() { |
| 643 DCHECK_NOT_NULL(next_.literal_chars); | 653 DCHECK_NOT_NULL(next_[0].literal_chars); |
| 644 return next_.literal_chars->two_byte_literal(); | 654 return next_[0].literal_chars->two_byte_literal(); |
| 645 } | 655 } |
| 646 bool is_next_literal_one_byte() { | 656 bool is_next_literal_one_byte() { |
| 647 DCHECK_NOT_NULL(next_.literal_chars); | 657 DCHECK_NOT_NULL(next_[0].literal_chars); |
| 648 return next_.literal_chars->is_one_byte(); | 658 return next_[0].literal_chars->is_one_byte(); |
| 649 } | 659 } |
| 650 Vector<const uint8_t> raw_literal_one_byte_string() { | 660 Vector<const uint8_t> raw_literal_one_byte_string() { |
| 651 DCHECK_NOT_NULL(current_.raw_literal_chars); | 661 DCHECK_NOT_NULL(current_.raw_literal_chars); |
| 652 return current_.raw_literal_chars->one_byte_literal(); | 662 return current_.raw_literal_chars->one_byte_literal(); |
| 653 } | 663 } |
| 654 Vector<const uint16_t> raw_literal_two_byte_string() { | 664 Vector<const uint16_t> raw_literal_two_byte_string() { |
| 655 DCHECK_NOT_NULL(current_.raw_literal_chars); | 665 DCHECK_NOT_NULL(current_.raw_literal_chars); |
| 656 return current_.raw_literal_chars->two_byte_literal(); | 666 return current_.raw_literal_chars->two_byte_literal(); |
| 657 } | 667 } |
| 658 bool is_raw_literal_one_byte() { | 668 bool is_raw_literal_one_byte() { |
| 659 DCHECK_NOT_NULL(current_.raw_literal_chars); | 669 DCHECK_NOT_NULL(current_.raw_literal_chars); |
| 660 return current_.raw_literal_chars->is_one_byte(); | 670 return current_.raw_literal_chars->is_one_byte(); |
| 661 } | 671 } |
| 662 | 672 |
| 663 template <bool capture_raw> | 673 template <bool capture_raw> |
| 664 uc32 ScanHexNumber(int expected_length); | 674 uc32 ScanHexNumber(int expected_length); |
| 665 // Scan a number of any length but not bigger than max_value. For example, the | 675 // Scan a number of any length but not bigger than max_value. For example, the |
| 666 // number can be 000000001, so it's very long in characters but its value is | 676 // number can be 000000001, so it's very long in characters but its value is |
| 667 // small. | 677 // small. |
| 668 template <bool capture_raw> | 678 template <bool capture_raw> |
| 669 uc32 ScanUnlimitedLengthHexNumber(int max_value); | 679 uc32 ScanUnlimitedLengthHexNumber(int max_value); |
| 670 | 680 |
| 671 // Scans a single JavaScript token. | 681 // Scans a single JavaScript token. |
| 672 void Scan(); | 682 void Scan(TokenDesc* next); |
| 673 | 683 |
| 674 bool SkipWhiteSpace(); | 684 bool SkipWhiteSpace(); |
| 675 Token::Value SkipSingleLineComment(); | 685 Token::Value SkipSingleLineComment(); |
| 676 Token::Value SkipSourceURLComment(); | 686 Token::Value SkipSourceURLComment(); |
| 677 void TryToParseSourceURLComment(); | 687 void TryToParseSourceURLComment(); |
| 678 Token::Value SkipMultiLineComment(); | 688 Token::Value SkipMultiLineComment(); |
| 679 // Scans a possible HTML comment -- begins with '<!'. | 689 // Scans a possible HTML comment -- begins with '<!'. |
| 680 Token::Value ScanHtmlComment(); | 690 Token::Value ScanHtmlComment(); |
| 681 | 691 |
| 682 void ScanDecimalDigits(); | 692 void ScanDecimalDigits(TokenDesc* next); |
| 683 Token::Value ScanNumber(bool seen_period); | 693 Token::Value ScanNumber(bool seen_period); |
| 684 Token::Value ScanIdentifierOrKeyword(); | 694 Token::Value ScanIdentifierOrKeyword(); |
| 685 Token::Value ScanIdentifierSuffix(LiteralScope* literal); | 695 Token::Value ScanIdentifierSuffix(LiteralScope* literal); |
| 686 | 696 |
| 687 Token::Value ScanString(); | 697 Token::Value ScanString(); |
| 688 | 698 |
| 689 // Scans an escape-sequence which is part of a string and adds the | 699 // Scans an escape-sequence which is part of a string and adds the |
| 690 // decoded character to the current literal. Returns true if a pattern | 700 // decoded character to the current literal. Returns true if a pattern |
| 691 // is scanned. | 701 // is scanned. |
| 692 template <bool capture_raw, bool in_template_literal> | 702 template <bool capture_raw, bool in_template_literal> |
| 693 bool ScanEscape(); | 703 bool ScanEscape(TokenDesc* next); |
| 694 | 704 |
| 695 // Decodes a Unicode escape-sequence which is part of an identifier. | 705 // Decodes a Unicode escape-sequence which is part of an identifier. |
| 696 // If the escape sequence cannot be decoded the result is kBadChar. | 706 // If the escape sequence cannot be decoded the result is kBadChar. |
| 697 uc32 ScanIdentifierUnicodeEscape(); | 707 uc32 ScanIdentifierUnicodeEscape(); |
| 698 // Helper for the above functions. | 708 // Helper for the above functions. |
| 699 template <bool capture_raw> | 709 template <bool capture_raw> |
| 700 uc32 ScanUnicodeEscape(); | 710 uc32 ScanUnicodeEscape(); |
| 701 | 711 |
| 702 Token::Value ScanTemplateSpan(); | 712 Token::Value ScanTemplateSpan(); |
| 703 | 713 |
| 704 // Return the current source position. | 714 // Return the current source position. |
| 705 int source_pos() { | 715 int source_pos() { |
| 706 return static_cast<int>(source_->pos()) - kCharacterLookaheadBufferSize; | 716 return static_cast<int>(source_->pos()) - kCharacterLookaheadBufferSize; |
| 707 } | 717 } |
| 708 | 718 |
| 709 UnicodeCache* unicode_cache_; | 719 UnicodeCache* unicode_cache_; |
| 710 | 720 |
| 711 // Buffers collecting literal strings, numbers, etc. | 721 // Buffers collecting literal strings, numbers, etc. |
| 712 LiteralBuffer literal_buffer1_; | 722 LiteralBuffer literal_buffer1_; |
| 713 LiteralBuffer literal_buffer2_; | 723 LiteralBuffer literal_buffer2_; |
| 714 | 724 |
| 715 // Values parsed from magic comments. | 725 // Values parsed from magic comments. |
| 716 LiteralBuffer source_url_; | 726 LiteralBuffer source_url_; |
| 717 LiteralBuffer source_mapping_url_; | 727 LiteralBuffer source_mapping_url_; |
| 718 | 728 |
| 719 // Buffer to store raw string values | 729 // Buffer to store raw string values |
| 720 LiteralBuffer raw_literal_buffer_; | 730 LiteralBuffer raw_literal_buffer_; |
| 721 | 731 |
| 722 TokenDesc current_; // desc for current token (as returned by Next()) | 732 TokenDesc current_; // desc for current token (as returned by Next()) |
| 723 TokenDesc next_; // desc for next token (one token look-ahead) | 733 TokenDesc next_[kMaxLookahead]; // desc for next (look-ahead) tokens |
| 734 int peek_count_; // number of peek tokens available (normally 1) |
| 735 |
| 736 INLINE(TokenDesc* PeekTokenDesc(int count)) { |
| 737 DCHECK(count >= 0 && count < kMaxLookahead); |
| 738 return &(next_[count]); |
| 739 } |
| 740 |
| 741 INLINE(TokenDesc* PeekTokenDesc()) { return PeekTokenDesc(peek_count_ - 1); } |
| 742 |
| 743 // Scan multiple lookahead tokens |
| 744 void PeekScan(int count); |
| 724 | 745 |
| 725 // Input stream. Must be initialized to an Utf16CharacterStream. | 746 // Input stream. Must be initialized to an Utf16CharacterStream. |
| 726 Utf16CharacterStream* source_; | 747 Utf16CharacterStream* source_; |
| 727 | 748 |
| 728 | 749 |
| 729 // Start position of the octal literal last scanned. | 750 // Start position of the octal literal last scanned. |
| 730 Location octal_pos_; | 751 Location octal_pos_; |
| 731 | 752 |
| 732 // Value of the last smi that was scanned. | 753 // Value of the last smi that was scanned. |
| 733 int smi_value_; | 754 int smi_value_; |
| (...skipping 18 matching lines...) Expand all Loading... |
| 752 bool harmony_classes_; | 773 bool harmony_classes_; |
| 753 // Whether we scan TEMPLATE_SPAN and TEMPLATE_TAIL | 774 // Whether we scan TEMPLATE_SPAN and TEMPLATE_TAIL |
| 754 bool harmony_templates_; | 775 bool harmony_templates_; |
| 755 // Whether we allow \u{xxxxx}. | 776 // Whether we allow \u{xxxxx}. |
| 756 bool harmony_unicode_; | 777 bool harmony_unicode_; |
| 757 }; | 778 }; |
| 758 | 779 |
| 759 } } // namespace v8::internal | 780 } } // namespace v8::internal |
| 760 | 781 |
| 761 #endif // V8_SCANNER_H_ | 782 #endif // V8_SCANNER_H_ |
| OLD | NEW |