| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
| 6 | 6 |
| 7 #ifndef V8_SCANNER_H_ | 7 #ifndef V8_SCANNER_H_ |
| 8 #define V8_SCANNER_H_ | 8 #define V8_SCANNER_H_ |
| 9 | 9 |
| 10 #include "src/allocation.h" | 10 #include "src/allocation.h" |
| (...skipping 234 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 245 DCHECK(is_one_byte_); | 245 DCHECK(is_one_byte_); |
| 246 return Vector<const uint8_t>( | 246 return Vector<const uint8_t>( |
| 247 reinterpret_cast<const uint8_t*>(backing_store_.start()), | 247 reinterpret_cast<const uint8_t*>(backing_store_.start()), |
| 248 position_); | 248 position_); |
| 249 } | 249 } |
| 250 | 250 |
| 251 int length() const { | 251 int length() const { |
| 252 return is_one_byte_ ? position_ : (position_ >> 1); | 252 return is_one_byte_ ? position_ : (position_ >> 1); |
| 253 } | 253 } |
| 254 | 254 |
| 255 void ReduceLength(int delta) { |
| 256 position_ -= delta * (is_one_byte_ ? kOneByteSize : kUC16Size); |
| 257 } |
| 258 |
| 255 void Reset() { | 259 void Reset() { |
| 256 position_ = 0; | 260 position_ = 0; |
| 257 is_one_byte_ = true; | 261 is_one_byte_ = true; |
| 258 } | 262 } |
| 259 | 263 |
| 260 Handle<String> Internalize(Isolate* isolate) const; | 264 Handle<String> Internalize(Isolate* isolate) const; |
| 261 | 265 |
| 262 private: | 266 private: |
| 263 static const int kInitialCapacity = 16; | 267 static const int kInitialCapacity = 16; |
| 264 static const int kGrowthFactory = 4; | 268 static const int kGrowthFactory = 4; |
| (...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 311 | 315 |
| 312 // ---------------------------------------------------------------------------- | 316 // ---------------------------------------------------------------------------- |
| 313 // JavaScript Scanner. | 317 // JavaScript Scanner. |
| 314 | 318 |
| 315 class Scanner { | 319 class Scanner { |
| 316 public: | 320 public: |
| 317 // Scoped helper for literal recording. Automatically drops the literal | 321 // Scoped helper for literal recording. Automatically drops the literal |
| 318 // if aborting the scanning before it's complete. | 322 // if aborting the scanning before it's complete. |
| 319 class LiteralScope { | 323 class LiteralScope { |
| 320 public: | 324 public: |
| 321 explicit LiteralScope(Scanner* self) | 325 explicit LiteralScope(Scanner* self, bool capture_raw = false) |
| 322 : scanner_(self), complete_(false) { | 326 : scanner_(self), complete_(false) { |
| 323 scanner_->StartLiteral(); | 327 scanner_->StartLiteral(); |
| 328 if (capture_raw) scanner_->StartRawLiteral(); |
| 324 } | 329 } |
| 325 ~LiteralScope() { | 330 ~LiteralScope() { |
| 326 if (!complete_) scanner_->DropLiteral(); | 331 if (!complete_) scanner_->DropLiteral(); |
| 327 } | 332 } |
| 328 void Complete() { | 333 void Complete() { |
| 329 scanner_->TerminateLiteral(); | 334 scanner_->TerminateLiteral(); |
| 330 complete_ = true; | 335 complete_ = true; |
| 331 } | 336 } |
| 332 | 337 |
| 333 private: | 338 private: |
| (...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 385 DCHECK_NOT_NULL(current_.literal_chars); | 390 DCHECK_NOT_NULL(current_.literal_chars); |
| 386 return current_.literal_chars->is_contextual_keyword(keyword); | 391 return current_.literal_chars->is_contextual_keyword(keyword); |
| 387 } | 392 } |
| 388 bool is_next_contextual_keyword(Vector<const char> keyword) { | 393 bool is_next_contextual_keyword(Vector<const char> keyword) { |
| 389 DCHECK_NOT_NULL(next_.literal_chars); | 394 DCHECK_NOT_NULL(next_.literal_chars); |
| 390 return next_.literal_chars->is_contextual_keyword(keyword); | 395 return next_.literal_chars->is_contextual_keyword(keyword); |
| 391 } | 396 } |
| 392 | 397 |
| 393 const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory); | 398 const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory); |
| 394 const AstRawString* NextSymbol(AstValueFactory* ast_value_factory); | 399 const AstRawString* NextSymbol(AstValueFactory* ast_value_factory); |
| 400 const AstRawString* CurrentRawSymbol(AstValueFactory* ast_value_factory); |
| 395 | 401 |
| 396 double DoubleValue(); | 402 double DoubleValue(); |
| 397 bool LiteralMatches(const char* data, int length, bool allow_escapes = true) { | 403 bool LiteralMatches(const char* data, int length, bool allow_escapes = true) { |
| 398 if (is_literal_one_byte() && | 404 if (is_literal_one_byte() && |
| 399 literal_length() == length && | 405 literal_length() == length && |
| 400 (allow_escapes || !literal_contains_escapes())) { | 406 (allow_escapes || !literal_contains_escapes())) { |
| 401 const char* token = | 407 const char* token = |
| 402 reinterpret_cast<const char*>(literal_one_byte_string().start()); | 408 reinterpret_cast<const char*>(literal_one_byte_string().start()); |
| 403 return !strncmp(token, data, length); | 409 return !strncmp(token, data, length); |
| 404 } | 410 } |
| (...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 486 } | 492 } |
| 487 | 493 |
| 488 bool IdentifierIsFutureStrictReserved(const AstRawString* string) const; | 494 bool IdentifierIsFutureStrictReserved(const AstRawString* string) const; |
| 489 | 495 |
| 490 private: | 496 private: |
| 491 // The current and look-ahead token. | 497 // The current and look-ahead token. |
| 492 struct TokenDesc { | 498 struct TokenDesc { |
| 493 Token::Value token; | 499 Token::Value token; |
| 494 Location location; | 500 Location location; |
| 495 LiteralBuffer* literal_chars; | 501 LiteralBuffer* literal_chars; |
| 502 LiteralBuffer* raw_literal_chars; |
| 496 }; | 503 }; |
| 497 | 504 |
| 498 static const int kCharacterLookaheadBufferSize = 1; | 505 static const int kCharacterLookaheadBufferSize = 1; |
| 499 | 506 |
| 500 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. | 507 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. |
| 501 uc32 ScanOctalEscape(uc32 c, int length); | 508 uc32 ScanOctalEscape(uc32 c, int length); |
| 502 | 509 |
| 503 // Call this after setting source_ to the input. | 510 // Call this after setting source_ to the input. |
| 504 void Init() { | 511 void Init() { |
| 505 // Set c0_ (one character ahead) | 512 // Set c0_ (one character ahead) |
| 506 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); | 513 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); |
| 507 Advance(); | 514 Advance(); |
| 508 // Initialize current_ to not refer to a literal. | 515 // Initialize current_ to not refer to a literal. |
| 509 current_.literal_chars = NULL; | 516 current_.literal_chars = NULL; |
| 517 current_.raw_literal_chars = NULL; |
| 510 } | 518 } |
| 511 | 519 |
| 512 // Literal buffer support | 520 // Literal buffer support |
| 513 inline void StartLiteral() { | 521 inline void StartLiteral() { |
| 514 LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ? | 522 LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ? |
| 515 &literal_buffer2_ : &literal_buffer1_; | 523 &literal_buffer2_ : &literal_buffer1_; |
| 516 free_buffer->Reset(); | 524 free_buffer->Reset(); |
| 517 next_.literal_chars = free_buffer; | 525 next_.literal_chars = free_buffer; |
| 518 } | 526 } |
| 519 | 527 |
| 528 inline void StartRawLiteral() { |
| 529 raw_literal_buffer_.Reset(); |
| 530 next_.raw_literal_chars = &raw_literal_buffer_; |
| 531 capturing_raw_literal_ = true; |
| 532 } |
| 533 |
| 520 INLINE(void AddLiteralChar(uc32 c)) { | 534 INLINE(void AddLiteralChar(uc32 c)) { |
| 521 DCHECK_NOT_NULL(next_.literal_chars); | 535 DCHECK_NOT_NULL(next_.literal_chars); |
| 522 next_.literal_chars->AddChar(c); | 536 next_.literal_chars->AddChar(c); |
| 523 } | 537 } |
| 524 | 538 |
| 539 INLINE(void AddRawLiteralChar(uc32 c)) { |
| 540 DCHECK(capturing_raw_literal_); |
| 541 DCHECK_NOT_NULL(next_.raw_literal_chars); |
| 542 next_.raw_literal_chars->AddChar(c); |
| 543 } |
| 544 |
| 545 INLINE(void ReduceRawLiteralLength(int delta)) { |
| 546 DCHECK(capturing_raw_literal_); |
| 547 DCHECK_NOT_NULL(next_.raw_literal_chars); |
| 548 next_.raw_literal_chars->ReduceLength(delta); |
| 549 } |
| 550 |
| 525 // Complete scanning of a literal. | 551 // Complete scanning of a literal. |
| 526 inline void TerminateLiteral() { | 552 inline void TerminateLiteral() { capturing_raw_literal_ = false; } |
| 527 // Does nothing in the current implementation. | |
| 528 } | |
| 529 | 553 |
| 530 // Stops scanning of a literal and drop the collected characters, | 554 // Stops scanning of a literal and drop the collected characters, |
| 531 // e.g., due to an encountered error. | 555 // e.g., due to an encountered error. |
| 532 inline void DropLiteral() { | 556 inline void DropLiteral() { |
| 533 next_.literal_chars = NULL; | 557 next_.literal_chars = NULL; |
| 558 next_.raw_literal_chars = NULL; |
| 559 capturing_raw_literal_ = false; |
| 534 } | 560 } |
| 535 | 561 |
| 536 inline void AddLiteralCharAdvance() { | 562 inline void AddLiteralCharAdvance() { |
| 537 AddLiteralChar(c0_); | 563 AddLiteralChar(c0_); |
| 538 Advance(); | 564 Advance(); |
| 539 } | 565 } |
| 540 | 566 |
| 541 // Low-level scanning support. | 567 // Low-level scanning support. |
| 542 void Advance() { | 568 void Advance() { |
| 569 if (capturing_raw_literal_) { |
| 570 AddRawLiteralChar(c0_); |
| 571 } |
| 543 c0_ = source_->Advance(); | 572 c0_ = source_->Advance(); |
| 544 if (unibrow::Utf16::IsLeadSurrogate(c0_)) { | 573 if (unibrow::Utf16::IsLeadSurrogate(c0_)) { |
| 545 uc32 c1 = source_->Advance(); | 574 uc32 c1 = source_->Advance(); |
| 546 if (!unibrow::Utf16::IsTrailSurrogate(c1)) { | 575 if (!unibrow::Utf16::IsTrailSurrogate(c1)) { |
| 547 source_->PushBack(c1); | 576 source_->PushBack(c1); |
| 548 } else { | 577 } else { |
| 549 c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1); | 578 c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1); |
| 550 } | 579 } |
| 551 } | 580 } |
| 552 } | 581 } |
| 553 | 582 |
| 554 void PushBack(uc32 ch) { | 583 void PushBack(uc32 ch) { |
| 555 if (ch > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) { | 584 if (ch > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) { |
| 556 source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_)); | 585 source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_)); |
| 557 source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_)); | 586 source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_)); |
| 587 if (capturing_raw_literal_) ReduceRawLiteralLength(2); |
| 558 } else { | 588 } else { |
| 559 source_->PushBack(c0_); | 589 source_->PushBack(c0_); |
| 590 if (capturing_raw_literal_) ReduceRawLiteralLength(1); |
| 560 } | 591 } |
| 561 c0_ = ch; | 592 c0_ = ch; |
| 562 } | 593 } |
| 563 | 594 |
| 564 inline Token::Value Select(Token::Value tok) { | 595 inline Token::Value Select(Token::Value tok) { |
| 565 Advance(); | 596 Advance(); |
| 566 return tok; | 597 return tok; |
| 567 } | 598 } |
| 568 | 599 |
| 569 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { | 600 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { |
| 570 Advance(); | 601 Advance(); |
| 571 if (c0_ == next) { | 602 if (c0_ == next) { |
| 572 Advance(); | 603 Advance(); |
| 573 return then; | 604 return then; |
| 574 } else { | 605 } else { |
| 575 return else_; | 606 return else_; |
| 576 } | 607 } |
| 577 } | 608 } |
| 578 | 609 |
| 579 // Returns the literal string, if any, for the current token (the | 610 // Returns the literal string, if any, for the current token (the |
| 580 // token last returned by Next()). The string is 0-terminated. | 611 // token last returned by Next()). The string is 0-terminated. |
| 581 // Literal strings are collected for identifiers, strings, and | 612 // Literal strings are collected for identifiers, strings, numbers as well |
| 582 // numbers. | 613 // as for template literals. For template literals we also collect the raw |
| 614 // form. |
| 583 // These functions only give the correct result if the literal | 615 // These functions only give the correct result if the literal |
| 584 // was scanned between calls to StartLiteral() and TerminateLiteral(). | 616 // was scanned between calls to StartLiteral() and TerminateLiteral(). |
| 585 Vector<const uint8_t> literal_one_byte_string() { | 617 Vector<const uint8_t> literal_one_byte_string() { |
| 586 DCHECK_NOT_NULL(current_.literal_chars); | 618 DCHECK_NOT_NULL(current_.literal_chars); |
| 587 return current_.literal_chars->one_byte_literal(); | 619 return current_.literal_chars->one_byte_literal(); |
| 588 } | 620 } |
| 589 Vector<const uint16_t> literal_two_byte_string() { | 621 Vector<const uint16_t> literal_two_byte_string() { |
| 590 DCHECK_NOT_NULL(current_.literal_chars); | 622 DCHECK_NOT_NULL(current_.literal_chars); |
| 591 return current_.literal_chars->two_byte_literal(); | 623 return current_.literal_chars->two_byte_literal(); |
| 592 } | 624 } |
| (...skipping 12 matching lines...) Expand all Loading... |
| 605 return next_.literal_chars->one_byte_literal(); | 637 return next_.literal_chars->one_byte_literal(); |
| 606 } | 638 } |
| 607 Vector<const uint16_t> next_literal_two_byte_string() { | 639 Vector<const uint16_t> next_literal_two_byte_string() { |
| 608 DCHECK_NOT_NULL(next_.literal_chars); | 640 DCHECK_NOT_NULL(next_.literal_chars); |
| 609 return next_.literal_chars->two_byte_literal(); | 641 return next_.literal_chars->two_byte_literal(); |
| 610 } | 642 } |
| 611 bool is_next_literal_one_byte() { | 643 bool is_next_literal_one_byte() { |
| 612 DCHECK_NOT_NULL(next_.literal_chars); | 644 DCHECK_NOT_NULL(next_.literal_chars); |
| 613 return next_.literal_chars->is_one_byte(); | 645 return next_.literal_chars->is_one_byte(); |
| 614 } | 646 } |
| 615 int next_literal_length() const { | 647 Vector<const uint8_t> raw_literal_one_byte_string() { |
| 616 DCHECK_NOT_NULL(next_.literal_chars); | 648 DCHECK_NOT_NULL(current_.raw_literal_chars); |
| 617 return next_.literal_chars->length(); | 649 return current_.raw_literal_chars->one_byte_literal(); |
| 618 } | 650 } |
| 651 Vector<const uint16_t> raw_literal_two_byte_string() { |
| 652 DCHECK_NOT_NULL(current_.raw_literal_chars); |
| 653 return current_.raw_literal_chars->two_byte_literal(); |
| 654 } |
| 655 bool is_raw_literal_one_byte() { |
| 656 DCHECK_NOT_NULL(current_.raw_literal_chars); |
| 657 return current_.raw_literal_chars->is_one_byte(); |
| 658 } |
| 659 |
| 619 | 660 |
| 620 uc32 ScanHexNumber(int expected_length); | 661 uc32 ScanHexNumber(int expected_length); |
| 621 // Scan a number of any length but not bigger than max_value. For example, the | 662 // Scan a number of any length but not bigger than max_value. For example, the |
| 622 // number can be 000000001, so it's very long in characters but its value is | 663 // number can be 000000001, so it's very long in characters but its value is |
| 623 // small. | 664 // small. |
| 624 uc32 ScanUnlimitedLengthHexNumber(int max_value); | 665 uc32 ScanUnlimitedLengthHexNumber(int max_value); |
| 625 | 666 |
| 626 // Scans a single JavaScript token. | 667 // Scans a single JavaScript token. |
| 627 void Scan(); | 668 void Scan(); |
| 628 | 669 |
| (...skipping 30 matching lines...) Expand all Loading... |
| 659 UnicodeCache* unicode_cache_; | 700 UnicodeCache* unicode_cache_; |
| 660 | 701 |
| 661 // Buffers collecting literal strings, numbers, etc. | 702 // Buffers collecting literal strings, numbers, etc. |
| 662 LiteralBuffer literal_buffer1_; | 703 LiteralBuffer literal_buffer1_; |
| 663 LiteralBuffer literal_buffer2_; | 704 LiteralBuffer literal_buffer2_; |
| 664 | 705 |
| 665 // Values parsed from magic comments. | 706 // Values parsed from magic comments. |
| 666 LiteralBuffer source_url_; | 707 LiteralBuffer source_url_; |
| 667 LiteralBuffer source_mapping_url_; | 708 LiteralBuffer source_mapping_url_; |
| 668 | 709 |
| 710 // Buffer to store raw string values |
| 711 LiteralBuffer raw_literal_buffer_; |
| 712 |
| 713 // We only need to capture the raw literal when we are scanning template |
| 714 // literal spans. |
| 715 bool capturing_raw_literal_; |
| 716 |
| 669 TokenDesc current_; // desc for current token (as returned by Next()) | 717 TokenDesc current_; // desc for current token (as returned by Next()) |
| 670 TokenDesc next_; // desc for next token (one token look-ahead) | 718 TokenDesc next_; // desc for next token (one token look-ahead) |
| 671 | 719 |
| 672 // Input stream. Must be initialized to an Utf16CharacterStream. | 720 // Input stream. Must be initialized to an Utf16CharacterStream. |
| 673 Utf16CharacterStream* source_; | 721 Utf16CharacterStream* source_; |
| 674 | 722 |
| 675 | 723 |
| 676 // Start position of the octal literal last scanned. | 724 // Start position of the octal literal last scanned. |
| 677 Location octal_pos_; | 725 Location octal_pos_; |
| 678 | 726 |
| (...skipping 17 matching lines...) Expand all Loading... |
| 696 bool harmony_classes_; | 744 bool harmony_classes_; |
| 697 // Whether we scan TEMPLATE_SPAN and TEMPLATE_TAIL | 745 // Whether we scan TEMPLATE_SPAN and TEMPLATE_TAIL |
| 698 bool harmony_templates_; | 746 bool harmony_templates_; |
| 699 // Whether we allow \u{xxxxx}. | 747 // Whether we allow \u{xxxxx}. |
| 700 bool harmony_unicode_; | 748 bool harmony_unicode_; |
| 701 }; | 749 }; |
| 702 | 750 |
| 703 } } // namespace v8::internal | 751 } } // namespace v8::internal |
| 704 | 752 |
| 705 #endif // V8_SCANNER_H_ | 753 #endif // V8_SCANNER_H_ |
| OLD | NEW |