Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
| 6 | 6 |
| 7 #ifndef V8_SCANNER_H_ | 7 #ifndef V8_SCANNER_H_ |
| 8 #define V8_SCANNER_H_ | 8 #define V8_SCANNER_H_ |
| 9 | 9 |
| 10 #include "src/allocation.h" | 10 #include "src/allocation.h" |
| (...skipping 234 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 245 DCHECK(is_one_byte_); | 245 DCHECK(is_one_byte_); |
| 246 return Vector<const uint8_t>( | 246 return Vector<const uint8_t>( |
| 247 reinterpret_cast<const uint8_t*>(backing_store_.start()), | 247 reinterpret_cast<const uint8_t*>(backing_store_.start()), |
| 248 position_); | 248 position_); |
| 249 } | 249 } |
| 250 | 250 |
| 251 int length() const { | 251 int length() const { |
| 252 return is_one_byte_ ? position_ : (position_ >> 1); | 252 return is_one_byte_ ? position_ : (position_ >> 1); |
| 253 } | 253 } |
| 254 | 254 |
| | 255 void ReduceLength(int delta) { |
| | 256 position_ -= delta * (is_one_byte_ ? kOneByteSize : kUC16Size); |
| | 257 } |
| | 258 |
| 255 void Reset() { | 259 void Reset() { |
| 256 position_ = 0; | 260 position_ = 0; |
| 257 is_one_byte_ = true; | 261 is_one_byte_ = true; |
| 258 } | 262 } |
| 259 | 263 |
| 260 Handle<String> Internalize(Isolate* isolate) const; | 264 Handle<String> Internalize(Isolate* isolate) const; |
| 261 | 265 |
| 262 private: | 266 private: |
| 263 static const int kInitialCapacity = 16; | 267 static const int kInitialCapacity = 16; |
| 264 static const int kGrowthFactory = 4; | 268 static const int kGrowthFactory = 4; |
| (...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 311 | 315 |
| 312 // ---------------------------------------------------------------------------- | 316 // ---------------------------------------------------------------------------- |
| 313 // JavaScript Scanner. | 317 // JavaScript Scanner. |
| 314 | 318 |
| 315 class Scanner { | 319 class Scanner { |
| 316 public: | 320 public: |
| 317 // Scoped helper for literal recording. Automatically drops the literal | 321 // Scoped helper for literal recording. Automatically drops the literal |
| 318 // if aborting the scanning before it's complete. | 322 // if aborting the scanning before it's complete. |
| 319 class LiteralScope { | 323 class LiteralScope { |
| 320 public: | 324 public: |
| 321 explicit LiteralScope(Scanner* self) | 325 explicit LiteralScope(Scanner* self, bool capture_raw = false) |
| 322 : scanner_(self), complete_(false) { | 326 : scanner_(self), complete_(false) { |
| 323 scanner_->StartLiteral(); | 327 scanner_->StartLiteral(); |
| | 328 if (capture_raw) scanner_->StartRawLiteral(); |
| 324 } | 329 } |
| 325 ~LiteralScope() { | 330 ~LiteralScope() { |
| 326 if (!complete_) scanner_->DropLiteral(); | 331 if (!complete_) scanner_->DropLiteral(); |
| 327 } | 332 } |
| 328 void Complete() { | 333 void Complete() { |
| 329 scanner_->TerminateLiteral(); | 334 scanner_->TerminateLiteral(); |
| 330 complete_ = true; | 335 complete_ = true; |
| 331 } | 336 } |
| 332 | 337 |
| 333 private: | 338 private: |
| (...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 385 DCHECK_NOT_NULL(current_.literal_chars); | 390 DCHECK_NOT_NULL(current_.literal_chars); |
| 386 return current_.literal_chars->is_contextual_keyword(keyword); | 391 return current_.literal_chars->is_contextual_keyword(keyword); |
| 387 } | 392 } |
| 388 bool is_next_contextual_keyword(Vector<const char> keyword) { | 393 bool is_next_contextual_keyword(Vector<const char> keyword) { |
| 389 DCHECK_NOT_NULL(next_.literal_chars); | 394 DCHECK_NOT_NULL(next_.literal_chars); |
| 390 return next_.literal_chars->is_contextual_keyword(keyword); | 395 return next_.literal_chars->is_contextual_keyword(keyword); |
| 391 } | 396 } |
| 392 | 397 |
| 393 const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory); | 398 const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory); |
| 394 const AstRawString* NextSymbol(AstValueFactory* ast_value_factory); | 399 const AstRawString* NextSymbol(AstValueFactory* ast_value_factory); |
| | 400 const AstRawString* CurrentRawSymbol(AstValueFactory* ast_value_factory); |
| 395 | 401 |
| 396 double DoubleValue(); | 402 double DoubleValue(); |
| 397 bool LiteralMatches(const char* data, int length, bool allow_escapes = true) { | 403 bool LiteralMatches(const char* data, int length, bool allow_escapes = true) { |
| 398 if (is_literal_one_byte() && | 404 if (is_literal_one_byte() && |
| 399 literal_length() == length && | 405 literal_length() == length && |
| 400 (allow_escapes || !literal_contains_escapes())) { | 406 (allow_escapes || !literal_contains_escapes())) { |
| 401 const char* token = | 407 const char* token = |
| 402 reinterpret_cast<const char*>(literal_one_byte_string().start()); | 408 reinterpret_cast<const char*>(literal_one_byte_string().start()); |
| 403 return !strncmp(token, data, length); | 409 return !strncmp(token, data, length); |
| 404 } | 410 } |
| (...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 484 } | 490 } |
| 485 | 491 |
| 486 bool IdentifierIsFutureStrictReserved(const AstRawString* string) const; | 492 bool IdentifierIsFutureStrictReserved(const AstRawString* string) const; |
| 487 | 493 |
| 488 private: | 494 private: |
| 489 // The current and look-ahead token. | 495 // The current and look-ahead token. |
| 490 struct TokenDesc { | 496 struct TokenDesc { |
| 491 Token::Value token; | 497 Token::Value token; |
| 492 Location location; | 498 Location location; |
| 493 LiteralBuffer* literal_chars; | 499 LiteralBuffer* literal_chars; |
| | 500 LiteralBuffer* raw_literal_chars; |
| 494 }; | 501 }; |
| 495 | 502 |
| 496 static const int kCharacterLookaheadBufferSize = 1; | 503 static const int kCharacterLookaheadBufferSize = 1; |
| 497 | 504 |
| 498 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. | 505 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. |
| 499 uc32 ScanOctalEscape(uc32 c, int length); | 506 uc32 ScanOctalEscape(uc32 c, int length); |
| 500 | 507 |
| 501 // Call this after setting source_ to the input. | 508 // Call this after setting source_ to the input. |
| 502 void Init() { | 509 void Init() { |
| 503 // Set c0_ (one character ahead) | 510 // Set c0_ (one character ahead) |
| 504 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); | 511 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); |
| 505 Advance(); | 512 Advance(); |
| 506 // Initialize current_ to not refer to a literal. | 513 // Initialize current_ to not refer to a literal. |
| 507 current_.literal_chars = NULL; | 514 current_.literal_chars = NULL; |
| | 515 current_.raw_literal_chars = NULL; |
| 508 } | 516 } |
| 509 | 517 |
| 510 // Literal buffer support | 518 // Literal buffer support |
| 511 inline void StartLiteral() { | 519 inline void StartLiteral() { |
| 512 LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ? | 520 LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ? |
| 513 &literal_buffer2_ : &literal_buffer1_; | 521 &literal_buffer2_ : &literal_buffer1_; |
| 514 free_buffer->Reset(); | 522 free_buffer->Reset(); |
| 515 next_.literal_chars = free_buffer; | 523 next_.literal_chars = free_buffer; |
| 516 } | 524 } |
| 517 | 525 |
| | 526 inline void StartRawLiteral() { |
| | 527 raw_literal_buffer_.Reset(); |
| | 528 next_.raw_literal_chars = &raw_literal_buffer_; |
| | 529 capturing_raw_literal_ = true; |
| | 530 } |
| | 531 |
| 518 INLINE(void AddLiteralChar(uc32 c)) { | 532 INLINE(void AddLiteralChar(uc32 c)) { |
| 519 DCHECK_NOT_NULL(next_.literal_chars); | 533 DCHECK_NOT_NULL(next_.literal_chars); |
| 520 next_.literal_chars->AddChar(c); | 534 next_.literal_chars->AddChar(c); |
| 521 } | 535 } |
| 522 | 536 |
| | 537 INLINE(void AddRawLiteralChar(uc32 c)) { |
| | 538 DCHECK(capturing_raw_literal_); |
| | 539 DCHECK_NOT_NULL(next_.raw_literal_chars); |
| | 540 next_.raw_literal_chars->AddChar(c); |
| | 541 } |
| | 542 |
| | 543 INLINE(void ReduceRawLiteralLength(int delta)) { |
| | 544 DCHECK(capturing_raw_literal_); |
| | 545 DCHECK_NOT_NULL(next_.raw_literal_chars); |
| | 546 next_.raw_literal_chars->ReduceLength(delta); |
| | 547 } |
| | 548 |
| 523 // Complete scanning of a literal. | 549 // Complete scanning of a literal. |
| 524 inline void TerminateLiteral() { | 550 inline void TerminateLiteral() { capturing_raw_literal_ = false; } |
| 525 // Does nothing in the current implementation. | |
| 526 } | |
| 527 | 551 |
| 528 // Stops scanning of a literal and drop the collected characters, | 552 // Stops scanning of a literal and drop the collected characters, |
| 529 // e.g., due to an encountered error. | 553 // e.g., due to an encountered error. |
| 530 inline void DropLiteral() { | 554 inline void DropLiteral() { |
| 531 next_.literal_chars = NULL; | 555 next_.literal_chars = NULL; |
| | 556 next_.raw_literal_chars = NULL; |
| | 557 capturing_raw_literal_ = false; |
| 532 } | 558 } |
| 533 | 559 |
| 534 inline void AddLiteralCharAdvance() { | 560 inline void AddLiteralCharAdvance() { |
| 535 AddLiteralChar(c0_); | 561 AddLiteralChar(c0_); |
| 536 Advance(); | 562 Advance(); |
| 537 } | 563 } |
| 538 | 564 |
| 539 // Low-level scanning support. | 565 // Low-level scanning support. |
| 540 void Advance() { | 566 void Advance() { |
| | 567 if (capturing_raw_literal_) { |
| | 568 AddRawLiteralChar(c0_); |
| | 569 } |
| 541 c0_ = source_->Advance(); | 570 c0_ = source_->Advance(); |
| 542 if (unibrow::Utf16::IsLeadSurrogate(c0_)) { | 571 if (unibrow::Utf16::IsLeadSurrogate(c0_)) { |
| 543 uc32 c1 = source_->Advance(); | 572 uc32 c1 = source_->Advance(); |
| 544 if (!unibrow::Utf16::IsTrailSurrogate(c1)) { | 573 if (!unibrow::Utf16::IsTrailSurrogate(c1)) { |
| 545 source_->PushBack(c1); | 574 source_->PushBack(c1); |
| 546 } else { | 575 } else { |
| 547 c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1); | 576 c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1); |
| 548 } | 577 } |
| 549 } | 578 } |
| 550 } | 579 } |
| 551 | 580 |
| 552 void PushBack(uc32 ch) { | 581 void PushBack(uc32 ch) { |
| | 582 DCHECK(ch < 0 || !capturing_raw_literal_); |
| | caitp (gmail), 2014/12/02 21:37:35: I feel like PushBack() is okay --- it's more just |
| 553 if (ch > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) { | 583 if (ch > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) { |
| 554 source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_)); | 584 source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_)); |
| 555 source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_)); | 585 source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_)); |
| 556 } else { | 586 } else { |
| 557 source_->PushBack(c0_); | 587 source_->PushBack(c0_); |
| 558 } | 588 } |
| 559 c0_ = ch; | 589 c0_ = ch; |
| 560 } | 590 } |
| 561 | 591 |
| 562 inline Token::Value Select(Token::Value tok) { | 592 inline Token::Value Select(Token::Value tok) { |
| 563 Advance(); | 593 Advance(); |
| 564 return tok; | 594 return tok; |
| 565 } | 595 } |
| 566 | 596 |
| 567 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { | 597 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { |
| 568 Advance(); | 598 Advance(); |
| 569 if (c0_ == next) { | 599 if (c0_ == next) { |
| 570 Advance(); | 600 Advance(); |
| 571 return then; | 601 return then; |
| 572 } else { | 602 } else { |
| 573 return else_; | 603 return else_; |
| 574 } | 604 } |
| 575 } | 605 } |
| 576 | 606 |
| 577 // Returns the literal string, if any, for the current token (the | 607 // Returns the literal string, if any, for the current token (the |
| 578 // token last returned by Next()). The string is 0-terminated. | 608 // token last returned by Next()). The string is 0-terminated. |
| 579 // Literal strings are collected for identifiers, strings, and | 609 // Literal strings are collected for identifiers, strings, numbers as well |
| 580 // numbers. | 610 // as for template literals. For template literals we also collect the raw |
| | 611 // form. |
| 581 // These functions only give the correct result if the literal | 612 // These functions only give the correct result if the literal |
| 582 // was scanned between calls to StartLiteral() and TerminateLiteral(). | 613 // was scanned between calls to StartLiteral() and TerminateLiteral(). |
| 583 Vector<const uint8_t> literal_one_byte_string() { | 614 Vector<const uint8_t> literal_one_byte_string() { |
| 584 DCHECK_NOT_NULL(current_.literal_chars); | 615 DCHECK_NOT_NULL(current_.literal_chars); |
| 585 return current_.literal_chars->one_byte_literal(); | 616 return current_.literal_chars->one_byte_literal(); |
| 586 } | 617 } |
| 587 Vector<const uint16_t> literal_two_byte_string() { | 618 Vector<const uint16_t> literal_two_byte_string() { |
| 588 DCHECK_NOT_NULL(current_.literal_chars); | 619 DCHECK_NOT_NULL(current_.literal_chars); |
| 589 return current_.literal_chars->two_byte_literal(); | 620 return current_.literal_chars->two_byte_literal(); |
| 590 } | 621 } |
| (...skipping 12 matching lines...) Expand all Loading... | |
| 603 return next_.literal_chars->one_byte_literal(); | 634 return next_.literal_chars->one_byte_literal(); |
| 604 } | 635 } |
| 605 Vector<const uint16_t> next_literal_two_byte_string() { | 636 Vector<const uint16_t> next_literal_two_byte_string() { |
| 606 DCHECK_NOT_NULL(next_.literal_chars); | 637 DCHECK_NOT_NULL(next_.literal_chars); |
| 607 return next_.literal_chars->two_byte_literal(); | 638 return next_.literal_chars->two_byte_literal(); |
| 608 } | 639 } |
| 609 bool is_next_literal_one_byte() { | 640 bool is_next_literal_one_byte() { |
| 610 DCHECK_NOT_NULL(next_.literal_chars); | 641 DCHECK_NOT_NULL(next_.literal_chars); |
| 611 return next_.literal_chars->is_one_byte(); | 642 return next_.literal_chars->is_one_byte(); |
| 612 } | 643 } |
| 613 int next_literal_length() const { | 644 Vector<const uint8_t> raw_literal_one_byte_string() { |
| 614 DCHECK_NOT_NULL(next_.literal_chars); | 645 DCHECK_NOT_NULL(current_.raw_literal_chars); |
| 615 return next_.literal_chars->length(); | 646 return current_.raw_literal_chars->one_byte_literal(); |
| 616 } | 647 } |
| | 648 Vector<const uint16_t> raw_literal_two_byte_string() { |
| | 649 DCHECK_NOT_NULL(current_.raw_literal_chars); |
| | 650 return current_.raw_literal_chars->two_byte_literal(); |
| | 651 } |
| | 652 bool is_raw_literal_one_byte() { |
| | 653 DCHECK_NOT_NULL(current_.raw_literal_chars); |
| | 654 return current_.raw_literal_chars->is_one_byte(); |
| | 655 } |
| | 656 |
| 617 | 657 |
| 618 uc32 ScanHexNumber(int expected_length); | 658 uc32 ScanHexNumber(int expected_length); |
| 619 | 659 |
| 620 // Scans a single JavaScript token. | 660 // Scans a single JavaScript token. |
| 621 void Scan(); | 661 void Scan(); |
| 622 | 662 |
| 623 bool SkipWhiteSpace(); | 663 bool SkipWhiteSpace(); |
| 624 Token::Value SkipSingleLineComment(); | 664 Token::Value SkipSingleLineComment(); |
| 625 Token::Value SkipSourceURLComment(); | 665 Token::Value SkipSourceURLComment(); |
| 626 void TryToParseSourceURLComment(); | 666 void TryToParseSourceURLComment(); |
| (...skipping 24 matching lines...) Expand all Loading... | |
| 651 UnicodeCache* unicode_cache_; | 691 UnicodeCache* unicode_cache_; |
| 652 | 692 |
| 653 // Buffers collecting literal strings, numbers, etc. | 693 // Buffers collecting literal strings, numbers, etc. |
| 654 LiteralBuffer literal_buffer1_; | 694 LiteralBuffer literal_buffer1_; |
| 655 LiteralBuffer literal_buffer2_; | 695 LiteralBuffer literal_buffer2_; |
| 656 | 696 |
| 657 // Values parsed from magic comments. | 697 // Values parsed from magic comments. |
| 658 LiteralBuffer source_url_; | 698 LiteralBuffer source_url_; |
| 659 LiteralBuffer source_mapping_url_; | 699 LiteralBuffer source_mapping_url_; |
| 660 | 700 |
| | 701 // Buffer to store raw string values |
| | 702 LiteralBuffer raw_literal_buffer_; |
| | 703 |
| | 704 // We only need to capture the raw literal when we are scanning template |
| | 705 // literal spans. |
| | 706 bool capturing_raw_literal_; |
| | 707 |
| 661 TokenDesc current_; // desc for current token (as returned by Next()) | 708 TokenDesc current_; // desc for current token (as returned by Next()) |
| 662 TokenDesc next_; // desc for next token (one token look-ahead) | 709 TokenDesc next_; // desc for next token (one token look-ahead) |
| 663 | 710 |
| 664 // Input stream. Must be initialized to an Utf16CharacterStream. | 711 // Input stream. Must be initialized to an Utf16CharacterStream. |
| 665 Utf16CharacterStream* source_; | 712 Utf16CharacterStream* source_; |
| 666 | 713 |
| 667 | 714 |
| 668 // Start position of the octal literal last scanned. | 715 // Start position of the octal literal last scanned. |
| 669 Location octal_pos_; | 716 Location octal_pos_; |
| 670 | 717 |
| (...skipping 15 matching lines...) Expand all Loading... | |
| 686 bool harmony_numeric_literals_; | 733 bool harmony_numeric_literals_; |
| 687 // Whether we scan 'class', 'extends', 'static' and 'super' as keywords. | 734 // Whether we scan 'class', 'extends', 'static' and 'super' as keywords. |
| 688 bool harmony_classes_; | 735 bool harmony_classes_; |
| 689 // Whether we scan TEMPLATE_SPAN and TEMPLATE_TAIL | 736 // Whether we scan TEMPLATE_SPAN and TEMPLATE_TAIL |
| 690 bool harmony_templates_; | 737 bool harmony_templates_; |
| 691 }; | 738 }; |
| 692 | 739 |
| 693 } } // namespace v8::internal | 740 } } // namespace v8::internal |
| 694 | 741 |
| 695 #endif // V8_SCANNER_H_ | 742 #endif // V8_SCANNER_H_ |
| OLD | NEW |