OLD | NEW |
---|---|
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
6 | 6 |
7 #ifndef V8_SCANNER_H_ | 7 #ifndef V8_SCANNER_H_ |
8 #define V8_SCANNER_H_ | 8 #define V8_SCANNER_H_ |
9 | 9 |
10 #include "src/allocation.h" | 10 #include "src/allocation.h" |
(...skipping 234 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
245 DCHECK(is_one_byte_); | 245 DCHECK(is_one_byte_); |
246 return Vector<const uint8_t>( | 246 return Vector<const uint8_t>( |
247 reinterpret_cast<const uint8_t*>(backing_store_.start()), | 247 reinterpret_cast<const uint8_t*>(backing_store_.start()), |
248 position_); | 248 position_); |
249 } | 249 } |
250 | 250 |
251 int length() const { | 251 int length() const { |
252 return is_one_byte_ ? position_ : (position_ >> 1); | 252 return is_one_byte_ ? position_ : (position_ >> 1); |
253 } | 253 } |
254 | 254 |
255 void ReduceLength(int delta) { | |
256 position_ -= delta * (is_one_byte_ ? kOneByteSize : kUC16Size); | |
257 } | |
258 | |
255 void Reset() { | 259 void Reset() { |
256 position_ = 0; | 260 position_ = 0; |
257 is_one_byte_ = true; | 261 is_one_byte_ = true; |
258 } | 262 } |
259 | 263 |
260 Handle<String> Internalize(Isolate* isolate) const; | 264 Handle<String> Internalize(Isolate* isolate) const; |
261 | 265 |
262 private: | 266 private: |
263 static const int kInitialCapacity = 16; | 267 static const int kInitialCapacity = 16; |
264 static const int kGrowthFactory = 4; | 268 static const int kGrowthFactory = 4; |
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
311 | 315 |
312 // ---------------------------------------------------------------------------- | 316 // ---------------------------------------------------------------------------- |
313 // JavaScript Scanner. | 317 // JavaScript Scanner. |
314 | 318 |
315 class Scanner { | 319 class Scanner { |
316 public: | 320 public: |
317 // Scoped helper for literal recording. Automatically drops the literal | 321 // Scoped helper for literal recording. Automatically drops the literal |
318 // if aborting the scanning before it's complete. | 322 // if aborting the scanning before it's complete. |
319 class LiteralScope { | 323 class LiteralScope { |
320 public: | 324 public: |
321 explicit LiteralScope(Scanner* self) | 325 explicit LiteralScope(Scanner* self, bool capture_raw = false) |
322 : scanner_(self), complete_(false) { | 326 : scanner_(self), complete_(false) { |
323 scanner_->StartLiteral(); | 327 scanner_->StartLiteral(); |
328 if (capture_raw) scanner_->StartRawLiteral(); | |
324 } | 329 } |
325 ~LiteralScope() { | 330 ~LiteralScope() { |
326 if (!complete_) scanner_->DropLiteral(); | 331 if (!complete_) scanner_->DropLiteral(); |
327 } | 332 } |
328 void Complete() { | 333 void Complete() { |
329 scanner_->TerminateLiteral(); | 334 scanner_->TerminateLiteral(); |
330 complete_ = true; | 335 complete_ = true; |
331 } | 336 } |
332 | 337 |
333 private: | 338 private: |
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
385 DCHECK_NOT_NULL(current_.literal_chars); | 390 DCHECK_NOT_NULL(current_.literal_chars); |
386 return current_.literal_chars->is_contextual_keyword(keyword); | 391 return current_.literal_chars->is_contextual_keyword(keyword); |
387 } | 392 } |
388 bool is_next_contextual_keyword(Vector<const char> keyword) { | 393 bool is_next_contextual_keyword(Vector<const char> keyword) { |
389 DCHECK_NOT_NULL(next_.literal_chars); | 394 DCHECK_NOT_NULL(next_.literal_chars); |
390 return next_.literal_chars->is_contextual_keyword(keyword); | 395 return next_.literal_chars->is_contextual_keyword(keyword); |
391 } | 396 } |
392 | 397 |
393 const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory); | 398 const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory); |
394 const AstRawString* NextSymbol(AstValueFactory* ast_value_factory); | 399 const AstRawString* NextSymbol(AstValueFactory* ast_value_factory); |
400 const AstRawString* CurrentRawSymbol(AstValueFactory* ast_value_factory); | |
395 | 401 |
396 double DoubleValue(); | 402 double DoubleValue(); |
397 bool LiteralMatches(const char* data, int length, bool allow_escapes = true) { | 403 bool LiteralMatches(const char* data, int length, bool allow_escapes = true) { |
398 if (is_literal_one_byte() && | 404 if (is_literal_one_byte() && |
399 literal_length() == length && | 405 literal_length() == length && |
400 (allow_escapes || !literal_contains_escapes())) { | 406 (allow_escapes || !literal_contains_escapes())) { |
401 const char* token = | 407 const char* token = |
402 reinterpret_cast<const char*>(literal_one_byte_string().start()); | 408 reinterpret_cast<const char*>(literal_one_byte_string().start()); |
403 return !strncmp(token, data, length); | 409 return !strncmp(token, data, length); |
404 } | 410 } |
(...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
486 } | 492 } |
487 | 493 |
488 bool IdentifierIsFutureStrictReserved(const AstRawString* string) const; | 494 bool IdentifierIsFutureStrictReserved(const AstRawString* string) const; |
489 | 495 |
490 private: | 496 private: |
491 // The current and look-ahead token. | 497 // The current and look-ahead token. |
492 struct TokenDesc { | 498 struct TokenDesc { |
493 Token::Value token; | 499 Token::Value token; |
494 Location location; | 500 Location location; |
495 LiteralBuffer* literal_chars; | 501 LiteralBuffer* literal_chars; |
502 LiteralBuffer* raw_literal_chars; | |
496 }; | 503 }; |
497 | 504 |
498 static const int kCharacterLookaheadBufferSize = 1; | 505 static const int kCharacterLookaheadBufferSize = 1; |
499 | 506 |
500 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. | 507 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. |
501 uc32 ScanOctalEscape(uc32 c, int length); | 508 uc32 ScanOctalEscape(uc32 c, int length); |
502 | 509 |
503 // Call this after setting source_ to the input. | 510 // Call this after setting source_ to the input. |
504 void Init() { | 511 void Init() { |
505 // Set c0_ (one character ahead) | 512 // Set c0_ (one character ahead) |
506 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); | 513 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); |
507 Advance(); | 514 Advance(); |
508 // Initialize current_ to not refer to a literal. | 515 // Initialize current_ to not refer to a literal. |
509 current_.literal_chars = NULL; | 516 current_.literal_chars = NULL; |
517 current_.raw_literal_chars = NULL; | |
510 } | 518 } |
511 | 519 |
512 // Literal buffer support | 520 // Literal buffer support |
513 inline void StartLiteral() { | 521 inline void StartLiteral() { |
514 LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ? | 522 LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ? |
515 &literal_buffer2_ : &literal_buffer1_; | 523 &literal_buffer2_ : &literal_buffer1_; |
516 free_buffer->Reset(); | 524 free_buffer->Reset(); |
517 next_.literal_chars = free_buffer; | 525 next_.literal_chars = free_buffer; |
518 } | 526 } |
519 | 527 |
528 inline void StartRawLiteral() { | |
529 raw_literal_buffer_.Reset(); | |
530 next_.raw_literal_chars = &raw_literal_buffer_; | |
531 capturing_raw_literal_ = true; | |
532 } | |
533 | |
520 INLINE(void AddLiteralChar(uc32 c)) { | 534 INLINE(void AddLiteralChar(uc32 c)) { |
521 DCHECK_NOT_NULL(next_.literal_chars); | 535 DCHECK_NOT_NULL(next_.literal_chars); |
522 next_.literal_chars->AddChar(c); | 536 next_.literal_chars->AddChar(c); |
523 } | 537 } |
524 | 538 |
539 INLINE(void AddRawLiteralChar(uc32 c)) { | |
540 DCHECK(capturing_raw_literal_); | |
541 DCHECK_NOT_NULL(next_.raw_literal_chars); | |
542 next_.raw_literal_chars->AddChar(c); | |
543 } | |
544 | |
545 INLINE(void ReduceRawLiteralLength(int delta)) { | |
546 DCHECK(capturing_raw_literal_); | |
547 DCHECK_NOT_NULL(next_.raw_literal_chars); | |
548 next_.raw_literal_chars->ReduceLength(delta); | |
549 } | |
550 | |
525 // Complete scanning of a literal. | 551 // Complete scanning of a literal. |
526 inline void TerminateLiteral() { | 552 inline void TerminateLiteral() { capturing_raw_literal_ = false; } |
527 // Does nothing in the current implementation. | |
528 } | |
529 | 553 |
530 // Stops scanning of a literal and drops the collected characters, | 554 // Stops scanning of a literal and drops the collected characters, |
531 // e.g., due to an encountered error. | 555 // e.g., due to an encountered error. |
532 inline void DropLiteral() { | 556 inline void DropLiteral() { |
533 next_.literal_chars = NULL; | 557 next_.literal_chars = NULL; |
558 next_.raw_literal_chars = NULL; | |
559 capturing_raw_literal_ = false; | |
534 } | 560 } |
535 | 561 |
536 inline void AddLiteralCharAdvance() { | 562 inline void AddLiteralCharAdvance() { |
537 AddLiteralChar(c0_); | 563 AddLiteralChar(c0_); |
538 Advance(); | 564 Advance(); |
539 } | 565 } |
540 | 566 |
541 // Low-level scanning support. | 567 // Low-level scanning support. |
542 void Advance() { | 568 void Advance() { |
569 if (capturing_raw_literal_) { | |
570 AddRawLiteralChar(c0_); | |
571 } | |
543 c0_ = source_->Advance(); | 572 c0_ = source_->Advance(); |
544 if (unibrow::Utf16::IsLeadSurrogate(c0_)) { | 573 if (unibrow::Utf16::IsLeadSurrogate(c0_)) { |
545 uc32 c1 = source_->Advance(); | 574 uc32 c1 = source_->Advance(); |
546 if (!unibrow::Utf16::IsTrailSurrogate(c1)) { | 575 if (!unibrow::Utf16::IsTrailSurrogate(c1)) { |
547 source_->PushBack(c1); | 576 source_->PushBack(c1); |
548 } else { | 577 } else { |
549 c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1); | 578 c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1); |
550 } | 579 } |
551 } | 580 } |
552 } | 581 } |
553 | 582 |
554 void PushBack(uc32 ch) { | 583 void PushBack(uc32 ch) { |
555 if (ch > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) { | 584 if (ch > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) { |
556 source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_)); | 585 source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_)); |
557 source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_)); | 586 source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_)); |
587 if (capturing_raw_literal_) ReduceRawLiteralLength(2); | |
caitp (gmail)
2014/12/02 22:06:01
Oh, I didn't realize you already changed this. I g
| |
558 } else { | 588 } else { |
559 source_->PushBack(c0_); | 589 source_->PushBack(c0_); |
590 if (capturing_raw_literal_) ReduceRawLiteralLength(1); | |
560 } | 591 } |
561 c0_ = ch; | 592 c0_ = ch; |
562 } | 593 } |
563 | 594 |
564 inline Token::Value Select(Token::Value tok) { | 595 inline Token::Value Select(Token::Value tok) { |
565 Advance(); | 596 Advance(); |
566 return tok; | 597 return tok; |
567 } | 598 } |
568 | 599 |
569 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { | 600 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { |
570 Advance(); | 601 Advance(); |
571 if (c0_ == next) { | 602 if (c0_ == next) { |
572 Advance(); | 603 Advance(); |
573 return then; | 604 return then; |
574 } else { | 605 } else { |
575 return else_; | 606 return else_; |
576 } | 607 } |
577 } | 608 } |
578 | 609 |
579 // Returns the literal string, if any, for the current token (the | 610 // Returns the literal string, if any, for the current token (the |
580 // token last returned by Next()). The string is 0-terminated. | 611 // token last returned by Next()). The string is 0-terminated. |
581 // Literal strings are collected for identifiers, strings, and | 612 // Literal strings are collected for identifiers, strings, numbers as well |
582 // numbers. | 613 // as for template literals. For template literals we also collect the raw |
614 // form. | |
583 // These functions only give the correct result if the literal | 615 // These functions only give the correct result if the literal |
584 // was scanned between calls to StartLiteral() and TerminateLiteral(). | 616 // was scanned between calls to StartLiteral() and TerminateLiteral(). |
585 Vector<const uint8_t> literal_one_byte_string() { | 617 Vector<const uint8_t> literal_one_byte_string() { |
586 DCHECK_NOT_NULL(current_.literal_chars); | 618 DCHECK_NOT_NULL(current_.literal_chars); |
587 return current_.literal_chars->one_byte_literal(); | 619 return current_.literal_chars->one_byte_literal(); |
588 } | 620 } |
589 Vector<const uint16_t> literal_two_byte_string() { | 621 Vector<const uint16_t> literal_two_byte_string() { |
590 DCHECK_NOT_NULL(current_.literal_chars); | 622 DCHECK_NOT_NULL(current_.literal_chars); |
591 return current_.literal_chars->two_byte_literal(); | 623 return current_.literal_chars->two_byte_literal(); |
592 } | 624 } |
(...skipping 12 matching lines...) Expand all Loading... | |
605 return next_.literal_chars->one_byte_literal(); | 637 return next_.literal_chars->one_byte_literal(); |
606 } | 638 } |
607 Vector<const uint16_t> next_literal_two_byte_string() { | 639 Vector<const uint16_t> next_literal_two_byte_string() { |
608 DCHECK_NOT_NULL(next_.literal_chars); | 640 DCHECK_NOT_NULL(next_.literal_chars); |
609 return next_.literal_chars->two_byte_literal(); | 641 return next_.literal_chars->two_byte_literal(); |
610 } | 642 } |
611 bool is_next_literal_one_byte() { | 643 bool is_next_literal_one_byte() { |
612 DCHECK_NOT_NULL(next_.literal_chars); | 644 DCHECK_NOT_NULL(next_.literal_chars); |
613 return next_.literal_chars->is_one_byte(); | 645 return next_.literal_chars->is_one_byte(); |
614 } | 646 } |
615 int next_literal_length() const { | 647 Vector<const uint8_t> raw_literal_one_byte_string() { |
616 DCHECK_NOT_NULL(next_.literal_chars); | 648 DCHECK_NOT_NULL(current_.raw_literal_chars); |
617 return next_.literal_chars->length(); | 649 return current_.raw_literal_chars->one_byte_literal(); |
618 } | 650 } |
651 Vector<const uint16_t> raw_literal_two_byte_string() { | |
652 DCHECK_NOT_NULL(current_.raw_literal_chars); | |
653 return current_.raw_literal_chars->two_byte_literal(); | |
654 } | |
655 bool is_raw_literal_one_byte() { | |
656 DCHECK_NOT_NULL(current_.raw_literal_chars); | |
657 return current_.raw_literal_chars->is_one_byte(); | |
658 } | |
659 | |
619 | 660 |
620 uc32 ScanHexNumber(int expected_length); | 661 uc32 ScanHexNumber(int expected_length); |
621 // Scan a number of any length but not bigger than max_value. For example, the | 662 // Scan a number of any length but not bigger than max_value. For example, the |
622 // number can be 000000001, so it's very long in characters but its value is | 663 // number can be 000000001, so it's very long in characters but its value is |
623 // small. | 664 // small. |
624 uc32 ScanUnlimitedLengthHexNumber(int max_value); | 665 uc32 ScanUnlimitedLengthHexNumber(int max_value); |
625 | 666 |
626 // Scans a single JavaScript token. | 667 // Scans a single JavaScript token. |
627 void Scan(); | 668 void Scan(); |
628 | 669 |
(...skipping 30 matching lines...) Expand all Loading... | |
659 UnicodeCache* unicode_cache_; | 700 UnicodeCache* unicode_cache_; |
660 | 701 |
661 // Buffers collecting literal strings, numbers, etc. | 702 // Buffers collecting literal strings, numbers, etc. |
662 LiteralBuffer literal_buffer1_; | 703 LiteralBuffer literal_buffer1_; |
663 LiteralBuffer literal_buffer2_; | 704 LiteralBuffer literal_buffer2_; |
664 | 705 |
665 // Values parsed from magic comments. | 706 // Values parsed from magic comments. |
666 LiteralBuffer source_url_; | 707 LiteralBuffer source_url_; |
667 LiteralBuffer source_mapping_url_; | 708 LiteralBuffer source_mapping_url_; |
668 | 709 |
710 // Buffer to store raw string values | |
711 LiteralBuffer raw_literal_buffer_; | |
712 | |
713 // We only need to capture the raw literal when we are scanning template | |
714 // literal spans. | |
715 bool capturing_raw_literal_; | |
716 | |
669 TokenDesc current_; // desc for current token (as returned by Next()) | 717 TokenDesc current_; // desc for current token (as returned by Next()) |
670 TokenDesc next_; // desc for next token (one token look-ahead) | 718 TokenDesc next_; // desc for next token (one token look-ahead) |
671 | 719 |
672 // Input stream. Must be initialized to a Utf16CharacterStream. | 720 // Input stream. Must be initialized to a Utf16CharacterStream. |
673 Utf16CharacterStream* source_; | 721 Utf16CharacterStream* source_; |
674 | 722 |
675 | 723 |
676 // Start position of the octal literal last scanned. | 724 // Start position of the octal literal last scanned. |
677 Location octal_pos_; | 725 Location octal_pos_; |
678 | 726 |
(...skipping 17 matching lines...) Expand all Loading... | |
696 bool harmony_classes_; | 744 bool harmony_classes_; |
697 // Whether we scan TEMPLATE_SPAN and TEMPLATE_TAIL | 745 // Whether we scan TEMPLATE_SPAN and TEMPLATE_TAIL |
698 bool harmony_templates_; | 746 bool harmony_templates_; |
699 // Whether we allow \u{xxxxx}. | 747 // Whether we allow \u{xxxxx}. |
700 bool harmony_unicode_; | 748 bool harmony_unicode_; |
701 }; | 749 }; |
702 | 750 |
703 } } // namespace v8::internal | 751 } } // namespace v8::internal |
704 | 752 |
705 #endif // V8_SCANNER_H_ | 753 #endif // V8_SCANNER_H_ |
OLD | NEW |