OLD | NEW |
---|---|
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
6 | 6 |
7 #ifndef V8_SCANNER_H_ | 7 #ifndef V8_SCANNER_H_ |
8 #define V8_SCANNER_H_ | 8 #define V8_SCANNER_H_ |
9 | 9 |
10 #include "src/allocation.h" | 10 #include "src/allocation.h" |
(...skipping 234 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
245 DCHECK(is_one_byte_); | 245 DCHECK(is_one_byte_); |
246 return Vector<const uint8_t>( | 246 return Vector<const uint8_t>( |
247 reinterpret_cast<const uint8_t*>(backing_store_.start()), | 247 reinterpret_cast<const uint8_t*>(backing_store_.start()), |
248 position_); | 248 position_); |
249 } | 249 } |
250 | 250 |
251 int length() const { | 251 int length() const { |
252 return is_one_byte_ ? position_ : (position_ >> 1); | 252 return is_one_byte_ ? position_ : (position_ >> 1); |
253 } | 253 } |
254 | 254 |
255 void ReduceLength(int delta) { | |
256 position_ -= delta * (is_one_byte_ ? kOneByteSize : kUC16Size); | |
257 } | |
258 | |
255 void Reset() { | 259 void Reset() { |
256 position_ = 0; | 260 position_ = 0; |
257 is_one_byte_ = true; | 261 is_one_byte_ = true; |
258 } | 262 } |
259 | 263 |
260 Handle<String> Internalize(Isolate* isolate) const; | 264 Handle<String> Internalize(Isolate* isolate) const; |
261 | 265 |
262 private: | 266 private: |
263 static const int kInitialCapacity = 16; | 267 static const int kInitialCapacity = 16; |
264 static const int kGrowthFactory = 4; | 268 static const int kGrowthFactory = 4; |
(...skipping 46 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
311 | 315 |
312 // ---------------------------------------------------------------------------- | 316 // ---------------------------------------------------------------------------- |
313 // JavaScript Scanner. | 317 // JavaScript Scanner. |
314 | 318 |
315 class Scanner { | 319 class Scanner { |
316 public: | 320 public: |
317 // Scoped helper for literal recording. Automatically drops the literal | 321 // Scoped helper for literal recording. Automatically drops the literal |
318 // if aborting the scanning before it's complete. | 322 // if aborting the scanning before it's complete. |
319 class LiteralScope { | 323 class LiteralScope { |
320 public: | 324 public: |
321 explicit LiteralScope(Scanner* self) | 325 explicit LiteralScope(Scanner* self, bool capture_raw = false) |
322 : scanner_(self), complete_(false) { | 326 : scanner_(self), complete_(false) { |
323 scanner_->StartLiteral(); | 327 scanner_->StartLiteral(); |
328 if (capture_raw) scanner_->StartRawLiteral(); | |
324 } | 329 } |
325 ~LiteralScope() { | 330 ~LiteralScope() { |
326 if (!complete_) scanner_->DropLiteral(); | 331 if (!complete_) scanner_->DropLiteral(); |
327 } | 332 } |
328 void Complete() { | 333 void Complete() { |
329 scanner_->TerminateLiteral(); | 334 scanner_->TerminateLiteral(); |
330 complete_ = true; | 335 complete_ = true; |
331 } | 336 } |
332 | 337 |
333 private: | 338 private: |
(...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
385 DCHECK_NOT_NULL(current_.literal_chars); | 390 DCHECK_NOT_NULL(current_.literal_chars); |
386 return current_.literal_chars->is_contextual_keyword(keyword); | 391 return current_.literal_chars->is_contextual_keyword(keyword); |
387 } | 392 } |
388 bool is_next_contextual_keyword(Vector<const char> keyword) { | 393 bool is_next_contextual_keyword(Vector<const char> keyword) { |
389 DCHECK_NOT_NULL(next_.literal_chars); | 394 DCHECK_NOT_NULL(next_.literal_chars); |
390 return next_.literal_chars->is_contextual_keyword(keyword); | 395 return next_.literal_chars->is_contextual_keyword(keyword); |
391 } | 396 } |
392 | 397 |
393 const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory); | 398 const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory); |
394 const AstRawString* NextSymbol(AstValueFactory* ast_value_factory); | 399 const AstRawString* NextSymbol(AstValueFactory* ast_value_factory); |
400 const AstRawString* CurrentRawSymbol(AstValueFactory* ast_value_factory); | |
395 | 401 |
396 double DoubleValue(); | 402 double DoubleValue(); |
397 bool LiteralMatches(const char* data, int length, bool allow_escapes = true) { | 403 bool LiteralMatches(const char* data, int length, bool allow_escapes = true) { |
398 if (is_literal_one_byte() && | 404 if (is_literal_one_byte() && |
399 literal_length() == length && | 405 literal_length() == length && |
400 (allow_escapes || !literal_contains_escapes())) { | 406 (allow_escapes || !literal_contains_escapes())) { |
401 const char* token = | 407 const char* token = |
402 reinterpret_cast<const char*>(literal_one_byte_string().start()); | 408 reinterpret_cast<const char*>(literal_one_byte_string().start()); |
403 return !strncmp(token, data, length); | 409 return !strncmp(token, data, length); |
404 } | 410 } |
(...skipping 79 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
484 } | 490 } |
485 | 491 |
486 bool IdentifierIsFutureStrictReserved(const AstRawString* string) const; | 492 bool IdentifierIsFutureStrictReserved(const AstRawString* string) const; |
487 | 493 |
488 private: | 494 private: |
489 // The current and look-ahead token. | 495 // The current and look-ahead token. |
490 struct TokenDesc { | 496 struct TokenDesc { |
491 Token::Value token; | 497 Token::Value token; |
492 Location location; | 498 Location location; |
493 LiteralBuffer* literal_chars; | 499 LiteralBuffer* literal_chars; |
500 LiteralBuffer* raw_literal_chars; | |
494 }; | 501 }; |
495 | 502 |
496 static const int kCharacterLookaheadBufferSize = 1; | 503 static const int kCharacterLookaheadBufferSize = 1; |
497 | 504 |
498 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. | 505 // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. |
499 uc32 ScanOctalEscape(uc32 c, int length); | 506 uc32 ScanOctalEscape(uc32 c, int length); |
500 | 507 |
501 // Call this after setting source_ to the input. | 508 // Call this after setting source_ to the input. |
502 void Init() { | 509 void Init() { |
503 // Set c0_ (one character ahead) | 510 // Set c0_ (one character ahead) |
504 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); | 511 STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); |
505 Advance(); | 512 Advance(); |
506 // Initialize current_ to not refer to a literal. | 513 // Initialize current_ to not refer to a literal. |
507 current_.literal_chars = NULL; | 514 current_.literal_chars = NULL; |
515 current_.raw_literal_chars = NULL; | |
508 } | 516 } |
509 | 517 |
510 // Literal buffer support | 518 // Literal buffer support |
511 inline void StartLiteral() { | 519 inline void StartLiteral() { |
512 LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ? | 520 LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ? |
513 &literal_buffer2_ : &literal_buffer1_; | 521 &literal_buffer2_ : &literal_buffer1_; |
514 free_buffer->Reset(); | 522 free_buffer->Reset(); |
515 next_.literal_chars = free_buffer; | 523 next_.literal_chars = free_buffer; |
516 } | 524 } |
517 | 525 |
526 inline void StartRawLiteral() { | |
527 raw_literal_buffer_.Reset(); | |
528 next_.raw_literal_chars = &raw_literal_buffer_; | |
529 capturing_raw_literal_ = true; | |
530 } | |
531 | |
518 INLINE(void AddLiteralChar(uc32 c)) { | 532 INLINE(void AddLiteralChar(uc32 c)) { |
519 DCHECK_NOT_NULL(next_.literal_chars); | 533 DCHECK_NOT_NULL(next_.literal_chars); |
520 next_.literal_chars->AddChar(c); | 534 next_.literal_chars->AddChar(c); |
521 } | 535 } |
522 | 536 |
537 INLINE(void AddRawLiteralChar(uc32 c)) { | |
538 DCHECK(capturing_raw_literal_); | |
539 DCHECK_NOT_NULL(next_.raw_literal_chars); | |
540 next_.raw_literal_chars->AddChar(c); | |
541 } | |
542 | |
543 INLINE(void ReduceRawLiteralLength(int delta)) { | |
544 DCHECK(capturing_raw_literal_); | |
545 DCHECK_NOT_NULL(next_.raw_literal_chars); | |
546 next_.raw_literal_chars->ReduceLength(delta); | |
547 } | |
548 | |
523 // Complete scanning of a literal. | 549 // Complete scanning of a literal. |
524 inline void TerminateLiteral() { | 550 inline void TerminateLiteral() { capturing_raw_literal_ = false; } |
525 // Does nothing in the current implementation. | |
526 } | |
527 | 551 |
528 // Stops scanning of a literal and drops the collected characters, | 552 // Stops scanning of a literal and drops the collected characters, |
529 // e.g., due to an encountered error. | 553 // e.g., due to an encountered error. |
530 inline void DropLiteral() { | 554 inline void DropLiteral() { |
531 next_.literal_chars = NULL; | 555 next_.literal_chars = NULL; |
556 next_.raw_literal_chars = NULL; | |
557 capturing_raw_literal_ = false; | |
532 } | 558 } |
533 | 559 |
534 inline void AddLiteralCharAdvance() { | 560 inline void AddLiteralCharAdvance() { |
535 AddLiteralChar(c0_); | 561 AddLiteralChar(c0_); |
536 Advance(); | 562 Advance(); |
537 } | 563 } |
538 | 564 |
539 // Low-level scanning support. | 565 // Low-level scanning support. |
540 void Advance() { | 566 void Advance() { |
567 if (capturing_raw_literal_) { | |
568 AddRawLiteralChar(c0_); | |
569 } | |
541 c0_ = source_->Advance(); | 570 c0_ = source_->Advance(); |
542 if (unibrow::Utf16::IsLeadSurrogate(c0_)) { | 571 if (unibrow::Utf16::IsLeadSurrogate(c0_)) { |
543 uc32 c1 = source_->Advance(); | 572 uc32 c1 = source_->Advance(); |
544 if (!unibrow::Utf16::IsTrailSurrogate(c1)) { | 573 if (!unibrow::Utf16::IsTrailSurrogate(c1)) { |
545 source_->PushBack(c1); | 574 source_->PushBack(c1); |
546 } else { | 575 } else { |
547 c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1); | 576 c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1); |
548 } | 577 } |
549 } | 578 } |
550 } | 579 } |
551 | 580 |
552 void PushBack(uc32 ch) { | 581 void PushBack(uc32 ch) { |
582 DCHECK(ch < 0 || !capturing_raw_literal_); | |
caitp (gmail)
2014/12/02 21:37:35
I feel like PushBack() is okay --- it's more just
| |
553 if (ch > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) { | 583 if (ch > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) { |
554 source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_)); | 584 source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_)); |
555 source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_)); | 585 source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_)); |
556 } else { | 586 } else { |
557 source_->PushBack(c0_); | 587 source_->PushBack(c0_); |
558 } | 588 } |
559 c0_ = ch; | 589 c0_ = ch; |
560 } | 590 } |
561 | 591 |
562 inline Token::Value Select(Token::Value tok) { | 592 inline Token::Value Select(Token::Value tok) { |
563 Advance(); | 593 Advance(); |
564 return tok; | 594 return tok; |
565 } | 595 } |
566 | 596 |
567 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { | 597 inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { |
568 Advance(); | 598 Advance(); |
569 if (c0_ == next) { | 599 if (c0_ == next) { |
570 Advance(); | 600 Advance(); |
571 return then; | 601 return then; |
572 } else { | 602 } else { |
573 return else_; | 603 return else_; |
574 } | 604 } |
575 } | 605 } |
576 | 606 |
577 // Returns the literal string, if any, for the current token (the | 607 // Returns the literal string, if any, for the current token (the |
578 // token last returned by Next()). The string is 0-terminated. | 608 // token last returned by Next()). The string is 0-terminated. |
579 // Literal strings are collected for identifiers, strings, and | 609 // Literal strings are collected for identifiers, strings, numbers as well |
580 // numbers. | 610 // as for template literals. For template literals we also collect the raw |
611 // form. | |
581 // These functions only give the correct result if the literal | 612 // These functions only give the correct result if the literal |
582 // was scanned between calls to StartLiteral() and TerminateLiteral(). | 613 // was scanned between calls to StartLiteral() and TerminateLiteral(). |
583 Vector<const uint8_t> literal_one_byte_string() { | 614 Vector<const uint8_t> literal_one_byte_string() { |
584 DCHECK_NOT_NULL(current_.literal_chars); | 615 DCHECK_NOT_NULL(current_.literal_chars); |
585 return current_.literal_chars->one_byte_literal(); | 616 return current_.literal_chars->one_byte_literal(); |
586 } | 617 } |
587 Vector<const uint16_t> literal_two_byte_string() { | 618 Vector<const uint16_t> literal_two_byte_string() { |
588 DCHECK_NOT_NULL(current_.literal_chars); | 619 DCHECK_NOT_NULL(current_.literal_chars); |
589 return current_.literal_chars->two_byte_literal(); | 620 return current_.literal_chars->two_byte_literal(); |
590 } | 621 } |
(...skipping 12 matching lines...) Expand all Loading... | |
603 return next_.literal_chars->one_byte_literal(); | 634 return next_.literal_chars->one_byte_literal(); |
604 } | 635 } |
605 Vector<const uint16_t> next_literal_two_byte_string() { | 636 Vector<const uint16_t> next_literal_two_byte_string() { |
606 DCHECK_NOT_NULL(next_.literal_chars); | 637 DCHECK_NOT_NULL(next_.literal_chars); |
607 return next_.literal_chars->two_byte_literal(); | 638 return next_.literal_chars->two_byte_literal(); |
608 } | 639 } |
609 bool is_next_literal_one_byte() { | 640 bool is_next_literal_one_byte() { |
610 DCHECK_NOT_NULL(next_.literal_chars); | 641 DCHECK_NOT_NULL(next_.literal_chars); |
611 return next_.literal_chars->is_one_byte(); | 642 return next_.literal_chars->is_one_byte(); |
612 } | 643 } |
613 int next_literal_length() const { | 644 Vector<const uint8_t> raw_literal_one_byte_string() { |
614 DCHECK_NOT_NULL(next_.literal_chars); | 645 DCHECK_NOT_NULL(current_.raw_literal_chars); |
615 return next_.literal_chars->length(); | 646 return current_.raw_literal_chars->one_byte_literal(); |
616 } | 647 } |
648 Vector<const uint16_t> raw_literal_two_byte_string() { | |
649 DCHECK_NOT_NULL(current_.raw_literal_chars); | |
650 return current_.raw_literal_chars->two_byte_literal(); | |
651 } | |
652 bool is_raw_literal_one_byte() { | |
653 DCHECK_NOT_NULL(current_.raw_literal_chars); | |
654 return current_.raw_literal_chars->is_one_byte(); | |
655 } | |
656 | |
617 | 657 |
618 uc32 ScanHexNumber(int expected_length); | 658 uc32 ScanHexNumber(int expected_length); |
619 | 659 |
620 // Scans a single JavaScript token. | 660 // Scans a single JavaScript token. |
621 void Scan(); | 661 void Scan(); |
622 | 662 |
623 bool SkipWhiteSpace(); | 663 bool SkipWhiteSpace(); |
624 Token::Value SkipSingleLineComment(); | 664 Token::Value SkipSingleLineComment(); |
625 Token::Value SkipSourceURLComment(); | 665 Token::Value SkipSourceURLComment(); |
626 void TryToParseSourceURLComment(); | 666 void TryToParseSourceURLComment(); |
(...skipping 24 matching lines...) Expand all Loading... | |
651 UnicodeCache* unicode_cache_; | 691 UnicodeCache* unicode_cache_; |
652 | 692 |
653 // Buffers collecting literal strings, numbers, etc. | 693 // Buffers collecting literal strings, numbers, etc. |
654 LiteralBuffer literal_buffer1_; | 694 LiteralBuffer literal_buffer1_; |
655 LiteralBuffer literal_buffer2_; | 695 LiteralBuffer literal_buffer2_; |
656 | 696 |
657 // Values parsed from magic comments. | 697 // Values parsed from magic comments. |
658 LiteralBuffer source_url_; | 698 LiteralBuffer source_url_; |
659 LiteralBuffer source_mapping_url_; | 699 LiteralBuffer source_mapping_url_; |
660 | 700 |
701 // Buffer to store raw string values | |
702 LiteralBuffer raw_literal_buffer_; | |
703 | |
704 // We only need to capture the raw literal when we are scanning template | |
705 // literal spans. | |
706 bool capturing_raw_literal_; | |
707 | |
661 TokenDesc current_; // desc for current token (as returned by Next()) | 708 TokenDesc current_; // desc for current token (as returned by Next()) |
662 TokenDesc next_; // desc for next token (one token look-ahead) | 709 TokenDesc next_; // desc for next token (one token look-ahead) |
663 | 710 |
664 // Input stream. Must be initialized to a Utf16CharacterStream. | 711 // Input stream. Must be initialized to a Utf16CharacterStream. |
665 Utf16CharacterStream* source_; | 712 Utf16CharacterStream* source_; |
666 | 713 |
667 | 714 |
668 // Start position of the octal literal last scanned. | 715 // Start position of the octal literal last scanned. |
669 Location octal_pos_; | 716 Location octal_pos_; |
670 | 717 |
(...skipping 15 matching lines...) Expand all Loading... | |
686 bool harmony_numeric_literals_; | 733 bool harmony_numeric_literals_; |
687 // Whether we scan 'class', 'extends', 'static' and 'super' as keywords. | 734 // Whether we scan 'class', 'extends', 'static' and 'super' as keywords. |
688 bool harmony_classes_; | 735 bool harmony_classes_; |
689 // Whether we scan TEMPLATE_SPAN and TEMPLATE_TAIL | 736 // Whether we scan TEMPLATE_SPAN and TEMPLATE_TAIL |
690 bool harmony_templates_; | 737 bool harmony_templates_; |
691 }; | 738 }; |
692 | 739 |
693 } } // namespace v8::internal | 740 } } // namespace v8::internal |
694 | 741 |
695 #endif // V8_SCANNER_H_ | 742 #endif // V8_SCANNER_H_ |
OLD | NEW |