Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(133)

Side by Side Diff: src/scanner-base.cc

Issue 6075005: Change scanner buffers to not use utf-8. (Closed)
Patch Set: Fixed linto. Created 10 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/scanner-base.h ('k') | src/utils.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2010 the V8 project authors. All rights reserved. 1 // Copyright 2010 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 17 matching lines...) Expand all
28 // Features shared by parsing and pre-parsing scanners. 28 // Features shared by parsing and pre-parsing scanners.
29 29
30 #include "../include/v8stdint.h" 30 #include "../include/v8stdint.h"
31 #include "scanner-base.h" 31 #include "scanner-base.h"
32 #include "char-predicates-inl.h" 32 #include "char-predicates-inl.h"
33 33
34 namespace v8 { 34 namespace v8 {
35 namespace internal { 35 namespace internal {
36 36
37 // ---------------------------------------------------------------------------- 37 // ----------------------------------------------------------------------------
38 // LiteralCollector
39
40 LiteralCollector::LiteralCollector()
41 : buffer_(kInitialCapacity), recording_(false) { }
42
43
44 LiteralCollector::~LiteralCollector() {}
45
46
47 void LiteralCollector::AddCharSlow(uc32 c) {
48 ASSERT(static_cast<unsigned>(c) > unibrow::Utf8::kMaxOneByteChar);
49 int length = unibrow::Utf8::Length(c);
50 Vector<char> block = buffer_.AddBlock(length, '\0');
51 #ifdef DEBUG
52 int written_length = unibrow::Utf8::Encode(block.start(), c);
53 CHECK_EQ(length, written_length);
54 #else
55 unibrow::Utf8::Encode(block.start(), c);
56 #endif
57 }
58
59 // ----------------------------------------------------------------------------
60 // Character predicates 38 // Character predicates
61 39
62 unibrow::Predicate<IdentifierStart, 128> ScannerConstants::kIsIdentifierStart; 40 unibrow::Predicate<IdentifierStart, 128> ScannerConstants::kIsIdentifierStart;
63 unibrow::Predicate<IdentifierPart, 128> ScannerConstants::kIsIdentifierPart; 41 unibrow::Predicate<IdentifierPart, 128> ScannerConstants::kIsIdentifierPart;
64 unibrow::Predicate<unibrow::WhiteSpace, 128> ScannerConstants::kIsWhiteSpace; 42 unibrow::Predicate<unibrow::WhiteSpace, 128> ScannerConstants::kIsWhiteSpace;
65 unibrow::Predicate<unibrow::LineTerminator, 128> 43 unibrow::Predicate<unibrow::LineTerminator, 128>
66 ScannerConstants::kIsLineTerminator; 44 ScannerConstants::kIsLineTerminator;
67 45
68 StaticResource<ScannerConstants::Utf8Decoder> ScannerConstants::utf8_decoder_; 46 StaticResource<ScannerConstants::Utf8Decoder> ScannerConstants::utf8_decoder_;
69 47
(...skipping 179 matching lines...) Expand 10 before | Expand all | Expand 10 after
249 if (c0_ == '-') return SkipSingleLineComment(); 227 if (c0_ == '-') return SkipSingleLineComment();
250 PushBack('-'); // undo Advance() 228 PushBack('-'); // undo Advance()
251 } 229 }
252 PushBack('!'); // undo Advance() 230 PushBack('!'); // undo Advance()
253 ASSERT(c0_ == '!'); 231 ASSERT(c0_ == '!');
254 return Token::LT; 232 return Token::LT;
255 } 233 }
256 234
257 235
258 void JavaScriptScanner::Scan() { 236 void JavaScriptScanner::Scan() {
259 next_.literal_chars = Vector<const char>(); 237 next_.literal_chars = NULL;
260 Token::Value token; 238 Token::Value token;
261 do { 239 do {
262 // Remember the position of the next token 240 // Remember the position of the next token
263 next_.location.beg_pos = source_pos(); 241 next_.location.beg_pos = source_pos();
264 242
265 switch (c0_) { 243 switch (c0_) {
266 case ' ': 244 case ' ':
267 case '\t': 245 case '\t':
268 Advance(); 246 Advance();
269 token = Token::WHITESPACE; 247 token = Token::WHITESPACE;
(...skipping 284 matching lines...) Expand 10 before | Expand all | Expand 10 after
554 // should be illegal, but they are commonly handled 532 // should be illegal, but they are commonly handled
555 // as non-escaped characters by JS VMs. 533 // as non-escaped characters by JS VMs.
556 AddLiteralChar(c); 534 AddLiteralChar(c);
557 } 535 }
558 536
559 537
560 Token::Value JavaScriptScanner::ScanString() { 538 Token::Value JavaScriptScanner::ScanString() {
561 uc32 quote = c0_; 539 uc32 quote = c0_;
562 Advance(); // consume quote 540 Advance(); // consume quote
563 541
564 LiteralScope literal(this, kLiteralString); 542 LiteralScope literal(this);
565 while (c0_ != quote && c0_ >= 0 543 while (c0_ != quote && c0_ >= 0
566 && !ScannerConstants::kIsLineTerminator.get(c0_)) { 544 && !ScannerConstants::kIsLineTerminator.get(c0_)) {
567 uc32 c = c0_; 545 uc32 c = c0_;
568 Advance(); 546 Advance();
569 if (c == '\\') { 547 if (c == '\\') {
570 if (c0_ < 0) return Token::ILLEGAL; 548 if (c0_ < 0) return Token::ILLEGAL;
571 ScanEscape(); 549 ScanEscape();
572 } else { 550 } else {
573 AddLiteralChar(c); 551 AddLiteralChar(c);
574 } 552 }
(...skipping 10 matching lines...) Expand all
585 while (IsDecimalDigit(c0_)) 563 while (IsDecimalDigit(c0_))
586 AddLiteralCharAdvance(); 564 AddLiteralCharAdvance();
587 } 565 }
588 566
589 567
590 Token::Value JavaScriptScanner::ScanNumber(bool seen_period) { 568 Token::Value JavaScriptScanner::ScanNumber(bool seen_period) {
591 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction 569 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction
592 570
593 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL; 571 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL;
594 572
595 LiteralScope literal(this, kLiteralNumber); 573 LiteralScope literal(this);
596 if (seen_period) { 574 if (seen_period) {
597 // we have already seen a decimal point of the float 575 // we have already seen a decimal point of the float
598 AddLiteralChar('.'); 576 AddLiteralChar('.');
599 ScanDecimalDigits(); // we know we have at least one digit 577 ScanDecimalDigits(); // we know we have at least one digit
600 578
601 } else { 579 } else {
602 // if the first character is '0' we must check for octals and hex 580 // if the first character is '0' we must check for octals and hex
603 if (c0_ == '0') { 581 if (c0_ == '0') {
604 AddLiteralCharAdvance(); 582 AddLiteralCharAdvance();
605 583
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after
674 uc32 c = ScanHexEscape('u', 4); 652 uc32 c = ScanHexEscape('u', 4);
675 // We do not allow a unicode escape sequence to start another 653 // We do not allow a unicode escape sequence to start another
676 // unicode escape sequence. 654 // unicode escape sequence.
677 if (c == '\\') return unibrow::Utf8::kBadChar; 655 if (c == '\\') return unibrow::Utf8::kBadChar;
678 return c; 656 return c;
679 } 657 }
680 658
681 659
682 Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() { 660 Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() {
683 ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_)); 661 ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_));
684 LiteralScope literal(this, kLiteralIdentifier); 662 LiteralScope literal(this);
685 KeywordMatcher keyword_match; 663 KeywordMatcher keyword_match;
686 // Scan identifier start character. 664 // Scan identifier start character.
687 if (c0_ == '\\') { 665 if (c0_ == '\\') {
688 uc32 c = ScanIdentifierUnicodeEscape(); 666 uc32 c = ScanIdentifierUnicodeEscape();
689 // Only allow legal identifier start characters. 667 // Only allow legal identifier start characters.
690 if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL; 668 if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL;
691 AddLiteralChar(c); 669 AddLiteralChar(c);
692 return ScanIdentifierSuffix(&literal); 670 return ScanIdentifierSuffix(&literal);
693 } 671 }
694 672
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
740 bool in_character_class = false; 718 bool in_character_class = false;
741 719
742 // Previous token is either '/' or '/=', in the second case, the 720 // Previous token is either '/' or '/=', in the second case, the
743 // pattern starts at =. 721 // pattern starts at =.
744 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); 722 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);
745 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); 723 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);
746 724
747 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, 725 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
748 // the scanner should pass uninterpreted bodies to the RegExp 726 // the scanner should pass uninterpreted bodies to the RegExp
749 // constructor. 727 // constructor.
750 LiteralScope literal(this, kLiteralRegExp); 728 LiteralScope literal(this);
751 if (seen_equal) 729 if (seen_equal)
752 AddLiteralChar('='); 730 AddLiteralChar('=');
753 731
754 while (c0_ != '/' || in_character_class) { 732 while (c0_ != '/' || in_character_class) {
755 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false; 733 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;
756 if (c0_ == '\\') { // escaped character 734 if (c0_ == '\\') { // escaped character
757 AddLiteralCharAdvance(); 735 AddLiteralCharAdvance();
758 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false; 736 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;
759 AddLiteralCharAdvance(); 737 AddLiteralCharAdvance();
760 } else { // unescaped character 738 } else { // unescaped character
761 if (c0_ == '[') in_character_class = true; 739 if (c0_ == '[') in_character_class = true;
762 if (c0_ == ']') in_character_class = false; 740 if (c0_ == ']') in_character_class = false;
763 AddLiteralCharAdvance(); 741 AddLiteralCharAdvance();
764 } 742 }
765 } 743 }
766 Advance(); // consume '/' 744 Advance(); // consume '/'
767 745
768 literal.Complete(); 746 literal.Complete();
769 747
770 return true; 748 return true;
771 } 749 }
772 750
773 751
774 bool JavaScriptScanner::ScanRegExpFlags() { 752 bool JavaScriptScanner::ScanRegExpFlags() {
775 // Scan regular expression flags. 753 // Scan regular expression flags.
776 LiteralScope literal(this, kLiteralRegExpFlags); 754 LiteralScope literal(this);
777 while (ScannerConstants::kIsIdentifierPart.get(c0_)) { 755 while (ScannerConstants::kIsIdentifierPart.get(c0_)) {
778 if (c0_ == '\\') { 756 if (c0_ == '\\') {
779 uc32 c = ScanIdentifierUnicodeEscape(); 757 uc32 c = ScanIdentifierUnicodeEscape();
780 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) { 758 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {
781 // We allow any escaped character, unlike the restriction on 759 // We allow any escaped character, unlike the restriction on
782 // IdentifierPart when it is used to build an IdentifierName. 760 // IdentifierPart when it is used to build an IdentifierName.
783 AddLiteralChar(c); 761 AddLiteralChar(c);
784 continue; 762 continue;
785 } 763 }
786 } 764 }
(...skipping 129 matching lines...) Expand 10 before | Expand all | Expand 10 after
916 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return; 894 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return;
917 break; 895 break;
918 case UNMATCHABLE: 896 case UNMATCHABLE:
919 break; 897 break;
920 } 898 }
921 // On fallthrough, it's a failure. 899 // On fallthrough, it's a failure.
922 state_ = UNMATCHABLE; 900 state_ = UNMATCHABLE;
923 } 901 }
924 902
925 } } // namespace v8::internal 903 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « src/scanner-base.h ('k') | src/utils.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698