src/scanner-base.cc - Issue 6075005: Change scanner buffers to not use utf-8.

Side by Side Diff: src/scanner-base.cc

Issue 6075005: Change scanner buffers to not use utf-8. (Closed)

Patch Set: Fixed linto. Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2010 the V8 project authors. All rights reserved.	1 // Copyright 2010 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 17 matching lines...) Expand all Loading...
28 // Features shared by parsing and pre-parsing scanners.	28 // Features shared by parsing and pre-parsing scanners.

29	29

30 #include "../include/v8stdint.h"	30 #include "../include/v8stdint.h"

31 #include "scanner-base.h"	31 #include "scanner-base.h"

32 #include "char-predicates-inl.h"	32 #include "char-predicates-inl.h"

33	33

34 namespace v8 {	34 namespace v8 {

35 namespace internal {	35 namespace internal {

36	36

37 // ----------------------------------------------------------------------------	37 // ----------------------------------------------------------------------------

38 // LiteralCollector

39

40 LiteralCollector::LiteralCollector()

41 : buffer_(kInitialCapacity), recording_(false) { }

42

43

44 LiteralCollector::~LiteralCollector() {}

45

46

47 void LiteralCollector::AddCharSlow(uc32 c) {

48 ASSERT(static_cast<unsigned>(c) > unibrow::Utf8::kMaxOneByteChar);

49 int length = unibrow::Utf8::Length(c);

50 Vector<char> block = buffer_.AddBlock(length, '\0');

51 #ifdef DEBUG

52 int written_length = unibrow::Utf8::Encode(block.start(), c);

53 CHECK_EQ(length, written_length);

54 #else

55 unibrow::Utf8::Encode(block.start(), c);

56 #endif

57 }

58

59 // ----------------------------------------------------------------------------

60 // Character predicates	38 // Character predicates

61	39

62 unibrow::Predicate<IdentifierStart, 128> ScannerConstants::kIsIdentifierStart;	40 unibrow::Predicate<IdentifierStart, 128> ScannerConstants::kIsIdentifierStart;

63 unibrow::Predicate<IdentifierPart, 128> ScannerConstants::kIsIdentifierPart;	41 unibrow::Predicate<IdentifierPart, 128> ScannerConstants::kIsIdentifierPart;

64 unibrow::Predicate<unibrow::WhiteSpace, 128> ScannerConstants::kIsWhiteSpace;	42 unibrow::Predicate<unibrow::WhiteSpace, 128> ScannerConstants::kIsWhiteSpace;

65 unibrow::Predicate<unibrow::LineTerminator, 128>	43 unibrow::Predicate<unibrow::LineTerminator, 128>

66 ScannerConstants::kIsLineTerminator;	44 ScannerConstants::kIsLineTerminator;

67	45

68 StaticResource<ScannerConstants::Utf8Decoder> ScannerConstants::utf8_decoder_;	46 StaticResource<ScannerConstants::Utf8Decoder> ScannerConstants::utf8_decoder_;

69	47

(...skipping 179 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
249 if (c0_ == '-') return SkipSingleLineComment();	227 if (c0_ == '-') return SkipSingleLineComment();

250 PushBack('-'); // undo Advance()	228 PushBack('-'); // undo Advance()

251 }	229 }

252 PushBack('!'); // undo Advance()	230 PushBack('!'); // undo Advance()

253 ASSERT(c0_ == '!');	231 ASSERT(c0_ == '!');

254 return Token::LT;	232 return Token::LT;

255 }	233 }

256	234

257	235

258 void JavaScriptScanner::Scan() {	236 void JavaScriptScanner::Scan() {

259 next_.literal_chars = Vector<const char>();	237 next_.literal_chars = NULL;

260 Token::Value token;	238 Token::Value token;

261 do {	239 do {

262 // Remember the position of the next token	240 // Remember the position of the next token

263 next_.location.beg_pos = source_pos();	241 next_.location.beg_pos = source_pos();

264	242

265 switch (c0_) {	243 switch (c0_) {

266 case ' ':	244 case ' ':

267 case '\t':	245 case '\t':

268 Advance();	246 Advance();

269 token = Token::WHITESPACE;	247 token = Token::WHITESPACE;

(...skipping 284 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
554 // should be illegal, but they are commonly handled	532 // should be illegal, but they are commonly handled

555 // as non-escaped characters by JS VMs.	533 // as non-escaped characters by JS VMs.

556 AddLiteralChar(c);	534 AddLiteralChar(c);

557 }	535 }

558	536

559	537

560 Token::Value JavaScriptScanner::ScanString() {	538 Token::Value JavaScriptScanner::ScanString() {

561 uc32 quote = c0_;	539 uc32 quote = c0_;

562 Advance(); // consume quote	540 Advance(); // consume quote

563	541

564 LiteralScope literal(this, kLiteralString);	542 LiteralScope literal(this);

565 while (c0_ != quote && c0_ >= 0	543 while (c0_ != quote && c0_ >= 0

566 && !ScannerConstants::kIsLineTerminator.get(c0_)) {	544 && !ScannerConstants::kIsLineTerminator.get(c0_)) {

567 uc32 c = c0_;	545 uc32 c = c0_;

568 Advance();	546 Advance();

569 if (c == '\\') {	547 if (c == '\\') {

570 if (c0_ < 0) return Token::ILLEGAL;	548 if (c0_ < 0) return Token::ILLEGAL;

571 ScanEscape();	549 ScanEscape();

572 } else {	550 } else {

573 AddLiteralChar(c);	551 AddLiteralChar(c);

574 }	552 }

(...skipping 10 matching lines...) Expand all Loading...
585 while (IsDecimalDigit(c0_))	563 while (IsDecimalDigit(c0_))

586 AddLiteralCharAdvance();	564 AddLiteralCharAdvance();

587 }	565 }

588	566

589	567

590 Token::Value JavaScriptScanner::ScanNumber(bool seen_period) {	568 Token::Value JavaScriptScanner::ScanNumber(bool seen_period) {

591 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction	569 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction

592	570

593 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL;	571 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL;

594	572

595 LiteralScope literal(this, kLiteralNumber);	573 LiteralScope literal(this);

596 if (seen_period) {	574 if (seen_period) {

597 // we have already seen a decimal point of the float	575 // we have already seen a decimal point of the float

598 AddLiteralChar('.');	576 AddLiteralChar('.');

599 ScanDecimalDigits(); // we know we have at least one digit	577 ScanDecimalDigits(); // we know we have at least one digit

600	578

601 } else {	579 } else {

602 // if the first character is '0' we must check for octals and hex	580 // if the first character is '0' we must check for octals and hex

603 if (c0_ == '0') {	581 if (c0_ == '0') {

604 AddLiteralCharAdvance();	582 AddLiteralCharAdvance();

605	583

(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
674 uc32 c = ScanHexEscape('u', 4);	652 uc32 c = ScanHexEscape('u', 4);

675 // We do not allow a unicode escape sequence to start another	653 // We do not allow a unicode escape sequence to start another

676 // unicode escape sequence.	654 // unicode escape sequence.

677 if (c == '\\') return unibrow::Utf8::kBadChar;	655 if (c == '\\') return unibrow::Utf8::kBadChar;

678 return c;	656 return c;

679 }	657 }

680	658

681	659

682 Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() {	660 Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() {

683 ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_));	661 ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_));

684 LiteralScope literal(this, kLiteralIdentifier);	662 LiteralScope literal(this);

685 KeywordMatcher keyword_match;	663 KeywordMatcher keyword_match;

686 // Scan identifier start character.	664 // Scan identifier start character.

687 if (c0_ == '\\') {	665 if (c0_ == '\\') {

688 uc32 c = ScanIdentifierUnicodeEscape();	666 uc32 c = ScanIdentifierUnicodeEscape();

689 // Only allow legal identifier start characters.	667 // Only allow legal identifier start characters.

690 if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL;	668 if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL;

691 AddLiteralChar(c);	669 AddLiteralChar(c);

692 return ScanIdentifierSuffix(&literal);	670 return ScanIdentifierSuffix(&literal);

693 }	671 }

694	672

(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
740 bool in_character_class = false;	718 bool in_character_class = false;

741	719

742 // Previous token is either '/' or '/=', in the second case, the	720 // Previous token is either '/' or '/=', in the second case, the

743 // pattern starts at =.	721 // pattern starts at =.

744 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);	722 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);

745 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);	723 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);

746	724

747 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,	725 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,

748 // the scanner should pass uninterpreted bodies to the RegExp	726 // the scanner should pass uninterpreted bodies to the RegExp

749 // constructor.	727 // constructor.

750 LiteralScope literal(this, kLiteralRegExp);	728 LiteralScope literal(this);

751 if (seen_equal)	729 if (seen_equal)

752 AddLiteralChar('=');	730 AddLiteralChar('=');

753	731

754 while (c0_ != '/' || in_character_class) {	732 while (c0_ != '/' || in_character_class) {

755 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;	733 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;

756 if (c0_ == '\\') { // escaped character	734 if (c0_ == '\\') { // escaped character

757 AddLiteralCharAdvance();	735 AddLiteralCharAdvance();

758 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;	736 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;

759 AddLiteralCharAdvance();	737 AddLiteralCharAdvance();

760 } else { // unescaped character	738 } else { // unescaped character

761 if (c0_ == '[') in_character_class = true;	739 if (c0_ == '[') in_character_class = true;

762 if (c0_ == ']') in_character_class = false;	740 if (c0_ == ']') in_character_class = false;

763 AddLiteralCharAdvance();	741 AddLiteralCharAdvance();

764 }	742 }

765 }	743 }

766 Advance(); // consume '/'	744 Advance(); // consume '/'

767	745

768 literal.Complete();	746 literal.Complete();

769	747

770 return true;	748 return true;

771 }	749 }

772	750

773	751

774 bool JavaScriptScanner::ScanRegExpFlags() {	752 bool JavaScriptScanner::ScanRegExpFlags() {

775 // Scan regular expression flags.	753 // Scan regular expression flags.

776 LiteralScope literal(this, kLiteralRegExpFlags);	754 LiteralScope literal(this);

777 while (ScannerConstants::kIsIdentifierPart.get(c0_)) {	755 while (ScannerConstants::kIsIdentifierPart.get(c0_)) {

778 if (c0_ == '\\') {	756 if (c0_ == '\\') {

779 uc32 c = ScanIdentifierUnicodeEscape();	757 uc32 c = ScanIdentifierUnicodeEscape();

780 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {	758 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {

781 // We allow any escaped character, unlike the restriction on	759 // We allow any escaped character, unlike the restriction on

782 // IdentifierPart when it is used to build an IdentifierName.	760 // IdentifierPart when it is used to build an IdentifierName.

783 AddLiteralChar(c);	761 AddLiteralChar(c);

784 continue;	762 continue;

785 }	763 }

786 }	764 }

(...skipping 129 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
916 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return;	894 if (MatchKeywordStart(input, "with", 1, Token::WITH)) return;

917 break;	895 break;

918 case UNMATCHABLE:	896 case UNMATCHABLE:

919 break;	897 break;

920 }	898 }

921 // On fallthrough, it's a failure.	899 // On fallthrough, it's a failure.

922 state_ = UNMATCHABLE;	900 state_ = UNMATCHABLE;

923 }	901 }

924	902

925 } } // namespace v8::internal	903 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/scanner-base.h ('k') | src/utils.h » ('j') | no next file with comments »