src/scanner.cc - Issue 5188006: Push version 2.5.7 to trunk....

Side by Side Diff: src/scanner.cc

Issue 5188006: Push version 2.5.7 to trunk.... (Closed) Base URL: http://v8.googlecode.com/svn/trunk/

Patch Set: Created 10 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2010 the V8 project authors. All rights reserved.	1 // Copyright 2010 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 12 matching lines...) Expand all Loading...
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY	23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT	24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE	25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.	26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

27	27

28 #include "v8.h"	28 #include "v8.h"

29	29

30 #include "ast.h"	30 #include "ast.h"

31 #include "handles.h"	31 #include "handles.h"

32 #include "scanner.h"	32 #include "scanner.h"

	33 #include "unicode-inl.h"

33	34

34 namespace v8 {	35 namespace v8 {

35 namespace internal {	36 namespace internal {

36	37

37 // ----------------------------------------------------------------------------	38 // ----------------------------------------------------------------------------

38 // Character predicates

39

40

41 unibrow::Predicate<IdentifierStart, 128> Scanner::kIsIdentifierStart;

42 unibrow::Predicate<IdentifierPart, 128> Scanner::kIsIdentifierPart;

43 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator;

44 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace;

45

46

47 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_;

48

49

50 // ----------------------------------------------------------------------------

51 // UTF8Buffer	39 // UTF8Buffer

52	40

53 UTF8Buffer::UTF8Buffer() : buffer_(kInitialCapacity) { }	41 UTF8Buffer::UTF8Buffer() : buffer_(kInitialCapacity), recording_(false) { }

54	42

55	43

56 UTF8Buffer::~UTF8Buffer() {}	44 UTF8Buffer::~UTF8Buffer() {}

57	45

58	46

59 void UTF8Buffer::AddCharSlow(uc32 c) {	47 void UTF8Buffer::AddCharSlow(uc32 c) {

60 ASSERT(static_cast<unsigned>(c) > unibrow::Utf8::kMaxOneByteChar);	48 ASSERT(static_cast<unsigned>(c) > unibrow::Utf8::kMaxOneByteChar);

61 int length = unibrow::Utf8::Length(c);	49 int length = unibrow::Utf8::Length(c);

62 Vector<char> block = buffer_.AddBlock(length, '\0');	50 Vector<char> block = buffer_.AddBlock(length, '\0');

63 #ifdef DEBUG	51 #ifdef DEBUG

(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
128 }	116 }

129	117

130	118

131 void CharacterStreamUTF16Buffer::SeekForward(int pos) {	119 void CharacterStreamUTF16Buffer::SeekForward(int pos) {

132 pos_ = pos;	120 pos_ = pos;

133 ASSERT(pushback_buffer()->is_empty());	121 ASSERT(pushback_buffer()->is_empty());

134 stream_->Seek(pos);	122 stream_->Seek(pos);

135 }	123 }

136	124

137	125

138 // ExternalStringUTF16Buffer

139 template <typename StringType, typename CharType>

140 ExternalStringUTF16Buffer<StringType, CharType>::ExternalStringUTF16Buffer()

141 : raw_data_(NULL) { }

142

143

144 template <typename StringType, typename CharType>

145 void ExternalStringUTF16Buffer<StringType, CharType>::Initialize(

146 Handle<StringType> data,

147 int start_position,

148 int end_position) {

149 ASSERT(!data.is_null());

150 raw_data_ = data->resource()->data();

151

152 ASSERT(end_position <= data->length());

153 if (start_position > 0) {

154 SeekForward(start_position);

155 }

156 end_ =

157 end_position != Scanner::kNoEndPosition ? end_position : data->length();

158 }

159

160

161 template <typename StringType, typename CharType>

162 uc32 ExternalStringUTF16Buffer<StringType, CharType>::Advance() {

163 if (pos_ < end_) {

164 return raw_data_[pos_++];

165 } else {

166 // note: currently the following increment is necessary to avoid a

167 // test-parser problem!

168 pos_++;

169 return static_cast<uc32>(-1);

170 }

171 }

172

173

174 template <typename StringType, typename CharType>

175 void ExternalStringUTF16Buffer<StringType, CharType>::PushBack(uc32 ch) {

176 pos_--;

177 ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize);

178 ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch);

179 }

180

181

182 template <typename StringType, typename CharType>

183 void ExternalStringUTF16Buffer<StringType, CharType>::SeekForward(int pos) {

184 pos_ = pos;

185 }

186

187 // ----------------------------------------------------------------------------	126 // ----------------------------------------------------------------------------

188 // Scanner::LiteralScope	127 // Scanner::LiteralScope

189	128

190 Scanner::LiteralScope::LiteralScope(Scanner* self)	129 Scanner::LiteralScope::LiteralScope(Scanner* self)

191 : scanner_(self), complete_(false) {	130 : scanner_(self), complete_(false) {

192 self->StartLiteral();	131 self->StartLiteral();

193 }	132 }

194	133

195	134

196 Scanner::LiteralScope::~LiteralScope() {	135 Scanner::LiteralScope::~LiteralScope() {

(...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
302 }	241 }

303 return current_.token;	242 return current_.token;

304 }	243 }

305	244

306	245

307 void Scanner::StartLiteral() {	246 void Scanner::StartLiteral() {

308 literal_buffer_.StartLiteral();	247 literal_buffer_.StartLiteral();

309 }	248 }

310	249

311	250

312 void Scanner::AddChar(uc32 c) {	251 void Scanner::AddLiteralChar(uc32 c) {

313 literal_buffer_.AddChar(c);	252 literal_buffer_.AddChar(c);

314 }	253 }

315	254

316	255

317 void Scanner::TerminateLiteral() {	256 void Scanner::TerminateLiteral() {

318 next_.literal_chars = literal_buffer_.EndLiteral();	257 next_.literal_chars = literal_buffer_.EndLiteral();

319 }	258 }

320	259

321	260

322 void Scanner::DropLiteral() {	261 void Scanner::DropLiteral() {

323 literal_buffer_.DropLiteral();	262 literal_buffer_.DropLiteral();

324 }	263 }

325	264

326	265

327 void Scanner::AddCharAdvance() {	266 void Scanner::AddLiteralCharAdvance() {

328 AddChar(c0_);	267 AddLiteralChar(c0_);

329 Advance();	268 Advance();

330 }	269 }

331	270

332	271

333 static inline bool IsByteOrderMark(uc32 c) {	272 static inline bool IsByteOrderMark(uc32 c) {

334 // The Unicode value U+FFFE is guaranteed never to be assigned as a	273 // The Unicode value U+FFFE is guaranteed never to be assigned as a

335 // Unicode character; this implies that in a Unicode context the	274 // Unicode character; this implies that in a Unicode context the

336 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF	275 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF

337 // character expressed in little-endian byte order (since it could	276 // character expressed in little-endian byte order (since it could

338 // not be a U+FFFE character expressed in big-endian byte	277 // not be a U+FFFE character expressed in big-endian byte

(...skipping 12 matching lines...) Expand all Loading...
351 return source_pos() != start_position;	290 return source_pos() != start_position;

352 }	291 }

353	292

354	293

355 bool Scanner::SkipJavaScriptWhiteSpace() {	294 bool Scanner::SkipJavaScriptWhiteSpace() {

356 int start_position = source_pos();	295 int start_position = source_pos();

357	296

358 while (true) {	297 while (true) {

359 // We treat byte-order marks (BOMs) as whitespace for better	298 // We treat byte-order marks (BOMs) as whitespace for better

360 // compatibility with Spidermonkey and other JavaScript engines.	299 // compatibility with Spidermonkey and other JavaScript engines.

361 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) {	300 while (ScannerConstants::kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) {

362 // IsWhiteSpace() includes line terminators!	301 // IsWhiteSpace() includes line terminators!

363 if (kIsLineTerminator.get(c0_)) {	302 if (ScannerConstants::kIsLineTerminator.get(c0_)) {

364 // Ignore line terminators, but remember them. This is necessary	303 // Ignore line terminators, but remember them. This is necessary

365 // for automatic semicolon insertion.	304 // for automatic semicolon insertion.

366 has_line_terminator_before_next_ = true;	305 has_line_terminator_before_next_ = true;

367 }	306 }

368 Advance();	307 Advance();

369 }	308 }

370	309

371 // If there is an HTML comment end '-->' at the beginning of a	310 // If there is an HTML comment end '-->' at the beginning of a

372 // line (with only whitespace in front of it), we treat the rest	311 // line (with only whitespace in front of it), we treat the rest

373 // of the line as a comment. This is in line with the way	312 // of the line as a comment. This is in line with the way

(...skipping 19 matching lines...) Expand all Loading...
393	332

394	333

395 Token::Value Scanner::SkipSingleLineComment() {	334 Token::Value Scanner::SkipSingleLineComment() {

396 Advance();	335 Advance();

397	336

398 // The line terminator at the end of the line is not considered	337 // The line terminator at the end of the line is not considered

399 // to be part of the single-line comment; it is recognized	338 // to be part of the single-line comment; it is recognized

400 // separately by the lexical grammar and becomes part of the	339 // separately by the lexical grammar and becomes part of the

401 // stream of input elements for the syntactic grammar (see	340 // stream of input elements for the syntactic grammar (see

402 // ECMA-262, section 7.4, page 12).	341 // ECMA-262, section 7.4, page 12).

403 while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) {	342 while (c0_ >= 0 && !ScannerConstants::kIsLineTerminator.get(c0_)) {

404 Advance();	343 Advance();

405 }	344 }

406	345

407 return Token::WHITESPACE;	346 return Token::WHITESPACE;

408 }	347 }

409	348

410	349

411 Token::Value Scanner::SkipMultiLineComment() {	350 Token::Value Scanner::SkipMultiLineComment() {

412 ASSERT(c0_ == '*');	351 ASSERT(c0_ == '*');

413 Advance();	352 Advance();

(...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
530	469

531	470

532 Token::Value Scanner::ScanJsonString() {	471 Token::Value Scanner::ScanJsonString() {

533 ASSERT_EQ('"', c0_);	472 ASSERT_EQ('"', c0_);

534 Advance();	473 Advance();

535 LiteralScope literal(this);	474 LiteralScope literal(this);

536 while (c0_ != '"' && c0_ > 0) {	475 while (c0_ != '"' && c0_ > 0) {

537 // Check for control character (0x00-0x1f) or unterminated string (<0).	476 // Check for control character (0x00-0x1f) or unterminated string (<0).

538 if (c0_ < 0x20) return Token::ILLEGAL;	477 if (c0_ < 0x20) return Token::ILLEGAL;

539 if (c0_ != '\\') {	478 if (c0_ != '\\') {

540 AddCharAdvance();	479 AddLiteralCharAdvance();

541 } else {	480 } else {

542 Advance();	481 Advance();

543 switch (c0_) {	482 switch (c0_) {

544 case '"':	483 case '"':

545 case '\\':	484 case '\\':

546 case '/':	485 case '/':

547 AddChar(c0_);	486 AddLiteralChar(c0_);

548 break;	487 break;

549 case 'b':	488 case 'b':

550 AddChar('\x08');	489 AddLiteralChar('\x08');

551 break;	490 break;

552 case 'f':	491 case 'f':

553 AddChar('\x0c');	492 AddLiteralChar('\x0c');

554 break;	493 break;

555 case 'n':	494 case 'n':

556 AddChar('\x0a');	495 AddLiteralChar('\x0a');

557 break;	496 break;

558 case 'r':	497 case 'r':

559 AddChar('\x0d');	498 AddLiteralChar('\x0d');

560 break;	499 break;

561 case 't':	500 case 't':

562 AddChar('\x09');	501 AddLiteralChar('\x09');

563 break;	502 break;

564 case 'u': {	503 case 'u': {

565 uc32 value = 0;	504 uc32 value = 0;

566 for (int i = 0; i < 4; i++) {	505 for (int i = 0; i < 4; i++) {

567 Advance();	506 Advance();

568 int digit = HexValue(c0_);	507 int digit = HexValue(c0_);

569 if (digit < 0) {	508 if (digit < 0) {

570 return Token::ILLEGAL;	509 return Token::ILLEGAL;

571 }	510 }

572 value = value * 16 + digit;	511 value = value * 16 + digit;

573 }	512 }

574 AddChar(value);	513 AddLiteralChar(value);

575 break;	514 break;

576 }	515 }

577 default:	516 default:

578 return Token::ILLEGAL;	517 return Token::ILLEGAL;

579 }	518 }

580 Advance();	519 Advance();

581 }	520 }

582 }	521 }

583 if (c0_ != '"') {	522 if (c0_ != '"') {

584 return Token::ILLEGAL;	523 return Token::ILLEGAL;

585 }	524 }

586 literal.Complete();	525 literal.Complete();

587 Advance();	526 Advance();

588 return Token::STRING;	527 return Token::STRING;

589 }	528 }

590	529

591	530

592 Token::Value Scanner::ScanJsonNumber() {	531 Token::Value Scanner::ScanJsonNumber() {

593 LiteralScope literal(this);	532 LiteralScope literal(this);

594 if (c0_ == '-') AddCharAdvance();	533 if (c0_ == '-') AddLiteralCharAdvance();

595 if (c0_ == '0') {	534 if (c0_ == '0') {

596 AddCharAdvance();	535 AddLiteralCharAdvance();

597 // Prefix zero is only allowed if it's the only digit before	536 // Prefix zero is only allowed if it's the only digit before

598 // a decimal point or exponent.	537 // a decimal point or exponent.

599 if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL;	538 if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL;

600 } else {	539 } else {

601 if (c0_ < '1' || c0_ > '9') return Token::ILLEGAL;	540 if (c0_ < '1' || c0_ > '9') return Token::ILLEGAL;

602 do {	541 do {

603 AddCharAdvance();	542 AddLiteralCharAdvance();

604 } while (c0_ >= '0' && c0_ <= '9');	543 } while (c0_ >= '0' && c0_ <= '9');

605 }	544 }

606 if (c0_ == '.') {	545 if (c0_ == '.') {

607 AddCharAdvance();	546 AddLiteralCharAdvance();

608 if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL;	547 if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL;

609 do {	548 do {

610 AddCharAdvance();	549 AddLiteralCharAdvance();

611 } while (c0_ >= '0' && c0_ <= '9');	550 } while (c0_ >= '0' && c0_ <= '9');

612 }	551 }

613 if (AsciiAlphaToLower(c0_) == 'e') {	552 if (AsciiAlphaToLower(c0_) == 'e') {

614 AddCharAdvance();	553 AddLiteralCharAdvance();

615 if (c0_ == '-' || c0_ == '+') AddCharAdvance();	554 if (c0_ == '-' || c0_ == '+') AddLiteralCharAdvance();

616 if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL;	555 if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL;

617 do {	556 do {

618 AddCharAdvance();	557 AddLiteralCharAdvance();

619 } while (c0_ >= '0' && c0_ <= '9');	558 } while (c0_ >= '0' && c0_ <= '9');

620 }	559 }

621 literal.Complete();	560 literal.Complete();

622 return Token::NUMBER;	561 return Token::NUMBER;

623 }	562 }

624	563

625	564

626 Token::Value Scanner::ScanJsonIdentifier(const char* text,	565 Token::Value Scanner::ScanJsonIdentifier(const char* text,

627 Token::Value token) {	566 Token::Value token) {

628 LiteralScope literal(this);	567 LiteralScope literal(this);

629 while (*text != '\0') {	568 while (*text != '\0') {

630 if (c0_ != *text) return Token::ILLEGAL;	569 if (c0_ != *text) return Token::ILLEGAL;

631 Advance();	570 Advance();

632 text++;	571 text++;

633 }	572 }

634 if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL;	573 if (ScannerConstants::kIsIdentifierPart.get(c0_)) return Token::ILLEGAL;

635 literal.Complete();	574 literal.Complete();

636 return token;	575 return token;

637 }	576 }

638	577

639	578

640 void Scanner::ScanJavaScript() {	579 void Scanner::ScanJavaScript() {

641 next_.literal_chars = Vector<const char>();	580 next_.literal_chars = Vector<const char>();

642 Token::Value token;	581 Token::Value token;

643 do {	582 do {

644 // Remember the position of the next token	583 // Remember the position of the next token

(...skipping 202 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
847	786

848 case '?':	787 case '?':

849 token = Select(Token::CONDITIONAL);	788 token = Select(Token::CONDITIONAL);

850 break;	789 break;

851	790

852 case '~':	791 case '~':

853 token = Select(Token::BIT_NOT);	792 token = Select(Token::BIT_NOT);

854 break;	793 break;

855	794

856 default:	795 default:

857 if (kIsIdentifierStart.get(c0_)) {	796 if (ScannerConstants::kIsIdentifierStart.get(c0_)) {

858 token = ScanIdentifier();	797 token = ScanIdentifier();

859 } else if (IsDecimalDigit(c0_)) {	798 } else if (IsDecimalDigit(c0_)) {

860 token = ScanNumber(false);	799 token = ScanNumber(false);

861 } else if (SkipWhiteSpace()) {	800 } else if (SkipWhiteSpace()) {

862 token = Token::WHITESPACE;	801 token = Token::WHITESPACE;

863 } else if (c0_ < 0) {	802 } else if (c0_ < 0) {

864 token = Token::EOS;	803 token = Token::EOS;

865 } else {	804 } else {

866 token = Select(Token::ILLEGAL);	805 token = Select(Token::ILLEGAL);

867 }	806 }

(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
930 }	869 }

931 return x;	870 return x;

932 }	871 }

933	872

934	873

935 void Scanner::ScanEscape() {	874 void Scanner::ScanEscape() {

936 uc32 c = c0_;	875 uc32 c = c0_;

937 Advance();	876 Advance();

938	877

939 // Skip escaped newlines.	878 // Skip escaped newlines.

940 if (kIsLineTerminator.get(c)) {	879 if (ScannerConstants::kIsLineTerminator.get(c)) {

941 // Allow CR+LF newlines in multiline string literals.	880 // Allow CR+LF newlines in multiline string literals.

942 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();	881 if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();

943 // Allow LF+CR newlines in multiline string literals.	882 // Allow LF+CR newlines in multiline string literals.

944 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();	883 if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();

945 return;	884 return;

946 }	885 }

947	886

948 switch (c) {	887 switch (c) {

949 case '\'': // fall through	888 case '\'': // fall through

950 case '"' : // fall through	889 case '"' : // fall through

(...skipping 12 matching lines...) Expand all Loading...
963 case '3' : // fall through	902 case '3' : // fall through

964 case '4' : // fall through	903 case '4' : // fall through

965 case '5' : // fall through	904 case '5' : // fall through

966 case '6' : // fall through	905 case '6' : // fall through

967 case '7' : c = ScanOctalEscape(c, 2); break;	906 case '7' : c = ScanOctalEscape(c, 2); break;

968 }	907 }

969	908

970 // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these	909 // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these

971 // should be illegal, but they are commonly handled	910 // should be illegal, but they are commonly handled

972 // as non-escaped characters by JS VMs.	911 // as non-escaped characters by JS VMs.

973 AddChar(c);	912 AddLiteralChar(c);

974 }	913 }

975	914

976	915

977 Token::Value Scanner::ScanString() {	916 Token::Value Scanner::ScanString() {

978 uc32 quote = c0_;	917 uc32 quote = c0_;

979 Advance(); // consume quote	918 Advance(); // consume quote

980	919

981 LiteralScope literal(this);	920 LiteralScope literal(this);

982 while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) {	921 while (c0_ != quote && c0_ >= 0

	922 && !ScannerConstants::kIsLineTerminator.get(c0_)) {

983 uc32 c = c0_;	923 uc32 c = c0_;

984 Advance();	924 Advance();

985 if (c == '\\') {	925 if (c == '\\') {

986 if (c0_ < 0) return Token::ILLEGAL;	926 if (c0_ < 0) return Token::ILLEGAL;

987 ScanEscape();	927 ScanEscape();

988 } else {	928 } else {

989 AddChar(c);	929 AddLiteralChar(c);

990 }	930 }

991 }	931 }

992 if (c0_ != quote) return Token::ILLEGAL;	932 if (c0_ != quote) return Token::ILLEGAL;

993 literal.Complete();	933 literal.Complete();

994	934

995 Advance(); // consume quote	935 Advance(); // consume quote

996 return Token::STRING;	936 return Token::STRING;

997 }	937 }

998	938

999	939

(...skipping 10 matching lines...) Expand all Loading...
1010 return then;	950 return then;

1011 } else {	951 } else {

1012 return else_;	952 return else_;

1013 }	953 }

1014 }	954 }

1015	955

1016	956

1017 // Scans a (possibly empty) run of decimal digits, adding each one to the current literal.	957 // Scans a (possibly empty) run of decimal digits, adding each one to the current literal.

1018 void Scanner::ScanDecimalDigits() {	958 void Scanner::ScanDecimalDigits() {

1019 while (IsDecimalDigit(c0_))	959 while (IsDecimalDigit(c0_))

1020 AddCharAdvance();	960 AddLiteralCharAdvance();

1021 }	961 }

1022	962

1023	963

1024 Token::Value Scanner::ScanNumber(bool seen_period) {	964 Token::Value Scanner::ScanNumber(bool seen_period) {

1025 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction	965 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction

1026	966

1027 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL;	967 enum { DECIMAL, HEX, OCTAL } kind = DECIMAL;

1028	968

1029 LiteralScope literal(this);	969 LiteralScope literal(this);

1030 if (seen_period) {	970 if (seen_period) {

1031 // we have already seen a decimal point of the float	971 // we have already seen a decimal point of the float

1032 AddChar('.');	972 AddLiteralChar('.');

1033 ScanDecimalDigits(); // we know we have at least one digit	973 ScanDecimalDigits(); // we know we have at least one digit

1034	974

1035 } else {	975 } else {

1036 // if the first character is '0' we must check for octals and hex	976 // if the first character is '0' we must check for octals and hex

1037 if (c0_ == '0') {	977 if (c0_ == '0') {

1038 AddCharAdvance();	978 AddLiteralCharAdvance();

1039	979

1040 // either 0, 0exxx, 0Exxx, 0.xxx, an octal number, or a hex number	980 // either 0, 0exxx, 0Exxx, 0.xxx, an octal number, or a hex number

1041 if (c0_ == 'x' || c0_ == 'X') {	981 if (c0_ == 'x' || c0_ == 'X') {

1042 // hex number	982 // hex number

1043 kind = HEX;	983 kind = HEX;

1044 AddCharAdvance();	984 AddLiteralCharAdvance();

1045 if (!IsHexDigit(c0_)) {	985 if (!IsHexDigit(c0_)) {

1046 // we must have at least one hex digit after 'x'/'X'	986 // we must have at least one hex digit after 'x'/'X'

1047 return Token::ILLEGAL;	987 return Token::ILLEGAL;

1048 }	988 }

1049 while (IsHexDigit(c0_)) {	989 while (IsHexDigit(c0_)) {

1050 AddCharAdvance();	990 AddLiteralCharAdvance();

1051 }	991 }

1052 } else if ('0' <= c0_ && c0_ <= '7') {	992 } else if ('0' <= c0_ && c0_ <= '7') {

1053 // (possible) octal number	993 // (possible) octal number

1054 kind = OCTAL;	994 kind = OCTAL;

1055 while (true) {	995 while (true) {

1056 if (c0_ == '8' || c0_ == '9') {	996 if (c0_ == '8' || c0_ == '9') {

1057 kind = DECIMAL;	997 kind = DECIMAL;

1058 break;	998 break;

1059 }	999 }

1060 if (c0_ < '0' || '7' < c0_) break;	1000 if (c0_ < '0' || '7' < c0_) break;

1061 AddCharAdvance();	1001 AddLiteralCharAdvance();

1062 }	1002 }

1063 }	1003 }

1064 }	1004 }

1065	1005

1066 // Parse decimal digits and allow trailing fractional part.	1006 // Parse decimal digits and allow trailing fractional part.

1067 if (kind == DECIMAL) {	1007 if (kind == DECIMAL) {

1068 ScanDecimalDigits(); // optional	1008 ScanDecimalDigits(); // optional

1069 if (c0_ == '.') {	1009 if (c0_ == '.') {

1070 AddCharAdvance();	1010 AddLiteralCharAdvance();

1071 ScanDecimalDigits(); // optional	1011 ScanDecimalDigits(); // optional

1072 }	1012 }

1073 }	1013 }

1074 }	1014 }

1075	1015

1076 // scan exponent, if any	1016 // scan exponent, if any

1077 if (c0_ == 'e' || c0_ == 'E') {	1017 if (c0_ == 'e' || c0_ == 'E') {

1078 ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number	1018 ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number

1079 if (kind == OCTAL) return Token::ILLEGAL; // no exponent for octals allowed	1019 if (kind == OCTAL) return Token::ILLEGAL; // no exponent for octals allowed

1080 // scan exponent	1020 // scan exponent

1081 AddCharAdvance();	1021 AddLiteralCharAdvance();

1082 if (c0_ == '+' || c0_ == '-')	1022 if (c0_ == '+' || c0_ == '-')

1083 AddCharAdvance();	1023 AddLiteralCharAdvance();

1084 if (!IsDecimalDigit(c0_)) {	1024 if (!IsDecimalDigit(c0_)) {

1085 // we must have at least one decimal digit after 'e'/'E'	1025 // we must have at least one decimal digit after 'e'/'E'

1086 return Token::ILLEGAL;	1026 return Token::ILLEGAL;

1087 }	1027 }

1088 ScanDecimalDigits();	1028 ScanDecimalDigits();

1089 }	1029 }

1090	1030

1091 // The source character immediately following a numeric literal must	1031 // The source character immediately following a numeric literal must

1092 // not be an identifier start or a decimal digit; see ECMA-262	1032 // not be an identifier start or a decimal digit; see ECMA-262

1093 // section 7.8.3, page 17 (note that we read only one decimal digit	1033 // section 7.8.3, page 17 (note that we read only one decimal digit

1094 // if the value is 0).	1034 // if the value is 0).

1095 if (IsDecimalDigit(c0_) || kIsIdentifierStart.get(c0_))	1035 if (IsDecimalDigit(c0_) || ScannerConstants::kIsIdentifierStart.get(c0_))

1096 return Token::ILLEGAL;	1036 return Token::ILLEGAL;

1097	1037

1098 literal.Complete();	1038 literal.Complete();

1099	1039

1100 return Token::NUMBER;	1040 return Token::NUMBER;

1101 }	1041 }

1102	1042

1103	1043

1104 uc32 Scanner::ScanIdentifierUnicodeEscape() {	1044 uc32 Scanner::ScanIdentifierUnicodeEscape() {

1105 Advance();	1045 Advance();

1106 if (c0_ != 'u') return unibrow::Utf8::kBadChar;	1046 if (c0_ != 'u') return unibrow::Utf8::kBadChar;

1107 Advance();	1047 Advance();

1108 uc32 c = ScanHexEscape('u', 4);	1048 uc32 c = ScanHexEscape('u', 4);

1109 // We do not allow a unicode escape sequence to start another	1049 // We do not allow a unicode escape sequence to start another

1110 // unicode escape sequence.	1050 // unicode escape sequence.

1111 if (c == '\\') return unibrow::Utf8::kBadChar;	1051 if (c == '\\') return unibrow::Utf8::kBadChar;

1112 return c;	1052 return c;

1113 }	1053 }

1114	1054

1115	1055

1116 Token::Value Scanner::ScanIdentifier() {	1056 Token::Value Scanner::ScanIdentifier() {

1117 ASSERT(kIsIdentifierStart.get(c0_));	1057 ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_));

1118	1058

1119 LiteralScope literal(this);	1059 LiteralScope literal(this);

1120 KeywordMatcher keyword_match;	1060 KeywordMatcher keyword_match;

1121	1061

1122 // Scan identifier start character.	1062 // Scan identifier start character.

1123 if (c0_ == '\\') {	1063 if (c0_ == '\\') {

1124 uc32 c = ScanIdentifierUnicodeEscape();	1064 uc32 c = ScanIdentifierUnicodeEscape();

1125 // Only allow legal identifier start characters.	1065 // Only allow legal identifier start characters.

1126 if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL;	1066 if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL;

1127 AddChar(c);	1067 AddLiteralChar(c);

1128 keyword_match.Fail();	1068 keyword_match.Fail();

1129 } else {	1069 } else {

1130 AddChar(c0_);	1070 AddLiteralChar(c0_);

1131 keyword_match.AddChar(c0_);	1071 keyword_match.AddChar(c0_);

1132 Advance();	1072 Advance();

1133 }	1073 }

1134	1074

1135 // Scan the rest of the identifier characters.	1075 // Scan the rest of the identifier characters.

1136 while (kIsIdentifierPart.get(c0_)) {	1076 while (ScannerConstants::kIsIdentifierPart.get(c0_)) {

1137 if (c0_ == '\\') {	1077 if (c0_ == '\\') {

1138 uc32 c = ScanIdentifierUnicodeEscape();	1078 uc32 c = ScanIdentifierUnicodeEscape();

1139 // Only allow legal identifier part characters.	1079 // Only allow legal identifier part characters.

1140 if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL;	1080 if (!ScannerConstants::kIsIdentifierPart.get(c)) return Token::ILLEGAL;

1141 AddChar(c);	1081 AddLiteralChar(c);

1142 keyword_match.Fail();	1082 keyword_match.Fail();

1143 } else {	1083 } else {

1144 AddChar(c0_);	1084 AddLiteralChar(c0_);

1145 keyword_match.AddChar(c0_);	1085 keyword_match.AddChar(c0_);

1146 Advance();	1086 Advance();

1147 }	1087 }

1148 }	1088 }

1149 literal.Complete();	1089 literal.Complete();

1150	1090

1151 return keyword_match.token();	1091 return keyword_match.token();

1152 }	1092 }

1153	1093

1154	1094

1155	1095

1156 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) {

1157 // Checks whether the buffer contains an identifier (no escape).

1158 if (!buffer->has_more()) return false;

1159 if (!kIsIdentifierStart.get(buffer->GetNext())) return false;

1160 while (buffer->has_more()) {

1161 if (!kIsIdentifierPart.get(buffer->GetNext())) return false;

1162 }

1163 return true;

1164 }

1165

1166

1167 bool Scanner::ScanRegExpPattern(bool seen_equal) {	1096 bool Scanner::ScanRegExpPattern(bool seen_equal) {

1168 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags	1097 // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags

1169 bool in_character_class = false;	1098 bool in_character_class = false;

1170	1099

1171 // Previous token is either '/' or '/=', in the second case, the	1100 // Previous token is either '/' or '/=', in the second case, the

1172 // pattern starts at =.	1101 // pattern starts at =.

1173 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);	1102 next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);

1174 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);	1103 next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);

1175	1104

1176 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,	1105 // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,

1177 // the scanner should pass uninterpreted bodies to the RegExp	1106 // the scanner should pass uninterpreted bodies to the RegExp

1178 // constructor.	1107 // constructor.

1179 LiteralScope literal(this);	1108 LiteralScope literal(this);

1180 if (seen_equal)	1109 if (seen_equal)

1181 AddChar('=');	1110 AddLiteralChar('=');

1182	1111

1183 while (c0_ != '/' || in_character_class) {	1112 while (c0_ != '/' || in_character_class) {

1184 if (kIsLineTerminator.get(c0_) || c0_ < 0) return false;	1113 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;

1185 if (c0_ == '\\') { // escaped character	1114 if (c0_ == '\\') { // escaped character

1186 AddCharAdvance();	1115 AddLiteralCharAdvance();

1187 if (kIsLineTerminator.get(c0_) || c0_ < 0) return false;	1116 if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;

1188 AddCharAdvance();	1117 AddLiteralCharAdvance();

1189 } else { // unescaped character	1118 } else { // unescaped character

1190 if (c0_ == '[') in_character_class = true;	1119 if (c0_ == '[') in_character_class = true;

1191 if (c0_ == ']') in_character_class = false;	1120 if (c0_ == ']') in_character_class = false;

1192 AddCharAdvance();	1121 AddLiteralCharAdvance();

1193 }	1122 }

1194 }	1123 }

1195 Advance(); // consume '/'	1124 Advance(); // consume '/'

1196	1125

1197 literal.Complete();	1126 literal.Complete();

1198	1127

1199 return true;	1128 return true;

1200 }	1129 }

1201	1130

1202 bool Scanner::ScanRegExpFlags() {	1131 bool Scanner::ScanRegExpFlags() {

1203 // Scan regular expression flags.	1132 // Scan regular expression flags.

1204 LiteralScope literal(this);	1133 LiteralScope literal(this);

1205 while (kIsIdentifierPart.get(c0_)) {	1134 while (ScannerConstants::kIsIdentifierPart.get(c0_)) {

1206 if (c0_ == '\\') {	1135 if (c0_ == '\\') {

1207 uc32 c = ScanIdentifierUnicodeEscape();	1136 uc32 c = ScanIdentifierUnicodeEscape();

1208 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {	1137 if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {

1209 // We allow any escaped character, unlike the restriction on	1138 // We allow any escaped character, unlike the restriction on

1210 // IdentifierPart when it is used to build an IdentifierName.	1139 // IdentifierPart when it is used to build an IdentifierName.

1211 AddChar(c);	1140 AddLiteralChar(c);

1212 continue;	1141 continue;

1213 }	1142 }

1214 }	1143 }

1215 AddCharAdvance();	1144 AddLiteralCharAdvance();

1216 }	1145 }

1217 literal.Complete();	1146 literal.Complete();

1218	1147

1219 next_.location.end_pos = source_pos() - 1;	1148 next_.location.end_pos = source_pos() - 1;

1220 return true;	1149 return true;

1221 }	1150 }

1222	1151

1223 } } // namespace v8::internal	1152 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/scanner.h ('k') | src/scanner-base.h » ('j') | no next file with comments »