src/scanner.cc - Issue 113336: Optimize the lexical scanner by selective inlining, and...

Side by Side Diff: src/scanner.cc

Issue 113336: Optimize the lexical scanner by selective inlining, and... (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/

Patch Set: Created 11 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved.	1 // Copyright 2006-2008 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 30 matching lines...) Expand all Loading...
41 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator;	41 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator;

42 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace;	42 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace;

43	43

44	44

45 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_;	45 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_;

46	46

47	47

48 // ----------------------------------------------------------------------------	48 // ----------------------------------------------------------------------------

49 // UTF8Buffer	49 // UTF8Buffer

50	50

51 UTF8Buffer::UTF8Buffer() : data_(NULL) {	51 UTF8Buffer::UTF8Buffer() {

52 Initialize(NULL, 0);	52 static const int kInitialCapacity = 1 * KB;

	53 data_ = NewArray<char>(kInitialCapacity);

	54 limit_ = ComputeLimit(data_, kInitialCapacity);

	55 Reset();

	56 ASSERT(Capacity() == kInitialCapacity && pos() == 0);

53 }	57 }

54	58

55	59

56 UTF8Buffer::~UTF8Buffer() {	60 UTF8Buffer::~UTF8Buffer() {

57 DeleteArray(data_);	61 DeleteArray(data_);

58 }	62 }

59	63

60	64

61 void UTF8Buffer::Initialize(char* src, int length) {	65 void UTF8Buffer::AddCharSlow(uc32 c) {

62 DeleteArray(data_);	66 static const int kCapacityGrowthLimit = 1 * MB;

63 data_ = src;	67 if (cursor_ > limit_) {

64 size_ = length;	68 int old_capacity = Capacity();

65 Reset();	69 int old_position = pos();

	70 int new_capacity =

	71 Min(old_capacity * 2, old_capacity + kCapacityGrowthLimit);

	72 char* new_data = NewArray<char>(new_capacity);

	73 memcpy(new_data, data_, old_position);

	74 DeleteArray(data_);

	75 data_ = new_data;

	76 cursor_ = new_data + old_position;

	77 limit_ = ComputeLimit(new_data, new_capacity);

	78 ASSERT(Capacity() == new_capacity && pos() == old_position);

	79 }

	80 if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) {

	81 *cursor_++ = c; // Common case: 7-bit ASCII.

	82 } else {

	83 cursor_ += unibrow::Utf8::Encode(cursor_, c);

	84 }

	85 ASSERT(pos() <= Capacity());

66 }	86 }

67	87

68	88

69 void UTF8Buffer::AddChar(uc32 c) {

70 const int min_size = 1024;

71 if (pos_ + static_cast<int>(unibrow::Utf8::kMaxEncodedSize) > size_) {

72 int new_size = size_ * 2;

73 if (new_size < min_size) {

74 new_size = min_size;

75 }

76 char* new_data = NewArray<char>(new_size);

77 memcpy(new_data, data_, pos_);

78 DeleteArray(data_);

79 data_ = new_data;

80 size_ = new_size;

81 }

82 if (static_cast<unsigned>(c) < unibrow::Utf8::kMaxOneByteChar) {

83 data_[pos_++] = c; // common case: 7bit ASCII

84 } else {

85 pos_ += unibrow::Utf8::Encode(&data_[pos_], c);

86 }

87 ASSERT(pos_ <= size_);

88 }

89

90

91 // ----------------------------------------------------------------------------	89 // ----------------------------------------------------------------------------

92 // UTF16Buffer	90 // UTF16Buffer

93	91

94	92

95 UTF16Buffer::UTF16Buffer()	93 UTF16Buffer::UTF16Buffer()

96 : pos_(0),	94 : pos_(0),

97 pushback_buffer_(0),	95 pushback_buffer_(0),

98 last_(0),	96 last_(0),

99 stream_(NULL) { }	97 stream_(NULL) { }

100	98

(...skipping 64 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
165 source_.Initialize(source, stream);	163 source_.Initialize(source, stream);

166 position_ = position;	164 position_ = position;

167	165

168 // Reset literals buffer	166 // Reset literals buffer

169 literals_.Reset();	167 literals_.Reset();

170	168

171 // Set c0_ (one character ahead)	169 // Set c0_ (one character ahead)

172 ASSERT(kCharacterLookaheadBufferSize == 1);	170 ASSERT(kCharacterLookaheadBufferSize == 1);

173 Advance();	171 Advance();

174	172

175 // Skip initial whitespace (allowing HTML comment ends) and scan	173 // Skip initial whitespace allowing HTML comment ends just like

176 // first token.	174 // after a newline and scan first token.

177 SkipWhiteSpace(true);	175 has_line_terminator_before_next_ = true;

	176 SkipWhiteSpace();

178 Scan();	177 Scan();

179 }	178 }

180	179

181	180

182 Handle<String> Scanner::SubString(int start, int end) {	181 Handle<String> Scanner::SubString(int start, int end) {

183 return source_.SubString(start - position_, end - position_);	182 return source_.SubString(start - position_, end - position_);

184 }	183 }

185	184

186	185

187 Token::Value Scanner::Next() {	186 Token::Value Scanner::Next() {

(...skipping 51 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
239 // Unicode character; this implies that in a Unicode context the	238 // Unicode character; this implies that in a Unicode context the

240 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF	239 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF

241 // character expressed in little-endian byte order (since it could	240 // character expressed in little-endian byte order (since it could

242 // not be a U+FFFE character expressed in big-endian byte	241 // not be a U+FFFE character expressed in big-endian byte

243 // order). Nevertheless, we check for it to be compatible with	242 // order). Nevertheless, we check for it to be compatible with

244 // Spidermonkey.	243 // Spidermonkey.

245 return c == 0xFEFF || c == 0xFFFE;	244 return c == 0xFEFF || c == 0xFFFE;

246 }	245 }

247	246

248	247

249 void Scanner::SkipWhiteSpace(bool initial) {	248 bool Scanner::SkipWhiteSpace() {

250 has_line_terminator_before_next_ = initial;	249 int start_position = source_pos();

251	250

252 while (true) {	251 while (true) {

253 // We treat byte-order marks (BOMs) as whitespace for better	252 // We treat byte-order marks (BOMs) as whitespace for better

254 // compatibility with Spidermonkey and other JavaScript engines.	253 // compatibility with Spidermonkey and other JavaScript engines.

255 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) {	254 while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) {

256 // IsWhiteSpace() includes line terminators!	255 // IsWhiteSpace() includes line terminators!

257 if (kIsLineTerminator.get(c0_))	256 if (kIsLineTerminator.get(c0_)) {

258 // Ignore line terminators, but remember them. This is necessary	257 // Ignore line terminators, but remember them. This is necessary

259 // for automatic semicolon insertion.	258 // for automatic semicolon insertion.

260 has_line_terminator_before_next_ = true;	259 has_line_terminator_before_next_ = true;

	260 }

261 Advance();	261 Advance();

262 }	262 }

263	263

264 // If there is an HTML comment end '-->' at the beginning of a	264 // If there is an HTML comment end '-->' at the beginning of a

265 // line (with only whitespace in front of it), we treat the rest	265 // line (with only whitespace in front of it), we treat the rest

266 // of the line as a comment. This is in line with the way	266 // of the line as a comment. This is in line with the way

267 // SpiderMonkey handles it.	267 // SpiderMonkey handles it.

268 if (c0_ == '-' && has_line_terminator_before_next_) {	268 if (c0_ == '-' && has_line_terminator_before_next_) {

269 Advance();	269 Advance();

270 if (c0_ == '-') {	270 if (c0_ == '-') {

271 Advance();	271 Advance();

272 if (c0_ == '>') {	272 if (c0_ == '>') {

273 // Treat the rest of the line as a comment.	273 // Treat the rest of the line as a comment.

274 SkipSingleLineComment();	274 SkipSingleLineComment();

275 // Continue skipping white space after the comment.	275 // Continue skipping white space after the comment.

276 continue;	276 continue;

277 }	277 }

278 PushBack('-'); // undo Advance()	278 PushBack('-'); // undo Advance()

279 }	279 }

280 PushBack('-'); // undo Advance()	280 PushBack('-'); // undo Advance()

281 }	281 }

282 return;	282 // Return whether or not we skipped any characters.

	283 return source_pos() != start_position;

283 }	284 }

284 }	285 }

285	286

286	287

287 Token::Value Scanner::SkipSingleLineComment() {	288 Token::Value Scanner::SkipSingleLineComment() {

288 Advance();	289 Advance();

289	290

290 // The line terminator at the end of the line is not considered	291 // The line terminator at the end of the line is not considered

291 // to be part of the single-line comment; it is recognized	292 // to be part of the single-line comment; it is recognized

292 // separately by the lexical grammar and becomes part of the	293 // separately by the lexical grammar and becomes part of the

293 // stream of input elements for the syntactic grammar (see	294 // stream of input elements for the syntactic grammar (see

294 // ECMA-262, section 7.4, page 12).	295 // ECMA-262, section 7.4, page 12).

295 while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) {	296 while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) {

296 Advance();	297 Advance();

297 }	298 }

298	299

299 return Token::COMMENT;	300 return Token::WHITESPACE;

300 }	301 }

301	302

302	303

303 Token::Value Scanner::SkipMultiLineComment() {	304 Token::Value Scanner::SkipMultiLineComment() {

304 ASSERT(c0_ == '*');	305 ASSERT(c0_ == '*');

305 Advance();	306 Advance();

306	307

307 while (c0_ >= 0) {	308 while (c0_ >= 0) {

308 char ch = c0_;	309 char ch = c0_;

309 Advance();	310 Advance();

310 // If we have reached the end of the multi-line comment, we	311 // If we have reached the end of the multi-line comment, we

311 // consume the '/' and insert a whitespace. This way all	312 // consume the '/' and insert a whitespace. This way all

312 // multi-line comments are treated as whitespace - even the ones	313 // multi-line comments are treated as whitespace - even the ones

313 // containing line terminators. This contradicts ECMA-262, section	314 // containing line terminators. This contradicts ECMA-262, section

314 // 7.4, page 12, that says that multi-line comments containing	315 // 7.4, page 12, that says that multi-line comments containing

315 // line terminators should be treated as a line terminator, but it	316 // line terminators should be treated as a line terminator, but it

316 // matches the behaviour of SpiderMonkey and KJS.	317 // matches the behaviour of SpiderMonkey and KJS.

317 if (ch == '*' && c0_ == '/') {	318 if (ch == '*' && c0_ == '/') {

318 c0_ = ' ';	319 c0_ = ' ';

319 return Token::COMMENT;	320 return Token::WHITESPACE;

320 }	321 }

321 }	322 }

322	323

323 // Unterminated multi-line comment.	324 // Unterminated multi-line comment.

324 return Token::ILLEGAL;	325 return Token::ILLEGAL;

325 }	326 }

326	327

327	328

328 Token::Value Scanner::ScanHtmlComment() {	329 Token::Value Scanner::ScanHtmlComment() {

329 // Check for <!-- comments.	330 // Check for <!-- comments.

330 ASSERT(c0_ == '!');	331 ASSERT(c0_ == '!');

331 Advance();	332 Advance();

332 if (c0_ == '-') {	333 if (c0_ == '-') {

333 Advance();	334 Advance();

334 if (c0_ == '-') return SkipSingleLineComment();	335 if (c0_ == '-') return SkipSingleLineComment();

335 PushBack('-'); // undo Advance()	336 PushBack('-'); // undo Advance()

336 }	337 }

337 PushBack('!'); // undo Advance()	338 PushBack('!'); // undo Advance()

338 ASSERT(c0_ == '!');	339 ASSERT(c0_ == '!');

339 return Token::LT;	340 return Token::LT;

340 }	341 }

341	342

342	343

343 void Scanner::Scan() {	344 void Scanner::Scan() {

344 Token::Value token;	345 Token::Value token;

345 bool has_line_terminator = false;	346 has_line_terminator_before_next_ = false;

346 do {	347 do {

347 SkipWhiteSpace(has_line_terminator);

348

349 // Remember the line terminator in previous loop

350 has_line_terminator = has_line_terminator_before_next();

351

352 // Remember the position of the next token	348 // Remember the position of the next token

353 next_.location.beg_pos = source_pos();	349 next_.location.beg_pos = source_pos();

354	350

355 token = ScanToken();	351 switch (c0_) {

356 } while (token == Token::COMMENT);	352 case ' ':

	353 case '\t':

	354 Advance();

	355 token = Token::WHITESPACE;

	356 break;

	357

	358 case '\n':

	359 Advance();

	360 has_line_terminator_before_next_ = true;

	361 token = Token::WHITESPACE;

	362 break;

	363

	364 case '"': case '\'':

	365 token = ScanString();

	366 break;

	367

	368 case '<':

	369 // < <= << <<= <!--

	370 Advance();

	371 if (c0_ == '=') {

	372 token = Select(Token::LTE);

	373 } else if (c0_ == '<') {

	374 token = Select('=', Token::ASSIGN_SHL, Token::SHL);

	375 } else if (c0_ == '!') {

	376 token = ScanHtmlComment();

	377 } else {

	378 token = Token::LT;

	379 }

	380 break;

	381

	382 case '>':

	383 // > >= >> >>= >>> >>>=

	384 Advance();

	385 if (c0_ == '=') {

	386 token = Select(Token::GTE);

	387 } else if (c0_ == '>') {

	388 // >> >>= >>> >>>=

	389 Advance();

	390 if (c0_ == '=') {

	391 token = Select(Token::ASSIGN_SAR);

	392 } else if (c0_ == '>') {

	393 token = Select('=', Token::ASSIGN_SHR, Token::SHR);

	394 } else {

	395 token = Token::SAR;

	396 }

	397 } else {

	398 token = Token::GT;

	399 }

	400 break;

	401

	402 case '=':

	403 // = == ===

	404 Advance();

	405 if (c0_ == '=') {

	406 token = Select('=', Token::EQ_STRICT, Token::EQ);

	407 } else {

	408 token = Token::ASSIGN;

	409 }

	410 break;

	411

	412 case '!':

	413 // ! != !==

	414 Advance();

	415 if (c0_ == '=') {

	416 token = Select('=', Token::NE_STRICT, Token::NE);

	417 } else {

	418 token = Token::NOT;

	419 }

	420 break;

	421

	422 case '+':

	423 // + ++ +=

	424 Advance();

	425 if (c0_ == '+') {

	426 token = Select(Token::INC);

	427 } else if (c0_ == '=') {

	428 token = Select(Token::ASSIGN_ADD);

	429 } else {

	430 token = Token::ADD;

	431 }

	432 break;

	433

	434 case '-':

	435 // - -- --> -=

	436 Advance();

	437 if (c0_ == '-') {

	438 Advance();

	439 if (c0_ == '>' && has_line_terminator_before_next_) {

	440 // For compatibility with SpiderMonkey, we skip lines that

	441 // start with an HTML comment end '-->'.

	442 token = SkipSingleLineComment();

	443 } else {

	444 token = Token::DEC;

	445 }

	446 } else if (c0_ == '=') {

	447 token = Select(Token::ASSIGN_SUB);

	448 } else {

	449 token = Token::SUB;

	450 }

	451 break;

	452

	453 case '*':

	454 // * *=

	455 token = Select('=', Token::ASSIGN_MUL, Token::MUL);

	456 break;

	457

	458 case '%':

	459 // % %=

	460 token = Select('=', Token::ASSIGN_MOD, Token::MOD);

	461 break;

	462

	463 case '/':

	464 // / // /* /=

	465 Advance();

	466 if (c0_ == '/') {

	467 token = SkipSingleLineComment();

	468 } else if (c0_ == '*') {

	469 token = SkipMultiLineComment();

	470 } else if (c0_ == '=') {

	471 token = Select(Token::ASSIGN_DIV);

	472 } else {

	473 token = Token::DIV;

	474 }

	475 break;

	476

	477 case '&':

	478 // & && &=

	479 Advance();

	480 if (c0_ == '&') {

	481 token = Select(Token::AND);

	482 } else if (c0_ == '=') {

	483 token = Select(Token::ASSIGN_BIT_AND);

	484 } else {

	485 token = Token::BIT_AND;

	486 }

	487 break;

	488

	489 case '|':

	490 // | || |=

	491 Advance();

	492 if (c0_ == '|') {

	493 token = Select(Token::OR);

	494 } else if (c0_ == '=') {

	495 token = Select(Token::ASSIGN_BIT_OR);

	496 } else {

	497 token = Token::BIT_OR;

	498 }

	499 break;

	500

	501 case '^':

	502 // ^ ^=

	503 token = Select('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR);

	504 break;

	505

	506 case '.':

	507 // . Number

	508 Advance();

	509 if (IsDecimalDigit(c0_)) {

	510 token = ScanNumber(true);

	511 } else {

	512 token = Token::PERIOD;

	513 }

	514 break;

	515

	516 case ':':

	517 token = Select(Token::COLON);

	518 break;

	519

	520 case ';':

	521 token = Select(Token::SEMICOLON);

	522 break;

	523

	524 case ',':

	525 token = Select(Token::COMMA);

	526 break;

	527

	528 case '(':

	529 token = Select(Token::LPAREN);

	530 break;

	531

	532 case ')':

	533 token = Select(Token::RPAREN);

	534 break;

	535

	536 case '[':

	537 token = Select(Token::LBRACK);

	538 break;

	539

	540 case ']':

	541 token = Select(Token::RBRACK);

	542 break;

	543

	544 case '{':

	545 token = Select(Token::LBRACE);

	546 break;

	547

	548 case '}':

	549 token = Select(Token::RBRACE);

	550 break;

	551

	552 case '?':

	553 token = Select(Token::CONDITIONAL);

	554 break;

	555

	556 case '~':

	557 token = Select(Token::BIT_NOT);

	558 break;

	559

	560 default:

	561 if (kIsIdentifierStart.get(c0_)) {

	562 token = ScanIdentifier();

	563 } else if (IsDecimalDigit(c0_)) {

	564 token = ScanNumber(false);

	565 } else if (SkipWhiteSpace()) {

	566 token = Token::WHITESPACE;

	567 } else if (c0_ < 0) {

	568 token = Token::EOS;

	569 } else {

	570 token = Select(Token::ILLEGAL);

	571 }

	572 break;

	573 }

	574

	575 // Continue scanning for tokens as long as we're just skipping

	576 // whitespace.

	577 } while (token == Token::WHITESPACE);

357	578

358 next_.location.end_pos = source_pos();	579 next_.location.end_pos = source_pos();

359 next_.token = token;	580 next_.token = token;

360 }	581 }

361	582

362	583

363 void Scanner::SeekForward(int pos) {	584 void Scanner::SeekForward(int pos) {

364 source_.SeekForward(pos - 1);	585 source_.SeekForward(pos - 1);

365 Advance();	586 Advance();

366 Scan();	587 Scan();

(...skipping 121 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
488 Advance();	709 Advance();

489 if (c0_ == next) {	710 if (c0_ == next) {

490 Advance();	711 Advance();

491 return then;	712 return then;

492 } else {	713 } else {

493 return else_;	714 return else_;

494 }	715 }

495 }	716 }

496	717

497	718

498 Token::Value Scanner::ScanToken() {

499 switch (c0_) {

500 // strings

501 case '"': case '\'':

502 return ScanString();

503

504 case '<':

505 // < <= << <<= <!--

506 Advance();

507 if (c0_ == '=') return Select(Token::LTE);

508 if (c0_ == '<') return Select('=', Token::ASSIGN_SHL, Token::SHL);

509 if (c0_ == '!') return ScanHtmlComment();

510 return Token::LT;

511

512 case '>':

513 // > >= >> >>= >>> >>>=

514 Advance();

515 if (c0_ == '=') return Select(Token::GTE);

516 if (c0_ == '>') {

517 // >> >>= >>> >>>=

518 Advance();

519 if (c0_ == '=') return Select(Token::ASSIGN_SAR);

520 if (c0_ == '>') return Select('=', Token::ASSIGN_SHR, Token::SHR);

521 return Token::SAR;

522 }

523 return Token::GT;

524

525 case '=':

526 // = == ===

527 Advance();

528 if (c0_ == '=') return Select('=', Token::EQ_STRICT, Token::EQ);

529 return Token::ASSIGN;

530

531 case '!':

532 // ! != !==

533 Advance();

534 if (c0_ == '=') return Select('=', Token::NE_STRICT, Token::NE);

535 return Token::NOT;

536

537 case '+':

538 // + ++ +=

539 Advance();

540 if (c0_ == '+') return Select(Token::INC);

541 if (c0_ == '=') return Select(Token::ASSIGN_ADD);

542 return Token::ADD;

543

544 case '-':

545 // - -- -=

546 Advance();

547 if (c0_ == '-') return Select(Token::DEC);

548 if (c0_ == '=') return Select(Token::ASSIGN_SUB);

549 return Token::SUB;

550

551 case '*':

552 // * *=

553 return Select('=', Token::ASSIGN_MUL, Token::MUL);

554

555 case '%':

556 // % %=

557 return Select('=', Token::ASSIGN_MOD, Token::MOD);

558

559 case '/':

560 // / // /* /=

561 Advance();

562 if (c0_ == '/') return SkipSingleLineComment();

563 if (c0_ == '*') return SkipMultiLineComment();

564 if (c0_ == '=') return Select(Token::ASSIGN_DIV);

565 return Token::DIV;

566

567 case '&':

568 // & && &=

569 Advance();

570 if (c0_ == '&') return Select(Token::AND);

571 if (c0_ == '=') return Select(Token::ASSIGN_BIT_AND);

572 return Token::BIT_AND;

573

574 case '|':

575 // | || |=

576 Advance();

577 if (c0_ == '|') return Select(Token::OR);

578 if (c0_ == '=') return Select(Token::ASSIGN_BIT_OR);

579 return Token::BIT_OR;

580

581 case '^':

582 // ^ ^=

583 return Select('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR);

584

585 case '.':

586 // . Number

587 Advance();

588 if (IsDecimalDigit(c0_)) return ScanNumber(true);

589 return Token::PERIOD;

590

591 case ':':

592 return Select(Token::COLON);

593

594 case ';':

595 return Select(Token::SEMICOLON);

596

597 case ',':

598 return Select(Token::COMMA);

599

600 case '(':

601 return Select(Token::LPAREN);

602

603 case ')':

604 return Select(Token::RPAREN);

605

606 case '[':

607 return Select(Token::LBRACK);

608

609 case ']':

610 return Select(Token::RBRACK);

611

612 case '{':

613 return Select(Token::LBRACE);

614

615 case '}':

616 return Select(Token::RBRACE);

617

618 case '?':

619 return Select(Token::CONDITIONAL);

620

621 case '~':

622 return Select(Token::BIT_NOT);

623

624 default:

625 if (kIsIdentifierStart.get(c0_))

626 return ScanIdentifier();

627 if (IsDecimalDigit(c0_))

628 return ScanNumber(false);

629 if (c0_ < 0)

630 return Token::EOS;

631 return Select(Token::ILLEGAL);

632 }

633

634 UNREACHABLE();

635 return Token::ILLEGAL;

636 }

637

638

639 // Returns true if any decimal digits were scanned, returns false otherwise.	719 // Returns true if any decimal digits were scanned, returns false otherwise.

640 void Scanner::ScanDecimalDigits() {	720 void Scanner::ScanDecimalDigits() {

641 while (IsDecimalDigit(c0_))	721 while (IsDecimalDigit(c0_))

642 AddCharAdvance();	722 AddCharAdvance();

643 }	723 }

644	724

645	725

646 Token::Value Scanner::ScanNumber(bool seen_period) {	726 Token::Value Scanner::ScanNumber(bool seen_period) {

647 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction	727 ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction

648	728

(...skipping 78 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
727 uc32 c = ScanHexEscape('u', 4);	807 uc32 c = ScanHexEscape('u', 4);

728 // We do not allow a unicode escape sequence to start another	808 // We do not allow a unicode escape sequence to start another

729 // unicode escape sequence.	809 // unicode escape sequence.

730 if (c == '\\') return unibrow::Utf8::kBadChar;	810 if (c == '\\') return unibrow::Utf8::kBadChar;

731 return c;	811 return c;

732 }	812 }

733	813

734	814

735 Token::Value Scanner::ScanIdentifier() {	815 Token::Value Scanner::ScanIdentifier() {

736 ASSERT(kIsIdentifierStart.get(c0_));	816 ASSERT(kIsIdentifierStart.get(c0_));

737

738 bool has_escapes = false;	817 bool has_escapes = false;

739	818

740 StartLiteral();	819 StartLiteral();

741 // Scan identifier start character.	820 // Scan identifier start character.

742 if (c0_ == '\\') {	821 if (c0_ == '\\') {

743 has_escapes = true;	822 has_escapes = true;

744 uc32 c = ScanIdentifierUnicodeEscape();	823 uc32 c = ScanIdentifierUnicodeEscape();

745 // Only allow legal identifier start characters.	824 // Only allow legal identifier start characters.

746 if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL;	825 if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL;

747 AddChar(c);	826 AddChar(c);

748 } else {	827 } else {

749 AddCharAdvance();	828 AddChar(c0_);

	829 Advance();

750 }	830 }

	831

751 // Scan the rest of the identifier characters.	832 // Scan the rest of the identifier characters.

752 while (kIsIdentifierPart.get(c0_)) {	833 while (kIsIdentifierPart.get(c0_)) {

753 if (c0_ == '\\') {	834 if (c0_ == '\\') {

754 has_escapes = true;	835 has_escapes = true;

755 uc32 c = ScanIdentifierUnicodeEscape();	836 uc32 c = ScanIdentifierUnicodeEscape();

756 // Only allow legal identifier part characters.	837 // Only allow legal identifier part characters.

757 if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL;	838 if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL;

758 AddChar(c);	839 AddChar(c);

759 } else {	840 } else {

760 AddCharAdvance();	841 AddChar(c0_);

	842 Advance();

761 }	843 }

762 }	844 }

763 TerminateLiteral();	845 TerminateLiteral();

764	846

765 // We don't have any 1-letter keywords (this is probably a common case).	847 // We don't have any 1-letter keywords (this is probably a common case).

766 if ((next_.literal_end - next_.literal_pos) == 1)	848 if ((next_.literal_end - next_.literal_pos) == 1) {

767 return Token::IDENTIFIER;	849 return Token::IDENTIFIER;

	850 }

768	851

769 // If the identifier contains unicode escapes, it must not be	852 // If the identifier contains unicode escapes, it must not be

770 // resolved to a keyword.	853 // resolved to a keyword.

771 if (has_escapes)	854 if (has_escapes) {

772 return Token::IDENTIFIER;	855 return Token::IDENTIFIER;

	856 }

773	857

774 return Token::Lookup(&literals_.data()[next_.literal_pos]);	858 return Token::Lookup(&literals_.data()[next_.literal_pos]);

775 }	859 }

776	860

777	861

778	862

779 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) {	863 bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) {

780 // Checks whether the buffer contains an identifier (no escape).	864 // Checks whether the buffer contains an identifier (no escape).

781 if (!buffer->has_more()) return false;	865 if (!buffer->has_more()) return false;

782 if (!kIsIdentifierStart.get(buffer->GetNext())) return false;	866 if (!kIsIdentifierStart.get(buffer->GetNext())) return false;

(...skipping 58 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
841 }	925 }

842 AddCharAdvance();	926 AddCharAdvance();

843 }	927 }

844 TerminateLiteral();	928 TerminateLiteral();

845	929

846 next_.location.end_pos = source_pos() - 1;	930 next_.location.end_pos = source_pos() - 1;

847 return true;	931 return true;

848 }	932 }

849	933

850 } } // namespace v8::internal	934 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/scanner.h ('k') | src/token.h » ('j') | no next file with comments »