src/scanner.cc - Issue 3181036: Created collector class and used it to collect identifiers during scanning.

Side by Side Diff: src/scanner.cc

Issue 3181036: Created collector class and used it to collect identifiers during scanning. (Closed)

Patch Set: Created 10 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved.	1 // Copyright 2006-2008 the V8 project authors. All rights reserved.

2 // Redistribution and use in source and binary forms, with or without	2 // Redistribution and use in source and binary forms, with or without

3 // modification, are permitted provided that the following conditions are	3 // modification, are permitted provided that the following conditions are

4 // met:	4 // met:

5 //	5 //

6 // * Redistributions of source code must retain the above copyright	6 // * Redistributions of source code must retain the above copyright

7 // notice, this list of conditions and the following disclaimer.	7 // notice, this list of conditions and the following disclaimer.

8 // * Redistributions in binary form must reproduce the above	8 // * Redistributions in binary form must reproduce the above

9 // copyright notice, this list of conditions and the following	9 // copyright notice, this list of conditions and the following

10 // disclaimer in the documentation and/or other materials provided	10 // disclaimer in the documentation and/or other materials provided

(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
43 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator;	43 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator;

44 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace;	44 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace;

45	45

46	46

47 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_;	47 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_;

48	48

49	49

50 // ----------------------------------------------------------------------------	50 // ----------------------------------------------------------------------------

51 // UTF8Buffer	51 // UTF8Buffer

52	52

53 UTF8Buffer::UTF8Buffer() : data_(NULL), limit_(NULL) { }	53 UTF8Buffer::UTF8Buffer() : buffer_(kInitialCapacity) { }

54	54

55	55

56 UTF8Buffer::~UTF8Buffer() {	56 UTF8Buffer::~UTF8Buffer() {}

57 if (data_ != NULL) DeleteArray(data_);	57

	58

	59 void UTF8Buffer::AddCharSlow(uc32 c) {

	60 ASSERT(static_cast<unsigned>(c) > unibrow::Utf8::kMaxOneByteChar);

	61 int length = unibrow::Utf8::Length(c);

	62 Vector<char> block = buffer_.AddBlock(length, '\0');

	63 #ifdef DEBUG

	64 int written_length = unibrow::Utf8::Encode(block.start(), c);

	65 CHECK_EQ(length, written_length);

	66 #else

	67 unibrow::Utf8::Encode(block.start(), c);

	68 #endif

58 }	69 }

59	70

60	71

61 void UTF8Buffer::AddCharSlow(uc32 c) {

62 static const int kCapacityGrowthLimit = 1 * MB;

63 if (cursor_ > limit_) {

64 int old_capacity = Capacity();

65 int old_position = pos();

66 int new_capacity =

67 Min(old_capacity * 3, old_capacity + kCapacityGrowthLimit);

68 char* new_data = NewArray<char>(new_capacity);

69 memcpy(new_data, data_, old_position);

70 DeleteArray(data_);

71 data_ = new_data;

72 cursor_ = new_data + old_position;

73 limit_ = ComputeLimit(new_data, new_capacity);

74 ASSERT(Capacity() == new_capacity && pos() == old_position);

75 }

76 if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) {

77 *cursor_++ = c; // Common case: 7-bit ASCII.

78 } else {

79 cursor_ += unibrow::Utf8::Encode(cursor_, c);

80 }

81 ASSERT(pos() <= Capacity());

82 }

83

84

85 // ----------------------------------------------------------------------------	72 // ----------------------------------------------------------------------------

86 // UTF16Buffer	73 // UTF16Buffer

87	74

88	75

89 UTF16Buffer::UTF16Buffer()	76 UTF16Buffer::UTF16Buffer()

90 : pos_(0), end_(Scanner::kNoEndPosition) { }	77 : pos_(0), end_(Scanner::kNoEndPosition) { }

91	78

92	79

93 // CharacterStreamUTF16Buffer	80 // CharacterStreamUTF16Buffer

94 CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer()	81 CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer()

(...skipping 297 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
392 start_position,	379 start_position,

393 end_position);	380 end_position);

394 source_ = &char_stream_buffer_;	381 source_ = &char_stream_buffer_;

395 }	382 }

396	383

397 is_parsing_json_ = (language == JSON);	384 is_parsing_json_ = (language == JSON);

398	385

399 // Set c0_ (one character ahead)	386 // Set c0_ (one character ahead)

400 ASSERT(kCharacterLookaheadBufferSize == 1);	387 ASSERT(kCharacterLookaheadBufferSize == 1);

401 Advance();	388 Advance();

402 // Initializer current_ to not refer to a literal buffer.	389 // Initialise current_ to not refer to a literal.

403 current_.literal_buffer = NULL;	390 current_.literal_chars = Vector<const char>();

404	391

405 // Skip initial whitespace allowing HTML comment ends just like	392 // Skip initial whitespace allowing HTML comment ends just like

406 // after a newline and scan first token.	393 // after a newline and scan first token.

407 has_line_terminator_before_next_ = true;	394 has_line_terminator_before_next_ = true;

408 SkipWhiteSpace();	395 SkipWhiteSpace();

409 Scan();	396 Scan();

410 }	397 }

411	398

412	399

413 Token::Value Scanner::Next() {	400 Token::Value Scanner::Next() {

414 // BUG 1215673: Find a thread safe way to set a stack limit in	401 // BUG 1215673: Find a thread safe way to set a stack limit in

415 // pre-parse mode. Otherwise, we cannot safely pre-parse from other	402 // pre-parse mode. Otherwise, we cannot safely pre-parse from other

416 // threads.	403 // threads.

417 current_ = next_;	404 current_ = next_;

418 // Check for stack-overflow before returning any tokens.	405 // Check for stack-overflow before returning any tokens.

419 StackLimitCheck check;	406 StackLimitCheck check;

420 if (check.HasOverflowed()) {	407 if (check.HasOverflowed()) {

421 stack_overflow_ = true;	408 stack_overflow_ = true;

422 next_.token = Token::ILLEGAL;	409 next_.token = Token::ILLEGAL;

423 } else {	410 } else {

424 Scan();	411 Scan();

425 }	412 }

426 return current_.token;	413 return current_.token;

427 }	414 }

428	415

429	416

430 void Scanner::StartLiteral() {	417 void Scanner::StartLiteral() {

431 // Use the first buffer unless it's currently in use by the current_ token.	418 literal_buffer_.StartLiteral();

432 // In most cases we won't have two literals/identifiers in a row, so

433 // the second buffer won't be used very often and is unlikely to grow much.

434 UTF8Buffer* free_buffer =

435 (current_.literal_buffer != &literal_buffer_1_) ? &literal_buffer_1_

436 : &literal_buffer_2_;

437 next_.literal_buffer = free_buffer;

438 free_buffer->Reset();

439 }	419 }

440	420

441	421

442 void Scanner::AddChar(uc32 c) {	422 void Scanner::AddChar(uc32 c) {

443 next_.literal_buffer->AddChar(c);	423 literal_buffer_.AddChar(c);

	424 }

	425

	426 void Scanner::TerminateLiteral() {

	427 next_.literal_chars = literal_buffer_.EndLiteral();

444 }	428 }

445	429

446	430

447 void Scanner::TerminateLiteral() {

448 AddChar(0);

449 }

450

451

452 void Scanner::AddCharAdvance() {	431 void Scanner::AddCharAdvance() {

453 AddChar(c0_);	432 AddChar(c0_);

454 Advance();	433 Advance();

455 }	434 }

456	435

457	436

458 static inline bool IsByteOrderMark(uc32 c) {	437 static inline bool IsByteOrderMark(uc32 c) {

459 // The Unicode value U+FFFE is guaranteed never to be assigned as a	438 // The Unicode value U+FFFE is guaranteed never to be assigned as a

460 // Unicode character; this implies that in a Unicode context the	439 // Unicode character; this implies that in a Unicode context the

461 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF	440 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF

(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
568 PushBack('-'); // undo Advance()	547 PushBack('-'); // undo Advance()

569 }	548 }

570 PushBack('!'); // undo Advance()	549 PushBack('!'); // undo Advance()

571 ASSERT(c0_ == '!');	550 ASSERT(c0_ == '!');

572 return Token::LT;	551 return Token::LT;

573 }	552 }

574	553

575	554

576	555

577 void Scanner::ScanJson() {	556 void Scanner::ScanJson() {

578 next_.literal_buffer = NULL;	557 next_.literal_chars = Vector<const char>();

579 Token::Value token;	558 Token::Value token;

580 has_line_terminator_before_next_ = false;	559 has_line_terminator_before_next_ = false;

581 do {	560 do {

582 // Remember the position of the next token	561 // Remember the position of the next token

583 next_.location.beg_pos = source_pos();	562 next_.location.beg_pos = source_pos();

584 switch (c0_) {	563 switch (c0_) {

585 case '\t':	564 case '\t':

586 case '\r':	565 case '\r':

587 case '\n':	566 case '\n':

588 case ' ':	567 case ' ':

(...skipping 165 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
754 Advance();	733 Advance();

755 text++;	734 text++;

756 }	735 }

757 if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL;	736 if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL;

758 TerminateLiteral();	737 TerminateLiteral();

759 return token;	738 return token;

760 }	739 }

761	740

762	741

763 void Scanner::ScanJavaScript() {	742 void Scanner::ScanJavaScript() {

764 next_.literal_buffer = NULL;	743 next_.literal_chars = Vector<const char>();

765 Token::Value token;	744 Token::Value token;

766 has_line_terminator_before_next_ = false;	745 has_line_terminator_before_next_ = false;

767 do {	746 do {

768 // Remember the position of the next token	747 // Remember the position of the next token

769 next_.location.beg_pos = source_pos();	748 next_.location.beg_pos = source_pos();

770	749

771 switch (c0_) {	750 switch (c0_) {

772 case ' ':	751 case ' ':

773 case '\t':	752 case '\t':

774 Advance();	753 Advance();

(...skipping 562 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1337 }	1316 }

1338 AddCharAdvance();	1317 AddCharAdvance();

1339 }	1318 }

1340 TerminateLiteral();	1319 TerminateLiteral();

1341	1320

1342 next_.location.end_pos = source_pos() - 1;	1321 next_.location.end_pos = source_pos() - 1;

1343 return true;	1322 return true;

1344 }	1323 }

1345	1324

1346 } } // namespace v8::internal	1325 } } // namespace v8::internal

OLD	NEW

« no previous file with comments | « src/scanner.h ('k') | src/utils.h » ('j') | src/utils.h » ('J')