Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(137)

Side by Side Diff: src/scanner.cc

Issue 3181036: Created collector class and used it to collect identifiers during scanning. (Closed)
Patch Set: Created 10 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/scanner.h ('k') | src/utils.h » ('j') | src/utils.h » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
OLDNEW
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. 1 // Copyright 2006-2008 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without 2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are 3 // modification, are permitted provided that the following conditions are
4 // met: 4 // met:
5 // 5 //
6 // * Redistributions of source code must retain the above copyright 6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer. 7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above 8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following 9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided 10 // disclaimer in the documentation and/or other materials provided
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
43 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator; 43 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator;
44 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace; 44 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace;
45 45
46 46
47 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_; 47 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_;
48 48
49 49
50 // ---------------------------------------------------------------------------- 50 // ----------------------------------------------------------------------------
51 // UTF8Buffer 51 // UTF8Buffer
52 52
53 UTF8Buffer::UTF8Buffer() : data_(NULL), limit_(NULL) { } 53 UTF8Buffer::UTF8Buffer() : buffer_(kInitialCapacity) { }
54 54
55 55
56 UTF8Buffer::~UTF8Buffer() { 56 UTF8Buffer::~UTF8Buffer() {}
57 if (data_ != NULL) DeleteArray(data_); 57
58
59 void UTF8Buffer::AddCharSlow(uc32 c) {
60 ASSERT(static_cast<unsigned>(c) > unibrow::Utf8::kMaxOneByteChar);
61 int length = unibrow::Utf8::Length(c);
62 Vector<char> block = buffer_.AddBlock(length, '\0');
63 #ifdef DEBUG
64 int written_length = unibrow::Utf8::Encode(block.start(), c);
65 CHECK_EQ(length, written_length);
66 #else
67 unibrow::Utf8::Encode(block.start(), c);
68 #endif
58 } 69 }
59 70
60 71
61 void UTF8Buffer::AddCharSlow(uc32 c) {
62 static const int kCapacityGrowthLimit = 1 * MB;
63 if (cursor_ > limit_) {
64 int old_capacity = Capacity();
65 int old_position = pos();
66 int new_capacity =
67 Min(old_capacity * 3, old_capacity + kCapacityGrowthLimit);
68 char* new_data = NewArray<char>(new_capacity);
69 memcpy(new_data, data_, old_position);
70 DeleteArray(data_);
71 data_ = new_data;
72 cursor_ = new_data + old_position;
73 limit_ = ComputeLimit(new_data, new_capacity);
74 ASSERT(Capacity() == new_capacity && pos() == old_position);
75 }
76 if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) {
77 *cursor_++ = c; // Common case: 7-bit ASCII.
78 } else {
79 cursor_ += unibrow::Utf8::Encode(cursor_, c);
80 }
81 ASSERT(pos() <= Capacity());
82 }
83
84
85 // ---------------------------------------------------------------------------- 72 // ----------------------------------------------------------------------------
86 // UTF16Buffer 73 // UTF16Buffer
87 74
88 75
89 UTF16Buffer::UTF16Buffer() 76 UTF16Buffer::UTF16Buffer()
90 : pos_(0), end_(Scanner::kNoEndPosition) { } 77 : pos_(0), end_(Scanner::kNoEndPosition) { }
91 78
92 79
93 // CharacterStreamUTF16Buffer 80 // CharacterStreamUTF16Buffer
94 CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer() 81 CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer()
(...skipping 297 matching lines...) Expand 10 before | Expand all | Expand 10 after
392 start_position, 379 start_position,
393 end_position); 380 end_position);
394 source_ = &char_stream_buffer_; 381 source_ = &char_stream_buffer_;
395 } 382 }
396 383
397 is_parsing_json_ = (language == JSON); 384 is_parsing_json_ = (language == JSON);
398 385
399 // Set c0_ (one character ahead) 386 // Set c0_ (one character ahead)
400 ASSERT(kCharacterLookaheadBufferSize == 1); 387 ASSERT(kCharacterLookaheadBufferSize == 1);
401 Advance(); 388 Advance();
402 // Initializer current_ to not refer to a literal buffer. 389 // Initialise current_ to not refer to a literal.
403 current_.literal_buffer = NULL; 390 current_.literal_chars = Vector<const char>();
404 391
405 // Skip initial whitespace allowing HTML comment ends just like 392 // Skip initial whitespace allowing HTML comment ends just like
406 // after a newline and scan first token. 393 // after a newline and scan first token.
407 has_line_terminator_before_next_ = true; 394 has_line_terminator_before_next_ = true;
408 SkipWhiteSpace(); 395 SkipWhiteSpace();
409 Scan(); 396 Scan();
410 } 397 }
411 398
412 399
413 Token::Value Scanner::Next() { 400 Token::Value Scanner::Next() {
414 // BUG 1215673: Find a thread safe way to set a stack limit in 401 // BUG 1215673: Find a thread safe way to set a stack limit in
415 // pre-parse mode. Otherwise, we cannot safely pre-parse from other 402 // pre-parse mode. Otherwise, we cannot safely pre-parse from other
416 // threads. 403 // threads.
417 current_ = next_; 404 current_ = next_;
418 // Check for stack-overflow before returning any tokens. 405 // Check for stack-overflow before returning any tokens.
419 StackLimitCheck check; 406 StackLimitCheck check;
420 if (check.HasOverflowed()) { 407 if (check.HasOverflowed()) {
421 stack_overflow_ = true; 408 stack_overflow_ = true;
422 next_.token = Token::ILLEGAL; 409 next_.token = Token::ILLEGAL;
423 } else { 410 } else {
424 Scan(); 411 Scan();
425 } 412 }
426 return current_.token; 413 return current_.token;
427 } 414 }
428 415
429 416
430 void Scanner::StartLiteral() { 417 void Scanner::StartLiteral() {
431 // Use the first buffer unless it's currently in use by the current_ token. 418 literal_buffer_.StartLiteral();
432 // In most cases we won't have two literals/identifiers in a row, so
433 // the second buffer won't be used very often and is unlikely to grow much.
434 UTF8Buffer* free_buffer =
435 (current_.literal_buffer != &literal_buffer_1_) ? &literal_buffer_1_
436 : &literal_buffer_2_;
437 next_.literal_buffer = free_buffer;
438 free_buffer->Reset();
439 } 419 }
440 420
441 421
442 void Scanner::AddChar(uc32 c) { 422 void Scanner::AddChar(uc32 c) {
443 next_.literal_buffer->AddChar(c); 423 literal_buffer_.AddChar(c);
424 }
425
426 void Scanner::TerminateLiteral() {
427 next_.literal_chars = literal_buffer_.EndLiteral();
444 } 428 }
445 429
446 430
447 void Scanner::TerminateLiteral() {
448 AddChar(0);
449 }
450
451
452 void Scanner::AddCharAdvance() { 431 void Scanner::AddCharAdvance() {
453 AddChar(c0_); 432 AddChar(c0_);
454 Advance(); 433 Advance();
455 } 434 }
456 435
457 436
458 static inline bool IsByteOrderMark(uc32 c) { 437 static inline bool IsByteOrderMark(uc32 c) {
459 // The Unicode value U+FFFE is guaranteed never to be assigned as a 438 // The Unicode value U+FFFE is guaranteed never to be assigned as a
460 // Unicode character; this implies that in a Unicode context the 439 // Unicode character; this implies that in a Unicode context the
461 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF 440 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after
568 PushBack('-'); // undo Advance() 547 PushBack('-'); // undo Advance()
569 } 548 }
570 PushBack('!'); // undo Advance() 549 PushBack('!'); // undo Advance()
571 ASSERT(c0_ == '!'); 550 ASSERT(c0_ == '!');
572 return Token::LT; 551 return Token::LT;
573 } 552 }
574 553
575 554
576 555
577 void Scanner::ScanJson() { 556 void Scanner::ScanJson() {
578 next_.literal_buffer = NULL; 557 next_.literal_chars = Vector<const char>();
579 Token::Value token; 558 Token::Value token;
580 has_line_terminator_before_next_ = false; 559 has_line_terminator_before_next_ = false;
581 do { 560 do {
582 // Remember the position of the next token 561 // Remember the position of the next token
583 next_.location.beg_pos = source_pos(); 562 next_.location.beg_pos = source_pos();
584 switch (c0_) { 563 switch (c0_) {
585 case '\t': 564 case '\t':
586 case '\r': 565 case '\r':
587 case '\n': 566 case '\n':
588 case ' ': 567 case ' ':
(...skipping 165 matching lines...) Expand 10 before | Expand all | Expand 10 after
754 Advance(); 733 Advance();
755 text++; 734 text++;
756 } 735 }
757 if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL; 736 if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL;
758 TerminateLiteral(); 737 TerminateLiteral();
759 return token; 738 return token;
760 } 739 }
761 740
762 741
763 void Scanner::ScanJavaScript() { 742 void Scanner::ScanJavaScript() {
764 next_.literal_buffer = NULL; 743 next_.literal_chars = Vector<const char>();
765 Token::Value token; 744 Token::Value token;
766 has_line_terminator_before_next_ = false; 745 has_line_terminator_before_next_ = false;
767 do { 746 do {
768 // Remember the position of the next token 747 // Remember the position of the next token
769 next_.location.beg_pos = source_pos(); 748 next_.location.beg_pos = source_pos();
770 749
771 switch (c0_) { 750 switch (c0_) {
772 case ' ': 751 case ' ':
773 case '\t': 752 case '\t':
774 Advance(); 753 Advance();
(...skipping 562 matching lines...) Expand 10 before | Expand all | Expand 10 after
1337 } 1316 }
1338 AddCharAdvance(); 1317 AddCharAdvance();
1339 } 1318 }
1340 TerminateLiteral(); 1319 TerminateLiteral();
1341 1320
1342 next_.location.end_pos = source_pos() - 1; 1321 next_.location.end_pos = source_pos() - 1;
1343 return true; 1322 return true;
1344 } 1323 }
1345 1324
1346 } } // namespace v8::internal 1325 } } // namespace v8::internal
OLDNEW
« no previous file with comments | « src/scanner.h ('k') | src/utils.h » ('j') | src/utils.h » ('J')

Powered by Google App Engine
This is Rietveld 408576698