OLD | NEW |
1 // Copyright 2006-2008 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
43 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator; | 43 unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator; |
44 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace; | 44 unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace; |
45 | 45 |
46 | 46 |
47 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_; | 47 StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_; |
48 | 48 |
49 | 49 |
50 // ---------------------------------------------------------------------------- | 50 // ---------------------------------------------------------------------------- |
51 // UTF8Buffer | 51 // UTF8Buffer |
52 | 52 |
53 UTF8Buffer::UTF8Buffer() : data_(NULL), limit_(NULL) { } | 53 UTF8Buffer::UTF8Buffer() : buffer_(kInitialCapacity) { } |
54 | 54 |
55 | 55 |
56 UTF8Buffer::~UTF8Buffer() { | 56 UTF8Buffer::~UTF8Buffer() {} |
57 if (data_ != NULL) DeleteArray(data_); | 57 |
| 58 |
| 59 void UTF8Buffer::AddCharSlow(uc32 c) { |
| 60 ASSERT(static_cast<unsigned>(c) > unibrow::Utf8::kMaxOneByteChar); |
| 61 int length = unibrow::Utf8::Length(c); |
| 62 Vector<char> block = buffer_.AddBlock(length, '\0'); |
| 63 #ifdef DEBUG |
| 64 int written_length = unibrow::Utf8::Encode(block.start(), c); |
| 65 CHECK_EQ(length, written_length); |
| 66 #else |
| 67 unibrow::Utf8::Encode(block.start(), c); |
| 68 #endif |
58 } | 69 } |
59 | 70 |
60 | 71 |
61 void UTF8Buffer::AddCharSlow(uc32 c) { | |
62 static const int kCapacityGrowthLimit = 1 * MB; | |
63 if (cursor_ > limit_) { | |
64 int old_capacity = Capacity(); | |
65 int old_position = pos(); | |
66 int new_capacity = | |
67 Min(old_capacity * 3, old_capacity + kCapacityGrowthLimit); | |
68 char* new_data = NewArray<char>(new_capacity); | |
69 memcpy(new_data, data_, old_position); | |
70 DeleteArray(data_); | |
71 data_ = new_data; | |
72 cursor_ = new_data + old_position; | |
73 limit_ = ComputeLimit(new_data, new_capacity); | |
74 ASSERT(Capacity() == new_capacity && pos() == old_position); | |
75 } | |
76 if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { | |
77 *cursor_++ = c; // Common case: 7-bit ASCII. | |
78 } else { | |
79 cursor_ += unibrow::Utf8::Encode(cursor_, c); | |
80 } | |
81 ASSERT(pos() <= Capacity()); | |
82 } | |
83 | |
84 | |
85 // ---------------------------------------------------------------------------- | 72 // ---------------------------------------------------------------------------- |
86 // UTF16Buffer | 73 // UTF16Buffer |
87 | 74 |
88 | 75 |
89 UTF16Buffer::UTF16Buffer() | 76 UTF16Buffer::UTF16Buffer() |
90 : pos_(0), end_(Scanner::kNoEndPosition) { } | 77 : pos_(0), end_(Scanner::kNoEndPosition) { } |
91 | 78 |
92 | 79 |
93 // CharacterStreamUTF16Buffer | 80 // CharacterStreamUTF16Buffer |
94 CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer() | 81 CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer() |
(...skipping 297 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
392 start_position, | 379 start_position, |
393 end_position); | 380 end_position); |
394 source_ = &char_stream_buffer_; | 381 source_ = &char_stream_buffer_; |
395 } | 382 } |
396 | 383 |
397 is_parsing_json_ = (language == JSON); | 384 is_parsing_json_ = (language == JSON); |
398 | 385 |
399 // Set c0_ (one character ahead) | 386 // Set c0_ (one character ahead) |
400 ASSERT(kCharacterLookaheadBufferSize == 1); | 387 ASSERT(kCharacterLookaheadBufferSize == 1); |
401 Advance(); | 388 Advance(); |
402 // Initializer current_ to not refer to a literal buffer. | 389 // Initialise current_ to not refer to a literal. |
403 current_.literal_buffer = NULL; | 390 current_.literal_chars = Vector<const char>(); |
404 | 391 |
405 // Skip initial whitespace allowing HTML comment ends just like | 392 // Skip initial whitespace allowing HTML comment ends just like |
406 // after a newline and scan first token. | 393 // after a newline and scan first token. |
407 has_line_terminator_before_next_ = true; | 394 has_line_terminator_before_next_ = true; |
408 SkipWhiteSpace(); | 395 SkipWhiteSpace(); |
409 Scan(); | 396 Scan(); |
410 } | 397 } |
411 | 398 |
412 | 399 |
413 Token::Value Scanner::Next() { | 400 Token::Value Scanner::Next() { |
414 // BUG 1215673: Find a thread safe way to set a stack limit in | 401 // BUG 1215673: Find a thread safe way to set a stack limit in |
415 // pre-parse mode. Otherwise, we cannot safely pre-parse from other | 402 // pre-parse mode. Otherwise, we cannot safely pre-parse from other |
416 // threads. | 403 // threads. |
417 current_ = next_; | 404 current_ = next_; |
418 // Check for stack-overflow before returning any tokens. | 405 // Check for stack-overflow before returning any tokens. |
419 StackLimitCheck check; | 406 StackLimitCheck check; |
420 if (check.HasOverflowed()) { | 407 if (check.HasOverflowed()) { |
421 stack_overflow_ = true; | 408 stack_overflow_ = true; |
422 next_.token = Token::ILLEGAL; | 409 next_.token = Token::ILLEGAL; |
423 } else { | 410 } else { |
424 Scan(); | 411 Scan(); |
425 } | 412 } |
426 return current_.token; | 413 return current_.token; |
427 } | 414 } |
428 | 415 |
429 | 416 |
430 void Scanner::StartLiteral() { | 417 void Scanner::StartLiteral() { |
431 // Use the first buffer unless it's currently in use by the current_ token. | 418 literal_buffer_.StartLiteral(); |
432 // In most cases we won't have two literals/identifiers in a row, so | |
433 // the second buffer won't be used very often and is unlikely to grow much. | |
434 UTF8Buffer* free_buffer = | |
435 (current_.literal_buffer != &literal_buffer_1_) ? &literal_buffer_1_ | |
436 : &literal_buffer_2_; | |
437 next_.literal_buffer = free_buffer; | |
438 free_buffer->Reset(); | |
439 } | 419 } |
440 | 420 |
441 | 421 |
442 void Scanner::AddChar(uc32 c) { | 422 void Scanner::AddChar(uc32 c) { |
443 next_.literal_buffer->AddChar(c); | 423 literal_buffer_.AddChar(c); |
| 424 } |
| 425 |
| 426 void Scanner::TerminateLiteral() { |
| 427 next_.literal_chars = literal_buffer_.EndLiteral(); |
444 } | 428 } |
445 | 429 |
446 | 430 |
447 void Scanner::TerminateLiteral() { | |
448 AddChar(0); | |
449 } | |
450 | |
451 | |
452 void Scanner::AddCharAdvance() { | 431 void Scanner::AddCharAdvance() { |
453 AddChar(c0_); | 432 AddChar(c0_); |
454 Advance(); | 433 Advance(); |
455 } | 434 } |
456 | 435 |
457 | 436 |
458 static inline bool IsByteOrderMark(uc32 c) { | 437 static inline bool IsByteOrderMark(uc32 c) { |
459 // The Unicode value U+FFFE is guaranteed never to be assigned as a | 438 // The Unicode value U+FFFE is guaranteed never to be assigned as a |
460 // Unicode character; this implies that in a Unicode context the | 439 // Unicode character; this implies that in a Unicode context the |
461 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF | 440 // 0xFF, 0xFE byte pattern can only be interpreted as the U+FEFF |
(...skipping 106 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
568 PushBack('-'); // undo Advance() | 547 PushBack('-'); // undo Advance() |
569 } | 548 } |
570 PushBack('!'); // undo Advance() | 549 PushBack('!'); // undo Advance() |
571 ASSERT(c0_ == '!'); | 550 ASSERT(c0_ == '!'); |
572 return Token::LT; | 551 return Token::LT; |
573 } | 552 } |
574 | 553 |
575 | 554 |
576 | 555 |
577 void Scanner::ScanJson() { | 556 void Scanner::ScanJson() { |
578 next_.literal_buffer = NULL; | 557 next_.literal_chars = Vector<const char>(); |
579 Token::Value token; | 558 Token::Value token; |
580 has_line_terminator_before_next_ = false; | 559 has_line_terminator_before_next_ = false; |
581 do { | 560 do { |
582 // Remember the position of the next token | 561 // Remember the position of the next token |
583 next_.location.beg_pos = source_pos(); | 562 next_.location.beg_pos = source_pos(); |
584 switch (c0_) { | 563 switch (c0_) { |
585 case '\t': | 564 case '\t': |
586 case '\r': | 565 case '\r': |
587 case '\n': | 566 case '\n': |
588 case ' ': | 567 case ' ': |
(...skipping 165 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
754 Advance(); | 733 Advance(); |
755 text++; | 734 text++; |
756 } | 735 } |
757 if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL; | 736 if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL; |
758 TerminateLiteral(); | 737 TerminateLiteral(); |
759 return token; | 738 return token; |
760 } | 739 } |
761 | 740 |
762 | 741 |
763 void Scanner::ScanJavaScript() { | 742 void Scanner::ScanJavaScript() { |
764 next_.literal_buffer = NULL; | 743 next_.literal_chars = Vector<const char>(); |
765 Token::Value token; | 744 Token::Value token; |
766 has_line_terminator_before_next_ = false; | 745 has_line_terminator_before_next_ = false; |
767 do { | 746 do { |
768 // Remember the position of the next token | 747 // Remember the position of the next token |
769 next_.location.beg_pos = source_pos(); | 748 next_.location.beg_pos = source_pos(); |
770 | 749 |
771 switch (c0_) { | 750 switch (c0_) { |
772 case ' ': | 751 case ' ': |
773 case '\t': | 752 case '\t': |
774 Advance(); | 753 Advance(); |
(...skipping 562 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1337 } | 1316 } |
1338 AddCharAdvance(); | 1317 AddCharAdvance(); |
1339 } | 1318 } |
1340 TerminateLiteral(); | 1319 TerminateLiteral(); |
1341 | 1320 |
1342 next_.location.end_pos = source_pos() - 1; | 1321 next_.location.end_pos = source_pos() - 1; |
1343 return true; | 1322 return true; |
1344 } | 1323 } |
1345 | 1324 |
1346 } } // namespace v8::internal | 1325 } } // namespace v8::internal |
OLD | NEW |