src/parsing/scanner.cc - Issue 2281443002: Separate DuplicateFinder from Scanner.

Side by Side Diff: src/parsing/scanner.cc

Issue 2281443002: Separate DuplicateFinder from Scanner. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: Created 4 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Features shared by parsing and pre-parsing scanners.	5 // Features shared by parsing and pre-parsing scanners.

6	6

7 #include "src/parsing/scanner.h"	7 #include "src/parsing/scanner.h"

8	8

9 #include <stdint.h>	9 #include <stdint.h>

10	10

11 #include <cmath>	11 #include <cmath>

12	12

13 #include "src/ast/ast-value-factory.h"	13 #include "src/ast/ast-value-factory.h"

14 #include "src/char-predicates-inl.h"	14 #include "src/char-predicates-inl.h"

15 #include "src/conversions-inl.h"	15 #include "src/conversions-inl.h"

16 #include "src/list-inl.h"	16 #include "src/list-inl.h"

	17 #include "src/parsing/duplicate-finder.h" // for Scanner::FindSymbol

17 #include "src/parsing/parser.h"	18 #include "src/parsing/parser.h"

18	19

19 namespace v8 {	20 namespace v8 {

20 namespace internal {	21 namespace internal {

21	22

22 Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const {	23 Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const {

23 if (is_one_byte()) {	24 if (is_one_byte()) {

24 return isolate->factory()->InternalizeOneByteString(one_byte_literal());	25 return isolate->factory()->InternalizeOneByteString(one_byte_literal());

25 }	26 }

26 return isolate->factory()->InternalizeTwoByteString(two_byte_literal());	27 return isolate->factory()->InternalizeTwoByteString(two_byte_literal());

(...skipping 1540 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1567	1568

1568	1569

1569 bool Scanner::ContainsDot() {	1570 bool Scanner::ContainsDot() {

1570 DCHECK(is_literal_one_byte());	1571 DCHECK(is_literal_one_byte());

1571 Vector<const uint8_t> str = literal_one_byte_string();	1572 Vector<const uint8_t> str = literal_one_byte_string();

1572 return std::find(str.begin(), str.end(), '.') != str.end();	1573 return std::find(str.begin(), str.end(), '.') != str.end();

1573 }	1574 }

1574	1575

1575	1576

1576 int Scanner::FindSymbol(DuplicateFinder* finder, int value) {	1577 int Scanner::FindSymbol(DuplicateFinder* finder, int value) {

	1578 // TODO(vogelheim): Move this logic into the calling class; this can be fully

	1579 // implemented using the public interface.

1577 if (is_literal_one_byte()) {	1580 if (is_literal_one_byte()) {

1578 return finder->AddOneByteSymbol(literal_one_byte_string(), value);	1581 return finder->AddOneByteSymbol(literal_one_byte_string(), value);

1579 }	1582 }

1580 return finder->AddTwoByteSymbol(literal_two_byte_string(), value);	1583 return finder->AddTwoByteSymbol(literal_two_byte_string(), value);

1581 }	1584 }

1582	1585

1583	1586

1584 bool Scanner::SetBookmark() {	1587 bool Scanner::SetBookmark() {

1585 if (c0_ != kNoBookmark && bookmark_c0_ == kNoBookmark &&	1588 if (c0_ != kNoBookmark && bookmark_c0_ == kNoBookmark &&

1586 next_next_.token == Token::UNINITIALIZED && source_->SetBookmark()) {	1589 next_next_.token == Token::UNINITIALIZED && source_->SetBookmark()) {

(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1626 void Scanner::CopyTokenDesc(TokenDesc* to, TokenDesc* from) {	1629 void Scanner::CopyTokenDesc(TokenDesc* to, TokenDesc* from) {

1627 DCHECK_NOT_NULL(to);	1630 DCHECK_NOT_NULL(to);

1628 DCHECK_NOT_NULL(from);	1631 DCHECK_NOT_NULL(from);

1629 to->token = from->token;	1632 to->token = from->token;

1630 to->location = from->location;	1633 to->location = from->location;

1631 to->literal_chars->CopyFrom(from->literal_chars);	1634 to->literal_chars->CopyFrom(from->literal_chars);

1632 to->raw_literal_chars->CopyFrom(from->raw_literal_chars);	1635 to->raw_literal_chars->CopyFrom(from->raw_literal_chars);

1633 }	1636 }

1634	1637

1635	1638

1636 int DuplicateFinder::AddOneByteSymbol(Vector<const uint8_t> key, int value) {

1637 return AddSymbol(key, true, value);

1638 }

1639

1640

1641 int DuplicateFinder::AddTwoByteSymbol(Vector<const uint16_t> key, int value) {

1642 return AddSymbol(Vector<const uint8_t>::cast(key), false, value);

1643 }

1644

1645

1646 int DuplicateFinder::AddSymbol(Vector<const uint8_t> key,

1647 bool is_one_byte,

1648 int value) {

1649 uint32_t hash = Hash(key, is_one_byte);

1650 byte* encoding = BackupKey(key, is_one_byte);

1651 base::HashMap::Entry* entry = map_.LookupOrInsert(encoding, hash);

1652 int old_value = static_cast<int>(reinterpret_cast<intptr_t>(entry->value));

1653 entry->value =

1654 reinterpret_cast<void*>(static_cast<intptr_t>(value | old_value));

1655 return old_value;

1656 }

1657

1658

1659 int DuplicateFinder::AddNumber(Vector<const uint8_t> key, int value) {

1660 DCHECK(key.length() > 0);

1661 // Quick check for already being in canonical form.

1662 if (IsNumberCanonical(key)) {

1663 return AddOneByteSymbol(key, value);

1664 }

1665

1666 int flags = ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY;

1667 double double_value = StringToDouble(

1668 unicode_constants_, key, flags, 0.0);

1669 int length;

1670 const char* string;

1671 if (!std::isfinite(double_value)) {

1672 string = "Infinity";

1673 length = 8; // strlen("Infinity");

1674 } else {

1675 string = DoubleToCString(double_value,

1676 Vector<char>(number_buffer_, kBufferSize));

1677 length = StrLength(string);

1678 }

1679 return AddSymbol(Vector<const byte>(reinterpret_cast<const byte*>(string),

1680 length), true, value);

1681 }

1682

1683

1684 bool DuplicateFinder::IsNumberCanonical(Vector<const uint8_t> number) {

1685 // Test for a safe approximation of number literals that are already

1686 // in canonical form: max 15 digits, no leading zeroes, except an

1687 // integer part that is a single zero, and no trailing zeros below

1688 // the decimal point.

1689 int pos = 0;

1690 int length = number.length();

1691 if (number.length() > 15) return false;

1692 if (number[pos] == '0') {

1693 pos++;

1694 } else {

1695 while (pos < length &&

1696 static_cast<unsigned>(number[pos] - '0') <= ('9' - '0')) pos++;

1697 }

1698 if (length == pos) return true;

1699 if (number[pos] != '.') return false;

1700 pos++;

1701 bool invalid_last_digit = true;

1702 while (pos < length) {

1703 uint8_t digit = number[pos] - '0';

1704 if (digit > '9' - '0') return false;

1705 invalid_last_digit = (digit == 0);

1706 pos++;

1707 }

1708 return !invalid_last_digit;

1709 }

1710

1711

1712 uint32_t DuplicateFinder::Hash(Vector<const uint8_t> key, bool is_one_byte) {

1713 // Primitive hash function, almost identical to the one used

1714 // for strings (except that it's seeded by the length and representation).

1715 int length = key.length();

1716 uint32_t hash = (length << 1) | (is_one_byte ? 1 : 0);

1717 for (int i = 0; i < length; i++) {

1718 uint32_t c = key[i];

1719 hash = (hash + c) * 1025;

1720 hash ^= (hash >> 6);

1721 }

1722 return hash;

1723 }

1724

1725

1726 bool DuplicateFinder::Match(void* first, void* second) {

1727 // Decode lengths.

1728 // Length + representation is encoded as base 128, most significant heptet

1729 // first, with a 8th bit being non-zero while there are more heptets.

1730 // The value encodes the number of bytes following, and whether the original

1731 // was Latin1.

1732 byte* s1 = reinterpret_cast<byte*>(first);

1733 byte* s2 = reinterpret_cast<byte*>(second);

1734 uint32_t length_one_byte_field = 0;

1735 byte c1;

1736 do {

1737 c1 = *s1;

1738 if (c1 != *s2) return false;

1739 length_one_byte_field = (length_one_byte_field << 7) | (c1 & 0x7f);

1740 s1++;

1741 s2++;

1742 } while ((c1 & 0x80) != 0);

1743 int length = static_cast<int>(length_one_byte_field >> 1);

1744 return memcmp(s1, s2, length) == 0;

1745 }

1746

1747

1748 byte* DuplicateFinder::BackupKey(Vector<const uint8_t> bytes,

1749 bool is_one_byte) {

1750 uint32_t one_byte_length = (bytes.length() << 1) | (is_one_byte ? 1 : 0);

1751 backing_store_.StartSequence();

1752 // Emit one_byte_length as base-128 encoded number, with the 7th bit set

1753 // on the byte of every heptet except the last, least significant, one.

1754 if (one_byte_length >= (1 << 7)) {

1755 if (one_byte_length >= (1 << 14)) {

1756 if (one_byte_length >= (1 << 21)) {

1757 if (one_byte_length >= (1 << 28)) {

1758 backing_store_.Add(

1759 static_cast<uint8_t>((one_byte_length >> 28) | 0x80));

1760 }

1761 backing_store_.Add(

1762 static_cast<uint8_t>((one_byte_length >> 21) | 0x80u));

1763 }

1764 backing_store_.Add(

1765 static_cast<uint8_t>((one_byte_length >> 14) | 0x80u));

1766 }

1767 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));

1768 }

1769 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));

1770

1771 backing_store_.AddBlock(bytes);

1772 return backing_store_.EndSequence().start();

1773 }

1774	1639

1775 } // namespace internal	1640 } // namespace internal

1776 } // namespace v8	1641 } // namespace v8

OLD	NEW

« src/collector.h ('K') | « src/parsing/scanner.h ('k') | src/v8.gyp » ('j') | no next file with comments »