src/parsing/scanner.cc - Issue 2281443002: Separate DuplicateFinder from Scanner.

Side by Side Diff: src/parsing/scanner.cc

Issue 2281443002: Separate DuplicateFinder from Scanner. (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: Fix rebase. Created 4 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2011 the V8 project authors. All rights reserved.	1 // Copyright 2011 the V8 project authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 // Features shared by parsing and pre-parsing scanners.	5 // Features shared by parsing and pre-parsing scanners.

6	6

7 #include "src/parsing/scanner.h"	7 #include "src/parsing/scanner.h"

8	8

9 #include <stdint.h>	9 #include <stdint.h>

10	10

11 #include <cmath>	11 #include <cmath>

12	12

13 #include "src/ast/ast-value-factory.h"	13 #include "src/ast/ast-value-factory.h"

14 #include "src/char-predicates-inl.h"	14 #include "src/char-predicates-inl.h"

15 #include "src/conversions-inl.h"	15 #include "src/conversions-inl.h"

16 #include "src/list-inl.h"	16 #include "src/list-inl.h"

	17 #include "src/parsing/duplicate-finder.h" // For Scanner::FindSymbol

17	18

18 namespace v8 {	19 namespace v8 {

19 namespace internal {	20 namespace internal {

20	21

21 Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const {	22 Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const {

22 if (is_one_byte()) {	23 if (is_one_byte()) {

23 return isolate->factory()->InternalizeOneByteString(one_byte_literal());	24 return isolate->factory()->InternalizeOneByteString(one_byte_literal());

24 }	25 }

25 return isolate->factory()->InternalizeTwoByteString(two_byte_literal());	26 return isolate->factory()->InternalizeTwoByteString(two_byte_literal());

26 }	27 }

(...skipping 1539 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1566	1567

1567	1568

1568 bool Scanner::ContainsDot() {	1569 bool Scanner::ContainsDot() {

1569 DCHECK(is_literal_one_byte());	1570 DCHECK(is_literal_one_byte());

1570 Vector<const uint8_t> str = literal_one_byte_string();	1571 Vector<const uint8_t> str = literal_one_byte_string();

1571 return std::find(str.begin(), str.end(), '.') != str.end();	1572 return std::find(str.begin(), str.end(), '.') != str.end();

1572 }	1573 }

1573	1574

1574	1575

1575 int Scanner::FindSymbol(DuplicateFinder* finder, int value) {	1576 int Scanner::FindSymbol(DuplicateFinder* finder, int value) {

	1577 // TODO(vogelheim): Move this logic into the calling class; this can be fully

	1578 // implemented using the public interface.

1576 if (is_literal_one_byte()) {	1579 if (is_literal_one_byte()) {

1577 return finder->AddOneByteSymbol(literal_one_byte_string(), value);	1580 return finder->AddOneByteSymbol(literal_one_byte_string(), value);

1578 }	1581 }

1579 return finder->AddTwoByteSymbol(literal_two_byte_string(), value);	1582 return finder->AddTwoByteSymbol(literal_two_byte_string(), value);

1580 }	1583 }

1581	1584

1582	1585

1583 bool Scanner::SetBookmark() {	1586 bool Scanner::SetBookmark() {

1584 if (c0_ != kNoBookmark && bookmark_c0_ == kNoBookmark &&	1587 if (c0_ != kNoBookmark && bookmark_c0_ == kNoBookmark &&

1585 next_next_.token == Token::UNINITIALIZED && source_->SetBookmark()) {	1588 next_next_.token == Token::UNINITIALIZED && source_->SetBookmark()) {

(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
1625 void Scanner::CopyTokenDesc(TokenDesc* to, TokenDesc* from) {	1628 void Scanner::CopyTokenDesc(TokenDesc* to, TokenDesc* from) {

1626 DCHECK_NOT_NULL(to);	1629 DCHECK_NOT_NULL(to);

1627 DCHECK_NOT_NULL(from);	1630 DCHECK_NOT_NULL(from);

1628 to->token = from->token;	1631 to->token = from->token;

1629 to->location = from->location;	1632 to->location = from->location;

1630 to->literal_chars->CopyFrom(from->literal_chars);	1633 to->literal_chars->CopyFrom(from->literal_chars);

1631 to->raw_literal_chars->CopyFrom(from->raw_literal_chars);	1634 to->raw_literal_chars->CopyFrom(from->raw_literal_chars);

1632 }	1635 }

1633	1636

1634	1637

1635 int DuplicateFinder::AddOneByteSymbol(Vector<const uint8_t> key, int value) {

1636 return AddSymbol(key, true, value);

1637 }

1638

1639

1640 int DuplicateFinder::AddTwoByteSymbol(Vector<const uint16_t> key, int value) {

1641 return AddSymbol(Vector<const uint8_t>::cast(key), false, value);

1642 }

1643

1644

1645 int DuplicateFinder::AddSymbol(Vector<const uint8_t> key,

1646 bool is_one_byte,

1647 int value) {

1648 uint32_t hash = Hash(key, is_one_byte);

1649 byte* encoding = BackupKey(key, is_one_byte);

1650 base::HashMap::Entry* entry = map_.LookupOrInsert(encoding, hash);

1651 int old_value = static_cast<int>(reinterpret_cast<intptr_t>(entry->value));

1652 entry->value =

1653 reinterpret_cast<void*>(static_cast<intptr_t>(value | old_value));

1654 return old_value;

1655 }

1656

1657

1658 int DuplicateFinder::AddNumber(Vector<const uint8_t> key, int value) {

1659 DCHECK(key.length() > 0);

1660 // Quick check for already being in canonical form.

1661 if (IsNumberCanonical(key)) {

1662 return AddOneByteSymbol(key, value);

1663 }

1664

1665 int flags = ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY;

1666 double double_value = StringToDouble(

1667 unicode_constants_, key, flags, 0.0);

1668 int length;

1669 const char* string;

1670 if (!std::isfinite(double_value)) {

1671 string = "Infinity";

1672 length = 8; // strlen("Infinity");

1673 } else {

1674 string = DoubleToCString(double_value,

1675 Vector<char>(number_buffer_, kBufferSize));

1676 length = StrLength(string);

1677 }

1678 return AddSymbol(Vector<const byte>(reinterpret_cast<const byte*>(string),

1679 length), true, value);

1680 }

1681

1682

1683 bool DuplicateFinder::IsNumberCanonical(Vector<const uint8_t> number) {

1684 // Test for a safe approximation of number literals that are already

1685 // in canonical form: max 15 digits, no leading zeroes, except an

1686 // integer part that is a single zero, and no trailing zeros below

1687 // the decimal point.

1688 int pos = 0;

1689 int length = number.length();

1690 if (number.length() > 15) return false;

1691 if (number[pos] == '0') {

1692 pos++;

1693 } else {

1694 while (pos < length &&

1695 static_cast<unsigned>(number[pos] - '0') <= ('9' - '0')) pos++;

1696 }

1697 if (length == pos) return true;

1698 if (number[pos] != '.') return false;

1699 pos++;

1700 bool invalid_last_digit = true;

1701 while (pos < length) {

1702 uint8_t digit = number[pos] - '0';

1703 if (digit > '9' - '0') return false;

1704 invalid_last_digit = (digit == 0);

1705 pos++;

1706 }

1707 return !invalid_last_digit;

1708 }

1709

1710

1711 uint32_t DuplicateFinder::Hash(Vector<const uint8_t> key, bool is_one_byte) {

1712 // Primitive hash function, almost identical to the one used

1713 // for strings (except that it's seeded by the length and representation).

1714 int length = key.length();

1715 uint32_t hash = (length << 1) | (is_one_byte ? 1 : 0);

1716 for (int i = 0; i < length; i++) {

1717 uint32_t c = key[i];

1718 hash = (hash + c) * 1025;

1719 hash ^= (hash >> 6);

1720 }

1721 return hash;

1722 }

1723

1724

1725 bool DuplicateFinder::Match(void* first, void* second) {

1726 // Decode lengths.

1727 // Length + representation is encoded as base 128, most significant heptet

1728 // first, with a 8th bit being non-zero while there are more heptets.

1729 // The value encodes the number of bytes following, and whether the original

1730 // was Latin1.

1731 byte* s1 = reinterpret_cast<byte*>(first);

1732 byte* s2 = reinterpret_cast<byte*>(second);

1733 uint32_t length_one_byte_field = 0;

1734 byte c1;

1735 do {

1736 c1 = *s1;

1737 if (c1 != *s2) return false;

1738 length_one_byte_field = (length_one_byte_field << 7) | (c1 & 0x7f);

1739 s1++;

1740 s2++;

1741 } while ((c1 & 0x80) != 0);

1742 int length = static_cast<int>(length_one_byte_field >> 1);

1743 return memcmp(s1, s2, length) == 0;

1744 }

1745

1746

1747 byte* DuplicateFinder::BackupKey(Vector<const uint8_t> bytes,

1748 bool is_one_byte) {

1749 uint32_t one_byte_length = (bytes.length() << 1) | (is_one_byte ? 1 : 0);

1750 backing_store_.StartSequence();

1751 // Emit one_byte_length as base-128 encoded number, with the 7th bit set

1752 // on the byte of every heptet except the last, least significant, one.

1753 if (one_byte_length >= (1 << 7)) {

1754 if (one_byte_length >= (1 << 14)) {

1755 if (one_byte_length >= (1 << 21)) {

1756 if (one_byte_length >= (1 << 28)) {

1757 backing_store_.Add(

1758 static_cast<uint8_t>((one_byte_length >> 28) | 0x80));

1759 }

1760 backing_store_.Add(

1761 static_cast<uint8_t>((one_byte_length >> 21) | 0x80u));

1762 }

1763 backing_store_.Add(

1764 static_cast<uint8_t>((one_byte_length >> 14) | 0x80u));

1765 }

1766 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));

1767 }

1768 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));

1769

1770 backing_store_.AddBlock(bytes);

1771 return backing_store_.EndSequence().start();

1772 }

1773	1638

1774 } // namespace internal	1639 } // namespace internal

1775 } // namespace v8	1640 } // namespace v8

OLD	NEW

« no previous file with comments | « src/parsing/scanner.h ('k') | src/v8.gyp » ('j') | no next file with comments »