OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
6 | 6 |
7 #include "src/parsing/scanner.h" | 7 #include "src/parsing/scanner.h" |
8 | 8 |
9 #include <stdint.h> | 9 #include <stdint.h> |
10 | 10 |
11 #include <cmath> | 11 #include <cmath> |
12 | 12 |
13 #include "src/ast/ast-value-factory.h" | 13 #include "src/ast/ast-value-factory.h" |
14 #include "src/char-predicates-inl.h" | 14 #include "src/char-predicates-inl.h" |
15 #include "src/conversions-inl.h" | 15 #include "src/conversions-inl.h" |
16 #include "src/list-inl.h" | 16 #include "src/list-inl.h" |
| 17 #include "src/parsing/duplicate-finder.h" // For Scanner::FindSymbol |
17 | 18 |
18 namespace v8 { | 19 namespace v8 { |
19 namespace internal { | 20 namespace internal { |
20 | 21 |
21 Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const { | 22 Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const { |
22 if (is_one_byte()) { | 23 if (is_one_byte()) { |
23 return isolate->factory()->InternalizeOneByteString(one_byte_literal()); | 24 return isolate->factory()->InternalizeOneByteString(one_byte_literal()); |
24 } | 25 } |
25 return isolate->factory()->InternalizeTwoByteString(two_byte_literal()); | 26 return isolate->factory()->InternalizeTwoByteString(two_byte_literal()); |
26 } | 27 } |
(...skipping 1539 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1566 | 1567 |
1567 | 1568 |
1568 bool Scanner::ContainsDot() { | 1569 bool Scanner::ContainsDot() { |
1569 DCHECK(is_literal_one_byte()); | 1570 DCHECK(is_literal_one_byte()); |
1570 Vector<const uint8_t> str = literal_one_byte_string(); | 1571 Vector<const uint8_t> str = literal_one_byte_string(); |
1571 return std::find(str.begin(), str.end(), '.') != str.end(); | 1572 return std::find(str.begin(), str.end(), '.') != str.end(); |
1572 } | 1573 } |
1573 | 1574 |
1574 | 1575 |
1575 int Scanner::FindSymbol(DuplicateFinder* finder, int value) { | 1576 int Scanner::FindSymbol(DuplicateFinder* finder, int value) { |
| 1577 // TODO(vogelheim): Move this logic into the calling class; this can be fully |
| 1578 // implemented using the public interface. |
1576 if (is_literal_one_byte()) { | 1579 if (is_literal_one_byte()) { |
1577 return finder->AddOneByteSymbol(literal_one_byte_string(), value); | 1580 return finder->AddOneByteSymbol(literal_one_byte_string(), value); |
1578 } | 1581 } |
1579 return finder->AddTwoByteSymbol(literal_two_byte_string(), value); | 1582 return finder->AddTwoByteSymbol(literal_two_byte_string(), value); |
1580 } | 1583 } |
1581 | 1584 |
1582 | 1585 |
1583 bool Scanner::SetBookmark() { | 1586 bool Scanner::SetBookmark() { |
1584 if (c0_ != kNoBookmark && bookmark_c0_ == kNoBookmark && | 1587 if (c0_ != kNoBookmark && bookmark_c0_ == kNoBookmark && |
1585 next_next_.token == Token::UNINITIALIZED && source_->SetBookmark()) { | 1588 next_next_.token == Token::UNINITIALIZED && source_->SetBookmark()) { |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1625 void Scanner::CopyTokenDesc(TokenDesc* to, TokenDesc* from) { | 1628 void Scanner::CopyTokenDesc(TokenDesc* to, TokenDesc* from) { |
1626 DCHECK_NOT_NULL(to); | 1629 DCHECK_NOT_NULL(to); |
1627 DCHECK_NOT_NULL(from); | 1630 DCHECK_NOT_NULL(from); |
1628 to->token = from->token; | 1631 to->token = from->token; |
1629 to->location = from->location; | 1632 to->location = from->location; |
1630 to->literal_chars->CopyFrom(from->literal_chars); | 1633 to->literal_chars->CopyFrom(from->literal_chars); |
1631 to->raw_literal_chars->CopyFrom(from->raw_literal_chars); | 1634 to->raw_literal_chars->CopyFrom(from->raw_literal_chars); |
1632 } | 1635 } |
1633 | 1636 |
1634 | 1637 |
1635 int DuplicateFinder::AddOneByteSymbol(Vector<const uint8_t> key, int value) { | |
1636 return AddSymbol(key, true, value); | |
1637 } | |
1638 | |
1639 | |
1640 int DuplicateFinder::AddTwoByteSymbol(Vector<const uint16_t> key, int value) { | |
1641 return AddSymbol(Vector<const uint8_t>::cast(key), false, value); | |
1642 } | |
1643 | |
1644 | |
1645 int DuplicateFinder::AddSymbol(Vector<const uint8_t> key, | |
1646 bool is_one_byte, | |
1647 int value) { | |
1648 uint32_t hash = Hash(key, is_one_byte); | |
1649 byte* encoding = BackupKey(key, is_one_byte); | |
1650 base::HashMap::Entry* entry = map_.LookupOrInsert(encoding, hash); | |
1651 int old_value = static_cast<int>(reinterpret_cast<intptr_t>(entry->value)); | |
1652 entry->value = | |
1653 reinterpret_cast<void*>(static_cast<intptr_t>(value | old_value)); | |
1654 return old_value; | |
1655 } | |
1656 | |
1657 | |
1658 int DuplicateFinder::AddNumber(Vector<const uint8_t> key, int value) { | |
1659 DCHECK(key.length() > 0); | |
1660 // Quick check for already being in canonical form. | |
1661 if (IsNumberCanonical(key)) { | |
1662 return AddOneByteSymbol(key, value); | |
1663 } | |
1664 | |
1665 int flags = ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY; | |
1666 double double_value = StringToDouble( | |
1667 unicode_constants_, key, flags, 0.0); | |
1668 int length; | |
1669 const char* string; | |
1670 if (!std::isfinite(double_value)) { | |
1671 string = "Infinity"; | |
1672 length = 8; // strlen("Infinity"); | |
1673 } else { | |
1674 string = DoubleToCString(double_value, | |
1675 Vector<char>(number_buffer_, kBufferSize)); | |
1676 length = StrLength(string); | |
1677 } | |
1678 return AddSymbol(Vector<const byte>(reinterpret_cast<const byte*>(string), | |
1679 length), true, value); | |
1680 } | |
1681 | |
1682 | |
1683 bool DuplicateFinder::IsNumberCanonical(Vector<const uint8_t> number) { | |
1684 // Test for a safe approximation of number literals that are already | |
1685 // in canonical form: max 15 digits, no leading zeroes, except an | |
1686 // integer part that is a single zero, and no trailing zeros below | |
1687 // the decimal point. | |
1688 int pos = 0; | |
1689 int length = number.length(); | |
1690 if (number.length() > 15) return false; | |
1691 if (number[pos] == '0') { | |
1692 pos++; | |
1693 } else { | |
1694 while (pos < length && | |
1695 static_cast<unsigned>(number[pos] - '0') <= ('9' - '0')) pos++; | |
1696 } | |
1697 if (length == pos) return true; | |
1698 if (number[pos] != '.') return false; | |
1699 pos++; | |
1700 bool invalid_last_digit = true; | |
1701 while (pos < length) { | |
1702 uint8_t digit = number[pos] - '0'; | |
1703 if (digit > '9' - '0') return false; | |
1704 invalid_last_digit = (digit == 0); | |
1705 pos++; | |
1706 } | |
1707 return !invalid_last_digit; | |
1708 } | |
1709 | |
1710 | |
1711 uint32_t DuplicateFinder::Hash(Vector<const uint8_t> key, bool is_one_byte) { | |
1712 // Primitive hash function, almost identical to the one used | |
1713 // for strings (except that it's seeded by the length and representation). | |
1714 int length = key.length(); | |
1715 uint32_t hash = (length << 1) | (is_one_byte ? 1 : 0); | |
1716 for (int i = 0; i < length; i++) { | |
1717 uint32_t c = key[i]; | |
1718 hash = (hash + c) * 1025; | |
1719 hash ^= (hash >> 6); | |
1720 } | |
1721 return hash; | |
1722 } | |
1723 | |
1724 | |
1725 bool DuplicateFinder::Match(void* first, void* second) { | |
1726 // Decode lengths. | |
1727 // Length + representation is encoded as base 128, most significant heptet | |
1728 // first, with a 8th bit being non-zero while there are more heptets. | |
1729 // The value encodes the number of bytes following, and whether the original | |
1730 // was Latin1. | |
1731 byte* s1 = reinterpret_cast<byte*>(first); | |
1732 byte* s2 = reinterpret_cast<byte*>(second); | |
1733 uint32_t length_one_byte_field = 0; | |
1734 byte c1; | |
1735 do { | |
1736 c1 = *s1; | |
1737 if (c1 != *s2) return false; | |
1738 length_one_byte_field = (length_one_byte_field << 7) | (c1 & 0x7f); | |
1739 s1++; | |
1740 s2++; | |
1741 } while ((c1 & 0x80) != 0); | |
1742 int length = static_cast<int>(length_one_byte_field >> 1); | |
1743 return memcmp(s1, s2, length) == 0; | |
1744 } | |
1745 | |
1746 | |
1747 byte* DuplicateFinder::BackupKey(Vector<const uint8_t> bytes, | |
1748 bool is_one_byte) { | |
1749 uint32_t one_byte_length = (bytes.length() << 1) | (is_one_byte ? 1 : 0); | |
1750 backing_store_.StartSequence(); | |
1751 // Emit one_byte_length as base-128 encoded number, with the 7th bit set | |
1752 // on the byte of every heptet except the last, least significant, one. | |
1753 if (one_byte_length >= (1 << 7)) { | |
1754 if (one_byte_length >= (1 << 14)) { | |
1755 if (one_byte_length >= (1 << 21)) { | |
1756 if (one_byte_length >= (1 << 28)) { | |
1757 backing_store_.Add( | |
1758 static_cast<uint8_t>((one_byte_length >> 28) | 0x80)); | |
1759 } | |
1760 backing_store_.Add( | |
1761 static_cast<uint8_t>((one_byte_length >> 21) | 0x80u)); | |
1762 } | |
1763 backing_store_.Add( | |
1764 static_cast<uint8_t>((one_byte_length >> 14) | 0x80u)); | |
1765 } | |
1766 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); | |
1767 } | |
1768 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); | |
1769 | |
1770 backing_store_.AddBlock(bytes); | |
1771 return backing_store_.EndSequence().start(); | |
1772 } | |
1773 | 1638 |
1774 } // namespace internal | 1639 } // namespace internal |
1775 } // namespace v8 | 1640 } // namespace v8 |
OLD | NEW |