| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
| 6 | 6 |
| 7 #include "src/parsing/scanner.h" | 7 #include "src/parsing/scanner.h" |
| 8 | 8 |
| 9 #include <stdint.h> | 9 #include <stdint.h> |
| 10 | 10 |
| 11 #include <cmath> | 11 #include <cmath> |
| 12 | 12 |
| 13 #include "src/ast/ast-value-factory.h" | 13 #include "src/ast/ast-value-factory.h" |
| 14 #include "src/char-predicates-inl.h" | 14 #include "src/char-predicates-inl.h" |
| 15 #include "src/conversions-inl.h" | 15 #include "src/conversions-inl.h" |
| 16 #include "src/list-inl.h" | 16 #include "src/list-inl.h" |
| 17 #include "src/parsing/duplicate-finder.h" // For Scanner::FindSymbol |
| 17 | 18 |
| 18 namespace v8 { | 19 namespace v8 { |
| 19 namespace internal { | 20 namespace internal { |
| 20 | 21 |
| 21 Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const { | 22 Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const { |
| 22 if (is_one_byte()) { | 23 if (is_one_byte()) { |
| 23 return isolate->factory()->InternalizeOneByteString(one_byte_literal()); | 24 return isolate->factory()->InternalizeOneByteString(one_byte_literal()); |
| 24 } | 25 } |
| 25 return isolate->factory()->InternalizeTwoByteString(two_byte_literal()); | 26 return isolate->factory()->InternalizeTwoByteString(two_byte_literal()); |
| 26 } | 27 } |
| (...skipping 1539 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1566 | 1567 |
| 1567 | 1568 |
| 1568 bool Scanner::ContainsDot() { | 1569 bool Scanner::ContainsDot() { |
| 1569 DCHECK(is_literal_one_byte()); | 1570 DCHECK(is_literal_one_byte()); |
| 1570 Vector<const uint8_t> str = literal_one_byte_string(); | 1571 Vector<const uint8_t> str = literal_one_byte_string(); |
| 1571 return std::find(str.begin(), str.end(), '.') != str.end(); | 1572 return std::find(str.begin(), str.end(), '.') != str.end(); |
| 1572 } | 1573 } |
| 1573 | 1574 |
| 1574 | 1575 |
| 1575 int Scanner::FindSymbol(DuplicateFinder* finder, int value) { | 1576 int Scanner::FindSymbol(DuplicateFinder* finder, int value) { |
| 1577 // TODO(vogelheim): Move this logic into the calling class; this can be fully |
| 1578 // implemented using the public interface. |
| 1576 if (is_literal_one_byte()) { | 1579 if (is_literal_one_byte()) { |
| 1577 return finder->AddOneByteSymbol(literal_one_byte_string(), value); | 1580 return finder->AddOneByteSymbol(literal_one_byte_string(), value); |
| 1578 } | 1581 } |
| 1579 return finder->AddTwoByteSymbol(literal_two_byte_string(), value); | 1582 return finder->AddTwoByteSymbol(literal_two_byte_string(), value); |
| 1580 } | 1583 } |
| 1581 | 1584 |
| 1582 | 1585 |
| 1583 bool Scanner::SetBookmark() { | 1586 bool Scanner::SetBookmark() { |
| 1584 if (c0_ != kNoBookmark && bookmark_c0_ == kNoBookmark && | 1587 if (c0_ != kNoBookmark && bookmark_c0_ == kNoBookmark && |
| 1585 next_next_.token == Token::UNINITIALIZED && source_->SetBookmark()) { | 1588 next_next_.token == Token::UNINITIALIZED && source_->SetBookmark()) { |
| (...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1625 void Scanner::CopyTokenDesc(TokenDesc* to, TokenDesc* from) { | 1628 void Scanner::CopyTokenDesc(TokenDesc* to, TokenDesc* from) { |
| 1626 DCHECK_NOT_NULL(to); | 1629 DCHECK_NOT_NULL(to); |
| 1627 DCHECK_NOT_NULL(from); | 1630 DCHECK_NOT_NULL(from); |
| 1628 to->token = from->token; | 1631 to->token = from->token; |
| 1629 to->location = from->location; | 1632 to->location = from->location; |
| 1630 to->literal_chars->CopyFrom(from->literal_chars); | 1633 to->literal_chars->CopyFrom(from->literal_chars); |
| 1631 to->raw_literal_chars->CopyFrom(from->raw_literal_chars); | 1634 to->raw_literal_chars->CopyFrom(from->raw_literal_chars); |
| 1632 } | 1635 } |
| 1633 | 1636 |
| 1634 | 1637 |
| 1635 int DuplicateFinder::AddOneByteSymbol(Vector<const uint8_t> key, int value) { | |
| 1636 return AddSymbol(key, true, value); | |
| 1637 } | |
| 1638 | |
| 1639 | |
| 1640 int DuplicateFinder::AddTwoByteSymbol(Vector<const uint16_t> key, int value) { | |
| 1641 return AddSymbol(Vector<const uint8_t>::cast(key), false, value); | |
| 1642 } | |
| 1643 | |
| 1644 | |
| 1645 int DuplicateFinder::AddSymbol(Vector<const uint8_t> key, | |
| 1646 bool is_one_byte, | |
| 1647 int value) { | |
| 1648 uint32_t hash = Hash(key, is_one_byte); | |
| 1649 byte* encoding = BackupKey(key, is_one_byte); | |
| 1650 base::HashMap::Entry* entry = map_.LookupOrInsert(encoding, hash); | |
| 1651 int old_value = static_cast<int>(reinterpret_cast<intptr_t>(entry->value)); | |
| 1652 entry->value = | |
| 1653 reinterpret_cast<void*>(static_cast<intptr_t>(value | old_value)); | |
| 1654 return old_value; | |
| 1655 } | |
| 1656 | |
| 1657 | |
| 1658 int DuplicateFinder::AddNumber(Vector<const uint8_t> key, int value) { | |
| 1659 DCHECK(key.length() > 0); | |
| 1660 // Quick check for already being in canonical form. | |
| 1661 if (IsNumberCanonical(key)) { | |
| 1662 return AddOneByteSymbol(key, value); | |
| 1663 } | |
| 1664 | |
| 1665 int flags = ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY; | |
| 1666 double double_value = StringToDouble( | |
| 1667 unicode_constants_, key, flags, 0.0); | |
| 1668 int length; | |
| 1669 const char* string; | |
| 1670 if (!std::isfinite(double_value)) { | |
| 1671 string = "Infinity"; | |
| 1672 length = 8; // strlen("Infinity"); | |
| 1673 } else { | |
| 1674 string = DoubleToCString(double_value, | |
| 1675 Vector<char>(number_buffer_, kBufferSize)); | |
| 1676 length = StrLength(string); | |
| 1677 } | |
| 1678 return AddSymbol(Vector<const byte>(reinterpret_cast<const byte*>(string), | |
| 1679 length), true, value); | |
| 1680 } | |
| 1681 | |
| 1682 | |
| 1683 bool DuplicateFinder::IsNumberCanonical(Vector<const uint8_t> number) { | |
| 1684 // Test for a safe approximation of number literals that are already | |
| 1685 // in canonical form: max 15 digits, no leading zeroes, except an | |
| 1686 // integer part that is a single zero, and no trailing zeros below | |
| 1687 // the decimal point. | |
| 1688 int pos = 0; | |
| 1689 int length = number.length(); | |
| 1690 if (number.length() > 15) return false; | |
| 1691 if (number[pos] == '0') { | |
| 1692 pos++; | |
| 1693 } else { | |
| 1694 while (pos < length && | |
| 1695 static_cast<unsigned>(number[pos] - '0') <= ('9' - '0')) pos++; | |
| 1696 } | |
| 1697 if (length == pos) return true; | |
| 1698 if (number[pos] != '.') return false; | |
| 1699 pos++; | |
| 1700 bool invalid_last_digit = true; | |
| 1701 while (pos < length) { | |
| 1702 uint8_t digit = number[pos] - '0'; | |
| 1703 if (digit > '9' - '0') return false; | |
| 1704 invalid_last_digit = (digit == 0); | |
| 1705 pos++; | |
| 1706 } | |
| 1707 return !invalid_last_digit; | |
| 1708 } | |
| 1709 | |
| 1710 | |
| 1711 uint32_t DuplicateFinder::Hash(Vector<const uint8_t> key, bool is_one_byte) { | |
| 1712 // Primitive hash function, almost identical to the one used | |
| 1713 // for strings (except that it's seeded by the length and representation). | |
| 1714 int length = key.length(); | |
| 1715 uint32_t hash = (length << 1) | (is_one_byte ? 1 : 0); | |
| 1716 for (int i = 0; i < length; i++) { | |
| 1717 uint32_t c = key[i]; | |
| 1718 hash = (hash + c) * 1025; | |
| 1719 hash ^= (hash >> 6); | |
| 1720 } | |
| 1721 return hash; | |
| 1722 } | |
| 1723 | |
| 1724 | |
| 1725 bool DuplicateFinder::Match(void* first, void* second) { | |
| 1726 // Decode lengths. | |
| 1727 // Length + representation is encoded as base 128, most significant heptet | |
| 1728 // first, with a 8th bit being non-zero while there are more heptets. | |
| 1729 // The value encodes the number of bytes following, and whether the original | |
| 1730 // was Latin1. | |
| 1731 byte* s1 = reinterpret_cast<byte*>(first); | |
| 1732 byte* s2 = reinterpret_cast<byte*>(second); | |
| 1733 uint32_t length_one_byte_field = 0; | |
| 1734 byte c1; | |
| 1735 do { | |
| 1736 c1 = *s1; | |
| 1737 if (c1 != *s2) return false; | |
| 1738 length_one_byte_field = (length_one_byte_field << 7) | (c1 & 0x7f); | |
| 1739 s1++; | |
| 1740 s2++; | |
| 1741 } while ((c1 & 0x80) != 0); | |
| 1742 int length = static_cast<int>(length_one_byte_field >> 1); | |
| 1743 return memcmp(s1, s2, length) == 0; | |
| 1744 } | |
| 1745 | |
| 1746 | |
| 1747 byte* DuplicateFinder::BackupKey(Vector<const uint8_t> bytes, | |
| 1748 bool is_one_byte) { | |
| 1749 uint32_t one_byte_length = (bytes.length() << 1) | (is_one_byte ? 1 : 0); | |
| 1750 backing_store_.StartSequence(); | |
| 1751 // Emit one_byte_length as base-128 encoded number, with the 7th bit set | |
| 1752 // on the byte of every heptet except the last, least significant, one. | |
| 1753 if (one_byte_length >= (1 << 7)) { | |
| 1754 if (one_byte_length >= (1 << 14)) { | |
| 1755 if (one_byte_length >= (1 << 21)) { | |
| 1756 if (one_byte_length >= (1 << 28)) { | |
| 1757 backing_store_.Add( | |
| 1758 static_cast<uint8_t>((one_byte_length >> 28) | 0x80)); | |
| 1759 } | |
| 1760 backing_store_.Add( | |
| 1761 static_cast<uint8_t>((one_byte_length >> 21) | 0x80u)); | |
| 1762 } | |
| 1763 backing_store_.Add( | |
| 1764 static_cast<uint8_t>((one_byte_length >> 14) | 0x80u)); | |
| 1765 } | |
| 1766 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); | |
| 1767 } | |
| 1768 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); | |
| 1769 | |
| 1770 backing_store_.AddBlock(bytes); | |
| 1771 return backing_store_.EndSequence().start(); | |
| 1772 } | |
| 1773 | 1638 |
| 1774 } // namespace internal | 1639 } // namespace internal |
| 1775 } // namespace v8 | 1640 } // namespace v8 |
| OLD | NEW |