| OLD | NEW |
| 1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
| 6 | 6 |
| 7 #include "src/parsing/scanner.h" | 7 #include "src/parsing/scanner.h" |
| 8 | 8 |
| 9 #include <stdint.h> | 9 #include <stdint.h> |
| 10 | 10 |
| 11 #include <cmath> | 11 #include <cmath> |
| 12 | 12 |
| 13 #include "src/ast/ast-value-factory.h" | 13 #include "src/ast/ast-value-factory.h" |
| 14 #include "src/char-predicates-inl.h" | 14 #include "src/char-predicates-inl.h" |
| 15 #include "src/conversions-inl.h" | 15 #include "src/conversions-inl.h" |
| 16 #include "src/list-inl.h" | 16 #include "src/list-inl.h" |
| 17 #include "src/parsing/duplicate-finder.h" // for Scanner::FindSymbol |
| 17 #include "src/parsing/parser.h" | 18 #include "src/parsing/parser.h" |
| 18 | 19 |
| 19 namespace v8 { | 20 namespace v8 { |
| 20 namespace internal { | 21 namespace internal { |
| 21 | 22 |
| 22 Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const { | 23 Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const { |
| 23 if (is_one_byte()) { | 24 if (is_one_byte()) { |
| 24 return isolate->factory()->InternalizeOneByteString(one_byte_literal()); | 25 return isolate->factory()->InternalizeOneByteString(one_byte_literal()); |
| 25 } | 26 } |
| 26 return isolate->factory()->InternalizeTwoByteString(two_byte_literal()); | 27 return isolate->factory()->InternalizeTwoByteString(two_byte_literal()); |
| (...skipping 1540 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1567 | 1568 |
| 1568 | 1569 |
| 1569 bool Scanner::ContainsDot() { | 1570 bool Scanner::ContainsDot() { |
| 1570 DCHECK(is_literal_one_byte()); | 1571 DCHECK(is_literal_one_byte()); |
| 1571 Vector<const uint8_t> str = literal_one_byte_string(); | 1572 Vector<const uint8_t> str = literal_one_byte_string(); |
| 1572 return std::find(str.begin(), str.end(), '.') != str.end(); | 1573 return std::find(str.begin(), str.end(), '.') != str.end(); |
| 1573 } | 1574 } |
| 1574 | 1575 |
| 1575 | 1576 |
| 1576 int Scanner::FindSymbol(DuplicateFinder* finder, int value) { | 1577 int Scanner::FindSymbol(DuplicateFinder* finder, int value) { |
| 1578 // TODO(vogelheim): Move this logic into the calling class; this can be fully |
| 1579 // implemented using the public interface. |
| 1577 if (is_literal_one_byte()) { | 1580 if (is_literal_one_byte()) { |
| 1578 return finder->AddOneByteSymbol(literal_one_byte_string(), value); | 1581 return finder->AddOneByteSymbol(literal_one_byte_string(), value); |
| 1579 } | 1582 } |
| 1580 return finder->AddTwoByteSymbol(literal_two_byte_string(), value); | 1583 return finder->AddTwoByteSymbol(literal_two_byte_string(), value); |
| 1581 } | 1584 } |
| 1582 | 1585 |
| 1583 | 1586 |
| 1584 bool Scanner::SetBookmark() { | 1587 bool Scanner::SetBookmark() { |
| 1585 if (c0_ != kNoBookmark && bookmark_c0_ == kNoBookmark && | 1588 if (c0_ != kNoBookmark && bookmark_c0_ == kNoBookmark && |
| 1586 next_next_.token == Token::UNINITIALIZED && source_->SetBookmark()) { | 1589 next_next_.token == Token::UNINITIALIZED && source_->SetBookmark()) { |
| (...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1626 void Scanner::CopyTokenDesc(TokenDesc* to, TokenDesc* from) { | 1629 void Scanner::CopyTokenDesc(TokenDesc* to, TokenDesc* from) { |
| 1627 DCHECK_NOT_NULL(to); | 1630 DCHECK_NOT_NULL(to); |
| 1628 DCHECK_NOT_NULL(from); | 1631 DCHECK_NOT_NULL(from); |
| 1629 to->token = from->token; | 1632 to->token = from->token; |
| 1630 to->location = from->location; | 1633 to->location = from->location; |
| 1631 to->literal_chars->CopyFrom(from->literal_chars); | 1634 to->literal_chars->CopyFrom(from->literal_chars); |
| 1632 to->raw_literal_chars->CopyFrom(from->raw_literal_chars); | 1635 to->raw_literal_chars->CopyFrom(from->raw_literal_chars); |
| 1633 } | 1636 } |
| 1634 | 1637 |
| 1635 | 1638 |
| 1636 int DuplicateFinder::AddOneByteSymbol(Vector<const uint8_t> key, int value) { | |
| 1637 return AddSymbol(key, true, value); | |
| 1638 } | |
| 1639 | |
| 1640 | |
| 1641 int DuplicateFinder::AddTwoByteSymbol(Vector<const uint16_t> key, int value) { | |
| 1642 return AddSymbol(Vector<const uint8_t>::cast(key), false, value); | |
| 1643 } | |
| 1644 | |
| 1645 | |
| 1646 int DuplicateFinder::AddSymbol(Vector<const uint8_t> key, | |
| 1647 bool is_one_byte, | |
| 1648 int value) { | |
| 1649 uint32_t hash = Hash(key, is_one_byte); | |
| 1650 byte* encoding = BackupKey(key, is_one_byte); | |
| 1651 base::HashMap::Entry* entry = map_.LookupOrInsert(encoding, hash); | |
| 1652 int old_value = static_cast<int>(reinterpret_cast<intptr_t>(entry->value)); | |
| 1653 entry->value = | |
| 1654 reinterpret_cast<void*>(static_cast<intptr_t>(value | old_value)); | |
| 1655 return old_value; | |
| 1656 } | |
| 1657 | |
| 1658 | |
| 1659 int DuplicateFinder::AddNumber(Vector<const uint8_t> key, int value) { | |
| 1660 DCHECK(key.length() > 0); | |
| 1661 // Quick check for already being in canonical form. | |
| 1662 if (IsNumberCanonical(key)) { | |
| 1663 return AddOneByteSymbol(key, value); | |
| 1664 } | |
| 1665 | |
| 1666 int flags = ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY; | |
| 1667 double double_value = StringToDouble( | |
| 1668 unicode_constants_, key, flags, 0.0); | |
| 1669 int length; | |
| 1670 const char* string; | |
| 1671 if (!std::isfinite(double_value)) { | |
| 1672 string = "Infinity"; | |
| 1673 length = 8; // strlen("Infinity"); | |
| 1674 } else { | |
| 1675 string = DoubleToCString(double_value, | |
| 1676 Vector<char>(number_buffer_, kBufferSize)); | |
| 1677 length = StrLength(string); | |
| 1678 } | |
| 1679 return AddSymbol(Vector<const byte>(reinterpret_cast<const byte*>(string), | |
| 1680 length), true, value); | |
| 1681 } | |
| 1682 | |
| 1683 | |
| 1684 bool DuplicateFinder::IsNumberCanonical(Vector<const uint8_t> number) { | |
| 1685 // Test for a safe approximation of number literals that are already | |
| 1686 // in canonical form: max 15 digits, no leading zeroes, except an | |
| 1687 // integer part that is a single zero, and no trailing zeros below | |
| 1688 // the decimal point. | |
| 1689 int pos = 0; | |
| 1690 int length = number.length(); | |
| 1691 if (number.length() > 15) return false; | |
| 1692 if (number[pos] == '0') { | |
| 1693 pos++; | |
| 1694 } else { | |
| 1695 while (pos < length && | |
| 1696 static_cast<unsigned>(number[pos] - '0') <= ('9' - '0')) pos++; | |
| 1697 } | |
| 1698 if (length == pos) return true; | |
| 1699 if (number[pos] != '.') return false; | |
| 1700 pos++; | |
| 1701 bool invalid_last_digit = true; | |
| 1702 while (pos < length) { | |
| 1703 uint8_t digit = number[pos] - '0'; | |
| 1704 if (digit > '9' - '0') return false; | |
| 1705 invalid_last_digit = (digit == 0); | |
| 1706 pos++; | |
| 1707 } | |
| 1708 return !invalid_last_digit; | |
| 1709 } | |
| 1710 | |
| 1711 | |
| 1712 uint32_t DuplicateFinder::Hash(Vector<const uint8_t> key, bool is_one_byte) { | |
| 1713 // Primitive hash function, almost identical to the one used | |
| 1714 // for strings (except that it's seeded by the length and representation). | |
| 1715 int length = key.length(); | |
| 1716 uint32_t hash = (length << 1) | (is_one_byte ? 1 : 0); | |
| 1717 for (int i = 0; i < length; i++) { | |
| 1718 uint32_t c = key[i]; | |
| 1719 hash = (hash + c) * 1025; | |
| 1720 hash ^= (hash >> 6); | |
| 1721 } | |
| 1722 return hash; | |
| 1723 } | |
| 1724 | |
| 1725 | |
| 1726 bool DuplicateFinder::Match(void* first, void* second) { | |
| 1727 // Decode lengths. | |
| 1728 // Length + representation is encoded as base 128, most significant heptet | |
| 1729 // first, with a 8th bit being non-zero while there are more heptets. | |
| 1730 // The value encodes the number of bytes following, and whether the original | |
| 1731 // was Latin1. | |
| 1732 byte* s1 = reinterpret_cast<byte*>(first); | |
| 1733 byte* s2 = reinterpret_cast<byte*>(second); | |
| 1734 uint32_t length_one_byte_field = 0; | |
| 1735 byte c1; | |
| 1736 do { | |
| 1737 c1 = *s1; | |
| 1738 if (c1 != *s2) return false; | |
| 1739 length_one_byte_field = (length_one_byte_field << 7) | (c1 & 0x7f); | |
| 1740 s1++; | |
| 1741 s2++; | |
| 1742 } while ((c1 & 0x80) != 0); | |
| 1743 int length = static_cast<int>(length_one_byte_field >> 1); | |
| 1744 return memcmp(s1, s2, length) == 0; | |
| 1745 } | |
| 1746 | |
| 1747 | |
| 1748 byte* DuplicateFinder::BackupKey(Vector<const uint8_t> bytes, | |
| 1749 bool is_one_byte) { | |
| 1750 uint32_t one_byte_length = (bytes.length() << 1) | (is_one_byte ? 1 : 0); | |
| 1751 backing_store_.StartSequence(); | |
| 1752 // Emit one_byte_length as base-128 encoded number, with the 7th bit set | |
| 1753 // on the byte of every heptet except the last, least significant, one. | |
| 1754 if (one_byte_length >= (1 << 7)) { | |
| 1755 if (one_byte_length >= (1 << 14)) { | |
| 1756 if (one_byte_length >= (1 << 21)) { | |
| 1757 if (one_byte_length >= (1 << 28)) { | |
| 1758 backing_store_.Add( | |
| 1759 static_cast<uint8_t>((one_byte_length >> 28) | 0x80)); | |
| 1760 } | |
| 1761 backing_store_.Add( | |
| 1762 static_cast<uint8_t>((one_byte_length >> 21) | 0x80u)); | |
| 1763 } | |
| 1764 backing_store_.Add( | |
| 1765 static_cast<uint8_t>((one_byte_length >> 14) | 0x80u)); | |
| 1766 } | |
| 1767 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); | |
| 1768 } | |
| 1769 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); | |
| 1770 | |
| 1771 backing_store_.AddBlock(bytes); | |
| 1772 return backing_store_.EndSequence().start(); | |
| 1773 } | |
| 1774 | 1639 |
| 1775 } // namespace internal | 1640 } // namespace internal |
| 1776 } // namespace v8 | 1641 } // namespace v8 |
| OLD | NEW |