OLD | NEW |
1 // Copyright 2011 the V8 project authors. All rights reserved. | 1 // Copyright 2011 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 // Features shared by parsing and pre-parsing scanners. | 5 // Features shared by parsing and pre-parsing scanners. |
6 | 6 |
7 #include "src/parsing/scanner.h" | 7 #include "src/parsing/scanner.h" |
8 | 8 |
9 #include <stdint.h> | 9 #include <stdint.h> |
10 | 10 |
11 #include <cmath> | 11 #include <cmath> |
12 | 12 |
13 #include "src/ast/ast-value-factory.h" | 13 #include "src/ast/ast-value-factory.h" |
14 #include "src/char-predicates-inl.h" | 14 #include "src/char-predicates-inl.h" |
15 #include "src/conversions-inl.h" | 15 #include "src/conversions-inl.h" |
16 #include "src/list-inl.h" | 16 #include "src/list-inl.h" |
| 17 #include "src/parsing/duplicate-finder.h" // for Scanner::FindSymbol |
17 #include "src/parsing/parser.h" | 18 #include "src/parsing/parser.h" |
18 | 19 |
19 namespace v8 { | 20 namespace v8 { |
20 namespace internal { | 21 namespace internal { |
21 | 22 |
22 Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const { | 23 Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const { |
23 if (is_one_byte()) { | 24 if (is_one_byte()) { |
24 return isolate->factory()->InternalizeOneByteString(one_byte_literal()); | 25 return isolate->factory()->InternalizeOneByteString(one_byte_literal()); |
25 } | 26 } |
26 return isolate->factory()->InternalizeTwoByteString(two_byte_literal()); | 27 return isolate->factory()->InternalizeTwoByteString(two_byte_literal()); |
(...skipping 1540 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1567 | 1568 |
1568 | 1569 |
1569 bool Scanner::ContainsDot() { | 1570 bool Scanner::ContainsDot() { |
1570 DCHECK(is_literal_one_byte()); | 1571 DCHECK(is_literal_one_byte()); |
1571 Vector<const uint8_t> str = literal_one_byte_string(); | 1572 Vector<const uint8_t> str = literal_one_byte_string(); |
1572 return std::find(str.begin(), str.end(), '.') != str.end(); | 1573 return std::find(str.begin(), str.end(), '.') != str.end(); |
1573 } | 1574 } |
1574 | 1575 |
1575 | 1576 |
1576 int Scanner::FindSymbol(DuplicateFinder* finder, int value) { | 1577 int Scanner::FindSymbol(DuplicateFinder* finder, int value) { |
| 1578 // TODO(vogelheim): Move this logic into the calling class; this can be fully |
| 1579 // implemented using the public interface. |
1577 if (is_literal_one_byte()) { | 1580 if (is_literal_one_byte()) { |
1578 return finder->AddOneByteSymbol(literal_one_byte_string(), value); | 1581 return finder->AddOneByteSymbol(literal_one_byte_string(), value); |
1579 } | 1582 } |
1580 return finder->AddTwoByteSymbol(literal_two_byte_string(), value); | 1583 return finder->AddTwoByteSymbol(literal_two_byte_string(), value); |
1581 } | 1584 } |
1582 | 1585 |
1583 | 1586 |
1584 bool Scanner::SetBookmark() { | 1587 bool Scanner::SetBookmark() { |
1585 if (c0_ != kNoBookmark && bookmark_c0_ == kNoBookmark && | 1588 if (c0_ != kNoBookmark && bookmark_c0_ == kNoBookmark && |
1586 next_next_.token == Token::UNINITIALIZED && source_->SetBookmark()) { | 1589 next_next_.token == Token::UNINITIALIZED && source_->SetBookmark()) { |
(...skipping 39 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1626 void Scanner::CopyTokenDesc(TokenDesc* to, TokenDesc* from) { | 1629 void Scanner::CopyTokenDesc(TokenDesc* to, TokenDesc* from) { |
1627 DCHECK_NOT_NULL(to); | 1630 DCHECK_NOT_NULL(to); |
1628 DCHECK_NOT_NULL(from); | 1631 DCHECK_NOT_NULL(from); |
1629 to->token = from->token; | 1632 to->token = from->token; |
1630 to->location = from->location; | 1633 to->location = from->location; |
1631 to->literal_chars->CopyFrom(from->literal_chars); | 1634 to->literal_chars->CopyFrom(from->literal_chars); |
1632 to->raw_literal_chars->CopyFrom(from->raw_literal_chars); | 1635 to->raw_literal_chars->CopyFrom(from->raw_literal_chars); |
1633 } | 1636 } |
1634 | 1637 |
1635 | 1638 |
1636 int DuplicateFinder::AddOneByteSymbol(Vector<const uint8_t> key, int value) { | |
1637 return AddSymbol(key, true, value); | |
1638 } | |
1639 | |
1640 | |
1641 int DuplicateFinder::AddTwoByteSymbol(Vector<const uint16_t> key, int value) { | |
1642 return AddSymbol(Vector<const uint8_t>::cast(key), false, value); | |
1643 } | |
1644 | |
1645 | |
1646 int DuplicateFinder::AddSymbol(Vector<const uint8_t> key, | |
1647 bool is_one_byte, | |
1648 int value) { | |
1649 uint32_t hash = Hash(key, is_one_byte); | |
1650 byte* encoding = BackupKey(key, is_one_byte); | |
1651 base::HashMap::Entry* entry = map_.LookupOrInsert(encoding, hash); | |
1652 int old_value = static_cast<int>(reinterpret_cast<intptr_t>(entry->value)); | |
1653 entry->value = | |
1654 reinterpret_cast<void*>(static_cast<intptr_t>(value | old_value)); | |
1655 return old_value; | |
1656 } | |
1657 | |
1658 | |
1659 int DuplicateFinder::AddNumber(Vector<const uint8_t> key, int value) { | |
1660 DCHECK(key.length() > 0); | |
1661 // Quick check for already being in canonical form. | |
1662 if (IsNumberCanonical(key)) { | |
1663 return AddOneByteSymbol(key, value); | |
1664 } | |
1665 | |
1666 int flags = ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY; | |
1667 double double_value = StringToDouble( | |
1668 unicode_constants_, key, flags, 0.0); | |
1669 int length; | |
1670 const char* string; | |
1671 if (!std::isfinite(double_value)) { | |
1672 string = "Infinity"; | |
1673 length = 8; // strlen("Infinity"); | |
1674 } else { | |
1675 string = DoubleToCString(double_value, | |
1676 Vector<char>(number_buffer_, kBufferSize)); | |
1677 length = StrLength(string); | |
1678 } | |
1679 return AddSymbol(Vector<const byte>(reinterpret_cast<const byte*>(string), | |
1680 length), true, value); | |
1681 } | |
1682 | |
1683 | |
1684 bool DuplicateFinder::IsNumberCanonical(Vector<const uint8_t> number) { | |
1685 // Test for a safe approximation of number literals that are already | |
1686 // in canonical form: max 15 digits, no leading zeroes, except an | |
1687 // integer part that is a single zero, and no trailing zeros below | |
1688 // the decimal point. | |
1689 int pos = 0; | |
1690 int length = number.length(); | |
1691 if (number.length() > 15) return false; | |
1692 if (number[pos] == '0') { | |
1693 pos++; | |
1694 } else { | |
1695 while (pos < length && | |
1696 static_cast<unsigned>(number[pos] - '0') <= ('9' - '0')) pos++; | |
1697 } | |
1698 if (length == pos) return true; | |
1699 if (number[pos] != '.') return false; | |
1700 pos++; | |
1701 bool invalid_last_digit = true; | |
1702 while (pos < length) { | |
1703 uint8_t digit = number[pos] - '0'; | |
1704 if (digit > '9' - '0') return false; | |
1705 invalid_last_digit = (digit == 0); | |
1706 pos++; | |
1707 } | |
1708 return !invalid_last_digit; | |
1709 } | |
1710 | |
1711 | |
1712 uint32_t DuplicateFinder::Hash(Vector<const uint8_t> key, bool is_one_byte) { | |
1713 // Primitive hash function, almost identical to the one used | |
1714 // for strings (except that it's seeded by the length and representation). | |
1715 int length = key.length(); | |
1716 uint32_t hash = (length << 1) | (is_one_byte ? 1 : 0); | |
1717 for (int i = 0; i < length; i++) { | |
1718 uint32_t c = key[i]; | |
1719 hash = (hash + c) * 1025; | |
1720 hash ^= (hash >> 6); | |
1721 } | |
1722 return hash; | |
1723 } | |
1724 | |
1725 | |
1726 bool DuplicateFinder::Match(void* first, void* second) { | |
1727 // Decode lengths. | |
1728 // Length + representation is encoded as base 128, most significant heptet | |
1729 // first, with a 8th bit being non-zero while there are more heptets. | |
1730 // The value encodes the number of bytes following, and whether the original | |
1731 // was Latin1. | |
1732 byte* s1 = reinterpret_cast<byte*>(first); | |
1733 byte* s2 = reinterpret_cast<byte*>(second); | |
1734 uint32_t length_one_byte_field = 0; | |
1735 byte c1; | |
1736 do { | |
1737 c1 = *s1; | |
1738 if (c1 != *s2) return false; | |
1739 length_one_byte_field = (length_one_byte_field << 7) | (c1 & 0x7f); | |
1740 s1++; | |
1741 s2++; | |
1742 } while ((c1 & 0x80) != 0); | |
1743 int length = static_cast<int>(length_one_byte_field >> 1); | |
1744 return memcmp(s1, s2, length) == 0; | |
1745 } | |
1746 | |
1747 | |
1748 byte* DuplicateFinder::BackupKey(Vector<const uint8_t> bytes, | |
1749 bool is_one_byte) { | |
1750 uint32_t one_byte_length = (bytes.length() << 1) | (is_one_byte ? 1 : 0); | |
1751 backing_store_.StartSequence(); | |
1752 // Emit one_byte_length as base-128 encoded number, with the 7th bit set | |
1753 // on the byte of every heptet except the last, least significant, one. | |
1754 if (one_byte_length >= (1 << 7)) { | |
1755 if (one_byte_length >= (1 << 14)) { | |
1756 if (one_byte_length >= (1 << 21)) { | |
1757 if (one_byte_length >= (1 << 28)) { | |
1758 backing_store_.Add( | |
1759 static_cast<uint8_t>((one_byte_length >> 28) | 0x80)); | |
1760 } | |
1761 backing_store_.Add( | |
1762 static_cast<uint8_t>((one_byte_length >> 21) | 0x80u)); | |
1763 } | |
1764 backing_store_.Add( | |
1765 static_cast<uint8_t>((one_byte_length >> 14) | 0x80u)); | |
1766 } | |
1767 backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u)); | |
1768 } | |
1769 backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f)); | |
1770 | |
1771 backing_store_.AddBlock(bytes); | |
1772 return backing_store_.EndSequence().start(); | |
1773 } | |
1774 | 1639 |
1775 } // namespace internal | 1640 } // namespace internal |
1776 } // namespace v8 | 1641 } // namespace v8 |
OLD | NEW |