Chromium Code Reviews| Index: runtime/vm/hash_table.h |
| =================================================================== |
| --- runtime/vm/hash_table.h (revision 0) |
| +++ runtime/vm/hash_table.h (revision 0) |
| @@ -0,0 +1,557 @@ |
| +// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file |
| +// for details. All rights reserved. Use of this source code is governed by a |
| +// BSD-style license that can be found in the LICENSE file. |
| + |
| +#ifndef VM_HASH_TABLE_H_ |
| +#define VM_HASH_TABLE_H_ |
| + |
| +#include <map> |
| +#include <vector> |
| + |
| +#include "platform/assert.h" |
| +#include "vm/object.h" |
| + |
| +namespace dart { |
| + |
| +// OVERVIEW: |
| +// |
| +// Hash maps and hash sets all use RawArray as backing storage. At the lowest |
| +// level is a generic open-addressing table that supports deletion. |
| +// - HashTable |
| +// The next layer provides ordering and iteration functionality: |
| +// - UnorderedHashTable |
| +// - EnumIndexHashTable |
| +// - LinkedListHashTable (TODO(koda): Implement.) |
| +// The utility class HashTables handles growth and conversion (e.g., converting |
| +// a compact EnumIndexHashTable to an iteration-efficient LinkedListHashTable). |
| +// The next layer fixes the payload size and provides a natural interface: |
| +// - HashMap |
| +// - HashSet |
| +// Combining either of these with an iteration strategy, we get the templates |
| +// intended for use outside this file: |
| +// - UnorderedHashMap |
| +// - EnumIndexHashMap |
| +// - LinkedListHashMap |
| +// - UnorderedHashSet |
| +// - EnumIndexHashSet |
| +// - LinkedListHashSet |
| +// Each of these can be finally specialized with KeyTraits to support any set of |
| +// lookup key types (e.g., look up a char* in a set of String objects), and |
| +// any equality and hash code computation. |
| +// |
| +// The classes all wrap an Array handle, and metods like HashSet::Insert can |
| +// trigger growth into a new RawArray, updating the handle. Debug mode asserts |
| +// that 'Release' was called to get the final RawArray before destruction. |
| +// |
| +// Example use: |
| +// typedef UnorderedHashMap<FooTraits> ResolvedNamesMap; |
| +// ... |
| +// ResolvedNamesMap cache(Array::Handle(resolved_names())); |
| +// cache.UpdateOrInsert(name0, obj0); |
| +// cache.UpdateOrInsert(name1, obj1); |
| +// ... |
| +// StorePointer(&raw_ptr()->resolved_names_, cache.Release()); |
| +// |
| +// TODO(koda): When exposing these to Dart code, document and assert that |
| +// KeyTraits methods must not run Dart code (since the C++ code doesn't check |
| +// for concurrent modification). |
| + |
| + |
| +// Open-addressing hash table template using a RawArray as backing storage. |
| +// |
| +// The elements of the array are partitioned into entries: |
| +// [ header | metadata | entry0 | entry1 | ... | entryN ] |
| +// Each entry contains a key, followed by zero or more payload components, |
| +// and has 3 possible states: unused, occupied, or deleted. |
| +// The header tracks the number of entries in each state. |
| +// Any object except Object::sentinel() and Object::transition_sentinel() |
| +// may be stored as a key. Any object may be stored in a payload. |
| +// |
| +// Parameters |
| +// KeyTraits: defines static methods |
| +// bool IsMatch(const Key& key, const Object& obj) and |
| +// uword Hash(const Key& key) for any number of desired lookup key types. |
| +// kPayloadSize: number of components of the payload in each entry. |
| +// kMetaDataSize: number of elements reserved (e.g., for iteration order data). |
| +template<typename KeyTraits, intptr_t kPayloadSize, intptr_t kMetaDataSize> |
| +class HashTable : public ValueObject { |
| + public: |
| + explicit HashTable(Array& data) : data_(data) {} |
| + |
| + RawArray* Release() { |
| + ASSERT(!data_.IsNull()); |
| + RawArray* array = data_.raw(); |
| + data_ = Array::null(); |
| + return array; |
| + } |
| + |
| + ~HashTable() { |
| + ASSERT(data_.IsNull()); |
| + } |
| + |
| + // Returns a backing storage size such that 'num_occupied' distinct keys can |
| + // be inserted into the table. |
| + static intptr_t ArrayLengthForNumOccupied(intptr_t num_occupied) { |
| + // The current invariant requires at least one unoccupied entry. |
| + // TODO(koda): Adjust after moving to quadratic probing. |
|
siva
2014/06/13 19:51:18
Is there an assertion to ensure that there is at l
koda
2014/06/23 19:44:55
Yes; see InsertKey.
|
| + intptr_t num_entries = num_occupied + 1; |
| + return kFirstKeyIndex + (kEntrySize * num_entries); |
| + } |
| + |
| + // Initializes an empty table. |
| + void Initialize() const { |
| + ASSERT(data_.Length() >= ArrayLengthForNumOccupied(0)); |
| + Smi& zero = Smi::Handle(Smi::New(0)); |
| + data_.SetAt(kOccupiedEntriesIndex, zero); |
| + data_.SetAt(kDeletedEntriesIndex, zero); |
| + for (intptr_t i = kHeaderSize; i < data_.Length(); ++i) { |
| + data_.SetAt(i, Object::sentinel()); |
| + } |
| + } |
| + |
| + // Returns whether 'key' matches any key in the table. |
| + template<typename Key> |
| + bool ContainsKey(const Key& key) const { |
| + return FindKey(key) != -1; |
| + } |
| + |
| + // Returns the entry that matches 'key', or -1 if none exists. |
| + template<typename Key> |
| + intptr_t FindKey(const Key& key) const { |
| + ASSERT(NumOccupied() < NumEntries()); |
| + intptr_t probe = static_cast<uword>(KeyTraits::Hash(key)) % NumEntries(); |
| + Object& obj = Object::Handle(); |
| + // TODO(koda): Use quadratic probing. |
| + for (; ; probe = (probe + 1) % NumEntries()) { |
| + if (IsUnused(probe)) { |
| + return -1; |
| + } else if (IsDeleted(probe)) { |
| + continue; |
| + } else { |
| + obj = GetKey(probe); |
| + if (KeyTraits::IsMatch(key, obj)) { |
| + return probe; |
| + } |
| + } |
| + } |
| + UNREACHABLE(); |
| + return -1; |
| + } |
| + |
| + // Sets *entry to either: |
| + // - an occupied entry matching 'key', and returns true, or |
| + // - an unused/deleted entry where a matching key may be inserted, |
| + // and returns false. |
| + template<typename Key> |
| + bool FindKeyOrDeletedOrUnused(const Key& key, intptr_t* entry) const { |
|
siva
2014/06/13 19:51:18
ASSERT(entry != NULL);
koda
2014/06/23 19:44:55
Done.
|
| + ASSERT(NumOccupied() < NumEntries()); |
| + intptr_t probe = static_cast<uword>(KeyTraits::Hash(key)) % NumEntries(); |
| + Object& obj = Object::Handle(); |
| + intptr_t deleted = -1; |
| + // TODO(koda): Use quadratic probing. |
| + for (; ; probe = (probe + 1) % NumEntries()) { |
| + if (IsUnused(probe)) { |
| + *entry = (deleted != -1) ? deleted : probe; |
| + return false; |
| + } else if (IsDeleted(probe)) { |
| + deleted = (deleted != -1) ? deleted : probe; |
|
siva
2014/06/13 19:51:18
why not :
if (deleted == -1) {
deleted = probe;
koda
2014/06/23 19:44:55
Done.
|
| + } else { |
| + obj = GetKey(probe); |
| + if (KeyTraits::IsMatch(key, obj)) { |
| + *entry = probe; |
| + return true; |
| + } |
| + } |
| + } |
| + UNREACHABLE(); |
| + return false; |
| + } |
| + |
| + // Sets the key of a previously unoccupied entry. This must not be the last |
| + // unoccupied entry. |
| + void InsertKey(intptr_t entry, const Object& key) const { |
| + ASSERT(!IsOccupied(entry)); |
| + AdjustSmiValueAt(kOccupiedEntriesIndex, 1); |
| + if (IsDeleted(entry)) { |
| + AdjustSmiValueAt(kDeletedEntriesIndex, -1); |
| + } else { |
| + ASSERT(IsUnused(entry)); |
| + } |
| + InternalSetKey(entry, key); |
| + ASSERT(IsOccupied(entry)); |
| + ASSERT(NumOccupied() < NumEntries()); |
| + } |
| + |
| + bool IsUnused(intptr_t entry) const { |
| + return InternalGetKey(entry) == Object::sentinel().raw(); |
| + } |
| + bool IsOccupied(intptr_t entry) const { |
| + return !IsUnused(entry) && !IsDeleted(entry); |
| + } |
| + bool IsDeleted(intptr_t entry) const { |
| + return InternalGetKey(entry) == Object::transition_sentinel().raw(); |
| + } |
| + |
| + RawObject* GetKey(intptr_t entry) const { |
| + ASSERT(IsOccupied(entry)); |
| + return InternalGetKey(entry); |
| + } |
| + RawObject* GetPayload(intptr_t entry, intptr_t component) const { |
| + ASSERT(IsOccupied(entry)); |
| + return data_.At(PayloadIndex(entry, component)); |
| + } |
| + void UpdatePayload(intptr_t entry, |
| + intptr_t component, |
| + const Object& value) const { |
| + ASSERT(IsOccupied(entry)); |
| + ASSERT(0 <= component && component < kPayloadSize); |
| + data_.SetAt(PayloadIndex(entry, component), value); |
| + } |
| + // Deletes both the key and payload of the specified entry. |
| + void DeleteEntry(intptr_t entry) const { |
| + ASSERT(IsOccupied(entry)); |
| + for (intptr_t i = 0; i < kPayloadSize; ++i) { |
| + UpdatePayload(entry, i, Object::transition_sentinel()); |
| + } |
| + InternalSetKey(entry, Object::transition_sentinel()); |
| + AdjustSmiValueAt(kOccupiedEntriesIndex, -1); |
| + AdjustSmiValueAt(kDeletedEntriesIndex, 1); |
| + } |
| + intptr_t NumEntries() const { |
| + return (data_.Length() - kFirstKeyIndex) / kEntrySize; |
| + } |
| + intptr_t NumUnused() const { |
| + return NumEntries() - NumOccupied() - NumDeleted(); |
| + } |
| + intptr_t NumOccupied() const { |
| + return GetSmiValueAt(kOccupiedEntriesIndex); |
| + } |
| + intptr_t NumDeleted() const { |
| + return GetSmiValueAt(kDeletedEntriesIndex); |
| + } |
| + |
| + protected: |
| + static const intptr_t kOccupiedEntriesIndex = 0; |
| + static const intptr_t kDeletedEntriesIndex = 1; |
| + static const intptr_t kHeaderSize = kDeletedEntriesIndex + 1; |
| + static const intptr_t kMetaDataIndex = kHeaderSize; |
| + static const intptr_t kFirstKeyIndex = kHeaderSize + kMetaDataSize; |
| + static const intptr_t kEntrySize = 1 + kPayloadSize; |
| + |
| + intptr_t KeyIndex(intptr_t entry) const { |
| + ASSERT(0 <= entry && entry < NumEntries()); |
| + return kFirstKeyIndex + (kEntrySize * entry); |
| + } |
| + |
| + intptr_t PayloadIndex(intptr_t entry, intptr_t component) const { |
| + ASSERT(0 <= component && component < kPayloadSize); |
| + return KeyIndex(entry) + 1 + component; |
| + } |
| + |
| + RawObject* InternalGetKey(intptr_t entry) const { |
| + return data_.At(KeyIndex(entry)); |
| + } |
| + |
| + void InternalSetKey(intptr_t entry, const Object& key) const { |
| + data_.SetAt(KeyIndex(entry), key); |
| + } |
| + |
| + intptr_t GetSmiValueAt(intptr_t index) const { |
| + Smi& smi = Smi::Handle(); |
| + smi ^= data_.At(index); |
| + return smi.Value(); |
|
siva
2014/06/13 19:51:18
You could write this as:
ASSERT(Object::Handle(dat
koda
2014/06/23 19:44:55
Done.
|
| + } |
| + |
| + void SetSmiValueAt(intptr_t index, intptr_t value) const { |
| + Smi& smi = Smi::Handle(); |
| + smi = Smi::New(value); |
|
siva
2014/06/13 19:51:18
const Smi& smi = Smi::Handle(Smi::New(value));
koda
2014/06/23 19:44:55
Done.
|
| + data_.SetAt(index, smi); |
| + } |
| + |
| + void AdjustSmiValueAt(intptr_t index, intptr_t delta) const { |
| + Smi& smi = Smi::Handle(); |
| + smi ^= data_.At(index); |
| + smi = Smi::New(smi.Value() + delta); |
| + data_.SetAt(index, smi); |
|
siva
2014/06/13 19:51:18
Assuming GetSmiValueAt is changed above to a non h
koda
2014/06/23 19:44:55
Done.
|
| + } |
| + |
| + Array& data_; |
| + |
| + friend class HashTables; |
| +}; |
| + |
| + |
| +// Table with unspecified iteration order. No payload overhead or metadata. |
| +template<typename KeyTraits, intptr_t kUserPayloadSize> |
| +class UnorderedHashTable : public HashTable<KeyTraits, kUserPayloadSize, 0> { |
| + public: |
| + typedef HashTable<KeyTraits, kUserPayloadSize, 0> Base; |
| + static const intptr_t kPayloadSize = kUserPayloadSize; |
| + explicit UnorderedHashTable(Array& data) : Base(data) {} |
| + // Note: Does not check for concurrent modification. |
| + class Iterator { |
| + public: |
| + explicit Iterator(const UnorderedHashTable* table) |
| + : table_(table), entry_(-1) {} |
| + bool MoveNext() { |
| + do { |
| + ++entry_; |
| + } while (entry_ < table_->NumEntries() && !table_->IsOccupied(entry_)); |
|
siva
2014/06/13 19:51:18
One could call MoveNext() repeatedly cause an inte
koda
2014/06/23 19:44:55
True. Rewrote the loop to avoid this.
|
| + return entry_ < table_->NumEntries(); |
| + } |
| + intptr_t Current() { |
| + return entry_; |
| + } |
| + |
| + private: |
| + const UnorderedHashTable* table_; |
| + intptr_t entry_; |
| + }; |
| + |
| + void Initialize() const { |
| + Base::Initialize(); |
| + } |
| + void InsertKey(intptr_t entry, const Object& key) const { |
| + Base::InsertKey(entry, key); |
| + } |
| + void DeleteEntry(intptr_t entry) const { |
| + Base::DeleteEntry(entry); |
| + } |
|
siva
2014/06/13 19:51:18
Why is it necessary to have these methods which ju
koda
2014/06/23 19:44:55
They are not necessary.
I just wanted to be expli
|
| +}; |
| + |
| + |
| +// Table with insertion order, using one payload component for the enumeration |
| +// index, and one metadata element for the next enumeration index. |
| +template<typename KeyTraits, intptr_t kUserPayloadSize> |
| +class EnumIndexHashTable |
| + : public HashTable<KeyTraits, kUserPayloadSize + 1, 1> { |
| + public: |
| + typedef HashTable<KeyTraits, kUserPayloadSize + 1, 1> Base; |
| + static const intptr_t kPayloadSize = kUserPayloadSize; |
| + static const intptr_t kNextEnumIndex = Base::kMetaDataIndex; |
| + explicit EnumIndexHashTable(Array& data) : Base(data) {} |
| + // Note: Does not check for concurrent modification. |
| + class Iterator { |
| + public: |
| + explicit Iterator(const EnumIndexHashTable* table) : index_(-1) { |
| + // TODO(koda): Use GrowableArray after adding stateful comparator support. |
| + std::map<intptr_t, intptr_t> enum_to_entry; |
| + for (intptr_t i = 0; i < table->NumEntries(); ++i) { |
| + if (table->IsOccupied(i)) { |
| + intptr_t enum_index = |
| + table->GetSmiValueAt(table->PayloadIndex(i, kPayloadSize)); |
| + enum_to_entry[enum_index] = i; |
| + } |
| + } |
| + for (std::map<intptr_t, intptr_t>::iterator it = enum_to_entry.begin(); |
| + it != enum_to_entry.end(); |
| + ++it) { |
| + entries_.push_back(it->second); |
| + } |
| + } |
| + bool MoveNext() { |
| + ++index_; |
|
siva
2014/06/13 19:51:18
Ditto comment, I feel MoveNext should not allow th
koda
2014/06/23 19:44:55
Done.
|
| + return index_ < static_cast<intptr_t>(entries_.size()); |
| + } |
| + intptr_t Current() { |
| + return entries_[index_]; |
| + } |
| + |
| + private: |
| + intptr_t index_; |
| + std::vector<intptr_t> entries_; |
| + }; |
| + |
| + void Initialize() const { |
| + Base::Initialize(); |
| + Base::SetSmiValueAt(kNextEnumIndex, 0); |
| + } |
| + void InsertKey(intptr_t entry, const Object& key) const { |
| + Base::InsertKey(entry, key); |
| + Smi& next_enum_index = |
| + Smi::Handle(Smi::New(Base::GetSmiValueAt(kNextEnumIndex))); |
| + Base::UpdatePayload(entry, kPayloadSize, next_enum_index); |
| + Base::AdjustSmiValueAt(kNextEnumIndex, 1); |
| + } |
| + void DeleteEntry(intptr_t entry) const { |
| + Base::DeleteEntry(entry); |
| + } |
|
siva
2014/06/13 19:51:18
Ditto comment about DeleteEntry, is it needed?
koda
2014/06/23 19:44:55
Done.
|
| +}; |
| + |
| + |
| +class HashTables : public AllStatic { |
| + public: |
| + // Allocates and initializes a table. |
| + template<typename Table> |
| + static RawArray* New(intptr_t initial_capacity) { |
| + Table table(Array::Handle(Array::New( |
| + Table::ArrayLengthForNumOccupied(initial_capacity)))); |
| + table.Initialize(); |
| + return table.Release(); |
| + } |
| + |
| + // Clears 'to' and inserts all elements from 'from', in iteration order. |
| + // The tables must have the same user payload size. |
| + template<typename From, typename To> |
| + static void Copy(const From& from, const To& to) { |
| + COMPILE_ASSERT(From::kPayloadSize == To::kPayloadSize); |
| + to.Initialize(); |
| + ASSERT(from.NumOccupied() < to.NumEntries()); |
| + typename From::Iterator it(&from); |
| + Object& obj = Object::Handle(); |
| + while (it.MoveNext()) { |
| + intptr_t from_entry = it.Current(); |
| + obj = from.GetKey(from_entry); |
| + intptr_t to_entry = -1; |
| + const Object& key = obj; |
| + bool present = to.FindKeyOrDeletedOrUnused(key, &to_entry); |
| + ASSERT(!present); |
| + to.InsertKey(to_entry, obj); |
| + for (intptr_t i = 0; i < From::kPayloadSize; ++i) { |
| + obj = from.GetPayload(from_entry, i); |
| + to.UpdatePayload(to_entry, i, obj); |
| + } |
| + } |
| + } |
| + |
| + template<typename Table> |
| + static void EnsureLoadFactor(double low, double high, const Table& table) { |
| + double current = (1 + table.NumOccupied() + table.NumDeleted()) / |
| + static_cast<double>(table.NumEntries()); |
| + if (low <= current && current < high) { |
| + return; |
| + } |
| + double target = (low + high) / 2.0; |
| + intptr_t new_capacity = (1 + table.NumOccupied()) / target; |
| + Table new_table(Array::Handle(New<Table>(new_capacity))); |
| + Copy(table, new_table); |
| + table.data_ = new_table.Release(); |
| + } |
| + |
| + // Serializes a table by concatenating its entries as an array. |
| + template<typename Table> |
| + static RawArray* ToArray(const Table& table, bool include_payload) { |
| + const intptr_t entry_size = include_payload ? (1 + Table::kPayloadSize) : 1; |
| + Array& result = Array::Handle(Array::New(table.NumOccupied() * entry_size)); |
| + typename Table::Iterator it(&table); |
| + Object& obj = Object::Handle(); |
| + intptr_t result_index = 0; |
| + while (it.MoveNext()) { |
| + intptr_t entry = it.Current(); |
| + obj = table.GetKey(entry); |
| + result.SetAt(result_index++, obj); |
| + if (include_payload) { |
| + for (intptr_t i = 0; i < Table::kPayloadSize; ++i) { |
| + obj = table.GetPayload(entry, i); |
| + result.SetAt(result_index++, obj); |
| + } |
| + } |
| + } |
| + return result.raw(); |
| + } |
| +}; |
| + |
| + |
| +template<typename Base> |
| +class HashMap : public Base { |
| + public: |
| + explicit HashMap(Array& data) : Base(data) {} |
| + template<typename Key> |
| + RawObject* GetOrNull(const Key& key, bool* present = NULL) const { |
| + intptr_t entry = Base::FindKey(key); |
| + if (present != NULL) { |
| + *present = (entry != -1); |
| + } |
| + return (entry == -1) ? Object::null() : Base::GetPayload(entry, 0); |
| + } |
| + bool UpdateOrInsert(const Object& key, const Object& value) const { |
| + HashTables::EnsureLoadFactor(0.0, 0.75, *this); |
| + intptr_t entry = -1; |
| + bool present = Base::FindKeyOrDeletedOrUnused(key, &entry); |
| + if (!present) { |
| + Base::InsertKey(entry, key); |
| + } |
| + Base::UpdatePayload(entry, 0, value); |
| + return present; |
| + } |
| + // Update the value of an existing key. Note that 'key' need not be an Object. |
| + template<typename Key> |
| + void UpdateValue(const Key& key, const Object& value) const { |
| + intptr_t entry = Base::FindKey(key); |
| + ASSERT(entry != -1); |
| + Base::UpdatePayload(entry, 0, value); |
| + } |
| + |
| + template<typename Key> |
| + bool Remove(const Key& key) const { |
| + intptr_t entry = Base::FindKey(key); |
| + if (entry == -1) { |
| + return false; |
| + } else { |
| + Base::DeleteEntry(entry); |
| + return true; |
| + } |
| + } |
| +}; |
| + |
| + |
| +template<typename KeyTraits> |
| +class UnorderedHashMap : public HashMap<UnorderedHashTable<KeyTraits, 1> > { |
| + public: |
| + typedef HashMap<UnorderedHashTable<KeyTraits, 1> > Base; |
| + explicit UnorderedHashMap(Array& data) : Base(data) {} |
| +}; |
| + |
| + |
| +template<typename KeyTraits> |
| +class EnumIndexHashMap : public HashMap<EnumIndexHashTable<KeyTraits, 1> > { |
| + public: |
| + typedef HashMap<EnumIndexHashTable<KeyTraits, 1> > Base; |
| + explicit EnumIndexHashMap(Array& data) : Base(data) {} |
| +}; |
| + |
| + |
| +template<typename Base> |
| +class HashSet : public Base { |
| + public: |
| + explicit HashSet(Array& data) : Base(data) {} |
| + bool Insert(const Object& key) { |
| + HashTables::EnsureLoadFactor(0.0, 0.75, *this); |
| + intptr_t entry = -1; |
| + bool present = Base::FindKeyOrDeletedOrUnused(key, &entry); |
| + if (!present) { |
| + Base::InsertKey(entry, key); |
| + } |
| + return present; |
| + } |
| + template<typename Key> |
| + bool Remove(const Key& key) const { |
| + intptr_t entry = Base::FindKey(key); |
| + if (entry == -1) { |
| + return false; |
| + } else { |
| + Base::DeleteEntry(entry); |
| + return true; |
| + } |
| + } |
| +}; |
| + |
| + |
| +template<typename KeyTraits> |
| +class UnorderedHashSet : public HashSet<UnorderedHashTable<KeyTraits, 0> > { |
| + public: |
| + typedef HashSet<UnorderedHashTable<KeyTraits, 0> > Base; |
| + explicit UnorderedHashSet(Array& data) : Base(data) {} |
| +}; |
| + |
| + |
| +template<typename KeyTraits> |
| +class EnumIndexHashSet : public HashSet<EnumIndexHashTable<KeyTraits, 0> > { |
| + public: |
| + typedef HashSet<EnumIndexHashTable<KeyTraits, 0> > Base; |
| + explicit EnumIndexHashSet(Array& data) : Base(data) {} |
| +}; |
| + |
| +} // namespace dart |
| + |
| +#endif // VM_HASH_TABLE_H_ |