Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(101)

Unified Diff: runtime/vm/hash_table.h

Issue 304253002: Hash tables templates, wrapping Array. (Closed) Base URL: http://dart.googlecode.com/svn/branches/bleeding_edge/dart/
Patch Set: Created 6 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | runtime/vm/hash_table_test.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: runtime/vm/hash_table.h
===================================================================
--- runtime/vm/hash_table.h (revision 0)
+++ runtime/vm/hash_table.h (revision 0)
@@ -0,0 +1,557 @@
+// Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+#ifndef VM_HASH_TABLE_H_
+#define VM_HASH_TABLE_H_
+
+// Temporarily used when sorting the indices in EnumIndexHashTable.
+// TODO(koda): Remove these dependencies before using in production.
+#include <map>
+#include <vector>
+
+#include "platform/assert.h"
+#include "vm/object.h"
+
+namespace dart {
+
+// OVERVIEW:
+//
+// Hash maps and hash sets all use RawArray as backing storage. At the lowest
+// level is a generic open-addressing table that supports deletion.
+// - HashTable
+// The next layer provides ordering and iteration functionality:
+// - UnorderedHashTable
+// - EnumIndexHashTable
+// - LinkedListHashTable (TODO(koda): Implement.)
+// The utility class HashTables handles growth and conversion (e.g., converting
+// a compact EnumIndexHashTable to an iteration-efficient LinkedListHashTable).
+// The next layer fixes the payload size and provides a natural interface:
+// - HashMap
+// - HashSet
+// Combining either of these with an iteration strategy, we get the templates
+// intended for use outside this file:
+// - UnorderedHashMap
+// - EnumIndexHashMap
+// - LinkedListHashMap
+// - UnorderedHashSet
+// - EnumIndexHashSet
+// - LinkedListHashSet
+// Each of these can be finally specialized with KeyTraits to support any set of
+// lookup key types (e.g., look up a char* in a set of String objects), and
+// any equality and hash code computation.
+//
+// The classes all wrap an Array handle, and metods like HashSet::Insert can
+// trigger growth into a new RawArray, updating the handle. Debug mode asserts
+// that 'Release' was called to get the final RawArray before destruction.
+//
+// Example use:
+// typedef UnorderedHashMap<FooTraits> ResolvedNamesMap;
+// ...
+// ResolvedNamesMap cache(Array::Handle(resolved_names()));
+// cache.UpdateOrInsert(name0, obj0);
+// cache.UpdateOrInsert(name1, obj1);
+// ...
+// StorePointer(&raw_ptr()->resolved_names_, cache.Release());
+//
+// TODO(koda): When exposing these to Dart code, document and assert that
+// KeyTraits methods must not run Dart code (since the C++ code doesn't check
+// for concurrent modification).
+
+
+// Open-addressing hash table template using a RawArray as backing storage.
+//
+// The elements of the array are partitioned into entries:
+// [ header | metadata | entry0 | entry1 | ... | entryN ]
+// Each entry contains a key, followed by zero or more payload components,
+// and has 3 possible states: unused, occupied, or deleted.
+// The header tracks the number of entries in each state.
+// Any object except Object::sentinel() and Object::transition_sentinel()
+// may be stored as a key. Any object may be stored in a payload.
+//
+// Parameters
+// KeyTraits: defines static methods
+// bool IsMatch(const Key& key, const Object& obj) and
+// uword Hash(const Key& key) for any number of desired lookup key types.
+// kPayloadSize: number of components of the payload in each entry.
+// kMetaDataSize: number of elements reserved (e.g., for iteration order data).
+template<typename KeyTraits, intptr_t kPayloadSize, intptr_t kMetaDataSize>
+class HashTable : public ValueObject {
+ public:
+ explicit HashTable(Array& data) : data_(data) {}
+
+ RawArray* Release() {
+ ASSERT(!data_.IsNull());
+ RawArray* array = data_.raw();
+ data_ = Array::null();
+ return array;
+ }
+
+ ~HashTable() {
+ ASSERT(data_.IsNull());
+ }
+
+ // Returns a backing storage size such that 'num_occupied' distinct keys can
+ // be inserted into the table.
+ static intptr_t ArrayLengthForNumOccupied(intptr_t num_occupied) {
+ // The current invariant requires at least one unoccupied entry.
+ // TODO(koda): Adjust if moving to quadratic probing.
+ intptr_t num_entries = num_occupied + 1;
+ return kFirstKeyIndex + (kEntrySize * num_entries);
+ }
+
+ // Initializes an empty table.
+ void Initialize() const {
+ ASSERT(data_.Length() >= ArrayLengthForNumOccupied(0));
+ Smi& zero = Smi::Handle(Smi::New(0));
+ data_.SetAt(kOccupiedEntriesIndex, zero);
+ data_.SetAt(kDeletedEntriesIndex, zero);
+ for (intptr_t i = kHeaderSize; i < data_.Length(); ++i) {
+ data_.SetAt(i, Object::sentinel());
+ }
+ }
+
+ // Returns whether 'key' matches any key in the table.
+ template<typename Key>
+ bool ContainsKey(const Key& key) const {
+ return FindKey(key) != -1;
+ }
+
+ // Returns the entry that matches 'key', or -1 if none exists.
+ template<typename Key>
+ intptr_t FindKey(const Key& key) const {
+ ASSERT(NumOccupied() < NumEntries());
+ // TODO(koda): Add salt.
+ intptr_t probe = static_cast<uword>(KeyTraits::Hash(key)) % NumEntries();
+ Object& obj = Object::Handle();
+ // TODO(koda): Consider quadratic probing.
+ for (; ; probe = (probe + 1) % NumEntries()) {
+ if (IsUnused(probe)) {
+ return -1;
+ } else if (IsDeleted(probe)) {
+ continue;
+ } else {
+ obj = GetKey(probe);
+ if (KeyTraits::IsMatch(key, obj)) {
+ return probe;
+ }
+ }
+ }
+ UNREACHABLE();
+ return -1;
+ }
+
+ // Sets *entry to either:
+ // - an occupied entry matching 'key', and returns true, or
+ // - an unused/deleted entry where a matching key may be inserted,
+ // and returns false.
+ template<typename Key>
+ bool FindKeyOrDeletedOrUnused(const Key& key, intptr_t* entry) const {
+ ASSERT(entry != NULL);
+ ASSERT(NumOccupied() < NumEntries());
+ intptr_t probe = static_cast<uword>(KeyTraits::Hash(key)) % NumEntries();
+ Object& obj = Object::Handle();
+ intptr_t deleted = -1;
+ // TODO(koda): Consider quadratic probing.
+ for (; ; probe = (probe + 1) % NumEntries()) {
+ if (IsUnused(probe)) {
+ *entry = (deleted != -1) ? deleted : probe;
+ return false;
+ } else if (IsDeleted(probe)) {
+ if (deleted == -1) {
+ deleted = probe;
+ }
+ } else {
+ obj = GetKey(probe);
+ if (KeyTraits::IsMatch(key, obj)) {
+ *entry = probe;
+ return true;
+ }
+ }
+ }
+ UNREACHABLE();
+ return false;
+ }
+
+ // Sets the key of a previously unoccupied entry. This must not be the last
+ // unoccupied entry.
+ void InsertKey(intptr_t entry, const Object& key) const {
+ ASSERT(!IsOccupied(entry));
+ AdjustSmiValueAt(kOccupiedEntriesIndex, 1);
+ if (IsDeleted(entry)) {
+ AdjustSmiValueAt(kDeletedEntriesIndex, -1);
+ } else {
+ ASSERT(IsUnused(entry));
+ }
+ InternalSetKey(entry, key);
+ ASSERT(IsOccupied(entry));
+ ASSERT(NumOccupied() < NumEntries());
+ }
+
+ bool IsUnused(intptr_t entry) const {
+ return InternalGetKey(entry) == Object::sentinel().raw();
+ }
+ bool IsOccupied(intptr_t entry) const {
+ return !IsUnused(entry) && !IsDeleted(entry);
+ }
+ bool IsDeleted(intptr_t entry) const {
+ return InternalGetKey(entry) == Object::transition_sentinel().raw();
+ }
+
+ RawObject* GetKey(intptr_t entry) const {
+ ASSERT(IsOccupied(entry));
+ return InternalGetKey(entry);
+ }
+ RawObject* GetPayload(intptr_t entry, intptr_t component) const {
+ ASSERT(IsOccupied(entry));
+ return data_.At(PayloadIndex(entry, component));
+ }
+ void UpdatePayload(intptr_t entry,
+ intptr_t component,
+ const Object& value) const {
+ ASSERT(IsOccupied(entry));
+ ASSERT(0 <= component && component < kPayloadSize);
+ data_.SetAt(PayloadIndex(entry, component), value);
+ }
+ // Deletes both the key and payload of the specified entry.
+ void DeleteEntry(intptr_t entry) const {
+ ASSERT(IsOccupied(entry));
+ for (intptr_t i = 0; i < kPayloadSize; ++i) {
+ UpdatePayload(entry, i, Object::transition_sentinel());
+ }
+ InternalSetKey(entry, Object::transition_sentinel());
+ AdjustSmiValueAt(kOccupiedEntriesIndex, -1);
+ AdjustSmiValueAt(kDeletedEntriesIndex, 1);
+ }
+ intptr_t NumEntries() const {
+ return (data_.Length() - kFirstKeyIndex) / kEntrySize;
+ }
+ intptr_t NumUnused() const {
+ return NumEntries() - NumOccupied() - NumDeleted();
+ }
+ intptr_t NumOccupied() const {
+ return GetSmiValueAt(kOccupiedEntriesIndex);
+ }
+ intptr_t NumDeleted() const {
+ return GetSmiValueAt(kDeletedEntriesIndex);
+ }
+
+ protected:
+ static const intptr_t kOccupiedEntriesIndex = 0;
+ static const intptr_t kDeletedEntriesIndex = 1;
+ static const intptr_t kHeaderSize = kDeletedEntriesIndex + 1;
+ static const intptr_t kMetaDataIndex = kHeaderSize;
+ static const intptr_t kFirstKeyIndex = kHeaderSize + kMetaDataSize;
+ static const intptr_t kEntrySize = 1 + kPayloadSize;
+
+ intptr_t KeyIndex(intptr_t entry) const {
+ ASSERT(0 <= entry && entry < NumEntries());
+ return kFirstKeyIndex + (kEntrySize * entry);
+ }
+
+ intptr_t PayloadIndex(intptr_t entry, intptr_t component) const {
+ ASSERT(0 <= component && component < kPayloadSize);
+ return KeyIndex(entry) + 1 + component;
+ }
+
+ RawObject* InternalGetKey(intptr_t entry) const {
+ return data_.At(KeyIndex(entry));
+ }
+
+ void InternalSetKey(intptr_t entry, const Object& key) const {
+ data_.SetAt(KeyIndex(entry), key);
+ }
+
+ intptr_t GetSmiValueAt(intptr_t index) const {
+ ASSERT(Object::Handle(data_.At(index)).IsSmi());
+ return Smi::Value(Smi::RawCast(data_.At(index)));
+ }
+
+ void SetSmiValueAt(intptr_t index, intptr_t value) const {
+ const Smi& smi = Smi::Handle(Smi::New(value));
+ data_.SetAt(index, smi);
+ }
+
+ void AdjustSmiValueAt(intptr_t index, intptr_t delta) const {
+ SetSmiValueAt(index, (GetSmiValueAt(index) + delta));
+ }
+
+ Array& data_;
+
+ friend class HashTables;
+};
+
+
+// Table with unspecified iteration order. No payload overhead or metadata.
+template<typename KeyTraits, intptr_t kUserPayloadSize>
+class UnorderedHashTable : public HashTable<KeyTraits, kUserPayloadSize, 0> {
+ public:
+ typedef HashTable<KeyTraits, kUserPayloadSize, 0> Base;
+ static const intptr_t kPayloadSize = kUserPayloadSize;
+ explicit UnorderedHashTable(Array& data) : Base(data) {}
+ // Note: Does not check for concurrent modification.
+ class Iterator {
+ public:
+ explicit Iterator(const UnorderedHashTable* table)
+ : table_(table), entry_(-1) {}
+ bool MoveNext() {
+ while (entry_ < (table_->NumEntries() - 1)) {
+ ++entry_;
+ if (table_->IsOccupied(entry_)) {
+ return true;
+ }
+ }
+ return false;
+ }
+ intptr_t Current() {
+ return entry_;
+ }
+
+ private:
+ const UnorderedHashTable* table_;
+ intptr_t entry_;
+ };
+
+ // No extra book-keeping needed for Initialize, InsertKey, DeleteEntry.
+};
+
+
+// Table with insertion order, using one payload component for the enumeration
+// index, and one metadata element for the next enumeration index.
+template<typename KeyTraits, intptr_t kUserPayloadSize>
+class EnumIndexHashTable
+ : public HashTable<KeyTraits, kUserPayloadSize + 1, 1> {
+ public:
+ typedef HashTable<KeyTraits, kUserPayloadSize + 1, 1> Base;
+ static const intptr_t kPayloadSize = kUserPayloadSize;
+ static const intptr_t kNextEnumIndex = Base::kMetaDataIndex;
+ explicit EnumIndexHashTable(Array& data) : Base(data) {}
+ // Note: Does not check for concurrent modification.
+ class Iterator {
+ public:
+ explicit Iterator(const EnumIndexHashTable* table) : index_(-1) {
+ // TODO(koda): Use GrowableArray after adding stateful comparator support.
+ std::map<intptr_t, intptr_t> enum_to_entry;
+ for (intptr_t i = 0; i < table->NumEntries(); ++i) {
+ if (table->IsOccupied(i)) {
+ intptr_t enum_index =
+ table->GetSmiValueAt(table->PayloadIndex(i, kPayloadSize));
+ enum_to_entry[enum_index] = i;
+ }
+ }
+ for (std::map<intptr_t, intptr_t>::iterator it = enum_to_entry.begin();
+ it != enum_to_entry.end();
+ ++it) {
+ entries_.push_back(it->second);
+ }
+ }
+ bool MoveNext() {
+ if (index_ < (static_cast<intptr_t>(entries_.size() - 1))) {
+ index_++;
+ return true;
+ }
+ return false;
+ }
+ intptr_t Current() {
+ return entries_[index_];
+ }
+
+ private:
+ intptr_t index_;
+ std::vector<intptr_t> entries_;
+ };
+
+ void Initialize() const {
+ Base::Initialize();
+ Base::SetSmiValueAt(kNextEnumIndex, 0);
+ }
+
+ void InsertKey(intptr_t entry, const Object& key) const {
+ Base::InsertKey(entry, key);
+ const Smi& next_enum_index =
+ Smi::Handle(Smi::New(Base::GetSmiValueAt(kNextEnumIndex)));
+ Base::UpdatePayload(entry, kPayloadSize, next_enum_index);
+ // TODO(koda): Handle possible Smi overflow from repeated insert/delete.
+ Base::AdjustSmiValueAt(kNextEnumIndex, 1);
+ }
+
+ // No extra book-keeping needed for DeleteEntry.
+};
+
+
+class HashTables : public AllStatic {
+ public:
+ // Allocates and initializes a table.
+ template<typename Table>
+ static RawArray* New(intptr_t initial_capacity) {
+ Table table(Array::Handle(Array::New(
+ Table::ArrayLengthForNumOccupied(initial_capacity))));
+ table.Initialize();
+ return table.Release();
+ }
+
+ // Clears 'to' and inserts all elements from 'from', in iteration order.
+ // The tables must have the same user payload size.
+ template<typename From, typename To>
+ static void Copy(const From& from, const To& to) {
+ COMPILE_ASSERT(From::kPayloadSize == To::kPayloadSize);
+ to.Initialize();
+ ASSERT(from.NumOccupied() < to.NumEntries());
+ typename From::Iterator it(&from);
+ Object& obj = Object::Handle();
+ while (it.MoveNext()) {
+ intptr_t from_entry = it.Current();
+ obj = from.GetKey(from_entry);
+ intptr_t to_entry = -1;
+ const Object& key = obj;
+ bool present = to.FindKeyOrDeletedOrUnused(key, &to_entry);
+ ASSERT(!present);
+ to.InsertKey(to_entry, obj);
+ for (intptr_t i = 0; i < From::kPayloadSize; ++i) {
+ obj = from.GetPayload(from_entry, i);
+ to.UpdatePayload(to_entry, i, obj);
+ }
+ }
+ }
+
+ template<typename Table>
+ static void EnsureLoadFactor(double low, double high, const Table& table) {
+ double current = (1 + table.NumOccupied() + table.NumDeleted()) /
+ static_cast<double>(table.NumEntries());
+ if (low <= current && current < high) {
+ return;
+ }
+ double target = (low + high) / 2.0;
+ intptr_t new_capacity = (1 + table.NumOccupied()) / target;
+ Table new_table(Array::Handle(New<Table>(new_capacity)));
+ Copy(table, new_table);
+ table.data_ = new_table.Release();
+ }
+
+ // Serializes a table by concatenating its entries as an array.
+ template<typename Table>
+ static RawArray* ToArray(const Table& table, bool include_payload) {
+ const intptr_t entry_size = include_payload ? (1 + Table::kPayloadSize) : 1;
+ Array& result = Array::Handle(Array::New(table.NumOccupied() * entry_size));
+ typename Table::Iterator it(&table);
+ Object& obj = Object::Handle();
+ intptr_t result_index = 0;
+ while (it.MoveNext()) {
+ intptr_t entry = it.Current();
+ obj = table.GetKey(entry);
+ result.SetAt(result_index++, obj);
+ if (include_payload) {
+ for (intptr_t i = 0; i < Table::kPayloadSize; ++i) {
+ obj = table.GetPayload(entry, i);
+ result.SetAt(result_index++, obj);
+ }
+ }
+ }
+ return result.raw();
+ }
+};
+
+
+template<typename Base>
+class HashMap : public Base {
+ public:
+ explicit HashMap(Array& data) : Base(data) {}
+ template<typename Key>
+ RawObject* GetOrNull(const Key& key, bool* present = NULL) const {
+ intptr_t entry = Base::FindKey(key);
+ if (present != NULL) {
+ *present = (entry != -1);
+ }
+ return (entry == -1) ? Object::null() : Base::GetPayload(entry, 0);
+ }
+ bool UpdateOrInsert(const Object& key, const Object& value) const {
+ HashTables::EnsureLoadFactor(0.0, 0.75, *this);
+ intptr_t entry = -1;
+ bool present = Base::FindKeyOrDeletedOrUnused(key, &entry);
+ if (!present) {
+ Base::InsertKey(entry, key);
+ }
+ Base::UpdatePayload(entry, 0, value);
+ return present;
+ }
+ // Update the value of an existing key. Note that 'key' need not be an Object.
+ template<typename Key>
+ void UpdateValue(const Key& key, const Object& value) const {
+ intptr_t entry = Base::FindKey(key);
+ ASSERT(entry != -1);
+ Base::UpdatePayload(entry, 0, value);
+ }
+
+ template<typename Key>
+ bool Remove(const Key& key) const {
+ intptr_t entry = Base::FindKey(key);
+ if (entry == -1) {
+ return false;
+ } else {
+ Base::DeleteEntry(entry);
+ return true;
+ }
+ }
+};
+
+
+template<typename KeyTraits>
+class UnorderedHashMap : public HashMap<UnorderedHashTable<KeyTraits, 1> > {
+ public:
+ typedef HashMap<UnorderedHashTable<KeyTraits, 1> > Base;
+ explicit UnorderedHashMap(Array& data) : Base(data) {}
+};
+
+
+template<typename KeyTraits>
+class EnumIndexHashMap : public HashMap<EnumIndexHashTable<KeyTraits, 1> > {
+ public:
+ typedef HashMap<EnumIndexHashTable<KeyTraits, 1> > Base;
+ explicit EnumIndexHashMap(Array& data) : Base(data) {}
+};
+
+
+template<typename Base>
+class HashSet : public Base {
+ public:
+ explicit HashSet(Array& data) : Base(data) {}
+ bool Insert(const Object& key) {
+ HashTables::EnsureLoadFactor(0.0, 0.75, *this);
+ intptr_t entry = -1;
+ bool present = Base::FindKeyOrDeletedOrUnused(key, &entry);
+ if (!present) {
+ Base::InsertKey(entry, key);
+ }
+ return present;
+ }
+ template<typename Key>
+ bool Remove(const Key& key) const {
+ intptr_t entry = Base::FindKey(key);
+ if (entry == -1) {
+ return false;
+ } else {
+ Base::DeleteEntry(entry);
+ return true;
+ }
+ }
+};
+
+
+template<typename KeyTraits>
+class UnorderedHashSet : public HashSet<UnorderedHashTable<KeyTraits, 0> > {
+ public:
+ typedef HashSet<UnorderedHashTable<KeyTraits, 0> > Base;
+ explicit UnorderedHashSet(Array& data) : Base(data) {}
+};
+
+
+template<typename KeyTraits>
+class EnumIndexHashSet : public HashSet<EnumIndexHashTable<KeyTraits, 0> > {
+ public:
+ typedef HashSet<EnumIndexHashTable<KeyTraits, 0> > Base;
+ explicit EnumIndexHashSet(Array& data) : Base(data) {}
+};
+
+} // namespace dart
+
+#endif // VM_HASH_TABLE_H_
« no previous file with comments | « no previous file | runtime/vm/hash_table_test.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698