Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(390)

Side by Side Diff: runtime/vm/hash_table.h

Issue 304253002: Hash tables templates, wrapping Array. (Closed) Base URL: http://dart.googlecode.com/svn/branches/bleeding_edge/dart/
Patch Set: Created 6 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | runtime/vm/hash_table_test.cc » ('j') | runtime/vm/hash_table_test.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 #ifndef VM_HASH_TABLE_H_
6 #define VM_HASH_TABLE_H_
7
8 #include <map>
9 #include <vector>
10
11 #include "platform/assert.h"
12 #include "vm/object.h"
13
14 namespace dart {
15
16 // OVERVIEW:
17 //
18 // Hash maps and hash sets all use RawArray as backing storage. At the lowest
19 // level is a generic open-addressing table that supports deletion.
20 // - HashTable
21 // The next layer provides ordering and iteration functionality:
22 // - UnorderedHashTable
23 // - EnumIndexHashTable
24 // - LinkedListHashTable (TODO(koda): Implement.)
25 // The utility class HashTables handles growth and conversion (e.g., converting
26 // a compact EnumIndexHashTable to an iteration-efficient LinkedListHashTable).
27 // The next layer fixes the payload size and provides a natural interface:
28 // - HashMap
29 // - HashSet
30 // Combining either of these with an iteration strategy, we get the templates
31 // intended for use outside this file:
32 // - UnorderedHashMap
33 // - EnumIndexHashMap
34 // - LinkedListHashMap
35 // - UnorderedHashSet
36 // - EnumIndexHashSet
37 // - LinkedListHashSet
38 // Each of these can be finally specialized with KeyTraits to support any set of
39 // lookup key types (e.g., look up a char* in a set of String objects), and
40 // any equality and hash code computation.
41 //
42 // The classes all wrap an Array handle, and metods like HashSet::Insert can
43 // trigger growth into a new RawArray, updating the handle. Debug mode asserts
44 // that 'Release' was called to get the final RawArray before destruction.
45 //
46 // Example use:
47 // typedef UnorderedHashMap<FooTraits> ResolvedNamesMap;
48 // ...
49 // ResolvedNamesMap cache(Array::Handle(resolved_names()));
50 // cache.UpdateOrInsert(name0, obj0);
51 // cache.UpdateOrInsert(name1, obj1);
52 // ...
53 // StorePointer(&raw_ptr()->resolved_names_, cache.Release());
54 //
55 // TODO(koda): When exposing these to Dart code, document and assert that
56 // KeyTraits methods must not run Dart code (since the C++ code doesn't check
57 // for concurrent modification).
58
59
60 // Open-addressing hash table template using a RawArray as backing storage.
61 //
62 // The elements of the array are partitioned into entries:
63 // [ header | metadata | entry0 | entry1 | ... | entryN ]
64 // Each entry contains a key, followed by zero or more payload components,
65 // and has 3 possible states: unused, occupied, or deleted.
66 // The header tracks the number of entries in each state.
67 // Any object except Object::sentinel() and Object::transition_sentinel()
68 // may be stored as a key. Any object may be stored in a payload.
69 //
70 // Parameters
71 // KeyTraits: defines static methods
72 // bool IsMatch(const Key& key, const Object& obj) and
73 // uword Hash(const Key& key) for any number of desired lookup key types.
74 // kPayloadSize: number of components of the payload in each entry.
75 // kMetaDataSize: number of elements reserved (e.g., for iteration order data).
76 template<typename KeyTraits, intptr_t kPayloadSize, intptr_t kMetaDataSize>
77 class HashTable : public ValueObject {
78 public:
79 explicit HashTable(Array& data) : data_(data) {}
80
81 RawArray* Release() {
82 ASSERT(!data_.IsNull());
83 RawArray* array = data_.raw();
84 data_ = Array::null();
85 return array;
86 }
87
88 ~HashTable() {
89 ASSERT(data_.IsNull());
90 }
91
92 // Returns a backing storage size such that 'num_occupied' distinct keys can
93 // be inserted into the table.
94 static intptr_t ArrayLengthForNumOccupied(intptr_t num_occupied) {
95 // The current invariant requires at least one unoccupied entry.
96 // TODO(koda): Adjust after moving to quadratic probing.
siva 2014/06/13 19:51:18 Is there an assertion to ensure that there is at l
koda 2014/06/23 19:44:55 Yes; see InsertKey.
97 intptr_t num_entries = num_occupied + 1;
98 return kFirstKeyIndex + (kEntrySize * num_entries);
99 }
100
101 // Initializes an empty table.
102 void Initialize() const {
103 ASSERT(data_.Length() >= ArrayLengthForNumOccupied(0));
104 Smi& zero = Smi::Handle(Smi::New(0));
105 data_.SetAt(kOccupiedEntriesIndex, zero);
106 data_.SetAt(kDeletedEntriesIndex, zero);
107 for (intptr_t i = kHeaderSize; i < data_.Length(); ++i) {
108 data_.SetAt(i, Object::sentinel());
109 }
110 }
111
112 // Returns whether 'key' matches any key in the table.
113 template<typename Key>
114 bool ContainsKey(const Key& key) const {
115 return FindKey(key) != -1;
116 }
117
118 // Returns the entry that matches 'key', or -1 if none exists.
119 template<typename Key>
120 intptr_t FindKey(const Key& key) const {
121 ASSERT(NumOccupied() < NumEntries());
122 intptr_t probe = static_cast<uword>(KeyTraits::Hash(key)) % NumEntries();
123 Object& obj = Object::Handle();
124 // TODO(koda): Use quadratic probing.
125 for (; ; probe = (probe + 1) % NumEntries()) {
126 if (IsUnused(probe)) {
127 return -1;
128 } else if (IsDeleted(probe)) {
129 continue;
130 } else {
131 obj = GetKey(probe);
132 if (KeyTraits::IsMatch(key, obj)) {
133 return probe;
134 }
135 }
136 }
137 UNREACHABLE();
138 return -1;
139 }
140
141 // Sets *entry to either:
142 // - an occupied entry matching 'key', and returns true, or
143 // - an unused/deleted entry where a matching key may be inserted,
144 // and returns false.
145 template<typename Key>
146 bool FindKeyOrDeletedOrUnused(const Key& key, intptr_t* entry) const {
siva 2014/06/13 19:51:18 ASSERT(entry != NULL);
koda 2014/06/23 19:44:55 Done.
147 ASSERT(NumOccupied() < NumEntries());
148 intptr_t probe = static_cast<uword>(KeyTraits::Hash(key)) % NumEntries();
149 Object& obj = Object::Handle();
150 intptr_t deleted = -1;
151 // TODO(koda): Use quadratic probing.
152 for (; ; probe = (probe + 1) % NumEntries()) {
153 if (IsUnused(probe)) {
154 *entry = (deleted != -1) ? deleted : probe;
155 return false;
156 } else if (IsDeleted(probe)) {
157 deleted = (deleted != -1) ? deleted : probe;
siva 2014/06/13 19:51:18 why not : if (deleted == -1) { deleted = probe;
koda 2014/06/23 19:44:55 Done.
158 } else {
159 obj = GetKey(probe);
160 if (KeyTraits::IsMatch(key, obj)) {
161 *entry = probe;
162 return true;
163 }
164 }
165 }
166 UNREACHABLE();
167 return false;
168 }
169
170 // Sets the key of a previously unoccupied entry. This must not be the last
171 // unoccupied entry.
172 void InsertKey(intptr_t entry, const Object& key) const {
173 ASSERT(!IsOccupied(entry));
174 AdjustSmiValueAt(kOccupiedEntriesIndex, 1);
175 if (IsDeleted(entry)) {
176 AdjustSmiValueAt(kDeletedEntriesIndex, -1);
177 } else {
178 ASSERT(IsUnused(entry));
179 }
180 InternalSetKey(entry, key);
181 ASSERT(IsOccupied(entry));
182 ASSERT(NumOccupied() < NumEntries());
183 }
184
185 bool IsUnused(intptr_t entry) const {
186 return InternalGetKey(entry) == Object::sentinel().raw();
187 }
188 bool IsOccupied(intptr_t entry) const {
189 return !IsUnused(entry) && !IsDeleted(entry);
190 }
191 bool IsDeleted(intptr_t entry) const {
192 return InternalGetKey(entry) == Object::transition_sentinel().raw();
193 }
194
195 RawObject* GetKey(intptr_t entry) const {
196 ASSERT(IsOccupied(entry));
197 return InternalGetKey(entry);
198 }
199 RawObject* GetPayload(intptr_t entry, intptr_t component) const {
200 ASSERT(IsOccupied(entry));
201 return data_.At(PayloadIndex(entry, component));
202 }
203 void UpdatePayload(intptr_t entry,
204 intptr_t component,
205 const Object& value) const {
206 ASSERT(IsOccupied(entry));
207 ASSERT(0 <= component && component < kPayloadSize);
208 data_.SetAt(PayloadIndex(entry, component), value);
209 }
210 // Deletes both the key and payload of the specified entry.
211 void DeleteEntry(intptr_t entry) const {
212 ASSERT(IsOccupied(entry));
213 for (intptr_t i = 0; i < kPayloadSize; ++i) {
214 UpdatePayload(entry, i, Object::transition_sentinel());
215 }
216 InternalSetKey(entry, Object::transition_sentinel());
217 AdjustSmiValueAt(kOccupiedEntriesIndex, -1);
218 AdjustSmiValueAt(kDeletedEntriesIndex, 1);
219 }
220 intptr_t NumEntries() const {
221 return (data_.Length() - kFirstKeyIndex) / kEntrySize;
222 }
223 intptr_t NumUnused() const {
224 return NumEntries() - NumOccupied() - NumDeleted();
225 }
226 intptr_t NumOccupied() const {
227 return GetSmiValueAt(kOccupiedEntriesIndex);
228 }
229 intptr_t NumDeleted() const {
230 return GetSmiValueAt(kDeletedEntriesIndex);
231 }
232
233 protected:
234 static const intptr_t kOccupiedEntriesIndex = 0;
235 static const intptr_t kDeletedEntriesIndex = 1;
236 static const intptr_t kHeaderSize = kDeletedEntriesIndex + 1;
237 static const intptr_t kMetaDataIndex = kHeaderSize;
238 static const intptr_t kFirstKeyIndex = kHeaderSize + kMetaDataSize;
239 static const intptr_t kEntrySize = 1 + kPayloadSize;
240
241 intptr_t KeyIndex(intptr_t entry) const {
242 ASSERT(0 <= entry && entry < NumEntries());
243 return kFirstKeyIndex + (kEntrySize * entry);
244 }
245
246 intptr_t PayloadIndex(intptr_t entry, intptr_t component) const {
247 ASSERT(0 <= component && component < kPayloadSize);
248 return KeyIndex(entry) + 1 + component;
249 }
250
251 RawObject* InternalGetKey(intptr_t entry) const {
252 return data_.At(KeyIndex(entry));
253 }
254
255 void InternalSetKey(intptr_t entry, const Object& key) const {
256 data_.SetAt(KeyIndex(entry), key);
257 }
258
259 intptr_t GetSmiValueAt(intptr_t index) const {
260 Smi& smi = Smi::Handle();
261 smi ^= data_.At(index);
262 return smi.Value();
siva 2014/06/13 19:51:18 You could write this as: ASSERT(Object::Handle(dat
koda 2014/06/23 19:44:55 Done.
263 }
264
265 void SetSmiValueAt(intptr_t index, intptr_t value) const {
266 Smi& smi = Smi::Handle();
267 smi = Smi::New(value);
siva 2014/06/13 19:51:18 const Smi& smi = Smi::Handle(Smi::New(value));
koda 2014/06/23 19:44:55 Done.
268 data_.SetAt(index, smi);
269 }
270
271 void AdjustSmiValueAt(intptr_t index, intptr_t delta) const {
272 Smi& smi = Smi::Handle();
273 smi ^= data_.At(index);
274 smi = Smi::New(smi.Value() + delta);
275 data_.SetAt(index, smi);
siva 2014/06/13 19:51:18 Assuming GetSmiValueAt is changed above to a non h
koda 2014/06/23 19:44:55 Done.
276 }
277
278 Array& data_;
279
280 friend class HashTables;
281 };
282
283
284 // Table with unspecified iteration order. No payload overhead or metadata.
285 template<typename KeyTraits, intptr_t kUserPayloadSize>
286 class UnorderedHashTable : public HashTable<KeyTraits, kUserPayloadSize, 0> {
287 public:
288 typedef HashTable<KeyTraits, kUserPayloadSize, 0> Base;
289 static const intptr_t kPayloadSize = kUserPayloadSize;
290 explicit UnorderedHashTable(Array& data) : Base(data) {}
291 // Note: Does not check for concurrent modification.
292 class Iterator {
293 public:
294 explicit Iterator(const UnorderedHashTable* table)
295 : table_(table), entry_(-1) {}
296 bool MoveNext() {
297 do {
298 ++entry_;
299 } while (entry_ < table_->NumEntries() && !table_->IsOccupied(entry_));
siva 2014/06/13 19:51:18 One could call MoveNext() repeatedly cause an inte
koda 2014/06/23 19:44:55 True. Rewrote the loop to avoid this.
300 return entry_ < table_->NumEntries();
301 }
302 intptr_t Current() {
303 return entry_;
304 }
305
306 private:
307 const UnorderedHashTable* table_;
308 intptr_t entry_;
309 };
310
311 void Initialize() const {
312 Base::Initialize();
313 }
314 void InsertKey(intptr_t entry, const Object& key) const {
315 Base::InsertKey(entry, key);
316 }
317 void DeleteEntry(intptr_t entry) const {
318 Base::DeleteEntry(entry);
319 }
siva 2014/06/13 19:51:18 Why is it necessary to have these methods which ju
koda 2014/06/23 19:44:55 They are not necessary. I just wanted to be expli
320 };
321
322
323 // Table with insertion order, using one payload component for the enumeration
324 // index, and one metadata element for the next enumeration index.
325 template<typename KeyTraits, intptr_t kUserPayloadSize>
326 class EnumIndexHashTable
327 : public HashTable<KeyTraits, kUserPayloadSize + 1, 1> {
328 public:
329 typedef HashTable<KeyTraits, kUserPayloadSize + 1, 1> Base;
330 static const intptr_t kPayloadSize = kUserPayloadSize;
331 static const intptr_t kNextEnumIndex = Base::kMetaDataIndex;
332 explicit EnumIndexHashTable(Array& data) : Base(data) {}
333 // Note: Does not check for concurrent modification.
334 class Iterator {
335 public:
336 explicit Iterator(const EnumIndexHashTable* table) : index_(-1) {
337 // TODO(koda): Use GrowableArray after adding stateful comparator support.
338 std::map<intptr_t, intptr_t> enum_to_entry;
339 for (intptr_t i = 0; i < table->NumEntries(); ++i) {
340 if (table->IsOccupied(i)) {
341 intptr_t enum_index =
342 table->GetSmiValueAt(table->PayloadIndex(i, kPayloadSize));
343 enum_to_entry[enum_index] = i;
344 }
345 }
346 for (std::map<intptr_t, intptr_t>::iterator it = enum_to_entry.begin();
347 it != enum_to_entry.end();
348 ++it) {
349 entries_.push_back(it->second);
350 }
351 }
352 bool MoveNext() {
353 ++index_;
siva 2014/06/13 19:51:18 Ditto comment, I feel MoveNext should not allow th
koda 2014/06/23 19:44:55 Done.
354 return index_ < static_cast<intptr_t>(entries_.size());
355 }
356 intptr_t Current() {
357 return entries_[index_];
358 }
359
360 private:
361 intptr_t index_;
362 std::vector<intptr_t> entries_;
363 };
364
365 void Initialize() const {
366 Base::Initialize();
367 Base::SetSmiValueAt(kNextEnumIndex, 0);
368 }
369 void InsertKey(intptr_t entry, const Object& key) const {
370 Base::InsertKey(entry, key);
371 Smi& next_enum_index =
372 Smi::Handle(Smi::New(Base::GetSmiValueAt(kNextEnumIndex)));
373 Base::UpdatePayload(entry, kPayloadSize, next_enum_index);
374 Base::AdjustSmiValueAt(kNextEnumIndex, 1);
375 }
376 void DeleteEntry(intptr_t entry) const {
377 Base::DeleteEntry(entry);
378 }
siva 2014/06/13 19:51:18 Ditto comment about DeleteEntry, is it needed?
koda 2014/06/23 19:44:55 Done.
379 };
380
381
382 class HashTables : public AllStatic {
383 public:
384 // Allocates and initializes a table.
385 template<typename Table>
386 static RawArray* New(intptr_t initial_capacity) {
387 Table table(Array::Handle(Array::New(
388 Table::ArrayLengthForNumOccupied(initial_capacity))));
389 table.Initialize();
390 return table.Release();
391 }
392
393 // Clears 'to' and inserts all elements from 'from', in iteration order.
394 // The tables must have the same user payload size.
395 template<typename From, typename To>
396 static void Copy(const From& from, const To& to) {
397 COMPILE_ASSERT(From::kPayloadSize == To::kPayloadSize);
398 to.Initialize();
399 ASSERT(from.NumOccupied() < to.NumEntries());
400 typename From::Iterator it(&from);
401 Object& obj = Object::Handle();
402 while (it.MoveNext()) {
403 intptr_t from_entry = it.Current();
404 obj = from.GetKey(from_entry);
405 intptr_t to_entry = -1;
406 const Object& key = obj;
407 bool present = to.FindKeyOrDeletedOrUnused(key, &to_entry);
408 ASSERT(!present);
409 to.InsertKey(to_entry, obj);
410 for (intptr_t i = 0; i < From::kPayloadSize; ++i) {
411 obj = from.GetPayload(from_entry, i);
412 to.UpdatePayload(to_entry, i, obj);
413 }
414 }
415 }
416
417 template<typename Table>
418 static void EnsureLoadFactor(double low, double high, const Table& table) {
419 double current = (1 + table.NumOccupied() + table.NumDeleted()) /
420 static_cast<double>(table.NumEntries());
421 if (low <= current && current < high) {
422 return;
423 }
424 double target = (low + high) / 2.0;
425 intptr_t new_capacity = (1 + table.NumOccupied()) / target;
426 Table new_table(Array::Handle(New<Table>(new_capacity)));
427 Copy(table, new_table);
428 table.data_ = new_table.Release();
429 }
430
431 // Serializes a table by concatenating its entries as an array.
432 template<typename Table>
433 static RawArray* ToArray(const Table& table, bool include_payload) {
434 const intptr_t entry_size = include_payload ? (1 + Table::kPayloadSize) : 1;
435 Array& result = Array::Handle(Array::New(table.NumOccupied() * entry_size));
436 typename Table::Iterator it(&table);
437 Object& obj = Object::Handle();
438 intptr_t result_index = 0;
439 while (it.MoveNext()) {
440 intptr_t entry = it.Current();
441 obj = table.GetKey(entry);
442 result.SetAt(result_index++, obj);
443 if (include_payload) {
444 for (intptr_t i = 0; i < Table::kPayloadSize; ++i) {
445 obj = table.GetPayload(entry, i);
446 result.SetAt(result_index++, obj);
447 }
448 }
449 }
450 return result.raw();
451 }
452 };
453
454
455 template<typename Base>
456 class HashMap : public Base {
457 public:
458 explicit HashMap(Array& data) : Base(data) {}
459 template<typename Key>
460 RawObject* GetOrNull(const Key& key, bool* present = NULL) const {
461 intptr_t entry = Base::FindKey(key);
462 if (present != NULL) {
463 *present = (entry != -1);
464 }
465 return (entry == -1) ? Object::null() : Base::GetPayload(entry, 0);
466 }
467 bool UpdateOrInsert(const Object& key, const Object& value) const {
468 HashTables::EnsureLoadFactor(0.0, 0.75, *this);
469 intptr_t entry = -1;
470 bool present = Base::FindKeyOrDeletedOrUnused(key, &entry);
471 if (!present) {
472 Base::InsertKey(entry, key);
473 }
474 Base::UpdatePayload(entry, 0, value);
475 return present;
476 }
477 // Update the value of an existing key. Note that 'key' need not be an Object.
478 template<typename Key>
479 void UpdateValue(const Key& key, const Object& value) const {
480 intptr_t entry = Base::FindKey(key);
481 ASSERT(entry != -1);
482 Base::UpdatePayload(entry, 0, value);
483 }
484
485 template<typename Key>
486 bool Remove(const Key& key) const {
487 intptr_t entry = Base::FindKey(key);
488 if (entry == -1) {
489 return false;
490 } else {
491 Base::DeleteEntry(entry);
492 return true;
493 }
494 }
495 };
496
497
498 template<typename KeyTraits>
499 class UnorderedHashMap : public HashMap<UnorderedHashTable<KeyTraits, 1> > {
500 public:
501 typedef HashMap<UnorderedHashTable<KeyTraits, 1> > Base;
502 explicit UnorderedHashMap(Array& data) : Base(data) {}
503 };
504
505
506 template<typename KeyTraits>
507 class EnumIndexHashMap : public HashMap<EnumIndexHashTable<KeyTraits, 1> > {
508 public:
509 typedef HashMap<EnumIndexHashTable<KeyTraits, 1> > Base;
510 explicit EnumIndexHashMap(Array& data) : Base(data) {}
511 };
512
513
514 template<typename Base>
515 class HashSet : public Base {
516 public:
517 explicit HashSet(Array& data) : Base(data) {}
518 bool Insert(const Object& key) {
519 HashTables::EnsureLoadFactor(0.0, 0.75, *this);
520 intptr_t entry = -1;
521 bool present = Base::FindKeyOrDeletedOrUnused(key, &entry);
522 if (!present) {
523 Base::InsertKey(entry, key);
524 }
525 return present;
526 }
527 template<typename Key>
528 bool Remove(const Key& key) const {
529 intptr_t entry = Base::FindKey(key);
530 if (entry == -1) {
531 return false;
532 } else {
533 Base::DeleteEntry(entry);
534 return true;
535 }
536 }
537 };
538
539
540 template<typename KeyTraits>
541 class UnorderedHashSet : public HashSet<UnorderedHashTable<KeyTraits, 0> > {
542 public:
543 typedef HashSet<UnorderedHashTable<KeyTraits, 0> > Base;
544 explicit UnorderedHashSet(Array& data) : Base(data) {}
545 };
546
547
548 template<typename KeyTraits>
549 class EnumIndexHashSet : public HashSet<EnumIndexHashTable<KeyTraits, 0> > {
550 public:
551 typedef HashSet<EnumIndexHashTable<KeyTraits, 0> > Base;
552 explicit EnumIndexHashSet(Array& data) : Base(data) {}
553 };
554
555 } // namespace dart
556
557 #endif // VM_HASH_TABLE_H_
OLDNEW
« no previous file with comments | « no previous file | runtime/vm/hash_table_test.cc » ('j') | runtime/vm/hash_table_test.cc » ('J')

Powered by Google App Engine
This is Rietveld 408576698