OLD | NEW |
| (Empty) |
1 // Copyright (c) 2014, the Dart project authors. Please see the AUTHORS file | |
2 // for details. All rights reserved. Use of this source code is governed by a | |
3 // BSD-style license that can be found in the LICENSE file. | |
4 | |
5 #ifndef VM_HASH_TABLE_H_ | |
6 #define VM_HASH_TABLE_H_ | |
7 | |
8 // Temporarily used when sorting the indices in EnumIndexHashTable. | |
9 // TODO(koda): Remove these dependencies before using in production. | |
10 #include <map> | |
11 #include <vector> | |
12 | |
13 #include "platform/assert.h" | |
14 #include "vm/object.h" | |
15 | |
16 namespace dart { | |
17 | |
18 // OVERVIEW: | |
19 // | |
20 // Hash maps and hash sets all use RawArray as backing storage. At the lowest | |
21 // level is a generic open-addressing table that supports deletion. | |
22 // - HashTable | |
23 // The next layer provides ordering and iteration functionality: | |
24 // - UnorderedHashTable | |
25 // - EnumIndexHashTable | |
26 // - LinkedListHashTable (TODO(koda): Implement.) | |
27 // The utility class HashTables handles growth and conversion (e.g., converting | |
28 // a compact EnumIndexHashTable to an iteration-efficient LinkedListHashTable). | |
29 // The next layer fixes the payload size and provides a natural interface: | |
30 // - HashMap | |
31 // - HashSet | |
32 // Combining either of these with an iteration strategy, we get the templates | |
33 // intended for use outside this file: | |
34 // - UnorderedHashMap | |
35 // - EnumIndexHashMap | |
36 // - LinkedListHashMap | |
37 // - UnorderedHashSet | |
38 // - EnumIndexHashSet | |
39 // - LinkedListHashSet | |
40 // Each of these can be finally specialized with KeyTraits to support any set of | |
41 // lookup key types (e.g., look up a char* in a set of String objects), and | |
42 // any equality and hash code computation. | |
43 // | |
44 // The classes all wrap an Array handle, and metods like HashSet::Insert can | |
45 // trigger growth into a new RawArray, updating the handle. Debug mode asserts | |
46 // that 'Release' was called to get the final RawArray before destruction. | |
47 // | |
48 // Example use: | |
49 // typedef UnorderedHashMap<FooTraits> ResolvedNamesMap; | |
50 // ... | |
51 // ResolvedNamesMap cache(Array::Handle(resolved_names())); | |
52 // cache.UpdateOrInsert(name0, obj0); | |
53 // cache.UpdateOrInsert(name1, obj1); | |
54 // ... | |
55 // StorePointer(&raw_ptr()->resolved_names_, cache.Release()); | |
56 // | |
57 // TODO(koda): When exposing these to Dart code, document and assert that | |
58 // KeyTraits methods must not run Dart code (since the C++ code doesn't check | |
59 // for concurrent modification). | |
60 | |
61 | |
62 // Open-addressing hash table template using a RawArray as backing storage. | |
63 // | |
64 // The elements of the array are partitioned into entries: | |
65 // [ header | metadata | entry0 | entry1 | ... | entryN ] | |
66 // Each entry contains a key, followed by zero or more payload components, | |
67 // and has 3 possible states: unused, occupied, or deleted. | |
68 // The header tracks the number of entries in each state. | |
69 // Any object except Object::sentinel() and Object::transition_sentinel() | |
70 // may be stored as a key. Any object may be stored in a payload. | |
71 // | |
72 // Parameters | |
73 // KeyTraits: defines static methods | |
74 // bool IsMatch(const Key& key, const Object& obj) and | |
75 // uword Hash(const Key& key) for any number of desired lookup key types. | |
76 // kPayloadSize: number of components of the payload in each entry. | |
77 // kMetaDataSize: number of elements reserved (e.g., for iteration order data). | |
78 template<typename KeyTraits, intptr_t kPayloadSize, intptr_t kMetaDataSize> | |
79 class HashTable : public ValueObject { | |
80 public: | |
81 explicit HashTable(Array& data) : data_(data) {} | |
82 | |
83 RawArray* Release() { | |
84 ASSERT(!data_.IsNull()); | |
85 RawArray* array = data_.raw(); | |
86 data_ = Array::null(); | |
87 return array; | |
88 } | |
89 | |
90 ~HashTable() { | |
91 ASSERT(data_.IsNull()); | |
92 } | |
93 | |
94 // Returns a backing storage size such that 'num_occupied' distinct keys can | |
95 // be inserted into the table. | |
96 static intptr_t ArrayLengthForNumOccupied(intptr_t num_occupied) { | |
97 // The current invariant requires at least one unoccupied entry. | |
98 // TODO(koda): Adjust if moving to quadratic probing. | |
99 intptr_t num_entries = num_occupied + 1; | |
100 return kFirstKeyIndex + (kEntrySize * num_entries); | |
101 } | |
102 | |
103 // Initializes an empty table. | |
104 void Initialize() const { | |
105 ASSERT(data_.Length() >= ArrayLengthForNumOccupied(0)); | |
106 Smi& zero = Smi::Handle(Smi::New(0)); | |
107 data_.SetAt(kOccupiedEntriesIndex, zero); | |
108 data_.SetAt(kDeletedEntriesIndex, zero); | |
109 for (intptr_t i = kHeaderSize; i < data_.Length(); ++i) { | |
110 data_.SetAt(i, Object::sentinel()); | |
111 } | |
112 } | |
113 | |
114 // Returns whether 'key' matches any key in the table. | |
115 template<typename Key> | |
116 bool ContainsKey(const Key& key) const { | |
117 return FindKey(key) != -1; | |
118 } | |
119 | |
120 // Returns the entry that matches 'key', or -1 if none exists. | |
121 template<typename Key> | |
122 intptr_t FindKey(const Key& key) const { | |
123 ASSERT(NumOccupied() < NumEntries()); | |
124 // TODO(koda): Add salt. | |
125 intptr_t probe = static_cast<uword>(KeyTraits::Hash(key)) % NumEntries(); | |
126 Object& obj = Object::Handle(); | |
127 // TODO(koda): Consider quadratic probing. | |
128 for (; ; probe = (probe + 1) % NumEntries()) { | |
129 if (IsUnused(probe)) { | |
130 return -1; | |
131 } else if (IsDeleted(probe)) { | |
132 continue; | |
133 } else { | |
134 obj = GetKey(probe); | |
135 if (KeyTraits::IsMatch(key, obj)) { | |
136 return probe; | |
137 } | |
138 } | |
139 } | |
140 UNREACHABLE(); | |
141 return -1; | |
142 } | |
143 | |
144 // Sets *entry to either: | |
145 // - an occupied entry matching 'key', and returns true, or | |
146 // - an unused/deleted entry where a matching key may be inserted, | |
147 // and returns false. | |
148 template<typename Key> | |
149 bool FindKeyOrDeletedOrUnused(const Key& key, intptr_t* entry) const { | |
150 ASSERT(entry != NULL); | |
151 ASSERT(NumOccupied() < NumEntries()); | |
152 intptr_t probe = static_cast<uword>(KeyTraits::Hash(key)) % NumEntries(); | |
153 Object& obj = Object::Handle(); | |
154 intptr_t deleted = -1; | |
155 // TODO(koda): Consider quadratic probing. | |
156 for (; ; probe = (probe + 1) % NumEntries()) { | |
157 if (IsUnused(probe)) { | |
158 *entry = (deleted != -1) ? deleted : probe; | |
159 return false; | |
160 } else if (IsDeleted(probe)) { | |
161 if (deleted == -1) { | |
162 deleted = probe; | |
163 } | |
164 } else { | |
165 obj = GetKey(probe); | |
166 if (KeyTraits::IsMatch(key, obj)) { | |
167 *entry = probe; | |
168 return true; | |
169 } | |
170 } | |
171 } | |
172 UNREACHABLE(); | |
173 return false; | |
174 } | |
175 | |
176 // Sets the key of a previously unoccupied entry. This must not be the last | |
177 // unoccupied entry. | |
178 void InsertKey(intptr_t entry, const Object& key) const { | |
179 ASSERT(!IsOccupied(entry)); | |
180 AdjustSmiValueAt(kOccupiedEntriesIndex, 1); | |
181 if (IsDeleted(entry)) { | |
182 AdjustSmiValueAt(kDeletedEntriesIndex, -1); | |
183 } else { | |
184 ASSERT(IsUnused(entry)); | |
185 } | |
186 InternalSetKey(entry, key); | |
187 ASSERT(IsOccupied(entry)); | |
188 ASSERT(NumOccupied() < NumEntries()); | |
189 } | |
190 | |
191 bool IsUnused(intptr_t entry) const { | |
192 return InternalGetKey(entry) == Object::sentinel().raw(); | |
193 } | |
194 bool IsOccupied(intptr_t entry) const { | |
195 return !IsUnused(entry) && !IsDeleted(entry); | |
196 } | |
197 bool IsDeleted(intptr_t entry) const { | |
198 return InternalGetKey(entry) == Object::transition_sentinel().raw(); | |
199 } | |
200 | |
201 RawObject* GetKey(intptr_t entry) const { | |
202 ASSERT(IsOccupied(entry)); | |
203 return InternalGetKey(entry); | |
204 } | |
205 RawObject* GetPayload(intptr_t entry, intptr_t component) const { | |
206 ASSERT(IsOccupied(entry)); | |
207 return data_.At(PayloadIndex(entry, component)); | |
208 } | |
209 void UpdatePayload(intptr_t entry, | |
210 intptr_t component, | |
211 const Object& value) const { | |
212 ASSERT(IsOccupied(entry)); | |
213 ASSERT(0 <= component && component < kPayloadSize); | |
214 data_.SetAt(PayloadIndex(entry, component), value); | |
215 } | |
216 // Deletes both the key and payload of the specified entry. | |
217 void DeleteEntry(intptr_t entry) const { | |
218 ASSERT(IsOccupied(entry)); | |
219 for (intptr_t i = 0; i < kPayloadSize; ++i) { | |
220 UpdatePayload(entry, i, Object::transition_sentinel()); | |
221 } | |
222 InternalSetKey(entry, Object::transition_sentinel()); | |
223 AdjustSmiValueAt(kOccupiedEntriesIndex, -1); | |
224 AdjustSmiValueAt(kDeletedEntriesIndex, 1); | |
225 } | |
226 intptr_t NumEntries() const { | |
227 return (data_.Length() - kFirstKeyIndex) / kEntrySize; | |
228 } | |
229 intptr_t NumUnused() const { | |
230 return NumEntries() - NumOccupied() - NumDeleted(); | |
231 } | |
232 intptr_t NumOccupied() const { | |
233 return GetSmiValueAt(kOccupiedEntriesIndex); | |
234 } | |
235 intptr_t NumDeleted() const { | |
236 return GetSmiValueAt(kDeletedEntriesIndex); | |
237 } | |
238 | |
239 protected: | |
240 static const intptr_t kOccupiedEntriesIndex = 0; | |
241 static const intptr_t kDeletedEntriesIndex = 1; | |
242 static const intptr_t kHeaderSize = kDeletedEntriesIndex + 1; | |
243 static const intptr_t kMetaDataIndex = kHeaderSize; | |
244 static const intptr_t kFirstKeyIndex = kHeaderSize + kMetaDataSize; | |
245 static const intptr_t kEntrySize = 1 + kPayloadSize; | |
246 | |
247 intptr_t KeyIndex(intptr_t entry) const { | |
248 ASSERT(0 <= entry && entry < NumEntries()); | |
249 return kFirstKeyIndex + (kEntrySize * entry); | |
250 } | |
251 | |
252 intptr_t PayloadIndex(intptr_t entry, intptr_t component) const { | |
253 ASSERT(0 <= component && component < kPayloadSize); | |
254 return KeyIndex(entry) + 1 + component; | |
255 } | |
256 | |
257 RawObject* InternalGetKey(intptr_t entry) const { | |
258 return data_.At(KeyIndex(entry)); | |
259 } | |
260 | |
261 void InternalSetKey(intptr_t entry, const Object& key) const { | |
262 data_.SetAt(KeyIndex(entry), key); | |
263 } | |
264 | |
265 intptr_t GetSmiValueAt(intptr_t index) const { | |
266 ASSERT(Object::Handle(data_.At(index)).IsSmi()); | |
267 return Smi::Value(Smi::RawCast(data_.At(index))); | |
268 } | |
269 | |
270 void SetSmiValueAt(intptr_t index, intptr_t value) const { | |
271 const Smi& smi = Smi::Handle(Smi::New(value)); | |
272 data_.SetAt(index, smi); | |
273 } | |
274 | |
275 void AdjustSmiValueAt(intptr_t index, intptr_t delta) const { | |
276 SetSmiValueAt(index, (GetSmiValueAt(index) + delta)); | |
277 } | |
278 | |
279 Array& data_; | |
280 | |
281 friend class HashTables; | |
282 }; | |
283 | |
284 | |
285 // Table with unspecified iteration order. No payload overhead or metadata. | |
286 template<typename KeyTraits, intptr_t kUserPayloadSize> | |
287 class UnorderedHashTable : public HashTable<KeyTraits, kUserPayloadSize, 0> { | |
288 public: | |
289 typedef HashTable<KeyTraits, kUserPayloadSize, 0> Base; | |
290 static const intptr_t kPayloadSize = kUserPayloadSize; | |
291 explicit UnorderedHashTable(Array& data) : Base(data) {} | |
292 // Note: Does not check for concurrent modification. | |
293 class Iterator { | |
294 public: | |
295 explicit Iterator(const UnorderedHashTable* table) | |
296 : table_(table), entry_(-1) {} | |
297 bool MoveNext() { | |
298 while (entry_ < (table_->NumEntries() - 1)) { | |
299 ++entry_; | |
300 if (table_->IsOccupied(entry_)) { | |
301 return true; | |
302 } | |
303 } | |
304 return false; | |
305 } | |
306 intptr_t Current() { | |
307 return entry_; | |
308 } | |
309 | |
310 private: | |
311 const UnorderedHashTable* table_; | |
312 intptr_t entry_; | |
313 }; | |
314 | |
315 // No extra book-keeping needed for Initialize, InsertKey, DeleteEntry. | |
316 }; | |
317 | |
318 | |
319 // Table with insertion order, using one payload component for the enumeration | |
320 // index, and one metadata element for the next enumeration index. | |
321 template<typename KeyTraits, intptr_t kUserPayloadSize> | |
322 class EnumIndexHashTable | |
323 : public HashTable<KeyTraits, kUserPayloadSize + 1, 1> { | |
324 public: | |
325 typedef HashTable<KeyTraits, kUserPayloadSize + 1, 1> Base; | |
326 static const intptr_t kPayloadSize = kUserPayloadSize; | |
327 static const intptr_t kNextEnumIndex = Base::kMetaDataIndex; | |
328 explicit EnumIndexHashTable(Array& data) : Base(data) {} | |
329 // Note: Does not check for concurrent modification. | |
330 class Iterator { | |
331 public: | |
332 explicit Iterator(const EnumIndexHashTable* table) : index_(-1) { | |
333 // TODO(koda): Use GrowableArray after adding stateful comparator support. | |
334 std::map<intptr_t, intptr_t> enum_to_entry; | |
335 for (intptr_t i = 0; i < table->NumEntries(); ++i) { | |
336 if (table->IsOccupied(i)) { | |
337 intptr_t enum_index = | |
338 table->GetSmiValueAt(table->PayloadIndex(i, kPayloadSize)); | |
339 enum_to_entry[enum_index] = i; | |
340 } | |
341 } | |
342 for (std::map<intptr_t, intptr_t>::iterator it = enum_to_entry.begin(); | |
343 it != enum_to_entry.end(); | |
344 ++it) { | |
345 entries_.push_back(it->second); | |
346 } | |
347 } | |
348 bool MoveNext() { | |
349 if (index_ < (static_cast<intptr_t>(entries_.size() - 1))) { | |
350 index_++; | |
351 return true; | |
352 } | |
353 return false; | |
354 } | |
355 intptr_t Current() { | |
356 return entries_[index_]; | |
357 } | |
358 | |
359 private: | |
360 intptr_t index_; | |
361 std::vector<intptr_t> entries_; | |
362 }; | |
363 | |
364 void Initialize() const { | |
365 Base::Initialize(); | |
366 Base::SetSmiValueAt(kNextEnumIndex, 0); | |
367 } | |
368 | |
369 void InsertKey(intptr_t entry, const Object& key) const { | |
370 Base::InsertKey(entry, key); | |
371 const Smi& next_enum_index = | |
372 Smi::Handle(Smi::New(Base::GetSmiValueAt(kNextEnumIndex))); | |
373 Base::UpdatePayload(entry, kPayloadSize, next_enum_index); | |
374 // TODO(koda): Handle possible Smi overflow from repeated insert/delete. | |
375 Base::AdjustSmiValueAt(kNextEnumIndex, 1); | |
376 } | |
377 | |
378 // No extra book-keeping needed for DeleteEntry. | |
379 }; | |
380 | |
381 | |
382 class HashTables : public AllStatic { | |
383 public: | |
384 // Allocates and initializes a table. | |
385 template<typename Table> | |
386 static RawArray* New(intptr_t initial_capacity) { | |
387 Table table(Array::Handle(Array::New( | |
388 Table::ArrayLengthForNumOccupied(initial_capacity)))); | |
389 table.Initialize(); | |
390 return table.Release(); | |
391 } | |
392 | |
393 // Clears 'to' and inserts all elements from 'from', in iteration order. | |
394 // The tables must have the same user payload size. | |
395 template<typename From, typename To> | |
396 static void Copy(const From& from, const To& to) { | |
397 COMPILE_ASSERT(From::kPayloadSize == To::kPayloadSize); | |
398 to.Initialize(); | |
399 ASSERT(from.NumOccupied() < to.NumEntries()); | |
400 typename From::Iterator it(&from); | |
401 Object& obj = Object::Handle(); | |
402 while (it.MoveNext()) { | |
403 intptr_t from_entry = it.Current(); | |
404 obj = from.GetKey(from_entry); | |
405 intptr_t to_entry = -1; | |
406 const Object& key = obj; | |
407 bool present = to.FindKeyOrDeletedOrUnused(key, &to_entry); | |
408 ASSERT(!present); | |
409 to.InsertKey(to_entry, obj); | |
410 for (intptr_t i = 0; i < From::kPayloadSize; ++i) { | |
411 obj = from.GetPayload(from_entry, i); | |
412 to.UpdatePayload(to_entry, i, obj); | |
413 } | |
414 } | |
415 } | |
416 | |
417 template<typename Table> | |
418 static void EnsureLoadFactor(double low, double high, const Table& table) { | |
419 double current = (1 + table.NumOccupied() + table.NumDeleted()) / | |
420 static_cast<double>(table.NumEntries()); | |
421 if (low <= current && current < high) { | |
422 return; | |
423 } | |
424 double target = (low + high) / 2.0; | |
425 intptr_t new_capacity = (1 + table.NumOccupied()) / target; | |
426 Table new_table(Array::Handle(New<Table>(new_capacity))); | |
427 Copy(table, new_table); | |
428 table.data_ = new_table.Release(); | |
429 } | |
430 | |
431 // Serializes a table by concatenating its entries as an array. | |
432 template<typename Table> | |
433 static RawArray* ToArray(const Table& table, bool include_payload) { | |
434 const intptr_t entry_size = include_payload ? (1 + Table::kPayloadSize) : 1; | |
435 Array& result = Array::Handle(Array::New(table.NumOccupied() * entry_size)); | |
436 typename Table::Iterator it(&table); | |
437 Object& obj = Object::Handle(); | |
438 intptr_t result_index = 0; | |
439 while (it.MoveNext()) { | |
440 intptr_t entry = it.Current(); | |
441 obj = table.GetKey(entry); | |
442 result.SetAt(result_index++, obj); | |
443 if (include_payload) { | |
444 for (intptr_t i = 0; i < Table::kPayloadSize; ++i) { | |
445 obj = table.GetPayload(entry, i); | |
446 result.SetAt(result_index++, obj); | |
447 } | |
448 } | |
449 } | |
450 return result.raw(); | |
451 } | |
452 }; | |
453 | |
454 | |
455 template<typename Base> | |
456 class HashMap : public Base { | |
457 public: | |
458 typedef Base BaseTable; | |
459 explicit HashMap(Array& data) : BaseTable(data) {} | |
460 template<typename Key> | |
461 RawObject* GetOrNull(const Key& key, bool* present = NULL) const { | |
462 intptr_t entry = Base::FindKey(key); | |
463 if (present != NULL) { | |
464 *present = (entry != -1); | |
465 } | |
466 return (entry == -1) ? Object::null() : Base::GetPayload(entry, 0); | |
467 } | |
468 bool UpdateOrInsert(const Object& key, const Object& value) const { | |
469 HashTables::EnsureLoadFactor(0.0, 0.75, *this); | |
470 intptr_t entry = -1; | |
471 bool present = Base::FindKeyOrDeletedOrUnused(key, &entry); | |
472 if (!present) { | |
473 Base::InsertKey(entry, key); | |
474 } | |
475 Base::UpdatePayload(entry, 0, value); | |
476 return present; | |
477 } | |
478 // Update the value of an existing key. Note that 'key' need not be an Object. | |
479 template<typename Key> | |
480 void UpdateValue(const Key& key, const Object& value) const { | |
481 intptr_t entry = Base::FindKey(key); | |
482 ASSERT(entry != -1); | |
483 Base::UpdatePayload(entry, 0, value); | |
484 } | |
485 | |
486 template<typename Key> | |
487 bool Remove(const Key& key) const { | |
488 intptr_t entry = Base::FindKey(key); | |
489 if (entry == -1) { | |
490 return false; | |
491 } else { | |
492 Base::DeleteEntry(entry); | |
493 return true; | |
494 } | |
495 } | |
496 }; | |
497 | |
498 | |
499 template<typename KeyTraits> | |
500 class UnorderedHashMap : public HashMap<UnorderedHashTable<KeyTraits, 1> > { | |
501 public: | |
502 typedef HashMap<UnorderedHashTable<KeyTraits, 1> > Base; | |
503 explicit UnorderedHashMap(Array& data) : Base(data) {} | |
504 }; | |
505 | |
506 | |
507 template<typename KeyTraits> | |
508 class EnumIndexHashMap : public HashMap<EnumIndexHashTable<KeyTraits, 1> > { | |
509 public: | |
510 typedef HashMap<EnumIndexHashTable<KeyTraits, 1> > Base; | |
511 explicit EnumIndexHashMap(Array& data) : Base(data) {} | |
512 }; | |
513 | |
514 | |
515 template<typename Base> | |
516 class HashSet : public Base { | |
517 public: | |
518 typedef Base BaseTable; | |
519 explicit HashSet(Array& data) : BaseTable(data) {} | |
520 bool Insert(const Object& key) { | |
521 HashTables::EnsureLoadFactor(0.0, 0.75, *this); | |
522 intptr_t entry = -1; | |
523 bool present = Base::FindKeyOrDeletedOrUnused(key, &entry); | |
524 if (!present) { | |
525 Base::InsertKey(entry, key); | |
526 } | |
527 return present; | |
528 } | |
529 template<typename Key> | |
530 bool Remove(const Key& key) const { | |
531 intptr_t entry = Base::FindKey(key); | |
532 if (entry == -1) { | |
533 return false; | |
534 } else { | |
535 Base::DeleteEntry(entry); | |
536 return true; | |
537 } | |
538 } | |
539 }; | |
540 | |
541 | |
542 template<typename KeyTraits> | |
543 class UnorderedHashSet : public HashSet<UnorderedHashTable<KeyTraits, 0> > { | |
544 public: | |
545 typedef HashSet<UnorderedHashTable<KeyTraits, 0> > Base; | |
546 explicit UnorderedHashSet(Array& data) : Base(data) {} | |
547 }; | |
548 | |
549 | |
550 template<typename KeyTraits> | |
551 class EnumIndexHashSet : public HashSet<EnumIndexHashTable<KeyTraits, 0> > { | |
552 public: | |
553 typedef HashSet<EnumIndexHashTable<KeyTraits, 0> > Base; | |
554 explicit EnumIndexHashSet(Array& data) : Base(data) {} | |
555 }; | |
556 | |
557 } // namespace dart | |
558 | |
559 #endif // VM_HASH_TABLE_H_ | |
OLD | NEW |