Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(163)

Side by Side Diff: runtime/lib/compact_hash.dart

Issue 915323002: Compact LinkedHashMap/Set implementation. (Closed) Base URL: http://dart.googlecode.com/svn/branches/bleeding_edge/dart/
Patch Set: Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « runtime/lib/collection_sources.gypi ('k') | runtime/lib/linked_hash_map.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 import 'dart:typed_data';
6
7 // Hash table with open addressing that separates the index from keys/values.
8 abstract class _HashBase {
9 // Each occupied entry in _index is a fixed-size integer that encodes a pair:
10 // [ hash pattern for key | index of entry in _data ]
11 // The hash pattern is based on hashCode, but is guaranteed to be non-zero.
12 // The length of _index is always a power of two, and there is always at
13 // least one unoccupied entry.
14 Uint32List _index;
15
16 // The number of bits used for each component is determined by table size.
17 // The length of _index is twice the number of entries in _data, and both
18 // are doubled when _data is full. Thus, _index will have a max load factor
19 // of 1/2, which enables one more bit to be used for the hash.
20 // TODO(koda): Consider growing _data by factor sqrt(2), twice as often.
21 static const int _INITIAL_INDEX_BITS = 3;
22 static const int _INITIAL_INDEX_SIZE = 1 << (_INITIAL_INDEX_BITS + 1);
23
24 // Unused and deleted entries are marked by 0 and 1, respectively.
25 static const int _UNUSED_PAIR = 0;
26 static const int _DELETED_PAIR = 1;
27
28 // Cached in-place mask for the hash pattern component. On 32-bit, the top
29 // bits are wasted to avoid Mint allocation.
30 // TODO(koda): Reclaim the bits by making the compiler treat hash patterns
31 // as unsigned words.
32 int _hashMask = int.is64Bit() ?
33 (1 << (32 - _INITIAL_INDEX_BITS)) - 1 :
34 (1 << (30 - _INITIAL_INDEX_BITS)) - 1;
35
36 static int _hashPattern(int fullHash, int hashMask, int size) =>
37 // TODO(koda): Consider keeping bit length and use left shift.
38 (fullHash == 0) ? (size >> 1) : (fullHash & hashMask) * (size >> 1);
39
40 // Linear probing.
41 static int _firstProbe(int fullHash, int sizeMask) {
42 final int i = fullHash & sizeMask;
43 // Light, fast shuffle to mitigate bad hashCode (e.g., sequential).
44 return ((i << 1) + i) & sizeMask;
45 }
46 static int _nextProbe(int i, int sizeMask) => (i + 1) & sizeMask;
47
48 // Fixed-length list of keys (set) or key/value at even/odd indices (map).
49 List _data;
50 // Length of _data that is used (i.e., keys + values for a map).
51 int _usedData = 0;
52 // Number of deleted keys.
53 int _deletedKeys = 0;
54
55 // A self-loop is used to mark a deleted key or value.
56 static bool _isDeleted(List data, Object keyOrValue) =>
57 identical(keyOrValue, data);
58 static void _setDeletedAt(List data, int d) {
59 data[d] = data;
60 }
61
62 // Concurrent modification detection relies on this checksum monotonically
63 // increasing between reallocations of _data.
64 int get _checkSum => _usedData + _deletedKeys;
65 bool _isModifiedSince(List oldData, int oldCheckSum) =>
66 !identical(_data, oldData) || (_checkSum != oldCheckSum);
67 }
68
69 // Map with iteration in insertion order (hence "Linked"). New keys are simply
70 // appended to _data.
71 class _CompactLinkedHashMap<K, V>
72 extends MapBase<K, V> with _HashBase
73 implements HashMap<K, V>, LinkedHashMap<K, V> {
74
75 _CompactLinkedHashMap() {
76 assert(_HashBase._UNUSED_PAIR == 0);
77 _index = new Uint32List(_HashBase._INITIAL_INDEX_SIZE);
78 _data = new List(_HashBase._INITIAL_INDEX_SIZE);
79 }
80
81 int get length => (_usedData >> 1) - _deletedKeys;
82 bool get isEmpty => length == 0;
83 bool get isNotEmpty => !isEmpty;
84
85 void _rehash() {
86 if ((_deletedKeys << 1) > _usedData) {
87 // TODO(koda): Consider shrinking.
88 // TODO(koda): Consider in-place compaction and more costly CME check.
89 _init(_index.length, _hashMask, _data, _usedData);
90 } else {
91 // TODO(koda): Support 32->64 bit transition (and adjust _hashMask).
92 _init(_index.length << 1, _hashMask >> 1, _data, _usedData);
93 }
94 }
95
96 void clear() {
97 if (!isEmpty) {
98 _init(_index.length, _hashMask);
99 }
100 }
101
102 // Allocate new _index and _data, and optionally copy existing contents.
103 void _init(int size, int hashMask, [List oldData, int oldUsed]) {
104 assert(size & (size - 1) == 0);
105 assert(_HashBase._UNUSED_PAIR == 0);
106 _index = new Uint32List(size);
107 _hashMask = hashMask;
108 _data = new List(size);
109 _usedData = 0;
110 _deletedKeys = 0;
111 if (oldData != null) {
112 for (int i = 0; i < oldUsed; i += 2) {
113 var key = oldData[i];
114 if (!_HashBase._isDeleted(oldData, key)) {
115 // TODO(koda): While there are enough hash bits, avoid hashCode calls.
116 this[key] = oldData[i + 1];
117 }
118 }
119 }
120 }
121
122 void _insert(K key, V value, int hashPattern, int i) {
123 if (_usedData == _data.length) {
124 _rehash();
125 this[key] = value;
126 } else {
127 assert(0 <= hashPattern && hashPattern < (1 << 32));
128 final int index = _usedData >> 1;
129 assert((index & hashPattern) == 0);
130 _index[i] = hashPattern | index;
131 _data[_usedData++] = key;
132 _data[_usedData++] = value;
133 }
134 }
135
136 // If key is present, returns the index of the value in _data, else returns
137 // the negated insertion point in _index.
138 int _findValueOrInsertPoint(K key, int fullHash, int hashPattern, int size) {
139 final int sizeMask = size - 1;
140 final int maxEntries = size >> 1;
141 int i = _HashBase._firstProbe(fullHash, sizeMask);
142 int firstDeleted = -1;
143 int pair = _index[i];
144 while (pair != _HashBase._UNUSED_PAIR) {
145 if (pair == _HashBase._DELETED_PAIR) {
146 if (firstDeleted < 0){
147 firstDeleted = i;
148 }
149 } else {
150 final int entry = hashPattern ^ pair;
151 if (entry < maxEntries) {
152 final int d = entry << 1;
153 if (key == _data[d]) {
154 return d + 1;
155 }
156 }
157 }
158 i = _HashBase._nextProbe(i, sizeMask);
159 pair = _index[i];
160 }
161 return firstDeleted >= 0 ? -firstDeleted : -i;
162 }
163
164 void operator[]=(K key, V value) {
165 final int size = _index.length;
166 final int sizeMask = size - 1;
167 final int fullHash = key.hashCode;
168 final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);
169 final int d = _findValueOrInsertPoint(key, fullHash, hashPattern, size);
170 if (d > 0) {
171 _data[d] = value;
172 } else {
173 final int i = -d;
174 _insert(key, value, hashPattern, i);
175 }
176 }
177
178 V putIfAbsent(K key, V ifAbsent()) {
179 final int size = _index.length;
180 final int sizeMask = size - 1;
181 final int maxEntries = size >> 1;
182 final int fullHash = key.hashCode;
183 final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);
184 final int d = _findValueOrInsertPoint(key, fullHash, hashPattern, size);
185 if (d > 0) {
186 return _data[d];
187 }
188 // 'ifAbsent' is allowed to modify the map.
189 List oldData = _data;
190 int oldCheckSum = _checkSum;
191 V value = ifAbsent();
192 if (_isModifiedSince(oldData, oldCheckSum)) {
193 this[key] = value;
194 } else {
195 final int i = -d;
196 _insert(key, value, hashPattern, i);
197 }
198 return value;
199 }
200
201 V remove(Object key) {
202 final int size = _index.length;
203 final int sizeMask = size - 1;
204 final int maxEntries = size >> 1;
205 final int fullHash = key.hashCode;
206 final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);
207 int i = _HashBase._firstProbe(fullHash, sizeMask);
208 int pair = _index[i];
209 while (pair != _HashBase._UNUSED_PAIR) {
210 if (pair != _HashBase._DELETED_PAIR) {
211 final int entry = hashPattern ^ pair;
212 if (entry < maxEntries) {
213 final int d = entry << 1;
214 if (key == _data[d]) {
215 _index[i] = _HashBase._DELETED_PAIR;
216 _HashBase._setDeletedAt(_data, d);
217 V value = _data[d + 1];
218 _HashBase._setDeletedAt(_data, d + 1);
219 ++_deletedKeys;
220 return value;
221 }
222 }
223 }
224 i = _HashBase._nextProbe(i, sizeMask);
225 pair = _index[i];
226 }
227 return null;
228 }
229
230 // If key is absent, return _data (which is never a value).
231 Object _getValueOrData(Object key) {
232 final int size = _index.length;
233 final int sizeMask = size - 1;
234 final int maxEntries = size >> 1;
235 final int fullHash = key.hashCode;
236 final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);
237 int i = _HashBase._firstProbe(fullHash, sizeMask);
238 int pair = _index[i];
239 while (pair != _HashBase._UNUSED_PAIR) {
240 if (pair != _HashBase._DELETED_PAIR) {
241 final int entry = hashPattern ^ pair;
242 if (entry < maxEntries) {
243 final int d = entry << 1;
244 if (key == _data[d]) {
245 return _data[d + 1];
246 }
247 }
248 }
249 i = _HashBase._nextProbe(i, sizeMask);
250 pair = _index[i];
251 }
252 return _data;
253 }
254
255 bool containsKey(Object key) => !identical(_data, _getValueOrData(key));
256
257 V operator[](Object key) {
258 var v = _getValueOrData(key);
259 return identical(_data, v) ? null : v;
260 }
261
262 bool containsValue(Object value) {
263 for (var v in values) {
264 if (v == value) {
265 return true;
266 }
267 }
268 return false;
269 }
270
271 void forEach(void f(K key, V value)) {
272 var ki = keys.iterator;
273 var vi = values.iterator;
274 while (ki.moveNext()) {
275 vi.moveNext();
276 f(ki.current, vi.current);
277 }
278 }
279
280 Iterable<K> get keys =>
281 new _CompactIterable<K>(this, _data, _usedData, -2, 2);
282 Iterable<V> get values =>
283 new _CompactIterable<V>(this, _data, _usedData, -1, 2);
284 }
285
286 // Iterates through _data[_offset + _step], _data[_offset + 2*_step], ...
287 // and checks for concurrent modification.
288 class _CompactIterable<E> extends IterableBase<E> {
289 final _table;
290 final List _data;
291 final int _len;
292 final int _offset;
293 final int _step;
294
295 _CompactIterable(this._table, this._data, this._len,
296 this._offset, this._step);
297
298 Iterator<E> get iterator =>
299 new _CompactIterator<E>(_table, _data, _len, _offset, _step);
300
301 int get length => _table.length;
302 bool get isEmpty => length == 0;
303 bool get isNotEmpty => !isEmpty;
304 }
305
306 class _CompactIterator<E> implements Iterator<E> {
307 final _table;
308 final List _data;
309 final int _len;
310 int _offset;
311 final int _step;
312 final int _checkSum;
313 E current;
314
315 _CompactIterator(table, this._data, this._len, this._offset, this._step) :
316 _table = table, _checkSum = table._checkSum;
317
318 bool moveNext() {
319 if (_table._isModifiedSince(_data, _checkSum)) {
320 throw new ConcurrentModificationError(_table);
321 }
322 do {
323 _offset += _step;
324 } while (_offset < _len && _HashBase._isDeleted(_data, _data[_offset]));
325 if (_offset < _len) {
326 current = _data[_offset];
327 return true;
328 } else {
329 current = null;
330 return false;
331 }
332 }
333 }
334
335 // Set implementation, analogous to _CompactLinkedHashMap.
336 class _CompactLinkedHashSet<K>
337 extends SetBase<K> with _HashBase
338 implements HashSet<K>, LinkedHashSet<K> {
339
340 _CompactLinkedHashSet() {
341 assert(_HashBase._UNUSED_PAIR == 0);
342 _index = new Uint32List(_HashBase._INITIAL_INDEX_SIZE);
343 _data = new List(_HashBase._INITIAL_INDEX_SIZE >> 1);
344 }
345
346 int get length => _usedData - _deletedKeys;
347
348 void _rehash() {
349 if ((_deletedKeys << 1) > _usedData) {
350 _init(_index.length, _hashMask, _data, _usedData);
351 } else {
352 _init(_index.length << 1, _hashMask >> 1, _data, _usedData);
353 }
354 }
355
356 void clear() {
357 if (!isEmpty) {
358 _init(_index.length, _hashMask);
359 }
360 }
361
362 void _init(int size, int hashMask, [List oldData, int oldUsed]) {
363 _index = new Uint32List(size);
364 _hashMask = hashMask;
365 _data = new List(size >> 1);
366 _usedData = 0;
367 _deletedKeys = 0;
368 if (oldData != null) {
369 for (int i = 0; i < oldUsed; i += 1) {
370 var key = oldData[i];
371 if (!_HashBase._isDeleted(oldData, key)) {
372 add(key);
373 }
374 }
375 }
376 }
377
378 bool add(Object key) {
379 final int size = _index.length;
380 final int sizeMask = size - 1;
381 final int maxEntries = size >> 1;
382 final int fullHash = key.hashCode;
383 final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);
384 int i = _HashBase._firstProbe(fullHash, sizeMask);
385 int firstDeleted = -1;
386 int pair = _index[i];
387 while (pair != _HashBase._UNUSED_PAIR) {
388 if (pair == _HashBase._DELETED_PAIR) {
389 if (firstDeleted < 0){
390 firstDeleted = i;
391 }
392 } else {
393 final int d = hashPattern ^ pair;
394 if (d < maxEntries && key == _data[d]) {
395 return false;
396 }
397 }
398 i = _HashBase._nextProbe(i, sizeMask);
399 pair = _index[i];
400 }
401 if (_usedData == _data.length) {
402 _rehash();
403 add(key);
404 } else {
405 final int insertionPoint = (firstDeleted >= 0) ? firstDeleted : i;
406 assert(0 <= hashPattern && hashPattern < (1 << 32));
407 assert((hashPattern & _usedData) == 0);
408 _index[insertionPoint] = hashPattern | _usedData;
409 _data[_usedData++] = key;
410 }
411 return true;
412 }
413
414 // If key is absent, return _data (which is never a value).
415 Object _getKeyOrData(Object key) {
416 final int size = _index.length;
417 final int sizeMask = size - 1;
418 final int maxEntries = size >> 1;
419 final int fullHash = key.hashCode;
420 final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);
421 int i = _HashBase._firstProbe(fullHash, sizeMask);
422 int pair = _index[i];
423 while (pair != _HashBase._UNUSED_PAIR) {
424 if (pair != _HashBase._DELETED_PAIR) {
425 final int d = hashPattern ^ pair;
426 if (d < maxEntries && key == _data[d]) {
427 return _data[d]; // Note: Must return the existing key.
428 }
429 }
430 i = _HashBase._nextProbe(i, sizeMask);
431 pair = _index[i];
432 }
433 return _data;
434 }
435
436 K lookup(Object key) {
437 var k = _getKeyOrData(key);
438 return identical(_data, k) ? null : k;
439 }
440
441 bool contains(Object key) => !identical(_data, _getKeyOrData(key));
442
443 bool remove(Object key) {
444 final int size = _index.length;
445 final int sizeMask = size - 1;
446 final int maxEntries = size >> 1;
447 final int fullHash = key.hashCode;
448 final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);
449 int i = _HashBase._firstProbe(fullHash, sizeMask);
450 int pair = _index[i];
451 while (pair != _HashBase._UNUSED_PAIR) {
452 if (pair != _HashBase._DELETED_PAIR) {
453 final int d = hashPattern ^ pair;
454 if (d < maxEntries && key == _data[d]) {
455 _index[i] = _HashBase._DELETED_PAIR;
456 _HashBase._setDeletedAt(_data, d);
457 ++_deletedKeys;
458 return true;
459 }
460 }
461 i = _HashBase._nextProbe(i, sizeMask);
462 pair = _index[i];
463 }
464 return false;
465 }
466
467 Iterator<K> get iterator =>
468 new _CompactIterator<K>(this, _data, _usedData, -1, 1);
469
470 Set<K> toSet() => new Set<K>()..addAll(this);
471 }
OLDNEW
« no previous file with comments | « runtime/lib/collection_sources.gypi ('k') | runtime/lib/linked_hash_map.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698