OLD | NEW |
---|---|
(Empty) | |
1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file | |
2 // for details. All rights reserved. Use of this source code is governed by a | |
3 // BSD-style license that can be found in the LICENSE file. | |
4 | |
5 import 'dart:typed_data'; | |
6 | |
7 // Hash table with open addressing that separates the index from keys/values. | |
8 abstract class _CompactHashTableMixin { | |
9 // Each occupied entry in _index is a fixed-size integer that encodes a pair: | |
10 // [ hash pattern for key | index of key in _data ] | |
11 // The hash pattern is based on hashCode, but is guaranteed to be non-zero. | |
12 // The length of _index is always a power of two, and there is always at | |
13 // least one unoccupied entry. | |
14 Uint32List _index; | |
15 | |
16 // The number of bits used for each component is determined by table size. | |
17 static const int _INITIAL_INDEX_BITS = 4; | |
18 static const int _INITIAL_INDEX_SIZE = 1 << _INITIAL_INDEX_BITS; | |
19 | |
20 // Unused and deleted entries are marked by 0 and 1, respectively. | |
21 static const int _UNUSED_PAIR = 0; | |
22 static const int _DELETED_PAIR = 1; | |
23 | |
24 // Cached in-place mask for the hash pattern component. On 32-bit, the top | |
25 // bits are wasted to avoid Mint allocation. TODO(koda): Reclaim the bits by | |
Vyacheslav Egorov (Google)
2015/02/12 15:42:41
Please keep TODO on a separate line from the rest
koda
2015/02/17 17:26:29
Done.
| |
26 // making the compiler treat hash patterns as unsigned words. | |
27 int _hashMask = int.is64Bit() ? | |
28 (1 << (32 - _INITIAL_INDEX_BITS)) - 1 : | |
29 (1 << (30 - _INITIAL_INDEX_BITS)) - 1; | |
30 | |
31 int _hashPattern(int fullHash, int hashMask, int size) => | |
Vyacheslav Egorov (Google)
2015/02/12 15:42:41
should this be static? it does not use any members
koda
2015/02/17 17:26:29
Done.
| |
32 // TODO(koda): Consider keeping bit length and use left shift. | |
33 (fullHash == 0) ? size : (fullHash & hashMask) * size; | |
34 | |
35 // Linear probing. | |
36 int _firstProbe(int fullHash, int sizeMask) { | |
Vyacheslav Egorov (Google)
2015/02/12 15:42:41
should this be static?
koda
2015/02/17 17:26:28
Done.
| |
37 int i = fullHash & sizeMask; | |
38 // Light, fast shuffle to mitigate bad hashCode (e.g., sequential). | |
39 return ((i << 1) + i) & sizeMask; | |
40 } | |
41 int _nextProbe(int i, int sizeMask) => (i + 1) & sizeMask; | |
Vyacheslav Egorov (Google)
2015/02/12 15:42:41
ditto, the same comment applies to all helpers bel
koda
2015/02/17 17:26:29
Done.
I also shortened the mixin name to limit th
| |
42 | |
43 // Fixed-length list of keys (set) or key/value at even/odd indices (map). | |
44 List _data; | |
45 // Length of _data that is used (i.e., keys + values for a map). | |
46 int _usedData = 0; | |
47 // Number of deleted keys. | |
48 int _deletedKeys = 0; | |
49 | |
50 // A self-loop is used to mark a deleted key or value. | |
51 bool _isDeleted(List data, Object keyOrValue) => identical(keyOrValue, data); | |
52 void _setDeletedAt(List data, int d) { | |
53 data[d] = data; | |
54 } | |
55 | |
56 // Concurrent modification detection relies on this checksum monotonically | |
57 // increasing between reallocations of _data. | |
58 int get _checkSum => _usedData + _deletedKeys; | |
59 bool _isModifiedSince(List oldData, int oldCheckSum) => | |
60 !identical(_data, oldData) || (_checkSum != oldCheckSum); | |
61 } | |
62 | |
63 // Map with iteration in insertion order (hence "Linked"). New keys are simply | |
64 // appended to _data. | |
65 class _CompactLinkedHashMap<K, V> | |
66 extends MapBase<K, V> with _CompactHashTableMixin | |
67 implements HashMap<K, V>, LinkedHashMap<K, V> { | |
68 | |
69 // Growth policy is to keep _index and _data the same length, and double | |
70 // both when _data is full. Thus, _index will have a max load factor of 0.5. | |
71 // TODO(koda): Consider growing _data by factor sqrt(2), twice as often. | |
72 _CompactLinkedHashMap() { | |
73 _index = new Uint32List(_CompactHashTableMixin._INITIAL_INDEX_SIZE); | |
74 _data = new List(_CompactHashTableMixin._INITIAL_INDEX_SIZE); | |
75 } | |
76 | |
77 int get length => (_usedData >> 1) - _deletedKeys; | |
78 bool get isEmpty => length == 0; | |
79 bool get isNotEmpty => !isEmpty; | |
80 | |
81 void _rehash() { | |
82 if ((_deletedKeys << 1) > _usedData) { | |
83 // TODO(koda): Consider shrinking. | |
84 // TODO(koda): Consider in-place compaction and more costly CME check. | |
85 _init(_index.length, _hashMask, _data, _usedData); | |
86 } else { | |
87 // TODO(koda): Support 32->64 bit transition (and adjust _hashMask). | |
88 _init(_index.length << 1, _hashMask >> 1, _data, _usedData); | |
89 } | |
90 } | |
91 | |
92 void clear() { | |
93 if (!isEmpty) { | |
94 _init(_index.length, _hashMask); | |
95 } | |
96 } | |
97 | |
98 // Allocate new _index and _data, and optionally copy existing contents. | |
99 void _init(int size, int hashMask, [List oldData, int oldUsed]) { | |
100 assert(size & (size - 1) == 0); | |
101 assert(_CompactHashTableMixin._UNUSED_PAIR == 0); | |
Vyacheslav Egorov (Google)
2015/02/12 15:42:41
should these assert be in constructor too? we crea
koda
2015/02/17 17:26:28
Done.
| |
102 _index = new Uint32List(size); | |
103 _hashMask = hashMask; | |
104 _data = new List(size); | |
105 _usedData = 0; | |
106 _deletedKeys = 0; | |
107 if (oldData != null) { | |
108 for (int i = 0; i < oldUsed; i += 2) { | |
109 var key = oldData[i]; | |
110 if (!_isDeleted(oldData, key)) { | |
111 // TODO(koda): While there are enough hash bits, avoid hashCode calls. | |
112 this[key] = oldData[i + 1]; | |
113 } | |
114 } | |
115 } | |
116 } | |
117 | |
118 void _insert(K key, V value, int hashPattern, int i) { | |
119 if (_usedData == _data.length) { | |
120 _rehash(); | |
121 this[key] = value; | |
122 } else { | |
123 _index[i] = hashPattern | _usedData; | |
Vyacheslav Egorov (Google)
2015/02/12 15:42:41
looks like we are wasting 1 bit here - (_usedData
koda
2015/02/17 17:26:28
Done.
Yes, we were. It allowed for the very simpl
| |
124 _data[_usedData++] = key; | |
125 _data[_usedData++] = value; | |
126 } | |
127 } | |
128 | |
129 // If key is present, returns the index of the value in _data, else returns | |
130 // the negated insertion point in _index. | |
131 int _findValueOrInsertPoint(K key, int fullHash, int hashPattern, int size) { | |
132 int sizeMask = size - 1; | |
Vyacheslav Egorov (Google)
2015/02/12 15:42:41
when the are a lot of local variables it easier to
koda
2015/02/17 17:26:28
Done.
| |
133 int i = _firstProbe(fullHash, sizeMask); | |
134 int firstDeleted = -1; | |
135 int pair = _index[i]; | |
136 while (pair != _CompactHashTableMixin._UNUSED_PAIR) { | |
137 if (pair == _CompactHashTableMixin._DELETED_PAIR) { | |
Vyacheslav Egorov (Google)
2015/02/12 15:42:41
I wonder if the code becomes faster if you have tw
koda
2015/02/17 17:26:29
Perhaps. I agree that's a good candidate for optim
Vyacheslav Egorov (Google)
2015/02/17 23:39:00
Ack! That was more of a thought than immediate cal
| |
138 if (firstDeleted < 0){ | |
139 firstDeleted = i; | |
140 } | |
141 } else { | |
142 int d = hashPattern ^ pair; | |
143 if (d < size && key == _data[d]) { | |
144 return d + 1; | |
145 } | |
146 } | |
147 i = _nextProbe(i, sizeMask); | |
148 pair = _index[i]; | |
149 } | |
150 return firstDeleted >= 0 ? -firstDeleted : -i; | |
151 } | |
152 | |
153 void operator[]=(K key, V value) { | |
154 int size = _index.length; | |
155 int sizeMask = size - 1; | |
156 int fullHash = key.hashCode; | |
157 int hashPattern = _hashPattern(fullHash, _hashMask, size); | |
158 int d = _findValueOrInsertPoint(key, fullHash, hashPattern, size); | |
159 if (d > 0) { | |
160 _data[d] = value; | |
161 } else { | |
162 int i = -d; | |
163 _insert(key, value, hashPattern, i); | |
164 } | |
165 } | |
166 | |
167 V putIfAbsent(K key, V ifAbsent()) { | |
168 int size = _index.length; | |
169 int sizeMask = size - 1; | |
170 int fullHash = key.hashCode; | |
171 int hashPattern = _hashPattern(fullHash, _hashMask, size); | |
172 int d = _findValueOrInsertPoint(key, fullHash, hashPattern, size); | |
173 if (d > 0) { | |
174 return _data[d]; | |
175 } | |
176 int i = -d; | |
177 // 'ifAbsent' is allowed to modify the map. | |
178 List oldData = _data; | |
179 int oldCheckSum = _checkSum; | |
180 V value = ifAbsent(); | |
181 if (_isModifiedSince(oldData, oldCheckSum)) { | |
182 this[key] = value; | |
183 } else { | |
184 _insert(key, value, hashPattern, i); | |
185 } | |
186 return value; | |
187 } | |
188 | |
189 V remove(Object key) { | |
190 int size = _index.length; | |
191 int sizeMask = size - 1; | |
192 int fullHash = key.hashCode; | |
193 int hashPattern = _hashPattern(fullHash, _hashMask, size); | |
194 int i = _firstProbe(fullHash, sizeMask); | |
195 int pair = _index[i]; | |
196 while (pair != _CompactHashTableMixin._UNUSED_PAIR) { | |
197 if (pair != _CompactHashTableMixin._DELETED_PAIR) { | |
198 int d = hashPattern ^ pair; | |
199 if (d < size && key == _data[d]) { | |
200 _index[i] = _CompactHashTableMixin._DELETED_PAIR; | |
201 _setDeletedAt(_data, d); | |
202 V value = _data[d + 1]; | |
203 _setDeletedAt(_data, d + 1); | |
204 ++_deletedKeys; | |
205 return value; | |
206 } | |
207 } | |
208 i = _nextProbe(i, sizeMask); | |
209 pair = _index[i]; | |
210 } | |
211 return null; | |
212 } | |
213 | |
214 // If key is absent, return _data (which is never a value). | |
215 Object _getValueOrData(Object key) { | |
216 int size = _index.length; | |
217 int sizeMask = size - 1; | |
218 int fullHash = key.hashCode; | |
219 int hashPattern = _hashPattern(fullHash, _hashMask, size); | |
220 int i = _firstProbe(fullHash, sizeMask); | |
221 int pair = _index[i]; | |
222 while (pair != _CompactHashTableMixin._UNUSED_PAIR) { | |
223 if (pair != _CompactHashTableMixin._DELETED_PAIR) { | |
224 int d = hashPattern ^ pair; | |
225 if (d < size && key == _data[d]) { | |
226 return _data[d + 1]; | |
227 } | |
228 } | |
229 i = _nextProbe(i, sizeMask); | |
230 pair = _index[i]; | |
231 } | |
232 return _data; | |
233 } | |
234 | |
235 bool containsKey(Object key) => !identical(_data, _getValueOrData(key)); | |
236 | |
237 V operator[](Object key) { | |
238 var v = _getValueOrData(key); | |
239 return identical(_data, v) ? null : v; | |
240 } | |
241 | |
242 bool containsValue(Object value) { | |
243 for (var v in values) { | |
244 if (v == value) { | |
245 return true; | |
246 } | |
247 } | |
248 return false; | |
249 } | |
250 | |
251 void forEach(void f(K key, V value)) { | |
252 var ki = keys.iterator; | |
253 var vi = values.iterator; | |
254 while (ki.moveNext()) { | |
255 vi.moveNext(); | |
256 f(ki.current, vi.current); | |
257 } | |
258 } | |
259 | |
260 Iterable<K> get keys => | |
261 new _CompactIterable<K>(this, _data, _usedData, -2, 2); | |
262 Iterable<V> get values => | |
263 new _CompactIterable<V>(this, _data, _usedData, -1, 2); | |
264 } | |
265 | |
266 // Iterates through _data[_offset + _step], _data[_offset + 2*_step], ... | |
267 // and checks for concurrent modification. | |
268 class _CompactIterable<E> extends IterableBase<E> { | |
269 var _map; | |
Vyacheslav Egorov (Google)
2015/02/12 15:42:41
Please mark all fields that do not change final.
koda
2015/02/17 17:26:28
Done.
| |
270 List _data; | |
271 int _len; | |
272 int _offset; | |
273 int _step; | |
274 | |
275 _CompactIterable(this._map, this._data, this._len, this._offset, this._step); | |
276 | |
277 Iterator<E> get iterator => | |
278 new _CompactIterator<E>(_map, _data, _len, _offset, _step); | |
279 | |
280 int get length => _map.length; | |
281 bool get isEmpty => length == 0; | |
282 bool get isNotEmpty => !isEmpty; | |
283 } | |
284 | |
285 class _CompactIterator<E> implements Iterator<E> { | |
286 var _map; | |
Vyacheslav Egorov (Google)
2015/02/12 15:42:41
Please mark all fields that do not change final (i
koda
2015/02/17 17:26:29
Done.
| |
287 List _data; | |
288 int _len; | |
289 int _offset; | |
290 int _step; | |
291 int _checkSum; | |
292 E current; | |
293 | |
294 _CompactIterator(this._map, this._data, this._len, this._offset, this._step) { | |
295 this._checkSum = _map._checkSum; | |
296 } | |
297 | |
298 bool moveNext() { | |
299 if (_map._isModifiedSince(_data, _checkSum)) { | |
300 throw new ConcurrentModificationError(_map); | |
301 } | |
302 do { | |
303 _offset += _step; | |
304 } while (_offset < _len && _map._isDeleted(_data, _data[_offset])); | |
Vyacheslav Egorov (Google)
2015/02/12 15:42:41
This shouldn't be an instance call (also it will f
koda
2015/02/17 17:26:29
Done.
| |
305 if (_offset < _len) { | |
306 current = _data[_offset]; | |
307 return true; | |
308 } else { | |
309 current = null; | |
310 return false; | |
311 } | |
312 } | |
313 } | |
314 | |
315 // Set implementation, analogous to _CompactLinkedHashMap. | |
316 class _CompactLinkedHashSet<K> | |
317 extends SetBase<K> with _CompactHashTableMixin | |
318 implements HashSet<K>, LinkedHashSet<K> { | |
319 | |
320 _CompactLinkedHashSet() { | |
321 _index = new Uint32List(_CompactHashTableMixin._INITIAL_INDEX_SIZE); | |
322 _data = new List(_CompactHashTableMixin._INITIAL_INDEX_SIZE >> 1); | |
Vyacheslav Egorov (Google)
2015/02/12 15:42:41
Are we wasting 1 bit in the index here too? becaus
koda
2015/02/17 17:26:29
Done.
| |
323 } | |
324 | |
325 int get length => _usedData - _deletedKeys; | |
326 | |
327 void _rehash() { | |
328 if ((_deletedKeys << 1) > _usedData) { | |
329 _init(_index.length, _hashMask, _data, _usedData); | |
330 } else { | |
331 _init(_index.length << 1, _hashMask >> 1, _data, _usedData); | |
332 } | |
333 } | |
334 | |
335 void clear() { | |
336 if (!isEmpty) { | |
337 _init(_index.length, _hashMask); | |
338 } | |
339 } | |
340 | |
341 void _init(int size, int hashMask, [List oldData, int oldUsed]) { | |
342 _index = new Uint32List(size); | |
343 _hashMask = hashMask; | |
344 _data = new List(size >> 1); | |
345 _usedData = 0; | |
346 _deletedKeys = 0; | |
347 if (oldData != null) { | |
348 for (int i = 0; i < oldUsed; i += 1) { | |
349 var key = oldData[i]; | |
350 if (!identical(key, oldData)) { | |
Vyacheslav Egorov (Google)
2015/02/12 15:42:41
_isDeleted instead of identical?
koda
2015/02/17 17:26:29
Done.
| |
351 add(key); | |
352 } | |
353 } | |
354 } | |
355 } | |
356 | |
357 bool add(Object key) { | |
358 int size = _index.length; | |
Vyacheslav Egorov (Google)
2015/02/12 15:42:41
Same comments from Map implementation apply here.
koda
2015/02/17 17:26:28
Done, except for the code-splitting optimization (
| |
359 int sizeMask = size - 1; | |
360 int fullHash = key.hashCode; | |
361 int hashPattern = _hashPattern(fullHash, _hashMask, size); | |
362 int i = _firstProbe(fullHash, sizeMask); | |
363 int firstDeleted = -1; | |
364 int pair = _index[i]; | |
365 while (pair != _CompactHashTableMixin._UNUSED_PAIR) { | |
366 if (pair == _CompactHashTableMixin._DELETED_PAIR) { | |
367 if (firstDeleted < 0){ | |
368 firstDeleted = i; | |
369 } | |
370 } else { | |
371 int d = hashPattern ^ pair; | |
372 if (d < size && key == _data[d]) { | |
373 return false; | |
374 } | |
375 } | |
376 i = _nextProbe(i, sizeMask); | |
377 pair = _index[i]; | |
378 } | |
379 int insertionPoint = (firstDeleted >= 0) ? firstDeleted : i; | |
380 if (_usedData == _data.length) { | |
381 _rehash(); | |
382 add(key); | |
383 } else { | |
384 _index[insertionPoint] = hashPattern | _usedData; | |
Vyacheslav Egorov (Google)
2015/02/12 15:42:41
move int insertionPoint = ... to here.
koda
2015/02/17 17:26:29
Done.
| |
385 _data[_usedData++] = key; | |
386 } | |
387 return true; | |
388 } | |
389 | |
390 // If key is absent, return _data (which is never a value). | |
391 Object _getKeyOrData(Object key) { | |
392 int size = _index.length; | |
393 int sizeMask = size - 1; | |
394 int fullHash = key.hashCode; | |
395 int hashPattern = _hashPattern(fullHash, _hashMask, size); | |
396 int i = _firstProbe(fullHash, sizeMask); | |
397 int pair = _index[i]; | |
398 while (pair != _CompactHashTableMixin._UNUSED_PAIR) { | |
399 if (pair != _CompactHashTableMixin._DELETED_PAIR) { | |
400 int d = hashPattern ^ pair; | |
401 if (d < size && key == _data[d]) { | |
402 return _data[d]; // Note: Must return the existing key. | |
403 } | |
404 } | |
405 i = _nextProbe(i, sizeMask); | |
406 pair = _index[i]; | |
407 } | |
408 return _data; | |
409 } | |
410 | |
411 K lookup(Object key) { | |
412 var k = _getKeyOrData(key); | |
413 return identical(_data, k) ? null : k; | |
414 } | |
415 | |
416 bool contains(Object key) => !identical(_data, _getKeyOrData(key)); | |
417 | |
418 bool remove(Object key) { | |
419 int size = _index.length; | |
420 int sizeMask = size - 1; | |
421 int fullHash = key.hashCode; | |
422 int hashPattern = _hashPattern(fullHash, _hashMask, size); | |
423 int i = _firstProbe(fullHash, sizeMask); | |
424 int pair = _index[i]; | |
425 while (pair != _CompactHashTableMixin._UNUSED_PAIR) { | |
426 if (pair != _CompactHashTableMixin._DELETED_PAIR) { | |
427 int d = hashPattern ^ pair; | |
428 if (d < size && key == _data[d]) { | |
429 _index[i] = _CompactHashTableMixin._DELETED_PAIR; | |
430 _data[d] = _data; | |
431 ++_deletedKeys; | |
432 return true; | |
433 } | |
434 } | |
435 i = _nextProbe(i, sizeMask); | |
436 pair = _index[i]; | |
437 } | |
438 return false; | |
439 } | |
440 | |
441 Iterator<K> get iterator => | |
442 new _CompactIterator<K>(this, _data, _usedData, -1, 1); | |
443 | |
444 Set<K> toSet() => new Set<K>()..addAll(this); | |
445 } | |
OLD | NEW |