Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(7)

Side by Side Diff: runtime/lib/compact_hash.dart

Issue 915323002: Compact LinkedHashMap/Set implementation. (Closed) Base URL: http://dart.googlecode.com/svn/branches/bleeding_edge/dart/
Patch Set: Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 import 'dart:typed_data';
6
7 // Hash table with open addressing that separates the index from keys/values.
8 abstract class _CompactHashTableMixin {
9 // Each occupied entry in _index is a fixed-size integer that encodes a pair:
10 // [ hash pattern for key | index of key in _data ]
11 // The hash pattern is based on hashCode, but is guaranteed to be non-zero.
12 // The length of _index is always a power of two, and there is always at
13 // least one unoccupied entry.
14 Uint32List _index;
15
16 // The number of bits used for each component is determined by table size.
17 static const int _INITIAL_INDEX_BITS = 4;
18 static const int _INITIAL_INDEX_SIZE = 1 << _INITIAL_INDEX_BITS;
19
20 // Unused and deleted entries are marked by 0 and 1, respectively.
21 static const int _UNUSED_PAIR = 0;
22 static const int _DELETED_PAIR = 1;
23
24 // Cached in-place mask for the hash pattern component. On 32-bit, the top
25 // bits are wasted to avoid Mint allocation. TODO(koda): Reclaim the bits by
Vyacheslav Egorov (Google) 2015/02/12 15:42:41 Please keep TODO on a separate line from the rest
koda 2015/02/17 17:26:29 Done.
26 // making the compiler treat hash patterns as unsigned words.
27 int _hashMask = int.is64Bit() ?
28 (1 << (32 - _INITIAL_INDEX_BITS)) - 1 :
29 (1 << (30 - _INITIAL_INDEX_BITS)) - 1;
30
31 int _hashPattern(int fullHash, int hashMask, int size) =>
Vyacheslav Egorov (Google) 2015/02/12 15:42:41 should this be static? it does not use any members
koda 2015/02/17 17:26:29 Done.
32 // TODO(koda): Consider keeping bit length and use left shift.
33 (fullHash == 0) ? size : (fullHash & hashMask) * size;
34
35 // Linear probing.
36 int _firstProbe(int fullHash, int sizeMask) {
Vyacheslav Egorov (Google) 2015/02/12 15:42:41 should this be static?
koda 2015/02/17 17:26:28 Done.
37 int i = fullHash & sizeMask;
38 // Light, fast shuffle to mitigate bad hashCode (e.g., sequential).
39 return ((i << 1) + i) & sizeMask;
40 }
41 int _nextProbe(int i, int sizeMask) => (i + 1) & sizeMask;
Vyacheslav Egorov (Google) 2015/02/12 15:42:41 ditto, the same comment applies to all helpers bel
koda 2015/02/17 17:26:29 Done. I also shortened the mixin name to limit th
42
43 // Fixed-length list of keys (set) or key/value at even/odd indices (map).
44 List _data;
45 // Length of _data that is used (i.e., keys + values for a map).
46 int _usedData = 0;
47 // Number of deleted keys.
48 int _deletedKeys = 0;
49
50 // A self-loop is used to mark a deleted key or value.
51 bool _isDeleted(List data, Object keyOrValue) => identical(keyOrValue, data);
52 void _setDeletedAt(List data, int d) {
53 data[d] = data;
54 }
55
56 // Concurrent modification detection relies on this checksum monotonically
57 // increasing between reallocations of _data.
58 int get _checkSum => _usedData + _deletedKeys;
59 bool _isModifiedSince(List oldData, int oldCheckSum) =>
60 !identical(_data, oldData) || (_checkSum != oldCheckSum);
61 }
62
63 // Map with iteration in insertion order (hence "Linked"). New keys are simply
64 // appended to _data.
65 class _CompactLinkedHashMap<K, V>
66 extends MapBase<K, V> with _CompactHashTableMixin
67 implements HashMap<K, V>, LinkedHashMap<K, V> {
68
69 // Growth policy is to keep _index and _data the same length, and double
70 // both when _data is full. Thus, _index will have a max load factor of 0.5.
71 // TODO(koda): Consider growing _data by factor sqrt(2), twice as often.
72 _CompactLinkedHashMap() {
73 _index = new Uint32List(_CompactHashTableMixin._INITIAL_INDEX_SIZE);
74 _data = new List(_CompactHashTableMixin._INITIAL_INDEX_SIZE);
75 }
76
77 int get length => (_usedData >> 1) - _deletedKeys;
78 bool get isEmpty => length == 0;
79 bool get isNotEmpty => !isEmpty;
80
81 void _rehash() {
82 if ((_deletedKeys << 1) > _usedData) {
83 // TODO(koda): Consider shrinking.
84 // TODO(koda): Consider in-place compaction and more costly CME check.
85 _init(_index.length, _hashMask, _data, _usedData);
86 } else {
87 // TODO(koda): Support 32->64 bit transition (and adjust _hashMask).
88 _init(_index.length << 1, _hashMask >> 1, _data, _usedData);
89 }
90 }
91
92 void clear() {
93 if (!isEmpty) {
94 _init(_index.length, _hashMask);
95 }
96 }
97
98 // Allocate new _index and _data, and optionally copy existing contents.
99 void _init(int size, int hashMask, [List oldData, int oldUsed]) {
100 assert(size & (size - 1) == 0);
101 assert(_CompactHashTableMixin._UNUSED_PAIR == 0);
Vyacheslav Egorov (Google) 2015/02/12 15:42:41 should these assert be in constructor too? we crea
koda 2015/02/17 17:26:28 Done.
102 _index = new Uint32List(size);
103 _hashMask = hashMask;
104 _data = new List(size);
105 _usedData = 0;
106 _deletedKeys = 0;
107 if (oldData != null) {
108 for (int i = 0; i < oldUsed; i += 2) {
109 var key = oldData[i];
110 if (!_isDeleted(oldData, key)) {
111 // TODO(koda): While there are enough hash bits, avoid hashCode calls.
112 this[key] = oldData[i + 1];
113 }
114 }
115 }
116 }
117
118 void _insert(K key, V value, int hashPattern, int i) {
119 if (_usedData == _data.length) {
120 _rehash();
121 this[key] = value;
122 } else {
123 _index[i] = hashPattern | _usedData;
Vyacheslav Egorov (Google) 2015/02/12 15:42:41 looks like we are wasting 1 bit here - (_usedData
koda 2015/02/17 17:26:28 Done. Yes, we were. It allowed for the very simpl
124 _data[_usedData++] = key;
125 _data[_usedData++] = value;
126 }
127 }
128
129 // If key is present, returns the index of the value in _data, else returns
130 // the negated insertion point in _index.
131 int _findValueOrInsertPoint(K key, int fullHash, int hashPattern, int size) {
132 int sizeMask = size - 1;
Vyacheslav Egorov (Google) 2015/02/12 15:42:41 when the are a lot of local variables it easier to
koda 2015/02/17 17:26:28 Done.
133 int i = _firstProbe(fullHash, sizeMask);
134 int firstDeleted = -1;
135 int pair = _index[i];
136 while (pair != _CompactHashTableMixin._UNUSED_PAIR) {
137 if (pair == _CompactHashTableMixin._DELETED_PAIR) {
Vyacheslav Egorov (Google) 2015/02/12 15:42:41 I wonder if the code becomes faster if you have tw
koda 2015/02/17 17:26:29 Perhaps. I agree that's a good candidate for optim
Vyacheslav Egorov (Google) 2015/02/17 23:39:00 Ack! That was more of a thought than immediate cal
138 if (firstDeleted < 0){
139 firstDeleted = i;
140 }
141 } else {
142 int d = hashPattern ^ pair;
143 if (d < size && key == _data[d]) {
144 return d + 1;
145 }
146 }
147 i = _nextProbe(i, sizeMask);
148 pair = _index[i];
149 }
150 return firstDeleted >= 0 ? -firstDeleted : -i;
151 }
152
153 void operator[]=(K key, V value) {
154 int size = _index.length;
155 int sizeMask = size - 1;
156 int fullHash = key.hashCode;
157 int hashPattern = _hashPattern(fullHash, _hashMask, size);
158 int d = _findValueOrInsertPoint(key, fullHash, hashPattern, size);
159 if (d > 0) {
160 _data[d] = value;
161 } else {
162 int i = -d;
163 _insert(key, value, hashPattern, i);
164 }
165 }
166
167 V putIfAbsent(K key, V ifAbsent()) {
168 int size = _index.length;
169 int sizeMask = size - 1;
170 int fullHash = key.hashCode;
171 int hashPattern = _hashPattern(fullHash, _hashMask, size);
172 int d = _findValueOrInsertPoint(key, fullHash, hashPattern, size);
173 if (d > 0) {
174 return _data[d];
175 }
176 int i = -d;
177 // 'ifAbsent' is allowed to modify the map.
178 List oldData = _data;
179 int oldCheckSum = _checkSum;
180 V value = ifAbsent();
181 if (_isModifiedSince(oldData, oldCheckSum)) {
182 this[key] = value;
183 } else {
184 _insert(key, value, hashPattern, i);
185 }
186 return value;
187 }
188
189 V remove(Object key) {
190 int size = _index.length;
191 int sizeMask = size - 1;
192 int fullHash = key.hashCode;
193 int hashPattern = _hashPattern(fullHash, _hashMask, size);
194 int i = _firstProbe(fullHash, sizeMask);
195 int pair = _index[i];
196 while (pair != _CompactHashTableMixin._UNUSED_PAIR) {
197 if (pair != _CompactHashTableMixin._DELETED_PAIR) {
198 int d = hashPattern ^ pair;
199 if (d < size && key == _data[d]) {
200 _index[i] = _CompactHashTableMixin._DELETED_PAIR;
201 _setDeletedAt(_data, d);
202 V value = _data[d + 1];
203 _setDeletedAt(_data, d + 1);
204 ++_deletedKeys;
205 return value;
206 }
207 }
208 i = _nextProbe(i, sizeMask);
209 pair = _index[i];
210 }
211 return null;
212 }
213
214 // If key is absent, return _data (which is never a value).
215 Object _getValueOrData(Object key) {
216 int size = _index.length;
217 int sizeMask = size - 1;
218 int fullHash = key.hashCode;
219 int hashPattern = _hashPattern(fullHash, _hashMask, size);
220 int i = _firstProbe(fullHash, sizeMask);
221 int pair = _index[i];
222 while (pair != _CompactHashTableMixin._UNUSED_PAIR) {
223 if (pair != _CompactHashTableMixin._DELETED_PAIR) {
224 int d = hashPattern ^ pair;
225 if (d < size && key == _data[d]) {
226 return _data[d + 1];
227 }
228 }
229 i = _nextProbe(i, sizeMask);
230 pair = _index[i];
231 }
232 return _data;
233 }
234
235 bool containsKey(Object key) => !identical(_data, _getValueOrData(key));
236
237 V operator[](Object key) {
238 var v = _getValueOrData(key);
239 return identical(_data, v) ? null : v;
240 }
241
242 bool containsValue(Object value) {
243 for (var v in values) {
244 if (v == value) {
245 return true;
246 }
247 }
248 return false;
249 }
250
251 void forEach(void f(K key, V value)) {
252 var ki = keys.iterator;
253 var vi = values.iterator;
254 while (ki.moveNext()) {
255 vi.moveNext();
256 f(ki.current, vi.current);
257 }
258 }
259
260 Iterable<K> get keys =>
261 new _CompactIterable<K>(this, _data, _usedData, -2, 2);
262 Iterable<V> get values =>
263 new _CompactIterable<V>(this, _data, _usedData, -1, 2);
264 }
265
266 // Iterates through _data[_offset + _step], _data[_offset + 2*_step], ...
267 // and checks for concurrent modification.
268 class _CompactIterable<E> extends IterableBase<E> {
269 var _map;
Vyacheslav Egorov (Google) 2015/02/12 15:42:41 Please mark all fields that do not change final.
koda 2015/02/17 17:26:28 Done.
270 List _data;
271 int _len;
272 int _offset;
273 int _step;
274
275 _CompactIterable(this._map, this._data, this._len, this._offset, this._step);
276
277 Iterator<E> get iterator =>
278 new _CompactIterator<E>(_map, _data, _len, _offset, _step);
279
280 int get length => _map.length;
281 bool get isEmpty => length == 0;
282 bool get isNotEmpty => !isEmpty;
283 }
284
285 class _CompactIterator<E> implements Iterator<E> {
286 var _map;
Vyacheslav Egorov (Google) 2015/02/12 15:42:41 Please mark all fields that do not change final (i
koda 2015/02/17 17:26:29 Done.
287 List _data;
288 int _len;
289 int _offset;
290 int _step;
291 int _checkSum;
292 E current;
293
294 _CompactIterator(this._map, this._data, this._len, this._offset, this._step) {
295 this._checkSum = _map._checkSum;
296 }
297
298 bool moveNext() {
299 if (_map._isModifiedSince(_data, _checkSum)) {
300 throw new ConcurrentModificationError(_map);
301 }
302 do {
303 _offset += _step;
304 } while (_offset < _len && _map._isDeleted(_data, _data[_offset]));
Vyacheslav Egorov (Google) 2015/02/12 15:42:41 This shouldn't be an instance call (also it will f
koda 2015/02/17 17:26:29 Done.
305 if (_offset < _len) {
306 current = _data[_offset];
307 return true;
308 } else {
309 current = null;
310 return false;
311 }
312 }
313 }
314
315 // Set implementation, analogous to _CompactLinkedHashMap.
316 class _CompactLinkedHashSet<K>
317 extends SetBase<K> with _CompactHashTableMixin
318 implements HashSet<K>, LinkedHashSet<K> {
319
320 _CompactLinkedHashSet() {
321 _index = new Uint32List(_CompactHashTableMixin._INITIAL_INDEX_SIZE);
322 _data = new List(_CompactHashTableMixin._INITIAL_INDEX_SIZE >> 1);
Vyacheslav Egorov (Google) 2015/02/12 15:42:41 Are we wasting 1 bit in the index here too? becaus
koda 2015/02/17 17:26:29 Done.
323 }
324
325 int get length => _usedData - _deletedKeys;
326
327 void _rehash() {
328 if ((_deletedKeys << 1) > _usedData) {
329 _init(_index.length, _hashMask, _data, _usedData);
330 } else {
331 _init(_index.length << 1, _hashMask >> 1, _data, _usedData);
332 }
333 }
334
335 void clear() {
336 if (!isEmpty) {
337 _init(_index.length, _hashMask);
338 }
339 }
340
341 void _init(int size, int hashMask, [List oldData, int oldUsed]) {
342 _index = new Uint32List(size);
343 _hashMask = hashMask;
344 _data = new List(size >> 1);
345 _usedData = 0;
346 _deletedKeys = 0;
347 if (oldData != null) {
348 for (int i = 0; i < oldUsed; i += 1) {
349 var key = oldData[i];
350 if (!identical(key, oldData)) {
Vyacheslav Egorov (Google) 2015/02/12 15:42:41 _isDeleted instead of identical?
koda 2015/02/17 17:26:29 Done.
351 add(key);
352 }
353 }
354 }
355 }
356
357 bool add(Object key) {
358 int size = _index.length;
Vyacheslav Egorov (Google) 2015/02/12 15:42:41 Same comments from Map implementation apply here.
koda 2015/02/17 17:26:28 Done, except for the code-splitting optimization (
359 int sizeMask = size - 1;
360 int fullHash = key.hashCode;
361 int hashPattern = _hashPattern(fullHash, _hashMask, size);
362 int i = _firstProbe(fullHash, sizeMask);
363 int firstDeleted = -1;
364 int pair = _index[i];
365 while (pair != _CompactHashTableMixin._UNUSED_PAIR) {
366 if (pair == _CompactHashTableMixin._DELETED_PAIR) {
367 if (firstDeleted < 0){
368 firstDeleted = i;
369 }
370 } else {
371 int d = hashPattern ^ pair;
372 if (d < size && key == _data[d]) {
373 return false;
374 }
375 }
376 i = _nextProbe(i, sizeMask);
377 pair = _index[i];
378 }
379 int insertionPoint = (firstDeleted >= 0) ? firstDeleted : i;
380 if (_usedData == _data.length) {
381 _rehash();
382 add(key);
383 } else {
384 _index[insertionPoint] = hashPattern | _usedData;
Vyacheslav Egorov (Google) 2015/02/12 15:42:41 move int insertionPoint = ... to here.
koda 2015/02/17 17:26:29 Done.
385 _data[_usedData++] = key;
386 }
387 return true;
388 }
389
390 // If key is absent, return _data (which is never a value).
391 Object _getKeyOrData(Object key) {
392 int size = _index.length;
393 int sizeMask = size - 1;
394 int fullHash = key.hashCode;
395 int hashPattern = _hashPattern(fullHash, _hashMask, size);
396 int i = _firstProbe(fullHash, sizeMask);
397 int pair = _index[i];
398 while (pair != _CompactHashTableMixin._UNUSED_PAIR) {
399 if (pair != _CompactHashTableMixin._DELETED_PAIR) {
400 int d = hashPattern ^ pair;
401 if (d < size && key == _data[d]) {
402 return _data[d]; // Note: Must return the existing key.
403 }
404 }
405 i = _nextProbe(i, sizeMask);
406 pair = _index[i];
407 }
408 return _data;
409 }
410
411 K lookup(Object key) {
412 var k = _getKeyOrData(key);
413 return identical(_data, k) ? null : k;
414 }
415
416 bool contains(Object key) => !identical(_data, _getKeyOrData(key));
417
418 bool remove(Object key) {
419 int size = _index.length;
420 int sizeMask = size - 1;
421 int fullHash = key.hashCode;
422 int hashPattern = _hashPattern(fullHash, _hashMask, size);
423 int i = _firstProbe(fullHash, sizeMask);
424 int pair = _index[i];
425 while (pair != _CompactHashTableMixin._UNUSED_PAIR) {
426 if (pair != _CompactHashTableMixin._DELETED_PAIR) {
427 int d = hashPattern ^ pair;
428 if (d < size && key == _data[d]) {
429 _index[i] = _CompactHashTableMixin._DELETED_PAIR;
430 _data[d] = _data;
431 ++_deletedKeys;
432 return true;
433 }
434 }
435 i = _nextProbe(i, sizeMask);
436 pair = _index[i];
437 }
438 return false;
439 }
440
441 Iterator<K> get iterator =>
442 new _CompactIterator<K>(this, _data, _usedData, -1, 1);
443
444 Set<K> toSet() => new Set<K>()..addAll(this);
445 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698