Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(778)

Side by Side Diff: runtime/lib/compact_hash.dart

Issue 915323002: Compact LinkedHashMap/Set implementation. (Closed) Base URL: http://dart.googlecode.com/svn/branches/bleeding_edge/dart/
Patch Set: Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « runtime/lib/collection_sources.gypi ('k') | runtime/lib/linked_hash_map.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file.
4
5 import 'dart:typed_data';
6
7 // Hash table with open addressing that separates the index from keys/values.
8 abstract class _HashBase {
9 // Each occupied entry in _index is a fixed-size integer that encodes a pair:
10 // [ hash pattern for key | index of entry in _data ]
11 // The hash pattern is based on hashCode, but is guaranteed to be non-zero.
12 // The length of _index is always a power of two, and there is always at
13 // least one unoccupied entry.
14 Uint32List _index;
15
16 // The number of bits used for each component is determined by table size.
17 // The length of _index is twice the number of entries in _data, and both
18 // are doubled when _data is full. Thus, _index will have a max load factor
19 // of 1/2, which enables one more bit to be used for the hash.
20 // TODO(koda): Consider growing _data by factor sqrt(2), twice as often.
21 static const int _INITIAL_INDEX_BITS = 3;
22 static const int _INITIAL_INDEX_SIZE = 1 << (_INITIAL_INDEX_BITS + 1);
23
24 // Unused and deleted entries are marked by 0 and 1, respectively.
25 static const int _UNUSED_PAIR = 0;
26 static const int _DELETED_PAIR = 1;
27
28 // Cached in-place mask for the hash pattern component. On 32-bit, the top
29 // bits are wasted to avoid Mint allocation.
30 // TODO(koda): Reclaim the bits by making the compiler treat hash patterns
31 // as unsigned words.
32 int _hashMask = int.is64Bit() ?
33 (1 << (32 - _INITIAL_INDEX_BITS)) - 1 :
34 (1 << (30 - _INITIAL_INDEX_BITS)) - 1;
35
36 static int _hashPattern(int fullHash, int hashMask, int size) =>
37 // TODO(koda): Consider keeping bit length and use left shift.
38 (fullHash == 0) ? size : (fullHash & hashMask) * size;
39
40 // Linear probing.
41 static int _firstProbe(int fullHash, int sizeMask) {
42 final int i = fullHash & sizeMask;
43 // Light, fast shuffle to mitigate bad hashCode (e.g., sequential).
44 return ((i << 1) + i) & sizeMask;
45 }
46 static int _nextProbe(int i, int sizeMask) => (i + 1) & sizeMask;
47
48 // Fixed-length list of keys (set) or key/value at even/odd indices (map).
49 List _data;
50 // Length of _data that is used (i.e., keys + values for a map).
51 int _usedData = 0;
52 // Number of deleted keys.
53 int _deletedKeys = 0;
54
55 // A self-loop is used to mark a deleted key or value.
56 static bool _isDeleted(List data, Object keyOrValue) =>
57 identical(keyOrValue, data);
58 static void _setDeletedAt(List data, int d) {
59 data[d] = data;
60 }
61
62 // Concurrent modification detection relies on this checksum monotonically
63 // increasing between reallocations of _data.
64 int get _checkSum => _usedData + _deletedKeys;
65 bool _isModifiedSince(List oldData, int oldCheckSum) =>
66 !identical(_data, oldData) || (_checkSum != oldCheckSum);
67 }
68
69 // Map with iteration in insertion order (hence "Linked"). New keys are simply
70 // appended to _data.
71 class _CompactLinkedHashMap<K, V>
72 extends MapBase<K, V> with _HashBase
73 implements HashMap<K, V>, LinkedHashMap<K, V> {
74
75 _CompactLinkedHashMap() {
76 assert(_HashBase._UNUSED_PAIR == 0);
77 _index = new Uint32List(_HashBase._INITIAL_INDEX_SIZE);
78 _data = new List(_HashBase._INITIAL_INDEX_SIZE);
79 }
80
81 int get length => (_usedData >> 1) - _deletedKeys;
82 bool get isEmpty => length == 0;
83 bool get isNotEmpty => !isEmpty;
84
85 void _rehash() {
86 if ((_deletedKeys << 1) > _usedData) {
87 // TODO(koda): Consider shrinking.
88 // TODO(koda): Consider in-place compaction and more costly CME check.
89 _init(_index.length, _hashMask, _data, _usedData);
90 } else {
91 // TODO(koda): Support 32->64 bit transition (and adjust _hashMask).
92 _init(_index.length << 1, _hashMask >> 1, _data, _usedData);
93 }
94 }
95
96 void clear() {
97 if (!isEmpty) {
98 _init(_index.length, _hashMask);
99 }
100 }
101
102 // Allocate new _index and _data, and optionally copy existing contents.
103 void _init(int size, int hashMask, [List oldData, int oldUsed]) {
104 assert(size & (size - 1) == 0);
105 assert(_HashBase._UNUSED_PAIR == 0);
106 _index = new Uint32List(size);
107 _hashMask = hashMask;
108 _data = new List(size);
109 _usedData = 0;
110 _deletedKeys = 0;
111 if (oldData != null) {
112 for (int i = 0; i < oldUsed; i += 2) {
113 var key = oldData[i];
114 if (!_HashBase._isDeleted(oldData, key)) {
115 // TODO(koda): While there are enough hash bits, avoid hashCode calls.
116 this[key] = oldData[i + 1];
117 }
118 }
119 }
120 }
121
122 void _insert(K key, V value, int hashPattern, int i) {
123 if (_usedData == _data.length) {
124 _rehash();
125 this[key] = value;
126 } else {
127 _index[i] = hashPattern | (_usedData >> 1);
128 _data[_usedData++] = key;
129 _data[_usedData++] = value;
130 }
131 }
132
133 // If key is present, returns the index of the value in _data, else returns
134 // the negated insertion point in _index.
135 int _findValueOrInsertPoint(K key, int fullHash, int hashPattern, int size) {
136 final int sizeMask = size - 1;
137 int i = _HashBase._firstProbe(fullHash, sizeMask);
138 int firstDeleted = -1;
139 int pair = _index[i];
140 while (pair != _HashBase._UNUSED_PAIR) {
141 if (pair == _HashBase._DELETED_PAIR) {
142 if (firstDeleted < 0){
143 firstDeleted = i;
144 }
145 } else {
146 final int d = (hashPattern ^ pair) << 1;
147 if (d < size && key == _data[d]) {
148 return d + 1;
149 }
150 }
151 i = _HashBase._nextProbe(i, sizeMask);
152 pair = _index[i];
153 }
154 return firstDeleted >= 0 ? -firstDeleted : -i;
155 }
156
157 void operator[]=(K key, V value) {
158 final int size = _index.length;
159 final int sizeMask = size - 1;
160 final int fullHash = key.hashCode;
161 final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);
162 final int d = _findValueOrInsertPoint(key, fullHash, hashPattern, size);
163 if (d > 0) {
164 _data[d] = value;
165 } else {
166 int i = -d;
Vyacheslav Egorov (Google) 2015/02/17 23:39:00 final int i? ditto below.
koda 2015/02/18 00:03:33 Done.
167 _insert(key, value, hashPattern, i);
168 }
169 }
170
171 V putIfAbsent(K key, V ifAbsent()) {
172 final int size = _index.length;
173 final int sizeMask = size - 1;
174 final int fullHash = key.hashCode;
175 final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);
176 final int d = _findValueOrInsertPoint(key, fullHash, hashPattern, size);
177 if (d > 0) {
178 return _data[d];
179 }
180 int i = -d;
181 // 'ifAbsent' is allowed to modify the map.
182 List oldData = _data;
183 int oldCheckSum = _checkSum;
184 V value = ifAbsent();
185 if (_isModifiedSince(oldData, oldCheckSum)) {
186 this[key] = value;
187 } else {
188 _insert(key, value, hashPattern, i);
189 }
190 return value;
191 }
192
193 V remove(Object key) {
194 final int size = _index.length;
195 final int sizeMask = size - 1;
196 final int fullHash = key.hashCode;
197 final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);
198 int i = _HashBase._firstProbe(fullHash, sizeMask);
199 int pair = _index[i];
200 while (pair != _HashBase._UNUSED_PAIR) {
201 if (pair != _HashBase._DELETED_PAIR) {
202 final int d = (hashPattern ^ pair) << 1;
203 if (d < size && key == _data[d]) {
204 _index[i] = _HashBase._DELETED_PAIR;
205 _HashBase._setDeletedAt(_data, d);
206 V value = _data[d + 1];
207 _HashBase._setDeletedAt(_data, d + 1);
208 ++_deletedKeys;
209 return value;
210 }
211 }
212 i = _HashBase._nextProbe(i, sizeMask);
213 pair = _index[i];
214 }
215 return null;
216 }
217
218 // If key is absent, return _data (which is never a value).
219 Object _getValueOrData(Object key) {
220 final int size = _index.length;
221 final int sizeMask = size - 1;
222 final int fullHash = key.hashCode;
223 final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);
224 int i = _HashBase._firstProbe(fullHash, sizeMask);
225 int pair = _index[i];
226 while (pair != _HashBase._UNUSED_PAIR) {
227 if (pair != _HashBase._DELETED_PAIR) {
228 final int d = (hashPattern ^ pair) << 1;
229 if (d < size && key == _data[d]) {
230 return _data[d + 1];
231 }
232 }
233 i = _HashBase._nextProbe(i, sizeMask);
234 pair = _index[i];
235 }
236 return _data;
237 }
238
239 bool containsKey(Object key) => !identical(_data, _getValueOrData(key));
240
241 V operator[](Object key) {
242 var v = _getValueOrData(key);
243 return identical(_data, v) ? null : v;
244 }
245
246 bool containsValue(Object value) {
247 for (var v in values) {
248 if (v == value) {
249 return true;
250 }
251 }
252 return false;
253 }
254
255 void forEach(void f(K key, V value)) {
256 var ki = keys.iterator;
257 var vi = values.iterator;
258 while (ki.moveNext()) {
259 vi.moveNext();
260 f(ki.current, vi.current);
261 }
262 }
263
264 Iterable<K> get keys =>
265 new _CompactIterable<K>(this, _data, _usedData, -2, 2);
266 Iterable<V> get values =>
267 new _CompactIterable<V>(this, _data, _usedData, -1, 2);
268 }
269
270 // Iterates through _data[_offset + _step], _data[_offset + 2*_step], ...
271 // and checks for concurrent modification.
272 class _CompactIterable<E> extends IterableBase<E> {
273 final _table;
274 final List _data;
275 final int _len;
276 final int _offset;
277 final int _step;
278
279 _CompactIterable(this._table, this._data, this._len,
280 this._offset, this._step);
281
282 Iterator<E> get iterator =>
283 new _CompactIterator<E>(_table, _data, _len, _offset, _step);
284
285 int get length => _table.length;
286 bool get isEmpty => length == 0;
287 bool get isNotEmpty => !isEmpty;
288 }
289
290 class _CompactIterator<E> implements Iterator<E> {
291 final _table;
292 final List _data;
293 final int _len;
294 int _offset;
295 final int _step;
296 final int _checkSum;
297 E current;
298
299 _CompactIterator(table, this._data, this._len, this._offset, this._step) :
300 _table = table, _checkSum = table._checkSum;
301
302 bool moveNext() {
303 if (_table._isModifiedSince(_data, _checkSum)) {
304 throw new ConcurrentModificationError(_table);
305 }
306 do {
307 _offset += _step;
308 } while (_offset < _len && _HashBase._isDeleted(_data, _data[_offset]));
309 if (_offset < _len) {
310 current = _data[_offset];
311 return true;
312 } else {
313 current = null;
314 return false;
315 }
316 }
317 }
318
319 // Set implementation, analogous to _CompactLinkedHashMap.
320 class _CompactLinkedHashSet<K>
321 extends SetBase<K> with _HashBase
322 implements HashSet<K>, LinkedHashSet<K> {
323
324 _CompactLinkedHashSet() {
325 assert(_HashBase._UNUSED_PAIR == 0);
326 _index = new Uint32List(_HashBase._INITIAL_INDEX_SIZE);
327 _data = new List(_HashBase._INITIAL_INDEX_SIZE >> 1);
328 }
329
330 int get length => _usedData - _deletedKeys;
331
332 void _rehash() {
333 if ((_deletedKeys << 1) > _usedData) {
334 _init(_index.length, _hashMask, _data, _usedData);
335 } else {
336 _init(_index.length << 1, _hashMask >> 1, _data, _usedData);
337 }
338 }
339
340 void clear() {
341 if (!isEmpty) {
342 _init(_index.length, _hashMask);
343 }
344 }
345
346 void _init(int size, int hashMask, [List oldData, int oldUsed]) {
347 _index = new Uint32List(size);
348 _hashMask = hashMask;
349 _data = new List(size >> 1);
350 _usedData = 0;
351 _deletedKeys = 0;
352 if (oldData != null) {
353 for (int i = 0; i < oldUsed; i += 1) {
354 var key = oldData[i];
355 if (!_HashBase._isDeleted(oldData, key)) {
356 add(key);
357 }
358 }
359 }
360 }
361
362 bool add(Object key) {
363 final int size = _index.length;
364 final int sizeMask = size - 1;
365 final int fullHash = key.hashCode;
366 final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);
367 int i = _HashBase._firstProbe(fullHash, sizeMask);
368 int firstDeleted = -1;
369 int pair = _index[i];
370 while (pair != _HashBase._UNUSED_PAIR) {
371 if (pair == _HashBase._DELETED_PAIR) {
372 if (firstDeleted < 0){
373 firstDeleted = i;
374 }
375 } else {
376 final int d = hashPattern ^ pair;
377 if (d < size && key == _data[d]) {
378 return false;
379 }
380 }
381 i = _HashBase._nextProbe(i, sizeMask);
382 pair = _index[i];
383 }
384 if (_usedData == _data.length) {
385 _rehash();
386 add(key);
387 } else {
388 final int insertionPoint = (firstDeleted >= 0) ? firstDeleted : i;
389 _index[insertionPoint] = hashPattern | _usedData;
390 _data[_usedData++] = key;
391 }
392 return true;
393 }
394
395 // If key is absent, return _data (which is never a value).
396 Object _getKeyOrData(Object key) {
397 final int size = _index.length;
398 final int sizeMask = size - 1;
399 final int fullHash = key.hashCode;
400 final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);
401 int i = _HashBase._firstProbe(fullHash, sizeMask);
402 int pair = _index[i];
403 while (pair != _HashBase._UNUSED_PAIR) {
404 if (pair != _HashBase._DELETED_PAIR) {
405 final int d = hashPattern ^ pair;
406 if (d < size && key == _data[d]) {
407 return _data[d]; // Note: Must return the existing key.
408 }
409 }
410 i = _HashBase._nextProbe(i, sizeMask);
411 pair = _index[i];
412 }
413 return _data;
414 }
415
416 K lookup(Object key) {
417 var k = _getKeyOrData(key);
418 return identical(_data, k) ? null : k;
419 }
420
421 bool contains(Object key) => !identical(_data, _getKeyOrData(key));
422
423 bool remove(Object key) {
424 final int size = _index.length;
425 final int sizeMask = size - 1;
426 final int fullHash = key.hashCode;
427 final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);
428 int i = _HashBase._firstProbe(fullHash, sizeMask);
429 int pair = _index[i];
430 while (pair != _HashBase._UNUSED_PAIR) {
431 if (pair != _HashBase._DELETED_PAIR) {
432 final int d = hashPattern ^ pair;
433 if (d < size && key == _data[d]) {
434 _index[i] = _HashBase._DELETED_PAIR;
435 _HashBase._setDeletedAt(_data, d);
436 ++_deletedKeys;
437 return true;
438 }
439 }
440 i = _HashBase._nextProbe(i, sizeMask);
441 pair = _index[i];
442 }
443 return false;
444 }
445
446 Iterator<K> get iterator =>
447 new _CompactIterator<K>(this, _data, _usedData, -1, 1);
448
449 Set<K> toSet() => new Set<K>()..addAll(this);
450 }
OLDNEW
« no previous file with comments | « runtime/lib/collection_sources.gypi ('k') | runtime/lib/linked_hash_map.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698