Chromium Code Reviews| Index: runtime/lib/compact_hash.dart |
| =================================================================== |
| --- runtime/lib/compact_hash.dart (revision 0) |
| +++ runtime/lib/compact_hash.dart (working copy) |
| @@ -0,0 +1,445 @@ |
| +// Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file |
| +// for details. All rights reserved. Use of this source code is governed by a |
| +// BSD-style license that can be found in the LICENSE file. |
| + |
| +import 'dart:typed_data'; |
| + |
| +// Hash table with open addressing that separates the index from keys/values. |
| +abstract class _CompactHashTableMixin { |
| + // Each occupied entry in _index is a fixed-size integer that encodes a pair: |
| + // [ hash pattern for key | index of key in _data ] |
| + // The hash pattern is based on hashCode, but is guaranteed to be non-zero. |
| + // The length of _index is always a power of two, and there is always at |
| + // least one unoccupied entry. |
| + Uint32List _index; |
| + |
| + // The number of bits used for each component is determined by table size. |
| + static const int _INITIAL_INDEX_BITS = 4; |
| + static const int _INITIAL_INDEX_SIZE = 1 << _INITIAL_INDEX_BITS; |
| + |
| + // Unused and deleted entries are marked by 0 and 1, respectively. |
| + static const int _UNUSED_PAIR = 0; |
| + static const int _DELETED_PAIR = 1; |
| + |
| + // Cached in-place mask for the hash pattern component. On 32-bit, the top |
| + // bits are wasted to avoid Mint allocation. TODO(koda): Reclaim the bits by |
|
Vyacheslav Egorov (Google)
2015/02/12 15:42:41
Please keep TODO on a separate line from the rest
koda
2015/02/17 17:26:29
Done.
|
| + // making the compiler treat hash patterns as unsigned words. |
| + int _hashMask = int.is64Bit() ? |
| + (1 << (32 - _INITIAL_INDEX_BITS)) - 1 : |
| + (1 << (30 - _INITIAL_INDEX_BITS)) - 1; |
| + |
| + int _hashPattern(int fullHash, int hashMask, int size) => |
|
Vyacheslav Egorov (Google)
2015/02/12 15:42:41
should this be static? it does not use any members
koda
2015/02/17 17:26:29
Done.
|
| + // TODO(koda): Consider keeping bit length and use left shift. |
| + (fullHash == 0) ? size : (fullHash & hashMask) * size; |
| + |
| + // Linear probing. |
| + int _firstProbe(int fullHash, int sizeMask) { |
|
Vyacheslav Egorov (Google)
2015/02/12 15:42:41
should this be static?
koda
2015/02/17 17:26:28
Done.
|
| + int i = fullHash & sizeMask; |
| + // Light, fast shuffle to mitigate bad hashCode (e.g., sequential). |
| + return ((i << 1) + i) & sizeMask; |
| + } |
| + int _nextProbe(int i, int sizeMask) => (i + 1) & sizeMask; |
|
Vyacheslav Egorov (Google)
2015/02/12 15:42:41
ditto, the same comment applies to all helpers bel
koda
2015/02/17 17:26:29
Done.
I also shortened the mixin name to limit th
|
| + |
| + // Fixed-length list of keys (set) or key/value at even/odd indices (map). |
| + List _data; |
| + // Length of _data that is used (i.e., keys + values for a map). |
| + int _usedData = 0; |
| + // Number of deleted keys. |
| + int _deletedKeys = 0; |
| + |
| + // A self-loop is used to mark a deleted key or value. |
| + bool _isDeleted(List data, Object keyOrValue) => identical(keyOrValue, data); |
| + void _setDeletedAt(List data, int d) { |
| + data[d] = data; |
| + } |
| + |
| + // Concurrent modification detection relies on this checksum monotonically |
| + // increasing between reallocations of _data. |
| + int get _checkSum => _usedData + _deletedKeys; |
| + bool _isModifiedSince(List oldData, int oldCheckSum) => |
| + !identical(_data, oldData) || (_checkSum != oldCheckSum); |
| +} |
| + |
| +// Map with iteration in insertion order (hence "Linked"). New keys are simply |
| +// appended to _data. |
| +class _CompactLinkedHashMap<K, V> |
| + extends MapBase<K, V> with _CompactHashTableMixin |
| + implements HashMap<K, V>, LinkedHashMap<K, V> { |
| + |
| + // Growth policy is to keep _index and _data the same length, and double |
| + // both when _data is full. Thus, _index will have a max load factor of 0.5. |
| + // TODO(koda): Consider growing _data by factor sqrt(2), twice as often. |
| + _CompactLinkedHashMap() { |
| + _index = new Uint32List(_CompactHashTableMixin._INITIAL_INDEX_SIZE); |
| + _data = new List(_CompactHashTableMixin._INITIAL_INDEX_SIZE); |
| + } |
| + |
| + int get length => (_usedData >> 1) - _deletedKeys; |
| + bool get isEmpty => length == 0; |
| + bool get isNotEmpty => !isEmpty; |
| + |
| + void _rehash() { |
| + if ((_deletedKeys << 1) > _usedData) { |
| + // TODO(koda): Consider shrinking. |
| + // TODO(koda): Consider in-place compaction and more costly CME check. |
| + _init(_index.length, _hashMask, _data, _usedData); |
| + } else { |
| + // TODO(koda): Support 32->64 bit transition (and adjust _hashMask). |
| + _init(_index.length << 1, _hashMask >> 1, _data, _usedData); |
| + } |
| + } |
| + |
| + void clear() { |
| + if (!isEmpty) { |
| + _init(_index.length, _hashMask); |
| + } |
| + } |
| + |
| + // Allocate new _index and _data, and optionally copy existing contents. |
| + void _init(int size, int hashMask, [List oldData, int oldUsed]) { |
| + assert(size & (size - 1) == 0); |
| + assert(_CompactHashTableMixin._UNUSED_PAIR == 0); |
|
Vyacheslav Egorov (Google)
2015/02/12 15:42:41
should these assert be in constructor too? we crea
koda
2015/02/17 17:26:28
Done.
|
| + _index = new Uint32List(size); |
| + _hashMask = hashMask; |
| + _data = new List(size); |
| + _usedData = 0; |
| + _deletedKeys = 0; |
| + if (oldData != null) { |
| + for (int i = 0; i < oldUsed; i += 2) { |
| + var key = oldData[i]; |
| + if (!_isDeleted(oldData, key)) { |
| + // TODO(koda): While there are enough hash bits, avoid hashCode calls. |
| + this[key] = oldData[i + 1]; |
| + } |
| + } |
| + } |
| + } |
| + |
| + void _insert(K key, V value, int hashPattern, int i) { |
| + if (_usedData == _data.length) { |
| + _rehash(); |
| + this[key] = value; |
| + } else { |
| + _index[i] = hashPattern | _usedData; |
|
Vyacheslav Egorov (Google)
2015/02/12 15:42:41
looks like we are wasting 1 bit here - (_usedData
koda
2015/02/17 17:26:28
Done.
Yes, we were. It allowed for the very simpl
|
| + _data[_usedData++] = key; |
| + _data[_usedData++] = value; |
| + } |
| + } |
| + |
| + // If key is present, returns the index of the value in _data, else returns |
| + // the negated insertion point in _index. |
| + int _findValueOrInsertPoint(K key, int fullHash, int hashPattern, int size) { |
| + int sizeMask = size - 1; |
|
Vyacheslav Egorov (Google)
2015/02/12 15:42:41
when the are a lot of local variables it easier to
koda
2015/02/17 17:26:28
Done.
|
| + int i = _firstProbe(fullHash, sizeMask); |
| + int firstDeleted = -1; |
| + int pair = _index[i]; |
| + while (pair != _CompactHashTableMixin._UNUSED_PAIR) { |
| + if (pair == _CompactHashTableMixin._DELETED_PAIR) { |
|
Vyacheslav Egorov (Google)
2015/02/12 15:42:41
I wonder if the code becomes faster if you have tw
koda
2015/02/17 17:26:29
Perhaps. I agree that's a good candidate for optim
Vyacheslav Egorov (Google)
2015/02/17 23:39:00
Ack! That was more of a thought than immediate cal
|
| + if (firstDeleted < 0){ |
| + firstDeleted = i; |
| + } |
| + } else { |
| + int d = hashPattern ^ pair; |
| + if (d < size && key == _data[d]) { |
| + return d + 1; |
| + } |
| + } |
| + i = _nextProbe(i, sizeMask); |
| + pair = _index[i]; |
| + } |
| + return firstDeleted >= 0 ? -firstDeleted : -i; |
| + } |
| + |
| + void operator[]=(K key, V value) { |
| + int size = _index.length; |
| + int sizeMask = size - 1; |
| + int fullHash = key.hashCode; |
| + int hashPattern = _hashPattern(fullHash, _hashMask, size); |
| + int d = _findValueOrInsertPoint(key, fullHash, hashPattern, size); |
| + if (d > 0) { |
| + _data[d] = value; |
| + } else { |
| + int i = -d; |
| + _insert(key, value, hashPattern, i); |
| + } |
| + } |
| + |
| + V putIfAbsent(K key, V ifAbsent()) { |
| + int size = _index.length; |
| + int sizeMask = size - 1; |
| + int fullHash = key.hashCode; |
| + int hashPattern = _hashPattern(fullHash, _hashMask, size); |
| + int d = _findValueOrInsertPoint(key, fullHash, hashPattern, size); |
| + if (d > 0) { |
| + return _data[d]; |
| + } |
| + int i = -d; |
| + // 'ifAbsent' is allowed to modify the map. |
| + List oldData = _data; |
| + int oldCheckSum = _checkSum; |
| + V value = ifAbsent(); |
| + if (_isModifiedSince(oldData, oldCheckSum)) { |
| + this[key] = value; |
| + } else { |
| + _insert(key, value, hashPattern, i); |
| + } |
| + return value; |
| + } |
| + |
| + V remove(Object key) { |
| + int size = _index.length; |
| + int sizeMask = size - 1; |
| + int fullHash = key.hashCode; |
| + int hashPattern = _hashPattern(fullHash, _hashMask, size); |
| + int i = _firstProbe(fullHash, sizeMask); |
| + int pair = _index[i]; |
| + while (pair != _CompactHashTableMixin._UNUSED_PAIR) { |
| + if (pair != _CompactHashTableMixin._DELETED_PAIR) { |
| + int d = hashPattern ^ pair; |
| + if (d < size && key == _data[d]) { |
| + _index[i] = _CompactHashTableMixin._DELETED_PAIR; |
| + _setDeletedAt(_data, d); |
| + V value = _data[d + 1]; |
| + _setDeletedAt(_data, d + 1); |
| + ++_deletedKeys; |
| + return value; |
| + } |
| + } |
| + i = _nextProbe(i, sizeMask); |
| + pair = _index[i]; |
| + } |
| + return null; |
| + } |
| + |
| + // If key is absent, return _data (which is never a value). |
| + Object _getValueOrData(Object key) { |
| + int size = _index.length; |
| + int sizeMask = size - 1; |
| + int fullHash = key.hashCode; |
| + int hashPattern = _hashPattern(fullHash, _hashMask, size); |
| + int i = _firstProbe(fullHash, sizeMask); |
| + int pair = _index[i]; |
| + while (pair != _CompactHashTableMixin._UNUSED_PAIR) { |
| + if (pair != _CompactHashTableMixin._DELETED_PAIR) { |
| + int d = hashPattern ^ pair; |
| + if (d < size && key == _data[d]) { |
| + return _data[d + 1]; |
| + } |
| + } |
| + i = _nextProbe(i, sizeMask); |
| + pair = _index[i]; |
| + } |
| + return _data; |
| + } |
| + |
| + bool containsKey(Object key) => !identical(_data, _getValueOrData(key)); |
| + |
| + V operator[](Object key) { |
| + var v = _getValueOrData(key); |
| + return identical(_data, v) ? null : v; |
| + } |
| + |
| + bool containsValue(Object value) { |
| + for (var v in values) { |
| + if (v == value) { |
| + return true; |
| + } |
| + } |
| + return false; |
| + } |
| + |
| + void forEach(void f(K key, V value)) { |
| + var ki = keys.iterator; |
| + var vi = values.iterator; |
| + while (ki.moveNext()) { |
| + vi.moveNext(); |
| + f(ki.current, vi.current); |
| + } |
| + } |
| + |
| + Iterable<K> get keys => |
| + new _CompactIterable<K>(this, _data, _usedData, -2, 2); |
| + Iterable<V> get values => |
| + new _CompactIterable<V>(this, _data, _usedData, -1, 2); |
| +} |
| + |
| +// Iterates through _data[_offset + _step], _data[_offset + 2*_step], ... |
| +// and checks for concurrent modification. |
| +class _CompactIterable<E> extends IterableBase<E> { |
| + var _map; |
|
Vyacheslav Egorov (Google)
2015/02/12 15:42:41
Please mark all fields that do not change final.
koda
2015/02/17 17:26:28
Done.
|
| + List _data; |
| + int _len; |
| + int _offset; |
| + int _step; |
| + |
| + _CompactIterable(this._map, this._data, this._len, this._offset, this._step); |
| + |
| + Iterator<E> get iterator => |
| + new _CompactIterator<E>(_map, _data, _len, _offset, _step); |
| + |
| + int get length => _map.length; |
| + bool get isEmpty => length == 0; |
| + bool get isNotEmpty => !isEmpty; |
| +} |
| + |
| +class _CompactIterator<E> implements Iterator<E> { |
| + var _map; |
|
Vyacheslav Egorov (Google)
2015/02/12 15:42:41
Please mark all fields that do not change final (i
koda
2015/02/17 17:26:29
Done.
|
| + List _data; |
| + int _len; |
| + int _offset; |
| + int _step; |
| + int _checkSum; |
| + E current; |
| + |
| + _CompactIterator(this._map, this._data, this._len, this._offset, this._step) { |
| + this._checkSum = _map._checkSum; |
| + } |
| + |
| + bool moveNext() { |
| + if (_map._isModifiedSince(_data, _checkSum)) { |
| + throw new ConcurrentModificationError(_map); |
| + } |
| + do { |
| + _offset += _step; |
| + } while (_offset < _len && _map._isDeleted(_data, _data[_offset])); |
|
Vyacheslav Egorov (Google)
2015/02/12 15:42:41
This shouldn't be an instance call (also it will f
koda
2015/02/17 17:26:29
Done.
|
| + if (_offset < _len) { |
| + current = _data[_offset]; |
| + return true; |
| + } else { |
| + current = null; |
| + return false; |
| + } |
| + } |
| +} |
| + |
| +// Set implementation, analogous to _CompactLinkedHashMap. |
| +class _CompactLinkedHashSet<K> |
| + extends SetBase<K> with _CompactHashTableMixin |
| + implements HashSet<K>, LinkedHashSet<K> { |
| + |
| + _CompactLinkedHashSet() { |
| + _index = new Uint32List(_CompactHashTableMixin._INITIAL_INDEX_SIZE); |
| + _data = new List(_CompactHashTableMixin._INITIAL_INDEX_SIZE >> 1); |
|
Vyacheslav Egorov (Google)
2015/02/12 15:42:41
Are we wasting 1 bit in the index here too? becaus
koda
2015/02/17 17:26:29
Done.
|
| + } |
| + |
| + int get length => _usedData - _deletedKeys; |
| + |
| + void _rehash() { |
| + if ((_deletedKeys << 1) > _usedData) { |
| + _init(_index.length, _hashMask, _data, _usedData); |
| + } else { |
| + _init(_index.length << 1, _hashMask >> 1, _data, _usedData); |
| + } |
| + } |
| + |
| + void clear() { |
| + if (!isEmpty) { |
| + _init(_index.length, _hashMask); |
| + } |
| + } |
| + |
| + void _init(int size, int hashMask, [List oldData, int oldUsed]) { |
| + _index = new Uint32List(size); |
| + _hashMask = hashMask; |
| + _data = new List(size >> 1); |
| + _usedData = 0; |
| + _deletedKeys = 0; |
| + if (oldData != null) { |
| + for (int i = 0; i < oldUsed; i += 1) { |
| + var key = oldData[i]; |
| + if (!identical(key, oldData)) { |
|
Vyacheslav Egorov (Google)
2015/02/12 15:42:41
_isDeleted instead of identical?
koda
2015/02/17 17:26:29
Done.
|
| + add(key); |
| + } |
| + } |
| + } |
| + } |
| + |
| + bool add(Object key) { |
| + int size = _index.length; |
|
Vyacheslav Egorov (Google)
2015/02/12 15:42:41
Same comments from Map implementation apply here.
koda
2015/02/17 17:26:28
Done, except for the code-splitting optimization (
|
| + int sizeMask = size - 1; |
| + int fullHash = key.hashCode; |
| + int hashPattern = _hashPattern(fullHash, _hashMask, size); |
| + int i = _firstProbe(fullHash, sizeMask); |
| + int firstDeleted = -1; |
| + int pair = _index[i]; |
| + while (pair != _CompactHashTableMixin._UNUSED_PAIR) { |
| + if (pair == _CompactHashTableMixin._DELETED_PAIR) { |
| + if (firstDeleted < 0){ |
| + firstDeleted = i; |
| + } |
| + } else { |
| + int d = hashPattern ^ pair; |
| + if (d < size && key == _data[d]) { |
| + return false; |
| + } |
| + } |
| + i = _nextProbe(i, sizeMask); |
| + pair = _index[i]; |
| + } |
| + int insertionPoint = (firstDeleted >= 0) ? firstDeleted : i; |
| + if (_usedData == _data.length) { |
| + _rehash(); |
| + add(key); |
| + } else { |
| + _index[insertionPoint] = hashPattern | _usedData; |
|
Vyacheslav Egorov (Google)
2015/02/12 15:42:41
move int insertionPoint = ... to here.
koda
2015/02/17 17:26:29
Done.
|
| + _data[_usedData++] = key; |
| + } |
| + return true; |
| + } |
| + |
| + // If key is absent, return _data (which is never a value). |
| + Object _getKeyOrData(Object key) { |
| + int size = _index.length; |
| + int sizeMask = size - 1; |
| + int fullHash = key.hashCode; |
| + int hashPattern = _hashPattern(fullHash, _hashMask, size); |
| + int i = _firstProbe(fullHash, sizeMask); |
| + int pair = _index[i]; |
| + while (pair != _CompactHashTableMixin._UNUSED_PAIR) { |
| + if (pair != _CompactHashTableMixin._DELETED_PAIR) { |
| + int d = hashPattern ^ pair; |
| + if (d < size && key == _data[d]) { |
| + return _data[d]; // Note: Must return the existing key. |
| + } |
| + } |
| + i = _nextProbe(i, sizeMask); |
| + pair = _index[i]; |
| + } |
| + return _data; |
| + } |
| + |
| + K lookup(Object key) { |
| + var k = _getKeyOrData(key); |
| + return identical(_data, k) ? null : k; |
| + } |
| + |
| + bool contains(Object key) => !identical(_data, _getKeyOrData(key)); |
| + |
| + bool remove(Object key) { |
| + int size = _index.length; |
| + int sizeMask = size - 1; |
| + int fullHash = key.hashCode; |
| + int hashPattern = _hashPattern(fullHash, _hashMask, size); |
| + int i = _firstProbe(fullHash, sizeMask); |
| + int pair = _index[i]; |
| + while (pair != _CompactHashTableMixin._UNUSED_PAIR) { |
| + if (pair != _CompactHashTableMixin._DELETED_PAIR) { |
| + int d = hashPattern ^ pair; |
| + if (d < size && key == _data[d]) { |
| + _index[i] = _CompactHashTableMixin._DELETED_PAIR; |
| + _data[d] = _data; |
| + ++_deletedKeys; |
| + return true; |
| + } |
| + } |
| + i = _nextProbe(i, sizeMask); |
| + pair = _index[i]; |
| + } |
| + return false; |
| + } |
| + |
| + Iterator<K> get iterator => |
| + new _CompactIterator<K>(this, _data, _usedData, -1, 1); |
| + |
| + Set<K> toSet() => new Set<K>()..addAll(this); |
| +} |