runtime/lib/compact_hash.dart - Issue 915323002: Compact LinkedHashMap/Set implementation.

Unified Diff: runtime/lib/compact_hash.dart

Issue 915323002: Compact LinkedHashMap/Set implementation. (Closed) Base URL: http://dart.googlecode.com/svn/branches/bleeding_edge/dart/

Patch Set: Created 5 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: runtime/lib/compact_hash.dart

===================================================================

--- runtime/lib/compact_hash.dart (revision 0)

+++ runtime/lib/compact_hash.dart (working copy)

@@ -0,0 +1,445 @@

+// BSD-style license that can be found in the LICENSE file.

+import 'dart:typed_data';

+// Hash table with open addressing that separates the index from keys/values.

+abstract class _CompactHashTableMixin {

+ // Each occupied entry in _index is a fixed-size integer that encodes a pair:

+ // [ hash pattern for key | index of key in _data ]

+ // The hash pattern is based on hashCode, but is guaranteed to be non-zero.

+ // The length of _index is always a power of two, and there is always at

+ // least one unoccupied entry.

+ Uint32List _index;

+ // The number of bits used for each component is determined by table size.

+ static const int _INITIAL_INDEX_BITS = 4;

+ static const int _INITIAL_INDEX_SIZE = 1 << _INITIAL_INDEX_BITS;

+ // Unused and deleted entries are marked by 0 and 1, respectively.

+ static const int _UNUSED_PAIR = 0;

+ static const int _DELETED_PAIR = 1;

+ // Cached in-place mask for the hash pattern component. On 32-bit, the top

+ // bits are wasted to avoid Mint allocation. TODO(koda): Reclaim the bits by

Vyacheslav Egorov (Google) 2015/02/12 15:42:41 Please keep TODO on a separate line from the rest

koda 2015/02/17 17:26:29 Done.

+ // making the compiler treat hash patterns as unsigned words.

+ int _hashMask = int.is64Bit() ?

+ (1 << (32 - _INITIAL_INDEX_BITS)) - 1 :

+ (1 << (30 - _INITIAL_INDEX_BITS)) - 1;

+ int _hashPattern(int fullHash, int hashMask, int size) =>

Vyacheslav Egorov (Google) 2015/02/12 15:42:41 should this be static? it does not use any members

koda 2015/02/17 17:26:29 Done.

+ // TODO(koda): Consider keeping bit length and use left shift.

+ (fullHash == 0) ? size : (fullHash & hashMask) * size;

+ // Linear probing.

+ int _firstProbe(int fullHash, int sizeMask) {

Vyacheslav Egorov (Google) 2015/02/12 15:42:41 should this be static?

koda 2015/02/17 17:26:28 Done.

+ int i = fullHash & sizeMask;

+ // Light, fast shuffle to mitigate bad hashCode (e.g., sequential).

+ return ((i << 1) + i) & sizeMask;

+ }

+ int _nextProbe(int i, int sizeMask) => (i + 1) & sizeMask;

Vyacheslav Egorov (Google) 2015/02/12 15:42:41 ditto, the same comment applies to all helpers bel

koda 2015/02/17 17:26:29 Done. I also shortened the mixin name to limit th

+ // Fixed-length list of keys (set) or key/value at even/odd indices (map).

+ List _data;

+ // Length of _data that is used (i.e., keys + values for a map).

+ int _usedData = 0;

+ // Number of deleted keys.

+ int _deletedKeys = 0;

+ // A self-loop is used to mark a deleted key or value.

+ bool _isDeleted(List data, Object keyOrValue) => identical(keyOrValue, data);

+ void _setDeletedAt(List data, int d) {

+ data[d] = data;

+ }

+ // Concurrent modification detection relies on this checksum monotonically

+ // increasing between reallocations of _data.

+ int get _checkSum => _usedData + _deletedKeys;

+ bool _isModifiedSince(List oldData, int oldCheckSum) =>

+ !identical(_data, oldData) || (_checkSum != oldCheckSum);

+// Map with iteration in insertion order (hence "Linked"). New keys are simply

+// appended to _data.

+class _CompactLinkedHashMap<K, V>

+ extends MapBase<K, V> with _CompactHashTableMixin

+ implements HashMap<K, V>, LinkedHashMap<K, V> {

+ // Growth policy is to keep _index and _data the same length, and double

+ // both when _data is full. Thus, _index will have a max load factor of 0.5.

+ // TODO(koda): Consider growing _data by factor sqrt(2), twice as often.

+ _CompactLinkedHashMap() {

+ _index = new Uint32List(_CompactHashTableMixin._INITIAL_INDEX_SIZE);

+ _data = new List(_CompactHashTableMixin._INITIAL_INDEX_SIZE);

+ }

+ int get length => (_usedData >> 1) - _deletedKeys;

+ bool get isEmpty => length == 0;

+ bool get isNotEmpty => !isEmpty;

+ void _rehash() {

+ if ((_deletedKeys << 1) > _usedData) {

+ // TODO(koda): Consider shrinking.

+ // TODO(koda): Consider in-place compaction and more costly CME check.

+ _init(_index.length, _hashMask, _data, _usedData);

+ } else {

+ // TODO(koda): Support 32->64 bit transition (and adjust _hashMask).

+ _init(_index.length << 1, _hashMask >> 1, _data, _usedData);

+ }

+ void clear() {

+ if (!isEmpty) {

+ _init(_index.length, _hashMask);

+ }

+ // Allocate new _index and _data, and optionally copy existing contents.

+ void _init(int size, int hashMask, [List oldData, int oldUsed]) {

+ assert(size & (size - 1) == 0);

+ assert(_CompactHashTableMixin._UNUSED_PAIR == 0);

Vyacheslav Egorov (Google) 2015/02/12 15:42:41 should these assert be in constructor too? we crea

koda 2015/02/17 17:26:28 Done.

+ _index = new Uint32List(size);

+ _hashMask = hashMask;

+ _data = new List(size);

+ _usedData = 0;

+ _deletedKeys = 0;

+ if (oldData != null) {

+ for (int i = 0; i < oldUsed; i += 2) {

+ var key = oldData[i];

+ if (!_isDeleted(oldData, key)) {

+ // TODO(koda): While there are enough hash bits, avoid hashCode calls.

+ this[key] = oldData[i + 1];

+ }

+ void _insert(K key, V value, int hashPattern, int i) {

+ if (_usedData == _data.length) {

+ _rehash();

+ this[key] = value;

+ } else {

+ _index[i] = hashPattern | _usedData;

Vyacheslav Egorov (Google) 2015/02/12 15:42:41 looks like we are wasting 1 bit here - (_usedData

koda 2015/02/17 17:26:28 Done. Yes, we were. It allowed for the very simpl

+ _data[_usedData++] = key;

+ _data[_usedData++] = value;

+ }

+ // If key is present, returns the index of the value in _data, else returns

+ // the negated insertion point in _index.

+ int _findValueOrInsertPoint(K key, int fullHash, int hashPattern, int size) {

+ int sizeMask = size - 1;

Vyacheslav Egorov (Google) 2015/02/12 15:42:41 when the are a lot of local variables it easier to

koda 2015/02/17 17:26:28 Done.

+ int i = _firstProbe(fullHash, sizeMask);

+ int firstDeleted = -1;

+ int pair = _index[i];

+ while (pair != _CompactHashTableMixin._UNUSED_PAIR) {

+ if (pair == _CompactHashTableMixin._DELETED_PAIR) {

Vyacheslav Egorov (Google) 2015/02/12 15:42:41 I wonder if the code becomes faster if you have tw

koda 2015/02/17 17:26:29 Perhaps. I agree that's a good candidate for optim

Vyacheslav Egorov (Google) 2015/02/17 23:39:00 Ack! That was more of a thought than immediate cal

+ if (firstDeleted < 0){

+ firstDeleted = i;

+ }

+ } else {

+ int d = hashPattern ^ pair;

+ if (d < size && key == _data[d]) {

+ return d + 1;

+ }

+ i = _nextProbe(i, sizeMask);

+ pair = _index[i];

+ }

+ return firstDeleted >= 0 ? -firstDeleted : -i;

+ }

+ void operator[]=(K key, V value) {

+ int size = _index.length;

+ int sizeMask = size - 1;

+ int fullHash = key.hashCode;

+ int hashPattern = _hashPattern(fullHash, _hashMask, size);

+ int d = _findValueOrInsertPoint(key, fullHash, hashPattern, size);

+ if (d > 0) {

+ _data[d] = value;

+ } else {

+ int i = -d;

+ _insert(key, value, hashPattern, i);

+ }

+ V putIfAbsent(K key, V ifAbsent()) {

+ int size = _index.length;

+ int sizeMask = size - 1;

+ int fullHash = key.hashCode;

+ int hashPattern = _hashPattern(fullHash, _hashMask, size);

+ int d = _findValueOrInsertPoint(key, fullHash, hashPattern, size);

+ if (d > 0) {

+ return _data[d];

+ }

+ int i = -d;

+ // 'ifAbsent' is allowed to modify the map.

+ List oldData = _data;

+ int oldCheckSum = _checkSum;

+ V value = ifAbsent();

+ if (_isModifiedSince(oldData, oldCheckSum)) {

+ this[key] = value;

+ } else {

+ _insert(key, value, hashPattern, i);

+ }

+ return value;

+ }

+ V remove(Object key) {

+ int size = _index.length;

+ int sizeMask = size - 1;

+ int fullHash = key.hashCode;

+ int hashPattern = _hashPattern(fullHash, _hashMask, size);

+ int i = _firstProbe(fullHash, sizeMask);

+ int pair = _index[i];

+ while (pair != _CompactHashTableMixin._UNUSED_PAIR) {

+ if (pair != _CompactHashTableMixin._DELETED_PAIR) {

+ int d = hashPattern ^ pair;

+ if (d < size && key == _data[d]) {

+ _index[i] = _CompactHashTableMixin._DELETED_PAIR;

+ _setDeletedAt(_data, d);

+ V value = _data[d + 1];

+ _setDeletedAt(_data, d + 1);

+ ++_deletedKeys;

+ return value;

+ }

+ i = _nextProbe(i, sizeMask);

+ pair = _index[i];

+ }

+ return null;

+ }

+ // If key is absent, return _data (which is never a value).

+ Object _getValueOrData(Object key) {

+ int size = _index.length;

+ int sizeMask = size - 1;

+ int fullHash = key.hashCode;

+ int hashPattern = _hashPattern(fullHash, _hashMask, size);

+ int i = _firstProbe(fullHash, sizeMask);

+ int pair = _index[i];

+ while (pair != _CompactHashTableMixin._UNUSED_PAIR) {

+ if (pair != _CompactHashTableMixin._DELETED_PAIR) {

+ int d = hashPattern ^ pair;

+ if (d < size && key == _data[d]) {

+ return _data[d + 1];

+ }

+ i = _nextProbe(i, sizeMask);

+ pair = _index[i];

+ }

+ return _data;

+ }

+ bool containsKey(Object key) => !identical(_data, _getValueOrData(key));

+ V operator[](Object key) {

+ var v = _getValueOrData(key);

+ return identical(_data, v) ? null : v;

+ }

+ bool containsValue(Object value) {

+ for (var v in values) {

+ if (v == value) {

+ return true;

+ }

+ return false;

+ }

+ void forEach(void f(K key, V value)) {

+ var ki = keys.iterator;

+ var vi = values.iterator;

+ while (ki.moveNext()) {

+ vi.moveNext();

+ f(ki.current, vi.current);

+ }

+ Iterable<K> get keys =>

+ new _CompactIterable<K>(this, _data, _usedData, -2, 2);

+ Iterable<V> get values =>

+ new _CompactIterable<V>(this, _data, _usedData, -1, 2);

+// Iterates through _data[_offset + _step], _data[_offset + 2*_step], ...

+// and checks for concurrent modification.

+class _CompactIterable<E> extends IterableBase<E> {

+ var _map;

Vyacheslav Egorov (Google) 2015/02/12 15:42:41 Please mark all fields that do not change final.

koda 2015/02/17 17:26:28 Done.

+ List _data;

+ int _len;

+ int _offset;

+ int _step;

+ _CompactIterable(this._map, this._data, this._len, this._offset, this._step);

+ Iterator<E> get iterator =>

+ new _CompactIterator<E>(_map, _data, _len, _offset, _step);

+ int get length => _map.length;

+ bool get isEmpty => length == 0;

+ bool get isNotEmpty => !isEmpty;

+class _CompactIterator<E> implements Iterator<E> {

+ var _map;

Vyacheslav Egorov (Google) 2015/02/12 15:42:41 Please mark all fields that do not change final (i

koda 2015/02/17 17:26:29 Done.

+ List _data;

+ int _len;

+ int _offset;

+ int _step;

+ int _checkSum;

+ E current;

+ _CompactIterator(this._map, this._data, this._len, this._offset, this._step) {

+ this._checkSum = _map._checkSum;

+ }

+ bool moveNext() {

+ if (_map._isModifiedSince(_data, _checkSum)) {

+ throw new ConcurrentModificationError(_map);

+ }

+ do {

+ _offset += _step;

+ } while (_offset < _len && _map._isDeleted(_data, _data[_offset]));

Vyacheslav Egorov (Google) 2015/02/12 15:42:41 This shouldn't be an instance call (also it will f

koda 2015/02/17 17:26:29 Done.

+ if (_offset < _len) {

+ current = _data[_offset];

+ return true;

+ } else {

+ current = null;

+ return false;

+ }

+// Set implementation, analogous to _CompactLinkedHashMap.

+class _CompactLinkedHashSet<K>

+ extends SetBase<K> with _CompactHashTableMixin

+ implements HashSet<K>, LinkedHashSet<K> {

+ _CompactLinkedHashSet() {

+ _index = new Uint32List(_CompactHashTableMixin._INITIAL_INDEX_SIZE);

+ _data = new List(_CompactHashTableMixin._INITIAL_INDEX_SIZE >> 1);

Vyacheslav Egorov (Google) 2015/02/12 15:42:41 Are we wasting 1 bit in the index here too? becaus

koda 2015/02/17 17:26:29 Done.

+ }

+ int get length => _usedData - _deletedKeys;

+ void _rehash() {

+ if ((_deletedKeys << 1) > _usedData) {

+ _init(_index.length, _hashMask, _data, _usedData);

+ } else {

+ _init(_index.length << 1, _hashMask >> 1, _data, _usedData);

+ }

+ void clear() {

+ if (!isEmpty) {

+ _init(_index.length, _hashMask);

+ }

+ void _init(int size, int hashMask, [List oldData, int oldUsed]) {

+ _index = new Uint32List(size);

+ _hashMask = hashMask;

+ _data = new List(size >> 1);

+ _usedData = 0;

+ _deletedKeys = 0;

+ if (oldData != null) {

+ for (int i = 0; i < oldUsed; i += 1) {

+ var key = oldData[i];

+ if (!identical(key, oldData)) {

Vyacheslav Egorov (Google) 2015/02/12 15:42:41 _isDeleted instead of identical?

koda 2015/02/17 17:26:29 Done.

+ add(key);

+ }

+ bool add(Object key) {

+ int size = _index.length;

Vyacheslav Egorov (Google) 2015/02/12 15:42:41 Same comments from Map implementation apply here.

koda 2015/02/17 17:26:28 Done, except for the code-splitting optimization (

+ int sizeMask = size - 1;

+ int fullHash = key.hashCode;

+ int hashPattern = _hashPattern(fullHash, _hashMask, size);

+ int i = _firstProbe(fullHash, sizeMask);

+ int firstDeleted = -1;

+ int pair = _index[i];

+ while (pair != _CompactHashTableMixin._UNUSED_PAIR) {

+ if (pair == _CompactHashTableMixin._DELETED_PAIR) {

+ if (firstDeleted < 0){

+ firstDeleted = i;

+ }

+ } else {

+ int d = hashPattern ^ pair;

+ if (d < size && key == _data[d]) {

+ return false;

+ }

+ i = _nextProbe(i, sizeMask);

+ pair = _index[i];

+ }

+ int insertionPoint = (firstDeleted >= 0) ? firstDeleted : i;

+ if (_usedData == _data.length) {

+ _rehash();

+ add(key);

+ } else {

+ _index[insertionPoint] = hashPattern | _usedData;

Vyacheslav Egorov (Google) 2015/02/12 15:42:41 move int insertionPoint = ... to here.

koda 2015/02/17 17:26:29 Done.

+ _data[_usedData++] = key;

+ }

+ return true;

+ }

+ // If key is absent, return _data (which is never a value).

+ Object _getKeyOrData(Object key) {

+ int size = _index.length;

+ int sizeMask = size - 1;

+ int fullHash = key.hashCode;

+ int hashPattern = _hashPattern(fullHash, _hashMask, size);

+ int i = _firstProbe(fullHash, sizeMask);

+ int pair = _index[i];

+ while (pair != _CompactHashTableMixin._UNUSED_PAIR) {

+ if (pair != _CompactHashTableMixin._DELETED_PAIR) {

+ int d = hashPattern ^ pair;

+ if (d < size && key == _data[d]) {

+ return _data[d]; // Note: Must return the existing key.

+ }

+ i = _nextProbe(i, sizeMask);

+ pair = _index[i];

+ }

+ return _data;

+ }

+ K lookup(Object key) {

+ var k = _getKeyOrData(key);

+ return identical(_data, k) ? null : k;

+ }

+ bool contains(Object key) => !identical(_data, _getKeyOrData(key));

+ bool remove(Object key) {

+ int size = _index.length;

+ int sizeMask = size - 1;

+ int fullHash = key.hashCode;

+ int hashPattern = _hashPattern(fullHash, _hashMask, size);

+ int i = _firstProbe(fullHash, sizeMask);

+ int pair = _index[i];

+ while (pair != _CompactHashTableMixin._UNUSED_PAIR) {

+ if (pair != _CompactHashTableMixin._DELETED_PAIR) {

+ int d = hashPattern ^ pair;

+ if (d < size && key == _data[d]) {

+ _index[i] = _CompactHashTableMixin._DELETED_PAIR;

+ _data[d] = _data;

+ ++_deletedKeys;

+ return true;

+ }

+ i = _nextProbe(i, sizeMask);

+ pair = _index[i];

+ }

+ return false;

+ }

+ Iterator<K> get iterator =>

+ new _CompactIterator<K>(this, _data, _usedData, -1, 1);

+ Set<K> toSet() => new Set<K>()..addAll(this);

« runtime/lib/collection_patch.dart ('K') | « runtime/lib/collection_sources.gypi ('k') | runtime/lib/linked_hash_map.cc » ('j') | no next file with comments »