runtime/lib/compact_hash.dart - Issue 3001763002: [VM-corelib] Improve performance for very big maps and sets.

Unified Diff: runtime/lib/compact_hash.dart

Issue 3001763002: [VM-corelib] Improve performance for very big maps and sets.

Patch Set: Created 3 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: runtime/lib/compact_hash.dart

diff --git a/runtime/lib/compact_hash.dart b/runtime/lib/compact_hash.dart

index bfeaf53ab2a8fb713170ec6f691952f4bf5751ff..0be62319d938cf854de05559b279d70c5b453876 100644

--- a/runtime/lib/compact_hash.dart

+++ b/runtime/lib/compact_hash.dart

@@ -71,6 +71,44 @@ abstract class _HashBase {

static const int _UNUSED_PAIR = 0;

static const int _DELETED_PAIR = 1;

+ static const int _MAX_LINEAR_DATA = 4096;

+ static const int _MAX_LINEAR_DATA_LOG_2 = 12;

+ // For sizes up to _MAX_LINEAR_DATA the size of the _data array is just the

+ // size we ask for. Above that size we add enough elements onto the _data

+ // array to hold _MAX_LINEAR_DATA-sized sub-arrays for the rest of the

+ // entries.

sra1 2017/08/17 18:33:17 It might be better not to have the hybrid scheme w

+ static int _sizeToBaseListSize(int size) {

+ if (size <= _MAX_LINEAR_DATA) return size;

+ // Round up.

+ size = ((size - 1) | (_MAX_LINEAR_DATA - 1)) + 1;

+ // First few entries are in the linear area.

+ size -= _MAX_LINEAR_DATA;

+ // Enough entries for the sub-arrays.

+ size >>= _MAX_LINEAR_DATA_LOG_2;

+ return _MAX_LINEAR_DATA + size;

+ }

+ static int _baseListSizeToSize(int baseListSize) {

+ if (baseListSize <= _MAX_LINEAR_DATA) return baseListSize;

+ baseListSize -= _MAX_LINEAR_DATA;

+ baseListSize <<= _MAX_LINEAR_DATA_LOG_2;

+ return baseListSize + _MAX_LINEAR_DATA;

+ }

+ static List _indexToList(List base, int index) {

+ if (index < _MAX_LINEAR_DATA) return base;

+ index >>= _MAX_LINEAR_DATA_LOG_2;

+ return base[_MAX_LINEAR_DATA - 1 + index];

+ }

+ static List _setSublist(List base, int index, List sublist) {

+ assert(index >= _MAX_LINEAR_DATA);

+ index -= _MAX_LINEAR_DATA;

+ index >>= _MAX_LINEAR_DATA_LOG_2;

+ base[_MAX_LINEAR_DATA + index] = sublist;

+ }

// On 32-bit, the top bits are wasted to avoid Mint allocation.

// TODO(koda): Reclaim the bits by making the compiler treat hash patterns

// as unsigned words.

@@ -99,8 +137,8 @@ abstract class _HashBase {

// A self-loop is used to mark a deleted key or value.

static bool _isDeleted(List data, Object keyOrValue) =>

identical(keyOrValue, data);

- static void _setDeletedAt(List data, int d) {

- data[d] = data;

+ static void _setDeletedAt(List data, List sublist, int modulus) {

+ sublist[modulus] = data;

}

// Concurrent modification detection relies on this checksum monotonically

@@ -146,60 +184,85 @@ class _LinkedHashMapMixin<K, V> {

if ((_deletedKeys << 2) > _usedData) {

// TODO(koda): Consider shrinking.

// TODO(koda): Consider in-place compaction and more costly CME check.

- _init(_index.length, _hashMask, _data, _usedData);

+ _init(_index.length, _hashMask, _data);

} else {

// TODO(koda): Support 32->64 bit transition (and adjust _hashMask).

- _init(_index.length << 1, _hashMask >> 1, _data, _usedData);

+ _init(_index.length << 1, _hashMask >> 1, _data);

}

void clear() {

if (!isEmpty) {

- // Use _data.length, since _index might be null.

- _init(_data.length, _hashMask, null, 0);

+ int size = _HashBase._INITIAL_INDEX_SIZE;

+ _init(size, _HashBase._indexSizeToHashMask(size), null);

}

// Allocate new _index and _data, and optionally copy existing contents.

- void _init(int size, int hashMask, List oldData, int oldUsed) {

+ void _init(int size, int hashMask, List oldData) {

assert(size & (size - 1) == 0);

assert(_HashBase._UNUSED_PAIR == 0);

_index = new Uint32List(size);

_hashMask = hashMask;

- _data = new List(size);

+ if (_deletedKeys == 0 && _data == oldData) {

+ _rebuildIndex(size, oldData);

+ return;

+ }

+ _data = new List(_HashBase._sizeToBaseListSize(size));

+ int oldUsed = _usedData;

_usedData = 0;

_deletedKeys = 0;

if (oldData != null) {

for (int i = 0; i < oldUsed; i += 2) {

- var key = oldData[i];

+ List sublist = _HashBase._indexToList(oldData, i);

+ int modulus = i & (_HashBase._MAX_LINEAR_DATA - 1);

+ var key = sublist[modulus];

if (!_HashBase._isDeleted(oldData, key)) {

// TODO(koda): While there are enough hash bits, avoid hashCode calls.

- this[key] = oldData[i + 1];

+ this[key] = sublist[modulus + 1];

}

+ void _rebuildIndex(int size, List oldData) {

+ int dataSize = _HashBase._sizeToBaseListSize(size);

+ if (_data.length != dataSize) {

+ _data = new List(dataSize);

+ for (int i = 0; i < oldData.length; i++) {

+ _data[i] = oldData[i];

+ }

+ int i = 0;

+ int notYetAdded = _usedData;

+ _usedData = 0;

+ for (; i < notYetAdded && i < _HashBase._MAX_LINEAR_DATA; i += 2) {

+ _setAlreadyThere(oldData[i]);

+ }

+ for (; i < oldData.length; i++) {

+ notYetAdded -= _HashBase._MAX_LINEAR_DATA;

+ List sublist = oldData[i];

+ for (int j = 0; j < notYetAdded && j < sublist.length; j += 2) {

+ _setAlreadyThere(sublist[j]);

+ }

int _getIndexLength() {

return (_index == null) ? _regenerateIndex() : _index.length;

}

int _regenerateIndex() {

assert(_index == null);

- _index = new Uint32List(_data.length);

+ _index = new Uint32List(_HashBase._baseListSizeToSize(_data.length));

assert(_hashMask == 0);

_hashMask = _HashBase._indexSizeToHashMask(_index.length);

- final int tmpUsed = _usedData;

- _usedData = 0;

- for (int i = 0; i < tmpUsed; i += 2) {

- // TODO(koda): Avoid redundant equality tests and stores into _data.

- this[_data[i]] = _data[i + 1];

- }

+ _rebuildIndex(_data.length, _data);

return _index.length;

}

void _insert(K key, V value, int hashPattern, int i) {

- if (_usedData == _data.length) {

+ if (_usedData == _getIndexLength()) {

_rehash();

this[key] = value;

} else {

@@ -207,8 +270,15 @@ class _LinkedHashMapMixin<K, V> {

final int index = _usedData >> 1;

assert((index & hashPattern) == 0);

_index[i] = hashPattern | index;

- _data[_usedData++] = key;

- _data[_usedData++] = value;

+ List sublist = _HashBase._indexToList(_data, _usedData);

+ if (sublist == null) {

+ sublist = new List(_HashBase._MAX_LINEAR_DATA);

+ _HashBase._setSublist(_data, _usedData, sublist);

+ }

+ int modulus = _usedData & (_HashBase._MAX_LINEAR_DATA - 1);

+ sublist[modulus] = key;

+ sublist[modulus + 1] = value;

+ _usedData += 2;

}

@@ -229,8 +299,12 @@ class _LinkedHashMapMixin<K, V> {

final int entry = hashPattern ^ pair;

if (entry < maxEntries) {

final int d = entry << 1;

- if (_equals(key, _data[d])) {

- return d + 1;

+ List sublist = _HashBase._indexToList(_data, d);

+ if (sublist != null) {

+ int modulus = d & (_HashBase._MAX_LINEAR_DATA - 1);

+ if (_equals(key, sublist[modulus])) {

+ return d + 1;

+ }

}

@@ -240,6 +314,28 @@ class _LinkedHashMapMixin<K, V> {

return firstDeleted >= 0 ? -firstDeleted : -i;

}

+ // Adds a key to the index where the (key, value) are already in the data.

+ void _setAlreadyThere(K key) {

+ final int size = _getIndexLength();

+ final int sizeMask = size - 1;

+ final int fullHash = _hashCode(key);

+ final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);

+ final int maxEntries = size >> 1;

+ int i = _HashBase._firstProbe(fullHash, sizeMask);

+ int pair = _index[i];

+ while (pair != _HashBase._UNUSED_PAIR) {

+ i = _HashBase._nextProbe(i, sizeMask);

+ pair = _index[i];

+ }

+ assert(1 <= hashPattern && hashPattern < (1 << 32));

+ final int index = _usedData >> 1;

+ assert((index & hashPattern) == 0);

+ _index[i] = hashPattern | index;

+ _usedData += 2;

+ }

void operator []=(K key, V value) {

final int size = _getIndexLength();

final int sizeMask = size - 1;

@@ -247,7 +343,9 @@ class _LinkedHashMapMixin<K, V> {

final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);

final int d = _findValueOrInsertPoint(key, fullHash, hashPattern, size);

if (d > 0) {

- _data[d] = value;

+ List sublist = _HashBase._indexToList(_data, d);

+ int modulus = d & (_HashBase._MAX_LINEAR_DATA - 1);

+ sublist[modulus] = value;

} else {

final int i = -d;

_insert(key, value, hashPattern, i);

@@ -262,7 +360,9 @@ class _LinkedHashMapMixin<K, V> {

final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);

final int d = _findValueOrInsertPoint(key, fullHash, hashPattern, size);

if (d > 0) {

- return _data[d];

+ List sublist = _HashBase._indexToList(_data, d);

+ int modulus = d & (_HashBase._MAX_LINEAR_DATA - 1);

+ return sublist[modulus];

}

// 'ifAbsent' is allowed to modify the map.

List oldData = _data;

@@ -290,11 +390,13 @@ class _LinkedHashMapMixin<K, V> {

final int entry = hashPattern ^ pair;

if (entry < maxEntries) {

final int d = entry << 1;

- if (_equals(key, _data[d])) {

+ List sublist = _HashBase._indexToList(_data, d);

+ int modulus = d & (_HashBase._MAX_LINEAR_DATA - 1);

+ if (_equals(key, sublist[modulus])) {

_index[i] = _HashBase._DELETED_PAIR;

- _HashBase._setDeletedAt(_data, d);

- V value = _data[d + 1];

- _HashBase._setDeletedAt(_data, d + 1);

+ _HashBase._setDeletedAt(_data, sublist, modulus);

+ V value = sublist[modulus + 1];

+ _HashBase._setDeletedAt(_data, sublist, modulus + 1);

++_deletedKeys;

return value;

}

@@ -320,8 +422,10 @@ class _LinkedHashMapMixin<K, V> {

final int entry = hashPattern ^ pair;

if (entry < maxEntries) {

final int d = entry << 1;

- if (_equals(key, _data[d])) {

- return _data[d + 1];

+ List sublist = _HashBase._indexToList(_data, d);

+ int modulus = d & (_HashBase._MAX_LINEAR_DATA - 1);

+ if (_equals(key, sublist[modulus])) {

+ return sublist[modulus + 1];

}

@@ -432,9 +536,26 @@ class _CompactIterator<E> implements Iterator<E> {

}

do {

_offset += _step;

- } while (_offset < _len && _HashBase._isDeleted(_data, _data[_offset]));

+ } while (_offset < _len &&

+ _offset < _HashBase._MAX_LINEAR_DATA &&

+ _HashBase._isDeleted(_data, _data[_offset]));

+ if (_offset < _len && _offset >= _HashBase._MAX_LINEAR_DATA) {

+ List sublist = _HashBase._indexToList(_data, _offset);

+ int modulus = _offset & (_HashBase._MAX_LINEAR_DATA - 1);

+ while (_HashBase._isDeleted(_data, sublist[modulus])) {

+ _offset += _step;

+ modulus += _step;

+ if (_offset >= _len) break;

+ if (modulus >= _HashBase._MAX_LINEAR_DATA) {

+ modulus = 0;

+ sublist = _HashBase._indexToList(_data, _offset);

+ }

if (_offset < _len) {

- current = _data[_offset];

+ List sublist = _HashBase._indexToList(_data, _offset);

+ int modulus = _offset & (_HashBase._MAX_LINEAR_DATA - 1);

+ current = sublist[modulus];

return true;

} else {

current = null;

sra1 2017/08/17 18:33:17 If the current sublist from _data is held in a fie

@@ -455,27 +576,35 @@ class _CompactLinkedHashSet<E> extends _HashFieldBase

void _rehash() {

if ((_deletedKeys << 1) > _usedData) {

- _init(_index.length, _hashMask, _data, _usedData);

+ _init(_index.length, _hashMask, _data);

} else {

- _init(_index.length << 1, _hashMask >> 1, _data, _usedData);

+ _init(_index.length << 1, _hashMask >> 1, _data);

}

void clear() {

if (!isEmpty) {

- _init(_index.length, _hashMask, null, 0);

+ int size = _HashBase._INITIAL_INDEX_SIZE;

+ _init(size, _HashBase._indexSizeToHashMask(size), null);

}

- void _init(int size, int hashMask, List oldData, int oldUsed) {

+ void _init(int size, int hashMask, List oldData) {

_index = new Uint32List(size);

_hashMask = hashMask;

- _data = new List(size >> 1);

+ if (_deletedKeys == 0 && _data == oldData) {

+ _rebuildIndex(size, oldData);

+ return;

+ }

+ _data = new List(_HashBase._sizeToBaseListSize(size >> 1));

+ int oldUsed = _usedData;

_usedData = 0;

_deletedKeys = 0;

if (oldData != null) {

for (int i = 0; i < oldUsed; i += 1) {

- var key = oldData[i];

+ List sublist = _HashBase._indexToList(oldData, i);

+ int modulus = i & (_HashBase._MAX_LINEAR_DATA - 1);

+ var key = sublist[modulus];

if (!_HashBase._isDeleted(oldData, key)) {

add(key);

}

@@ -483,6 +612,31 @@ class _CompactLinkedHashSet<E> extends _HashFieldBase

}

+ void _rebuildIndex(int size, List oldData) {

+ int dataSize = _HashBase._sizeToBaseListSize(size >> 1);

+ if (_data.length != dataSize) {

+ _data = new List(dataSize);

+ for (int i = 0; i < oldData.length; i++) {

+ _data[i] = oldData[i];

+ }

+ _usedData = 0;

+ // Unlike in the map case, this Set method is only called when the

+ // data array and sublists are full, so we don't need to keep track of

+ // the number of entries, we can just add everything from the data arrays

+ // to the new index.

+ for (int i = 0; i < oldData.length; i++) {

+ if (i < _HashBase._MAX_LINEAR_DATA) {

+ _addAlreadyThere(oldData[i]);

+ } else {

+ List sublist = oldData[i];

+ for (int j = 0; j < sublist.length; j++) {

+ _addAlreadyThere(sublist[j]);

+ }

bool add(E key) {

final int size = _index.length;

final int sizeMask = size - 1;

@@ -499,14 +653,20 @@ class _CompactLinkedHashSet<E> extends _HashFieldBase

}

} else {

final int d = hashPattern ^ pair;

- if (d < maxEntries && _equals(key, _data[d])) {

- return false;

+ if (d < maxEntries) {

+ List sublist = _HashBase._indexToList(_data, d);

+ if (sublist != null) {

+ int modulus = d & (_HashBase._MAX_LINEAR_DATA - 1);

+ if (_equals(key, sublist[modulus])) {

+ return false;

+ }

}

i = _HashBase._nextProbe(i, sizeMask);

pair = _index[i];

}

- if (_usedData == _data.length) {

+ if (_usedData == maxEntries) {

_rehash();

add(key);

} else {

@@ -514,11 +674,38 @@ class _CompactLinkedHashSet<E> extends _HashFieldBase

assert(1 <= hashPattern && hashPattern < (1 << 32));

assert((hashPattern & _usedData) == 0);

_index[insertionPoint] = hashPattern | _usedData;

- _data[_usedData++] = key;

+ List sublist = _HashBase._indexToList(_data, _usedData);

+ if (sublist == null) {

+ sublist = new List(_HashBase._MAX_LINEAR_DATA);

+ _HashBase._setSublist(_data, _usedData, sublist);

+ }

+ int modulus = _usedData & (_HashBase._MAX_LINEAR_DATA - 1);

+ sublist[modulus] = key;

+ _usedData++;

}

return true;

}

+ // Adds a key to the index which is already in the data.

+ void _addAlreadyThere(E key) {

+ final int size = _index.length;

+ final int sizeMask = size - 1;

+ final int maxEntries = size >> 1;

+ final int fullHash = _hashCode(key);

+ final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);

+ int i = _HashBase._firstProbe(fullHash, sizeMask);

+ int pair = _index[i];

+ while (pair != _HashBase._UNUSED_PAIR) {

+ i = _HashBase._nextProbe(i, sizeMask);

+ pair = _index[i];

+ }

+ assert(1 <= hashPattern && hashPattern < (1 << 32));

+ assert((hashPattern & _usedData) == 0);

+ _index[i] = hashPattern | _usedData;

+ _usedData++;

+ }

// If key is absent, return _data (which is never a value).

Object _getKeyOrData(Object key) {

final int size = _index.length;

@@ -531,8 +718,14 @@ class _CompactLinkedHashSet<E> extends _HashFieldBase

while (pair != _HashBase._UNUSED_PAIR) {

if (pair != _HashBase._DELETED_PAIR) {

final int d = hashPattern ^ pair;

- if (d < maxEntries && _equals(key, _data[d])) {

- return _data[d]; // Note: Must return the existing key.

+ if (d < maxEntries) {

+ List sublist = _HashBase._indexToList(_data, d);

+ if (sublist != null) {

+ int modulus = d & (_HashBase._MAX_LINEAR_DATA - 1);

+ if (_equals(key, sublist[modulus])) {

+ return sublist[modulus]; // Note: Must return the existing key.

+ }

}

i = _HashBase._nextProbe(i, sizeMask);

@@ -559,11 +752,17 @@ class _CompactLinkedHashSet<E> extends _HashFieldBase

while (pair != _HashBase._UNUSED_PAIR) {

if (pair != _HashBase._DELETED_PAIR) {

final int d = hashPattern ^ pair;

- if (d < maxEntries && _equals(key, _data[d])) {

- _index[i] = _HashBase._DELETED_PAIR;

- _HashBase._setDeletedAt(_data, d);

- ++_deletedKeys;

- return true;

+ if (d < maxEntries) {

+ List sublist = _HashBase._indexToList(_data, d);

+ if (sublist != null) {

+ int modulus = d & (_HashBase._MAX_LINEAR_DATA - 1);

+ if (_equals(key, sublist[modulus])) {

+ _index[i] = _HashBase._DELETED_PAIR;

+ _HashBase._setDeletedAt(_data, sublist, modulus);

+ ++_deletedKeys;

+ return true;

+ }

}

i = _HashBase._nextProbe(i, sizeMask);

« no previous file with comments | « no previous file | runtime/vm/flow_graph_inliner.cc » ('j') | no next file with comments »