Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(522)

Unified Diff: runtime/lib/compact_hash.dart

Issue 3001763002: [VM-corelib] Improve performance for very big maps and sets.
Patch Set: Speed up moveNext on the map/set iterator. Created 3 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | runtime/vm/flow_graph_inliner.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: runtime/lib/compact_hash.dart
diff --git a/runtime/lib/compact_hash.dart b/runtime/lib/compact_hash.dart
index bfeaf53ab2a8fb713170ec6f691952f4bf5751ff..b41f90f1acde5c7bdf2fcebcdfa32afcc18e398f 100644
--- a/runtime/lib/compact_hash.dart
+++ b/runtime/lib/compact_hash.dart
@@ -71,6 +71,45 @@ abstract class _HashBase {
static const int _UNUSED_PAIR = 0;
static const int _DELETED_PAIR = 1;
+ static const int _MAX_LINEAR_DATA = 4096;
+ static const int _MAX_LINEAR_MASK = 4095;
+ static const int _MAX_LINEAR_DATA_LOG_2 = 12;
+
+ // For sizes up to _MAX_LINEAR_DATA the size of the _data array is just the
+ // size we ask for. Above that size we add enough elements onto the _data
+ // array to hold _MAX_LINEAR_DATA-sized sub-arrays for the rest of the
+ // entries.
+ static int _sizeToBaseListSize(int size) {
+ if (size <= _MAX_LINEAR_DATA) return size;
+ // Round up.
+ size = ((size - 1) | (_MAX_LINEAR_DATA - 1)) + 1;
+ // First few entries are in the linear area.
+ size -= _MAX_LINEAR_DATA;
+ // Enough entries for the sub-arrays.
+ size >>= _MAX_LINEAR_DATA_LOG_2;
+ return _MAX_LINEAR_DATA + size;
+ }
+
+ static int _baseListSizeToSize(int baseListSize) {
+ if (baseListSize <= _MAX_LINEAR_DATA) return baseListSize;
+ baseListSize -= _MAX_LINEAR_DATA;
+ baseListSize <<= _MAX_LINEAR_DATA_LOG_2;
+ return baseListSize + _MAX_LINEAR_DATA;
+ }
+
+ static List _indexToList(List base, int index) {
+ if (index < _MAX_LINEAR_DATA) return base;
+ index >>= _MAX_LINEAR_DATA_LOG_2;
+ return base[_MAX_LINEAR_DATA - 1 + index];
+ }
+
+ static List _setSublist(List base, int index, List sublist) {
+ assert(index >= _MAX_LINEAR_DATA);
+ index -= _MAX_LINEAR_DATA;
+ index >>= _MAX_LINEAR_DATA_LOG_2;
+ base[_MAX_LINEAR_DATA + index] = sublist;
+ }
+
// On 32-bit, the top bits are wasted to avoid Mint allocation.
// TODO(koda): Reclaim the bits by making the compiler treat hash patterns
// as unsigned words.
@@ -98,9 +137,9 @@ abstract class _HashBase {
// A self-loop is used to mark a deleted key or value.
static bool _isDeleted(List data, Object keyOrValue) =>
- identical(keyOrValue, data);
- static void _setDeletedAt(List data, int d) {
- data[d] = data;
+ identical(data, keyOrValue);
+ static void _setDeletedAt(List data, List sublist, int modulus) {
+ sublist[modulus] = data;
}
// Concurrent modification detection relies on this checksum monotonically
@@ -146,60 +185,85 @@ class _LinkedHashMapMixin<K, V> {
if ((_deletedKeys << 2) > _usedData) {
// TODO(koda): Consider shrinking.
// TODO(koda): Consider in-place compaction and more costly CME check.
- _init(_index.length, _hashMask, _data, _usedData);
+ _init(_index.length, _hashMask, _data);
} else {
// TODO(koda): Support 32->64 bit transition (and adjust _hashMask).
- _init(_index.length << 1, _hashMask >> 1, _data, _usedData);
+ _init(_index.length << 1, _hashMask >> 1, _data);
}
}
void clear() {
if (!isEmpty) {
- // Use _data.length, since _index might be null.
- _init(_data.length, _hashMask, null, 0);
+ int size = _HashBase._INITIAL_INDEX_SIZE;
+ _init(size, _HashBase._indexSizeToHashMask(size), null);
}
}
// Allocate new _index and _data, and optionally copy existing contents.
- void _init(int size, int hashMask, List oldData, int oldUsed) {
+ void _init(int size, int hashMask, List oldData) {
assert(size & (size - 1) == 0);
assert(_HashBase._UNUSED_PAIR == 0);
_index = new Uint32List(size);
_hashMask = hashMask;
- _data = new List(size);
+ if (_deletedKeys == 0 && _data == oldData) {
+ _rebuildIndex(size, oldData);
+ return;
+ }
+ _data = new List(_HashBase._sizeToBaseListSize(size));
+ int oldUsed = _usedData;
_usedData = 0;
_deletedKeys = 0;
if (oldData != null) {
for (int i = 0; i < oldUsed; i += 2) {
- var key = oldData[i];
+ List sublist = _HashBase._indexToList(oldData, i);
+ int modulus = i & _HashBase._MAX_LINEAR_MASK;
+ var key = sublist[modulus];
if (!_HashBase._isDeleted(oldData, key)) {
// TODO(koda): While there are enough hash bits, avoid hashCode calls.
- this[key] = oldData[i + 1];
+ this[key] = sublist[modulus + 1];
}
}
}
}
+ void _rebuildIndex(int size, List oldData) {
+ int dataSize = _HashBase._sizeToBaseListSize(size);
+ if (_data.length != dataSize) {
+ _data = new List(dataSize);
+ for (int i = 0; i < oldData.length; i++) {
+ _data[i] = oldData[i];
+ }
+ }
+ int i = 0;
+ int notYetAdded = _usedData;
+ _usedData = 0;
+ for (; i < notYetAdded && i < _HashBase._MAX_LINEAR_DATA; i += 2) {
+ _setAlreadyThere(oldData[i]);
+ }
+ for (; i < oldData.length; i++) {
+ notYetAdded -= _HashBase._MAX_LINEAR_DATA;
+ List sublist = oldData[i];
+ for (int j = 0; j < notYetAdded && j < sublist.length; j += 2) {
+ _setAlreadyThere(sublist[j]);
+ }
+ }
+ }
+
int _getIndexLength() {
return (_index == null) ? _regenerateIndex() : _index.length;
}
int _regenerateIndex() {
assert(_index == null);
- _index = new Uint32List(_data.length);
+ _index = new Uint32List(_HashBase._baseListSizeToSize(_data.length));
assert(_hashMask == 0);
_hashMask = _HashBase._indexSizeToHashMask(_index.length);
- final int tmpUsed = _usedData;
- _usedData = 0;
- for (int i = 0; i < tmpUsed; i += 2) {
- // TODO(koda): Avoid redundant equality tests and stores into _data.
- this[_data[i]] = _data[i + 1];
- }
+ _rebuildIndex(_data.length, _data);
return _index.length;
}
void _insert(K key, V value, int hashPattern, int i) {
- if (_usedData == _data.length) {
+ if (_usedData == _getIndexLength()) {
_rehash();
this[key] = value;
} else {
@@ -207,8 +271,15 @@ class _LinkedHashMapMixin<K, V> {
final int index = _usedData >> 1;
assert((index & hashPattern) == 0);
_index[i] = hashPattern | index;
- _data[_usedData++] = key;
- _data[_usedData++] = value;
+ List sublist = _HashBase._indexToList(_data, _usedData);
+ if (sublist == null) {
+ sublist = new List(_HashBase._MAX_LINEAR_DATA);
+ _HashBase._setSublist(_data, _usedData, sublist);
+ }
+ int modulus = _usedData & _HashBase._MAX_LINEAR_MASK;
+ sublist[modulus] = key;
+ sublist[modulus + 1] = value;
+ _usedData += 2;
}
}
@@ -229,8 +300,12 @@ class _LinkedHashMapMixin<K, V> {
final int entry = hashPattern ^ pair;
if (entry < maxEntries) {
final int d = entry << 1;
- if (_equals(key, _data[d])) {
- return d + 1;
+ List sublist = _HashBase._indexToList(_data, d);
+ if (sublist != null) {
+ int modulus = d & _HashBase._MAX_LINEAR_MASK;
+ if (_equals(key, sublist[modulus])) {
+ return d + 1;
+ }
}
}
}
@@ -240,6 +315,28 @@ class _LinkedHashMapMixin<K, V> {
return firstDeleted >= 0 ? -firstDeleted : -i;
}
+ // Adds a key to the index where the (key, value) are already in the data.
+ void _setAlreadyThere(K key) {
+ final int size = _getIndexLength();
+ final int sizeMask = size - 1;
+ final int fullHash = _hashCode(key);
+ final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);
+
+ final int maxEntries = size >> 1;
+ int i = _HashBase._firstProbe(fullHash, sizeMask);
+ int pair = _index[i];
+ while (pair != _HashBase._UNUSED_PAIR) {
+ i = _HashBase._nextProbe(i, sizeMask);
+ pair = _index[i];
+ }
+
+ assert(1 <= hashPattern && hashPattern < (1 << 32));
+ final int index = _usedData >> 1;
+ assert((index & hashPattern) == 0);
+ _index[i] = hashPattern | index;
+ _usedData += 2;
+ }
+
void operator []=(K key, V value) {
final int size = _getIndexLength();
final int sizeMask = size - 1;
@@ -247,7 +344,9 @@ class _LinkedHashMapMixin<K, V> {
final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);
final int d = _findValueOrInsertPoint(key, fullHash, hashPattern, size);
if (d > 0) {
- _data[d] = value;
+ List sublist = _HashBase._indexToList(_data, d);
+ int modulus = d & _HashBase._MAX_LINEAR_MASK;
+ sublist[modulus] = value;
} else {
final int i = -d;
_insert(key, value, hashPattern, i);
@@ -262,7 +361,9 @@ class _LinkedHashMapMixin<K, V> {
final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);
final int d = _findValueOrInsertPoint(key, fullHash, hashPattern, size);
if (d > 0) {
- return _data[d];
+ List sublist = _HashBase._indexToList(_data, d);
+ int modulus = d & _HashBase._MAX_LINEAR_MASK;
+ return sublist[modulus];
}
// 'ifAbsent' is allowed to modify the map.
List oldData = _data;
@@ -290,11 +391,13 @@ class _LinkedHashMapMixin<K, V> {
final int entry = hashPattern ^ pair;
if (entry < maxEntries) {
final int d = entry << 1;
- if (_equals(key, _data[d])) {
+ List sublist = _HashBase._indexToList(_data, d);
+ int modulus = d & _HashBase._MAX_LINEAR_MASK;
+ if (_equals(key, sublist[modulus])) {
_index[i] = _HashBase._DELETED_PAIR;
- _HashBase._setDeletedAt(_data, d);
- V value = _data[d + 1];
- _HashBase._setDeletedAt(_data, d + 1);
+ _HashBase._setDeletedAt(_data, sublist, modulus);
+ V value = sublist[modulus + 1];
+ _HashBase._setDeletedAt(_data, sublist, modulus + 1);
++_deletedKeys;
return value;
}
@@ -320,8 +423,10 @@ class _LinkedHashMapMixin<K, V> {
final int entry = hashPattern ^ pair;
if (entry < maxEntries) {
final int d = entry << 1;
- if (_equals(key, _data[d])) {
- return _data[d + 1];
+ List sublist = _HashBase._indexToList(_data, d);
+ int modulus = d & _HashBase._MAX_LINEAR_MASK;
+ if (_equals(key, sublist[modulus])) {
+ return sublist[modulus + 1];
}
}
}
@@ -417,6 +522,8 @@ class _CompactIterator<E> implements Iterator<E> {
final _table;
final List _data;
final int _len;
+ int _limit;
+ List _currentList;
int _offset;
final int _step;
final int _checkSum;
@@ -424,22 +531,39 @@ class _CompactIterator<E> implements Iterator<E> {
_CompactIterator(table, this._data, this._len, this._offset, this._step)
: _table = table,
- _checkSum = table._checkSum;
+ _checkSum = table._checkSum {
+ _limit = -3;
+ _currentList = _data;
+ }
bool moveNext() {
if (_table._isModifiedSince(_data, _checkSum)) {
throw new ConcurrentModificationError(_table);
}
- do {
- _offset += _step;
- } while (_offset < _len && _HashBase._isDeleted(_data, _data[_offset]));
- if (_offset < _len) {
- current = _data[_offset];
+ int offset = _offset + _step;
+ if (offset < _limit &&
+ !_HashBase._isDeleted(_data,
+ current = _currentList[offset & _HashBase._MAX_LINEAR_MASK])) {
+ _offset = offset;
return true;
- } else {
- current = null;
- return false;
}
+
+ do {
+ current = null;
+ _offset += _step;
+ int page = _offset >> _HashBase._MAX_LINEAR_DATA_LOG_2;
+ _currentList =
+ page == 0 ? _data : _data[_HashBase._MAX_LINEAR_DATA - 1 + page];
+ if (page == _len >> _HashBase._MAX_LINEAR_DATA_LOG_2) {
+ _limit = _len;
+ } else {
+ _limit = (page + 1) << _HashBase._MAX_LINEAR_DATA_LOG_2;
+ }
+ } while (_offset < _limit &&
+ _HashBase._isDeleted(_data,
+ current = _currentList[_offset & _HashBase._MAX_LINEAR_MASK]));
+
+ return _offset < _limit;
}
}
@@ -455,27 +579,35 @@ class _CompactLinkedHashSet<E> extends _HashFieldBase
void _rehash() {
if ((_deletedKeys << 1) > _usedData) {
- _init(_index.length, _hashMask, _data, _usedData);
+ _init(_index.length, _hashMask, _data);
} else {
- _init(_index.length << 1, _hashMask >> 1, _data, _usedData);
+ _init(_index.length << 1, _hashMask >> 1, _data);
}
}
void clear() {
if (!isEmpty) {
- _init(_index.length, _hashMask, null, 0);
+ int size = _HashBase._INITIAL_INDEX_SIZE;
+ _init(size, _HashBase._indexSizeToHashMask(size), null);
}
}
- void _init(int size, int hashMask, List oldData, int oldUsed) {
+ void _init(int size, int hashMask, List oldData) {
_index = new Uint32List(size);
_hashMask = hashMask;
- _data = new List(size >> 1);
+ if (_deletedKeys == 0 && _data == oldData) {
+ _rebuildIndex(size, oldData);
+ return;
+ }
+ _data = new List(_HashBase._sizeToBaseListSize(size >> 1));
+ int oldUsed = _usedData;
_usedData = 0;
_deletedKeys = 0;
if (oldData != null) {
for (int i = 0; i < oldUsed; i += 1) {
- var key = oldData[i];
+ List sublist = _HashBase._indexToList(oldData, i);
+ int modulus = i & _HashBase._MAX_LINEAR_MASK;
+ var key = sublist[modulus];
if (!_HashBase._isDeleted(oldData, key)) {
add(key);
}
@@ -483,6 +615,31 @@ class _CompactLinkedHashSet<E> extends _HashFieldBase
}
}
+ void _rebuildIndex(int size, List oldData) {
+ int dataSize = _HashBase._sizeToBaseListSize(size >> 1);
+ if (_data.length != dataSize) {
+ _data = new List(dataSize);
+ for (int i = 0; i < oldData.length; i++) {
+ _data[i] = oldData[i];
+ }
+ }
+ _usedData = 0;
+ // Unlike in the map case, this Set method is only called when the
+ // data array and sublists are full, so we don't need to keep track of
+ // the number of entries, we can just add everything from the data arrays
+ // to the new index.
+ for (int i = 0; i < oldData.length; i++) {
+ if (i < _HashBase._MAX_LINEAR_DATA) {
+ _addAlreadyThere(oldData[i]);
+ } else {
+ List sublist = oldData[i];
+ for (int j = 0; j < sublist.length; j++) {
+ _addAlreadyThere(sublist[j]);
+ }
+ }
+ }
+ }
+
bool add(E key) {
final int size = _index.length;
final int sizeMask = size - 1;
@@ -499,14 +656,20 @@ class _CompactLinkedHashSet<E> extends _HashFieldBase
}
} else {
final int d = hashPattern ^ pair;
- if (d < maxEntries && _equals(key, _data[d])) {
- return false;
+ if (d < maxEntries) {
+ List sublist = _HashBase._indexToList(_data, d);
+ if (sublist != null) {
+ int modulus = d & _HashBase._MAX_LINEAR_MASK;
+ if (_equals(key, sublist[modulus])) {
+ return false;
+ }
+ }
}
}
i = _HashBase._nextProbe(i, sizeMask);
pair = _index[i];
}
- if (_usedData == _data.length) {
+ if (_usedData == maxEntries) {
_rehash();
add(key);
} else {
@@ -514,11 +677,38 @@ class _CompactLinkedHashSet<E> extends _HashFieldBase
assert(1 <= hashPattern && hashPattern < (1 << 32));
assert((hashPattern & _usedData) == 0);
_index[insertionPoint] = hashPattern | _usedData;
- _data[_usedData++] = key;
+ List sublist = _HashBase._indexToList(_data, _usedData);
+ if (sublist == null) {
+ sublist = new List(_HashBase._MAX_LINEAR_DATA);
+ _HashBase._setSublist(_data, _usedData, sublist);
+ }
+ int modulus = _usedData & _HashBase._MAX_LINEAR_MASK;
+ sublist[modulus] = key;
+ _usedData++;
}
return true;
}
+ // Adds a key to the index which is already in the data.
+ void _addAlreadyThere(E key) {
+ final int size = _index.length;
+ final int sizeMask = size - 1;
+ final int maxEntries = size >> 1;
+ final int fullHash = _hashCode(key);
+ final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);
+ int i = _HashBase._firstProbe(fullHash, sizeMask);
+ int pair = _index[i];
+ while (pair != _HashBase._UNUSED_PAIR) {
+ i = _HashBase._nextProbe(i, sizeMask);
+ pair = _index[i];
+ }
+
+ assert(1 <= hashPattern && hashPattern < (1 << 32));
+ assert((hashPattern & _usedData) == 0);
+ _index[i] = hashPattern | _usedData;
+ _usedData++;
+ }
+
// If key is absent, return _data (which is never a value).
Object _getKeyOrData(Object key) {
final int size = _index.length;
@@ -531,8 +721,14 @@ class _CompactLinkedHashSet<E> extends _HashFieldBase
while (pair != _HashBase._UNUSED_PAIR) {
if (pair != _HashBase._DELETED_PAIR) {
final int d = hashPattern ^ pair;
- if (d < maxEntries && _equals(key, _data[d])) {
- return _data[d]; // Note: Must return the existing key.
+ if (d < maxEntries) {
+ List sublist = _HashBase._indexToList(_data, d);
+ if (sublist != null) {
+ int modulus = d & _HashBase._MAX_LINEAR_MASK;
+ if (_equals(key, sublist[modulus])) {
+ return sublist[modulus]; // Note: Must return the existing key.
+ }
+ }
}
}
i = _HashBase._nextProbe(i, sizeMask);
@@ -559,11 +755,17 @@ class _CompactLinkedHashSet<E> extends _HashFieldBase
while (pair != _HashBase._UNUSED_PAIR) {
if (pair != _HashBase._DELETED_PAIR) {
final int d = hashPattern ^ pair;
- if (d < maxEntries && _equals(key, _data[d])) {
- _index[i] = _HashBase._DELETED_PAIR;
- _HashBase._setDeletedAt(_data, d);
- ++_deletedKeys;
- return true;
+ if (d < maxEntries) {
+ List sublist = _HashBase._indexToList(_data, d);
+ if (sublist != null) {
+ int modulus = d & _HashBase._MAX_LINEAR_MASK;
+ if (_equals(key, sublist[modulus])) {
+ _index[i] = _HashBase._DELETED_PAIR;
+ _HashBase._setDeletedAt(_data, sublist, modulus);
+ ++_deletedKeys;
+ return true;
+ }
+ }
}
}
i = _HashBase._nextProbe(i, sizeMask);
« no previous file with comments | « no previous file | runtime/vm/flow_graph_inliner.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698