Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(235)

Unified Diff: sdk/lib/collection/hash_table.dart

Issue 12213010: New implementation of {,Linked}Hash{Set,Map}. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Now with new files too Created 7 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: sdk/lib/collection/hash_table.dart
diff --git a/sdk/lib/collection/hash_table.dart b/sdk/lib/collection/hash_table.dart
new file mode 100644
index 0000000000000000000000000000000000000000..a20f883be9a9b5615bcbd1d04207ac69493f7a70
--- /dev/null
+++ b/sdk/lib/collection/hash_table.dart
@@ -0,0 +1,380 @@
+part of dart.collection;
+
+/** Type of the marker object stored as the key of a deleted entry. */
+class _DeadEntry {
+ const _DeadEntry();
+}
+
+/**
+ * Type of the placeholder object stored in the table in place of a
+ * `null` key.
+ *
+ * Reports the same hash code as `null` itself.
+ */
+class _NullKey {
+ const _NullKey();
+
+ int get hashCode {
+   return null.hashCode;
+ }
+}
+
+/** Marker stored as the key of an entry that has been deleted. */
+const _DeadEntry _TOMBSTONE = const _DeadEntry();
+/** Placeholder stored as the key of an entry whose key is `null`. */
+const _NullKey _NULL = const _NullKey();
+
+class _HashTable<K> {
+ /**
+ * Table of entries with [_entrySize] slots per entry.
floitsch 2013/02/06 10:43:58 [_entrySize] slots per entry.
+ *
+ * Capacity in entries must be a power of two.
+ */
+ List _table;
+ /** Current capacity. Always equal to [:_table.length ~/ _entrySize:]. */
+ int _capacity;
+ /** Count of occupied entries, including deleted ones. */
+ int _entryCount = 0;
+ /** Count of deleted entries. */
+ int _deletedCount = 0;
+ /** Counter incremented when table is modified. */
+ int _modificationCount = 0;
+
+ /**
+  * Creates an empty table with room for [initialCapacity] entries.
+  *
+  * The backing list is allocated in the constructor body through
+  * [_createTable], so its length is scaled by [_entrySize].
+  */
+ _HashTable(int initialCapacity) : _capacity = initialCapacity {
+   _table = _createTable(_capacity);
+ }
+
+ /**
+  * Reads the key slot at [offset], translating the [_NULL] placeholder
+  * back to `null`.
+  *
+  * Any other stored value is returned unchanged.
+  */
+ Object _key(offset) {
+   Object stored = _table[offset];
+   return identical(stored, _NULL) ? null : stored;
+ }
+
+ /**
+  * Writes [key] into the key slot at [offset].
+  *
+  * A `null` key is stored as the [_NULL] placeholder.
+  */
+ void _setKey(int offset, Object key) {
+   _table[offset] = (key == null) ? _NULL : key;
+ }
+
+ /** Number of live entries: occupied entries minus deleted ones. */
+ int get _elementCount {
+   return _entryCount - _deletedCount;
+ }
+
+ /** Number of table slots used by each entry. */
+ int get _entrySize {
+   return 1;
+ }
+
+ /**
+  * Throws a [ConcurrentModificationError] if [_modificationCount] differs
+  * from [expectedModificationCount].
+  */
+ void _checkModification(int expectedModificationCount) {
+   if (_modificationCount == expectedModificationCount) return;
+   throw new ConcurrentModificationError(this);
+ }
+
+ /**
+  * Allocates a fixed-length list holding [_entrySize] slots for each of
+  * [capacity] entries.
+  */
+ List _createTable(int capacity) {
+   int slotCount = capacity * _entrySize;
+   return new List.fixedLength(slotCount);
+ }
+
+ /**
+  * Index of the first entry to probe: [hashCode] masked with
+  * `capacity - 1`.
+  */
+ int _firstProbe(int hashCode, int capacity) {
+   int mask = capacity - 1;
+   return hashCode & mask;
+ }
+
+ /**
+  * Index of the entry to probe after [previousIndex].
+  *
+  * Advances by [probeCount] and wraps around with the `capacity - 1` mask.
+  */
+ int _nextProbe(int previousIndex, int probeCount, int capacity) {
+   int mask = capacity - 1;
+   return (previousIndex + probeCount) & mask;
+ }
+
+ /** Whether [marker] denotes a reusable slot: `null` or [_TOMBSTONE]. */
+ bool _isFree(Object marker) {
+   if (marker == null) return true;
+   return identical(marker, _TOMBSTONE);
+ }
+
+ /**
+  * Look up the offset for an object in the table.
+  *
+  * Finds the offset of the object in the table, if it is there,
+  * or the first free offset for its hashCode. When the object is not
+  * present, the first tombstone met while probing is preferred over the
+  * empty slot that ended the probe.
+  *
+  * A `null` [object] is searched for as the [_NULL] placeholder, which is
+  * how [_setKey] stores a `null` key.
+  */
+ int _probeForAdd(int hashCode, Object object) {
+   if (object == null) object = _NULL;
+   int entrySize = _entrySize;
+   int index = _firstProbe(hashCode, _capacity);
+   int firstTombstone = -1;
+   int probeCount = 0;
+   while (true) {
+     int offset = index * entrySize;
+     // Read the raw slot instead of going through _key: _key maps the _NULL
+     // placeholder back to null, which would make a stored null key look
+     // like an empty slot and never match the normalized search object.
+     Object entry = _table[offset];
+     if (identical(entry, _TOMBSTONE)) {
+       if (firstTombstone < 0) firstTombstone = offset;
+     } else if (entry == null) {
+       if (firstTombstone < 0) return offset;
+       return firstTombstone;
+     } else if (_equals(entry, object)) {
+       // TODO(lrn): Test if caching the last found key
+       // is better than (almost) always reading it again.
+       return offset;
+     }
+     // The _nextProbe must be designed so that it hits
+     // every index eventually.
+     index = _nextProbe(index, ++probeCount, _capacity);
+   }
+ }
+
+ /**
+  * Look up the offset of an object that is expected to be in the table.
+  *
+  * Returns the offset of the matching entry, or -1 if an empty slot is
+  * reached before a match is found.
+  *
+  * A `null` [object] is searched for as the [_NULL] placeholder, which is
+  * how [_setKey] stores a `null` key.
+  */
+ int _probeForLookup(int hashCode, Object object) {
+   if (object == null) object = _NULL;
+   int entrySize = _entrySize;
+   int index = _firstProbe(hashCode, _capacity);
+   int probeCount = 0;
+   while (true) {
floitsch 2013/02/06 10:43:58 Make it a do while(true) ?
erikcorry 2013/02/06 10:57:56 Why?
floitsch 2013/02/06 11:45:46 consistency with above. And in general (without lo
erikcorry 2013/02/06 11:48:41 This must be a German language thing. You like ha
sra1 2013/02/06 17:01:08 I like for (;;) {...} If you know there is so
Lasse Reichstein Nielsen 2013/02/08 13:53:01 I've changed it to while(true). It is more readabl
+     int offset = index * entrySize;
+     // Read the raw slot instead of going through _key: _key maps the _NULL
+     // placeholder back to null, which would end the probe at a stored null
+     // key as if the slot were empty.
+     Object entry = _table[offset];
+     if (entry == null) {
+       return -1;
+     } else if (!identical(entry, _TOMBSTONE) && _equals(entry, object)) {
+       // Deleted entries are skipped without consulting _equals.
+       // TODO(lrn): Test if caching the last found key
+       // is better than (almost) always reading it again.
+       return offset;
+     }
+     // The _nextProbe must be designed so that it hits
+     // every index eventually.
+     index = _nextProbe(index, ++probeCount, _capacity);
+   }
+ }
+
+ // Override the following two to change equality/hashCode computations
+
+ /**
+ * Compares two objects for equality using [:==:] on [element].
+ *
+ * The first object is the one already in the table,
+ * and the second is the one being searched for.
+ */
+ bool _equals(Object element, Object other) {
sra1 2013/02/06 05:35:21 Who overrides this? Do you plan to use it for an i
Lasse Reichstein Nielsen 2013/02/08 13:53:01 Exactly.
+ return element == other;
floitsch 2013/02/06 10:43:58 In the last implementation we were hitting a lot o
Lasse Reichstein Nielsen 2013/02/08 13:53:01 If that is a problem, maybe it can be avoided by a
+ }
+
+ /**
+  * Computes the hash code used to place [object] in the table.
+  */
+ int _hashCodeOf(Object object) {
+   return object.hashCode;
+ }
+
+ /**
+  * Ensure that the table has room to add [moreElements] entries.
+  *
+  * Call this before adding one or more elements.
+  *
+  * Grows the table through [_grow] when adding [moreElements] entries
+  * would leave less than 25% of the capacity free, or fewer free entries
+  * than there are deleted entries. Does not return a value.
+  */
+ void _ensureCapacity(int moreElements) {
+   int capacity = _capacity;
+   // Assume worst-case where no deleted elements are reused.
+   int newEntryCount = _entryCount + moreElements;
+   int newFreeCount = capacity - newEntryCount;
+   if (newFreeCount * 4 < capacity ||
+       newFreeCount < _deletedCount) {
+     // Less than 25% free or fewer free entries than deleted entries.
floitsch 2013/02/06 10:43:58 fewer
Lasse Reichstein Nielsen 2013/02/08 13:53:01 Done.
+     _grow(_entryCount - _deletedCount + moreElements);
+   }
+ }
+
+ /**
+  * Replaces the table with a larger one and re-inserts the live entries.
+  *
+  * The capacity is doubled at least once, and repeatedly until it is at
+  * least twice [contentCount].
+  */
+ void _grow(int contentCount) {
+   // Don't grow to less than twice the needed capacity.
+   int wantedCapacity = contentCount * 2;
+   int newCapacity = _capacity;
+   do {
floitsch 2013/02/06 10:43:58 If a table adds and removes elements frequently, i
Lasse Reichstein Nielsen 2013/02/08 13:53:01 Good point. I've had the same thought too.
+     newCapacity *= 2;
+   } while (newCapacity < wantedCapacity);
+   // Reset to another table and add all existing elements.
+   List previousTable = _table;
+   _table = _createTable(newCapacity);
+   _capacity = newCapacity;
+   _deletedCount = 0;
+   _entryCount = 0;
+   _addAllEntries(previousTable);
+ }
+
+ /**
+  * Re-inserts every non-free entry of [oldTable] into the current table.
+  *
+  * Each key is added with [_put]; the remaining slots of the entry are
+  * transferred with [_copyEntry].
+  */
+ void _addAllEntries(List oldTable) {
+   int fromOffset = 0;
+   while (fromOffset < oldTable.length) {
+     Object key = oldTable[fromOffset];
+     if (!_isFree(key)) {
+       _copyEntry(oldTable, fromOffset, _put(key));
+     }
+     fromOffset += _entrySize;
+   }
+ }
+
+ /**
+  * Copies every slot except the key slot from the entry at [fromOffset]
+  * in [fromTable] to the entry at [toOffset] in [_table].
+  *
+  * Called while growing the base array.
+  *
+  * Override this if verbatim copying isn't sufficient.
+  */
+ void _copyEntry(List fromTable, int fromOffset, int toOffset) {
floitsch 2013/02/06 10:43:58 I don't think there is an implementation that uses
Lasse Reichstein Nielsen 2013/02/08 13:53:01 It's reduced to doing nothing. That is still corre
+   int slotsPerEntry = _entrySize;
+   int slot = 1;
+   while (slot < slotsPerEntry) {
+     _table[toOffset + slot] = fromTable[fromOffset + slot];
+     slot++;
+   }
+ }
+
+ // Apart from _clear, the following methods (_get, _put and _remove) are
+ // for simple get/set/remove operations.
+ // They only affect the key of an entry. The remaining fields must be
floitsch 2013/02/06 10:43:58 Not true. The clear method removes everything.
Lasse Reichstein Nielsen 2013/02/08 13:53:01 Whoops, moving _clear below the three methods I ac
+ // filled by the caller.
+
+ /**
+  * Empties the table by resetting every slot to `null`.
+  *
+  * Both entry counters are zeroed and [_modificationCount] is incremented.
+  */
+ void _clear() {
+   int slot = 0;
+   while (slot < _table.length) {
+     _table[slot] = null;
+     slot++;
+   }
+   _deletedCount = 0;
+   _entryCount = 0;
+   _modificationCount++;
+ }
+
+ /**
+  * Looks up [key] and returns its offset in [_table].
+  *
+  * The result is whatever [_probeForLookup] reports, which is negative
+  * when the key is not found.
+  */
+ int _get(K key) {
+   int hash = _hashCodeOf(key);
+   return _probeForLookup(hash, key);
+ }
+
+ /**
+  * Puts the key into the table and returns its offset into [_table].
+  *
+  * If [_entrySize] is greater than 1, the caller should fill the
+  * remaining fields.
floitsch 2013/02/06 10:43:58 fields
Lasse Reichstein Nielsen 2013/02/08 13:53:01 Done.
+  *
+  * Remember to call [_ensureCapacity] before using this method.
+  * You can call it once with a larger number before doing a sequence
+  * of put operations.
+  *
+  * [_entryCount] is incremented only when an empty slot is claimed, and
+  * [_deletedCount] is decremented only when a tombstone is reused. A slot
+  * that already holds a key is left untouched.
+  */
+ int _put(K key) {
+   int offset = _probeForAdd(_hashCodeOf(key), key);
+   // Inspect the raw slot instead of going through _key: _key maps the
+   // _NULL placeholder back to null, so a slot already holding a null key
+   // would be counted as a newly claimed empty slot.
+   Object oldEntry = _table[offset];
+   if (oldEntry == null) {
+     _entryCount++;
+     _setKey(offset, key);
+   } else if (identical(oldEntry, _TOMBSTONE)) {
+     _deletedCount--;
+     _setKey(offset, key);
+   }
+   return offset;
+ }
+
+ /**
+  * Removes a key from the table and returns its offset into [_table].
+  *
+  * Returns the negative result of [_probeForLookup] if the key was not
+  * found, in which case nothing is changed.
+  * If [_entrySize] is greater than 1, the caller should clean up the
+  * remaining fields.
+  */
+ int _remove(K key) {
+   int offset = _probeForLookup(_hashCodeOf(key), key);
+   if (offset < 0) return offset;
+   _deleteEntry(offset);
+   return offset;
+ }
+
+ /**
+  * Marks the entry at [offset] as deleted.
+  *
+  * Overwrites the key slot with [_TOMBSTONE] and increments
+  * [_deletedCount]. The slot must currently hold a key.
+  */
+ void _deleteEntry(int offset) {
+   // Check the raw slot instead of going through _key: _key maps the _NULL
+   // placeholder back to null, which would make this assertion fail when
+   // the entry being deleted has a null key.
+   assert(!_isFree(_table[offset]));
+   _setKey(offset, _TOMBSTONE);
+   _deletedCount++;
+ }
+}
+
+/**
+ * Iterable view of the keys stored in a [_HashTable].
+ */
+class _HashTableKeyIterable<K> extends Iterable<K> {
+ final _HashTable<K> _hashTable;
+
+ _HashTableKeyIterable(this._hashTable);
+
+ Iterator<K> get iterator {
+   return new _HashTableKeyIterator<K>(_hashTable);
+ }
+
+ /** Number of keys, taken from the table's element count. */
+ int get length {
+   return _hashTable._elementCount;
+ }
+
+ /** Whether the table lookup for [value] yields a non-negative offset. */
+ bool contains(Object value) {
+   int offset = _hashTable._get(value);
+   return offset >= 0;
+ }
+
+ bool get isEmpty {
+   return _hashTable._elementCount == 0;
+ }
+
+ /**
+  * The only key of the table.
+  *
+  * Throws a [StateError] if the table holds more than one element;
+  * otherwise defers to [first].
+  */
+ K get single {
+   if (_hashTable._elementCount <= 1) return first;
+   throw new StateError("More than one element");
+ }
+}
+
+/**
+ * Iterator traversing a [_HashTable] and returning the key of each entry.
+ */
+class _HashTableKeyIterator<K> implements Iterator<K> {
+ final _HashTable<K> _hashTable;
+ /** Modification count of the table when this iterator was created. */
+ final int _modificationCount;
+ /** Location right after last found element. */
+ int _offset = 0;
+ K _current = null;
+
+ _HashTableKeyIterator(_HashTable<K> hashTable)
+     : _hashTable = hashTable,
+       _modificationCount = hashTable._modificationCount;
+
+ /**
+  * Advances to the next non-free key slot.
+  *
+  * Checks for modification of the table first. Returns false, with
+  * [current] reset to `null`, once the end of the table is reached.
+  */
+ bool moveNext() {
+   _hashTable._checkModification(_modificationCount);
+
+   List table = _hashTable._table;
+   int step = _hashTable._entrySize;
+
+   while (_offset < table.length) {
+     Object key = table[_offset];
+     _offset += step;
+     if (_hashTable._isFree(key)) continue;
+     // The placeholder for a null key is reported as null.
+     _current = identical(key, _NULL) ? null : key;
+     return true;
+   }
+   _current = null;
+   return false;
+ }
+
+ K get current {
+   return _current;
+ }
+}
+
+/**
+ * Iterable view of one value slot of every entry in a [_HashTable].
+ */
+class _HashTableValueIterable<V> extends Iterable<V> {
+ final _HashTable _hashTable;
+ /** The index of the value slot within each entry. */
+ final int _entryIndex;
+
+ _HashTableValueIterable(this._hashTable, this._entryIndex);
+
+ Iterator<V> get iterator {
+   return new _HashTableValueIterator<V>(_hashTable, _entryIndex);
+ }
+
+ /** Number of values, taken from the table's element count. */
+ int get length {
+   return _hashTable._elementCount;
+ }
+}
+
+/**
+ * Iterator traversing a [_HashTable] and returning a value for each entry.
+ */
+class _HashTableValueIterator<V> implements Iterator<V> {
+ final _HashTable _hashTable;
+ /** The index of the value in the entry.*/
+ final int _entryIndex;
+ /** Modification count of the table when this iterator was created. */
+ final int _modificationCount;
+ /** Location right after last found element. */
+ int _offset = 0;
+ V _current = null;
+
+ _HashTableValueIterator(_HashTable hashTable, this._entryIndex)
+     : _hashTable = hashTable,
+       _modificationCount = hashTable._modificationCount {
+   assert(_entryIndex > 0);  // Use key-iterator above for index 0.
+   assert(_entryIndex < hashTable._entrySize);
+ }
+
+ /**
+  * Advances to the next entry whose key slot is not free and reads the
+  * slot at [_entryIndex] within that entry.
+  *
+  * Checks for modification of the table first. Returns false, with
+  * [current] reset to `null`, once the end of the table is reached.
+  */
+ bool moveNext() {
+   _hashTable._checkModification(_modificationCount);
+
+   List table = _hashTable._table;
+   int step = _hashTable._entrySize;
+
+   while (_offset < table.length) {
+     int entryOffset = _offset;
+     _offset += step;
+     if (_hashTable._isFree(table[entryOffset])) continue;
+     _current = table[entryOffset + _entryIndex];
+     return true;
+   }
+   _current = null;
+   return false;
+ }
+
+ V get current {
+   return _current;
+ }
+}

Powered by Google App Engine
This is Rietveld 408576698