Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(73)

Side by Side Diff: runtime/lib/compact_hash.dart

Issue 1151013005: Serialize maps without hashes. (Closed) Base URL: git@github.com:dart-lang/sdk.git@master
Patch Set: _getIndexSize and name constant Created 5 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | runtime/vm/object.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 import 'dart:typed_data'; 5 import 'dart:typed_data';
6 import 'dart:_internal' as internal; 6 import 'dart:_internal' as internal;
7 7
8 // Hash table with open addressing that separates the index from keys/values. 8 // Hash table with open addressing that separates the index from keys/values.
9 9
10 abstract class _HashFieldBase { 10 abstract class _HashFieldBase {
11 // Each occupied entry in _index is a fixed-size integer that encodes a pair: 11 // Each occupied entry in _index is a fixed-size integer that encodes a pair:
12 // [ hash pattern for key | index of entry in _data ] 12 // [ hash pattern for key | index of entry in _data ]
13 // The hash pattern is based on hashCode, but is guaranteed to be non-zero. 13 // The hash pattern is based on hashCode, but is guaranteed to be non-zero.
14 // The length of _index is always a power of two, and there is always at 14 // The length of _index is always a power of two, and there is always at
15 // least one unoccupied entry. 15 // least one unoccupied entry.
16 // NOTE: When maps are deserialized, their _index and _hashMask is regenerated
17 // lazily by _regenerateIndex.
18 // TODO(koda): Consider also using null _index for tiny, linear-search tables.
16 Uint32List _index = new Uint32List(_HashBase._INITIAL_INDEX_SIZE); 19 Uint32List _index = new Uint32List(_HashBase._INITIAL_INDEX_SIZE);
17 20
18 // Cached in-place mask for the hash pattern component. On 32-bit, the top 21 // Cached in-place mask for the hash pattern component.
19 // bits are wasted to avoid Mint allocation. 22 int _hashMask = _HashBase._indexSizeToHashMask(_HashBase._INITIAL_INDEX_SIZE);
20 // TODO(koda): Reclaim the bits by making the compiler treat hash patterns
21 // as unsigned words.
22 int _hashMask = internal.is64Bit ?
23 (1 << (32 - _HashBase._INITIAL_INDEX_BITS)) - 1 :
24 (1 << (30 - _HashBase._INITIAL_INDEX_BITS)) - 1;
25 23
26 // Fixed-length list of keys (set) or key/value at even/odd indices (map). 24 // Fixed-length list of keys (set) or key/value at even/odd indices (map).
27 List _data = new List(_HashBase._INITIAL_INDEX_SIZE); 25 List _data = new List(_HashBase._INITIAL_INDEX_SIZE);
28 26
29 // Length of _data that is used (i.e., keys + values for a map). 27 // Length of _data that is used (i.e., keys + values for a map).
30 int _usedData = 0; 28 int _usedData = 0;
31 29
32 // Number of deleted keys. 30 // Number of deleted keys.
33 int _deletedKeys = 0; 31 int _deletedKeys = 0;
34 } 32 }
(...skipping 25 matching lines...) Expand all
60 // The length of _index is twice the number of entries in _data, and both 58 // The length of _index is twice the number of entries in _data, and both
61 // are doubled when _data is full. Thus, _index will have a max load factor 59 // are doubled when _data is full. Thus, _index will have a max load factor
62 // of 1/2, which enables one more bit to be used for the hash. 60 // of 1/2, which enables one more bit to be used for the hash.
63 // TODO(koda): Consider growing _data by factor sqrt(2), twice as often. 61 // TODO(koda): Consider growing _data by factor sqrt(2), twice as often.
64 static const int _INITIAL_INDEX_BITS = 3; 62 static const int _INITIAL_INDEX_BITS = 3;
65 static const int _INITIAL_INDEX_SIZE = 1 << (_INITIAL_INDEX_BITS + 1); 63 static const int _INITIAL_INDEX_SIZE = 1 << (_INITIAL_INDEX_BITS + 1);
66 64
67 // Unused and deleted entries are marked by 0 and 1, respectively. 65 // Unused and deleted entries are marked by 0 and 1, respectively.
68 static const int _UNUSED_PAIR = 0; 66 static const int _UNUSED_PAIR = 0;
69 static const int _DELETED_PAIR = 1; 67 static const int _DELETED_PAIR = 1;
70 68
69 // On 32-bit, the top bits are wasted to avoid Mint allocation.
70 // TODO(koda): Reclaim the bits by making the compiler treat hash patterns
71 // as unsigned words.
72 static int _indexSizeToHashMask(int indexSize) {
73 int indexBits = indexSize.bitLength - 2;
74 return internal.is64Bit ? (1 << (32 - indexBits)) - 1 :
75 (1 << (30 - indexBits)) - 1;
76 }
77
71 static int _hashPattern(int fullHash, int hashMask, int size) { 78 static int _hashPattern(int fullHash, int hashMask, int size) {
72 final int maskedHash = fullHash & hashMask; 79 final int maskedHash = fullHash & hashMask;
73 // TODO(koda): Consider keeping bit length and use left shift. 80 // TODO(koda): Consider keeping bit length and use left shift.
74 return (maskedHash == 0) ? (size >> 1) : maskedHash * (size >> 1); 81 return (maskedHash == 0) ? (size >> 1) : maskedHash * (size >> 1);
75 } 82 }
76 83
77 // Linear probing. 84 // Linear probing.
78 static int _firstProbe(int fullHash, int sizeMask) { 85 static int _firstProbe(int fullHash, int sizeMask) {
79 final int i = fullHash & sizeMask; 86 final int i = fullHash & sizeMask;
80 // Light, fast shuffle to mitigate bad hashCode (e.g., sequential). 87 // Light, fast shuffle to mitigate bad hashCode (e.g., sequential).
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
125 // TODO(koda): Consider in-place compaction and more costly CME check. 132 // TODO(koda): Consider in-place compaction and more costly CME check.
126 _init(_index.length, _hashMask, _data, _usedData); 133 _init(_index.length, _hashMask, _data, _usedData);
127 } else { 134 } else {
128 // TODO(koda): Support 32->64 bit transition (and adjust _hashMask). 135 // TODO(koda): Support 32->64 bit transition (and adjust _hashMask).
129 _init(_index.length << 1, _hashMask >> 1, _data, _usedData); 136 _init(_index.length << 1, _hashMask >> 1, _data, _usedData);
130 } 137 }
131 } 138 }
132 139
133 void clear() { 140 void clear() {
134 if (!isEmpty) { 141 if (!isEmpty) {
135 _init(_index.length, _hashMask); 142 // Use _data.length, since _index might be null.
143 _init(_data.length, _hashMask);
136 } 144 }
137 } 145 }
138 146
139 // Allocate new _index and _data, and optionally copy existing contents. 147 // Allocate new _index and _data, and optionally copy existing contents.
140 void _init(int size, int hashMask, [List oldData, int oldUsed]) { 148 void _init(int size, int hashMask, [List oldData, int oldUsed]) {
141 assert(size & (size - 1) == 0); 149 assert(size & (size - 1) == 0);
142 assert(_HashBase._UNUSED_PAIR == 0); 150 assert(_HashBase._UNUSED_PAIR == 0);
143 _index = new Uint32List(size); 151 _index = new Uint32List(size);
144 _hashMask = hashMask; 152 _hashMask = hashMask;
145 _data = new List(size); 153 _data = new List(size);
146 _usedData = 0; 154 _usedData = 0;
147 _deletedKeys = 0; 155 _deletedKeys = 0;
148 if (oldData != null) { 156 if (oldData != null) {
149 for (int i = 0; i < oldUsed; i += 2) { 157 for (int i = 0; i < oldUsed; i += 2) {
150 var key = oldData[i]; 158 var key = oldData[i];
151 if (!_HashBase._isDeleted(oldData, key)) { 159 if (!_HashBase._isDeleted(oldData, key)) {
152 // TODO(koda): While there are enough hash bits, avoid hashCode calls. 160 // TODO(koda): While there are enough hash bits, avoid hashCode calls.
153 this[key] = oldData[i + 1]; 161 this[key] = oldData[i + 1];
154 } 162 }
155 } 163 }
156 } 164 }
157 } 165 }
166
167 int _getIndexLength() {
168 return (_index == null) ? _regenerateIndex() : _index.length;
169 }
170
171 int _regenerateIndex() {
172 assert(_index == null);
173 _index = new Uint32List(_data.length);
174 assert(_hashMask == 0);
175 _hashMask = _HashBase._indexSizeToHashMask(_index.length);
176 final int tmpUsed = _usedData;
177 _usedData = 0;
178 for (int i = 0; i < tmpUsed; i += 2) {
179 // TODO(koda): Avoid redundant equality tests and stores into _data.
180 this[_data[i]] = _data[i + 1];
181 }
182 return _index.length;
183 }
158 184
159 void _insert(K key, V value, int hashPattern, int i) { 185 void _insert(K key, V value, int hashPattern, int i) {
160 if (_usedData == _data.length) { 186 if (_usedData == _data.length) {
161 _rehash(); 187 _rehash();
162 this[key] = value; 188 this[key] = value;
163 } else { 189 } else {
164 assert(1 <= hashPattern && hashPattern < (1 << 32)); 190 assert(1 <= hashPattern && hashPattern < (1 << 32));
165 final int index = _usedData >> 1; 191 final int index = _usedData >> 1;
166 assert((index & hashPattern) == 0); 192 assert((index & hashPattern) == 0);
167 _index[i] = hashPattern | index; 193 _index[i] = hashPattern | index;
(...skipping 24 matching lines...) Expand all
192 } 218 }
193 } 219 }
194 } 220 }
195 i = _HashBase._nextProbe(i, sizeMask); 221 i = _HashBase._nextProbe(i, sizeMask);
196 pair = _index[i]; 222 pair = _index[i];
197 } 223 }
198 return firstDeleted >= 0 ? -firstDeleted : -i; 224 return firstDeleted >= 0 ? -firstDeleted : -i;
199 } 225 }
200 226
201 void operator[]=(K key, V value) { 227 void operator[]=(K key, V value) {
202 final int size = _index.length; 228 final int size = _getIndexLength();
203 final int sizeMask = size - 1; 229 final int sizeMask = size - 1;
204 final int fullHash = _hashCode(key); 230 final int fullHash = _hashCode(key);
205 final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size); 231 final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);
206 final int d = _findValueOrInsertPoint(key, fullHash, hashPattern, size); 232 final int d = _findValueOrInsertPoint(key, fullHash, hashPattern, size);
207 if (d > 0) { 233 if (d > 0) {
208 _data[d] = value; 234 _data[d] = value;
209 } else { 235 } else {
210 final int i = -d; 236 final int i = -d;
211 _insert(key, value, hashPattern, i); 237 _insert(key, value, hashPattern, i);
212 } 238 }
213 } 239 }
214 240
215 V putIfAbsent(K key, V ifAbsent()) { 241 V putIfAbsent(K key, V ifAbsent()) {
216 final int size = _index.length; 242 final int size = _getIndexLength();
217 final int sizeMask = size - 1; 243 final int sizeMask = size - 1;
218 final int maxEntries = size >> 1; 244 final int maxEntries = size >> 1;
219 final int fullHash = _hashCode(key); 245 final int fullHash = _hashCode(key);
220 final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size); 246 final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);
221 final int d = _findValueOrInsertPoint(key, fullHash, hashPattern, size); 247 final int d = _findValueOrInsertPoint(key, fullHash, hashPattern, size);
222 if (d > 0) { 248 if (d > 0) {
223 return _data[d]; 249 return _data[d];
224 } 250 }
225 // 'ifAbsent' is allowed to modify the map. 251 // 'ifAbsent' is allowed to modify the map.
226 List oldData = _data; 252 List oldData = _data;
227 int oldCheckSum = _checkSum; 253 int oldCheckSum = _checkSum;
228 V value = ifAbsent(); 254 V value = ifAbsent();
229 if (_isModifiedSince(oldData, oldCheckSum)) { 255 if (_isModifiedSince(oldData, oldCheckSum)) {
230 this[key] = value; 256 this[key] = value;
231 } else { 257 } else {
232 final int i = -d; 258 final int i = -d;
233 _insert(key, value, hashPattern, i); 259 _insert(key, value, hashPattern, i);
234 } 260 }
235 return value; 261 return value;
236 } 262 }
237 263
238 V remove(Object key) { 264 V remove(Object key) {
239 final int size = _index.length; 265 final int size = _getIndexLength();
240 final int sizeMask = size - 1; 266 final int sizeMask = size - 1;
241 final int maxEntries = size >> 1; 267 final int maxEntries = size >> 1;
242 final int fullHash = _hashCode(key); 268 final int fullHash = _hashCode(key);
243 final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size); 269 final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);
244 int i = _HashBase._firstProbe(fullHash, sizeMask); 270 int i = _HashBase._firstProbe(fullHash, sizeMask);
245 int pair = _index[i]; 271 int pair = _index[i];
246 while (pair != _HashBase._UNUSED_PAIR) { 272 while (pair != _HashBase._UNUSED_PAIR) {
247 if (pair != _HashBase._DELETED_PAIR) { 273 if (pair != _HashBase._DELETED_PAIR) {
248 final int entry = hashPattern ^ pair; 274 final int entry = hashPattern ^ pair;
249 if (entry < maxEntries) { 275 if (entry < maxEntries) {
250 final int d = entry << 1; 276 final int d = entry << 1;
251 if (_equals(key, _data[d])) { 277 if (_equals(key, _data[d])) {
252 _index[i] = _HashBase._DELETED_PAIR; 278 _index[i] = _HashBase._DELETED_PAIR;
253 _HashBase._setDeletedAt(_data, d); 279 _HashBase._setDeletedAt(_data, d);
254 V value = _data[d + 1]; 280 V value = _data[d + 1];
255 _HashBase._setDeletedAt(_data, d + 1); 281 _HashBase._setDeletedAt(_data, d + 1);
256 ++_deletedKeys; 282 ++_deletedKeys;
257 return value; 283 return value;
258 } 284 }
259 } 285 }
260 } 286 }
261 i = _HashBase._nextProbe(i, sizeMask); 287 i = _HashBase._nextProbe(i, sizeMask);
262 pair = _index[i]; 288 pair = _index[i];
263 } 289 }
264 return null; 290 return null;
265 } 291 }
266 292
267 // If key is absent, return _data (which is never a value). 293 // If key is absent, return _data (which is never a value).
268 Object _getValueOrData(Object key) { 294 Object _getValueOrData(Object key) {
269 final int size = _index.length; 295 final int size = _getIndexLength();
270 final int sizeMask = size - 1; 296 final int sizeMask = size - 1;
271 final int maxEntries = size >> 1; 297 final int maxEntries = size >> 1;
272 final int fullHash = _hashCode(key); 298 final int fullHash = _hashCode(key);
273 final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size); 299 final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);
274 int i = _HashBase._firstProbe(fullHash, sizeMask); 300 int i = _HashBase._firstProbe(fullHash, sizeMask);
275 int pair = _index[i]; 301 int pair = _index[i];
276 while (pair != _HashBase._UNUSED_PAIR) { 302 while (pair != _HashBase._UNUSED_PAIR) {
277 if (pair != _HashBase._DELETED_PAIR) { 303 if (pair != _HashBase._DELETED_PAIR) {
278 final int entry = hashPattern ^ pair; 304 final int entry = hashPattern ^ pair;
279 if (entry < maxEntries) { 305 if (entry < maxEntries) {
(...skipping 274 matching lines...) Expand 10 before | Expand all | Expand 10 after
554 E lookup(Object o) => _validKey(o) ? super.lookup(o) : null; 580 E lookup(Object o) => _validKey(o) ? super.lookup(o) : null;
555 bool remove(Object o) => _validKey(o) ? super.remove(o) : false; 581 bool remove(Object o) => _validKey(o) ? super.remove(o) : false;
556 582
557 _CompactLinkedCustomHashSet(this._equality, this._hasher, validKey) 583 _CompactLinkedCustomHashSet(this._equality, this._hasher, validKey)
558 : _validKey = (validKey != null) ? validKey : new _TypeTest<E>().test; 584 : _validKey = (validKey != null) ? validKey : new _TypeTest<E>().test;
559 585
560 Set<E> toSet() => 586 Set<E> toSet() =>
561 new _CompactLinkedCustomHashSet<E>(_equality, _hasher, _validKey) 587 new _CompactLinkedCustomHashSet<E>(_equality, _hasher, _validKey)
562 ..addAll(this); 588 ..addAll(this);
563 } 589 }
OLDNEW
« no previous file with comments | « no previous file | runtime/vm/object.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698