runtime/lib/compact_hash.dart - Issue 1151013005: Serialize maps without hashes.

Side by Side Diff: runtime/lib/compact_hash.dart

Issue 1151013005: Serialize maps without hashes. (Closed) Base URL: git@github.com:dart-lang/sdk.git@master

Patch Set: Make assertion actually hold Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file	1 // Copyright (c) 2015, the Dart project authors. Please see the AUTHORS file

2 // for details. All rights reserved. Use of this source code is governed by a	2 // for details. All rights reserved. Use of this source code is governed by a

3 // BSD-style license that can be found in the LICENSE file.	3 // BSD-style license that can be found in the LICENSE file.

4	4

5 import 'dart:typed_data';	5 import 'dart:typed_data';

6 import 'dart:_internal' as internal;	6 import 'dart:_internal' as internal;

7	7

8 // Hash table with open addressing that separates the index from keys/values.	8 // Hash table with open addressing that separates the index from keys/values.

9	9

10 abstract class _HashFieldBase {	10 abstract class _HashFieldBase {

11 // Each occupied entry in _index is a fixed-size integer that encodes a pair:	11 // Each occupied entry in _index is a fixed-size integer that encodes a pair:

12 // [ hash pattern for key \| index of entry in _data ]	12 // [ hash pattern for key \| index of entry in _data ]

13 // The hash pattern is based on hashCode, but is guaranteed to be non-zero.	13 // The hash pattern is based on hashCode, but is guaranteed to be non-zero.

14 // The length of _index is always a power of two, and there is always at	14 // The length of _index is always a power of two, and there is always at

15 // least one unoccupied entry.	15 // least one unoccupied entry.

	16 // NOTE: When maps are deserialized, their _index and _hashMask is regenerated

	17 // lazily by _regenerateIndex.

	18 // TODO(koda): Consider also using null _index for tiny, linear-search tables.

16 Uint32List _index = new Uint32List(_HashBase._INITIAL_INDEX_SIZE);	19 Uint32List _index = new Uint32List(_HashBase._INITIAL_INDEX_SIZE);

17	20

18 // Cached in-place mask for the hash pattern component. On 32-bit, the top	21 // Cached in-place mask for the hash pattern component.

19 // bits are wasted to avoid Mint allocation.	22 int _hashMask = _HashBase._indexSizeToHashMask(_HashBase._INITIAL_INDEX_SIZE);

20 // TODO(koda): Reclaim the bits by making the compiler treat hash patterns

21 // as unsigned words.

22 int _hashMask = internal.is64Bit ?

23 (1 << (32 - _HashBase._INITIAL_INDEX_BITS)) - 1 :

24 (1 << (30 - _HashBase._INITIAL_INDEX_BITS)) - 1;

25	23

26 // Fixed-length list of keys (set) or key/value at even/odd indices (map).	24 // Fixed-length list of keys (set) or key/value at even/odd indices (map).

27 List _data = new List(_HashBase._INITIAL_INDEX_SIZE);	25 List _data = new List(_HashBase._INITIAL_INDEX_SIZE);

28	26

29 // Length of _data that is used (i.e., keys + values for a map).	27 // Length of _data that is used (i.e., keys + values for a map).

30 int _usedData = 0;	28 int _usedData = 0;

31	29

32 // Number of deleted keys.	30 // Number of deleted keys.

33 int _deletedKeys = 0;	31 int _deletedKeys = 0;

34 }	32 }

(...skipping 25 matching lines...) Expand all Loading...
60 // The length of _index is twice the number of entries in _data, and both	58 // The length of _index is twice the number of entries in _data, and both

61 // are doubled when _data is full. Thus, _index will have a max load factor	59 // are doubled when _data is full. Thus, _index will have a max load factor

62 // of 1/2, which enables one more bit to be used for the hash.	60 // of 1/2, which enables one more bit to be used for the hash.

63 // TODO(koda): Consider growing _data by factor sqrt(2), twice as often.	61 // TODO(koda): Consider growing _data by factor sqrt(2), twice as often.

64 static const int _INITIAL_INDEX_BITS = 3;	62 static const int _INITIAL_INDEX_BITS = 3;

65 static const int _INITIAL_INDEX_SIZE = 1 << (_INITIAL_INDEX_BITS + 1);	63 static const int _INITIAL_INDEX_SIZE = 1 << (_INITIAL_INDEX_BITS + 1);

66	64

67 // Unused and deleted entries are marked by 0 and 1, respectively.	65 // Unused and deleted entries are marked by 0 and 1, respectively.

68 static const int _UNUSED_PAIR = 0;	66 static const int _UNUSED_PAIR = 0;

69 static const int _DELETED_PAIR = 1;	67 static const int _DELETED_PAIR = 1;

70	68

	69 // On 32-bit, the top bits are wasted to avoid Mint allocation.

	70 // TODO(koda): Reclaim the bits by making the compiler treat hash patterns

	71 // as unsigned words.

	72 static int _indexSizeToHashMask(int indexSize) {

	73 int indexBits = indexSize.bitLength - 1;

	74 return internal.is64Bit ? (1 << (32 - indexBits)) - 1 :

	75 (1 << (30 - indexBits)) - 1;

	76 }

	77

71 static int _hashPattern(int fullHash, int hashMask, int size) {	78 static int _hashPattern(int fullHash, int hashMask, int size) {

72 final int maskedHash = fullHash & hashMask;	79 final int maskedHash = fullHash & hashMask;

73 // TODO(koda): Consider keeping bit length and use left shift.	80 // TODO(koda): Consider keeping bit length and use left shift.

74 return (maskedHash == 0) ? (size >> 1) : maskedHash * (size >> 1);	81 return (maskedHash == 0) ? (size >> 1) : maskedHash * (size >> 1);

75 }	82 }

76	83

77 // Linear probing.	84 // Linear probing.

78 static int _firstProbe(int fullHash, int sizeMask) {	85 static int _firstProbe(int fullHash, int sizeMask) {

79 final int i = fullHash & sizeMask;	86 final int i = fullHash & sizeMask;

80 // Light, fast shuffle to mitigate bad hashCode (e.g., sequential).	87 // Light, fast shuffle to mitigate bad hashCode (e.g., sequential).

(...skipping 44 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
125 // TODO(koda): Consider in-place compaction and more costly CME check.	132 // TODO(koda): Consider in-place compaction and more costly CME check.

126 _init(_index.length, _hashMask, _data, _usedData);	133 _init(_index.length, _hashMask, _data, _usedData);

127 } else {	134 } else {

128 // TODO(koda): Support 32->64 bit transition (and adjust _hashMask).	135 // TODO(koda): Support 32->64 bit transition (and adjust _hashMask).

129 _init(_index.length << 1, _hashMask >> 1, _data, _usedData);	136 _init(_index.length << 1, _hashMask >> 1, _data, _usedData);

130 }	137 }

131 }	138 }

132	139

133 void clear() {	140 void clear() {

134 if (!isEmpty) {	141 if (!isEmpty) {

135 _init(_index.length, _hashMask);	142 // Use _data.length, since _index might be null.

	143 _init(_data.length, _hashMask);

136 }	144 }

137 }	145 }

138	146

139 // Allocate new _index and _data, and optionally copy existing contents.	147 // Allocate new _index and _data, and optionally copy existing contents.

140 void _init(int size, int hashMask, [List oldData, int oldUsed]) {	148 void _init(int size, int hashMask, [List oldData, int oldUsed]) {

141 assert(size & (size - 1) == 0);	149 assert(size & (size - 1) == 0);

142 assert(_HashBase._UNUSED_PAIR == 0);	150 assert(_HashBase._UNUSED_PAIR == 0);

143 _index = new Uint32List(size);	151 _index = new Uint32List(size);

144 _hashMask = hashMask;	152 _hashMask = hashMask;

145 _data = new List(size);	153 _data = new List(size);

146 _usedData = 0;	154 _usedData = 0;

147 _deletedKeys = 0;	155 _deletedKeys = 0;

148 if (oldData != null) {	156 if (oldData != null) {

149 for (int i = 0; i < oldUsed; i += 2) {	157 for (int i = 0; i < oldUsed; i += 2) {

150 var key = oldData[i];	158 var key = oldData[i];

151 if (!_HashBase._isDeleted(oldData, key)) {	159 if (!_HashBase._isDeleted(oldData, key)) {

152 // TODO(koda): While there are enough hash bits, avoid hashCode calls.	160 // TODO(koda): While there are enough hash bits, avoid hashCode calls.

153 this[key] = oldData[i + 1];	161 this[key] = oldData[i + 1];

154 }	162 }

155 }	163 }

156 }	164 }

157 }	165 }

	166

	167 int _regenerateIndex() {

	168 assert(_index == null);

	169 _index = new Uint32List(_data.length);

	170 assert(_hashMask == 0);

	171 _hashMask = _HashBase._indexSizeToHashMask(_index.length);

	172 final int tmpUsed = _usedData;

	173 _usedData = 0;

	174 for (int i = 0; i < tmpUsed; i += 2) {

	175 // TODO(koda): Avoid redundant equality tests and stores into _data.

	176 this[_data[i]] = _data[i + 1];

	177 }

	178 return _index.length;
	siva 2015/06/01 22:19:36 instead of checking every caller to this function instead of checking every caller to this function why not just do if (_index == null) { ...... ...... } return _index.length; koda 2015/06/01 23:01:01 Since we don't do partial inlining (right?), that Show quoted text On 2015/06/01 22:19:36, siva wrote: > instead of checking every caller to this function why not just do > if (_index == null) { > ...... > ...... > } > return _index.length; Since we don't do partial inlining (right?), that will either add a function call to all those operations or blow up their code size. Do you think this is a premature optimization? siva 2015/06/02 00:13:35 I don't feel strongly about moving the null check Show quoted text On 2015/06/01 23:01:01, koda wrote: > On 2015/06/01 22:19:36, siva wrote: > > instead of checking every caller to this function why not just do > > if (_index == null) { > > ...... > > ...... > > } > > return _index.length; > > Since we don't do partial inlining (right?), that will either add a function > call to all those operations or blow up their code size. > > Do you think this is a premature optimization? I don't feel strongly about moving the null check into the function but it does make the code seem verbose at the call sites.
	179 }

158	180

159 void _insert(K key, V value, int hashPattern, int i) {	181 void _insert(K key, V value, int hashPattern, int i) {

160 if (_usedData == _data.length) {	182 if (_usedData == _data.length) {

161 _rehash();	183 _rehash();

162 this[key] = value;	184 this[key] = value;

163 } else {	185 } else {

164 assert(1 <= hashPattern && hashPattern < (1 << 32));	186 assert(1 <= hashPattern && hashPattern < (1 << 32));

165 final int index = _usedData >> 1;	187 final int index = _usedData >> 1;

166 assert((index & hashPattern) == 0);	188 assert((index & hashPattern) == 0);

167 _index[i] = hashPattern \| index;	189 _index[i] = hashPattern \| index;

(...skipping 24 matching lines...) Expand all Loading...
192 }	214 }

193 }	215 }

194 }	216 }

195 i = _HashBase._nextProbe(i, sizeMask);	217 i = _HashBase._nextProbe(i, sizeMask);

196 pair = _index[i];	218 pair = _index[i];

197 }	219 }

198 return firstDeleted >= 0 ? -firstDeleted : -i;	220 return firstDeleted >= 0 ? -firstDeleted : -i;

199 }	221 }

200	222

201 void operator[]=(K key, V value) {	223 void operator[]=(K key, V value) {

202 final int size = _index.length;	224 final int size = (_index == null) ? _regenerateIndex() : _index.length;

203 final int sizeMask = size - 1;	225 final int sizeMask = size - 1;

204 final int fullHash = _hashCode(key);	226 final int fullHash = _hashCode(key);

205 final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);	227 final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);

206 final int d = _findValueOrInsertPoint(key, fullHash, hashPattern, size);	228 final int d = _findValueOrInsertPoint(key, fullHash, hashPattern, size);

207 if (d > 0) {	229 if (d > 0) {

208 _data[d] = value;	230 _data[d] = value;

209 } else {	231 } else {

210 final int i = -d;	232 final int i = -d;

211 _insert(key, value, hashPattern, i);	233 _insert(key, value, hashPattern, i);

212 }	234 }

213 }	235 }

214	236

215 V putIfAbsent(K key, V ifAbsent()) {	237 V putIfAbsent(K key, V ifAbsent()) {

216 final int size = _index.length;	238 final int size = (_index == null) ? _regenerateIndex() : _index.length;

217 final int sizeMask = size - 1;	239 final int sizeMask = size - 1;

218 final int maxEntries = size >> 1;	240 final int maxEntries = size >> 1;

219 final int fullHash = _hashCode(key);	241 final int fullHash = _hashCode(key);

220 final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);	242 final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);

221 final int d = _findValueOrInsertPoint(key, fullHash, hashPattern, size);	243 final int d = _findValueOrInsertPoint(key, fullHash, hashPattern, size);

222 if (d > 0) {	244 if (d > 0) {

223 return _data[d];	245 return _data[d];

224 }	246 }

225 // 'ifAbsent' is allowed to modify the map.	247 // 'ifAbsent' is allowed to modify the map.

226 List oldData = _data;	248 List oldData = _data;

227 int oldCheckSum = _checkSum;	249 int oldCheckSum = _checkSum;

228 V value = ifAbsent();	250 V value = ifAbsent();

229 if (_isModifiedSince(oldData, oldCheckSum)) {	251 if (_isModifiedSince(oldData, oldCheckSum)) {

230 this[key] = value;	252 this[key] = value;

231 } else {	253 } else {

232 final int i = -d;	254 final int i = -d;

233 _insert(key, value, hashPattern, i);	255 _insert(key, value, hashPattern, i);

234 }	256 }

235 return value;	257 return value;

236 }	258 }

237	259

238 V remove(Object key) {	260 V remove(Object key) {

239 final int size = _index.length;	261 final int size = (_index == null) ? _regenerateIndex() : _index.length;

240 final int sizeMask = size - 1;	262 final int sizeMask = size - 1;

241 final int maxEntries = size >> 1;	263 final int maxEntries = size >> 1;

242 final int fullHash = _hashCode(key);	264 final int fullHash = _hashCode(key);

243 final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);	265 final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);

244 int i = _HashBase._firstProbe(fullHash, sizeMask);	266 int i = _HashBase._firstProbe(fullHash, sizeMask);

245 int pair = _index[i];	267 int pair = _index[i];

246 while (pair != _HashBase._UNUSED_PAIR) {	268 while (pair != _HashBase._UNUSED_PAIR) {

247 if (pair != _HashBase._DELETED_PAIR) {	269 if (pair != _HashBase._DELETED_PAIR) {

248 final int entry = hashPattern ^ pair;	270 final int entry = hashPattern ^ pair;

249 if (entry < maxEntries) {	271 if (entry < maxEntries) {

250 final int d = entry << 1;	272 final int d = entry << 1;

251 if (_equals(key, _data[d])) {	273 if (_equals(key, _data[d])) {

252 _index[i] = _HashBase._DELETED_PAIR;	274 _index[i] = _HashBase._DELETED_PAIR;

253 _HashBase._setDeletedAt(_data, d);	275 _HashBase._setDeletedAt(_data, d);

254 V value = _data[d + 1];	276 V value = _data[d + 1];

255 _HashBase._setDeletedAt(_data, d + 1);	277 _HashBase._setDeletedAt(_data, d + 1);

256 ++_deletedKeys;	278 ++_deletedKeys;

257 return value;	279 return value;

258 }	280 }

259 }	281 }

260 }	282 }

261 i = _HashBase._nextProbe(i, sizeMask);	283 i = _HashBase._nextProbe(i, sizeMask);

262 pair = _index[i];	284 pair = _index[i];

263 }	285 }

264 return null;	286 return null;

265 }	287 }

266	288

267 // If key is absent, return _data (which is never a value).	289 // If key is absent, return _data (which is never a value).

268 Object _getValueOrData(Object key) {	290 Object _getValueOrData(Object key) {

269 final int size = _index.length;	291 final int size = (_index == null) ? _regenerateIndex() : _index.length;

270 final int sizeMask = size - 1;	292 final int sizeMask = size - 1;

271 final int maxEntries = size >> 1;	293 final int maxEntries = size >> 1;

272 final int fullHash = _hashCode(key);	294 final int fullHash = _hashCode(key);

273 final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);	295 final int hashPattern = _HashBase._hashPattern(fullHash, _hashMask, size);

274 int i = _HashBase._firstProbe(fullHash, sizeMask);	296 int i = _HashBase._firstProbe(fullHash, sizeMask);

275 int pair = _index[i];	297 int pair = _index[i];

276 while (pair != _HashBase._UNUSED_PAIR) {	298 while (pair != _HashBase._UNUSED_PAIR) {

277 if (pair != _HashBase._DELETED_PAIR) {	299 if (pair != _HashBase._DELETED_PAIR) {

278 final int entry = hashPattern ^ pair;	300 final int entry = hashPattern ^ pair;

279 if (entry < maxEntries) {	301 if (entry < maxEntries) {

(...skipping 274 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
554 E lookup(Object o) => _validKey(o) ? super.lookup(o) : null;	576 E lookup(Object o) => _validKey(o) ? super.lookup(o) : null;

555 bool remove(Object o) => _validKey(o) ? super.remove(o) : false;	577 bool remove(Object o) => _validKey(o) ? super.remove(o) : false;

556	578

557 _CompactLinkedCustomHashSet(this._equality, this._hasher, validKey)	579 _CompactLinkedCustomHashSet(this._equality, this._hasher, validKey)

558 : _validKey = (validKey != null) ? validKey : new _TypeTest<E>().test;	580 : _validKey = (validKey != null) ? validKey : new _TypeTest<E>().test;

559	581

560 Set<E> toSet() =>	582 Set<E> toSet() =>

561 new _CompactLinkedCustomHashSet<E>(_equality, _hasher, _validKey)	583 new _CompactLinkedCustomHashSet<E>(_equality, _hasher, _validKey)

562 ..addAll(this);	584 ..addAll(this);

563 }	585 }

OLD	NEW

« no previous file with comments | « no previous file | runtime/vm/raw_object.h » ('j') | runtime/vm/raw_object_snapshot.cc » ('J')