charted/lib/charts/data_transformers/aggregation.dart - Issue 1400473008: Roll Observatory packages and add a roll script

Side by Side Diff: charted/lib/charts/data_transformers/aggregation.dart

Issue 1400473008: Roll Observatory packages and add a roll script (Closed) Base URL: git@github.com:dart-lang/observatory_pub_packages.git@master

Patch Set: Created 5 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 /*

2 * Copyright 2014 Google Inc. All rights reserved.

3 *

4 * Use of this source code is governed by a BSD-style

5 * license that can be found in the LICENSE file or at

6 * https://developers.google.com/open-source/licenses/bsd

7 */

8

9 part of charted.charts;

10

11 /**

12 * Function callback to filter items in the input

13 */

14 typedef bool AggregationFilterFunc(var item);

15

16 typedef dynamic FieldAccessor(dynamic item, dynamic key);

17

18

19 // TODO(midoringo, prsd): Consider splitting each aggregation type into its own

20 // strategy object for readability, maintainability, and scalability.

21 /**

22 * Given list of items, dimensions and facts, compute

23 * aggregates (COUNT, SUM, MIN, MAX) for facts at each dimension level.

24 */

25 class AggregationModel {

26

27 // Number of aggregations we collect on each fact

28 int _aggregationTypesCount = 0;

29

30 // Currently supported list of aggregations.

31 static final List<String> supportedAggregationTypes =

32 ['sum', 'min', 'max', 'valid'];

33

34 // Computed aggregation types.

35 List<String> computedAggregationTypes;

36

37 // Offsets of aggregations that are computed once per fact per dimension

38 // If an offset is null, it will not be computed

39 int _offsetSum;

40 int _offsetMin;

41 int _offsetMax;

42 int _offsetCnt;

43

44 // Offset of aggregations that one computed once per dimension

45 int _offsetFilteredCount;

46 int _offsetSortedIndex;

47

48 // Number of bits we can use in an integer without making it medium int

49 static const int SMI_BITS = 30;

50

51 // Computed aggregations

52 static const int AGGREGATIONS_BUFFER_LENGTH = 1024 * 1024;

53 Float64List _aggregations;

54

55 // Cache of fact values

56 Float64List _factsCache;

57

58 // Cache of enumerated dimensions

59 List<int> _dimEnumCache;

60

61 // Sorted list of indices (for data in _rows/_dimEnumCache/_factsCache)

62 List<int> _sorted;

63

64 // Enumeration map for dimension values

65 List<Map<dynamic, int>> _dimToIntMap;

66

67 // Sort orders for dimension values

68 List<List<int>> _dimSortOrders;

69

70 // Input

71 List _rows;

72 List _dimFields;

73 List _factFields;

74

75 // When groupBy is called, this value represents the

76 // common prefix of the old and new dimensions list.

77 int _dimPrefixLength = 0;

78

79 // Dimensions mapped to computed aggregates

80 Map<String, int> _dimToAggrMap;

81

82 // null when no filter was applied.

83 // Otherwise, store a bitmap indicating if an item was included.

84 List<int> _filterResults;

85

86 // Cache of entities created for the facts on this aggregation view.

87 Map<String, AggregationItem> _entityCache;

88

89 // List of field names that aggregation items will have.

90 List<String> _itemFieldNamesCache;

91

92 // Walk through the map, by splitting key at '.'

93 final bool walkThroughMap;

94

95 // Map of fieldName to comparator function.

96 final Map<String, Function> comparators;

97

98 // Timing operations

99 static final Logger _logger = new Logger('aggregations');

100 Stopwatch _timeItWatch;

101 String _timeItName;

102

103 FieldAccessor dimensionAccessor;

104 FieldAccessor factsAccessor;

105

106 /**

107 * Create a new [AggregationModel] from a [collection] of items,

108 * list of [dimensions] on which the items are grouped and a list of [facts]

109 * on which aggregations are computed.

110 */

111 AggregationModel(List collection, List dimensions,

112 List facts,

113 { List<String> aggregationTypes,

114 this.walkThroughMap: false,

115 this.comparators,

116 this.dimensionAccessor,

117 this.factsAccessor}) {

118 _init(collection, dimensions, facts, aggregationTypes);

119 }

120

121 void _timeItStart(String name) {

122 _timeItName = name;

123 _timeItWatch = new Stopwatch();

124 _timeItWatch.start();

125 }

126

127 void _timeItEnd() {

128 _timeItWatch.stop();

129 _logger.info('[aggregations/$_timeItName] '

130 '${_timeItWatch.elapsed.inMilliseconds}ms/${_rows.length}r');

131 }

132

133 List get factFields => _factFields;

134 List get dimensionFields => _dimFields;

135

136 /**

137 * Initialize the view

138 */

139 void _init(List collection, List dimensions,

140 List facts, List<String> aggregationTypes) {

141 if (collection == null) {

142 throw new ArgumentError('Data cannot be empty or null');

143 }

144

145 if (facts == null \|\| facts.isEmpty) {

146 throw new ArgumentError('Facts cannot be empty or null');

147 }

148

149 if (dimensions == null) {

150 dimensions = [];

151 }

152

153 if (dimensionAccessor == null) {

154 dimensionAccessor = _fetch;

155 }

156

157 if (factsAccessor == null) {

158 factsAccessor = _fetch;

159 }

160

161 if (aggregationTypes != null) {

162 Iterable unknownAggregationTypes =

163 aggregationTypes.where((e) => !supportedAggregationTypes.contains(e));

164 if (unknownAggregationTypes.length != 0) {

165 throw new ArgumentError(

166 'Unknown aggregation types: ${unknownAggregationTypes.join(', ')}');

167 }

168 } else {

169 aggregationTypes = ['sum'];

170 }

171

172 // Always adding 'count' for correct computation of average and count.

173 if (!aggregationTypes.contains('valid')) {

174 aggregationTypes.add('valid');

175 }

176

177 _rows = collection;

178 _dimFields = new List.from(dimensions, growable: false);

179 _factFields = new List.from(facts, growable: false);

180 _entityCache = new Map<String, AggregationItem>();

181

182 _createBuffers();

183

184 _aggregationTypesCount = aggregationTypes.length;

185 for (int i = 0; i < _aggregationTypesCount; i++) {

186 switch(aggregationTypes[i]) {

187 case 'sum':

188 _offsetSum = i;

189 break;

190 case 'min':

191 _offsetMin = i;

192 break;

193 case 'max':

194 _offsetMax = i;

195 break;

196 case 'valid':

197 _offsetCnt = i;

198 break;

199 }

200 }

201 computedAggregationTypes = new List.from(aggregationTypes, growable: false);

202

203 // Preprocess the data

204 _preprocess();

205 }

206

207 /**

208 * Re-calculate aggregations based on new dimensions.

209 */

210 void groupBy(List dimensions, [AggregationFilterFunc filter = null]) {

211 if (dimensions == null) {

212 dimensions = [];

213 }

214

215 List savedDimFields = _dimFields;

216 _dimFields = new List.from(dimensions, growable: false);

217

218 _dimPrefixLength = 0;

219 while (_dimPrefixLength < _dimFields.length &&

220 _dimPrefixLength < savedDimFields.length &&

221 savedDimFields[_dimPrefixLength] == _dimFields[_dimPrefixLength]) {

222 ++_dimPrefixLength;

223 }

224

225 _createBuffers();

226 _preprocess(groupBy:true);

227

228 // For groupBy, compute immediately.

229 compute(filter);

230

231 // Ensure that cache represents updated dimensions

232 _updateCachedEntities();

233 }

234

235 /**

236 * Create buffers.

237 * This method is called when the object is being created and when

238 * a groupBy is called to change the dimensions on which

239 * aggregations are computed.

240 */

241 void _createBuffers() {

242 // Create both when object is created and groupBy is called

243 _dimEnumCache = new Int32List(_dimFields.length * _rows.length);

244

245 // Create only when the object is created

246 if (_factsCache == null) {

247 _factsCache = new Float64List((_factFields.length + 1) * _rows.length);

248 }

249

250 // Create only when the object is created

251 if (_filterResults == null) {

252 _filterResults = new List<int>((_rows.length) ~/ SMI_BITS + 1);

253 }

254

255 // Create both when object is created and groupBy is called

256 // Reuse dimension enumerations if possible.

257 var oldDimToInt = _dimToIntMap;

258 _dimToIntMap = new List<Map<dynamic, int>>.generate(_dimFields.length,

259 (i) => i < _dimPrefixLength ? oldDimToInt[i] : new Map<dynamic, int>());

260 }

261

262 /**

263 * Check cache entries

264 * When data is grouped by a new dimensions, entities that were

265 * created prior to the groupBy should be cleared and removed from cache

266 * if they aren't valid anymore.

267 * Update the entities that are valid after the groupBy.

268 */

269 void _updateCachedEntities() {

270 List keys = new List.from(_entityCache.keys, growable: false);

271 keys.forEach((key) {

272 _AggregationItemImpl entity = _entityCache[key];

273 if (entity == null) {

274 _entityCache.remove(key);

275 } else if (entity != null && entity.isValid) {

276 if (key.split(':').length <= _dimPrefixLength) {

277 entity.update();

278 } else {

279 _entityCache.remove(key);

280 entity.clear();

281 }

282 }

283 });

284 }

285

286 final Map<String, List> _parsedKeys = {};

287 /**

288 * Get value from a map-like object

289 */

290 dynamic _fetch(var item, String key) {

291 if (walkThroughMap && key.contains('.')) {

292 return walk(item, key, _parsedKeys);

293 } else {

294 return item[key];

295 }

296 }

297

298 /*

299 * Preprocess Data

300 * - Enumerate dimension values

301 * - Create sort orders for dimension values

302 * - Cache facts in lists

303 */

304 void _preprocess({bool groupBy: false}) {

305

306 _timeItStart('preprocess');

307

308 // Enumerate dimensions...

309 // Cache dimensions and facts.

310

311 List<int> dimensionValCount =

312 new List<int>.generate(_dimFields.length, (idx) => 0);

313

314 int dimensionsCount = _dimFields.length;

315 int factsCount = _factFields.length;

316 int rowCount = _rows.length;

317

318 for (int ri = 0, factsDataOffset = 0, dimDataOffset = 0;

319 ri < rowCount; ++ri, factsDataOffset += factsCount,

320 dimDataOffset += dimensionsCount) {

321 var item = _rows[ri];

322

323 // Cache the fact values in the big buffer, but only

324 // when we are initializing (not when a groupBy was called

325 // after initialization)

326 if (!groupBy) {

327 for (int fi = 0; fi < factsCount; fi++) {

328 var value = factsAccessor(item,_factFields[fi]);

329 _factsCache[factsDataOffset + fi] =

330 (value == null) ? double.NAN : value.toDouble();

331 }

332 }

333

334 // Enumerate the dimension values and cache enumerated rows

335 for (int di = 0; di < dimensionsCount; di++) {

336 var dimensionVal = dimensionAccessor(item, _dimFields[di]);

337 int dimensionValEnum = _dimToIntMap[di][dimensionVal];

338 if (dimensionValEnum == null) {

339 _dimToIntMap[di][dimensionVal] = dimensionValCount[di];

340 dimensionValEnum = dimensionValCount[di]++;

341 }

342 _dimEnumCache[dimDataOffset + di] = dimensionValEnum;

343 }

344 }

345

346 // Sort all dimensions internally

347 // The resulting arrays would be used to sort the entire data

348

349 List oldSortOrders = _dimSortOrders;

350 _dimSortOrders = new List.generate(dimensionsCount, (i) {

351 if (groupBy && i < _dimPrefixLength) {

352 return oldSortOrders[i];

353 }

354

355 List dimensionVals = new List.from(_dimToIntMap[i].keys);

356 List retval = new List(_dimToIntMap[i].length);

357

358 // When a comparator is not specified, our implementation of the

359 // comparator tries to gracefully handle null values.

360 dimensionVals.sort(

361 comparators != null && comparators.containsKey(_dimFields[i]) ?

362 comparators[_dimFields[i]] : _defaultDimComparator);

363

364 for (int si = 0; si < retval.length; ++si) {

365 retval[_dimToIntMap[i][dimensionVals[si]]] = si;

366 }

367 return retval;

368 }, growable: false);

369

370 // Create a list of sorted indices - only if we are not having a full

371 // overlap of dimensionFields.

372 if (!groupBy \|\| _dimPrefixLength != _dimFields.length) {

373 _sorted = new List<int>.generate(_rows.length, (i) => i, growable: false);

374 _sorted.sort(_comparator);

375 }

376

377 // Pre-compute frequently used values

378 _offsetSortedIndex = factsCount * _aggregationTypesCount;

379 _offsetFilteredCount = factsCount * _aggregationTypesCount + 1;

380

381 _timeItEnd();

382 }

383

384 // Ensures that null dimension values don't cause an issue with sorting

385 _defaultDimComparator(Comparable left, Comparable right) =>

386 (left == null && right == null) ? 0 :

387 (left == null) ? -1 :

388 (right == null) ? 1 : left.compareTo(right);

389

390 /*

391 * Given item indices in rows, compare them based

392 * on the sort orders created while preprocessing data.

393 */

394 _comparator(int one, int two) {

395 if (one == two) {

396 return 0;

397 }

398

399 int offsetOne = _dimFields.length * one;

400 int offsetTwo = _dimFields.length * two;

401

402 for (int i = 0; i < _dimFields.length; ++i) {

403 int diff = _dimSortOrders[i][_dimEnumCache[offsetOne + i]] -

404 _dimSortOrders[i][_dimEnumCache[offsetTwo + i]];

405 if (diff != 0) {

406 return diff;

407 }

408 }

409 return 0;

410 }

411

412 /**

413 * Compute aggregations

414 * If [filter] is not null, it would be used to filter out items that

415 * should not be included in the aggregates.

416 */

417 void compute([AggregationFilterFunc filter = null]) {

418 _timeItStart('compute');

419

420 _dimToAggrMap = new Map<String, int>();

421 _aggregations = new Float64List(AGGREGATIONS_BUFFER_LENGTH);

422 _filterResults = filter == null ?

423 null : new List<int>.filled((_rows.length ~/ SMI_BITS) + 1, 0);

424

425 int rowCount = _rows.length;

426 int dimensionCount = _dimFields.length;

427 int factsCount = _factFields.length;

428

429 // Saves current dimension value to which we are aggregating

430 // Values of dimensions are in even indices (starting at 0) and

431 // location of respective dimension in buffer is in odd indices.

432 List<int> currentDim = new List<int>(dimensionCount * 2);

433 bool reset = true;

434 bool isNewDimension = false;

435 int aggregationSizePerDim = factsCount * _aggregationTypesCount;

436

437 // Reserve the 0th position for top-level aggregations.

438 int currentBufferPos = (factsCount * _aggregationTypesCount + 2);

439 _dimToAggrMap[''] = 0;

440 _aggregations[_offsetSortedIndex] = 0.0;

441

442

443 for (int ri = 0, index = 0, dimensionDataOffset = 0, factsDataOffset = 0;

444 ri < rowCount; ++ri, reset = false) {

445

446 // If filter is not null, check if this row must be included in

447 // the aggregations and mark it accordingly.

448 index = _sorted[ri];

449 if (filter != null) {

450 if (!filter(_rows[index])) {

451 continue;

452 } else {

453 _filterResults[ri ~/ SMI_BITS] \|= (1 << (ri % SMI_BITS));

454 }

455 }

456

457 dimensionDataOffset = index * dimensionCount;

458 factsDataOffset = index * factsCount;

459

460 // Update top-level aggregations.

461 _updateAggregationsAt(0, factsDataOffset, ri == 0 ? true : false);

462

463 // See which dimensions get effected by this row.

464 // di => the index of the dimension

465 // ci => index of the cached value in [currentDim]

466 for (int di = 0, ci = 0; di < dimensionCount; ++di, ci += 2) {

467 // If a dimension value changed, then all dimensions that are lower

468 // in the hierarchy change too.

469 if (reset \|\|

470 currentDim[ci] != _dimEnumCache[dimensionDataOffset + di]) {

471 currentDim[ci] = _dimEnumCache[dimensionDataOffset + di];

472 currentDim[ci + 1] = currentBufferPos;

473

474 // Save location to aggregations position in the buffer

475 _dimToAggrMap[new List.generate(di + 1,

476 (i) => currentDim[2 * i]).join(':')] = currentBufferPos;

477

478 // Store items start position

479 _aggregations[currentBufferPos + _offsetSortedIndex] = ri.toDouble();

480

481 // After the aggregated values, we save the filtered count,

482 // index of the first item (in sorted)

483 currentBufferPos += (aggregationSizePerDim + 2);

484 reset = true;

485 isNewDimension = true;

486 }

487

488 _updateAggregationsAt(currentDim[ci + 1],

489 factsDataOffset, isNewDimension);

490 isNewDimension = false;

491 }

492 }

493

494 _timeItEnd();

495 }

496

497 /**

498 * Helper function that does the actual aggregations.

499 * This function is called once per row per dimension.

500 */

501 _updateAggregationsAt(int aggrDataOffset,

502 int factsDataOffset, bool isNewDimension) {

503 // Update count.

504 _aggregations[aggrDataOffset + _offsetFilteredCount] += 1;

505

506 // Update aggregation for each of the facts.

507 for (int fi = 0, bufferFactOffset = aggrDataOffset;

508 fi < _factFields.length;

509 bufferFactOffset += _aggregationTypesCount, ++fi) {

510

511 double factValue = _factsCache[factsDataOffset + fi];

512 if (factValue.isNaN) {

513 continue;

514 }

515

516 // Sum

517 if (_offsetSum != null) {

518 _aggregations[bufferFactOffset + _offsetSum] += factValue;

519 }

520

521 // Min

522 if (_offsetMin != null && (isNewDimension \|\| factValue <

523 _aggregations[bufferFactOffset + _offsetMin])) {

524 _aggregations[bufferFactOffset + _offsetMin] = factValue;

525 }

526

527 // Max

528 if (_offsetMax != null && (isNewDimension \|\| factValue >

529 _aggregations[bufferFactOffset + _offsetMax])) {

530 _aggregations[bufferFactOffset + _offsetMax] = factValue;

531 }

532

533 // Count

534 if (_offsetCnt != null) {

535 _aggregations[bufferFactOffset + _offsetCnt]++;

536 }

537 }

538 }

539

540 /*

541 * TODO(prsd):

542 * 1. Implementation of updates and posting updates to entities.

543 * patchEntity and addToEntity must add listeners on AggregationItems

544 * and any changes must be propagated to entities.

545 * 2. Updates (add/remove/update) should do their best to update the

546 * aggregations and then maybe do a delayed recomputation (sort etc;)

547 */

548

549 /**

550 * Update an item.

551 * If aggregates were already computed, they are updated to reflect the

552 * new value and any observers are notified.

553 */

554 void updateItem(dynamic item, String field) {

555 throw new UnimplementedError();

556 }

557

558 /**

559 * Add a new item.

560 * If aggregates were already computed, they are updated to reflect

561 * values on the new item.

562 */

563 void addItem(dynamic item) {

564 throw new UnimplementedError();

565 }

566

567 /**

568 * Remove an existing item.

569 * If aggregates were already computed, they are updated to reflect

570 * facts on the removed item.

571 */

572 void removeItem(dynamic item) {

573 throw new UnimplementedError();

574 }

575

576 /**

577 * Return an [AggregationItem] that represents facts for dimension

578 * represented by [dimension] Only one instance of an entity is created

579 * per dimension (even if this function is called multiple times)

580 *

581 * Callers of this method can observe the returned entity for updates to

582 * aggregations caused by changes to filter or done through add, remove

583 * or modify of items in the collection.

584 */

585 AggregationItem facts(List dimension) {

586 List<int> enumeratedList = new List<int>();

587 for (int i = 0; i < dimension.length; ++i) {

588 enumeratedList.add(_dimToIntMap[i][dimension[i]]);

589 }

590

591 String key = enumeratedList.join(':');

592 AggregationItem item = _entityCache[key];

593

594 if (item == null && _dimToAggrMap.containsKey(key)) {

595 item = new _AggregationItemImpl(this, dimension, key);

596 _entityCache[key] = item;

597 }

598

599 return item;

600 }

601

602 /**

603 * Return a list of values that are present for a dimension field.

604 */

605 List valuesForDimension(dynamic dimensionFieldName) {

606 int di = _dimFields.indexOf(dimensionFieldName);

607 if (di < 0) {

608 return null;

609 }

610 List values = new List.from(_dimToIntMap[di].keys);

611 values.sort(

612 comparators != null && comparators.containsKey(dimensionFieldName) ?

613 comparators[dimensionFieldName] : _defaultDimComparator);

614 return values;

615 }

616 }

617

618 /**

619 * Parse a path for nested map-like objects.

620 * Caches the parsed key in the passed map.

621 *

622 * Takes map keys of the format:

623 * "list(key=val;val=m).another(state=good).state"

624 * and outputs:

625 * ["list", {"key": "val", "val": "m"},

626 * "another", {"state": "good"}, "state"]

627 */

628 List _parseKey(String key, Map parsedKeysCache) {

629 List parts = parsedKeysCache == null ? null : parsedKeysCache[key];

630 if (parts == null && key != null) {

631 parts = new List();

632 if (key.contains(').')) {

633 int start = 0;

634 int cursor = 0;

635 bool inParams = false;

636 List matchKeyVals;

637 Map listMatchingMap = {};

638

639 while (cursor < key.length) {

640 if (!inParams) {

641 cursor = key.indexOf('(', start);

642 if (cursor == -1) {

643 parts.addAll(key.substring(start).split('.'));

644 break;

645 }

646 parts.addAll(key.substring(start, cursor).split('.'));

647 cursor++;

648 start = cursor;

649 inParams = true;

650 } else {

651 cursor = key.indexOf(')', start);

652 if (cursor == -1) {

653 throw new ArgumentError('Invalid field name: $key');

654 }

655 listMatchingMap.clear();

656 matchKeyVals = key.substring(start, cursor).split(';');

657 matchKeyVals.forEach((value) {

658 List keyval = value.split('=');

659 if (keyval.length != 2) {

660 throw new ArgumentError('Invalid field name: ${key}');

661 }

662 listMatchingMap[keyval[0]] = keyval[1];

663 });

664 parts.add(listMatchingMap);

665 cursor += 2;

666 start = cursor;

667 inParams = false;

668 }

669 }

670 } else {

671 parts = key.split('.');

672 }

673 if (parsedKeysCache != null) {

674 parsedKeysCache[key] = parts;

675 }

676 }

677

678 return parts;

679 }

680

681 /**

682 * Walk a map-like structure that could have list in the path.

683 *

684 * Example:

685 * Map testMap = {

686 * "first": "firstval",

687 * "list": [

688 * { "val": "m",

689 * "key": "val",

690 * "another": [

691 * { 'state': 'good' },

692 * { 'state': 'bad' }

693 * ]

694 * },

695 * { "val": "m", "key": "invalid" },

696 * { "val": "o" }

697 * ]

698 * };

699 *

700 * For the above map:

701 * walk(testMap, "list(key=val;val=m).another(state=good).state");

702 * outputs:

703 * good

704 */

705 dynamic walk(initial, String key, Map parsedKeyCache) {

706 List parts = _parseKey(key, parsedKeyCache);

707 return parts.fold(initial, (current, part) {

708 if (current == null) {

709 return null;

710 } else if (current is List && part is Map) {

711 for (int i = 0; i < current.length; i++) {

712 bool match = true;

713 part.forEach((key, val) {

714 if ((key.contains('.') &&

715 walk(part, key, parsedKeyCache).toString() != val) \|\|

716 part[key] != val) {

717 match = false;

718 }

719 });

720 if (match) {

721 return current[i];

722 }

723 }

724 } else {

725 return current[part];

726 }

727 });

728 }

OLD	NEW

« no previous file with comments | « charted/lib/charts/charts.dart ('k') | charted/lib/charts/data_transformers/aggregation_item.dart » ('j') | no next file with comments »