Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(97)

Side by Side Diff: charted/lib/charts/data_transformers/aggregation.dart

Issue 1400473008: Roll Observatory packages and add a roll script (Closed) Base URL: git@github.com:dart-lang/observatory_pub_packages.git@master
Patch Set: Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 /*
2 * Copyright 2014 Google Inc. All rights reserved.
3 *
4 * Use of this source code is governed by a BSD-style
5 * license that can be found in the LICENSE file or at
6 * https://developers.google.com/open-source/licenses/bsd
7 */
8
9 part of charted.charts;
10
11 /**
12 * Function callback to filter items in the input
13 */
14 typedef bool AggregationFilterFunc(var item);
15
16 typedef dynamic FieldAccessor(dynamic item, dynamic key);
17
18
19 // TODO(midoringo, prsd): Consider splitting each aggregation type into its own
20 // strategy object for readability, maintainability, and scalability.
21 /**
22 * Given list of items, dimensions and facts, compute
23 * aggregates (COUNT, SUM, MIN, MAX) for facts at each dimension level.
24 */
25 class AggregationModel {
26
27 // Number of aggregations we collect on each fact
28 int _aggregationTypesCount = 0;
29
30 // Currently supported list of aggregations.
31 static final List<String> supportedAggregationTypes =
32 ['sum', 'min', 'max', 'valid'];
33
34 // Computed aggregation types.
35 List<String> computedAggregationTypes;
36
37 // Offsets of aggregations that are computed once per fact per dimension
38 // If an offset is null, it will not be computed
39 int _offsetSum;
40 int _offsetMin;
41 int _offsetMax;
42 int _offsetCnt;
43
44 // Offset of aggregations that one computed once per dimension
45 int _offsetFilteredCount;
46 int _offsetSortedIndex;
47
48 // Number of bits we can use in an integer without making it medium int
49 static const int SMI_BITS = 30;
50
51 // Computed aggregations
52 static const int AGGREGATIONS_BUFFER_LENGTH = 1024 * 1024;
53 Float64List _aggregations;
54
55 // Cache of fact values
56 Float64List _factsCache;
57
58 // Cache of enumerated dimensions
59 List<int> _dimEnumCache;
60
61 // Sorted list of indices (for data in _rows/_dimEnumCache/_factsCache)
62 List<int> _sorted;
63
64 // Enumeration map for dimension values
65 List<Map<dynamic, int>> _dimToIntMap;
66
67 // Sort orders for dimension values
68 List<List<int>> _dimSortOrders;
69
70 // Input
71 List _rows;
72 List _dimFields;
73 List _factFields;
74
75 // When groupBy is called, this value represents the
76 // common prefix of the old and new dimensions list.
77 int _dimPrefixLength = 0;
78
79 // Dimensions mapped to computed aggregates
80 Map<String, int> _dimToAggrMap;
81
82 // null when no filter was applied.
83 // Otherwise, store a bitmap indicating if an item was included.
84 List<int> _filterResults;
85
86 // Cache of entities created for the facts on this aggregation view.
87 Map<String, AggregationItem> _entityCache;
88
89 // List of field names that aggregation items will have.
90 List<String> _itemFieldNamesCache;
91
92 // Walk through the map, by splitting key at '.'
93 final bool walkThroughMap;
94
95 // Map of fieldName to comparator function.
96 final Map<String, Function> comparators;
97
98 // Timing operations
99 static final Logger _logger = new Logger('aggregations');
100 Stopwatch _timeItWatch;
101 String _timeItName;
102
103 FieldAccessor dimensionAccessor;
104 FieldAccessor factsAccessor;
105
106 /**
107 * Create a new [AggregationModel] from a [collection] of items,
108 * list of [dimensions] on which the items are grouped and a list of [facts]
109 * on which aggregations are computed.
110 */
111 AggregationModel(List collection, List dimensions,
112 List facts,
113 { List<String> aggregationTypes,
114 this.walkThroughMap: false,
115 this.comparators,
116 this.dimensionAccessor,
117 this.factsAccessor}) {
118 _init(collection, dimensions, facts, aggregationTypes);
119 }
120
121 void _timeItStart(String name) {
122 _timeItName = name;
123 _timeItWatch = new Stopwatch();
124 _timeItWatch.start();
125 }
126
127 void _timeItEnd() {
128 _timeItWatch.stop();
129 _logger.info('[aggregations/$_timeItName] '
130 '${_timeItWatch.elapsed.inMilliseconds}ms/${_rows.length}r');
131 }
132
133 List get factFields => _factFields;
134 List get dimensionFields => _dimFields;
135
136 /**
137 * Initialize the view
138 */
139 void _init(List collection, List dimensions,
140 List facts, List<String> aggregationTypes) {
141 if (collection == null) {
142 throw new ArgumentError('Data cannot be empty or null');
143 }
144
145 if (facts == null || facts.isEmpty) {
146 throw new ArgumentError('Facts cannot be empty or null');
147 }
148
149 if (dimensions == null) {
150 dimensions = [];
151 }
152
153 if (dimensionAccessor == null) {
154 dimensionAccessor = _fetch;
155 }
156
157 if (factsAccessor == null) {
158 factsAccessor = _fetch;
159 }
160
161 if (aggregationTypes != null) {
162 Iterable unknownAggregationTypes =
163 aggregationTypes.where((e) => !supportedAggregationTypes.contains(e));
164 if (unknownAggregationTypes.length != 0) {
165 throw new ArgumentError(
166 'Unknown aggregation types: ${unknownAggregationTypes.join(', ')}');
167 }
168 } else {
169 aggregationTypes = ['sum'];
170 }
171
172 // Always adding 'count' for correct computation of average and count.
173 if (!aggregationTypes.contains('valid')) {
174 aggregationTypes.add('valid');
175 }
176
177 _rows = collection;
178 _dimFields = new List.from(dimensions, growable: false);
179 _factFields = new List.from(facts, growable: false);
180 _entityCache = new Map<String, AggregationItem>();
181
182 _createBuffers();
183
184 _aggregationTypesCount = aggregationTypes.length;
185 for (int i = 0; i < _aggregationTypesCount; i++) {
186 switch(aggregationTypes[i]) {
187 case 'sum':
188 _offsetSum = i;
189 break;
190 case 'min':
191 _offsetMin = i;
192 break;
193 case 'max':
194 _offsetMax = i;
195 break;
196 case 'valid':
197 _offsetCnt = i;
198 break;
199 }
200 }
201 computedAggregationTypes = new List.from(aggregationTypes, growable: false);
202
203 // Preprocess the data
204 _preprocess();
205 }
206
207 /**
208 * Re-calculate aggregations based on new dimensions.
209 */
210 void groupBy(List dimensions, [AggregationFilterFunc filter = null]) {
211 if (dimensions == null) {
212 dimensions = [];
213 }
214
215 List savedDimFields = _dimFields;
216 _dimFields = new List.from(dimensions, growable: false);
217
218 _dimPrefixLength = 0;
219 while (_dimPrefixLength < _dimFields.length &&
220 _dimPrefixLength < savedDimFields.length &&
221 savedDimFields[_dimPrefixLength] == _dimFields[_dimPrefixLength]) {
222 ++_dimPrefixLength;
223 }
224
225 _createBuffers();
226 _preprocess(groupBy:true);
227
228 // For groupBy, compute immediately.
229 compute(filter);
230
231 // Ensure that cache represents updated dimensions
232 _updateCachedEntities();
233 }
234
235 /**
236 * Create buffers.
237 * This method is called when the object is being created and when
238 * a groupBy is called to change the dimensions on which
239 * aggregations are computed.
240 */
241 void _createBuffers() {
242 // Create both when object is created and groupBy is called
243 _dimEnumCache = new Int32List(_dimFields.length * _rows.length);
244
245 // Create only when the object is created
246 if (_factsCache == null) {
247 _factsCache = new Float64List((_factFields.length + 1) * _rows.length);
248 }
249
250 // Create only when the object is created
251 if (_filterResults == null) {
252 _filterResults = new List<int>((_rows.length) ~/ SMI_BITS + 1);
253 }
254
255 // Create both when object is created and groupBy is called
256 // Reuse dimension enumerations if possible.
257 var oldDimToInt = _dimToIntMap;
258 _dimToIntMap = new List<Map<dynamic, int>>.generate(_dimFields.length,
259 (i) => i < _dimPrefixLength ? oldDimToInt[i] : new Map<dynamic, int>());
260 }
261
262 /**
263 * Check cache entries
264 * When data is grouped by a new dimensions, entities that were
265 * created prior to the groupBy should be cleared and removed from cache
266 * if they aren't valid anymore.
267 * Update the entities that are valid after the groupBy.
268 */
269 void _updateCachedEntities() {
270 List keys = new List.from(_entityCache.keys, growable: false);
271 keys.forEach((key) {
272 _AggregationItemImpl entity = _entityCache[key];
273 if (entity == null) {
274 _entityCache.remove(key);
275 } else if (entity != null && entity.isValid) {
276 if (key.split(':').length <= _dimPrefixLength) {
277 entity.update();
278 } else {
279 _entityCache.remove(key);
280 entity.clear();
281 }
282 }
283 });
284 }
285
286 final Map<String, List> _parsedKeys = {};
287 /**
288 * Get value from a map-like object
289 */
290 dynamic _fetch(var item, String key) {
291 if (walkThroughMap && key.contains('.')) {
292 return walk(item, key, _parsedKeys);
293 } else {
294 return item[key];
295 }
296 }
297
298 /*
299 * Preprocess Data
300 * - Enumerate dimension values
301 * - Create sort orders for dimension values
302 * - Cache facts in lists
303 */
304 void _preprocess({bool groupBy: false}) {
305
306 _timeItStart('preprocess');
307
308 // Enumerate dimensions...
309 // Cache dimensions and facts.
310
311 List<int> dimensionValCount =
312 new List<int>.generate(_dimFields.length, (idx) => 0);
313
314 int dimensionsCount = _dimFields.length;
315 int factsCount = _factFields.length;
316 int rowCount = _rows.length;
317
318 for (int ri = 0, factsDataOffset = 0, dimDataOffset = 0;
319 ri < rowCount; ++ri, factsDataOffset += factsCount,
320 dimDataOffset += dimensionsCount) {
321 var item = _rows[ri];
322
323 // Cache the fact values in the big buffer, but only
324 // when we are initializing (not when a groupBy was called
325 // after initialization)
326 if (!groupBy) {
327 for (int fi = 0; fi < factsCount; fi++) {
328 var value = factsAccessor(item,_factFields[fi]);
329 _factsCache[factsDataOffset + fi] =
330 (value == null) ? double.NAN : value.toDouble();
331 }
332 }
333
334 // Enumerate the dimension values and cache enumerated rows
335 for (int di = 0; di < dimensionsCount; di++) {
336 var dimensionVal = dimensionAccessor(item, _dimFields[di]);
337 int dimensionValEnum = _dimToIntMap[di][dimensionVal];
338 if (dimensionValEnum == null) {
339 _dimToIntMap[di][dimensionVal] = dimensionValCount[di];
340 dimensionValEnum = dimensionValCount[di]++;
341 }
342 _dimEnumCache[dimDataOffset + di] = dimensionValEnum;
343 }
344 }
345
346 // Sort all dimensions internally
347 // The resulting arrays would be used to sort the entire data
348
349 List oldSortOrders = _dimSortOrders;
350 _dimSortOrders = new List.generate(dimensionsCount, (i) {
351 if (groupBy && i < _dimPrefixLength) {
352 return oldSortOrders[i];
353 }
354
355 List dimensionVals = new List.from(_dimToIntMap[i].keys);
356 List retval = new List(_dimToIntMap[i].length);
357
358 // When a comparator is not specified, our implementation of the
359 // comparator tries to gracefully handle null values.
360 dimensionVals.sort(
361 comparators != null && comparators.containsKey(_dimFields[i]) ?
362 comparators[_dimFields[i]] : _defaultDimComparator);
363
364 for (int si = 0; si < retval.length; ++si) {
365 retval[_dimToIntMap[i][dimensionVals[si]]] = si;
366 }
367 return retval;
368 }, growable: false);
369
370 // Create a list of sorted indices - only if we are not having a full
371 // overlap of dimensionFields.
372 if (!groupBy || _dimPrefixLength != _dimFields.length) {
373 _sorted = new List<int>.generate(_rows.length, (i) => i, growable: false);
374 _sorted.sort(_comparator);
375 }
376
377 // Pre-compute frequently used values
378 _offsetSortedIndex = factsCount * _aggregationTypesCount;
379 _offsetFilteredCount = factsCount * _aggregationTypesCount + 1;
380
381 _timeItEnd();
382 }
383
384 // Ensures that null dimension values don't cause an issue with sorting
385 _defaultDimComparator(Comparable left, Comparable right) =>
386 (left == null && right == null) ? 0 :
387 (left == null) ? -1 :
388 (right == null) ? 1 : left.compareTo(right);
389
390 /*
391 * Given item indices in rows, compare them based
392 * on the sort orders created while preprocessing data.
393 */
394 _comparator(int one, int two) {
395 if (one == two) {
396 return 0;
397 }
398
399 int offsetOne = _dimFields.length * one;
400 int offsetTwo = _dimFields.length * two;
401
402 for (int i = 0; i < _dimFields.length; ++i) {
403 int diff = _dimSortOrders[i][_dimEnumCache[offsetOne + i]] -
404 _dimSortOrders[i][_dimEnumCache[offsetTwo + i]];
405 if (diff != 0) {
406 return diff;
407 }
408 }
409 return 0;
410 }
411
412 /**
413 * Compute aggregations
414 * If [filter] is not null, it would be used to filter out items that
415 * should not be included in the aggregates.
416 */
417 void compute([AggregationFilterFunc filter = null]) {
418 _timeItStart('compute');
419
420 _dimToAggrMap = new Map<String, int>();
421 _aggregations = new Float64List(AGGREGATIONS_BUFFER_LENGTH);
422 _filterResults = filter == null ?
423 null : new List<int>.filled((_rows.length ~/ SMI_BITS) + 1, 0);
424
425 int rowCount = _rows.length;
426 int dimensionCount = _dimFields.length;
427 int factsCount = _factFields.length;
428
429 // Saves current dimension value to which we are aggregating
430 // Values of dimensions are in even indices (starting at 0) and
431 // location of respective dimension in buffer is in odd indices.
432 List<int> currentDim = new List<int>(dimensionCount * 2);
433 bool reset = true;
434 bool isNewDimension = false;
435 int aggregationSizePerDim = factsCount * _aggregationTypesCount;
436
437 // Reserve the 0th position for top-level aggregations.
438 int currentBufferPos = (factsCount * _aggregationTypesCount + 2);
439 _dimToAggrMap[''] = 0;
440 _aggregations[_offsetSortedIndex] = 0.0;
441
442
443 for (int ri = 0, index = 0, dimensionDataOffset = 0, factsDataOffset = 0;
444 ri < rowCount; ++ri, reset = false) {
445
446 // If filter is not null, check if this row must be included in
447 // the aggregations and mark it accordingly.
448 index = _sorted[ri];
449 if (filter != null) {
450 if (!filter(_rows[index])) {
451 continue;
452 } else {
453 _filterResults[ri ~/ SMI_BITS] |= (1 << (ri % SMI_BITS));
454 }
455 }
456
457 dimensionDataOffset = index * dimensionCount;
458 factsDataOffset = index * factsCount;
459
460 // Update top-level aggregations.
461 _updateAggregationsAt(0, factsDataOffset, ri == 0 ? true : false);
462
463 // See which dimensions get effected by this row.
464 // di => the index of the dimension
465 // ci => index of the cached value in [currentDim]
466 for (int di = 0, ci = 0; di < dimensionCount; ++di, ci += 2) {
467 // If a dimension value changed, then all dimensions that are lower
468 // in the hierarchy change too.
469 if (reset ||
470 currentDim[ci] != _dimEnumCache[dimensionDataOffset + di]) {
471 currentDim[ci] = _dimEnumCache[dimensionDataOffset + di];
472 currentDim[ci + 1] = currentBufferPos;
473
474 // Save location to aggregations position in the buffer
475 _dimToAggrMap[new List.generate(di + 1,
476 (i) => currentDim[2 * i]).join(':')] = currentBufferPos;
477
478 // Store items start position
479 _aggregations[currentBufferPos + _offsetSortedIndex] = ri.toDouble();
480
481 // After the aggregated values, we save the filtered count,
482 // index of the first item (in sorted)
483 currentBufferPos += (aggregationSizePerDim + 2);
484 reset = true;
485 isNewDimension = true;
486 }
487
488 _updateAggregationsAt(currentDim[ci + 1],
489 factsDataOffset, isNewDimension);
490 isNewDimension = false;
491 }
492 }
493
494 _timeItEnd();
495 }
496
497 /**
498 * Helper function that does the actual aggregations.
499 * This function is called once per row per dimension.
500 */
501 _updateAggregationsAt(int aggrDataOffset,
502 int factsDataOffset, bool isNewDimension) {
503 // Update count.
504 _aggregations[aggrDataOffset + _offsetFilteredCount] += 1;
505
506 // Update aggregation for each of the facts.
507 for (int fi = 0, bufferFactOffset = aggrDataOffset;
508 fi < _factFields.length;
509 bufferFactOffset += _aggregationTypesCount, ++fi) {
510
511 double factValue = _factsCache[factsDataOffset + fi];
512 if (factValue.isNaN) {
513 continue;
514 }
515
516 // Sum
517 if (_offsetSum != null) {
518 _aggregations[bufferFactOffset + _offsetSum] += factValue;
519 }
520
521 // Min
522 if (_offsetMin != null && (isNewDimension || factValue <
523 _aggregations[bufferFactOffset + _offsetMin])) {
524 _aggregations[bufferFactOffset + _offsetMin] = factValue;
525 }
526
527 // Max
528 if (_offsetMax != null && (isNewDimension || factValue >
529 _aggregations[bufferFactOffset + _offsetMax])) {
530 _aggregations[bufferFactOffset + _offsetMax] = factValue;
531 }
532
533 // Count
534 if (_offsetCnt != null) {
535 _aggregations[bufferFactOffset + _offsetCnt]++;
536 }
537 }
538 }
539
540 /*
541 * TODO(prsd):
542 * 1. Implementation of updates and posting updates to entities.
543 * patchEntity and addToEntity must add listeners on AggregationItems
544 * and any changes must be propagated to entities.
545 * 2. Updates (add/remove/update) should do their best to update the
546 * aggregations and then maybe do a delayed recomputation (sort etc;)
547 */
548
549 /**
550 * Update an item.
551 * If aggregates were already computed, they are updated to reflect the
552 * new value and any observers are notified.
553 */
554 void updateItem(dynamic item, String field) {
555 throw new UnimplementedError();
556 }
557
558 /**
559 * Add a new item.
560 * If aggregates were already computed, they are updated to reflect
561 * values on the new item.
562 */
563 void addItem(dynamic item) {
564 throw new UnimplementedError();
565 }
566
567 /**
568 * Remove an existing item.
569 * If aggregates were already computed, they are updated to reflect
570 * facts on the removed item.
571 */
572 void removeItem(dynamic item) {
573 throw new UnimplementedError();
574 }
575
576 /**
577 * Return an [AggregationItem] that represents facts for dimension
578 * represented by [dimension] Only one instance of an entity is created
579 * per dimension (even if this function is called multiple times)
580 *
581 * Callers of this method can observe the returned entity for updates to
582 * aggregations caused by changes to filter or done through add, remove
583 * or modify of items in the collection.
584 */
585 AggregationItem facts(List dimension) {
586 List<int> enumeratedList = new List<int>();
587 for (int i = 0; i < dimension.length; ++i) {
588 enumeratedList.add(_dimToIntMap[i][dimension[i]]);
589 }
590
591 String key = enumeratedList.join(':');
592 AggregationItem item = _entityCache[key];
593
594 if (item == null && _dimToAggrMap.containsKey(key)) {
595 item = new _AggregationItemImpl(this, dimension, key);
596 _entityCache[key] = item;
597 }
598
599 return item;
600 }
601
602 /**
603 * Return a list of values that are present for a dimension field.
604 */
605 List valuesForDimension(dynamic dimensionFieldName) {
606 int di = _dimFields.indexOf(dimensionFieldName);
607 if (di < 0) {
608 return null;
609 }
610 List values = new List.from(_dimToIntMap[di].keys);
611 values.sort(
612 comparators != null && comparators.containsKey(dimensionFieldName) ?
613 comparators[dimensionFieldName] : _defaultDimComparator);
614 return values;
615 }
616 }
617
618 /**
619 * Parse a path for nested map-like objects.
620 * Caches the parsed key in the passed map.
621 *
622 * Takes map keys of the format:
623 * "list(key=val;val=m).another(state=good).state"
624 * and outputs:
625 * ["list", {"key": "val", "val": "m"},
626 * "another", {"state": "good"}, "state"]
627 */
628 List _parseKey(String key, Map parsedKeysCache) {
629 List parts = parsedKeysCache == null ? null : parsedKeysCache[key];
630 if (parts == null && key != null) {
631 parts = new List();
632 if (key.contains(').')) {
633 int start = 0;
634 int cursor = 0;
635 bool inParams = false;
636 List matchKeyVals;
637 Map listMatchingMap = {};
638
639 while (cursor < key.length) {
640 if (!inParams) {
641 cursor = key.indexOf('(', start);
642 if (cursor == -1) {
643 parts.addAll(key.substring(start).split('.'));
644 break;
645 }
646 parts.addAll(key.substring(start, cursor).split('.'));
647 cursor++;
648 start = cursor;
649 inParams = true;
650 } else {
651 cursor = key.indexOf(')', start);
652 if (cursor == -1) {
653 throw new ArgumentError('Invalid field name: $key');
654 }
655 listMatchingMap.clear();
656 matchKeyVals = key.substring(start, cursor).split(';');
657 matchKeyVals.forEach((value) {
658 List keyval = value.split('=');
659 if (keyval.length != 2) {
660 throw new ArgumentError('Invalid field name: ${key}');
661 }
662 listMatchingMap[keyval[0]] = keyval[1];
663 });
664 parts.add(listMatchingMap);
665 cursor += 2;
666 start = cursor;
667 inParams = false;
668 }
669 }
670 } else {
671 parts = key.split('.');
672 }
673 if (parsedKeysCache != null) {
674 parsedKeysCache[key] = parts;
675 }
676 }
677
678 return parts;
679 }
680
681 /**
682 * Walk a map-like structure that could have list in the path.
683 *
684 * Example:
685 * Map testMap = {
686 * "first": "firstval",
687 * "list": [
688 * { "val": "m",
689 * "key": "val",
690 * "another": [
691 * { 'state': 'good' },
692 * { 'state': 'bad' }
693 * ]
694 * },
695 * { "val": "m", "key": "invalid" },
696 * { "val": "o" }
697 * ]
698 * };
699 *
700 * For the above map:
701 * walk(testMap, "list(key=val;val=m).another(state=good).state");
702 * outputs:
703 * good
704 */
705 dynamic walk(initial, String key, Map parsedKeyCache) {
706 List parts = _parseKey(key, parsedKeyCache);
707 return parts.fold(initial, (current, part) {
708 if (current == null) {
709 return null;
710 } else if (current is List && part is Map) {
711 for (int i = 0; i < current.length; i++) {
712 bool match = true;
713 part.forEach((key, val) {
714 if ((key.contains('.') &&
715 walk(part, key, parsedKeyCache).toString() != val) ||
716 part[key] != val) {
717 match = false;
718 }
719 });
720 if (match) {
721 return current[i];
722 }
723 }
724 } else {
725 return current[part];
726 }
727 });
728 }
OLDNEW
« no previous file with comments | « charted/lib/charts/charts.dart ('k') | charted/lib/charts/data_transformers/aggregation_item.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698