Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1332)

Unified Diff: charted/lib/charts/data_transformers/aggregation.dart

Issue 1400473008: Roll Observatory packages and add a roll script (Closed) Base URL: git@github.com:dart-lang/observatory_pub_packages.git@master
Patch Set: Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « charted/lib/charts/charts.dart ('k') | charted/lib/charts/data_transformers/aggregation_item.dart » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: charted/lib/charts/data_transformers/aggregation.dart
diff --git a/charted/lib/charts/data_transformers/aggregation.dart b/charted/lib/charts/data_transformers/aggregation.dart
deleted file mode 100644
index ffde9ee970f8aed56f54c0a4f1edcccdfcde25a0..0000000000000000000000000000000000000000
--- a/charted/lib/charts/data_transformers/aggregation.dart
+++ /dev/null
@@ -1,728 +0,0 @@
-/*
- * Copyright 2014 Google Inc. All rights reserved.
- *
- * Use of this source code is governed by a BSD-style
- * license that can be found in the LICENSE file or at
- * https://developers.google.com/open-source/licenses/bsd
- */
-
-part of charted.charts;
-
/**
 * Function callback to filter items in the input
 */
typedef bool AggregationFilterFunc(var item);

/**
 * Function callback used to read the value of field [key] from [item].
 * Used for both dimension and fact value access.
 */
typedef dynamic FieldAccessor(dynamic item, dynamic key);
-
-
// TODO(midoringo, prsd): Consider splitting each aggregation type into its own
// strategy object for readability, maintainability, and scalability.
/**
 * Given list of items, dimensions and facts, compute
 * aggregates (COUNT, SUM, MIN, MAX) for facts at each dimension level.
 */
class AggregationModel {

  // Number of aggregations we collect on each fact
  int _aggregationTypesCount = 0;

  // Currently supported list of aggregations.
  static final List<String> supportedAggregationTypes =
      ['sum', 'min', 'max', 'valid'];

  // Computed aggregation types.
  List<String> computedAggregationTypes;

  // Offsets of aggregations that are computed once per fact per dimension
  // If an offset is null, it will not be computed
  int _offsetSum;
  int _offsetMin;
  int _offsetMax;
  int _offsetCnt;

  // Offset of aggregations that one computed once per dimension
  int _offsetFilteredCount;
  int _offsetSortedIndex;

  // Number of bits we can use in an integer without making it medium int
  static const int SMI_BITS = 30;

  // Computed aggregations
  static const int AGGREGATIONS_BUFFER_LENGTH = 1024 * 1024;
  Float64List _aggregations;

  // Cache of fact values
  Float64List _factsCache;

  // Cache of enumerated dimensions
  List<int> _dimEnumCache;

  // Sorted list of indices (for data in _rows/_dimEnumCache/_factsCache)
  List<int> _sorted;

  // Enumeration map for dimension values
  List<Map<dynamic, int>> _dimToIntMap;

  // Sort orders for dimension values
  List<List<int>> _dimSortOrders;

  // Input
  List _rows;
  List _dimFields;
  List _factFields;

  // When groupBy is called, this value represents the
  // common prefix of the old and new dimensions list.
  int _dimPrefixLength = 0;

  // Dimensions mapped to computed aggregates
  Map<String, int> _dimToAggrMap;

  // null when no filter was applied.
  // Otherwise, store a bitmap indicating if an item was included.
  List<int> _filterResults;

  // Cache of entities created for the facts on this aggregation view.
  Map<String, AggregationItem> _entityCache;

  // List of field names that aggregation items will have.
  List<String> _itemFieldNamesCache;

  // Walk through the map, by splitting key at '.'
  final bool walkThroughMap;

  // Map of fieldName to comparator function.
  final Map<String, Function> comparators;

  // Timing operations
  static final Logger _logger = new Logger('aggregations');
  Stopwatch _timeItWatch;
  String _timeItName;

  // Accessors used to read dimension and fact values from each item.
  // Default to [_fetch] (map-style lookup) when not supplied by the caller.
  FieldAccessor dimensionAccessor;
  FieldAccessor factsAccessor;

  /**
   * Create a new [AggregationModel] from a [collection] of items,
   * list of [dimensions] on which the items are grouped and a list of [facts]
   * on which aggregations are computed.
   */
  AggregationModel(List collection, List dimensions,
      List facts,
      { List<String> aggregationTypes,
        this.walkThroughMap: false,
        this.comparators,
        this.dimensionAccessor,
        this.factsAccessor}) {
    _init(collection, dimensions, facts, aggregationTypes);
  }

  // Starts a named stopwatch; paired with [_timeItEnd] for coarse
  // performance logging of the preprocess/compute phases.
  void _timeItStart(String name) {
    _timeItName = name;
    _timeItWatch = new Stopwatch();
    _timeItWatch.start();
  }

  // Stops the stopwatch started by [_timeItStart] and logs elapsed
  // milliseconds along with the number of input rows.
  void _timeItEnd() {
    _timeItWatch.stop();
    _logger.info('[aggregations/$_timeItName] '
        '${_timeItWatch.elapsed.inMilliseconds}ms/${_rows.length}r');
  }

  // Fact fields on which aggregates are computed.
  List get factFields => _factFields;
  // Dimension fields on which the items are currently grouped.
  List get dimensionFields => _dimFields;

  /**
   * Initialize the view
   */
  void _init(List collection, List dimensions,
      List facts, List<String> aggregationTypes) {
    if (collection == null) {
      throw new ArgumentError('Data cannot be empty or null');
    }

    if (facts == null || facts.isEmpty) {
      throw new ArgumentError('Facts cannot be empty or null');
    }

    if (dimensions == null) {
      dimensions = [];
    }

    if (dimensionAccessor == null) {
      dimensionAccessor = _fetch;
    }

    if (factsAccessor == null) {
      factsAccessor = _fetch;
    }

    if (aggregationTypes != null) {
      Iterable unknownAggregationTypes =
          aggregationTypes.where((e) => !supportedAggregationTypes.contains(e));
      if (unknownAggregationTypes.length != 0) {
        throw new ArgumentError(
            'Unknown aggregation types: ${unknownAggregationTypes.join(', ')}');
      }
    } else {
      aggregationTypes = ['sum'];
    }

    // Always adding 'count' for correct computation of average and count.
    if (!aggregationTypes.contains('valid')) {
      aggregationTypes.add('valid');
    }

    _rows = collection;
    _dimFields = new List.from(dimensions, growable: false);
    _factFields = new List.from(facts, growable: false);
    _entityCache = new Map<String, AggregationItem>();

    _createBuffers();

    // Record at which offset (within a per-fact slot) each requested
    // aggregation type is stored; null offsets are skipped at compute time.
    _aggregationTypesCount = aggregationTypes.length;
    for (int i = 0; i < _aggregationTypesCount; i++) {
      switch(aggregationTypes[i]) {
        case 'sum':
          _offsetSum = i;
          break;
        case 'min':
          _offsetMin = i;
          break;
        case 'max':
          _offsetMax = i;
          break;
        case 'valid':
          _offsetCnt = i;
          break;
      }
    }
    computedAggregationTypes = new List.from(aggregationTypes, growable: false);

    // Preprocess the data
    _preprocess();
  }

  /**
   * Re-calculate aggregations based on new dimensions.
   */
  void groupBy(List dimensions, [AggregationFilterFunc filter = null]) {
    if (dimensions == null) {
      dimensions = [];
    }

    List savedDimFields = _dimFields;
    _dimFields = new List.from(dimensions, growable: false);

    // Length of the common prefix between old and new dimension lists;
    // enumeration maps and sort orders for the prefix are reused.
    _dimPrefixLength = 0;
    while (_dimPrefixLength < _dimFields.length &&
        _dimPrefixLength < savedDimFields.length &&
        savedDimFields[_dimPrefixLength] == _dimFields[_dimPrefixLength]) {
      ++_dimPrefixLength;
    }

    _createBuffers();
    _preprocess(groupBy:true);

    // For groupBy, compute immediately.
    compute(filter);

    // Ensure that cache represents updated dimensions
    _updateCachedEntities();
  }

  /**
   * Create buffers.
   * This method is called when the object is being created and when
   * a groupBy is called to change the dimensions on which
   * aggregations are computed.
   */
  void _createBuffers() {
    // Create both when object is created and groupBy is called
    _dimEnumCache = new Int32List(_dimFields.length * _rows.length);

    // Create only when the object is created
    if (_factsCache == null) {
      _factsCache = new Float64List((_factFields.length + 1) * _rows.length);
    }

    // Create only when the object is created
    if (_filterResults == null) {
      _filterResults = new List<int>((_rows.length) ~/ SMI_BITS + 1);
    }

    // Create both when object is created and groupBy is called
    // Reuse dimension enumerations if possible.
    var oldDimToInt = _dimToIntMap;
    _dimToIntMap = new List<Map<dynamic, int>>.generate(_dimFields.length,
        (i) => i < _dimPrefixLength ? oldDimToInt[i] : new Map<dynamic, int>());
  }

  /**
   * Check cache entries
   * When data is grouped by a new dimensions, entities that were
   * created prior to the groupBy should be cleared and removed from cache
   * if they aren't valid anymore.
   * Update the entities that are valid after the groupBy.
   */
  void _updateCachedEntities() {
    List keys = new List.from(_entityCache.keys, growable: false);
    keys.forEach((key) {
      _AggregationItemImpl entity = _entityCache[key];
      if (entity == null) {
        _entityCache.remove(key);
      } else if (entity != null && entity.isValid) {
        // NOTE(review): entities that are non-null but !isValid are left in
        // the cache untouched — confirm whether that is intentional.
        if (key.split(':').length <= _dimPrefixLength) {
          entity.update();
        } else {
          _entityCache.remove(key);
          entity.clear();
        }
      }
    });
  }

  // Cache of parsed nested-key paths used by [_fetch]/[walk].
  final Map<String, List> _parsedKeys = {};
  /**
   * Get value from a map-like object
   */
  dynamic _fetch(var item, String key) {
    if (walkThroughMap && key.contains('.')) {
      return walk(item, key, _parsedKeys);
    } else {
      return item[key];
    }
  }

  /*
   * Preprocess Data
   * - Enumerate dimension values
   * - Create sort orders for dimension values
   * - Cache facts in lists
   */
  void _preprocess({bool groupBy: false}) {

    _timeItStart('preprocess');

    // Enumerate dimensions...
    // Cache dimensions and facts.

    List<int> dimensionValCount =
        new List<int>.generate(_dimFields.length, (idx) => 0);

    int dimensionsCount = _dimFields.length;
    int factsCount = _factFields.length;
    int rowCount = _rows.length;

    for (int ri = 0, factsDataOffset = 0, dimDataOffset = 0;
        ri < rowCount; ++ri, factsDataOffset += factsCount,
        dimDataOffset += dimensionsCount) {
      var item = _rows[ri];

      // Cache the fact values in the big buffer, but only
      // when we are initializing (not when a groupBy was called
      // after initialization)
      if (!groupBy) {
        for (int fi = 0; fi < factsCount; fi++) {
          var value = factsAccessor(item,_factFields[fi]);
          // Missing fact values are stored as NaN and skipped at
          // aggregation time.
          _factsCache[factsDataOffset + fi] =
              (value == null) ? double.NAN : value.toDouble();
        }
      }

      // Enumerate the dimension values and cache enumerated rows
      for (int di = 0; di < dimensionsCount; di++) {
        var dimensionVal = dimensionAccessor(item, _dimFields[di]);
        int dimensionValEnum = _dimToIntMap[di][dimensionVal];
        if (dimensionValEnum == null) {
          _dimToIntMap[di][dimensionVal] = dimensionValCount[di];
          dimensionValEnum = dimensionValCount[di]++;
        }
        _dimEnumCache[dimDataOffset + di] = dimensionValEnum;
      }
    }

    // Sort all dimensions internally
    // The resulting arrays would be used to sort the entire data

    List oldSortOrders = _dimSortOrders;
    _dimSortOrders = new List.generate(dimensionsCount, (i) {
      if (groupBy && i < _dimPrefixLength) {
        return oldSortOrders[i];
      }

      List dimensionVals = new List.from(_dimToIntMap[i].keys);
      List retval = new List(_dimToIntMap[i].length);

      // When a comparator is not specified, our implementation of the
      // comparator tries to gracefully handle null values.
      dimensionVals.sort(
          comparators != null && comparators.containsKey(_dimFields[i]) ?
              comparators[_dimFields[i]] : _defaultDimComparator);

      // Map each value's enumeration id to its rank in sorted order.
      for (int si = 0; si < retval.length; ++si) {
        retval[_dimToIntMap[i][dimensionVals[si]]] = si;
      }
      return retval;
    }, growable: false);

    // Create a list of sorted indices - only if we are not having a full
    // overlap of dimensionFields.
    if (!groupBy || _dimPrefixLength != _dimFields.length) {
      _sorted = new List<int>.generate(_rows.length, (i) => i, growable: false);
      _sorted.sort(_comparator);
    }

    // Pre-compute frequently used values
    _offsetSortedIndex = factsCount * _aggregationTypesCount;
    _offsetFilteredCount = factsCount * _aggregationTypesCount + 1;

    _timeItEnd();
  }

  // Ensures that null dimension values don't cause an issue with sorting
  _defaultDimComparator(Comparable left, Comparable right) =>
      (left == null && right == null) ? 0 :
          (left == null) ? -1 :
              (right == null) ? 1 : left.compareTo(right);

  /*
   * Given item indices in rows, compare them based
   * on the sort orders created while preprocessing data.
   */
  _comparator(int one, int two) {
    if (one == two) {
      return 0;
    }

    int offsetOne = _dimFields.length * one;
    int offsetTwo = _dimFields.length * two;

    for (int i = 0; i < _dimFields.length; ++i) {
      int diff = _dimSortOrders[i][_dimEnumCache[offsetOne + i]] -
          _dimSortOrders[i][_dimEnumCache[offsetTwo + i]];
      if (diff != 0) {
        return diff;
      }
    }
    return 0;
  }

  /**
   * Compute aggregations
   * If [filter] is not null, it would be used to filter out items that
   * should not be included in the aggregates.
   */
  void compute([AggregationFilterFunc filter = null]) {
    _timeItStart('compute');

    _dimToAggrMap = new Map<String, int>();
    _aggregations = new Float64List(AGGREGATIONS_BUFFER_LENGTH);
    _filterResults = filter == null ?
        null : new List<int>.filled((_rows.length ~/ SMI_BITS) + 1, 0);

    int rowCount = _rows.length;
    int dimensionCount = _dimFields.length;
    int factsCount = _factFields.length;

    // Saves current dimension value to which we are aggregating
    // Values of dimensions are in even indices (starting at 0) and
    // location of respective dimension in buffer is in odd indices.
    List<int> currentDim = new List<int>(dimensionCount * 2);
    bool reset = true;
    bool isNewDimension = false;
    // Per-dimension slot layout in [_aggregations]:
    // factsCount * _aggregationTypesCount values, followed by the
    // sorted-start index and the filtered count (hence the "+ 2" below).
    int aggregationSizePerDim = factsCount * _aggregationTypesCount;

    // Reserve the 0th position for top-level aggregations.
    int currentBufferPos = (factsCount * _aggregationTypesCount + 2);
    _dimToAggrMap[''] = 0;
    _aggregations[_offsetSortedIndex] = 0.0;


    for (int ri = 0, index = 0, dimensionDataOffset = 0, factsDataOffset = 0;
        ri < rowCount; ++ri, reset = false) {

      // If filter is not null, check if this row must be included in
      // the aggregations and mark it accordingly.
      index = _sorted[ri];
      if (filter != null) {
        if (!filter(_rows[index])) {
          continue;
        } else {
          _filterResults[ri ~/ SMI_BITS] |= (1 << (ri % SMI_BITS));
        }
      }

      dimensionDataOffset = index * dimensionCount;
      factsDataOffset = index * factsCount;

      // Update top-level aggregations.
      _updateAggregationsAt(0, factsDataOffset, ri == 0 ? true : false);

      // See which dimensions get effected by this row.
      // di => the index of the dimension
      // ci => index of the cached value in [currentDim]
      for (int di = 0, ci = 0; di < dimensionCount; ++di, ci += 2) {
        // If a dimension value changed, then all dimensions that are lower
        // in the hierarchy change too.
        if (reset ||
            currentDim[ci] != _dimEnumCache[dimensionDataOffset + di]) {
          currentDim[ci] = _dimEnumCache[dimensionDataOffset + di];
          currentDim[ci + 1] = currentBufferPos;

          // Save location to aggregations position in the buffer
          _dimToAggrMap[new List.generate(di + 1,
              (i) => currentDim[2 * i]).join(':')] = currentBufferPos;

          // Store items start position
          _aggregations[currentBufferPos + _offsetSortedIndex] = ri.toDouble();

          // After the aggregated values, we save the filtered count,
          // index of the first item (in sorted)
          currentBufferPos += (aggregationSizePerDim + 2);
          reset = true;
          isNewDimension = true;
        }

        _updateAggregationsAt(currentDim[ci + 1],
            factsDataOffset, isNewDimension);
        isNewDimension = false;
      }
    }

    _timeItEnd();
  }

  /**
   * Helper function that does the actual aggregations.
   * This function is called once per row per dimension.
   */
  _updateAggregationsAt(int aggrDataOffset,
      int factsDataOffset, bool isNewDimension) {
    // Update count.
    _aggregations[aggrDataOffset + _offsetFilteredCount] += 1;

    // Update aggregation for each of the facts.
    for (int fi = 0, bufferFactOffset = aggrDataOffset;
        fi < _factFields.length;
        bufferFactOffset += _aggregationTypesCount, ++fi) {

      // NaN marks a missing fact value (see _preprocess); skip it.
      double factValue = _factsCache[factsDataOffset + fi];
      if (factValue.isNaN) {
        continue;
      }

      // Sum
      if (_offsetSum != null) {
        _aggregations[bufferFactOffset + _offsetSum] += factValue;
      }

      // Min
      if (_offsetMin != null && (isNewDimension || factValue <
          _aggregations[bufferFactOffset + _offsetMin])) {
        _aggregations[bufferFactOffset + _offsetMin] = factValue;
      }

      // Max
      if (_offsetMax != null && (isNewDimension || factValue >
          _aggregations[bufferFactOffset + _offsetMax])) {
        _aggregations[bufferFactOffset + _offsetMax] = factValue;
      }

      // Count
      if (_offsetCnt != null) {
        _aggregations[bufferFactOffset + _offsetCnt]++;
      }
    }
  }

  /*
   * TODO(prsd):
   * 1. Implementation of updates and posting updates to entities.
   *    patchEntity and addToEntity must add listeners on AggregationItems
   *    and any changes must be propagated to entities.
   * 2. Updates (add/remove/update) should do their best to update the
   *    aggregations and then maybe do a delayed recomputation (sort etc;)
   */

  /**
   * Update an item.
   * If aggregates were already computed, they are updated to reflect the
   * new value and any observers are notified.
   */
  void updateItem(dynamic item, String field) {
    throw new UnimplementedError();
  }

  /**
   * Add a new item.
   * If aggregates were already computed, they are updated to reflect
   * values on the new item.
   */
  void addItem(dynamic item) {
    throw new UnimplementedError();
  }

  /**
   * Remove an existing item.
   * If aggregates were already computed, they are updated to reflect
   * facts on the removed item.
   */
  void removeItem(dynamic item) {
    throw new UnimplementedError();
  }

  /**
   * Return an [AggregationItem] that represents facts for dimension
   * represented by [dimension] Only one instance of an entity is created
   * per dimension (even if this function is called multiple times)
   *
   * Callers of this method can observe the returned entity for updates to
   * aggregations caused by changes to filter or done through add, remove
   * or modify of items in the collection.
   */
  AggregationItem facts(List dimension) {
    List<int> enumeratedList = new List<int>();
    for (int i = 0; i < dimension.length; ++i) {
      enumeratedList.add(_dimToIntMap[i][dimension[i]]);
    }

    String key = enumeratedList.join(':');
    AggregationItem item = _entityCache[key];

    // Returns null when no aggregate exists for the requested dimension.
    if (item == null && _dimToAggrMap.containsKey(key)) {
      item = new _AggregationItemImpl(this, dimension, key);
      _entityCache[key] = item;
    }

    return item;
  }

  /**
   * Return a list of values that are present for a dimension field.
   */
  List valuesForDimension(dynamic dimensionFieldName) {
    int di = _dimFields.indexOf(dimensionFieldName);
    if (di < 0) {
      return null;
    }
    List values = new List.from(_dimToIntMap[di].keys);
    values.sort(
        comparators != null && comparators.containsKey(dimensionFieldName) ?
            comparators[dimensionFieldName] : _defaultDimComparator);
    return values;
  }
}
-
/**
 * Parse a path for nested map-like objects.
 * Caches the parsed key in the passed map.
 *
 * Takes map keys of the format:
 *     "list(key=val;val=m).another(state=good).state"
 * and outputs:
 *     ["list", {"key": "val", "val": "m"},
 *      "another", {"state": "good"}, "state"]
 */
List _parseKey(String key, Map parsedKeysCache) {
  List parts = parsedKeysCache == null ? null : parsedKeysCache[key];
  if (parts == null && key != null) {
    parts = new List();
    if (key.contains(').')) {
      int start = 0;
      int cursor = 0;
      bool inParams = false;

      while (cursor < key.length) {
        if (!inParams) {
          // Consume '.'-separated path segments up to the next '(' (or to
          // the end of the key when there are no more criteria groups).
          cursor = key.indexOf('(', start);
          if (cursor == -1) {
            parts.addAll(key.substring(start).split('.'));
            break;
          }
          parts.addAll(key.substring(start, cursor).split('.'));
          cursor++;
          start = cursor;
          inParams = true;
        } else {
          // Consume one '(key=val;key=val)' criteria group.
          cursor = key.indexOf(')', start);
          if (cursor == -1) {
            throw new ArgumentError('Invalid field name: $key');
          }
          // Fix: allocate a fresh map per criteria group. The original
          // reused (and cleared) one map instance, so all groups in the
          // parsed key aliased the same map and only the last group's
          // criteria survived — breaking multi-group keys like the
          // documented example above.
          Map listMatchingMap = {};
          List matchKeyVals = key.substring(start, cursor).split(';');
          matchKeyVals.forEach((value) {
            List keyval = value.split('=');
            if (keyval.length != 2) {
              throw new ArgumentError('Invalid field name: ${key}');
            }
            listMatchingMap[keyval[0]] = keyval[1];
          });
          parts.add(listMatchingMap);
          // Skip past ')' and the '.' that follows it.
          cursor += 2;
          start = cursor;
          inParams = false;
        }
      }
    } else {
      // Simple dotted path with no criteria groups.
      parts = key.split('.');
    }
    if (parsedKeysCache != null) {
      parsedKeysCache[key] = parts;
    }
  }

  return parts;
}
-
/**
 * Walk a map-like structure that could have list in the path.
 *
 * Example:
 *     Map testMap = {
 *       "first": "firstval",
 *       "list": [
 *         { "val": "m",
 *           "key": "val",
 *           "another": [
 *             { 'state': 'good' },
 *             { 'state': 'bad' }
 *           ]
 *         },
 *         { "val": "m", "key": "invalid" },
 *         { "val": "o" }
 *       ]
 *     };
 *
 * For the above map:
 *     walk(testMap, "list(key=val;val=m).another(state=good).state");
 * outputs:
 *     good
 */
dynamic walk(initial, String key, Map parsedKeyCache) {
  List parts = _parseKey(key, parsedKeyCache);
  return parts.fold(initial, (current, part) {
    if (current == null) {
      return null;
    } else if (current is List && part is Map) {
      // Select the first element of the list whose fields match every
      // key=value criterion in [part].
      for (int i = 0; i < current.length; i++) {
        var candidate = current[i];
        bool match = true;
        part.forEach((key, val) {
          // Fix: compare against the candidate element, not the criteria
          // map itself. The original evaluated `part[key] != val` (always
          // false) and recursed with `walk(part, ...)`, so the first list
          // element matched unconditionally — e.g. the documented example
          // could wrongly match `{"val": "m", "key": "invalid"}`.
          if (key.contains('.')) {
            if (walk(candidate, key, parsedKeyCache).toString() != val) {
              match = false;
            }
          } else if (candidate[key] != val) {
            match = false;
          }
        });
        if (match) {
          return candidate;
        }
      }
      // No element satisfied the criteria.
      return null;
    } else {
      return current[part];
    }
  });
}
« no previous file with comments | « charted/lib/charts/charts.dart ('k') | charted/lib/charts/data_transformers/aggregation_item.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698