Index: tools/perf/metrics/statistics.py |
diff --git a/tools/perf/metrics/statistics.py b/tools/perf/metrics/statistics.py |
deleted file mode 100644 |
index 84ffc164a333035d2f40a2c2c596b41edc19c126..0000000000000000000000000000000000000000 |
--- a/tools/perf/metrics/statistics.py |
+++ /dev/null |
@@ -1,265 +0,0 @@ |
-# Copyright 2013 The Chromium Authors. All rights reserved. |
-# Use of this source code is governed by a BSD-style license that can be |
-# found in the LICENSE file. |
- |
-"""A collection of statistical utility functions to be used by metrics.""" |
- |
-import bisect |
-import math |
- |
- |
-def Clamp(value, low=0.0, high=1.0): |
- """Clamp a value between some low and high value.""" |
- return min(max(value, low), high) |
- |
- |
-def NormalizeSamples(samples): |
- """Sorts the samples, and map them linearly to the range [0,1]. |
- |
- They're mapped such that for the N samples, the first sample is 0.5/N and the |
- last sample is (N-0.5)/N. |
- |
- Background: The discrepancy of the sample set i/(N-1); i=0, ..., N-1 is 2/N, |
- twice the discrepancy of the sample set (i+1/2)/N; i=0, ..., N-1. In our case |
- we don't want to distinguish between these two cases, as our original domain |
- is not bounded (it is for Monte Carlo integration, where discrepancy was |
- first used). |
- """ |
- if not samples: |
- return samples, 1.0 |
- samples = sorted(samples) |
- low = min(samples) |
- high = max(samples) |
- new_low = 0.5 / len(samples) |
- new_high = (len(samples)-0.5) / len(samples) |
- if high-low == 0.0: |
- return samples, 1.0 |
- scale = (new_high - new_low) / (high - low) |
- for i in xrange(0, len(samples)): |
- samples[i] = float(samples[i] - low) * scale + new_low |
- return samples, scale |
- |
- |
-def Discrepancy(samples, interval_multiplier=10000): |
- """Computes the discrepancy of a set of 1D samples from the interval [0,1]. |
- |
- The samples must be sorted. |
- |
- http://en.wikipedia.org/wiki/Low-discrepancy_sequence |
- http://mathworld.wolfram.com/Discrepancy.html |
- """ |
- if not samples: |
- return 1.0 |
- |
- max_local_discrepancy = 0 |
- locations = [] |
- # For each location, stores the number of samples less than that location. |
- left = [] |
- # For each location, stores the number of samples less than or equal to that |
- # location. |
- right = [] |
- |
- interval_count = len(samples) * interval_multiplier |
- # Compute number of locations the will roughly result in the requested number |
- # of intervals. |
- location_count = int(math.ceil(math.sqrt(interval_count*2))) |
- inv_sample_count = 1.0 / len(samples) |
- |
- # Generate list of equally spaced locations. |
- for i in xrange(0, location_count): |
- location = float(i) / (location_count-1) |
- locations.append(location) |
- left.append(bisect.bisect_left(samples, location)) |
- right.append(bisect.bisect_right(samples, location)) |
- |
- # Iterate over the intervals defined by any pair of locations. |
- for i in xrange(0, len(locations)): |
- for j in xrange(i, len(locations)): |
- # Compute length of interval and number of samples in the interval. |
- length = locations[j] - locations[i] |
- count = right[j] - left[i] |
- |
- # Compute local discrepancy and update max_local_discrepancy. |
- local_discrepancy = abs(float(count)*inv_sample_count - length) |
- max_local_discrepancy = max(local_discrepancy, max_local_discrepancy) |
- |
- return max_local_discrepancy |
- |
- |
-def TimestampsDiscrepancy(timestamps, absolute=True, |
- interval_multiplier=10000): |
- """A discrepancy based metric for measuring timestamp jank. |
- |
- TimestampsDiscrepancy quantifies the largest area of jank observed in a series |
- of timestamps. Note that this is different from metrics based on the |
- max_time_interval. For example, the time stamp series A = [0,1,2,3,5,6] and |
- B = [0,1,2,3,5,7] have the same max_time_interval = 2, but |
- Discrepancy(B) > Discrepancy(A). |
- |
- Two variants of discrepancy can be computed: |
- |
- Relative discrepancy is following the original definition of |
- discrepancy. It characterized the largest area of jank, relative to the |
- duration of the entire time stamp series. We normalize the raw results, |
- because the best case discrepancy for a set of N samples is 1/N (for |
- equally spaced samples), and we want our metric to report 0.0 in that |
- case. |
- |
- Absolute discrepancy also characterizes the largest area of jank, but its |
- value wouldn't change (except for imprecisions due to a low |
- |interval_multiplier|) if additional 'good' intervals were added to an |
- exisiting list of time stamps. Its range is [0,inf] and the unit is |
- milliseconds. |
- |
- The time stamp series C = [0,2,3,4] and D = [0,2,3,4,5] have the same |
- absolute discrepancy, but D has lower relative discrepancy than C. |
- |
- |timestamps| may be a list of lists S = [S_1, S_2, ..., S_N], where each |
- S_i is a time stamp series. In that case, the discrepancy D(S) is: |
- D(S) = max(D(S_1), D(S_2), ..., D(S_N)) |
- """ |
- if not timestamps: |
- return 1.0 |
- |
- if isinstance(timestamps[0], list): |
- range_discrepancies = [TimestampsDiscrepancy(r) for r in timestamps] |
- return max(range_discrepancies) |
- |
- samples, sample_scale = NormalizeSamples(timestamps) |
- discrepancy = Discrepancy(samples, interval_multiplier) |
- inv_sample_count = 1.0 / len(samples) |
- if absolute: |
- # Compute absolute discrepancy |
- discrepancy /= sample_scale |
- else: |
- # Compute relative discrepancy |
- discrepancy = Clamp((discrepancy-inv_sample_count) / (1.0-inv_sample_count)) |
- return discrepancy |
- |
- |
-def DurationsDiscrepancy(durations, absolute=True, |
- interval_multiplier=10000): |
- """A discrepancy based metric for measuring duration jank. |
- |
- DurationsDiscrepancy computes a jank metric which measures how irregular a |
- given sequence of intervals is. In order to minimize jank, each duration |
- should be equally long. This is similar to how timestamp jank works, |
- and we therefore reuse the timestamp discrepancy function above to compute a |
- similar duration discrepancy number. |
- |
- Because timestamp discrepancy is defined in terms of timestamps, we first |
- convert the list of durations to monotonically increasing timestamps. |
- |
- Args: |
- durations: List of interval lengths in milliseconds. |
- absolute: See TimestampsDiscrepancy. |
- interval_multiplier: See TimestampsDiscrepancy. |
- """ |
- timestamps = reduce(lambda x, y: x + [x[-1] + y], durations, [0])[1:] |
- return TimestampsDiscrepancy(timestamps, absolute, interval_multiplier) |
- |
- |
-def ArithmeticMean(numerator, denominator): |
- """Calculates arithmetic mean. |
- |
- Both numerator and denominator can be given as either individual |
- values or lists of values which will be summed. |
- |
- Args: |
- numerator: A quantity that represents a sum total value. |
- denominator: A quantity that represents a count of the number of things. |
- |
- Returns: |
- The arithmetic mean value, or 0 if the denominator value was 0. |
- """ |
- numerator_total = Total(numerator) |
- denominator_total = Total(denominator) |
- return DivideIfPossibleOrZero(numerator_total, denominator_total) |
- |
- |
-def Total(data): |
- """Returns the float value of a number or the sum of a list.""" |
- if type(data) == float: |
- total = data |
- elif type(data) == int: |
- total = float(data) |
- elif type(data) == list: |
- total = float(sum(data)) |
- else: |
- raise TypeError |
- return total |
- |
- |
-def DivideIfPossibleOrZero(numerator, denominator): |
- """Returns the quotient, or zero if the denominator is zero.""" |
- if not denominator: |
- return 0.0 |
- else: |
- return numerator / denominator |
- |
- |
-def GeneralizedMean(values, exponent): |
- """See http://en.wikipedia.org/wiki/Generalized_mean""" |
- if not values: |
- return 0.0 |
- sum_of_powers = 0.0 |
- for v in values: |
- sum_of_powers += v ** exponent |
- return (sum_of_powers / len(values)) ** (1.0/exponent) |
- |
- |
-def Median(values): |
- """Gets the median of a list of values.""" |
- return Percentile(values, 50) |
- |
- |
-def Percentile(values, percentile): |
- """Calculates the value below which a given percentage of values fall. |
- |
- For example, if 17% of the values are less than 5.0, then 5.0 is the 17th |
- percentile for this set of values. When the percentage doesn't exactly |
- match a rank in the list of values, the percentile is computed using linear |
- interpolation between closest ranks. |
- |
- Args: |
- values: A list of numerical values. |
- percentile: A number between 0 and 100. |
- |
- Returns: |
- The Nth percentile for the list of values, where N is the given percentage. |
- """ |
- if not values: |
- return 0.0 |
- sorted_values = sorted(values) |
- n = len(values) |
- percentile /= 100.0 |
- if percentile <= 0.5 / n: |
- return sorted_values[0] |
- elif percentile >= (n - 0.5) / n: |
- return sorted_values[-1] |
- else: |
- floor_index = int(math.floor(n * percentile - 0.5)) |
- floor_value = sorted_values[floor_index] |
- ceil_value = sorted_values[floor_index+1] |
- alpha = n * percentile - 0.5 - floor_index |
- return floor_value + alpha * (ceil_value - floor_value) |
- |
- |
-def GeometricMean(values): |
- """Compute a rounded geometric mean from an array of values.""" |
- if not values: |
- return None |
- # To avoid infinite value errors, make sure no value is less than 0.001. |
- new_values = [] |
- for value in values: |
- if value > 0.001: |
- new_values.append(value) |
- else: |
- new_values.append(0.001) |
- # Compute the sum of the log of the values. |
- log_sum = sum(map(math.log, new_values)) |
- # Raise e to that sum over the number of values. |
- mean = math.pow(math.e, (log_sum / len(new_values))) |
- # Return the rounded mean. |
- return int(round(mean)) |
- |