Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(212)

Unified Diff: tools/telemetry/telemetry/util/statistics.py

Issue 1647513002: Delete tools/telemetry. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « tools/telemetry/telemetry/util/rgba_color.py ('k') | tools/telemetry/telemetry/util/statistics_unittest.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/telemetry/telemetry/util/statistics.py
diff --git a/tools/telemetry/telemetry/util/statistics.py b/tools/telemetry/telemetry/util/statistics.py
deleted file mode 100644
index 381c6b487694f321da219ad5813262c825e0bc24..0000000000000000000000000000000000000000
--- a/tools/telemetry/telemetry/util/statistics.py
+++ /dev/null
@@ -1,346 +0,0 @@
-# Copyright 2014 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-"""A collection of statistical utility functions to be used by metrics."""
-
-import math
-
-
def Clamp(value, low=0.0, high=1.0):
  """Restricts |value| to the closed interval [low, high]."""
  if value < low:
    return low
  if value > high:
    return high
  return value
-
-
def NormalizeSamples(samples):
  """Sorts the samples, and maps them linearly to the range [0,1].

  They're mapped such that for the N samples, the first sample is 0.5/N and the
  last sample is (N-0.5)/N.

  Background: The discrepancy of the sample set i/(N-1); i=0, ..., N-1 is 2/N,
  twice the discrepancy of the sample set (i+1/2)/N; i=0, ..., N-1. In our case
  we don't want to distinguish between these two cases, as our original domain
  is not bounded (it is for Monte Carlo integration, where discrepancy was
  first used).

  Args:
    samples: A list of numbers.

  Returns:
    A (normalized_samples, scale) pair, where |scale| is the linear factor
    applied (1.0 for empty or constant input). The input list is not mutated.
  """
  if not samples:
    return samples, 1.0
  samples = sorted(samples)
  # The list is sorted, so the extremes are at the ends.
  low = samples[0]
  high = samples[-1]
  new_low = 0.5 / len(samples)
  new_high = (len(samples) - 0.5) / len(samples)
  if high - low == 0.0:
    # All samples are identical; map every one to the midpoint.
    return [0.5] * len(samples), 1.0
  scale = (new_high - new_low) / (high - low)
  # `range` instead of the Python-2-only `xrange` index loop keeps this
  # portable; a comprehension replaces the in-place mutation.
  samples = [float(s - low) * scale + new_low for s in samples]
  return samples, scale
-
-
def Discrepancy(samples, location_count=None):
  """Computes the discrepancy of a set of 1D samples from the interval [0,1].

  The samples must be sorted. We define the discrepancy of an empty set
  of samples to be zero.

  http://en.wikipedia.org/wiki/Low-discrepancy_sequence
  http://mathworld.wolfram.com/Discrepancy.html

  Args:
    samples: A sorted list of numbers, expected to lie in [0,1]
      (as produced by NormalizeSamples).
    location_count: Optional number of equally spaced evaluation locations
      to use instead of the sample positions themselves.

  Returns:
    A non-negative float; 0.0 for an empty sample list.
  """
  if not samples:
    return 0.0

  max_local_discrepancy = 0
  inv_sample_count = 1.0 / len(samples)
  locations = []
  # For each location, stores the number of samples less than that location.
  count_less = []
  # For each location, stores the number of samples less than or equal to that
  # location.
  count_less_equal = []

  if location_count:
    # Generate list of equally spaced locations.
    sample_index = 0
    for i in xrange(0, int(location_count)):
      location = float(i) / (location_count-1)
      locations.append(location)
      # Advance past samples strictly below this location, then past those
      # equal to it; the running index doubles as the cumulative count.
      while sample_index < len(samples) and samples[sample_index] < location:
        sample_index += 1
      count_less.append(sample_index)
      while sample_index < len(samples) and samples[sample_index] <= location:
        sample_index += 1
      count_less_equal.append(sample_index)
  else:
    # Populate locations with sample positions. Append 0 and 1 if necessary.
    if samples[0] > 0.0:
      locations.append(0.0)
      count_less.append(0)
      count_less_equal.append(0)
    for i in xrange(0, len(samples)):
      locations.append(samples[i])
      count_less.append(i)
      count_less_equal.append(i+1)
    if samples[-1] < 1.0:
      locations.append(1.0)
      count_less.append(len(samples))
      count_less_equal.append(len(samples))

  # Compute discrepancy as max(overshoot, -undershoot), where
  # overshoot = max(count_closed(i, j)/N - length(i, j)) for all i < j,
  # undershoot = min(count_open(i, j)/N - length(i, j)) for all i < j,
  # N = len(samples),
  # count_closed(i, j) is the number of points between i and j including ends,
  # count_open(i, j) is the number of points between i and j excluding ends,
  # length(i, j) is locations[i] - locations[j].

  # The following algorithm is modification of Kadane's algorithm,
  # see https://en.wikipedia.org/wiki/Maximum_subarray_problem.

  # The maximum of (count_closed(k, i-1)/N - length(k, i-1)) for any k < i-1.
  max_diff = 0
  # The minimum of (count_open(k, i-1)/N - length(k, i-1)) for any k < i-1.
  min_diff = 0
  for i in xrange(1, len(locations)):
    length = locations[i] - locations[i - 1]
    count_closed = count_less_equal[i] - count_less[i - 1]
    count_open = count_less[i] - count_less_equal[i - 1]
    # Number of points that are added if we extend a closed range that
    # ends at location (i-1).
    count_closed_increment = count_less_equal[i] - count_less_equal[i - 1]
    # Number of points that are added if we extend an open range that
    # ends at location (i-1).
    count_open_increment = count_less[i] - count_less[i - 1]

    # Either extend the previous optimal range or start a new one.
    max_diff = max(
        float(count_closed_increment) * inv_sample_count - length + max_diff,
        float(count_closed) * inv_sample_count - length)
    min_diff = min(
        float(count_open_increment) * inv_sample_count - length + min_diff,
        float(count_open) * inv_sample_count - length)

    max_local_discrepancy = max(max_diff, -min_diff, max_local_discrepancy)
  return max_local_discrepancy
-
-
def TimestampsDiscrepancy(timestamps, absolute=True,
                          location_count=None):
  """A discrepancy based metric for measuring timestamp jank.

  TimestampsDiscrepancy quantifies the largest area of jank observed in a
  series of timestamps. Note that this is different from metrics based on the
  max_time_interval. For example, the time stamp series A = [0,1,2,3,5,6] and
  B = [0,1,2,3,5,7] have the same max_time_interval = 2, but
  Discrepancy(B) > Discrepancy(A).

  Two variants of discrepancy can be computed:

  Relative discrepancy follows the original definition of discrepancy. It
  characterizes the largest area of jank relative to the duration of the
  entire time stamp series. The raw result is normalized because the best
  case discrepancy for a set of N samples is 1/N (for equally spaced
  samples), and the metric should report 0.0 in that case.

  Absolute discrepancy also characterizes the largest area of jank, but its
  value wouldn't change (except for imprecisions due to a low
  |location_count|) if additional 'good' intervals were added to an
  existing list of time stamps. Its range is [0,inf] and the unit is
  milliseconds.

  The time stamp series C = [0,2,3,4] and D = [0,2,3,4,5] have the same
  absolute discrepancy, but D has lower relative discrepancy than C.

  |timestamps| may be a list of lists S = [S_1, S_2, ..., S_N], where each
  S_i is a time stamp series. In that case, the discrepancy D(S) is:
  D(S) = max(D(S_1), D(S_2), ..., D(S_N))
  """
  if not timestamps:
    return 0.0

  # A list of lists: score each series independently and report the worst.
  if isinstance(timestamps[0], list):
    return max(TimestampsDiscrepancy(series) for series in timestamps)

  normalized, sample_scale = NormalizeSamples(timestamps)
  discrepancy = Discrepancy(normalized, location_count)
  if absolute:
    # Undo the normalization so the result is in the input's units.
    return discrepancy / sample_scale
  # Relative: rescale so equally spaced samples (discrepancy 1/N) score 0.0.
  inv_sample_count = 1.0 / len(normalized)
  return Clamp(
      (discrepancy - inv_sample_count) / (1.0 - inv_sample_count))
-
-
def DurationsDiscrepancy(durations, absolute=True,
                         location_count=None):
  """A discrepancy based metric for measuring duration jank.

  DurationsDiscrepancy computes a jank metric which measures how irregular a
  given sequence of intervals is. In order to minimize jank, each duration
  should be equally long. This is similar to how timestamp jank works,
  and we therefore reuse the timestamp discrepancy function above to compute a
  similar duration discrepancy number.

  Because timestamp discrepancy is defined in terms of timestamps, we first
  convert the list of durations to monotonically increasing timestamps.

  Args:
    durations: List of interval lengths in milliseconds.
    absolute: See TimestampsDiscrepancy.
    location_count: See TimestampsDiscrepancy.
  """
  if not durations:
    return 0.0

  # Convert durations to cumulative timestamps starting at 0. An explicit
  # loop replaces the Python-2-only bare `reduce` (moved to functools in 3.x).
  timestamps = [0]
  for duration in durations:
    timestamps.append(timestamps[-1] + duration)
  return TimestampsDiscrepancy(timestamps, absolute, location_count)
-
-
def ArithmeticMean(data):
  """Calculates arithmetic mean.

  Args:
    data: A list of samples.

  Returns:
    The arithmetic mean value, or 0 if the list is empty.
  """
  return DivideIfPossibleOrZero(Total(data), Total(len(data)))
-
-
def StandardDeviation(data):
  """Calculates the (population) standard deviation.

  Args:
    data: A list of samples.

  Returns:
    The standard deviation of the samples provided.
  """
  if len(data) == 1:
    return 0.0

  mean = ArithmeticMean(data)
  squared_deviations = [(float(sample) - mean) ** 2 for sample in data]
  return math.sqrt(ArithmeticMean(squared_deviations))
-
-
def TrapezoidalRule(data, dx):
  """Calculates the integral according to the trapezoidal rule.

  TrapezoidalRule approximates the definite integral of f from a to b by
  the composite trapezoidal rule, using n subintervals.
  http://en.wikipedia.org/wiki/Trapezoidal_rule#Uniform_grid

  Args:
    data: A list of samples.
    dx: The uniform distance along the x axis between any two samples.

  Returns:
    The area under the curve defined by the samples and the uniform distance
    according to the trapezoidal rule, or 0.0 for fewer than two samples.
  """
  n = len(data) - 1
  # Guard BEFORE touching data[0]/data[n]: the original indexed first, so an
  # empty list raised IndexError instead of returning 0.0 like the rest of
  # this module's functions do for empty input.
  if n <= 0:
    return 0.0

  # Endpoints weighted once, interior points weighted twice.
  s = data[0] + data[n] + 2 * sum(data[1:n])
  return s * dx / 2.0
-
def Total(data):
  """Returns the float value of a number or the sum of a list.

  Args:
    data: An int, a float, or a list of numbers.

  Returns:
    |data| as a float (the sum, for a list).

  Raises:
    TypeError: If |data| is any other type (including bool, which the exact
      type checks below deliberately reject).
  """
  # `type(x) is T` rather than the `type(x) == T` anti-pattern. isinstance()
  # is NOT used here because it would silently accept bool (a subclass of
  # int), changing the original contract.
  if type(data) is float:
    return data
  if type(data) is int:
    return float(data)
  if type(data) is list:
    return float(sum(data))
  raise TypeError('Expected int, float, or list; got %s' % type(data).__name__)
-
-
def DivideIfPossibleOrZero(numerator, denominator):
  """Returns the quotient, or zero if the denominator is zero."""
  if not denominator:
    return 0
  return float(numerator) / float(denominator)
-
-
def GeneralizedMean(values, exponent):
  """See http://en.wikipedia.org/wiki/Generalized_mean

  Returns 0.0 for an empty list.
  """
  if not values:
    return 0.0
  power_sum = sum(value ** exponent for value in values)
  return (power_sum / len(values)) ** (1.0 / exponent)
-
-
def Median(values):
  """Gets the median of a list of values.

  Delegates to Percentile(values, 50), which linearly interpolates between
  the two middle ranks for even-length lists and returns 0.0 for an empty
  list.
  """
  return Percentile(values, 50)
-
-
def Percentile(values, percentile):
  """Calculates the value below which a given percentage of values fall.

  For example, if 17% of the values are less than 5.0, then 5.0 is the 17th
  percentile for this set of values. When the percentage doesn't exactly
  match a rank in the list of values, the percentile is computed using linear
  interpolation between closest ranks.

  Args:
    values: A list of numerical values.
    percentile: A number between 0 and 100.

  Returns:
    The Nth percentile for the list of values, where N is the given
    percentage, or 0.0 for an empty list.
  """
  if not values:
    return 0.0
  ordered = sorted(values)
  count = len(ordered)
  fraction = percentile / 100.0
  # Below the first rank or above the last: clamp to the extremes.
  if fraction <= 0.5 / count:
    return ordered[0]
  if fraction >= (count - 0.5) / count:
    return ordered[-1]
  # Linear interpolation between the two surrounding ranks.
  lower_rank = int(math.floor(count * fraction - 0.5))
  weight = count * fraction - 0.5 - lower_rank
  lower_value = ordered[lower_rank]
  upper_value = ordered[lower_rank + 1]
  return lower_value + weight * (upper_value - lower_value)
-
-
def GeometricMean(values):
  """Compute a rounded geometric mean from an array of values.

  Returns None for an empty list. Values at or below 0.001 are clamped to
  0.001 to avoid infinite values from log(0) or negative inputs.
  """
  if not values:
    return None
  # Clamp each value to at least 0.001 before taking logs.
  clamped = [max(value, 0.001) for value in values]
  # Geometric mean = exp(mean of logs); round to the nearest integer.
  log_mean = sum(math.log(value) for value in clamped) / len(clamped)
  return int(round(math.exp(log_mean)))
« no previous file with comments | « tools/telemetry/telemetry/util/rgba_color.py ('k') | tools/telemetry/telemetry/util/statistics_unittest.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698