tools/perf/metrics/statistics.py - Issue 185953004: Add some statistics to the monsoon profile run

Unified Diff: tools/perf/metrics/statistics.py

Issue 185953004: Add some statistics to the monsoon profile run (Closed) Base URL: https://git.chromium.org/chromium/src.git@master

Patch Set: Rebase and provide own statistics functions Created 6 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: tools/perf/metrics/statistics.py

diff --git a/tools/perf/metrics/statistics.py b/tools/perf/metrics/statistics.py

deleted file mode 100644

index 84ffc164a333035d2f40a2c2c596b41edc19c126..0000000000000000000000000000000000000000

--- a/tools/perf/metrics/statistics.py

+++ /dev/null

@@ -1,265 +0,0 @@

-# Use of this source code is governed by a BSD-style license that can be

-# found in the LICENSE file.

-"""A collection of statistical utility functions to be used by metrics."""

-import bisect

-import math

-def Clamp(value, low=0.0, high=1.0):

- """Clamp a value between some low and high value."""

- return min(max(value, low), high)

-def NormalizeSamples(samples):

- """Sorts the samples, and map them linearly to the range [0,1].

- They're mapped such that for the N samples, the first sample is 0.5/N and the

- last sample is (N-0.5)/N.

- Background: The discrepancy of the sample set i/(N-1); i=0, ..., N-1 is 2/N,

- twice the discrepancy of the sample set (i+1/2)/N; i=0, ..., N-1. In our case

- we don't want to distinguish between these two cases, as our original domain

- is not bounded (it is for Monte Carlo integration, where discrepancy was

- first used).

- """

- if not samples:

- return samples, 1.0

- samples = sorted(samples)

- low = min(samples)

- high = max(samples)

- new_low = 0.5 / len(samples)

- new_high = (len(samples)-0.5) / len(samples)

- if high-low == 0.0:

- return samples, 1.0

- scale = (new_high - new_low) / (high - low)

- for i in xrange(0, len(samples)):

- samples[i] = float(samples[i] - low) * scale + new_low

- return samples, scale

-def Discrepancy(samples, interval_multiplier=10000):

- """Computes the discrepancy of a set of 1D samples from the interval [0,1].

- The samples must be sorted.

- http://en.wikipedia.org/wiki/Low-discrepancy_sequence

- http://mathworld.wolfram.com/Discrepancy.html

- """

- if not samples:

- return 1.0

- max_local_discrepancy = 0

- locations = []

- # For each location, stores the number of samples less than that location.

- left = []

- # For each location, stores the number of samples less than or equal to that

- # location.

- right = []

- interval_count = len(samples) * interval_multiplier

- # Compute number of locations the will roughly result in the requested number

- # of intervals.

- location_count = int(math.ceil(math.sqrt(interval_count*2)))

- inv_sample_count = 1.0 / len(samples)

- # Generate list of equally spaced locations.

- for i in xrange(0, location_count):

- location = float(i) / (location_count-1)

- locations.append(location)

- left.append(bisect.bisect_left(samples, location))

- right.append(bisect.bisect_right(samples, location))

- # Iterate over the intervals defined by any pair of locations.

- for i in xrange(0, len(locations)):

- for j in xrange(i, len(locations)):

- # Compute length of interval and number of samples in the interval.

- length = locations[j] - locations[i]

- count = right[j] - left[i]

- # Compute local discrepancy and update max_local_discrepancy.

- local_discrepancy = abs(float(count)*inv_sample_count - length)

- max_local_discrepancy = max(local_discrepancy, max_local_discrepancy)

- return max_local_discrepancy

-def TimestampsDiscrepancy(timestamps, absolute=True,

- interval_multiplier=10000):

- """A discrepancy based metric for measuring timestamp jank.

- TimestampsDiscrepancy quantifies the largest area of jank observed in a series

- of timestamps. Note that this is different from metrics based on the

- max_time_interval. For example, the time stamp series A = [0,1,2,3,5,6] and

- B = [0,1,2,3,5,7] have the same max_time_interval = 2, but

- Discrepancy(B) > Discrepancy(A).

- Two variants of discrepancy can be computed:

- Relative discrepancy is following the original definition of

- discrepancy. It characterized the largest area of jank, relative to the

- duration of the entire time stamp series. We normalize the raw results,

- because the best case discrepancy for a set of N samples is 1/N (for

- equally spaced samples), and we want our metric to report 0.0 in that

- case.

- Absolute discrepancy also characterizes the largest area of jank, but its

- value wouldn't change (except for imprecisions due to a low

- |interval_multiplier|) if additional 'good' intervals were added to an

- exisiting list of time stamps. Its range is [0,inf] and the unit is

- milliseconds.

- The time stamp series C = [0,2,3,4] and D = [0,2,3,4,5] have the same

- absolute discrepancy, but D has lower relative discrepancy than C.

- |timestamps| may be a list of lists S = [S_1, S_2, ..., S_N], where each

- S_i is a time stamp series. In that case, the discrepancy D(S) is:

- D(S) = max(D(S_1), D(S_2), ..., D(S_N))

- """

- if not timestamps:

- return 1.0

- if isinstance(timestamps[0], list):

- range_discrepancies = [TimestampsDiscrepancy(r) for r in timestamps]

- return max(range_discrepancies)

- samples, sample_scale = NormalizeSamples(timestamps)

- discrepancy = Discrepancy(samples, interval_multiplier)

- inv_sample_count = 1.0 / len(samples)

- if absolute:

- # Compute absolute discrepancy

- discrepancy /= sample_scale

- else:

- # Compute relative discrepancy

- discrepancy = Clamp((discrepancy-inv_sample_count) / (1.0-inv_sample_count))

- return discrepancy

-def DurationsDiscrepancy(durations, absolute=True,

- interval_multiplier=10000):

- """A discrepancy based metric for measuring duration jank.

- DurationsDiscrepancy computes a jank metric which measures how irregular a

- given sequence of intervals is. In order to minimize jank, each duration

- should be equally long. This is similar to how timestamp jank works,

- and we therefore reuse the timestamp discrepancy function above to compute a

- similar duration discrepancy number.

- Because timestamp discrepancy is defined in terms of timestamps, we first

- convert the list of durations to monotonically increasing timestamps.

- Args:

- durations: List of interval lengths in milliseconds.

- absolute: See TimestampsDiscrepancy.

- interval_multiplier: See TimestampsDiscrepancy.

- """

- timestamps = reduce(lambda x, y: x + [x[-1] + y], durations, [0])[1:]

- return TimestampsDiscrepancy(timestamps, absolute, interval_multiplier)

-def ArithmeticMean(numerator, denominator):

- """Calculates arithmetic mean.

- Both numerator and denominator can be given as either individual

- values or lists of values which will be summed.

- Args:

- numerator: A quantity that represents a sum total value.

- denominator: A quantity that represents a count of the number of things.

- Returns:

- The arithmetic mean value, or 0 if the denominator value was 0.

- """

- numerator_total = Total(numerator)

- denominator_total = Total(denominator)

- return DivideIfPossibleOrZero(numerator_total, denominator_total)

-def Total(data):

- """Returns the float value of a number or the sum of a list."""

- if type(data) == float:

- total = data

- elif type(data) == int:

- total = float(data)

- elif type(data) == list:

- total = float(sum(data))

- else:

- raise TypeError

- return total

-def DivideIfPossibleOrZero(numerator, denominator):

- """Returns the quotient, or zero if the denominator is zero."""

- if not denominator:

- return 0.0

- else:

- return numerator / denominator

-def GeneralizedMean(values, exponent):

- """See http://en.wikipedia.org/wiki/Generalized_mean"""

- if not values:

- return 0.0

- sum_of_powers = 0.0

- for v in values:

- sum_of_powers += v ** exponent

- return (sum_of_powers / len(values)) ** (1.0/exponent)

-def Median(values):

- """Gets the median of a list of values."""

- return Percentile(values, 50)

-def Percentile(values, percentile):

- """Calculates the value below which a given percentage of values fall.

- For example, if 17% of the values are less than 5.0, then 5.0 is the 17th

- percentile for this set of values. When the percentage doesn't exactly

- match a rank in the list of values, the percentile is computed using linear

- interpolation between closest ranks.

- Args:

- values: A list of numerical values.

- percentile: A number between 0 and 100.

- Returns:

- The Nth percentile for the list of values, where N is the given percentage.

- """

- if not values:

- return 0.0

- sorted_values = sorted(values)

- n = len(values)

- percentile /= 100.0

- if percentile <= 0.5 / n:

- return sorted_values[0]

- elif percentile >= (n - 0.5) / n:

- return sorted_values[-1]

- else:

- floor_index = int(math.floor(n * percentile - 0.5))

- floor_value = sorted_values[floor_index]

- ceil_value = sorted_values[floor_index+1]

- alpha = n * percentile - 0.5 - floor_index

- return floor_value + alpha * (ceil_value - floor_value)

-def GeometricMean(values):

- """Compute a rounded geometric mean from an array of values."""

- if not values:

- return None

- # To avoid infinite value errors, make sure no value is less than 0.001.

- new_values = []

- for value in values:

- if value > 0.001:

- new_values.append(value)

- else:

- new_values.append(0.001)

- # Compute the sum of the log of the values.

- log_sum = sum(map(math.log, new_values))

- # Raise e to that sum over the number of values.

- mean = math.pow(math.e, (log_sum / len(new_values)))

- # Return the rounded mean.

- return int(round(mean))

« no previous file with comments | « tools/perf/metrics/smoothness.py ('k') | tools/perf/metrics/statistics_unittest.py » ('j') | tools/perf/metrics/statistics_unittest.py » ('J')