Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(750)

Side by Side Diff: tools/perf/metrics/statistics.py

Issue 185953004: Add some statistics to the monsoon profile run (Closed) Base URL: https://git.chromium.org/chromium/src.git@master
Patch Set: Rebase to apply upon latest trunk Created 6 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « tools/perf/metrics/smoothness.py ('k') | tools/perf/metrics/statistics_unittest.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # Copyright 2013 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 """A collection of statistical utility functions to be used by metrics."""
6
7 import bisect
8 import math
9
10
11 def Clamp(value, low=0.0, high=1.0):
12 """Clamp a value between some low and high value."""
13 return min(max(value, low), high)
14
15
16 def NormalizeSamples(samples):
17 """Sorts the samples, and map them linearly to the range [0,1].
18
19 They're mapped such that for the N samples, the first sample is 0.5/N and the
20 last sample is (N-0.5)/N.
21
22 Background: The discrepancy of the sample set i/(N-1); i=0, ..., N-1 is 2/N,
23 twice the discrepancy of the sample set (i+1/2)/N; i=0, ..., N-1. In our case
24 we don't want to distinguish between these two cases, as our original domain
25 is not bounded (it is for Monte Carlo integration, where discrepancy was
26 first used).
27 """
28 if not samples:
29 return samples, 1.0
30 samples = sorted(samples)
31 low = min(samples)
32 high = max(samples)
33 new_low = 0.5 / len(samples)
34 new_high = (len(samples)-0.5) / len(samples)
35 if high-low == 0.0:
36 return samples, 1.0
37 scale = (new_high - new_low) / (high - low)
38 for i in xrange(0, len(samples)):
39 samples[i] = float(samples[i] - low) * scale + new_low
40 return samples, scale
41
42
43 def Discrepancy(samples, interval_multiplier=10000):
44 """Computes the discrepancy of a set of 1D samples from the interval [0,1].
45
46 The samples must be sorted.
47
48 http://en.wikipedia.org/wiki/Low-discrepancy_sequence
49 http://mathworld.wolfram.com/Discrepancy.html
50 """
51 if not samples:
52 return 1.0
53
54 max_local_discrepancy = 0
55 locations = []
56 # For each location, stores the number of samples less than that location.
57 left = []
58 # For each location, stores the number of samples less than or equal to that
59 # location.
60 right = []
61
62 interval_count = len(samples) * interval_multiplier
63 # Compute number of locations the will roughly result in the requested number
64 # of intervals.
65 location_count = int(math.ceil(math.sqrt(interval_count*2)))
66 inv_sample_count = 1.0 / len(samples)
67
68 # Generate list of equally spaced locations.
69 for i in xrange(0, location_count):
70 location = float(i) / (location_count-1)
71 locations.append(location)
72 left.append(bisect.bisect_left(samples, location))
73 right.append(bisect.bisect_right(samples, location))
74
75 # Iterate over the intervals defined by any pair of locations.
76 for i in xrange(0, len(locations)):
77 for j in xrange(i, len(locations)):
78 # Compute length of interval and number of samples in the interval.
79 length = locations[j] - locations[i]
80 count = right[j] - left[i]
81
82 # Compute local discrepancy and update max_local_discrepancy.
83 local_discrepancy = abs(float(count)*inv_sample_count - length)
84 max_local_discrepancy = max(local_discrepancy, max_local_discrepancy)
85
86 return max_local_discrepancy
87
88
89 def TimestampsDiscrepancy(timestamps, absolute=True,
90 interval_multiplier=10000):
91 """A discrepancy based metric for measuring timestamp jank.
92
93 TimestampsDiscrepancy quantifies the largest area of jank observed in a series
94 of timestamps. Note that this is different from metrics based on the
95 max_time_interval. For example, the time stamp series A = [0,1,2,3,5,6] and
96 B = [0,1,2,3,5,7] have the same max_time_interval = 2, but
97 Discrepancy(B) > Discrepancy(A).
98
99 Two variants of discrepancy can be computed:
100
101 Relative discrepancy is following the original definition of
102 discrepancy. It characterized the largest area of jank, relative to the
103 duration of the entire time stamp series. We normalize the raw results,
104 because the best case discrepancy for a set of N samples is 1/N (for
105 equally spaced samples), and we want our metric to report 0.0 in that
106 case.
107
108 Absolute discrepancy also characterizes the largest area of jank, but its
109 value wouldn't change (except for imprecisions due to a low
110 |interval_multiplier|) if additional 'good' intervals were added to an
111 exisiting list of time stamps. Its range is [0,inf] and the unit is
112 milliseconds.
113
114 The time stamp series C = [0,2,3,4] and D = [0,2,3,4,5] have the same
115 absolute discrepancy, but D has lower relative discrepancy than C.
116
117 |timestamps| may be a list of lists S = [S_1, S_2, ..., S_N], where each
118 S_i is a time stamp series. In that case, the discrepancy D(S) is:
119 D(S) = max(D(S_1), D(S_2), ..., D(S_N))
120 """
121 if not timestamps:
122 return 1.0
123
124 if isinstance(timestamps[0], list):
125 range_discrepancies = [TimestampsDiscrepancy(r) for r in timestamps]
126 return max(range_discrepancies)
127
128 samples, sample_scale = NormalizeSamples(timestamps)
129 discrepancy = Discrepancy(samples, interval_multiplier)
130 inv_sample_count = 1.0 / len(samples)
131 if absolute:
132 # Compute absolute discrepancy
133 discrepancy /= sample_scale
134 else:
135 # Compute relative discrepancy
136 discrepancy = Clamp((discrepancy-inv_sample_count) / (1.0-inv_sample_count))
137 return discrepancy
138
139
140 def DurationsDiscrepancy(durations, absolute=True,
141 interval_multiplier=10000):
142 """A discrepancy based metric for measuring duration jank.
143
144 DurationsDiscrepancy computes a jank metric which measures how irregular a
145 given sequence of intervals is. In order to minimize jank, each duration
146 should be equally long. This is similar to how timestamp jank works,
147 and we therefore reuse the timestamp discrepancy function above to compute a
148 similar duration discrepancy number.
149
150 Because timestamp discrepancy is defined in terms of timestamps, we first
151 convert the list of durations to monotonically increasing timestamps.
152
153 Args:
154 durations: List of interval lengths in milliseconds.
155 absolute: See TimestampsDiscrepancy.
156 interval_multiplier: See TimestampsDiscrepancy.
157 """
158 timestamps = reduce(lambda x, y: x + [x[-1] + y], durations, [0])[1:]
159 return TimestampsDiscrepancy(timestamps, absolute, interval_multiplier)
160
161
162 def ArithmeticMean(numerator, denominator):
163 """Calculates arithmetic mean.
164
165 Both numerator and denominator can be given as either individual
166 values or lists of values which will be summed.
167
168 Args:
169 numerator: A quantity that represents a sum total value.
170 denominator: A quantity that represents a count of the number of things.
171
172 Returns:
173 The arithmetic mean value, or 0 if the denominator value was 0.
174 """
175 numerator_total = Total(numerator)
176 denominator_total = Total(denominator)
177 return DivideIfPossibleOrZero(numerator_total, denominator_total)
178
179
180 def Total(data):
181 """Returns the float value of a number or the sum of a list."""
182 if type(data) == float:
183 total = data
184 elif type(data) == int:
185 total = float(data)
186 elif type(data) == list:
187 total = float(sum(data))
188 else:
189 raise TypeError
190 return total
191
192
193 def DivideIfPossibleOrZero(numerator, denominator):
194 """Returns the quotient, or zero if the denominator is zero."""
195 if not denominator:
196 return 0.0
197 else:
198 return numerator / denominator
199
200
201 def GeneralizedMean(values, exponent):
202 """See http://en.wikipedia.org/wiki/Generalized_mean"""
203 if not values:
204 return 0.0
205 sum_of_powers = 0.0
206 for v in values:
207 sum_of_powers += v ** exponent
208 return (sum_of_powers / len(values)) ** (1.0/exponent)
209
210
211 def Median(values):
212 """Gets the median of a list of values."""
213 return Percentile(values, 50)
214
215
216 def Percentile(values, percentile):
217 """Calculates the value below which a given percentage of values fall.
218
219 For example, if 17% of the values are less than 5.0, then 5.0 is the 17th
220 percentile for this set of values. When the percentage doesn't exactly
221 match a rank in the list of values, the percentile is computed using linear
222 interpolation between closest ranks.
223
224 Args:
225 values: A list of numerical values.
226 percentile: A number between 0 and 100.
227
228 Returns:
229 The Nth percentile for the list of values, where N is the given percentage.
230 """
231 if not values:
232 return 0.0
233 sorted_values = sorted(values)
234 n = len(values)
235 percentile /= 100.0
236 if percentile <= 0.5 / n:
237 return sorted_values[0]
238 elif percentile >= (n - 0.5) / n:
239 return sorted_values[-1]
240 else:
241 floor_index = int(math.floor(n * percentile - 0.5))
242 floor_value = sorted_values[floor_index]
243 ceil_value = sorted_values[floor_index+1]
244 alpha = n * percentile - 0.5 - floor_index
245 return floor_value + alpha * (ceil_value - floor_value)
246
247
248 def GeometricMean(values):
249 """Compute a rounded geometric mean from an array of values."""
250 if not values:
251 return None
252 # To avoid infinite value errors, make sure no value is less than 0.001.
253 new_values = []
254 for value in values:
255 if value > 0.001:
256 new_values.append(value)
257 else:
258 new_values.append(0.001)
259 # Compute the sum of the log of the values.
260 log_sum = sum(map(math.log, new_values))
261 # Raise e to that sum over the number of values.
262 mean = math.pow(math.e, (log_sum / len(new_values)))
263 # Return the rounded mean.
264 return int(round(mean))
265
OLDNEW
« no previous file with comments | « tools/perf/metrics/smoothness.py ('k') | tools/perf/metrics/statistics_unittest.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698