| OLD | NEW |
| (Empty) |
| 1 # Copyright 2013 The Chromium Authors. All rights reserved. | |
| 2 # Use of this source code is governed by a BSD-style license that can be | |
| 3 # found in the LICENSE file. | |
| 4 | |
| 5 """A collection of statistical utility functions to be used by metrics.""" | |
| 6 | |
| 7 import bisect | |
| 8 import math | |
| 9 | |
| 10 | |
| 11 def Clamp(value, low=0.0, high=1.0): | |
| 12 """Clamp a value between some low and high value.""" | |
| 13 return min(max(value, low), high) | |
| 14 | |
| 15 | |
| 16 def NormalizeSamples(samples): | |
| 17 """Sorts the samples, and map them linearly to the range [0,1]. | |
| 18 | |
| 19 They're mapped such that for the N samples, the first sample is 0.5/N and the | |
| 20 last sample is (N-0.5)/N. | |
| 21 | |
| 22 Background: The discrepancy of the sample set i/(N-1); i=0, ..., N-1 is 2/N, | |
| 23 twice the discrepancy of the sample set (i+1/2)/N; i=0, ..., N-1. In our case | |
| 24 we don't want to distinguish between these two cases, as our original domain | |
| 25 is not bounded (it is for Monte Carlo integration, where discrepancy was | |
| 26 first used). | |
| 27 """ | |
| 28 if not samples: | |
| 29 return samples, 1.0 | |
| 30 samples = sorted(samples) | |
| 31 low = min(samples) | |
| 32 high = max(samples) | |
| 33 new_low = 0.5 / len(samples) | |
| 34 new_high = (len(samples)-0.5) / len(samples) | |
| 35 if high-low == 0.0: | |
| 36 return samples, 1.0 | |
| 37 scale = (new_high - new_low) / (high - low) | |
| 38 for i in xrange(0, len(samples)): | |
| 39 samples[i] = float(samples[i] - low) * scale + new_low | |
| 40 return samples, scale | |
| 41 | |
| 42 | |
| 43 def Discrepancy(samples, interval_multiplier=10000): | |
| 44 """Computes the discrepancy of a set of 1D samples from the interval [0,1]. | |
| 45 | |
| 46 The samples must be sorted. | |
| 47 | |
| 48 http://en.wikipedia.org/wiki/Low-discrepancy_sequence | |
| 49 http://mathworld.wolfram.com/Discrepancy.html | |
| 50 """ | |
| 51 if not samples: | |
| 52 return 1.0 | |
| 53 | |
| 54 max_local_discrepancy = 0 | |
| 55 locations = [] | |
| 56 # For each location, stores the number of samples less than that location. | |
| 57 left = [] | |
| 58 # For each location, stores the number of samples less than or equal to that | |
| 59 # location. | |
| 60 right = [] | |
| 61 | |
| 62 interval_count = len(samples) * interval_multiplier | |
| 63 # Compute number of locations the will roughly result in the requested number | |
| 64 # of intervals. | |
| 65 location_count = int(math.ceil(math.sqrt(interval_count*2))) | |
| 66 inv_sample_count = 1.0 / len(samples) | |
| 67 | |
| 68 # Generate list of equally spaced locations. | |
| 69 for i in xrange(0, location_count): | |
| 70 location = float(i) / (location_count-1) | |
| 71 locations.append(location) | |
| 72 left.append(bisect.bisect_left(samples, location)) | |
| 73 right.append(bisect.bisect_right(samples, location)) | |
| 74 | |
| 75 # Iterate over the intervals defined by any pair of locations. | |
| 76 for i in xrange(0, len(locations)): | |
| 77 for j in xrange(i, len(locations)): | |
| 78 # Compute length of interval and number of samples in the interval. | |
| 79 length = locations[j] - locations[i] | |
| 80 count = right[j] - left[i] | |
| 81 | |
| 82 # Compute local discrepancy and update max_local_discrepancy. | |
| 83 local_discrepancy = abs(float(count)*inv_sample_count - length) | |
| 84 max_local_discrepancy = max(local_discrepancy, max_local_discrepancy) | |
| 85 | |
| 86 return max_local_discrepancy | |
| 87 | |
| 88 | |
| 89 def TimestampsDiscrepancy(timestamps, absolute=True, | |
| 90 interval_multiplier=10000): | |
| 91 """A discrepancy based metric for measuring timestamp jank. | |
| 92 | |
| 93 TimestampsDiscrepancy quantifies the largest area of jank observed in a series | |
| 94 of timestamps. Note that this is different from metrics based on the | |
| 95 max_time_interval. For example, the time stamp series A = [0,1,2,3,5,6] and | |
| 96 B = [0,1,2,3,5,7] have the same max_time_interval = 2, but | |
| 97 Discrepancy(B) > Discrepancy(A). | |
| 98 | |
| 99 Two variants of discrepancy can be computed: | |
| 100 | |
| 101 Relative discrepancy is following the original definition of | |
| 102 discrepancy. It characterized the largest area of jank, relative to the | |
| 103 duration of the entire time stamp series. We normalize the raw results, | |
| 104 because the best case discrepancy for a set of N samples is 1/N (for | |
| 105 equally spaced samples), and we want our metric to report 0.0 in that | |
| 106 case. | |
| 107 | |
| 108 Absolute discrepancy also characterizes the largest area of jank, but its | |
| 109 value wouldn't change (except for imprecisions due to a low | |
| 110 |interval_multiplier|) if additional 'good' intervals were added to an | |
| 111 exisiting list of time stamps. Its range is [0,inf] and the unit is | |
| 112 milliseconds. | |
| 113 | |
| 114 The time stamp series C = [0,2,3,4] and D = [0,2,3,4,5] have the same | |
| 115 absolute discrepancy, but D has lower relative discrepancy than C. | |
| 116 | |
| 117 |timestamps| may be a list of lists S = [S_1, S_2, ..., S_N], where each | |
| 118 S_i is a time stamp series. In that case, the discrepancy D(S) is: | |
| 119 D(S) = max(D(S_1), D(S_2), ..., D(S_N)) | |
| 120 """ | |
| 121 if not timestamps: | |
| 122 return 1.0 | |
| 123 | |
| 124 if isinstance(timestamps[0], list): | |
| 125 range_discrepancies = [TimestampsDiscrepancy(r) for r in timestamps] | |
| 126 return max(range_discrepancies) | |
| 127 | |
| 128 samples, sample_scale = NormalizeSamples(timestamps) | |
| 129 discrepancy = Discrepancy(samples, interval_multiplier) | |
| 130 inv_sample_count = 1.0 / len(samples) | |
| 131 if absolute: | |
| 132 # Compute absolute discrepancy | |
| 133 discrepancy /= sample_scale | |
| 134 else: | |
| 135 # Compute relative discrepancy | |
| 136 discrepancy = Clamp((discrepancy-inv_sample_count) / (1.0-inv_sample_count)) | |
| 137 return discrepancy | |
| 138 | |
| 139 | |
| 140 def DurationsDiscrepancy(durations, absolute=True, | |
| 141 interval_multiplier=10000): | |
| 142 """A discrepancy based metric for measuring duration jank. | |
| 143 | |
| 144 DurationsDiscrepancy computes a jank metric which measures how irregular a | |
| 145 given sequence of intervals is. In order to minimize jank, each duration | |
| 146 should be equally long. This is similar to how timestamp jank works, | |
| 147 and we therefore reuse the timestamp discrepancy function above to compute a | |
| 148 similar duration discrepancy number. | |
| 149 | |
| 150 Because timestamp discrepancy is defined in terms of timestamps, we first | |
| 151 convert the list of durations to monotonically increasing timestamps. | |
| 152 | |
| 153 Args: | |
| 154 durations: List of interval lengths in milliseconds. | |
| 155 absolute: See TimestampsDiscrepancy. | |
| 156 interval_multiplier: See TimestampsDiscrepancy. | |
| 157 """ | |
| 158 timestamps = reduce(lambda x, y: x + [x[-1] + y], durations, [0])[1:] | |
| 159 return TimestampsDiscrepancy(timestamps, absolute, interval_multiplier) | |
| 160 | |
| 161 | |
| 162 def ArithmeticMean(numerator, denominator): | |
| 163 """Calculates arithmetic mean. | |
| 164 | |
| 165 Both numerator and denominator can be given as either individual | |
| 166 values or lists of values which will be summed. | |
| 167 | |
| 168 Args: | |
| 169 numerator: A quantity that represents a sum total value. | |
| 170 denominator: A quantity that represents a count of the number of things. | |
| 171 | |
| 172 Returns: | |
| 173 The arithmetic mean value, or 0 if the denominator value was 0. | |
| 174 """ | |
| 175 numerator_total = Total(numerator) | |
| 176 denominator_total = Total(denominator) | |
| 177 return DivideIfPossibleOrZero(numerator_total, denominator_total) | |
| 178 | |
| 179 | |
| 180 def Total(data): | |
| 181 """Returns the float value of a number or the sum of a list.""" | |
| 182 if type(data) == float: | |
| 183 total = data | |
| 184 elif type(data) == int: | |
| 185 total = float(data) | |
| 186 elif type(data) == list: | |
| 187 total = float(sum(data)) | |
| 188 else: | |
| 189 raise TypeError | |
| 190 return total | |
| 191 | |
| 192 | |
| 193 def DivideIfPossibleOrZero(numerator, denominator): | |
| 194 """Returns the quotient, or zero if the denominator is zero.""" | |
| 195 if not denominator: | |
| 196 return 0.0 | |
| 197 else: | |
| 198 return numerator / denominator | |
| 199 | |
| 200 | |
| 201 def GeneralizedMean(values, exponent): | |
| 202 """See http://en.wikipedia.org/wiki/Generalized_mean""" | |
| 203 if not values: | |
| 204 return 0.0 | |
| 205 sum_of_powers = 0.0 | |
| 206 for v in values: | |
| 207 sum_of_powers += v ** exponent | |
| 208 return (sum_of_powers / len(values)) ** (1.0/exponent) | |
| 209 | |
| 210 | |
| 211 def Median(values): | |
| 212 """Gets the median of a list of values.""" | |
| 213 return Percentile(values, 50) | |
| 214 | |
| 215 | |
| 216 def Percentile(values, percentile): | |
| 217 """Calculates the value below which a given percentage of values fall. | |
| 218 | |
| 219 For example, if 17% of the values are less than 5.0, then 5.0 is the 17th | |
| 220 percentile for this set of values. When the percentage doesn't exactly | |
| 221 match a rank in the list of values, the percentile is computed using linear | |
| 222 interpolation between closest ranks. | |
| 223 | |
| 224 Args: | |
| 225 values: A list of numerical values. | |
| 226 percentile: A number between 0 and 100. | |
| 227 | |
| 228 Returns: | |
| 229 The Nth percentile for the list of values, where N is the given percentage. | |
| 230 """ | |
| 231 if not values: | |
| 232 return 0.0 | |
| 233 sorted_values = sorted(values) | |
| 234 n = len(values) | |
| 235 percentile /= 100.0 | |
| 236 if percentile <= 0.5 / n: | |
| 237 return sorted_values[0] | |
| 238 elif percentile >= (n - 0.5) / n: | |
| 239 return sorted_values[-1] | |
| 240 else: | |
| 241 floor_index = int(math.floor(n * percentile - 0.5)) | |
| 242 floor_value = sorted_values[floor_index] | |
| 243 ceil_value = sorted_values[floor_index+1] | |
| 244 alpha = n * percentile - 0.5 - floor_index | |
| 245 return floor_value + alpha * (ceil_value - floor_value) | |
| 246 | |
| 247 | |
| 248 def GeometricMean(values): | |
| 249 """Compute a rounded geometric mean from an array of values.""" | |
| 250 if not values: | |
| 251 return None | |
| 252 # To avoid infinite value errors, make sure no value is less than 0.001. | |
| 253 new_values = [] | |
| 254 for value in values: | |
| 255 if value > 0.001: | |
| 256 new_values.append(value) | |
| 257 else: | |
| 258 new_values.append(0.001) | |
| 259 # Compute the sum of the log of the values. | |
| 260 log_sum = sum(map(math.log, new_values)) | |
| 261 # Raise e to that sum over the number of values. | |
| 262 mean = math.pow(math.e, (log_sum / len(new_values))) | |
| 263 # Return the rounded mean. | |
| 264 return int(round(mean)) | |
| 265 | |
| OLD | NEW |