tools/perf/metrics/statistics.py - Issue 185953004: Add some statistics to the monsoon profile run

Side by Side Diff: tools/perf/metrics/statistics.py

Issue 185953004: Add some statistics to the monsoon profile run (Closed) Base URL: https://git.chromium.org/chromium/src.git@master

Patch Set: Rebase and provide own statistics functions Created 6 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 # Copyright 2013 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.

4

5 """A collection of statistical utility functions to be used by metrics."""

6

7 import bisect

8 import math

9

10

11 def Clamp(value, low=0.0, high=1.0):

12 """Clamp a value between some low and high value."""

13 return min(max(value, low), high)

14

15

16 def NormalizeSamples(samples):

17 """Sorts the samples, and map them linearly to the range [0,1].

18

19 They're mapped such that for the N samples, the first sample is 0.5/N and the

20 last sample is (N-0.5)/N.

21

22 Background: The discrepancy of the sample set i/(N-1); i=0, ..., N-1 is 2/N,

23 twice the discrepancy of the sample set (i+1/2)/N; i=0, ..., N-1. In our case

24 we don't want to distinguish between these two cases, as our original domain

25 is not bounded (it is for Monte Carlo integration, where discrepancy was

26 first used).

27 """

28 if not samples:

29 return samples, 1.0

30 samples = sorted(samples)

31 low = min(samples)

32 high = max(samples)

33 new_low = 0.5 / len(samples)

34 new_high = (len(samples)-0.5) / len(samples)

35 if high-low == 0.0:

36 return samples, 1.0

37 scale = (new_high - new_low) / (high - low)

38 for i in xrange(0, len(samples)):

39 samples[i] = float(samples[i] - low) * scale + new_low

40 return samples, scale

41

42

43 def Discrepancy(samples, interval_multiplier=10000):

44 """Computes the discrepancy of a set of 1D samples from the interval [0,1].

45

46 The samples must be sorted.

47

48 http://en.wikipedia.org/wiki/Low-discrepancy_sequence

49 http://mathworld.wolfram.com/Discrepancy.html

50 """

51 if not samples:

52 return 1.0

53

54 max_local_discrepancy = 0

55 locations = []

56 # For each location, stores the number of samples less than that location.

57 left = []

58 # For each location, stores the number of samples less than or equal to that

59 # location.

60 right = []

61

62 interval_count = len(samples) * interval_multiplier

63 # Compute number of locations the will roughly result in the requested number

64 # of intervals.

65 location_count = int(math.ceil(math.sqrt(interval_count*2)))

66 inv_sample_count = 1.0 / len(samples)

67

68 # Generate list of equally spaced locations.

69 for i in xrange(0, location_count):

70 location = float(i) / (location_count-1)

71 locations.append(location)

72 left.append(bisect.bisect_left(samples, location))

73 right.append(bisect.bisect_right(samples, location))

74

75 # Iterate over the intervals defined by any pair of locations.

76 for i in xrange(0, len(locations)):

77 for j in xrange(i, len(locations)):

78 # Compute length of interval and number of samples in the interval.

79 length = locations[j] - locations[i]

80 count = right[j] - left[i]

81

82 # Compute local discrepancy and update max_local_discrepancy.

83 local_discrepancy = abs(float(count)*inv_sample_count - length)

84 max_local_discrepancy = max(local_discrepancy, max_local_discrepancy)

85

86 return max_local_discrepancy

87

88

89 def TimestampsDiscrepancy(timestamps, absolute=True,

90 interval_multiplier=10000):

91 """A discrepancy based metric for measuring timestamp jank.

92

93 TimestampsDiscrepancy quantifies the largest area of jank observed in a series

94 of timestamps. Note that this is different from metrics based on the

95 max_time_interval. For example, the time stamp series A = [0,1,2,3,5,6] and

96 B = [0,1,2,3,5,7] have the same max_time_interval = 2, but

97 Discrepancy(B) > Discrepancy(A).

98

99 Two variants of discrepancy can be computed:

100

101 Relative discrepancy is following the original definition of

102 discrepancy. It characterized the largest area of jank, relative to the

103 duration of the entire time stamp series. We normalize the raw results,

104 because the best case discrepancy for a set of N samples is 1/N (for

105 equally spaced samples), and we want our metric to report 0.0 in that

106 case.

107

108 Absolute discrepancy also characterizes the largest area of jank, but its

109 value wouldn't change (except for imprecisions due to a low

110 \|interval_multiplier\|) if additional 'good' intervals were added to an

111 exisiting list of time stamps. Its range is [0,inf] and the unit is

112 milliseconds.

113

114 The time stamp series C = [0,2,3,4] and D = [0,2,3,4,5] have the same

115 absolute discrepancy, but D has lower relative discrepancy than C.

116

117 \|timestamps\| may be a list of lists S = [S_1, S_2, ..., S_N], where each

118 S_i is a time stamp series. In that case, the discrepancy D(S) is:

119 D(S) = max(D(S_1), D(S_2), ..., D(S_N))

120 """

121 if not timestamps:

122 return 1.0

123

124 if isinstance(timestamps[0], list):

125 range_discrepancies = [TimestampsDiscrepancy(r) for r in timestamps]

126 return max(range_discrepancies)

127

128 samples, sample_scale = NormalizeSamples(timestamps)

129 discrepancy = Discrepancy(samples, interval_multiplier)

130 inv_sample_count = 1.0 / len(samples)

131 if absolute:

132 # Compute absolute discrepancy

133 discrepancy /= sample_scale

134 else:

135 # Compute relative discrepancy

136 discrepancy = Clamp((discrepancy-inv_sample_count) / (1.0-inv_sample_count))

137 return discrepancy

138

139

140 def DurationsDiscrepancy(durations, absolute=True,

141 interval_multiplier=10000):

142 """A discrepancy based metric for measuring duration jank.

143

144 DurationsDiscrepancy computes a jank metric which measures how irregular a

145 given sequence of intervals is. In order to minimize jank, each duration

146 should be equally long. This is similar to how timestamp jank works,

147 and we therefore reuse the timestamp discrepancy function above to compute a

148 similar duration discrepancy number.

149

150 Because timestamp discrepancy is defined in terms of timestamps, we first

151 convert the list of durations to monotonically increasing timestamps.

152

153 Args:

154 durations: List of interval lengths in milliseconds.

155 absolute: See TimestampsDiscrepancy.

156 interval_multiplier: See TimestampsDiscrepancy.

157 """

158 timestamps = reduce(lambda x, y: x + [x[-1] + y], durations, [0])[1:]

159 return TimestampsDiscrepancy(timestamps, absolute, interval_multiplier)

160

161

162 def ArithmeticMean(numerator, denominator):

163 """Calculates arithmetic mean.

164

165 Both numerator and denominator can be given as either individual

166 values or lists of values which will be summed.

167

168 Args:

169 numerator: A quantity that represents a sum total value.

170 denominator: A quantity that represents a count of the number of things.

171

172 Returns:

173 The arithmetic mean value, or 0 if the denominator value was 0.

174 """

175 numerator_total = Total(numerator)

176 denominator_total = Total(denominator)

177 return DivideIfPossibleOrZero(numerator_total, denominator_total)

178

179

180 def Total(data):

181 """Returns the float value of a number or the sum of a list."""

182 if type(data) == float:

183 total = data

184 elif type(data) == int:

185 total = float(data)

186 elif type(data) == list:

187 total = float(sum(data))

188 else:

189 raise TypeError

190 return total

191

192

193 def DivideIfPossibleOrZero(numerator, denominator):

194 """Returns the quotient, or zero if the denominator is zero."""

195 if not denominator:

196 return 0.0

197 else:

198 return numerator / denominator

199

200

201 def GeneralizedMean(values, exponent):

202 """See http://en.wikipedia.org/wiki/Generalized_mean"""

203 if not values:

204 return 0.0

205 sum_of_powers = 0.0

206 for v in values:

207 sum_of_powers += v ** exponent

208 return (sum_of_powers / len(values)) ** (1.0/exponent)

209

210

211 def Median(values):

212 """Gets the median of a list of values."""

213 return Percentile(values, 50)

214

215

216 def Percentile(values, percentile):

217 """Calculates the value below which a given percentage of values fall.

218

219 For example, if 17% of the values are less than 5.0, then 5.0 is the 17th

220 percentile for this set of values. When the percentage doesn't exactly

221 match a rank in the list of values, the percentile is computed using linear

222 interpolation between closest ranks.

223

224 Args:

225 values: A list of numerical values.

226 percentile: A number between 0 and 100.

227

228 Returns:

229 The Nth percentile for the list of values, where N is the given percentage.

230 """

231 if not values:

232 return 0.0

233 sorted_values = sorted(values)

234 n = len(values)

235 percentile /= 100.0

236 if percentile <= 0.5 / n:

237 return sorted_values[0]

238 elif percentile >= (n - 0.5) / n:

239 return sorted_values[-1]

240 else:

241 floor_index = int(math.floor(n * percentile - 0.5))

242 floor_value = sorted_values[floor_index]

243 ceil_value = sorted_values[floor_index+1]

244 alpha = n * percentile - 0.5 - floor_index

245 return floor_value + alpha * (ceil_value - floor_value)

246

247

248 def GeometricMean(values):

249 """Compute a rounded geometric mean from an array of values."""

250 if not values:

251 return None

252 # To avoid infinite value errors, make sure no value is less than 0.001.

253 new_values = []

254 for value in values:

255 if value > 0.001:

256 new_values.append(value)

257 else:

258 new_values.append(0.001)

259 # Compute the sum of the log of the values.

260 log_sum = sum(map(math.log, new_values))

261 # Raise e to that sum over the number of values.

262 mean = math.pow(math.e, (log_sum / len(new_values)))

263 # Return the rounded mean.

264 return int(round(mean))

265

OLD	NEW

« no previous file with comments | « tools/perf/metrics/smoothness.py ('k') | tools/perf/metrics/statistics_unittest.py » ('j') | tools/perf/metrics/statistics_unittest.py » ('J')