Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(223)

Side by Side Diff: tools/auto_bisect/math_utils.py

Issue 417013003: Move statistical functions in bisect script to their own module. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Rebased and fixed test failures Created 6 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | tools/auto_bisect/math_utils_test.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # Copyright 2014 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 """General statistical or mathematical functions."""
6
7 import math
8
9
def TruncatedMean(data_set, truncate_percent):
  """Calculates the truncated mean of a set of values.

  Note that this isn't just the mean of the set of values with the highest
  and lowest values discarded; the non-discarded values are also weighted
  differently depending how many values are discarded. When the number of
  values to discard from each end is fractional, the outermost kept values
  only contribute the fraction of their weight that was not discarded.

  Data sets with two or fewer values are never truncated; their plain
  arithmetic mean is returned.

  Args:
    data_set: Non-empty list of values.
    truncate_percent: The % from the upper and lower portions of the data set
        to discard, expressed as a value in [0, 1].

  Returns:
    The truncated mean as a float.

  Raises:
    TypeError: The data set was empty after discarding values.
    ZeroDivisionError: A value remains but its weight is zero (odd-sized
        data set with a truncate_percent of exactly 0.5).
  """
  kept_values = list(data_set)
  kept_weight = len(kept_values)
  # Fraction of each outermost kept value that survives truncation.
  edge_weight = 1.0

  if len(kept_values) > 2:
    kept_values.sort()
    count = len(kept_values)

    discard_num_float = count * truncate_percent
    discard_num_int = int(math.floor(discard_num_float))
    kept_weight = count - discard_num_float * 2

    kept_values = kept_values[discard_num_int:count - discard_num_int]
    edge_weight = 1.0 - (discard_num_float - discard_num_int)

  if not kept_values:
    # Raised explicitly (rather than relying on reduce() failing on an empty
    # sequence) so the documented exception does not depend on the builtin.
    raise TypeError('TruncatedMean() of an empty data set')

  weights = [1.0] * len(kept_values)
  if edge_weight < 1:
    if len(kept_values) == 1:
      # The single remaining value is trimmed from both sides at once; it
      # must not be counted twice as a separate "first" and "last" value.
      weights[0] = 2.0 * edge_weight - 1.0
    else:
      weights[0] = edge_weight
      weights[-1] = edge_weight

  weighted_sum = sum(float(v) * w for v, w in zip(kept_values, weights))
  return weighted_sum / kept_weight
53
54
def Mean(values):
  """Returns the arithmetic mean of a list of values.

  Computed as a truncated mean in which nothing is discarded.
  """
  return TruncatedMean(values, truncate_percent=0.0)
58
59
def StandardDeviation(values):
  """Returns the sample standard deviation of a list of values.

  The sum of squared deviations from the mean is divided by (n - 1) before
  the square root is taken. A list holding exactly one value yields 0.0.
  """
  count = len(values)
  if count == 1:
    return 0.0

  center = Mean(values)
  deviations = (float(value) - center for value in values)
  squares = (float(deviation * deviation) for deviation in deviations)
  return math.sqrt(sum(squares) / (count - 1))
72
73
def RelativeChange(before, after):
  """Returns the relative change of before and after, relative to before.

  There are several different ways to define relative difference between
  two numbers; sometimes it is defined as relative to the smaller number,
  or to the mean of the two numbers. This version returns the difference
  relative to the first of the two numbers.

  Args:
    before: A number representing an earlier value.
    after: Another number, representing a later value.

  Returns:
    A non-negative floating point number; 0.1 represents a 10% change.
    NaN is returned when before is zero and after is not, since the change
    relative to zero is undefined.
  """
  if before == after:
    return 0.0
  if before == 0:
    return float('nan')
  # Convert to float before dividing so that integer arguments are not
  # floor-divided on interpreters where "/" on ints truncates.
  difference = float(after - before)
  return math.fabs(difference / before)
95
96
def PooledStandardError(work_sets):
  """Calculates the pooled sample standard error for several sets of values.

  The pooled variance is each set's sample variance weighted by its degrees
  of freedom (size - 1); its square root is then scaled by the square root
  of the sum of the reciprocal set sizes.

  Args:
    work_sets: A collection of lists of numbers. Empty lists are ignored.

  Returns:
    The pooled standard error as a float, or 0.0 when the sets provide no
    degrees of freedom (no sets, or every set has at most one value).
  """
  numerator = 0.0
  denominator1 = 0.0
  denominator2 = 0.0

  for current_set in work_sets:
    set_size = len(current_set)
    # Review nit (prasadv): only evaluate the terms for non-empty sets. An
    # empty set has no deviation and would otherwise contribute -1 degrees
    # of freedom and a division by zero.
    if not set_size:
      continue
    std_dev = StandardDeviation(current_set)
    numerator += (set_size - 1) * std_dev ** 2
    denominator1 += set_size - 1
    denominator2 += 1.0 / set_size

  if denominator1:
    return math.sqrt(numerator / denominator1) * math.sqrt(denominator2)
  return 0.0
111
112
113 # Redefining built-in 'StandardError'
114 # pylint: disable=W0622
def StandardError(values):
  """Returns the standard error of a list of values.

  This is the sample standard deviation divided by the square root of the
  number of values. Lists with fewer than two values yield 0.0.
  """
  sample_size = len(values)
  if sample_size < 2:
    return 0.0
  return StandardDeviation(values) / math.sqrt(sample_size)
OLDNEW
« no previous file with comments | « no previous file | tools/auto_bisect/math_utils_test.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698