tools/auto_bisect/math_utils.py - Issue 736573002: Add unit tests for functions in math_utils.py

Side by Side Diff: tools/auto_bisect/math_utils.py

Issue 736573002: Add unit tests for functions in math_utils.py (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Updated an incorrect comment. Created 6 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 # Copyright 2014 The Chromium Authors. All rights reserved.	1 # Copyright 2014 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 """General statistical or mathematical functions."""	5 """General statistical or mathematical functions."""

6	6

7 import math	7 import math

8	8

9	9

10 def TruncatedMean(data_set, truncate_percent):	10 def TruncatedMean(data_set, truncate_proportion):

11 """Calculates the truncated mean of a set of values.	11 """Calculates the truncated mean of a set of values.

12	12

13 Note that this isn't just the mean of the set of values with the highest	13 Note that this isn't just the mean of the set of values with the highest

14 and lowest values discarded; the non-discarded values are also weighted	14 and lowest values discarded; the non-discarded values are also weighted

15 differently depending how many values are discarded.	15 differently depending how many values are discarded.

16	16

	17 NOTE: If there's not much benefit from keeping and weighting

	18 partial values, it might be better to use a simplified truncated mean

	19 function without weighting.

	20

17 Args:	21 Args:

18 data_set: Non-empty list of values.	22 data_set: Non-empty list of values.

19 truncate_percent: How much of the upper and lower portions of the data set	23 truncate_proportion: How much of the upper and lower portions of the data

20 to discard, expressed as a value in [0, 1].	24 set to discard, expressed as a value in the range [0, 1].

	25 Note: a value of 0.5 or greater would be meaningless, since the kept weight would be zero or negative.

21	26

22 Returns:	27 Returns:

23 The truncated mean as a float.	28 The truncated mean as a float.

24	29

25 Raises:	30 Raises:

26 TypeError: The data set was empty after discarding values.	31 TypeError: The data set was empty after discarding values.

27 """	32 """

28 if len(data_set) > 2:	33 if len(data_set) > 2:

29 data_set = sorted(data_set)	34 data_set = sorted(data_set)

30	35

31 discard_num_float = len(data_set) * truncate_percent	36 discard_num_float = len(data_set) * truncate_proportion

32 discard_num_int = int(math.floor(discard_num_float))	37 discard_num_int = int(math.floor(discard_num_float))

33 kept_weight = len(data_set) - discard_num_float * 2	38 kept_weight = len(data_set) - (discard_num_float * 2)

34	39

35 data_set = data_set[discard_num_int:len(data_set)-discard_num_int]	40 data_set = data_set[discard_num_int:len(data_set)-discard_num_int]

36	41

37 weight_left = 1.0 - (discard_num_float - discard_num_int)	42 weight_left = 1.0 - (discard_num_float - discard_num_int)

38	43

39 if weight_left < 1:	44 if weight_left < 1:

40 # If the % to discard leaves a fractional portion, need to weight those	45 # If the % to discard leaves a fractional portion, need to weight those

41 # values.	46 # values.

42 unweighted_vals = data_set[1:len(data_set)-1]	47 unweighted_vals = data_set[1:len(data_set)-1]

43 weighted_vals = [data_set[0], data_set[len(data_set)-1]]	48 weighted_vals = [data_set[0], data_set[len(data_set)-1]]

44 weighted_vals = [w * weight_left for w in weighted_vals]	49 weighted_vals = [w * weight_left for w in weighted_vals]

45 data_set = weighted_vals + unweighted_vals	50 data_set = weighted_vals + unweighted_vals

46 else:	51 else:

47 kept_weight = len(data_set)	52 kept_weight = len(data_set)

48	53

49 truncated_mean = reduce(lambda x, y: float(x) + float(y),	54 data_sum = reduce(lambda x, y: float(x) + float(y), data_set)

50 data_set) / kept_weight	55 truncated_mean = data_sum / kept_weight

51

52 return truncated_mean	56 return truncated_mean

53	57

54	58

55 def Mean(values):	59 def Mean(values):

56 """Calculates the arithmetic mean of a list of values."""	60 """Calculates the arithmetic mean of a list of values."""

57 return TruncatedMean(values, 0.0)	61 return TruncatedMean(values, 0.0)

58	62

59	63

60 def Variance(values):	64 def Variance(values):

61 """Calculates the sample variance."""	65 """Calculates the sample variance."""

(...skipping 57 matching lines...) Expand 10 before | Expand all | Expand 10 after
119 if denominator1 == 0:	123 if denominator1 == 0:

120 return 0.0	124 return 0.0

121	125

122 return math.sqrt(numerator / denominator1) * math.sqrt(denominator2)	126 return math.sqrt(numerator / denominator1) * math.sqrt(denominator2)

123	127

124	128

125 # Redefining built-in 'StandardError'	129 # Redefining built-in 'StandardError'

126 # pylint: disable=W0622	130 # pylint: disable=W0622

127 def StandardError(values):	131 def StandardError(values):

128 """Calculates the standard error of a list of values."""	132 """Calculates the standard error of a list of values."""

	133 # NOTE: This behavior of returning 0.0 in the case of an empty list is

	134 # inconsistent with Variance and StandardDeviation above.

129 if len(values) <= 1:	135 if len(values) <= 1:

130 return 0.0	136 return 0.0

131 std_dev = StandardDeviation(values)	137 std_dev = StandardDeviation(values)

132 return std_dev / math.sqrt(len(values))	138 return std_dev / math.sqrt(len(values))

	139

OLD	NEW

« no previous file with comments | « no previous file | tools/auto_bisect/math_utils_test.py » ('j') | no next file with comments »