OLD | NEW |
| (Empty) |
1 # Copyright 2015 The Chromium Authors. All rights reserved. | |
2 # Use of this source code is governed by a BSD-style license that can be | |
3 # found in the LICENSE file. | |
4 | |
5 import bisect | |
6 import collections | |
7 | |
8 | |
9 class Bucketer(object): | |
10 """Bucketing function for histograms recorded by the Distribution class.""" | |
11 | |
12 def __init__(self, width, growth_factor, num_finite_buckets): | |
13 """The bucket sizes are controlled by width and growth_factor, and the total | |
14 number of buckets is set by num_finite_buckets: | |
15 | |
16 Args: | |
17 width: fixed size of each bucket. | |
18 growth_factor: if non-zero, the size of each bucket increases by another | |
19 multiplicative factor of this factor (see lower bound formula below). | |
20 num_finite_buckets: the number of finite buckets. There are two | |
21 additional buckets - an underflow and an overflow bucket - that have | |
22 lower and upper bounds of Infinity. | |
23 | |
24 Specify a width for fixed-size buckets or specify a growth_factor for bucket | |
25 sizes that follow a geometric progression. Specifying both is valid as | |
26 well:: | |
27 | |
28 lower bound of bucket i = width * i + growth_factor ^ (i - 1) | |
29 """ | |
30 | |
31 if num_finite_buckets < 0: | |
32 raise ValueError('num_finite_buckets must be >= 0 (was %d)' % | |
33 num_finite_buckets) | |
34 | |
35 self.width = width | |
36 self.growth_factor = growth_factor | |
37 self.num_finite_buckets = num_finite_buckets | |
38 self.total_buckets = num_finite_buckets + 2 | |
39 self.underflow_bucket = 0 | |
40 self.overflow_bucket = self.total_buckets - 1 | |
41 | |
42 self._lower_bounds = list(self._generate_lower_bounds()) | |
43 | |
44 def _generate_lower_bounds(self): | |
45 yield float('-Inf') | |
46 yield 0 | |
47 | |
48 previous = 0 | |
49 for i in xrange(self.num_finite_buckets): | |
50 lower_bound = self.width * (i + 1) | |
51 if self.growth_factor != 0: | |
52 lower_bound += self.growth_factor ** i | |
53 | |
54 if lower_bound <= previous: | |
55 raise ValueError('bucket boundaries must be monotonically increasing') | |
56 yield lower_bound | |
57 previous = lower_bound | |
58 | |
59 def bucket_for_value(self, value): | |
60 """Returns the index of the bucket that this value belongs to.""" | |
61 | |
62 # bisect.bisect_left is wrong because the buckets are of [lower, upper) form | |
63 return bisect.bisect(self._lower_bounds, value) - 1 | |
64 | |
65 def bucket_boundaries(self, bucket): | |
66 """Returns a tuple that is the [lower, upper) bounds of this bucket. | |
67 | |
68 The lower bound of the first bucket is -Infinity, and the upper bound of the | |
69 last bucket is +Infinity. | |
70 """ | |
71 | |
72 if bucket < 0 or bucket >= self.total_buckets: | |
73 raise IndexError('bucket %d out of range' % bucket) | |
74 if bucket == self.total_buckets - 1: | |
75 return (self._lower_bounds[bucket], float('Inf')) | |
76 return (self._lower_bounds[bucket], self._lower_bounds[bucket + 1]) | |
77 | |
78 def all_bucket_boundaries(self): | |
79 """Generator that produces the [lower, upper) bounds of all buckets. | |
80 | |
81 This is equivalent to calling:: | |
82 | |
83 (b.bucket_boundaries(i) for i in xrange(b.total_buckets)) | |
84 | |
85 but is more efficient. | |
86 """ | |
87 | |
88 lower = self._lower_bounds[0] | |
89 for i in xrange(1, self.total_buckets): | |
90 upper = self._lower_bounds[i] | |
91 yield (lower, upper) | |
92 lower = upper | |
93 | |
94 yield (lower, float('Inf')) | |
95 | |
96 | |
97 def FixedWidthBucketer(width, num_finite_buckets=100): | |
98 """Convenience function that returns a fixed width Bucketer.""" | |
99 return Bucketer(width=width, growth_factor=0.0, | |
100 num_finite_buckets=num_finite_buckets) | |
101 | |
102 | |
103 def GeometricBucketer(growth_factor=10**0.2, num_finite_buckets=100): | |
104 """Convenience function that returns a geometric progression Bucketer.""" | |
105 return Bucketer(width=0, growth_factor=growth_factor, | |
106 num_finite_buckets=num_finite_buckets) | |
107 | |
108 | |
109 class Distribution(object): | |
110 """Holds a histogram distribution. | |
111 | |
112 Buckets are chosen for values by the provided Bucketer. | |
113 """ | |
114 | |
115 def __init__(self, bucketer): | |
116 self.bucketer = bucketer | |
117 self.sum = 0 | |
118 self.count = 0 | |
119 self.buckets = collections.defaultdict(int) | |
120 | |
121 def add(self, value): | |
122 self.buckets[self.bucketer.bucket_for_value(value)] += 1 | |
123 self.sum += value | |
124 self.count += 1 | |
OLD | NEW |