| OLD | NEW |
| 1 # Copyright 2015 The Chromium Authors. All rights reserved. | 1 # Copyright 2015 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import bisect | 5 import bisect |
| 6 import collections | 6 import collections |
| 7 | 7 |
| 8 | 8 |
| 9 class Bucketer(object): | 9 class _Bucketer(object): |
| 10 """Bucketing function for histograms recorded by the Distribution class.""" | 10 """Bucketing function for histograms recorded by the Distribution class.""" |
| 11 | 11 |
| 12 def __init__(self, width, growth_factor, num_finite_buckets): | 12 def __init__(self, width, growth_factor, num_finite_buckets, scale=1.0): |
| 13 """The bucket sizes are controlled by width and growth_factor, and the total | 13 """The bucket sizes are controlled by width and growth_factor, and the total |
| 14 number of buckets is set by num_finite_buckets: | 14 number of buckets is set by num_finite_buckets: |
| 15 | 15 |
| 16 Args: | 16 Args: |
| 17 width: fixed size of each bucket. | 17 width: fixed size of each bucket (ignores |scale|). |
| 18 growth_factor: if non-zero, the size of each bucket increases by another | 18 growth_factor: if non-zero, the size of each bucket increases by another |
| 19 multiplicative factor of this factor (see lower bound formula below). | 19 multiplicative factor of this factor (see lower bound formula below). |
| 20 num_finite_buckets: the number of finite buckets. There are two | 20 num_finite_buckets: the number of finite buckets. There are two |
| 21 additional buckets - an underflow and an overflow bucket - that have | 21 additional buckets - an underflow and an overflow bucket - that have |
| 22 lower and upper bounds of Infinity. | 22 lower and upper bounds of Infinity. |
| 23 scale: overall scale factor to apply to buckets, if using geometric |
| 24 buckets. |
| 23 | 25 |
| 24 Specify a width for fixed-size buckets or specify a growth_factor for bucket | 26 Specify a width for fixed-size buckets or specify a growth_factor for bucket |
| 25 sizes that follow a geometric progression. Specifying both is valid as | 27 sizes that follow a geometric progression. Specifying both is not valid. |
| 26 well:: | |
| 27 | 28 |
| 28 lower bound of bucket i = width * i + growth_factor ^ (i - 1) | 29 For fixed-size buckets:: |
| 30 |
| 31 The i'th bucket covers the interval [(i-1) * width, i * width), where i |
| 32 ranges from 1 to num_finite_buckets, inclusive: |
| 33 |
| 34 bucket number lower bound upper bound |
| 35 i == 0 (underflow) -inf 0 |
| 36 1 <= i <= num_buckets (i-1) * width i * width |
| 37 i == num_buckets+1 (overflow) (i-1) * width +inf |
| 38 |
| 39 For geometric buckets:: |
| 40 |
| 41 The i'th bucket covers the interval [factor^(i-1), factor^i) * scale |
| 42 where i ranges from 1 to num_finite_buckets inclusive. |
| 43 |
| 44 bucket number lower bound upper bound |
| 45 i == 0 (underflow) -inf scale |
| 46 1 <= i <= num_buckets factor^(i-1) * scale factor^i * scale |
| 47 i == num_buckets+1 (overflow) factor^(i-1) * scale +inf |
| 29 """ | 48 """ |
| 30 | 49 |
| 31 if num_finite_buckets < 0: | 50 if num_finite_buckets < 0: |
| 32 raise ValueError('num_finite_buckets must be >= 0 (was %d)' % | 51 raise ValueError('num_finite_buckets must be >= 0 (was %d)' % |
| 33 num_finite_buckets) | 52 num_finite_buckets) |
| 53 if width != 0 and growth_factor != 0: |
| 54 raise ValueError('a Bucketer must be created with either a width or a ' |
| 55 'growth factor, not both') |
| 34 | 56 |
| 35 self.width = width | 57 self.width = width |
| 36 self.growth_factor = growth_factor | 58 self.growth_factor = growth_factor |
| 37 self.num_finite_buckets = num_finite_buckets | 59 self.num_finite_buckets = num_finite_buckets |
| 38 self.total_buckets = num_finite_buckets + 2 | 60 self.total_buckets = num_finite_buckets + 2 |
| 39 self.underflow_bucket = 0 | 61 self.underflow_bucket = 0 |
| 40 self.overflow_bucket = self.total_buckets - 1 | 62 self.overflow_bucket = self.total_buckets - 1 |
| 63 self.scale = scale |
| 41 | 64 |
| 42 self._lower_bounds = list(self._generate_lower_bounds()) | 65 if width != 0: |
| 66 self._lower_bounds = [float('-Inf')] + self._linear_bounds() |
| 67 else: |
| 68 self._lower_bounds = [float('-Inf')] + self._exponential_bounds() |
| 43 | 69 |
| 44 def _generate_lower_bounds(self): | 70 # Sanity check the bucket lower bounds we created. |
| 45 yield float('-Inf') | 71 assert len(self._lower_bounds) == self.total_buckets |
| 46 yield 0 | 72 assert all(x < y for x, y in zip( |
| 73 self._lower_bounds, self._lower_bounds[1:])), ( |
| 74 'bucket boundaries must be monotonically increasing') |
| 47 | 75 |
| 48 previous = 0 | 76 def _linear_bounds(self): |
| 49 for i in xrange(self.num_finite_buckets): | 77 return [self.width * i for i in xrange(self.num_finite_buckets + 1)] |
| 50 lower_bound = self.width * (i + 1) | |
| 51 if self.growth_factor != 0: | |
| 52 lower_bound += self.growth_factor ** i | |
| 53 | 78 |
| 54 if lower_bound <= previous: | 79 def _exponential_bounds(self): |
| 55 raise ValueError('bucket boundaries must be monotonically increasing') | 80 return [ |
| 56 yield lower_bound | 81 self.scale * self.growth_factor ** i |
| 57 previous = lower_bound | 82 for i in xrange(self.num_finite_buckets + 1)] |
| 58 | 83 |
| 59 def bucket_for_value(self, value): | 84 def bucket_for_value(self, value): |
| 60 """Returns the index of the bucket that this value belongs to.""" | 85 """Returns the index of the bucket that this value belongs to.""" |
| 61 | 86 |
| 62 # bisect.bisect_left is wrong because the buckets are of [lower, upper) form | 87 # bisect.bisect_left is wrong because the buckets are of [lower, upper) form |
| 63 return bisect.bisect(self._lower_bounds, value) - 1 | 88 return bisect.bisect(self._lower_bounds, value) - 1 |
| 64 | 89 |
| 65 def bucket_boundaries(self, bucket): | 90 def bucket_boundaries(self, bucket): |
| 66 """Returns a tuple that is the [lower, upper) bounds of this bucket. | 91 """Returns a tuple that is the [lower, upper) bounds of this bucket. |
| 67 | 92 |
| 68 The lower bound of the first bucket is -Infinity, and the upper bound of the | 93 The lower bound of the first bucket is -Infinity, and the upper bound of the |
| 69 last bucket is +Infinity. | 94 last bucket is +Infinity. |
| 70 """ | 95 """ |
| 71 | 96 |
| 72 if bucket < 0 or bucket >= self.total_buckets: | 97 if bucket < 0 or bucket >= self.total_buckets: |
| 73 raise IndexError('bucket %d out of range' % bucket) | 98 raise IndexError('bucket %d out of range' % bucket) |
| 74 if bucket == self.total_buckets - 1: | 99 if bucket == self.total_buckets - 1: |
| 75 return (self._lower_bounds[bucket], float('Inf')) | 100 return (self._lower_bounds[bucket], float('Inf')) |
| 76 return (self._lower_bounds[bucket], self._lower_bounds[bucket + 1]) | 101 return (self._lower_bounds[bucket], self._lower_bounds[bucket + 1]) |
| 77 | 102 |
| 78 def all_bucket_boundaries(self): | |
| 79 """Generator that produces the [lower, upper) bounds of all buckets. | |
| 80 | |
| 81 This is equivalent to calling:: | |
| 82 | |
| 83 (b.bucket_boundaries(i) for i in xrange(b.total_buckets)) | |
| 84 | |
| 85 but is more efficient. | |
| 86 """ | |
| 87 | |
| 88 lower = self._lower_bounds[0] | |
| 89 for i in xrange(1, self.total_buckets): | |
| 90 upper = self._lower_bounds[i] | |
| 91 yield (lower, upper) | |
| 92 lower = upper | |
| 93 | |
| 94 yield (lower, float('Inf')) | |
| 95 | |
| 96 | 103 |
| 97 def FixedWidthBucketer(width, num_finite_buckets=100): | 104 def FixedWidthBucketer(width, num_finite_buckets=100): |
| 98 """Convenience function that returns a fixed width Bucketer.""" | 105 """Convenience function that returns a fixed width Bucketer.""" |
| 99 return Bucketer(width=width, growth_factor=0.0, | 106 return _Bucketer(width=width, growth_factor=0.0, |
| 100 num_finite_buckets=num_finite_buckets) | 107 num_finite_buckets=num_finite_buckets) |
| 101 | 108 |
| 102 | 109 |
| 103 def GeometricBucketer(growth_factor=10**0.2, num_finite_buckets=100): | 110 def GeometricBucketer(growth_factor=10**0.2, num_finite_buckets=100, |
| 111 scale=1.0): |
| 104 """Convenience function that returns a geometric progression Bucketer.""" | 112 """Convenience function that returns a geometric progression Bucketer.""" |
| 105 return Bucketer(width=0, growth_factor=growth_factor, | 113 return _Bucketer(width=0, growth_factor=growth_factor, |
| 106 num_finite_buckets=num_finite_buckets) | 114 num_finite_buckets=num_finite_buckets, scale=scale) |
| 107 | 115 |
| 108 | 116 |
| 109 class Distribution(object): | 117 class Distribution(object): |
| 110 """Holds a histogram distribution. | 118 """Holds a histogram distribution. |
| 111 | 119 |
| 112 Buckets are chosen for values by the provided Bucketer. | 120 Buckets are chosen for values by the provided Bucketer. |
| 113 """ | 121 """ |
| 114 | 122 |
| 115 def __init__(self, bucketer): | 123 def __init__(self, bucketer): |
| 116 self.bucketer = bucketer | 124 self.bucketer = bucketer |
| 117 self.sum = 0 | 125 self.sum = 0 |
| 118 self.count = 0 | 126 self.count = 0 |
| 119 self.buckets = collections.defaultdict(int) | 127 self.buckets = collections.defaultdict(int) |
| 120 | 128 |
| 121 def add(self, value): | 129 def add(self, value): |
| 122 self.buckets[self.bucketer.bucket_for_value(value)] += 1 | 130 self.buckets[self.bucketer.bucket_for_value(value)] += 1 |
| 123 self.sum += value | 131 self.sum += value |
| 124 self.count += 1 | 132 self.count += 1 |
| OLD | NEW |