| OLD | NEW |
| (Empty) |
| 1 # Copyright 2015 The Chromium Authors. All rights reserved. | |
| 2 # Use of this source code is governed by a BSD-style license that can be | |
| 3 # found in the LICENSE file. | |
| 4 | |
| 5 import bisect | |
| 6 import collections | |
| 7 | |
| 8 | |
| 9 class Bucketer(object): | |
| 10 """Bucketing function for histograms recorded by the Distribution class.""" | |
| 11 | |
| 12 def __init__(self, width, growth_factor, num_finite_buckets): | |
| 13 """The bucket sizes are controlled by width and growth_factor, and the total | |
| 14 number of buckets is set by num_finite_buckets: | |
| 15 | |
| 16 Args: | |
| 17 width: fixed size of each bucket. | |
| 18 growth_factor: if non-zero, the size of each bucket increases by another | |
| 19 multiplicative factor of this factor (see lower bound formula below). | |
| 20 num_finite_buckets: the number of finite buckets. There are two | |
| 21 additional buckets - an underflow and an overflow bucket - that have | |
| 22 lower and upper bounds of Infinity. | |
| 23 | |
| 24 Specify a width for fixed-size buckets or specify a growth_factor for bucket | |
| 25 sizes that follow a geometric progression. Specifying both is valid as | |
| 26 well:: | |
| 27 | |
| 28 lower bound of bucket i = width * i + growth_factor ^ (i - 1) | |
| 29 """ | |
| 30 | |
| 31 if num_finite_buckets < 0: | |
| 32 raise ValueError('num_finite_buckets must be >= 0 (was %d)' % | |
| 33 num_finite_buckets) | |
| 34 | |
| 35 self.width = width | |
| 36 self.growth_factor = growth_factor | |
| 37 self.num_finite_buckets = num_finite_buckets | |
| 38 self.total_buckets = num_finite_buckets + 2 | |
| 39 self.underflow_bucket = 0 | |
| 40 self.overflow_bucket = self.total_buckets - 1 | |
| 41 | |
| 42 self._lower_bounds = list(self._generate_lower_bounds()) | |
| 43 | |
| 44 def _generate_lower_bounds(self): | |
| 45 yield float('-Inf') | |
| 46 yield 0 | |
| 47 | |
| 48 previous = 0 | |
| 49 for i in xrange(self.num_finite_buckets): | |
| 50 lower_bound = self.width * (i + 1) | |
| 51 if self.growth_factor != 0: | |
| 52 lower_bound += self.growth_factor ** i | |
| 53 | |
| 54 if lower_bound <= previous: | |
| 55 raise ValueError('bucket boundaries must be monotonically increasing') | |
| 56 yield lower_bound | |
| 57 previous = lower_bound | |
| 58 | |
| 59 def bucket_for_value(self, value): | |
| 60 """Returns the index of the bucket that this value belongs to.""" | |
| 61 | |
| 62 # bisect.bisect_left is wrong because the buckets are of [lower, upper) form | |
| 63 return bisect.bisect(self._lower_bounds, value) - 1 | |
| 64 | |
| 65 def bucket_boundaries(self, bucket): | |
| 66 """Returns a tuple that is the [lower, upper) bounds of this bucket. | |
| 67 | |
| 68 The lower bound of the first bucket is -Infinity, and the upper bound of the | |
| 69 last bucket is +Infinity. | |
| 70 """ | |
| 71 | |
| 72 if bucket < 0 or bucket >= self.total_buckets: | |
| 73 raise IndexError('bucket %d out of range' % bucket) | |
| 74 if bucket == self.total_buckets - 1: | |
| 75 return (self._lower_bounds[bucket], float('Inf')) | |
| 76 return (self._lower_bounds[bucket], self._lower_bounds[bucket + 1]) | |
| 77 | |
| 78 def all_bucket_boundaries(self): | |
| 79 """Generator that produces the [lower, upper) bounds of all buckets. | |
| 80 | |
| 81 This is equivalent to calling:: | |
| 82 | |
| 83 (b.bucket_boundaries(i) for i in xrange(b.total_buckets)) | |
| 84 | |
| 85 but is more efficient. | |
| 86 """ | |
| 87 | |
| 88 lower = self._lower_bounds[0] | |
| 89 for i in xrange(1, self.total_buckets): | |
| 90 upper = self._lower_bounds[i] | |
| 91 yield (lower, upper) | |
| 92 lower = upper | |
| 93 | |
| 94 yield (lower, float('Inf')) | |
| 95 | |
| 96 | |
| 97 def FixedWidthBucketer(width, num_finite_buckets=100): | |
| 98 """Convenience function that returns a fixed width Bucketer.""" | |
| 99 return Bucketer(width=width, growth_factor=0.0, | |
| 100 num_finite_buckets=num_finite_buckets) | |
| 101 | |
| 102 | |
| 103 def GeometricBucketer(growth_factor=10**0.2, num_finite_buckets=100): | |
| 104 """Convenience function that returns a geometric progression Bucketer.""" | |
| 105 return Bucketer(width=0, growth_factor=growth_factor, | |
| 106 num_finite_buckets=num_finite_buckets) | |
| 107 | |
| 108 | |
| 109 class Distribution(object): | |
| 110 """Holds a histogram distribution. | |
| 111 | |
| 112 Buckets are chosen for values by the provided Bucketer. | |
| 113 """ | |
| 114 | |
| 115 def __init__(self, bucketer): | |
| 116 self.bucketer = bucketer | |
| 117 self.sum = 0 | |
| 118 self.count = 0 | |
| 119 self.buckets = collections.defaultdict(int) | |
| 120 | |
| 121 def add(self, value): | |
| 122 self.buckets[self.bucketer.bucket_for_value(value)] += 1 | |
| 123 self.sum += value | |
| 124 self.count += 1 | |
| OLD | NEW |