OLD | NEW |
(Empty) | |
| 1 # Copyright 2016 Google Inc. All Rights Reserved. |
| 2 # |
| 3 # Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 # you may not use this file except in compliance with the License. |
| 5 # You may obtain a copy of the License at |
| 6 # |
| 7 # http://www.apache.org/licenses/LICENSE-2.0 |
| 8 # |
| 9 # Unless required by applicable law or agreed to in writing, software |
| 10 # distributed under the License is distributed on an "AS IS" BASIS, |
| 11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 # See the License for the specific language governing permissions and |
| 13 # limitations under the License. |
| 14 |
| 15 """distribution provides funcs for working with `Distribution` instances. |
| 16 |
| 17 :func:`create_exponential`, :func:`create_linear`, :func:`create_linear` |
| 18 construct new `Distribution` instances initialized with different types |
| 19 of buckets a `Distribution` can have. They are factory functions that |
| 20 include assertions that make sure that the Distribution instances are |
| 21 in the correct state. |
| 22 |
| 23 :func:`add_sample` adds a sample to an existing distribution instance |
| 24 |
| 25 :func:`merge` merges two distribution instances |
| 26 |
| 27 """ |
| 28 |
| 29 from __future__ import absolute_import |
| 30 from __future__ import division |
| 31 |
| 32 import bisect |
| 33 import logging |
| 34 import math |
| 35 |
| 36 from . import messages |
| 37 |
| 38 logger = logging.getLogger(__name__) |
| 39 |
| 40 |
| 41 _BAD_NUM_FINITE_BUCKETS = 'number of finite buckets should be > 0' |
| 42 _BAD_FLOAT_ARG = '%s should be > %f' |
| 43 |
| 44 |
| 45 def create_exponential(num_finite_buckets, growth_factor, scale): |
| 46 """Creates a new instance of distribution with exponential buckets |
| 47 |
| 48 Args: |
| 49 num_finite_buckets (int): initializes number of finite buckets |
| 50 growth_factor (float): initializes the growth factor |
| 51 scale (float): initializes the scale |
| 52 |
| 53 Return: |
| 54 :class:`google.api.gen.servicecontrol_v1_messages.Distribution` |
| 55 |
| 56 Raises: |
| 57 ValueError: if the args are invalid for creating an instance |
| 58 """ |
| 59 if num_finite_buckets <= 0: |
| 60 raise ValueError(_BAD_NUM_FINITE_BUCKETS) |
| 61 if growth_factor <= 1.0: |
| 62 raise ValueError(_BAD_FLOAT_ARG % ('growth factor', 1.0)) |
| 63 if scale <= 0.0: |
| 64 raise ValueError(_BAD_FLOAT_ARG % ('scale', 0.0)) |
| 65 return messages.Distribution( |
| 66 bucketCounts=[0] * (num_finite_buckets + 2), |
| 67 exponentialBuckets=messages.ExponentialBuckets( |
| 68 numFiniteBuckets=num_finite_buckets, |
| 69 growthFactor=growth_factor, |
| 70 scale=scale)) |
| 71 |
| 72 |
| 73 def create_linear(num_finite_buckets, width, offset): |
| 74 """Creates a new instance of distribution with linear buckets. |
| 75 |
| 76 Args: |
| 77 num_finite_buckets (int): initializes number of finite buckets |
| 78 width (float): initializes the width of each bucket |
| 79 offset (float): initializes the offset |
| 80 |
| 81 Return: |
| 82 :class:`google.api.gen.servicecontrol_v1_messages.Distribution` |
| 83 |
| 84 Raises: |
| 85 ValueError: if the args are invalid for creating an instance |
| 86 """ |
| 87 if num_finite_buckets <= 0: |
| 88 raise ValueError(_BAD_NUM_FINITE_BUCKETS) |
| 89 if width <= 0.0: |
| 90 raise ValueError(_BAD_FLOAT_ARG % ('width', 0.0)) |
| 91 return messages.Distribution( |
| 92 bucketCounts=[0] * (num_finite_buckets + 2), |
| 93 linearBuckets=messages.LinearBuckets( |
| 94 numFiniteBuckets=num_finite_buckets, |
| 95 width=width, |
| 96 offset=offset)) |
| 97 |
| 98 |
| 99 def create_explicit(bounds): |
| 100 """Creates a new instance of distribution with explicit buckets. |
| 101 |
| 102 bounds is an iterable of ordered floats that define the explicit buckets |
| 103 |
| 104 Args: |
| 105 bounds (iterable[float]): initializes the bounds |
| 106 |
| 107 Return: |
| 108 :class:`google.api.gen.servicecontrol_v1_messages.Distribution` |
| 109 |
| 110 Raises: |
| 111 ValueError: if the args are invalid for creating an instance |
| 112 """ |
| 113 safe_bounds = sorted(float(x) for x in bounds) |
| 114 if len(safe_bounds) != len(set(safe_bounds)): |
| 115 raise ValueError('Detected two elements of bounds that are the same') |
| 116 return messages.Distribution( |
| 117 bucketCounts=[0] * (len(safe_bounds) + 1), |
| 118 explicitBuckets=messages.ExplicitBuckets(bounds=safe_bounds)) |
| 119 |
| 120 |
| 121 def add_sample(a_float, dist): |
| 122 """Adds `a_float` to `dist`, updating its existing buckets. |
| 123 |
| 124 Args: |
| 125 a_float (float): a new value |
| 126 dist (:class:`google.api.gen.servicecontrol_v1_messages.Distribution`): |
| 127 the Distribution being updated |
| 128 |
| 129 Raises: |
| 130 ValueError: if `dist` does not have known bucket options defined |
| 131 ValueError: if there are not enough bucket count fields in `dist` |
| 132 """ |
| 133 dist_type, _ = _detect_bucket_option(dist) |
| 134 if dist_type == 'exponentialBuckets': |
| 135 _update_general_statistics(a_float, dist) |
| 136 _update_exponential_bucket_count(a_float, dist) |
| 137 elif dist_type == 'linearBuckets': |
| 138 _update_general_statistics(a_float, dist) |
| 139 _update_linear_bucket_count(a_float, dist) |
| 140 elif dist_type == 'explicitBuckets': |
| 141 _update_general_statistics(a_float, dist) |
| 142 _update_explicit_bucket_count(a_float, dist) |
| 143 else: |
| 144 logger.error('Could not determine bucket option type for %s', dist) |
| 145 raise ValueError('Unknown bucket option type') |
| 146 |
| 147 |
| 148 def merge(prior, latest): |
| 149 """Merge `prior` into `latest`. |
| 150 |
| 151 N.B, this mutates latest. It ensures that the statistics and histogram are |
| 152 updated to correctly include the original values from both instances. |
| 153 |
| 154 Args: |
| 155 prior (:class:`google.api.gen.servicecontrol_v1_messages.Distribution`): |
| 156 an instance |
| 157 latest (:class:`google.api.gen.servicecontrol_v1_messages.Distribution`): |
| 158 an instance to be updated |
| 159 |
| 160 Raises: |
| 161 ValueError: if the bucket options of `prior` and `latest` do not match |
| 162 ValueError: if the bucket counts of `prior` and `latest` do not match |
| 163 |
| 164 """ |
| 165 if not _buckets_nearly_equal(prior, latest): |
| 166 logger.error('Bucket options do not match. From %s To: %s', |
| 167 prior, |
| 168 latest) |
| 169 raise ValueError('Bucket options do not match') |
| 170 if len(prior.bucketCounts) != len(latest.bucketCounts): |
| 171 logger.error('Bucket count sizes do not match. From %s To: %s', |
| 172 prior, |
| 173 latest) |
| 174 raise ValueError('Bucket count sizes do not match') |
| 175 if prior.count <= 0: |
| 176 return |
| 177 |
| 178 old_count = latest.count |
| 179 old_mean = latest.mean |
| 180 old_summed_variance = latest.sumOfSquaredDeviation |
| 181 bucket_counts = latest.bucketCounts |
| 182 |
| 183 # Update the latest |
| 184 latest.count += prior.count |
| 185 latest.maximum = max(prior.maximum, latest.maximum) |
| 186 latest.minimum = min(prior.minimum, latest.minimum) |
| 187 latest.mean = ((old_count * old_mean + prior.count * prior.mean) / |
| 188 latest.count) |
| 189 latest.sumOfSquaredDeviation = ( |
| 190 old_summed_variance + prior.sumOfSquaredDeviation + |
| 191 old_count * (latest.mean - old_mean) ** 2 + |
| 192 prior.count * (latest.mean - prior.mean) ** 2) |
| 193 for i, (x, y) in enumerate(zip(prior.bucketCounts, bucket_counts)): |
| 194 bucket_counts[i] = x + y |
| 195 |
| 196 _EPSILON = 1e-5 |
| 197 |
| 198 |
| 199 def _is_close_enough(x, y): |
| 200 if x is None or y is None: |
| 201 return False |
| 202 return abs(x - y) <= _EPSILON * abs(x) |
| 203 |
| 204 |
| 205 # This is derived from the oneof choices of the Distribution message's |
| 206 # bucket_option field in google/api/servicecontrol/v1/distribution.proto, and |
| 207 # should be kept in sync with that |
| 208 _DISTRIBUTION_ONEOF_FIELDS = ( |
| 209 'linearBuckets', 'exponentialBuckets', 'explicitBuckets') |
| 210 |
| 211 |
| 212 def _detect_bucket_option(distribution): |
| 213 for f in _DISTRIBUTION_ONEOF_FIELDS: |
| 214 value = distribution.get_assigned_value(f) |
| 215 if value is not None: |
| 216 return f, value |
| 217 return None, None |
| 218 |
| 219 |
| 220 def _linear_buckets_nearly_equal(a, b): |
| 221 return ((a.numFiniteBuckets == b.numFiniteBuckets) and |
| 222 _is_close_enough(a.width, b.width) or |
| 223 _is_close_enough(a.offset, b.offset)) |
| 224 |
| 225 |
| 226 def _exponential_buckets_nearly_equal(a, b): |
| 227 return ((a.numFiniteBuckets == b.numFiniteBuckets) and |
| 228 _is_close_enough(a.growthFactor, b.growthFactor) and |
| 229 _is_close_enough(a.scale, b.scale)) |
| 230 |
| 231 |
| 232 def _explicit_buckets_nearly_equal(a, b): |
| 233 if len(a.bounds) != len(b.bounds): |
| 234 return False |
| 235 for x, y in zip(a.bounds, b.bounds): |
| 236 if not _is_close_enough(x, y): |
| 237 return False |
| 238 return True |
| 239 |
| 240 |
| 241 def _buckets_nearly_equal(a_dist, b_dist): |
| 242 """Determines whether two `Distributions` are nearly equal. |
| 243 |
| 244 Args: |
| 245 a_dist (:class:`Distribution`): an instance |
| 246 b_dist (:class:`Distribution`): another instance |
| 247 |
| 248 Return: |
| 249 boolean: `True` if the two instances are approximately equal, otherwise |
| 250 False |
| 251 |
| 252 """ |
| 253 a_type, a_buckets = _detect_bucket_option(a_dist) |
| 254 b_type, b_buckets = _detect_bucket_option(b_dist) |
| 255 if a_type != b_type: |
| 256 return False |
| 257 elif a_type == 'linearBuckets': |
| 258 return _linear_buckets_nearly_equal(a_buckets, b_buckets) |
| 259 elif a_type == 'exponentialBuckets': |
| 260 return _exponential_buckets_nearly_equal(a_buckets, b_buckets) |
| 261 elif a_type == 'explicitBuckets': |
| 262 return _explicit_buckets_nearly_equal(a_buckets, b_buckets) |
| 263 else: |
| 264 return False |
| 265 |
| 266 |
| 267 def _update_general_statistics(a_float, dist): |
| 268 """Adds a_float to distribution, updating the statistics fields. |
| 269 |
| 270 Args: |
| 271 a_float (float): a new value |
| 272 dist (:class:`google.api.gen.servicecontrol_v1_messages.Distribution`): |
| 273 the Distribution being updated |
| 274 |
| 275 """ |
| 276 if not dist.count: |
| 277 dist.count = 1 |
| 278 dist.maximum = a_float |
| 279 dist.minimum = a_float |
| 280 dist.mean = a_float |
| 281 dist.sumOfSquaredDeviation = 0 |
| 282 else: |
| 283 old_count = dist.count |
| 284 old_mean = dist.mean |
| 285 new_mean = ((old_count * old_mean) + a_float) / (old_count + 1) |
| 286 delta_sum_squares = (a_float - old_mean) * (a_float - new_mean) |
| 287 dist.count += 1 |
| 288 dist.mean = new_mean |
| 289 dist.maximum = max(a_float, dist.maximum) |
| 290 dist.minimum = min(a_float, dist.minimum) |
| 291 dist.sumOfSquaredDeviation += delta_sum_squares |
| 292 |
| 293 |
| 294 _BAD_UNSET_BUCKETS = 'cannot update a distribution with unset %s' |
| 295 _BAD_LOW_BUCKET_COUNT = 'cannot update a distribution with a low bucket count' |
| 296 |
| 297 |
| 298 def _update_exponential_bucket_count(a_float, dist): |
| 299 """Adds `a_float` to `dist`, updating its exponential buckets. |
| 300 |
| 301 Args: |
| 302 a_float (float): a new value |
| 303 dist (:class:`google.api.gen.servicecontrol_v1_messages.Distribution`): |
| 304 the Distribution being updated |
| 305 |
| 306 Raises: |
| 307 ValueError: if `dist` does not already have exponential buckets defined |
| 308 ValueError: if there are not enough bucket count fields in `dist` |
| 309 """ |
| 310 buckets = dist.exponentialBuckets |
| 311 if buckets is None: |
| 312 raise ValueError(_BAD_UNSET_BUCKETS % ('exponential buckets')) |
| 313 bucket_counts = dist.bucketCounts |
| 314 num_finite_buckets = buckets.numFiniteBuckets |
| 315 if len(bucket_counts) < num_finite_buckets + 2: |
| 316 raise ValueError(_BAD_LOW_BUCKET_COUNT) |
| 317 scale = buckets.scale |
| 318 factor = buckets.growthFactor |
| 319 if (a_float <= scale): |
| 320 index = 0 |
| 321 else: |
| 322 index = 1 + int((math.log(a_float / scale) / math.log(factor))) |
| 323 index = min(index, num_finite_buckets + 1) |
| 324 bucket_counts[index] += 1 |
| 325 logger.debug('scale:%f, factor:%f, sample:%f, index:%d', |
| 326 scale, factor, a_float, index) |
| 327 |
| 328 |
| 329 def _update_linear_bucket_count(a_float, dist): |
| 330 """Adds `a_float` to `dist`, updating the its linear buckets. |
| 331 |
| 332 Args: |
| 333 a_float (float): a new value |
| 334 dist (:class:`google.api.gen.servicecontrol_v1_messages.Distribution`): |
| 335 the Distribution being updated |
| 336 |
| 337 Raises: |
| 338 ValueError: if `dist` does not already have linear buckets defined |
| 339 ValueError: if there are not enough bucket count fields in `dist` |
| 340 """ |
| 341 buckets = dist.linearBuckets |
| 342 if buckets is None: |
| 343 raise ValueError(_BAD_UNSET_BUCKETS % ('linear buckets')) |
| 344 bucket_counts = dist.bucketCounts |
| 345 num_finite_buckets = buckets.numFiniteBuckets |
| 346 if len(bucket_counts) < num_finite_buckets + 2: |
| 347 raise ValueError(_BAD_LOW_BUCKET_COUNT) |
| 348 width = buckets.width |
| 349 lower = buckets.offset |
| 350 upper = lower + (num_finite_buckets * width) |
| 351 if a_float < lower: |
| 352 index = 0 |
| 353 elif a_float >= upper: |
| 354 index = num_finite_buckets + 1 |
| 355 else: |
| 356 index = 1 + int(((a_float - lower) / width)) |
| 357 bucket_counts[index] += 1 |
| 358 logger.debug('upper:%f, lower:%f, width:%f, sample:%f, index:%d', |
| 359 upper, lower, width, a_float, index) |
| 360 |
| 361 |
| 362 def _update_explicit_bucket_count(a_float, dist): |
| 363 """Adds `a_float` to `dist`, updating its explicit buckets. |
| 364 |
| 365 Args: |
| 366 a_float (float): a new value |
| 367 dist (:class:`google.api.gen.servicecontrol_v1_messages.Distribution`): |
| 368 the Distribution being updated |
| 369 |
| 370 Raises: |
| 371 ValueError: if `dist` does not already have explict buckets defined |
| 372 ValueError: if there are not enough bucket count fields in `dist` |
| 373 """ |
| 374 buckets = dist.explicitBuckets |
| 375 if buckets is None: |
| 376 raise ValueError(_BAD_UNSET_BUCKETS % ('explicit buckets')) |
| 377 bucket_counts = dist.bucketCounts |
| 378 bounds = buckets.bounds |
| 379 if len(bucket_counts) < len(bounds) + 1: |
| 380 raise ValueError(_BAD_LOW_BUCKET_COUNT) |
| 381 bucket_counts[bisect.bisect(bounds, a_float)] += 1 |
OLD | NEW |