Chromium Code Reviews| Index: appengine/findit/crash/changelist_features/min_distance.py |
| diff --git a/appengine/findit/crash/changelist_features/min_distance.py b/appengine/findit/crash/changelist_features/min_distance.py |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..4808692e1d926c87d5bef5c0a5a2d2c9201a9ae1 |
| --- /dev/null |
| +++ b/appengine/findit/crash/changelist_features/min_distance.py |
| @@ -0,0 +1,97 @@ |
| +# Copyright 2016 The Chromium Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| + |
| +from collections import namedtuple |
| + |
| + |
| +# N.B., this must not be infinity, else we'll start getting NaN values |
| +# from LinearMinDistanceFeature (and SquaredMinDistanceFeature). |
| +DEFAULT_MAXIMUM = 50 |
| + |
| + |
| +# N.B., if we make this a namedtuple then it becomes horrible to inherit from. |
| +class MinDistanceFeature(object): |
| + def __init__(self, maximum=None): |
| + """ |
| + Args: |
| + maximum (float): An upper bound on the return result. This |
| + argument is optional and (effectively) defaults to infinity. |
| + """ |
| + self._maximum = maximum |
| + |
| + def __call__(self, result): |
| + """Return the minimum ``AnalysisInfo.min_distance`` across all files. |
| + |
| + Although this method looks like it should be a method on the |
| + ``Result`` class, we have it live here in order to make coverage |
| + tests happy. The downside of this is that we now have to modify |
| + multiple files whenever the guts of ``Result`` change. The upside |
| + is the aforementioned coverage tests, and that it helps keep the |
| + ``Result`` class looking cleaner. |
| + |
| + Args: |
| + result (Result): the result to analyze. |
| + |
| + Returns: |
| + The minimum distance between (the code for) a stack frame in the |
| + ``Result`` and the CL in the ``Result`` as a ``float``. If no |
| + ``maximum`` is given, then we return that minimum directly. If a |
| + ``maximum`` is given, then we return the smaller of it and the |
| + found minimum distance. |
| + """ |
| + if not result.file_to_analysis_info: |
| + return self._maximum |
| + |
| + minimum = min(analysis_info.min_distance |
| + for analysis_info |
| + in result.file_to_analysis_info.itervalues()) |
| + if self._maximum is None: |
| + return minimum |
| + |
| + return min(float(self._maximum), float(minimum)) |
| + |
| + |
| +class LinearMinDistanceFeature(MinDistanceFeature): |
| + """Return the minimum distance scaled linearly between 0 and 1. |
| + |
| + That is, when the minimum distance is 0 we return 1; when it is greater |
| + than the ``maximum`` passed to the constructor, we return 0. And in |
| + between we return values linearly interpolated between those points. |
| + |
| + In principle this normalization isn't strictly required, as the weight |
| + of this feature can be scaled to account for the normalization. |
| + However, by normalizing things we ensure that the feature's weight is |
| + independent of ``maximum``, which helps training. |
| + """ |
| + def __init__(self, maximum=None): |
| + """ |
| + Args: |
| + maximum (float): An upper bound on the return result. This |
| + argument is optional and defaults to ``DEFAULT_MAXIMUM``. |
| + """ |
| + if maximum is None: |
| + maximum = DEFAULT_MAXIMUM |
|
inferno
2016/12/06 18:07:06
nit: you can just do this in constructor
def __init
Sharu Jiang
2016/12/06 20:49:19
As I recall, pylint will complain if it is done that way.
|
| + super(LinearMinDistanceFeature, self).__init__(maximum) |
| + |
| + def __call__(self, result): |
| + min_distance = super(LinearMinDistanceFeature, self).__call__(result) |
| + return (self._maximum - min_distance) / self._maximum |
| + |
| + |
| +class SquaredMinDistanceFeature(LinearMinDistanceFeature): |
| + """Return the minimum distance scaled quadratically between 0 and 1. |
| + |
| + This feature together with ``LinearMinDistanceFeature`` (and a |
| + constant feature) allow us to capture any quadratic polynomial of the |
| + ``MinDistance``. That is, suppose we had a single feature ``c2*x**2 + |
| + c1*x + 1`` with weight ``w``. Rather than using that feature directly |
| + (which would require us to specify the hyperparameters ``c2`` and |
| + ``c1``) we can instead use three features: ``w2*(x**2) + w1*x + w0``; |
| + which enables us to avoid specifying the hyperparameters, by pushing |
| + them into the weight parameters instead. |
| + """ |
| + def __call__(self, result): |
| + linear_min_distance = ( |
| + super(SquaredMinDistanceFeature, self).__call__(result)) |
| + return linear_min_distance * linear_min_distance |