Chromium Code Reviews| Index: appengine/findit/crash/loglinear/model.py |
| diff --git a/appengine/findit/crash/loglinear/model.py b/appengine/findit/crash/loglinear/model.py |
| index b231e90e075534396bfbfbdccaefb5ecdf7ad1ad..fa9f08d53031c68eceba3b4f5ee694a06321f782 100644 |
| --- a/appengine/findit/crash/loglinear/model.py |
| +++ b/appengine/findit/crash/loglinear/model.py |
| @@ -60,7 +60,8 @@ class UnnormalizedLogLinearModel(object): |
| rather than returning a probability per se. |
| """ |
| - def __init__(self, feature_function, weights, epsilon=None): |
| + def __init__(self, feature_function, weights, feature_to_weight, |
| + epsilon=None): |
|
wrengr
2017/01/09 19:31:51
given the dict of weights the list is redundant. F
|
| """Construct a new model with the given weights and feature function. |
| Args: |
| @@ -70,6 +71,10 @@ class UnnormalizedLogLinearModel(object): |
| weights (list of float): coefficients for how important we consider |
| each component of the feature vector for deciding which ``y`` |
| to blame. |
| + feature_to_weight (dict of str to float): the weights for the features. |
| + The keys of the dictionary are the names of the features the weights |
| + are for. We take this argument as a dict rather than as a list so that |
| + callers needn't worry about what order to provide the weights in. |
| epsilon (float): The absolute-error threshold for considering a |
| weight to be "equal to zero". N.B., this should be a positive |
| number, as we will compare it against the absolute value of |
| @@ -77,6 +82,8 @@ class UnnormalizedLogLinearModel(object): |
| """ |
| if epsilon is None: |
| epsilon = EPSILON |
| + |
| + self._feature_to_weight = feature_to_weight |
| self._weights = np.array([ |
|
wrengr
2017/01/09 19:31:52
Once we no longer take the list of weights, this w
|
| w if isinstance(w, float) and math.fabs(w) >= epsilon else 0. |
| for w in weights]) |
| @@ -121,6 +128,21 @@ class UnnormalizedLogLinearModel(object): |
| self._features(x).map(lambda fxy: |
| self.weights.dot(np.array([feature.value for feature in fxy])))) |
| + def SingleFeatureScore(self, feature_value): |
| + """Returns the score (aka weighted value) of a ``FeatureValue``. |
| + |
| + This function assumes the report's stacktrace has already had any necessary |
| + preprocessing (like filtering or truncating) applied. |
| + |
| + Args: |
| + feature_value (FeatureValue): the feature value to check. |
| + |
| + Returns: |
| + The score of the feature value. |
| + """ |
| + return feature_value.value * self._feature_to_weight.get( |
| + feature_value.name, 0.) |
| + |
| def ClearWeightBasedMemos(self): |
| """Clear all the memos that depend on the weight covector.""" |
| self._quadrance = None |
| @@ -218,7 +240,8 @@ class LogLinearModel(UnnormalizedLogLinearModel): |
| we can provide probabilities (not just scores). However, to do so we |
| require a specification of the subsets of ``Y`` for each ``x``. |
| """ |
| - def __init__(self, Y_given_X, feature_function, weights, epsilon=None): |
| + def __init__(self, Y_given_X, feature_function, weights, feature_to_weight, |
| + epsilon=None): |
| """Construct a new probabilistic model. |
| Args: |
| @@ -241,7 +264,8 @@ class LogLinearModel(UnnormalizedLogLinearModel): |
| number, as we will compare it against the absolute value of |
| each weight. |
| """ |
| - super(LogLinearModel, self).__init__(feature_function, weights, epsilon) |
| + super(LogLinearModel, self).__init__(feature_function, weights, |
| + feature_to_weight, epsilon) |
| self._Y = Y_given_X |