Chromium Code Reviews| Index: appengine/findit/crash/loglinear/model.py |
| diff --git a/appengine/findit/crash/loglinear/model.py b/appengine/findit/crash/loglinear/model.py |
| index b231e90e075534396bfbfbdccaefb5ecdf7ad1ad..fa9f08d53031c68eceba3b4f5ee694a06321f782 100644 |
| --- a/appengine/findit/crash/loglinear/model.py |
| +++ b/appengine/findit/crash/loglinear/model.py |
| @@ -60,7 +60,8 @@ class UnnormalizedLogLinearModel(object): |
| rather than returning a probability per se. |
| """ |
| - def __init__(self, feature_function, weights, epsilon=None): |
| + def __init__(self, feature_function, weights, feature_to_weight, |
| + epsilon=None): |
|
wrengr
2017/01/09 19:31:51
given the dict of weights the list is redundant. F
|
| """Construct a new model with the given weights and feature function. |
| Args: |
| @@ -70,6 +71,10 @@ class UnnormalizedLogLinearModel(object): |
| weights (list of float): coefficients for how important we consider |
| each component of the feature vector for deciding which ``y`` |
| to blame. |
| + feature_to_weight (dict of str to float): the weights for the features. |
| + The keys of the dictionary are the names of the features the weights |
| + are for. We take this argument as a dict rather than as a list so that |
| + callers needn't worry about what order to provide the weights in. |
| epsilon (float): The absolute-error threshold for considering a |
| weight to be "equal to zero". N.B., this should be a positive |
| number, as we will compare it against the absolute value of |
| @@ -77,6 +82,8 @@ class UnnormalizedLogLinearModel(object): |
| """ |
| if epsilon is None: |
| epsilon = EPSILON |
| + |
| + self._feature_to_weight = feature_to_weight |
| self._weights = np.array([ |
|
wrengr
2017/01/09 19:31:52
Once we no longer take the list of weights, this w
|
| w if isinstance(w, float) and math.fabs(w) >= epsilon else 0. |
| for w in weights]) |
| @@ -121,6 +128,21 @@ class UnnormalizedLogLinearModel(object): |
| self._features(x).map(lambda fxy: |
| self.weights.dot(np.array([feature.value for feature in fxy])))) |
| + def SingleFeatureScore(self, feature_value): |
| + """Returns the score (aka weighted value) of a ``FeatureValue``. |
| + |
| + This function assumes the report's stacktrace has already had any necessary |
| + preprocessing (like filtering or truncating) applied. |
| + |
| + Args: |
| + feature_value (FeatureValue): the feature value to check. |
| + |
| + Returns: |
| + The score of the feature value. |
| + """ |
| + return feature_value.value * self._feature_to_weight.get( |
| + feature_value.name, 0.) |
| + |
| def ClearWeightBasedMemos(self): |
| """Clear all the memos that depend on the weight covector.""" |
| self._quadrance = None |
| @@ -218,7 +240,8 @@ class LogLinearModel(UnnormalizedLogLinearModel): |
| we can provide probabilities (not just scores). However, to do so we |
| require a specification of the subsets of ``Y`` for each ``x``. |
| """ |
| - def __init__(self, Y_given_X, feature_function, weights, epsilon=None): |
| + def __init__(self, Y_given_X, feature_function, weights, feature_to_weight, |
| + epsilon=None): |
| """Construct a new probabilistic model. |
| Args: |
| @@ -241,7 +264,8 @@ class LogLinearModel(UnnormalizedLogLinearModel): |
| number, as we will compare it against the absolute value of |
| each weight. |
| """ |
| - super(LogLinearModel, self).__init__(feature_function, weights, epsilon) |
| + super(LogLinearModel, self).__init__(feature_function, weights, |
| + feature_to_weight, epsilon) |
| self._Y = Y_given_X |