appengine/findit/crash/loglinear/model.py - Issue 2560723005: Implementing a new LogLinearModel-based CL classifier

Keyboard Shortcuts

	File
u :	up to issue
j / k :	jump to file after / before current file
J / K :	jump to next file with a comment after / before current file
	Side-by-side diff
i :	toggle intra-line diffs
e :	expand all comments
c :	collapse all comments
s :	toggle showing all comments
n / p :	next / previous diff chunk or comment
N / P :	next / previous comment
<Up> / <Down> :	next / previous line

	Issue
u :	up to list of issues
j / k :	jump to patch after / before current patch
o / <Enter> :	open current patch in side-by-side view
i :	open current patch in unified diff view

	Issue List
j / k :	jump to issue after / before current issue
o / <Enter> :	open current issue

Unified Diff: appengine/findit/crash/loglinear/model.py

Issue 2560723005: Implementing a new LogLinearModel-based CL classifier (Closed)

Patch Set: rebase (created 4 years ago)

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « appengine/findit/crash/loglinear/changelist_classifier.py ('k') | appengine/findit/crash/loglinear/test/changelist_classifier_test.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: appengine/findit/crash/loglinear/model.py

diff --git a/appengine/findit/crash/loglinear/model.py b/appengine/findit/crash/loglinear/model.py

index eb701070430fc8daf76106d6e150e636c6a1908f..ff3afcc29b68d8d4c516f9de697cea61a25d9b18 100644

--- a/appengine/findit/crash/loglinear/model.py

+++ b/appengine/findit/crash/loglinear/model.py

@@ -84,16 +84,32 @@ class UnnormalizedLogLinearModel(object):

self._quadrance = None

# TODO(crbug.com/674752): we need better names for ``self._features``.

- def _FeaturesMemoizedOnY(x):

+ def _Features(x):

+ """Wrap ``feature_function`` to memoize things and ensure types.

+ This outer wrapping takes each ``x`` to a memoized instance of

+ ``_FeaturesGivenX``. That is, for each ``x`` we return a

+ ``MemoizedFunction`` from ``Y`` to ``list(FeatureValue)``.

+ """

fx = feature_function(x)

- def _TypeCheckFeatures(y):

+ def _FeaturesGivenX(y):

+ """Wrap ``feature_function(x)`` to ensure appropriate types.

+ This inner wrapper ensures that the resulting ``FeatureValue``

+ array has the same length as the weight covector.

+ """

fxy = fx(y)

# N.B., we're assuming that ``len(self.weights)`` is O(1).

assert len(fxy) == len(self.weights), TypeError(

"vector length mismatch: %d != %d" % (len(fxy), len(self.weights)))

return fxy

- return MemoizedFunction(_TypeCheckFeatures)

- self._features = MemoizedFunction(_FeaturesMemoizedOnY)

+ # Memoize on ``Y``, to ensure we don't need to recompute

+ # ``FeatureValue``s or recheck the lengths.

+ return MemoizedFunction(_FeaturesGivenX)

+ # Memoize on ``X``, to ensure we share the memo tables on ``Y``.

+ self._features = MemoizedFunction(_Features)

# TODO(crbug.com/674752): we need better names for ``self._scores``.

# N.B., this is just the inner product of ``self.weights``

@@ -103,8 +119,7 @@ class UnnormalizedLogLinearModel(object):

# variant of the dot product.

self._scores = MemoizedFunction(lambda x:

self._features(x).map(lambda fxy:

- self.weights.dot(np.array(map(lambda feature:

- feature.value, fxy)))))

+ self.weights.dot(np.array([feature.value for feature in fxy]))))

def ClearWeightBasedMemos(self):

"""Clear all the memos that depend on the weight covector."""