Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(878)

Side by Side Diff: appengine/findit/crash/loglinear/model.py

Issue 2560723005: Implementing a new LogLinearModel-based CL classifier (Closed)
Patch Set: rebase Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 # Copyright 2016 The Chromium Authors. All rights reserved. 1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 # TODO(http://crbug.com/669639): there are lots of ways to make the code 5 # TODO(http://crbug.com/669639): there are lots of ways to make the code
6 # in this file better. We avoid having separate todos per task; instead 6 # in this file better. We avoid having separate todos per task; instead
7 # see that meta-issue ticket. 7 # see that meta-issue ticket.
8 8
9 import math 9 import math
10 import numpy as np 10 import numpy as np
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
77 """ 77 """
78 if epsilon is None: 78 if epsilon is None:
79 epsilon = EPSILON 79 epsilon = EPSILON
80 self._weights = np.array([ 80 self._weights = np.array([
81 w if isinstance(w, float) and math.fabs(w) >= epsilon else 0. 81 w if isinstance(w, float) and math.fabs(w) >= epsilon else 0.
82 for w in weights]) 82 for w in weights])
83 83
84 self._quadrance = None 84 self._quadrance = None
85 85
86 # TODO(crbug.com/674752): we need better names for ``self._features``. 86 # TODO(crbug.com/674752): we need better names for ``self._features``.
87 def _FeaturesMemoizedOnY(x): 87 def _Features(x):
88 """Wrap ``feature_function`` to memoize things and ensure types.
89
90 This outer wrapping takes each ``x`` to a memoized instance of
91 ``_FeaturesGivenX``. That is, for each ``x`` we return a
92 ``MemoizedFunction`` from ``Y`` to ``list(FeatureValue)``.
93 """
88 fx = feature_function(x) 94 fx = feature_function(x)
89 def _TypeCheckFeatures(y): 95 def _FeaturesGivenX(y):
96 """Wrap ``feature_function(x)`` to ensure appropriate types.
97
98 This inner wrapper ensures that the resulting ``FeatureValue``
99 array has the same length as the weight covector.
100 """
90 fxy = fx(y) 101 fxy = fx(y)
91 # N.B., we're assuming that ``len(self.weights)`` is O(1). 102 # N.B., we're assuming that ``len(self.weights)`` is O(1).
92 assert len(fxy) == len(self.weights), TypeError( 103 assert len(fxy) == len(self.weights), TypeError(
93 "vector length mismatch: %d != %d" % (len(fxy), len(self.weights))) 104 "vector length mismatch: %d != %d" % (len(fxy), len(self.weights)))
94 return fxy 105 return fxy
95 return MemoizedFunction(_TypeCheckFeatures) 106
96 self._features = MemoizedFunction(_FeaturesMemoizedOnY) 107 # Memoize on ``Y``, to ensure we don't need to recompute
108 # ``FeatureValue``s nor recheck the lengths.
109 return MemoizedFunction(_FeaturesGivenX)
110
111 # Memoize on ``X``, to ensure we share the memo tables on ``Y``.
112 self._features = MemoizedFunction(_Features)
97 113
98 # TODO(crbug.com/674752): we need better names for ``self._scores``. 114 # TODO(crbug.com/674752): we need better names for ``self._scores``.
99 # N.B., this is just the inner product of ``self.weights`` 115 # N.B., this is just the inner product of ``self.weights``
100 # against ``self._features(x)``. If we can compute this in some 116 # against ``self._features(x)``. If we can compute this in some
101 # more efficient way, we should. In particular, we will want to 117 # more efficient way, we should. In particular, we will want to
102 # make the weights sparse, in which case we need to use a sparse 118 # make the weights sparse, in which case we need to use a sparse
103 # variant of the dot product. 119 # variant of the dot product.
104 self._scores = MemoizedFunction(lambda x: 120 self._scores = MemoizedFunction(lambda x:
105 self._features(x).map(lambda fxy: 121 self._features(x).map(lambda fxy:
106 self.weights.dot(np.array(map(lambda feature: 122 self.weights.dot(np.array([feature.value for feature in fxy]))))
107 feature.value, fxy)))))
108 123
109 def ClearWeightBasedMemos(self): 124 def ClearWeightBasedMemos(self):
110 """Clear all the memos that depend on the weight covector.""" 125 """Clear all the memos that depend on the weight covector."""
111 self._quadrance = None 126 self._quadrance = None
112 self._scores.ClearMemos() 127 self._scores.ClearMemos()
113 128
114 def ClearAllMemos(self): 129 def ClearAllMemos(self):
115 """Clear all memos, even those independent of the weight covector.""" 130 """Clear all memos, even those independent of the weight covector."""
116 self.ClearWeightBasedMemos() 131 self.ClearWeightBasedMemos()
117 self._features.ClearMemos() 132 self._features.ClearMemos()
(...skipping 180 matching lines...) Expand 10 before | Expand all | Expand 10 after
298 function returns; rather, it's a sort of average of all the results 313 function returns; rather, it's a sort of average of all the results
299 returned. For more information you can take a look at Wikipedia 314 returned. For more information you can take a look at Wikipedia
300 <https://en.wikipedia.org/wiki/Expected_value>. 315 <https://en.wikipedia.org/wiki/Expected_value>.
301 """ 316 """
302 prob_given_x = self.Probability(x) 317 prob_given_x = self.Probability(x)
303 # N.B., the ``*`` below is vector scaling! If we want to make this 318 # N.B., the ``*`` below is vector scaling! If we want to make this
304 # method polymorphic in the return type of ``f`` then we'll need an 319 # method polymorphic in the return type of ``f`` then we'll need an
305 # API that provides both scaling and ``vsum``. 320 # API that provides both scaling and ``vsum``.
306 return vsum([prob_given_x(y) * f(y) for y in self._Y]) 321 return vsum([prob_given_x(y) * f(y) for y in self._Y])
307 322
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698