| OLD | NEW |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 # TODO(http://crbug.com/669639): there are lots of ways to make the code | 5 # TODO(http://crbug.com/669639): there are lots of ways to make the code |
| 6 # in this file better. We avoid having separate todos per task; instead | 6 # in this file better. We avoid having separate todos per task; instead |
| 7 # see that meta-issue ticket. | 7 # see that meta-issue ticket. |
| 8 | 8 |
| 9 import math | 9 import math |
| 10 import numpy as np | 10 import numpy as np |
| (...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 77 """ | 77 """ |
| 78 if epsilon is None: | 78 if epsilon is None: |
| 79 epsilon = EPSILON | 79 epsilon = EPSILON |
| 80 self._weights = np.array([ | 80 self._weights = np.array([ |
| 81 w if isinstance(w, float) and math.fabs(w) >= epsilon else 0. | 81 w if isinstance(w, float) and math.fabs(w) >= epsilon else 0. |
| 82 for w in weights]) | 82 for w in weights]) |
| 83 | 83 |
| 84 self._quadrance = None | 84 self._quadrance = None |
| 85 | 85 |
| 86 # TODO(crbug.com/674752): we need better names for ``self._features``. | 86 # TODO(crbug.com/674752): we need better names for ``self._features``. |
| 87 def _FeaturesMemoizedOnY(x): | 87 def _Features(x): |
| 88 """Wrap ``feature_function`` to memoize things and ensure types. |
| 89 |
| 90 This outer wrapping takes each ``x`` to a memoized instance of |
| 91 ``_FeaturesGivenX``. That is, for each ``x`` we return a |
| 92 ``MemoizedFunction`` from ``Y`` to ``list(FeatureValue)``. |
| 93 """ |
| 88 fx = feature_function(x) | 94 fx = feature_function(x) |
| 89 def _TypeCheckFeatures(y): | 95 def _FeaturesGivenX(y): |
| 96 """Wrap ``feature_function(x)`` to ensure appropriate types. |
| 97 |
| 98 This inner wrapper ensures that the resulting ``FeatureValue`` |
| 99 array has the same length as the weight covector. |
| 100 """ |
| 90 fxy = fx(y) | 101 fxy = fx(y) |
| 91 # N.B., we're assuming that ``len(self.weights)`` is O(1). | 102 # N.B., we're assuming that ``len(self.weights)`` is O(1). |
| 92 assert len(fxy) == len(self.weights), TypeError( | 103 assert len(fxy) == len(self.weights), TypeError( |
| 93 "vector length mismatch: %d != %d" % (len(fxy), len(self.weights))) | 104 "vector length mismatch: %d != %d" % (len(fxy), len(self.weights))) |
| 94 return fxy | 105 return fxy |
| 95 return MemoizedFunction(_TypeCheckFeatures) | 106 |
| 96 self._features = MemoizedFunction(_FeaturesMemoizedOnY) | 107 # Memoize on ``Y``, to ensure we don't need to recompute |
| 108 # ``FeatureValue``s nor recheck the lengths. |
| 109 return MemoizedFunction(_FeaturesGivenX) |
| 110 |
| 111 # Memoize on ``X``, to ensure we share the memo tables on ``Y``. |
| 112 self._features = MemoizedFunction(_Features) |
| 97 | 113 |
| 98 # TODO(crbug.com/674752): we need better names for ``self._scores``. | 114 # TODO(crbug.com/674752): we need better names for ``self._scores``. |
| 99 # N.B., this is just the inner product of ``self.weights`` | 115 # N.B., this is just the inner product of ``self.weights`` |
| 100 # against ``self._features(x)``. If we can compute this in some | 116 # against ``self._features(x)``. If we can compute this in some |
| 101 # more efficient way, we should. In particular, we will want to | 117 # more efficient way, we should. In particular, we will want to |
| 102 # make the weights sparse, in which case we need to use a sparse | 118 # make the weights sparse, in which case we need to use a sparse |
| 103 # variant of the dot product. | 119 # variant of the dot product. |
| 104 self._scores = MemoizedFunction(lambda x: | 120 self._scores = MemoizedFunction(lambda x: |
| 105 self._features(x).map(lambda fxy: | 121 self._features(x).map(lambda fxy: |
| 106 self.weights.dot(np.array(map(lambda feature: | 122 self.weights.dot(np.array([feature.value for feature in fxy])))) |
| 107 feature.value, fxy))))) | |
| 108 | 123 |
| 109 def ClearWeightBasedMemos(self): | 124 def ClearWeightBasedMemos(self): |
| 110 """Clear all the memos that depend on the weight covector.""" | 125 """Clear all the memos that depend on the weight covector.""" |
| 111 self._quadrance = None | 126 self._quadrance = None |
| 112 self._scores.ClearMemos() | 127 self._scores.ClearMemos() |
| 113 | 128 |
| 114 def ClearAllMemos(self): | 129 def ClearAllMemos(self): |
| 115 """Clear all memos, even those independent of the weight covector.""" | 130 """Clear all memos, even those independent of the weight covector.""" |
| 116 self.ClearWeightBasedMemos() | 131 self.ClearWeightBasedMemos() |
| 117 self._features.ClearMemos() | 132 self._features.ClearMemos() |
| (...skipping 180 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 298 function returns; rather, it's a sort of average of all the results | 313 function returns; rather, it's a sort of average of all the results |
| 299 returned. For more information you can take a look at Wikipedia | 314 returned. For more information you can take a look at Wikipedia |
| 300 <https://en.wikipedia.org/wiki/Expected_value>. | 315 <https://en.wikipedia.org/wiki/Expected_value>. |
| 301 """ | 316 """ |
| 302 prob_given_x = self.Probability(x) | 317 prob_given_x = self.Probability(x) |
| 303 # N.B., the ``*`` below is vector scaling! If we want to make this | 318 # N.B., the ``*`` below is vector scaling! If we want to make this |
| 304 # method polymorphic in the return type of ``f`` then we'll need an | 319 # method polymorphic in the return type of ``f`` then we'll need an |
| 305 # API that provides both scaling and ``vsum``. | 320 # API that provides both scaling and ``vsum``. |
| 306 return vsum([prob_given_x(y) * f(y) for y in self._Y]) | 321 return vsum([prob_given_x(y) * f(y) for y in self._Y]) |
| 307 | 322 |
| OLD | NEW |