Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 # TODO(http://crbug.com/669639): there are lots of ways to make the code | 5 # TODO(http://crbug.com/669639): there are lots of ways to make the code |
| 6 # in this file better. We avoid having separate todos per task; instead | 6 # in this file better. We avoid having separate todos per task; instead |
| 7 # see that meta-issue ticket. | 7 # see that meta-issue ticket. |
| 8 | 8 |
| 9 import math | 9 import math |
| 10 import numpy as np | 10 import numpy as np |
| 11 # N.B., ``np.array`` can't take generators; you must pass explicit lists. | 11 # N.B., ``np.array`` can't take generators; you must pass explicit lists. |
| 12 | 12 |
| 13 from libs.math.functions import MemoizedFunction | 13 from libs.math.functions import MemoizedFunction |
| 14 from libs.math.logarithms import logsumexp | 14 from libs.math.logarithms import logsumexp |
| 15 from libs.math.vectors import vsum | 15 from libs.math.vectors import vsum |
| 16 | 16 |
| 17 EPSILON = 0.00001 | 17 EPSILON = 0.00001 |
| 18 | 18 |
| 19 | 19 |
| 20 def ToFeatureFunction(fs): | |
| 21 """Given an array of scalar-valued functions, return an array-valued function. | |
| 22 | |
| 23 Args: | |
| 24 fs (iterable): A collection of curried functions ``X -> Y -> A``. | |
| 25 That is, given a particular ``x`` they return a function ``Y -> A``. | |
| 26 | |
| 27 Returns: | |
| 28 A function ``X -> Y -> list(A)`` where for all ``x``, ``y``, and | |
| 29 ``i`` we have that ``ToFeatureFunction(fs)(x)(y)[i] == fs[i](x)(y)``. | |
| 30 """ | |
| 31 def _FeatureFunction(x): | |
| 32 fxs = [f(x) for f in fs] | |
| 33 return lambda y: [fx(y) for fx in fxs] | |
| 34 | |
| 35 return _FeatureFunction | |
| 36 | |
| 37 | |
| 38 class UnnormalizedLogLinearModel(object): | 20 class UnnormalizedLogLinearModel(object): |
| 39 """An unnormalized loglinear model. | 21 """An unnormalized loglinear model. |
| 40 | 22 |
| 41 We use this model for combining a bunch of different features in order | 23 We use this model for combining a bunch of different features in order |
| 42 to decide who to blame. In particular, the nice thing about loglinear | 24 to decide who to blame. In particular, the nice thing about loglinear |
| 43 models is that (a) they allow separating the amount of weight we | 25 models is that (a) they allow separating the amount of weight we |
| 44 give to each feature from the feature itself, and (b) they allow us | 26 give to each feature from the feature itself, and (b) they allow us |
| 45 to keep adding features at will without needing to worry about the | 27 to keep adding features at will without needing to worry about the |
| 46 fiddly details needed for it to be a probability model. | 28 fiddly details needed for it to be a probability model. |
| 47 | 29 |
| (...skipping 12 matching lines...) Expand all Loading... | |
| 60 rather than returning a probability per se. | 42 rather than returning a probability per se. |
| 61 """ | 43 """ |
| 62 | 44 |
| 63 def __init__(self, feature_function, weights, epsilon=None): | 45 def __init__(self, feature_function, weights, epsilon=None): |
| 64 """Construct a new model with the given weights and feature function. | 46 """Construct a new model with the given weights and feature function. |
| 65 | 47 |
| 66 Args: | 48 Args: |
| 67 feature_function: A function ``X -> Y -> list(FeatureValue)``. N.B., | 49 feature_function: A function ``X -> Y -> list(FeatureValue)``. N.B., |
| 68 for all ``x`` and ``y`` the length of ``feature_function(x)(y)`` | 50 for all ``x`` and ``y`` the length of ``feature_function(x)(y)`` |
| 69 must be the same as the length of ``weights``. | 51 must be the same as the length of ``weights``. |
| 70 weights (list of float): coefficients for how important we consider | 52 weights (dict of float): the weights for the features. The keys of |
| 71 each component of the feature vector for deciding which ``y`` | 53 the dictionary are the names of the features each weight is |
| 72 to blame. | 54 for. We take this argument as a dict rather than as a list so that |
| 55 callers needn't worry about what order to provide the weights in. | |
| 73 epsilon (float): The absolute-error threshold for considering a | 56 epsilon (float): The absolute-error threshold for considering a |
| 74 weight to be "equal to zero". N.B., this should be a positive | 57 weight to be "equal to zero". N.B., this should be a positive |
| 75 number, as we will compare it against the absolute value of | 58 number, as we will compare it against the absolute value of |
| 76 each weight. | 59 each weight. |
| 77 """ | 60 """ |
| 78 if epsilon is None: | 61 if epsilon is None: |
| 79 epsilon = EPSILON | 62 epsilon = EPSILON |
| 80 self._weights = np.array([ | 63 |
| 81 w if isinstance(w, float) and math.fabs(w) >= epsilon else 0. | 64 self._weights = { |
| 82 for w in weights]) | 65 name: weight if isinstance(weight, float) and |
| 66 math.fabs(weight) >= epsilon else 0. | |
|
wrengr
2017/01/11 20:38:30
The ``if...`` should be moved to after the ``for..
Sharu Jiang
2017/01/12 01:41:38
Done.
| |
| 67 for name, weight in weights.iteritems() | |
| 68 } | |
| 83 | 69 |
| 84 self._quadrance = None | 70 self._quadrance = None |
| 85 | 71 |
| 86 # TODO(crbug.com/674752): we need better names for ``self._features``. | 72 # TODO(crbug.com/674752): we need better names for ``self._features``. |
| 87 def _Features(x): | 73 def _Features(x): |
| 88 """Wrap ``feature_function`` to memoize things and ensure types. | 74 """Wrap ``feature_function`` to memoize things and ensure types. |
| 89 | 75 |
| 90 This outer wrapping takes each ``x`` to a memoized instance of | 76 This outer wrapping takes each ``x`` to a memoized instance of |
| 91 ``_FeaturesGivenX``. That is, for each ``x`` we return a | 77 ``_FeaturesGivenX``. That is, for each ``x`` we return a |
| 92 ``MemoizedFunction`` from ``Y`` to ``list(FeatureValue)``. | 78 ``MemoizedFunction`` from ``Y`` to ``dict(str to FeatureValue)``. |
| 93 """ | 79 """ |
| 94 fx = feature_function(x) | 80 fx = feature_function(x) |
| 95 def _FeaturesGivenX(y): | 81 def _FeaturesGivenX(y): |
| 96 """Wrap ``feature_function(x)`` to ensure appropriate types. | 82 """Wrap ``feature_function(x)`` to ensure appropriate types. |
| 97 | 83 |
| 98 This inner wrapper ensures that the resulting ``FeatureValue`` | 84 This inner wrapper ensures that the resulting ``FeatureValue`` |
| 99 array has the same length as the weight covector. | 85 array has the same length as the weight covector. |
| 100 """ | 86 """ |
| 101 fxy = fx(y) | 87 fxy = fx(y) |
| 102 # N.B., we're assuming that ``len(self.weights)`` is O(1). | 88 # N.B., we're assuming that ``len(self.weights)`` is O(1). |
| 103 assert len(fxy) == len(self.weights), TypeError( | 89 assert len(fxy) == len(self.weights), TypeError( |
| 104 "vector length mismatch: %d != %d" % (len(fxy), len(self.weights))) | 90 "vector length mismatch: %d != %d" % (len(fxy), len(self.weights))) |
| 105 return fxy | 91 return fxy |
| 106 | 92 |
| 107 # Memoize on ``Y``, to ensure we don't need to recompute | 93 # Memoize on ``Y``, to ensure we don't need to recompute |
| 108 # ``FeatureValue``s nor recheck the lengths. | 94 # ``FeatureValue``s nor recheck the lengths. |
| 109 return MemoizedFunction(_FeaturesGivenX) | 95 return MemoizedFunction(_FeaturesGivenX) |
| 110 | 96 |
| 111 # Memoize on ``X``, to ensure we share the memo tables on ``Y``. | 97 # Memoize on ``X``, to ensure we share the memo tables on ``Y``. |
| 112 self._features = MemoizedFunction(_Features) | 98 self._features = MemoizedFunction(_Features) |
| 113 | 99 |
| 114 # TODO(crbug.com/674752): we need better names for ``self._scores``. | 100 # TODO(crbug.com/674752): we need better names for ``self._scores``. |
| 115 # N.B., this is just the inner product of ``self.weights`` | 101 # N.B., this is just the inner product of ``self.weights`` |
| 116 # against ``self._features(x)``. If we can compute this in some | 102 # against ``self._features(x)``. If we can compute this in some |
| 117 # more efficient way, we should. In particular, we will want to | 103 # more efficient way, we should. In particular, we will want to |
| 118 # make the weights sparse, in which case we need to use a sparse | 104 # make the weights sparse, in which case we need to use a sparse |
| 119 # variant of the dot product. | 105 # variant of the dot product. |
| 120 self._scores = MemoizedFunction(lambda x: | 106 self._scores = MemoizedFunction(lambda x: self._features(x).map( |
| 121 self._features(x).map(lambda fxy: | 107 lambda fxy: sum(self.SingleFeatureScore(feature) |
|
wrengr
2017/01/11 20:38:30
Should use ``math.fsum`` whenever adding floats; n
Sharu Jiang
2017/01/12 01:41:38
Done.
| |
| 122 self.weights.dot(np.array([feature.value for feature in fxy])))) | 108 for feature in fxy.itervalues()))) |
|
wrengr
2017/01/11 20:38:30
N.B., you're not taking advantage of the sparsity
Sharu Jiang
2017/01/12 01:41:38
That's because I didn't filter those 0 weights in
wrengr
2017/01/12 18:16:16
Yeah, the weights will change during training, but
| |
| 109 | |
| 110 def SingleFeatureScore(self, feature_value): | |
| 111 """Returns the score (aka weighted value) of a ``FeatureValue``. | |
| 112 | |
| 113 This function assumes the report's stacktrace has already had any necessary | |
| 114 preprocessing (like filtering or truncating) applied. | |
| 115 | |
| 116 Args: | |
| 117 feature_value (FeatureValue): the feature value to check. | |
| 118 | |
| 119 Returns: | |
| 120 The score of the feature value. | |
| 121 """ | |
| 122 return feature_value.value * self._weights.get(feature_value.name, 0.) | |
| 123 | 123 |
| 124 def ClearWeightBasedMemos(self): | 124 def ClearWeightBasedMemos(self): |
| 125 """Clear all the memos that depend on the weight covector.""" | 125 """Clear all the memos that depend on the weight covector.""" |
| 126 self._quadrance = None | 126 self._quadrance = None |
| 127 self._scores.ClearMemos() | 127 self._scores.ClearMemos() |
| 128 | 128 |
| 129 def ClearAllMemos(self): | 129 def ClearAllMemos(self): |
| 130 """Clear all memos, even those independent of the weight covector.""" | 130 """Clear all memos, even those independent of the weight covector.""" |
| 131 self.ClearWeightBasedMemos() | 131 self.ClearWeightBasedMemos() |
| 132 self._features.ClearMemos() | 132 self._features.ClearMemos() |
| 133 | 133 |
| 134 @property | 134 @property |
| 135 def weights(self): | 135 def weights(self): |
| 136 """The weight covector. | 136 """The weight covector. |
| 137 | 137 |
| 138 At present we return the weights as an ``np.ndarray``, but in the | 138 At present we return the weights as a dict mapping feature name to its |
| 139 future that may be replaced by a more general type which specifies | 139 weight, but in the future that may be replaced by a more general type which |
| 140 the semantics rather than the implementation details. | 140 specifies the semantics rather than the implementation details. |
| 141 """ | 141 """ |
| 142 return self._weights | 142 return self._weights |
| 143 | 143 |
| 144 @property | 144 @property |
| 145 def l0(self): # pragma: no cover | 145 def l0(self): # pragma: no cover |
| 146 """The l0-norm of the weight covector. | 146 """The l0-norm of the weight covector. |
| 147 | 147 |
| 148 N.B., despite being popularly called the "l0-norm", this isn't | 148 N.B., despite being popularly called the "l0-norm", this isn't |
| 149 actually a norm in the mathematical sense.""" | 149 actually a norm in the mathematical sense.""" |
| 150 return float(np.count_nonzero(self.weights)) | 150 return float(np.count_nonzero(self.weights.itervalues())) |
|
wrengr
2017/01/11 20:38:30
You can't use ``np.count_nonzero`` anymore since `
Sharu Jiang
2017/01/12 01:41:38
Since I haven't filtered 0 weights yet, I just use
wrengr
2017/01/12 18:16:16
Testing for exact equality with 0.0 isn't reliable
| |
| 151 | 151 |
| 152 @property | 152 @property |
| 153 def l1(self): # pragma: no cover | 153 def l1(self): # pragma: no cover |
| 154 """The l1 (aka: Manhattan) norm of the weight covector.""" | 154 """The l1 (aka: Manhattan) norm of the weight covector.""" |
| 155 return math.fsum(math.fabs(w) for w in self.weights) | 155 return math.fsum(math.fabs(w) for w in self.weights.itervalues()) |
| 156 | 156 |
| 157 @property | 157 @property |
| 158 def quadrance(self): | 158 def quadrance(self): |
| 159 """The square of the l2 norm of the weight covector. | 159 """The square of the l2 norm of the weight covector. |
| 160 | 160 |
| 161 This value is often more helpful to have direct access to, as it | 161 This value is often more helpful to have direct access to, as it |
| 162 avoids the need for non-rational functions (e.g., sqrt) and shows up | 162 avoids the need for non-rational functions (e.g., sqrt) and shows up |
| 163 as its own quantity in many places. Also, computing it directly avoids | 163 as its own quantity in many places. Also, computing it directly avoids |
| 164 the error introduced by squaring the square-root of an IEEE-754 float. | 164 the error introduced by squaring the square-root of an IEEE-754 float. |
| 165 """ | 165 """ |
| 166 if self._quadrance is None: | 166 if self._quadrance is None: |
| 167 self._quadrance = math.fsum(math.fabs(w)**2 for w in self.weights) | 167 self._quadrance = math.fsum( |
| 168 math.fabs(w)**2 for w in self.weights.itervalues()) | |
| 168 | 169 |
| 169 return self._quadrance | 170 return self._quadrance |
| 170 | 171 |
| 171 @property | 172 @property |
| 172 def l2(self): | 173 def l2(self): |
| 173 """The l2 (aka Euclidean) norm of the weight covector. | 174 """The l2 (aka Euclidean) norm of the weight covector. |
| 174 | 175 |
| 175 If you need the square of the l2-norm, do not use this property. | 176 If you need the square of the l2-norm, do not use this property. |
| 176 Instead, use the ``quadrance`` property which is more accurate than | 177 Instead, use the ``quadrance`` property which is more accurate than |
| 177 squaring this one. | 178 squaring this one. |
| 178 """ | 179 """ |
| 179 return math.sqrt(self.quadrance) | 180 return math.sqrt(self.quadrance) |
| 180 | 181 |
| 181 def Features(self, x): | 182 def Features(self, x): |
| 182 """Returns a function mapping ``y`` to its feature vector given ``x``. | 183 """Returns a function mapping ``y`` to its feature vector given ``x``. |
| 183 | 184 |
| 184 Args: | 185 Args: |
| 185 x (X): the value of the independent variable. | 186 x (X): the value of the independent variable. |
| 186 | 187 |
| 187 Returns: | 188 Returns: |
| 188 A ``MemoizedFunction`` of type ``Y -> np.array(float)``. | 189 A ``MemoizedFunction`` of type ``Y -> dict(str to FeatureValue)``. |
| 189 """ | 190 """ |
| 190 return self._features(x) | 191 return self._features(x) |
| 191 | 192 |
| 192 def Score(self, x): | 193 def Score(self, x): |
| 193 """Returns a function mapping ``y`` to its "score" given ``x``. | 194 """Returns a function mapping ``y`` to its "score" given ``x``. |
| 194 | 195 |
| 195 Semantically, the "score" of ``y`` given ``x`` is the | 196 Semantically, the "score" of ``y`` given ``x`` is the |
| 196 unnormalized log-domain conditional probability of ``y`` given | 197 unnormalized log-domain conditional probability of ``y`` given |
| 197 ``x``. Operationally, we compute this by taking the inner product | 198 ``x``. Operationally, we compute this by taking the inner product |
| 198 of the weight covector with the ``Features(x)(y)`` vector. The | 199 of the weight covector with the ``Features(x)(y)`` vector. The |
| (...skipping 27 matching lines...) Expand all Loading... | |
| 226 subset of ``Y`` which has non-zero probability given the | 227 subset of ``Y`` which has non-zero probability given the |
| 227 ``x``. When in doubt about whether some ``y`` has zero probability | 228 ``x``. When in doubt about whether some ``y`` has zero probability |
| 228 or not, it is always safe/correct to return a larger subset of | 229 or not, it is always safe/correct to return a larger subset of |
| 229 ``Y`` (it'll just take more computation time is all). This is | 230 ``Y`` (it'll just take more computation time is all). This is |
| 230 needed for computing the partition function and expectation. N.B., | 231 needed for computing the partition function and expectation. N.B., |
| 231 we do not actually need to know/enumerate of *all* of ``Y``, | 232 we do not actually need to know/enumerate of *all* of ``Y``, |
| 232 only the subsets for each ``x``. | 233 only the subsets for each ``x``. |
| 233 feature_function: A function ``X -> Y -> list(float)``. N.B., | 234 feature_function: A function ``X -> Y -> list(float)``. N.B., |
| 234 for all ``x`` and ``y`` the length of ``feature_function(x)(y)`` | 235 for all ``x`` and ``y`` the length of ``feature_function(x)(y)`` |
| 235 must be the same as the length of ``weights``. | 236 must be the same as the length of ``weights``. |
| 236 weights (list of float): coefficients for how important we consider | 237 weights (dict of float): the weights for the features. The keys of |
| 237 each component of the feature vector for deciding which ``y`` | 238 the dictionary are the names of the features each weight is |
| 238 to blame. | 239 for. We take this argument as a dict rather than as a list so that |
| 240 callers needn't worry about what order to provide the weights in. | |
| 239 epsilon (float): The absolute-error threshold for considering a | 241 epsilon (float): The absolute-error threshold for considering a |
| 240 weight to be "equal to zero". N.B., this should be a positive | 242 weight to be "equal to zero". N.B., this should be a positive |
| 241 number, as we will compare it against the absolute value of | 243 number, as we will compare it against the absolute value of |
| 242 each weight. | 244 each weight. |
| 243 """ | 245 """ |
| 244 super(LogLinearModel, self).__init__(feature_function, weights, epsilon) | 246 super(LogLinearModel, self).__init__(feature_function, weights, epsilon) |
| 245 | 247 |
| 246 self._Y = Y_given_X | 248 self._Y = Y_given_X |
| 247 | 249 |
| 248 def _LogZ(x): | 250 def _LogZ(x): |
| (...skipping 70 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 319 function returns; rather, it's a sort of average of all the results | 321 function returns; rather, it's a sort of average of all the results |
| 320 returned. For more information you can take a look at Wikipedia | 322 returned. For more information you can take a look at Wikipedia |
| 321 <https://en.wikipedia.org/wiki/Expected_value>. | 323 <https://en.wikipedia.org/wiki/Expected_value>. |
| 322 """ | 324 """ |
| 323 prob_given_x = self.Probability(x) | 325 prob_given_x = self.Probability(x) |
| 324 # N.B., the ``*`` below is vector scaling! If we want to make this | 326 # N.B., the ``*`` below is vector scaling! If we want to make this |
| 325 # method polymorphic in the return type of ``f`` then we'll need an | 327 # method polymorphic in the return type of ``f`` then we'll need an |
| 326 # API that provides both scaling and ``vsum``. | 328 # API that provides both scaling and ``vsum``. |
| 327 return vsum([prob_given_x(y) * f(y) for y in self._Y(x)]) | 329 return vsum([prob_given_x(y) * f(y) for y in self._Y(x)]) |
| 328 | 330 |
| OLD | NEW |