Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | |
| 2 # Use of this source code is governed by a BSD-style license that can be | |
| 3 # found in the LICENSE file. | |
| 4 | |
| 5 import math | |
| 6 import numpy as np | |
| 7 | |
| 8 from libs.math.functions import MemoizedFunction | |
| 9 from libs.math.logarithms import logsumexp | |
| 10 from libs.math.vectors import vsum | |
| 11 | |
| 12 | |
def ToFeatureFunction(fs):
  """Bundle an iterable of scalar-valued functions into one vector-valued one.

  Args:
    fs (iterable): A collection of functions, each mapping ``X`` to a
      function from ``Y`` to ``float``.

  Returns:
    A function from ``X`` to a function from ``Y`` to a ``list`` of
    ``float``, satisfying ``ToFeatureFunction(fs)(x)(y)[i] == fs[i](x)(y)``.
  """
  def _FeatureFunction(x):
    # Partially apply every component function to ``x`` up front, so that
    # repeated calls with different ``y`` values don't redo that work.
    partials = [f(x) for f in fs]

    # TODO(wrengr): allow ``fx(y)`` to be a list, and flatten everything.
    def _FeatureVector(y):
      return [g(y) for g in partials]

    return _FeatureVector

  return _FeatureFunction
| 30 | |
| 31 | |
# TODO(http://crbug.com/669639): lots of ways to make this code better.
class LogLinearModel(object):
  """A loglinear probability model.

  We use this model for combining a bunch of different features in order
  to decide who to blame. In particular, the nice thing about loglinear
  models is that (a) they allow separating the amount of weight we
  give to each feature from the feature itself, and (b) they allow us
  to keep adding features at will without needing to worry about the
  fiddly details needed for it to be a probability model.

  Throughout the documentation we use the following conventional
  terminology. The independent variable (aka: data to be classified) is
  called ``X``, with particular values for that variable called ``x``. The
  dependent variable (aka: the label returned by classification) is
  called ``Y``, with particular values for that random variable called
  ``y``. The partition function is called ``Z``. And, somewhat
  non-conventionally, we will call the log of the partition function
  ``zeta``.
  """

  # The default absolute-error threshold for considering a weight to be
  # "equal to zero", used when the caller does not supply ``epsilon``.
  _DEFAULT_EPSILON = 0.00001

  def __init__(self, Y, feature_function, weights, epsilon=None):
    """
    Args:
      Y (iterable): the entire range of values for the dependent
        variable. This is needed for computing the partition function.
      feature_function: A function from ``X`` to ``Y`` to a list of
        ``float``. N.B., for all ``x`` and ``y`` the length of
        ``feature_function(x)(y)`` must be the same as the length of
        ``weights``.
      weights (list of float): coefficients for how much we believe
        each component of the feature vector.
      epsilon (float): The absolute-error threshold for considering a
        weight to be "equal to zero". N.B., this should be a positive
        number, as we will compare it against the absolute value of
        each weight. Defaults to ``_DEFAULT_EPSILON``.
    """
    self._Y = frozenset(Y)

    if epsilon is None:
      epsilon = self._DEFAULT_EPSILON

    # Clamp weights within ``epsilon`` of zero to exactly zero, so that
    # properties like ``l0`` only count weights we actually believe in.
    # N.B., ``np.array`` can't take generators.
    self._weights = np.array([
        w if isinstance(w, float) and math.fabs(w) >= epsilon else 0.0
        for w in weights])

    def _FeaturesMemoizedOnY(x):
      fx = feature_function(x)
      def _FeaturesCoercedToCovector(y):
        # N.B., ``np.array`` can't take generators.
        fxy = np.array(list(fx(y)))
        # Raise explicitly rather than via ``assert`` so the check is not
        # stripped when Python runs with ``-O``. N.B., we're assuming that
        # ``len(self.weights)`` is O(1).
        if len(fxy) != len(self.weights):
          raise TypeError(
              "vector length mismatch: %d != %d"
              % (len(fxy), len(self.weights)))
        return fxy
      return MemoizedFunction(_FeaturesCoercedToCovector)
    self._features = MemoizedFunction(_FeaturesMemoizedOnY)

    # N.B., this is just the inner product of ``self.weights``
    # against ``self._features(x)``. If we can compute this in some
    # more efficient way, we should. In particular, we will want to
    # make the weights sparse, in which case we need to use a sparse
    # variant of the dot product.
    self._scores = MemoizedFunction(lambda x:
        self._features(x).map(self.weights.dot))

    def _LogZ(x):
      score_given_x = self._scores(x)
      return logsumexp([score_given_x(y) for y in self._Y])
    self._zetas = MemoizedFunction(_LogZ)

    # Memo for the ``quadrance`` property; ``None`` means "not computed yet".
    self._quadrance = None

  def ClearWeightBasedMemos(self):
    """Clear all the memos that depend on the weight covector."""
    self._quadrance = None
    self._scores.ClearMemos()
    self._zetas.ClearMemos()

  def ClearAllMemos(self):
    """Clear all memos, even those independent of the weight covector."""
    self.ClearWeightBasedMemos()
    self._features.ClearMemos()

  @property
  def weights(self):
    """The weight covector.

    At present we return the weights as an ``np.ndarray``, but in the
    future that may be replaced by a more general type which specifies
    the semantics rather than the implementation details.
    """
    return self._weights

  @property
  def l0(self):  # pragma: no cover
    """The l0-norm of the weight covector.

    N.B., despite being popularly called the "l0-norm", this isn't
    actually a norm in the mathematical sense."""
    return float(np.count_nonzero(self.weights))

  @property
  def l1(self):  # pragma: no cover
    """The l1 (aka: Manhattan) norm of the weight covector."""
    return math.fsum(math.fabs(w) for w in self.weights)

  @property
  def quadrance(self):
    """The square of the l2 norm of the weight covector.

    This value is often more helpful to have direct access to, as it
    avoids the need for non-rational functions (e.g., sqrt) and shows up
    as its own quantity in many places. Also, computing it directly avoids
    the error introduced by squaring the square-root of an IEEE-754 float.
    """
    if self._quadrance is None:
      # ``w * w`` is already non-negative, so no ``fabs`` is needed; and
      # ``math.fsum`` keeps the accumulation accurate.
      self._quadrance = math.fsum(w * w for w in self.weights)

    return self._quadrance

  @property
  def l2(self):
    """The l2 (aka Euclidean) norm of the weight covector.

    If you need the square of the l2-norm, do not use this property.
    Instead, use the ``quadrance`` property which is more accurate than
    squaring this one.
    """
    return math.sqrt(self.quadrance)

  # TODO(wrengr): avoid the extra indirection from this method to the
  # MemoizedFunction stored in the field.
  def Features(self, x):
    """Return a function mapping ``y`` to its feature vector given ``x``.

    Args:
      x (X): the value of the independent variable.

    Returns:
      A ``MemoizedFunction`` from ``Y`` to an ``np.array`` of ``float``.
    """
    return self._features(x)

  # TODO(wrengr): avoid the extra indirection from this method to the
  # MemoizedFunction stored in the field.
  def Score(self, x):
    """Return a function mapping ``y`` to its "score" given ``x``.

    Semantically, the "score" of ``y`` given ``x`` is the
    unnormalized log-domain conditional probability of ``y`` given
    ``x``. Operationally, we compute this by taking the inner product
    of the weight covector with the ``Features(x)(y)`` vector. The
    conditional probability itself can be computed by exponentiating
    the "score" and normalizing with the partition function. However,
    for determining the "best" ``y`` we don't need to bother computing
    the probability, because ``Score(x)`` is monotonic with respect to
    ``Probability(x)``.

    Args:
      x (X): the value of the independent variable.

    Returns:
      A ``MemoizedFunction`` from ``Y`` to ``float``.
    """
    return self._scores(x)

  def Z(self, x):
    """The normal-domain conditional partition-function given ``x``.

    If you need the log of the partition function, do not use this
    method. Instead, use the ``logZ`` method which computes it directly
    and thus is more accurate than taking the log of this one.

    Args:
      x (X): the value of the independent variable.

    Returns:
      The normalizing constant of ``Probability(x)``.
    """
    return math.exp(self.logZ(x))

  # TODO(wrengr): avoid the extra indirection from this method to the
  # MemoizedFunction stored in the field.
  def logZ(self, x):
    """The log-domain conditional partition-function given ``x``.

    Args:
      x (X): the value of the independent variable.

    Returns:
      The normalizing constant of ``LogProbability(x)``.
    """
    return self._zetas(x)

  def Probability(self, x):
    """The normal-domain distribution over ``y`` given ``x``.

    That is, ``self.Probability(x)(y)`` returns ``p(y | x; self.weights)``
    which is the model's estimation of ``Pr(y|x)``.

    If you need the log-probability, don't use this method. Instead,
    use the ``LogProbability`` method which computes it directly and
    thus is more accurate than taking the log of this one.
    """
    logprob_given_x = self.LogProbability(x)
    return lambda y: math.exp(logprob_given_x(y))

  def LogProbability(self, x):
    """The log-domain distribution over ``y`` given ``x``.

    That is, ``self.LogProbability(x)(y)`` returns the log of
    ``self.Probability(x)(y)``. However, this method computes the
    log-probabilities directly and so is more accurate and efficient
    than taking the log of ``Probability``.
    """
    zeta_x = self.logZ(x)
    score_given_x = self.Score(x)
    return lambda y: score_given_x(y) - zeta_x

  def Expectation(self, x, f):
    """Compute the expectation of a function with respect to ``Probability(x)``.

    Args:
      x (X): the value of the independent variable.
      f (...): a function from ``Y`` to ``np.ndarray`` of ``float``.

    Returns:
      An ``np.ndarray`` of ``float``
    """
    prob_given_x = self.Probability(x)
    # N.B., the ``*`` below is vector scaling! If we want to make this
    # method polymorphic in the return type of ``f`` then we'll need an
    # API that provides both scaling and ``vsum``.
    return vsum([prob_given_x(y) * f(y) for y in self._Y])
| 267 | |
| OLD | NEW |