Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | |
| 2 # Use of this source code is governed by a BSD-style license that can be | |
| 3 # found in the LICENSE file. | |
| 4 | |
| 5 import math | |
| 6 import numpy as np | |
| 7 | |
| 8 from libs.math.functions import MemoizedFunction | |
| 9 from libs.math.logarithms import logsumexp | |
| 10 from libs.math.vectors import vsum | |
| 11 | |
| 12 EPSILON = 0.00001 | |
| 13 | |
| 14 | |
def ToFeatureFunction(fs):
  """Lift a collection of scalar-valued functions into one vector-valued one.

  Args:
    fs (iterable): A collection of curried functions ``X -> Y -> float``.
      That is, given a particular ``x`` they return a function
      ``Y -> float``.

  Returns:
    A function ``X -> Y -> list(float)`` such that for every ``x``, ``y``,
    and index ``i`` we have ``ToFeatureFunction(fs)(x)(y)[i] == fs[i](x)(y)``.
  """
  def _FeatureFunction(x):
    # Partially apply each feature to ``x`` once, so that the returned
    # closure only has to finish the application to ``y``.
    applied_to_x = [feature(x) for feature in fs]

    # TODO(wrengr): allow ``fx(y)`` to be a list, and flatten everything.
    def _FeatureVector(y):
      return [fx(y) for fx in applied_to_x]

    return _FeatureVector

  return _FeatureFunction
| 32 | |
| 33 | |
# TODO(http://crbug.com/669639): lots of ways to make this code better.
class UnnormalizedLogLinearModel(object):
  """An unnormalized loglinear model.

  We use this model for combining a bunch of different features in order
  to decide who to blame. In particular, the nice thing about loglinear
  models is that (a) they allow separating the amount of weight we
  give to each feature from the feature itself, and (b) they allow us
  to keep adding features at will without needing to worry about the
  fiddly details needed for it to be a probability model.

  Throughout the documentation we use the following conventional
  terminology. The independent variable (aka: the given data which
  sets up the classification problem) is called ``X``, where particular
  values for that variable are called ``x``. The dependent variable (aka:
  the answers/labels returned by classification) is called ``Y``,
  where particular values for that random variable are called ``y``. The
  partition function is called ``Z``. And, somewhat non-conventionally,
  we will call the log of the partition function ``zeta``.

  This class is distinct from ``LogLinearModel`` in that we do not require
  a specification of ``Y``. This class is sufficient for determining
  which ``y`` is the most likely, though it can only return a "score"
  rather than returning a probability per se.
  """

  def __init__(self, feature_function, weights, epsilon=None):
    """Construct a new model with the given weights and feature function.

    Args:
      feature_function: A function ``X -> Y -> list(float)``. N.B.,
        for all ``x`` and ``y`` the length of ``feature_function(x)(y)``
        must be the same as the length of ``weights``.
      weights (iterable of float): coefficients for how important we
        consider each component of the feature vector for deciding which
        ``y`` to blame. Each weight is coerced to ``float``.
      epsilon (float): The absolute-error threshold for considering a
        weight to be "equal to zero". N.B., this should be a positive
        number, as we will compare it against the absolute value of
        each weight.
    """
    if epsilon is None:
      epsilon = EPSILON
    # Coerce every weight to float *before* thresholding. The previous
    # ``isinstance(w, float)`` guard silently replaced any non-float
    # weight (e.g. the int ``1``) with zero; coercing first preserves
    # integral weights while still clamping near-zero weights to 0.0.
    # N.B., ``np.array`` can't take generators.
    self._weights = np.array([
        w if math.fabs(w) >= epsilon else 0.0
        for w in map(float, weights)])

    # Memo for the ``quadrance`` property; invalidated whenever the
    # weight-based memos are cleared.
    self._quadrance = None

    def _FeaturesMemoizedOnY(x):
      fx = feature_function(x)
      def _FeaturesCoercedToCovector(y):
        # N.B., ``np.array`` can't take generators.
        fxy = np.array(list(fx(y)))
        # Raise explicitly (rather than ``assert``) so this validation
        # survives ``python -O``, which strips assert statements.
        # N.B., we're assuming that ``len(self.weights)`` is O(1).
        if len(fxy) != len(self.weights):
          raise TypeError(
              "vector length mismatch: %d != %d" % (
                  len(fxy), len(self.weights)))
        return fxy
      return MemoizedFunction(_FeaturesCoercedToCovector)
    self._features = MemoizedFunction(_FeaturesMemoizedOnY)

    # N.B., this is just the inner product of ``self.weights``
    # against ``self._features(x)``. If we can compute this in some
    # more efficient way, we should. In particular, we will want to
    # make the weights sparse, in which case we need to use a sparse
    # variant of the dot product.
    self._scores = MemoizedFunction(lambda x:
        self._features(x).map(self.weights.dot))

  def ClearWeightBasedMemos(self):
    """Clear all the memos that depend on the weight covector."""
    self._quadrance = None
    self._scores.ClearMemos()

  def ClearAllMemos(self):
    """Clear all memos, even those independent of the weight covector."""
    self.ClearWeightBasedMemos()
    self._features.ClearMemos()

  @property
  def weights(self):
    """The weight covector.

    At present we return the weights as an ``np.ndarray``, but in the
    future that may be replaced by a more general type which specifies
    the semantics rather than the implementation details.
    """
    return self._weights

  @property
  def l0(self):  # pragma: no cover
    """The l0-norm of the weight covector.

    N.B., despite being popularly called the "l0-norm", this isn't
    actually a norm in the mathematical sense."""
    return float(np.count_nonzero(self.weights))

  @property
  def l1(self):  # pragma: no cover
    """The l1 (aka: Manhattan) norm of the weight covector."""
    return math.fsum(math.fabs(w) for w in self.weights)

  @property
  def quadrance(self):
    """The square of the l2 norm of the weight covector.

    This value is often more helpful to have direct access to, as it
    avoids the need for non-rational functions (e.g., sqrt) and shows up
    as its own quantity in many places. Also, computing it directly avoids
    the error introduced by squaring the square-root of an IEEE-754 float.
    """
    if self._quadrance is None:
      # ``w * w`` needs no ``fabs``: squaring already discards the sign.
      # ``math.fsum`` keeps the accumulation exact for floats.
      self._quadrance = math.fsum(w * w for w in self.weights)

    return self._quadrance

  @property
  def l2(self):
    """The l2 (aka Euclidean) norm of the weight covector.

    If you need the square of the l2-norm, do not use this property.
    Instead, use the ``quadrance`` property which is more accurate than
    squaring this one.
    """
    return math.sqrt(self.quadrance)

  def Features(self, x):
    """Return a function mapping ``y`` to its feature vector given ``x``.

    Args:
      x (X): the value of the independent variable.

    Returns:
      A ``MemoizedFunction`` of type ``Y -> np.array(float)``.
    """
    return self._features(x)

  def Score(self, x):
    """Return a function mapping ``y`` to its "score" given ``x``.

    Semantically, the "score" of ``y`` given ``x`` is the
    unnormalized log-domain conditional probability of ``y`` given
    ``x``. Operationally, we compute this by taking the inner product
    of the weight covector with the ``Features(x)(y)`` vector. The
    conditional probability itself can be computed by exponentiating
    the "score" and normalizing with the partition function. However,
    for determining the "best" ``y`` we don't need to bother computing
    the probability, because ``Score(x)`` is monotonic with respect to
    ``Probability(x)``.

    Args:
      x (X): the value of the independent variable.

    Returns:
      A ``MemoizedFunction`` of type ``Y -> float``.
    """
    return self._scores(x)
| 192 | |
| 193 | |
class LogLinearModel(UnnormalizedLogLinearModel):
  """A loglinear probability model.

  Unlike ``UnnormalizedLogLinearModel``, this class can report genuine
  probabilities rather than just scores. The price of admission is that
  the full set ``Y`` must be supplied up front, so that the partition
  function can be computed.
  """

  def __init__(self, Y, feature_function, weights, epsilon=None):
    """Construct a new probabilistic model.

    Args:
      Y (iterable): the entire range of values for the dependent
        variable. This is needed for computing the partition function.
      feature_function: A function ``X -> Y -> list(float)``. N.B.,
        for all ``x`` and ``y`` the length of ``feature_function(x)(y)``
        must be the same as the length of ``weights``.
      weights (list of float): coefficients for how important we consider
        each component of the feature vector for deciding which ``y``
        to blame.
      epsilon (float): The absolute-error threshold for considering a
        weight to be "equal to zero". N.B., this should be a positive
        number, as we will compare it against the absolute value of
        each weight.
    """
    super(LogLinearModel, self).__init__(feature_function, weights, epsilon)

    self._Y = frozenset(Y)

    def _ComputeLogZ(x):
      # The log-domain partition function is the log-sum-exp of the
      # score of every possible ``y`` given this ``x``.
      score_given_x = self._scores(x)
      return logsumexp([score_given_x(y) for y in self._Y])
    self._zetas = MemoizedFunction(_ComputeLogZ)

  def ClearWeightBasedMemos(self):
    """Clear all the memos that depend on the weight covector."""
    super(LogLinearModel, self).ClearWeightBasedMemos()
    self._zetas.ClearMemos()

  def Z(self, x):
    """The normal-domain conditional partition-function given ``x``.

    If you need the log of the partition function, do not use this
    method. Instead, use the ``LogZ`` method which computes it directly
    and thus is more accurate than taking the log of this one.

    Args:
      x (X): the value of the independent variable.

    Returns:
      The normalizing constant of ``Probability(x)``.
    """
    return math.exp(self.LogZ(x))

  def LogZ(self, x):
    """The log-domain conditional partition-function given ``x``.

    Args:
      x (X): the value of the independent variable.

    Returns:
      The normalizing constant of ``LogProbability(x)``.
    """
    return self._zetas(x)

  def Probability(self, x):
    """The normal-domain distribution over ``y`` given ``x``.

    That is, ``self.Probability(x)(y)`` returns ``p(y | x; self.weights)``
    which is the model's estimation of ``Pr(y|x)``.

    If you need the log-probability, don't use this method. Instead,
    use the ``LogProbability`` method which computes it directly and
    thus is more accurate than taking the log of this one.
    """
    logprob_given_x = self.LogProbability(x)
    def _Probability(y):
      return math.exp(logprob_given_x(y))
    return _Probability

  def LogProbability(self, x):
    """The log-domain distribution over ``y`` given ``x``.

    That is, ``self.LogProbability(x)(y)`` returns the log of
    ``self.Probability(x)(y)``. However, this method computes the
    log-probabilities directly and so is more accurate and efficient
    than taking the log of ``Probability``.
    """
    # Normalize each score by subtracting zeta (the log of Z).
    zeta_of_x = self.LogZ(x)
    score_given_x = self.Score(x)
    def _LogProbability(y):
      return score_given_x(y) - zeta_of_x
    return _LogProbability

  def Expectation(self, x, f):
    """Compute the expectation of a function with respect to ``Probability(x)``.

    Args:
      x (X): the value of the independent variable.
      f: a function of type ``Y -> np.ndarray(float)``.

    Returns:
      An ``np.ndarray`` of ``float`` called the "expected value". N.B.,
      the particular array returned may not actually be one that the
      function returns; rather, it's a sort of average of all the results
      returned. For more information you can take a look at Wikipedia
      <https://en.wikipedia.org/wiki/Expected_value>.
    """
    probability_of = self.Probability(x)
    # N.B., the ``*`` below is vector scaling! If we want to make this
    # method polymorphic in the return type of ``f`` then we'll need an
    # API that provides both scaling and ``vsum``.
    return vsum([probability_of(y) * f(y) for y in self._Y])
| 302 | |
| OLD | NEW |