Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | |
| 2 # Use of this source code is governed by a BSD-style license that can be | |
| 3 # found in the LICENSE file. | |
| 4 | |
| 5 import math | |
| 6 import numpy as np | |
| 7 # N.B., ``np.array`` can't take generators; you must pass explicit lists. | |
| 8 import scipy.optimize as spo | |
| 9 | |
| 10 from crash.loglinear.model import LogLinearModel | |
| 11 from libs.math.vectors import vsum | |
| 12 # N.B., ``vsum`` can't take generators; you must pass explicit lists. | |
| 13 | |
| 14 | |
class TrainableLogLinearModel(LogLinearModel):
  """A loglinear model with some labelled data set for training the weights."""

  def __init__(self, Y, training_data, feature_function, initial_weights,
               epsilon=None):
    """
    Args:
      Y (iterable): the entire range of values for the independent
        variable. This is needed for computing the partition function.
      training_data (iterable): a collection of ``(x, y)`` pairs where
        ``y`` is the known-correct label for ``x``.
      feature_function: A function from ``X`` to ``Y`` to a list of
        ``float``. N.B., the length of the list must be the same for all
        ``x`` and ``y``, and must be the same as the length of the list
        of weights.
      initial_weights (list of float): the pre-training coefficients
        for how much we believe components of the feature vector. This
        provides the seed for training; this starting value shouldn't
        affect the final weights obtained by training (thanks to
        convexity), but will affect how long it takes for training
        to converge.
      epsilon (float): The absolute-error threshold for considering a
        weight to be "equal to zero". N.B., this should be a positive
        number, as we will compare it against the absolute value of
        each weight.
    """
    super(TrainableLogLinearModel, self).__init__(
        Y, feature_function, initial_weights, epsilon)
    self._training_data = training_data

    # The observed feature vector depends only on the (fixed) training
    # data, so compute it once up front. N.B., ``vsum`` can't take
    # generators; we must pass an explicit list.
    self._observed_feature_vector = vsum([
        self.FeaturesAsNumPyArray(x)(y)
        for x, y in self._training_data])

  # Even though this getter is identical to the superclass definition, we
  # must re-provide it in order to define the setter: ``@weights.setter``
  # needs a ``weights`` property object in *this* class's namespace to
  # extend, so we cannot attach a setter to the inherited descriptor.
  @property
  def weights(self):
    """The weight covector.

    At present we return the weights as an ``np.ndarray``, but in the
    future that may be replaced by a more general type which specifies
    the semantics rather than the implementation details.
    """
    return self._weights

  @weights.setter
  def weights(self, new_weights):  # pylint: disable=W0221
    """Mutate the weight covector, and clear memos as necessary.

    This setter attempts to avoid clearing memos whenever possible,
    but errs on the side of caution/correctness when it needs to.

    Args:
      new_weights (np.ndarray): the new weights to use. Must have the
        same shape as the old ``np.ndarray``.

    Raises:
      TypeError: if ``new_weights`` is not an ``np.ndarray``, or if its
        shape differs from the current weights' shape.
    """
    if new_weights is self._weights:
      return

    if not isinstance(new_weights, np.ndarray):
      raise TypeError('Expected an np.ndarray but got %s instead'
                      % new_weights.__class__.__name__)

    if new_weights.shape != self._weights.shape:
      raise TypeError('Weight shape mismatch: %s != %s'
                      % (new_weights.shape, self._weights.shape))

    # Weight-based memos remain valid whenever the new weights are
    # numerically equal to the old ones, even if they're a different
    # object; only clear the memos when the values actually change.
    # (BFGS may call the objective and its gradient with equal weights
    # within a single iteration, so this avoids needless recomputation.)
    if not np.array_equal(new_weights, self._weights):
      self.ClearWeightBasedMemos()
    self._weights = new_weights

  def FeaturesAsNumPyArray(self, x):
    """A variant of ``Features`` which returns a ``np.ndarray``.

    For training we need to have the feature function return an
    ``np.ndarray(float)`` rather than the ``list(FeatureValue)`` used
    elsewhere. This function performs the necessary conversion.

    N.B., at present we do not memoize this function. The underlying
    ``Features`` method is memoized, so we won't re-compute the features
    each time; but we will repeatedly copy the floats into newly allocated
    ``np.ndarray`` objects. If that turns out to be a performance
    bottleneck, we can add the extra layer of memoization to avoid that.
    """
    fx = self.Features(x)
    # N.B., ``np.array`` can't take generators; we must pass an
    # explicit list.
    return lambda y: np.array([fxy.value for fxy in fx(y)])

  def LogLikelihood(self):
    """The conditional log-likelihood of the training data.

    The conditional likelihood of the training data is the product
    of ``Pr(y|x)`` for each ``(x, y)`` pair in the training data; so
    the conditional log-likelihood is the log of that. This is called
    "likelihood" because it is thought of as a function of the weight
    covector, with the training data held fixed.

    This is the ideal objective function for training the weights, as it
    will give us the MLE weight covector for the training data. However,
    in practice, we want to do regularization to ensure we don't overfit
    the training data and to reduce classification time by ensuring that
    the weight vector is sparse. Thus, the actual objective function
    will be the log-likelihood plus some penalty terms for regularization.
    """
    # ``math.fsum`` gives an accurately rounded float sum.
    observed_zeta = math.fsum(self.LogZ(x) for x, _ in self._training_data)
    observed_score = self.weights.dot(self._observed_feature_vector)
    return observed_score - observed_zeta

  def LogLikelihoodGradient(self):
    """The gradient (aka Jacobian) of ``LogLikelihood``."""
    # N.B., ``vsum`` can't take generators; we must pass an explicit list.
    expected_feature_vector = vsum([
        self.Expectation(x, self.FeaturesAsNumPyArray(x))
        for x, _ in self._training_data])
    return self._observed_feature_vector - expected_feature_vector

  def TrainWeights(self, l2_penalty):
    """Optimize the weight covector based on the training data.

    Args:
      l2_penalty (float): the hyperparameter for how much to penalize
        weight covectors far from zero.

    Returns:
      Nothing, but has the side effect of mutating the stored weights.

    Raises:
      RuntimeError: if the optimizer fails to converge.
    """
    initial_weights = self.weights

    # We want to minimize the number of times we reset the weights since
    # that clears our memos. One might think we could do that in the
    # between-iterations callback; but actually, in a single iteration,
    # BFGS calls the objective function and gradient more than once with
    # different arguments; so, alas, we must reset the weights in both.
    # This is why the ``weights`` setter tries to avoid clearing memos
    # when possible.

    # The objective is the *negated* penalized log-likelihood, because
    # ``spo.minimize`` minimizes whereas we want to maximize likelihood.
    def objective_function(new_weights):
      self.weights = new_weights
      return -self.LogLikelihood() + 0.5 * l2_penalty * self.quadrance

    def objective_function_gradient(new_weights):
      self.weights = new_weights
      return -self.LogLikelihoodGradient() + l2_penalty * self.weights

    result = spo.minimize(
        objective_function,
        initial_weights,
        method='BFGS',
        jac=objective_function_gradient)

    if not result.success:  # pragma: no cover
      # This should happen infrequently enough that there's no point in
      # logging it and attempting to carry on.
      raise RuntimeError(
          'TrainableLogLinearModel.TrainWeights failed:'
          '\n\tReason: %s'
          '\n\tCurrent objective value: %s'
          '\n\tCurrent objective gradient: %s'
          '\n\tIterations: %d'
          '\n\tFunction evaluations: %d'
          '\n\tGradient evaluations: %d'
          % (result.message, result.fun, result.jac, result.nit, result.nfev,
             result.njev))

    # This shouldn't really be necessary, since we're resetting it
    # directly during training; but just to be safe/sure.
    self.weights = result.x
| OLD | NEW |