Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(62)

Side by Side Diff: appengine/findit/crash/loglinear/training.py

Issue 2544493004: [Predator] Implement training for loglinear models (Closed)
Patch Set: Breaking out the shared code of loglinear/{model,training}_test.py Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « appengine/findit/crash/loglinear/test/training_test.py ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
4
5 import math
6 import numpy as np
7 # N.B., ``np.array`` can't take generators; you must pass explicit lists.
8 import scipy.optimize as spo
9
10 from crash.loglinear.model import LogLinearModel
11 from libs.math.vectors import vsum
12 # N.B., ``vsum`` can't take generators; you must pass explicit lists.
13
14
class TrainableLogLinearModel(LogLinearModel):
  """A loglinear model with some labelled data set for training the weights.

  Training maximizes the L2-regularized conditional log-likelihood of the
  labelled ``(x, y)`` pairs via BFGS; see ``TrainWeights``. The objective is
  convex, so (up to convergence tolerance) the result does not depend on the
  initial weights.
  """

  def __init__(self, Y, training_data, feature_function, initial_weights,
      epsilon=None):
    """
    Args:
      Y (iterable): the entire range of values for the independent
        variable. This is needed for computing the partition function.
      training_data (iterable): a collection of ``(x, y)`` pairs where
        ``y`` is the known-correct label for ``x``.
      feature_function: A function from ``X`` to ``Y`` to a list of
        ``float``. N.B., the length of the list must be the same for all
        ``x`` and ``y``, and must be the same as the length of the list
        of weights.
      initial_weights (list of float): the pre-training coefficients
        for how much we believe components of the feature vector. This
        provides the seed for training; this starting value shouldn't
        affect the final weights obtained by training (thanks to
        convexity), but will affect how long it takes for training
        to converge.
      epsilon (float): The absolute-error threshold for considering a
        weight to be "equal to zero". N.B., this should be a positive
        number, as we will compare it against the absolute value of
        each weight.
    """
    super(TrainableLogLinearModel, self).__init__(
        Y, feature_function, initial_weights, epsilon)
    self._training_data = training_data

    # The "observed" (empirical) feature vector: the sum of the feature
    # vectors of every labelled pair. It is independent of the weights, so
    # we compute it once here and reuse it in both ``LogLikelihood`` and
    # ``LogLikelihoodGradient``.
    self._observed_feature_vector = vsum([
        self.FeaturesAsNumPyArray(x)(y)
        for x, y in self._training_data])

  # Even though this is identical to the superclass definition, we must
  # re-provide it in order to define the setter.
  @property
  def weights(self):
    """The weight covector.

    At present we return the weights as an ``np.ndarray``, but in the
    future that may be replaced by a more general type which specifies
    the semantics rather than the implementation details.
    """
    return self._weights

  @weights.setter
  def weights(self, new_weights): # pylint: disable=W0221
    """Mutate the weight covector, and clear memos as necessary.

    This setter attempts to avoid clearing memos whenever possible,
    but errs on the side of caution/correctness when it needs to.
    (Currently "whenever possible" means only the identity check below;
    any non-identical array — even one with equal contents — clears the
    weight-based memos, since they may depend on the weight values.)

    Args:
      new_weights (np.ndarray): the new weights to use. Must have the
        same shape as the old ``np.ndarray``.

    Raises:
      TypeError: if ``new_weights`` is not an ``np.ndarray``, or has a
        different shape than the current weights.
    """
    # Identity (not equality) check: cheap, and the common case when the
    # optimizer re-passes the array it was just handed.
    if new_weights is self._weights:
      return

    if not isinstance(new_weights, np.ndarray):
      raise TypeError('Expected an np.ndarray but got %s instead'
          % new_weights.__class__.__name__)

    if new_weights.shape != self._weights.shape:
      raise TypeError('Weight shape mismatch: %s != %s'
          % (new_weights.shape, self._weights.shape))

    # The memos (e.g. partition-function values) are functions of the
    # weights, so they must be invalidated before the new weights are used.
    self.ClearWeightBasedMemos()
    self._weights = new_weights

  def FeaturesAsNumPyArray(self, x):
    """A variant of ``Features`` which returns a ``np.ndarray``.

    For training we need to have the feature function return an
    ``np.ndarray(float)`` rather than the ``list(FeatureValue)`` used
    elsewhere. This function performs the necessary conversion.

    N.B., at present we do not memoize this function. The underlying
    ``Features`` method is memoized, so we won't re-compute the features
    each time; but we will repeatedly copy the floats into newly allocated
    ``np.ndarray`` objects. If that turns out to be a performance
    bottleneck, we can add the extra layer of memoization to avoid that.

    Returns:
      A function from ``y`` to the feature vector of ``(x, y)`` as an
      ``np.ndarray`` of ``float``.
    """
    fx = self.Features(x)
    return lambda y: np.array([fxy.value for fxy in fx(y)])

  def LogLikelihood(self):
    """The conditional log-likelihood of the training data.

    The conditional likelihood of the training data is the product
    of ``Pr(y|x)`` for each ``(x, y)`` pair in the training data; so
    the conditional log-likelihood is the log of that. This is called
    "likelihood" because it is thought of as a function of the weight
    covector, with the training data held fixed.

    This is the ideal objective function for training the weights, as it
    will give us the MLE weight covector for the training data. However,
    in practice, we want to do regularization to ensure we don't overfit
    the training data and to reduce classification time by ensuring that
    the weight vector is sparse. Thus, the actual objective function
    will be the log-likelihood plus some penalty terms for regularization.

    Returns:
      float: sum over the training pairs of ``score(x, y) - log Z(x)``,
      i.e. ``log prod Pr(y|x)``.
    """
    # ``math.fsum`` is used for its improved floating-point accuracy over
    # the builtin ``sum``.
    observed_zeta = math.fsum(self.LogZ(x) for x, _ in self._training_data)
    observed_score = self.weights.dot(self._observed_feature_vector)
    return observed_score - observed_zeta

  def LogLikelihoodGradient(self):
    """The gradient (aka Jacobian) of ``LogLikelihood``.

    This is the standard loglinear-model gradient: the observed feature
    vector minus the feature vector expected under the current model
    distribution, summed over the training inputs.
    """
    expected_feature_vector = vsum([
        self.Expectation(x, self.FeaturesAsNumPyArray(x))
        for x, _ in self._training_data])
    return self._observed_feature_vector - expected_feature_vector

  def TrainWeights(self, l2_penalty):
    """Optimize the weight covector based on the training data.

    Minimizes the negated, L2-penalized log-likelihood with BFGS
    (``scipy.optimize.minimize``), using the analytic gradient.

    Args:
      l2_penalty (float): the hyperparameter for how much to penalize
        weight covectors far from zero.

    Returns:
      Nothing, but has the side effect of mutating the stored weights.

    Raises:
      Exception: if the optimizer reports failure to converge.
    """
    initial_weights = self.weights

    # We want to minimize the number of times we reset the weights since
    # that clears our memos. One might think we could do that in the
    # between-iterations callback; but actually, in a single iteration,
    # BFGS calls the objective function and gradient more than once with
    # different arguments; so, alas, we must reset the weights in both.
    # This is why the ``weights`` setter tries to avoid clearing memos
    # when possible.

    def objective_function(new_weights):
      # Negated because ``spo.minimize`` minimizes, and we want to
      # maximize the likelihood; 0.5 * l2 * |w|^2 is the L2 penalty
      # (``quadrance`` is the squared norm, provided by the superclass).
      self.weights = new_weights
      return -self.LogLikelihood() + 0.5 * l2_penalty * self.quadrance

    def objective_function_gradient(new_weights):
      # Gradient of ``objective_function``: the 0.5 factor above cancels
      # when differentiating the quadratic penalty.
      self.weights = new_weights
      return -self.LogLikelihoodGradient() + l2_penalty * self.weights

    result = spo.minimize(
        objective_function,
        initial_weights,
        method='BFGS',
        jac=objective_function_gradient)

    if not result.success: # pragma: no cover
      # This should happen infrequently enough that there's no point in
      # logging it and attempting to carry on.
      raise Exception(
          'TrainableLogLinearModel.TrainWeights failed:'
          '\n\tReason: %s'
          '\n\tCurrent objective value: %s'
          '\n\tCurrent objective gradient: %s'
          '\n\tIterations: %d'
          '\n\tFunction evaluations: %d'
          '\n\tGradient evaluations: %d'
          % (result.message, result.fun, result.jac, result.nit, result.nfev,
             result.njev))

    # This shouldn't really be necessary, since we're resetting it
    # directly during training; but just to be safe/sure.
    self.weights = result.x
OLDNEW
« no previous file with comments | « appengine/findit/crash/loglinear/test/training_test.py ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698