| OLD | NEW |
| (Empty) | |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. |
| 4 |
| 5 import math |
| 6 import numpy as np |
| 7 # N.B., ``np.array`` can't take generators; you must pass explicit lists. |
| 8 import scipy.optimize as spo |
| 9 |
| 10 from crash.loglinear.model import LogLinearModel |
| 11 from libs.math.vectors import vsum |
| 12 # N.B., ``vsum`` can't take generators; you must pass explicit lists. |
| 13 |
| 14 |
class TrainableLogLinearModel(LogLinearModel):
  """A loglinear model with some labelled data set for training the weights."""

  def __init__(self, Y, training_data, feature_function, initial_weights,
               epsilon=None):
    """
    Args:
      Y (iterable): the entire range of values for the independent
        variable. This is needed for computing the partition function.
      training_data (iterable): a collection of ``(x, y)`` pairs where
        ``y`` is the known-correct label for ``x``.
      feature_function: A function from ``X`` to ``Y`` to a list of
        ``float``. N.B., the length of the list must be the same for all
        ``x`` and ``y``, and must be the same as the length of the list
        of weights.
      initial_weights (list of float): the pre-training coefficients
        for how much we believe components of the feature vector. This
        provides the seed for training; this starting value shouldn't
        affect the final weights obtained by training (thanks to
        convexity), but will affect how long it takes for training
        to converge.
      epsilon (float): The absolute-error threshold for considering a
        weight to be "equal to zero". N.B., this should be a positive
        number, as we will compare it against the absolute value of
        each weight.
    """
    super(TrainableLogLinearModel, self).__init__(
        Y, feature_function, initial_weights, epsilon)
    self._training_data = training_data

    # The sum of the observed feature vectors over the training data. The
    # training data is fixed for the life of this object, so we compute this
    # once up front; both ``LogLikelihood`` and ``LogLikelihoodGradient``
    # need it. N.B., ``vsum`` can't take generators; pass an explicit list.
    self._observed_feature_vector = vsum([
        self.FeaturesAsNumPyArray(x)(y)
        for x, y in self._training_data])

  # Even though this is identical to the superclass definition, we must
  # re-provide it in order to define the setter.
  @property
  def weights(self):
    """The weight covector.

    At present we return the weights as an ``np.ndarray``, but in the
    future that may be replaced by a more general type which specifies
    the semantics rather than the implementation details.
    """
    return self._weights

  @weights.setter
  def weights(self, new_weights):  # pylint: disable=W0221
    """Mutate the weight covector, and clear memos as necessary.

    This setter attempts to avoid clearing memos whenever possible,
    but errs on the side of caution/correctness when it needs to.

    Args:
      new_weights (np.ndarray): the new weights to use. Must have the
        same shape as the old ``np.ndarray``.

    Raises:
      TypeError: if ``new_weights`` is not an ``np.ndarray``, or if its
        shape differs from the current weights'.
    """
    if new_weights is self._weights:
      return

    if not isinstance(new_weights, np.ndarray):
      raise TypeError('Expected an np.ndarray but got %s instead'
                      % new_weights.__class__.__name__)

    if new_weights.shape != self._weights.shape:
      raise TypeError('Weight shape mismatch: %s != %s'
                      % (new_weights.shape, self._weights.shape))

    # A distinct array with numerically identical contents yields the same
    # memoized values, so keep the memos and just swap the array in. (BFGS
    # may evaluate the objective and its gradient at the same point with
    # different array objects, so this case does arise during training.)
    # N.B., ``np.array_equal`` returns False when NaNs are present, so we
    # conservatively fall through and clear the memos in that case.
    if np.array_equal(new_weights, self._weights):
      self._weights = new_weights
      return

    self.ClearWeightBasedMemos()
    self._weights = new_weights

  def FeaturesAsNumPyArray(self, x):
    """A variant of ``Features`` which returns a ``np.ndarray``.

    For training we need to have the feature function return an
    ``np.ndarray(float)`` rather than the ``list(FeatureValue)`` used
    elsewhere. This function performs the necessary conversion.

    N.B., at present we do not memoize this function. The underlying
    ``Features`` method is memoized, so we won't re-compute the features
    each time; but we will repeatedly copy the floats into newly allocated
    ``np.ndarray`` objects. If that turns out to be a performance
    bottleneck, we can add the extra layer of memoization to avoid that.
    """
    fx = self.Features(x)
    return lambda y: np.array([fxy.value for fxy in fx(y)])

  def LogLikelihood(self):
    """The conditional log-likelihood of the training data.

    The conditional likelihood of the training data is the product
    of ``Pr(y|x)`` for each ``(x, y)`` pair in the training data; so
    the conditional log-likelihood is the log of that. This is called
    "likelihood" because it is thought of as a function of the weight
    covector, with the training data held fixed.

    This is the ideal objective function for training the weights, as it
    will give us the MLE weight covector for the training data. However,
    in practice, we want to do regularization to ensure we don't overfit
    the training data and to reduce classification time by ensuring that
    the weight vector is sparse. Thus, the actual objective function
    will be the log-likelihood plus some penalty terms for regularization.
    """
    # ``math.fsum`` gives an accurately-rounded float sum, which matters
    # when summing many log-partition values of varying magnitude.
    observed_zeta = math.fsum(self.LogZ(x) for x, _ in self._training_data)
    observed_score = self.weights.dot(self._observed_feature_vector)
    return observed_score - observed_zeta

  def LogLikelihoodGradient(self):
    """The gradient (aka Jacobian) of ``LogLikelihood``."""
    # N.B., ``vsum`` can't take generators; pass an explicit list.
    expected_feature_vector = vsum([
        self.Expectation(x, self.FeaturesAsNumPyArray(x))
        for x, _ in self._training_data])
    return self._observed_feature_vector - expected_feature_vector

  def TrainWeights(self, l2_penalty):
    """Optimize the weight covector based on the training data.

    Args:
      l2_penalty (float): the hyperparameter for how much to penalize
        weight covectors far from zero.

    Returns:
      Nothing, but has the side effect of mutating the stored weights.

    Raises:
      Exception: if the BFGS optimization fails to converge.
    """
    initial_weights = self.weights

    # We want to minimize the number of times we reset the weights since
    # that clears our memos. One might think we could do that in the
    # between-iterations callback; but actually, in a single iteration,
    # BFGS calls the objective function and gradient more than once with
    # different arguments; so, alas, we must reset the weights in both.
    # This is why the ``weights`` setter tries to avoid clearing memos
    # when possible.

    def objective_function(new_weights):
      # Negated because ``spo.minimize`` minimizes, and we want to
      # maximize the (L2-regularized) log-likelihood.
      self.weights = new_weights
      return -self.LogLikelihood() + 0.5 * l2_penalty * self.quadrance

    def objective_function_gradient(new_weights):
      # The gradient of ``objective_function`` above.
      self.weights = new_weights
      return -self.LogLikelihoodGradient() + l2_penalty * self.weights

    result = spo.minimize(
        objective_function,
        initial_weights,
        method='BFGS',
        jac=objective_function_gradient)

    if not result.success:  # pragma: no cover
      # This should happen infrequently enough that there's no point in
      # logging it and attempting to carry on.
      raise Exception(
          'TrainableLogLinearModel.TrainWeights failed:'
          '\n\tReason: %s'
          '\n\tCurrent objective value: %s'
          '\n\tCurrent objective gradient: %s'
          '\n\tIterations: %d'
          '\n\tFunction evaluations: %d'
          '\n\tGradient evaluations: %d'
          % (result.message, result.fun, result.jac, result.nit, result.nfev,
             result.njev))

    # This shouldn't really be necessary, since we're resetting it
    # directly during training; but just to be safe/sure.
    self.weights = result.x
| OLD | NEW |