| OLD | NEW |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 from collections import OrderedDict |
| 5 import math | 6 import math |
| 6 import numpy as np | 7 import numpy as np |
| 7 # N.B., ``np.array`` can't take generators; you must pass explicit lists. | 8 # N.B., ``np.array`` can't take generators; you must pass explicit lists. |
| 8 import scipy.optimize as spo | 9 import scipy.optimize as spo |
| 9 | 10 |
| 10 from crash.loglinear.model import LogLinearModel | 11 from crash.loglinear.model import LogLinearModel |
| 12 from crash.loglinear.weight import MetaWeight |
| 13 from crash.loglinear.weight import Weight |
| 14 from libs.meta_dict_serializer import GetSerializer |
| 11 from libs.math.vectors import vsum | 15 from libs.math.vectors import vsum |
| 12 # N.B., ``vsum`` can't take generators; you must pass explicit lists. | 16 # N.B., ``vsum`` can't take generators; you must pass explicit lists. |
| 13 | 17 |
| 14 | 18 |
| 15 class TrainableLogLinearModel(LogLinearModel): | 19 class TrainableLogLinearModel(LogLinearModel): |
| 16 """A loglinear model with some labelled data set for training the weights.""" | 20 """A loglinear model with labelled data set for training the meta_weight.""" |
| 17 | 21 |
| 18 def __init__(self, Y_given_X, training_data, feature_function, | 22 def __init__(self, Y_given_X, training_data, meta_feature, meta_weight, |
| 19 initial_weights, epsilon=None): | 23 epsilon=None): |
| 20 """ | 24 """ |
| 21 Args: | 25 Args: |
| 22 Y_given_X: a function from ``X`` to an iterable object giving the | 26 Y_given_X: a function from ``X`` to an iterable object giving the |
| 23 subset of ``Y`` which has non-zero probability given the | 27 subset of ``Y`` which has non-zero probability given the |
| 24 ``x``. When in doubt about whether some ``y`` has zero probability | 28 ``x``. When in doubt about whether some ``y`` has zero probability |
| 25 or not, it is always safe/correct to return a larger subset of | 29 or not, it is always safe/correct to return a larger subset of |
| 26 ``Y`` (it'll just take more computation time is all). This is | 30 ``Y`` (it'll just take more computation time is all). This is |
| 27 needed for computing the partition function and expectation. N.B., | 31 needed for computing the partition function and expectation. N.B., |
| 28 we do not actually need to know/enumerate of *all* of ``Y``, | 32 we do not actually need to know/enumerate of *all* of ``Y``, |
| 29 only the subsets for each ``x``. | 33 only the subsets for each ``x``. |
| 30 training_data (iterable): a collection of ``(x, y)`` pairs where | 34 training_data (iterable): a collection of ``(x, y)`` pairs where |
| 31 ``y`` is the known-correct label for ``x``. | 35 ``y`` is the known-correct label for ``x``. |
| 32 feature_function: A function from ``X`` to ``Y`` to a list of | 36 meta_feature: A function from ``X`` to ``Y`` to a list of |
| 33 ``float``. N.B., the length of the list must be the same for all | 37 ``float``. N.B., the length of the list must be the same for all |
| 34 ``x`` and ``y``, and must be the same as the length of the list | 38 ``x`` and ``y``, and must be the same as the length of the list |
| 35 of weights. | 39 of meta_weight. |
| 36 initial_weights (dict from str to float): the pre-training coefficients | 40 meta_weight (dict from str to (Vector)Weight): the pre-training |
| 37 for how much we believe components of the feature vector. This | 41 coefficients for how much we believe components of the feature vector. |
| 38 provides the seed for training; this starting value shouldn't | 42 This provides the seed for training; this starting value shouldn't |
| 39 affect the final weights obtained by training (thanks to | 43 affect the final meta_weight obtained by training (thanks to |
| 40 convexity), but will affect how long it takes for training | 44 convexity), but will affect how long it takes for training |
| 41 to converge. | 45 to converge. |
| 42 N.B. The dict should not be sparse (only contains non-zero weights), | 46 N.B. The dict should not be sparse (only contains non-zero meta_weight), |
| 43 because we only train those features whose names are keys in this dict. | 47 because we only train those features whose names are keys in this dict. |
| 44 epsilon (float): The absolute-error threshold for considering a | 48 epsilon (float): The absolute-error threshold for considering a |
| 45 weight to be "equal to zero". N.B., this should be a positive | 49 weight to be "equal to zero". N.B., this should be a positive |
| 46 number, as we will compare it against the absolute value of | 50 number, as we will compare it against the absolute value of |
| 47 each weight. | 51 each weight. |
| 48 """ | 52 """ |
| 49 super(TrainableLogLinearModel, self).__init__( | 53 super(TrainableLogLinearModel, self).__init__( |
| 50 Y_given_X, feature_function, initial_weights, epsilon) | 54 Y_given_X, meta_feature, meta_weight, epsilon) |
| 51 self._training_data = training_data | 55 self._training_data = training_data |
| 52 # Use self._weights instead of initialz_weights, since self._weights already | 56 # Use self._meta_weight instead of initial_meta_weight, |
| 53 # filtered zero weights in the __init__ of superclass. | 57 # since self._meta_weight already filtered zero meta_weight in the __init__ |
| 54 self._feature_order = self._weights.keys() | 58 # of superclass. |
| 55 self._np_weights = self._DictToNumPyArray(self._weights) | 59 self._serializer = GetSerializer(meta_feature) |
| 60 self._np_weight = self._MetaToNumPyArray(self.meta_weight) |
| 56 self._observed_feature_vector = vsum([ | 61 self._observed_feature_vector = vsum([ |
| 57 self.FeaturesAsNumPyArray(x)(y) | 62 self.FeaturesAsNumPyArray(x)(y) |
| 58 for x, y in self._training_data]) | 63 for x, y in self._training_data]) |
| 59 | 64 |
| 60 @property | 65 @property |
| 61 def np_weights(self): | 66 def np_weight(self): |
| 62 """The NumPy Array of the weight covector.""" | 67 """The NumPy Array of the weight covector.""" |
| 63 return self._np_weights | 68 return self._np_weight |
| 64 | 69 |
| 65 @np_weights.setter | 70 @np_weight.setter |
| 66 def np_weights(self, new_np_weights): # pylint: disable=W0221 | 71 def np_weight(self, new_np_weight): # pylint: disable=W0221 |
| 67 """Mutate the weight covector, and clear memos as necessary. | 72 """Mutate the weight covector, and clear memos as necessary. |
| 68 | 73 |
| 69 This setter attempts to avoid clearing memos whenever possible, | 74 This setter attempts to avoid clearing memos whenever possible, |
| 70 but errs on the side of caution/correctness when it needs to. | 75 but errs on the side of caution/correctness when it needs to. |
| 71 This setter also drop all the zero weights in weight covector using | 76 This setter also drop all the zero meta_weight in weight covector using |
| 72 self._epsilon. | 77 self._epsilon. |
| 73 | 78 |
| 74 Note, the conversion between dict and np array is needed because model uses | 79 Note, the conversion between dict and np array is needed because model uses |
| 75 dict to organize weights of features, however SciPy trainning (e.g. BFGS) | 80 dict to organize meta_weight of features, however SciPy training |
| 76 needs numpy array to do computaion. | 81 (e.g. BFGS) needs numpy array to do computation. |
| 77 | 82 |
| 78 Args: | 83 Args: |
| 79 new_np_weights (np.ndarray): the new weights to use. It will be converted | 84 new_np_weight (np.ndarray): the new meta_weight to use. It will be |
| 80 to weights dict mapping feature_name to its weight. | 85 converted to meta_weight dict mapping feature_name to its weight. |
| 81 """ | 86 """ |
| 82 if np.array_equal(self._np_weights, new_np_weights): | 87 if np.array_equal(self._np_weight, new_np_weight): |
| 83 return | 88 return |
| 84 | 89 |
| 85 if not isinstance(new_np_weights, np.ndarray): | 90 if not isinstance(new_np_weight, np.ndarray): |
| 86 raise TypeError('Expected an np.ndarray but got %s instead' % | 91 raise TypeError('Expected an np.ndarray but got %s instead' % |
| 87 new_np_weights.__class__.__name__) | 92 new_np_weight.__class__.__name__) |
| 88 | 93 |
| 89 if new_np_weights.shape != self._np_weights.shape: | 94 if new_np_weight.shape != self._np_weight.shape: |
| 90 raise TypeError('Weight shape mismatch: %s != %s' % | 95 raise TypeError('Weight shape mismatch: %s != %s' % |
| 91 (new_np_weights.shape, self._np_weights.shape)) | 96 (new_np_weight.shape, self._np_weight.shape)) |
| 92 | 97 |
| 93 self._np_weights = np.array(filter(self.IsNonZeroWeight, new_np_weights)) | 98 self._np_weight = new_np_weight |
| 99 self.meta_weight = self._NumPyArrayToMeta(self.np_weight) |
| 94 self.ClearWeightBasedMemos() | 100 self.ClearWeightBasedMemos() |
| 95 self._weights = self._NumPyArrayToDict(self._np_weights) | |
| 96 self._feature_order = self._weights.keys() | |
| 97 | 101 |
| 98 def _NumPyArrayToDict(self, np_weights): | 102 def _NumPyArrayToMeta(self, np_weight): |
| 99 """Converts numpy array to dict (mapping feature name to weight). | 103 """Converts numpy array to dict (mapping feature name to weight). |
| 100 | 104 |
| 101 Note, this conversion is needed because model uses weights dict to organize | 105 Note, this conversion is needed because model uses meta_weight dict to |
| 102 weights for features, however SciPy trainning (e.g. BFGS) needs numpy array | 106 organize meta_weight for features, however SciPy training (e.g. BFGS) needs |
| 103 to do computaion. | 107 numpy array to do computation. |
| 104 | 108 |
| 105 Args: | 109 Args: |
| 106 np_weights (np.ndarray): Weights which have the same order of | 110 np_weight (np.ndarray): meta_weight which have the same order of |
| 107 self._feature_order. Note, feature np array should also be serialized by | 111 self._ordered_feature_to_len. Note, feature np array should also be |
| 108 the same order as self._feature_order to match. | 112 serialized by the same order as self._ordered_feature_to_len to match. |
| 109 | 113 |
| 110 Returns: | 114 Returns: |
| 111 A dict mapping feature name to weight. | 115 A dict mapping feature name to weight. |
| 112 """ | 116 """ |
| 113 return {feature_name: weight | 117 return self._serializer.FromList(np_weight, meta_constructor=MetaWeight, |
| 114 for feature_name, weight in zip(self._feature_order, np_weights)} | 118 element_constructor=Weight) |
| 115 | 119 |
| 116 def _DictToNumPyArray(self, weights, default=0.): | 120 def _MetaToNumPyArray(self, meta_weight): |
| 117 """Converts dict (mapping feature name to weight) to numpy array.""" | 121 """Converts dict (mapping feature name to weight) to numpy array.""" |
| 118 return np.array([weights.get(feature_name, default) | 122 return np.array([weight.value |
| 119 for feature_name in self._feature_order]) | 123 for weight in self._serializer.ToList(meta_weight)]) |
| 120 | 124 |
| 121 def FeaturesAsNumPyArray(self, x): | 125 def FeaturesAsNumPyArray(self, x): |
| 122 """A variant of ``Features`` which returns a ``np.ndarray``. | 126 """A variant of ``Features`` which returns a ``np.ndarray``. |
| 123 | 127 |
| 124 Note, the features np array should have the same order as in | 128 Note, the features np array should have the same order as in |
| 125 self._feature_order to stay aligned with weights np array. | 129 self._ordered_feature_to_len to stay aligned with meta_weight np array. |
| 126 | 130 |
| 127 For training we need to have the feature function return an | 131 For training we need to have the feature function return an |
| 128 ``np.ndarray(float)`` rather than the ``list(FeatureValue)`` used | 132 ``np.ndarray(float)`` rather than the ``list(FeatureValue)`` used |
| 129 elsewhere. This function performes the necessary conversion. | 133 elsewhere. This function performs the necessary conversion. |
| 130 | 134 |
| 131 N.B., at present we do not memoize this function. The underlying | 135 N.B., at present we do not memoize this function. The underlying |
| 132 ``Features`` method is memoized, so we won't re-compute the features | 136 ``Features`` method is memoized, so we won't re-compute the features |
| 133 each time; but we will repeatedly copy the floats into newly allocated | 137 each time; but we will repeatedly copy the floats into newly allocated |
| 134 ``np.ndarray`` objects. If that turns out to be a performance | 138 ``np.ndarray`` objects. If that turns out to be a performance |
| 135 bottleneck, we can add the extra layer of memoization to avoid that. | 139 bottleneck, we can add the extra layer of memoization to avoid that. |
| 136 """ | 140 """ |
| 137 fx = self.Features(x) | 141 return lambda y: np.array(self._serializer.ToList(self.Features(x)(y))) |
| 138 def FeaturesAsNumPyArrayGivenX(y): | |
| 139 fxys = fx(y) | |
| 140 return np.array([fxys[feature_name].value | |
| 141 for feature_name in self._feature_order]) | |
| 142 | |
| 143 return FeaturesAsNumPyArrayGivenX | |
| 144 | 142 |
| 145 def LogLikelihood(self): | 143 def LogLikelihood(self): |
| 146 """The conditional log-likelihood of the training data. | 144 """The conditional log-likelihood of the training data. |
| 147 | 145 |
| 148 The conditional likelihood of the training data is the product | 146 The conditional likelihood of the training data is the product |
| 149 of ``Pr(y|x)`` for each ``(x, y)`` pair in the training data; so | 147 of ``Pr(y|x)`` for each ``(x, y)`` pair in the training data; so |
| 150 the conditional log-likelihood is the log of that. This is called | 148 the conditional log-likelihood is the log of that. This is called |
| 151 "likelihood" because it is thought of as a function of the weight | 149 "likelihood" because it is thought of as a function of the weight |
| 152 covector, with the training data held fixed. | 150 covector, with the training data held fixed. |
| 153 | 151 |
| 154 This is the ideal objective function for training the weights, as it | 152 This is the ideal objective function for training the meta_weight, as it |
| 155 will give us the MLE weight covector for the training data. However, | 153 will give us the MLE weight covector for the training data. However, |
| 156 in practice, we want to do regularization to ensure we don't overfit | 154 in practice, we want to do regularization to ensure we don't overfit |
| 157 the training data and to reduce classification time by ensuring that | 155 the training data and to reduce classification time by ensuring that |
| 158 the weight vector is sparse. Thus, the actual objective function | 156 the weight vector is sparse. Thus, the actual objective function |
| 159 will be the log-likelihood plus some penalty terms for regularization. | 157 will be the log-likelihood plus some penalty terms for regularization. |
| 160 """ | 158 """ |
| 161 observed_zeta = math.fsum(self.LogZ(x) for x, _ in self._training_data) | 159 observed_zeta = math.fsum(self.LogZ(x) for x, _ in self._training_data) |
| 162 observed_score = self.np_weights.dot( | 160 observed_score = self.np_weight.dot( |
| 163 self._observed_feature_vector) | 161 self._observed_feature_vector) |
| 164 return observed_score - observed_zeta | 162 return observed_score - observed_zeta |
| 165 | 163 |
| 166 def LogLikelihoodGradient(self): | 164 def LogLikelihoodGradient(self): |
| 167 """The gradient (aka Jacobian) of ``LogLikelihood``.""" | 165 """The gradient (aka Jacobian) of ``LogLikelihood``.""" |
| 168 expected_feature_vector = vsum([ | 166 expected_feature_vector = vsum([ |
| 169 self.Expectation(x, self.FeaturesAsNumPyArray(x)) | 167 self.Expectation(x, self.FeaturesAsNumPyArray(x)) |
| 170 for x, _ in self._training_data]) | 168 for x, _ in self._training_data]) |
| 171 return self._observed_feature_vector - expected_feature_vector | 169 return self._observed_feature_vector - expected_feature_vector |
| 172 | 170 |
| 173 def TrainWeights(self, l2_penalty): | 171 def TrainWeights(self, l2_penalty): |
| 174 """Optimize the weight covector based on the training data. | 172 """Optimize the weight covector based on the training data. |
| 175 | 173 |
| 176 Args: | 174 Args: |
| 177 l2_penalty (float): the hyperparameter for how much to penalize | 175 l2_penalty (float): the hyperparameter for how much to penalize |
| 178 weight covectors far from zero. | 176 weight covectors far from zero. |
| 179 | 177 |
| 180 Returns: | 178 Returns: |
| 181 Nothing, but has the side effect of mutating the stored weights. | 179 Nothing, but has the side effect of mutating the stored meta_weight. |
| 182 """ | 180 """ |
| 183 initial_np_weights = self.np_weights | 181 initial_np_weight = self.np_weight |
| 184 | 182 |
| 185 # We want to minimize the number of times we reset the weights since | 183 # We want to minimize the number of times we reset the meta_weight since |
| 186 # that clears our memos. One might think we could do that in the | 184 # that clears our memos. One might think we could do that in the |
| 187 # between-iterations callback; but actually, in a single iteration, | 185 # between-iterations callback; but actually, in a single iteration, |
| 188 # BFGS calls the objective function and gradient more than once with | 186 # BFGS calls the objective function and gradient more than once with |
| 189 # different arguments; so, alas, we must reset the weights in both. | 187 # different arguments; so, alas, we must reset the meta_weight in both. |
| 190 # This is why the ``weights`` setter tries to avoid clearing memos | 188 # This is why the ``meta_weight`` setter tries to avoid clearing memos |
| 191 # when possible. | 189 # when possible. |
| 192 | 190 |
| 193 def objective_function(new_np_weights): | 191 def objective_function(new_np_weight): |
| 194 self.np_weights = new_np_weights | 192 self.np_weight = new_np_weight |
| 195 return -self.LogLikelihood() + 0.5 * l2_penalty * self.quadrance | 193 return -self.LogLikelihood() + 0.5 * l2_penalty * self.quadrance |
| 196 | 194 |
| 197 def objective_function_gradient(new_np_weights): | 195 def objective_function_gradient(new_np_weight): |
| 198 self.np_weights = new_np_weights | 196 self.np_weight = new_np_weight |
| 199 return -self.LogLikelihoodGradient() + l2_penalty * self.np_weights | 197 return -self.LogLikelihoodGradient() + l2_penalty * self.np_weight |
| 200 | 198 |
| 201 result = spo.minimize( | 199 result = spo.minimize( |
| 202 objective_function, | 200 objective_function, |
| 203 initial_np_weights, | 201 initial_np_weight, |
| 204 method='BFGS', | 202 method='BFGS', |
| 205 jac=objective_function_gradient) | 203 jac=objective_function_gradient) |
| 206 | 204 |
| 207 if not result.success: # pragma: no cover | 205 if not result.success: # pragma: no cover |
| 208 # This should happen infrequently enough that there's no point in | 206 # This should happen infrequently enough that there's no point in |
| 209 # logging it and attempting to carry on. | 207 # logging it and attempting to carry on. |
| 210 raise Exception( | 208 raise Exception( |
| 211 'TrainableLogLinearModel.TrainWeights failed:' | 209 'TrainableLogLinearModel.TrainWeights failed:' |
| 212 '\n\tReason: %s' | 210 '\n\tReason: %s' |
| 213 '\n\tCurrent objective value: %s' | 211 '\n\tCurrent objective value: %s' |
| 214 '\n\tCurrent objective gradient: %s' | 212 '\n\tCurrent objective gradient: %s' |
| 215 '\n\tIterations: %d' | 213 '\n\tIterations: %d' |
| 216 '\n\tFunction evaluations: %d' | 214 '\n\tFunction evaluations: %d' |
| 217 '\n\tGradient evaluations: %d' | 215 '\n\tGradient evaluations: %d' |
| 218 % (result.message, result.fun, result.jac, result.nit, result.nfev, | 216 % (result.message, result.fun, result.jac, result.nit, result.nfev, |
| 219 result.njev)) | 217 result.njev)) |
| 220 | 218 |
| 221 # This shouldn't really be necessary, since we're resetting it | 219 # This shouldn't really be necessary, since we're resetting it |
| 222 # directly during training; but just to be safe/sure. | 220 # directly during training; but just to be safe/sure. |
| 223 self.np_weights = result.x | 221 self.np_weight = result.x |
| OLD | NEW |