| OLD | NEW |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 import math | 5 import math |
| 6 import numpy as np | 6 import numpy as np |
| 7 # N.B., ``np.array`` can't take generators; you must pass explicit lists. | 7 # N.B., ``np.array`` can't take generators; you must pass explicit lists. |
| 8 import scipy.optimize as spo | 8 import scipy.optimize as spo |
| 9 | 9 |
| 10 from crash.loglinear.model import LogLinearModel | 10 from crash.loglinear.model import LogLinearModel |
| 11 from libs.math.vectors import vsum | 11 from libs.math.vectors import vsum |
| 12 # N.B., ``vsum`` can't take generators; you must pass explicit lists. | 12 # N.B., ``vsum`` can't take generators; you must pass explicit lists. |
| 13 | 13 |
| 14 | 14 |
| 15 class TrainableLogLinearModel(LogLinearModel): | 15 class TrainableLogLinearModel(LogLinearModel): |
| 16 """A loglinear model with some labelled data set for training the weights.""" | 16 """A loglinear model with some labelled data set for training the weights.""" |
| 17 | 17 |
| 18 def __init__(self, Y, training_data, feature_function, initial_weights, | 18 def __init__(self, Y_given_X, training_data, feature_function, |
| 19 epsilon=None): | 19 initial_weights, epsilon=None): |
| 20 """ | 20 """ |
| 21 Args: | 21 Args: |
| 22 Y (iterable): the entire range of values for the independent | 22 Y_given_X: a function from ``X`` to an iterable object giving the |
| 23 variable. This is needed for computing the partition function. | 23 subset of ``Y`` which has non-zero probability given the |
| 24 ``x``. When in doubt about whether some ``y`` has zero probability |
| 25 or not, it is always safe/correct to return a larger subset of |
| 26 ``Y`` (it will just take more computation time). This is | 26 ``Y`` (it will just take more computation time). This is |
| 27 needed for computing the partition function and expectation. N.B., |
| 28 we do not actually need to know/enumerate *all* of ``Y``, | 28 we do not actually need to know/enumerate *all* of ``Y``, |
| 29 only the subsets for each ``x``. |
| 24 training_data (iterable): a collection of ``(x, y)`` pairs where | 30 training_data (iterable): a collection of ``(x, y)`` pairs where |
| 25 ``y`` is the known-correct label for ``x``. | 31 ``y`` is the known-correct label for ``x``. |
| 26 feature_function: A function from ``X`` to ``Y`` to a list of | 32 feature_function: A function from ``X`` to ``Y`` to a list of |
| 27 ``float``. N.B., the length of the list must be the same for all | 33 ``float``. N.B., the length of the list must be the same for all |
| 28 ``x`` and ``y``, and must be the same as the length of the list | 34 ``x`` and ``y``, and must be the same as the length of the list |
| 29 of weights. | 35 of weights. |
| 30 initial_weights (list of float): the pre-training coefficients | 36 initial_weights (list of float): the pre-training coefficients |
| 31 for how much we believe components of the feature vector. This | 37 for how much we believe components of the feature vector. This |
| 32 provides the seed for training; this starting value shouldn't | 38 provides the seed for training; this starting value shouldn't |
| 33 affect the final weights obtained by training (thanks to | 39 affect the final weights obtained by training (thanks to |
| 34 convexity), but will affect how long it takes for training | 40 convexity), but will affect how long it takes for training |
| 35 to converge. | 41 to converge. |
| 36 epsilon (float): The absolute-error threshold for considering a | 42 epsilon (float): The absolute-error threshold for considering a |
| 37 weight to be "equal to zero". N.B., this should be a positive | 43 weight to be "equal to zero". N.B., this should be a positive |
| 38 number, as we will compare it against the absolute value of | 44 number, as we will compare it against the absolute value of |
| 39 each weight. | 45 each weight. |
| 40 """ | 46 """ |
| 41 super(TrainableLogLinearModel, self).__init__( | 47 super(TrainableLogLinearModel, self).__init__( |
| 42 Y, feature_function, initial_weights, epsilon) | 48 Y_given_X, feature_function, initial_weights, epsilon) |
| 43 self._training_data = training_data | 49 self._training_data = training_data |
| 44 | 50 |
| 45 self._observed_feature_vector = vsum([ | 51 self._observed_feature_vector = vsum([ |
| 46 self.FeaturesAsNumPyArray(x)(y) | 52 self.FeaturesAsNumPyArray(x)(y) |
| 47 for x, y in self._training_data]) | 53 for x, y in self._training_data]) |
| 48 | 54 |
| 49 # Even though this is identical to the superclass definition, we must | 55 # Even though this is identical to the superclass definition, we must |
| 50 # re-provide it in order to define the setter. | 56 # re-provide it in order to define the setter. |
| 51 @property | 57 @property |
| 52 def weights(self): | 58 def weights(self): |
| (...skipping 117 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 170 '\n\tCurrent objective gradient: %s' | 176 '\n\tCurrent objective gradient: %s' |
| 171 '\n\tIterations: %d' | 177 '\n\tIterations: %d' |
| 172 '\n\tFunction evaluations: %d' | 178 '\n\tFunction evaluations: %d' |
| 173 '\n\tGradient evaluations: %d' | 179 '\n\tGradient evaluations: %d' |
| 174 % (result.message, result.fun, result.jac, result.nit, result.nfev, | 180 % (result.message, result.fun, result.jac, result.nit, result.nfev, |
| 175 result.njev)) | 181 result.njev)) |
| 176 | 182 |
| 177 # This shouldn't really be necessary, since we're resetting it | 183 # This shouldn't really be necessary, since we're resetting it |
| 178 # directly during training; but just to be safe/sure. | 184 # directly during training; but just to be safe/sure. |
| 179 self.weights = result.x | 185 self.weights = result.x |
| OLD | NEW |