| OLD | NEW |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
| 3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
| 4 | 4 |
| 5 from collections import namedtuple | 5 from collections import namedtuple |
| 6 import math | 6 import math |
| 7 | 7 |
| 8 import libs.math.logarithms as lmath | 8 import libs.math.logarithms as lmath |
| 9 | 9 |
| 10 | 10 |
| (...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 83 ['name', 'value', 'reason', 'changed_files'])): # pragma: no cover | 83 ['name', 'value', 'reason', 'changed_files'])): # pragma: no cover |
| 84 """The result of an individual feature. | 84 """The result of an individual feature. |
| 85 | 85 |
| 86 Attributes: | 86 Attributes: |
| 87 name (str): the name of the feature producing this value. | 87 name (str): the name of the feature producing this value. |
| 88 value (convertible to float): the value itself. N.B. we call the | 88 value (convertible to float): the value itself. N.B. we call the |
| 89 ``float`` builtin function to coerce this value to float; thus | 89 ``float`` builtin function to coerce this value to float; thus |
| 90 it is okay to pass an ``int`` or ``bool`` value as well. | 90 it is okay to pass an ``int`` or ``bool`` value as well. |
| 91 reason (str): some explanation of where the value came from. | 91 reason (str): some explanation of where the value came from. |
| 92 changed_files (list of ChangedFile, or None): A list of files changed | 92 changed_files (list of ChangedFile, or None): A list of files changed |
| 93 by the ``Result`` annotated with reasons why the feature function | 93 by the ``Suspect`` annotated with reasons why the feature function |
| 94 generating this object blames those changes. | 94 generating this object blames those changes. |
| 95 """ | 95 """ |
| 96 __slots__ = () | 96 __slots__ = () |
| 97 | 97 |
| 98 def __new__(cls, name, value, reason, changed_files): | 98 def __new__(cls, name, value, reason, changed_files): |
| 99 return super(cls, FeatureValue).__new__(cls, | 99 return super(cls, FeatureValue).__new__(cls, |
| 100 str(name), float(value), str(reason), changed_files) | 100 str(name), float(value), str(reason), changed_files) |
| 101 | 101 |
| 102 def __str__(self): | 102 def __str__(self): |
| 103 return ( | 103 return ( |
| 104 'FeatureValue(name = %s, value = %f, reason = %s, changed_files = %s)' | 104 'FeatureValue(name = %s, value = %f, reason = %s, changed_files = %s)' |
| 105 % (self.name, self.value, self.reason, self.changed_files)) | 105 % (self.name, self.value, self.reason, self.changed_files)) |
| 106 | 106 |
| 107 | 107 |
| 108 class Feature(object): | 108 class Feature(object): |
| 109 """Abstract base class for features used by loglinear models.""" | 109 """Abstract base class for features used by loglinear models.""" |
| 110 | 110 |
| 111 @property | 111 @property |
| 112 def name(self): | 112 def name(self): |
| 113 """The name of this feature.""" | 113 """The name of this feature.""" |
| 114 raise NotImplementedError() | 114 raise NotImplementedError() |
| 115 | 115 |
| 116 def __call__(self, report): | 116 def __call__(self, report): |
| 117 """Returns a value for a result given some report. | 117 """Returns a value for a ``y`` given some ``x``. |
| 118 | 118 |
| 119 The loglinear model this feature is used in will specify some types | 119 The loglinear model this feature is used in will specify some types |
| 120 ``X`` and ``Y``, as described in the documentation there. As an | 120 ``X`` and ``Y``, as described in the documentation there. As an |
| 121 example: for the CL classifier, ``X`` is ``CrashReport`` and ``Y`` | 121 example: for the CL classifier, ``X`` is ``CrashReport`` and ``Y`` is |
| 122 is ``MatchResult``. Given those two types, this method is a curried | 122 ``Suspect``. Given those two types, this method is a curried function |
| 123 function of type ``X -> Y -> FeatureValue``. That is, given some ``x`` | 123 of type ``X -> Y -> FeatureValue``. That is, given some ``x`` of type |
| 124 of type ``X``, we return a function of type ``Y -> FeatureValue``, | 124 ``X``, we return a function of type ``Y -> FeatureValue``, where |
| 125 where the final result for each ``y`` of type ``Y`` is the value of | 125 the final result for each ``y`` of type ``Y`` is the value of that |
| 126 that ``y`` given that ``x``. | 126 ``y`` given that ``x``. |
| 127 | 127 |
| 128 Values closer to zero indicate this feature has less to say about | 128 Values closer to zero indicate this feature has less to say about |
| 129 whether the result is to be blamed. Values further from zero indicate | 129 whether the ``y`` is to be blamed. Values further from zero indicate |
| 130 that this feature has more to say about it. (Whether this feature | 130 that this feature has more to say about it. (Whether this feature |
| 131 thinks the result should be blamed or should not be depends on | 131 thinks the ``y`` should be blamed or should not be depends on the sign |
| 132 the sign of the value and the sign of the weight given to this | 132 of the value and the sign of the weight given to this feature.) As |
| 133 feature.) As special cases, a value of negative infinity means | 133 special cases, a value of negative infinity means "do not blame this |
| 134 "do not blame this ``y`` no matter what any other features say", | 134 ``y`` no matter what any other features say", and a value of positive |
| 135 and a value of positive infinity means "definitely blame this ``y`` | 135 infinity means "definitely blame this ``y`` no matter what any other |
| 136 no matter what any other features say". Both of those special values | 136 features say". Both of those special values should be used sparingly, |
| 137 should be used sparingly, since they override the model's ability | 137 since they override the model's ability to combine multiple sources of |
| 138 to combine multiple sources of information and decide the culprit | 138 information and decide the culprit based on all the evidence together. |
| 139 based on all the evidence together. | |
| 140 """ | 139 """ |
| 141 raise NotImplementedError() | 140 raise NotImplementedError() |
| OLD | NEW |