appengine/findit/crash/loglinear/test/model_test.py - Issue 2617273002: [Predator] Move ``SingleFeatureScore`` to LLM.

Side by Side Diff: appengine/findit/crash/loglinear/test/model_test.py

Issue 2617273002: [Predator] Move ``SingleFeatureScore`` to LLM. (Closed)

Patch Set: Address comments. Created 3 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« appengine/findit/crash/loglinear/model.py ('K') | « appengine/findit/crash/loglinear/test/loglinear_testcase.py ('k') | appengine/findit/crash/loglinear/test/training_test.py » ('j') | appengine/findit/crash/loglinear/test/training_test.py » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 # Copyright 2016 The Chromium Authors. All rights reserved.	1 # Copyright 2016 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 import math	5 import math

6 import numpy as np	6 import numpy as np

7	7

8 from crash.loglinear.model import ToFeatureFunction	8 from crash.loglinear.feature import ChangedFile

	9 from crash.loglinear.feature import FeatureValue

	10 from crash.loglinear.feature import FeatureFunction

9 from crash.loglinear.model import LogLinearModel	11 from crash.loglinear.model import LogLinearModel

	12 from crash.loglinear.model import UnnormalizedLogLinearModel

10 from crash.loglinear.test.loglinear_testcase import LoglinearTestCase	13 from crash.loglinear.test.loglinear_testcase import LoglinearTestCase

11	14

12	15

	16 class UnnormalizedLogLinearModelTest(LoglinearTestCase):

	17

	18 def setUp(self):

	19 super(UnnormalizedLogLinearModelTest, self).setUp()

	20 self.model = UnnormalizedLogLinearModel(self._feature_function,

	21 self._weights, 0.1)
	wrengr 2017/01/12 19:09:09: Do you really want so large of an epsilon? Why not stick with the default? (We do want to test that the default behaves reasonably; otherwise we should choose a different default.)
	Sharu Jiang 2017/01/13 01:08:34: On 2017/01/12 19:09:09, wrengr wrote: "Do you really want so large of an epsilon? Why not stick with the default? (We do want to test that the default behaves reasonably, otherwise we should choose a different default)" -- No, just picked a random number.
	22

	23 def testSingleFeatureScore(self):

	24 """Test that ``SingleFeatureScore`` returns weighted feature score."""

	25 for feature in self._feature_list:

	26 feature_value = feature(5)(True)

	27 self.assertEqual(

	28 self.model.SingleFeatureScore(feature_value),

	29 feature_value.value * self.model._weights.get(feature_value.name, 0.))

	30

	31 def testFormatReasons(self):

	32 """Tests ``FormatReasons`` returns a list of formatted reasons."""

	33 features = [feature(3)(False) for feature in self._feature_list]

	34 self.assertListEqual([(feature.name, self.model.SingleFeatureScore(feature),

	35 feature.reason) for feature in features],

	36 self.model.FormatReasons(features))

	37

	38 def testAggregateChangedFilesAggregates(self):

	39 """Test that ``AggregateChangedFiles`` does aggregate reasons per file.

	40

	41 In the main/inner loop of ``AggregateChangedFiles``: if multiple

	42 features all blame the same file change, we try to aggregate those

	43 reasons so that we only report the file once (with all reasons). None

	44 of the other tests here actually check the case where the same file

	45 is blamed multiple times, so we check that here.

	46

	47 In particular, we provide the same ``FeatureValue`` twice, and

	48 hence the same ``ChangedFile`` twice; so we should get back a single

	49 ``ChangedFile`` but with the ``reasons`` fields concatenated.

	50 """

	51 file_reason = 'I blame you!'

	52 file_blame = ChangedFile(

	53 name = 'a.cc',

	54 blame_url = None,

	55 reasons = [file_reason]

	56 )

	57

	58 feature_value = FeatureValue(

	59 name = 'dummy feature',

	60 value = 42,

	61 reason = 'dummy reason',

	62 changed_files = [file_blame]

	63 )

	64

	65 expected_file_blame = file_blame._replace(reasons = [file_reason] * 2)

	66

	67 self.assertListEqual(

	68 [expected_file_blame],

	69 self.model.AggregateChangedFiles([feature_value] * 2))

	70

	71

13 class LoglinearTest(LoglinearTestCase):	72 class LoglinearTest(LoglinearTestCase):

14	73

15 def testToFeatureFunction(self):

16 """Test that ``ToFeatureFunction`` obeys the equality its docstring says."""

17 for x in self._X:

18 for y in self._Y(x):

19 for i in xrange(self._qty_features):

20 self.assertEqual(self._feature_list[i](x)(y),

21 self._feature_function(x)(y)[i])

22

23 def testLogLinearModel(self):	74 def testLogLinearModel(self):

24 """An arbitrary test to get 100% code coverage.	75 """An arbitrary test to get 100% code coverage.

25	76

26 Right now this test simply calls every method. The only assertions are	77 Right now this test simply calls every method. The only assertions are

27 that log-domain and normal-domain things are related appropriately;	78 that log-domain and normal-domain things are related appropriately;

28 and similarly for the quadrance and l2-norm. Since the one is defined	79 and similarly for the quadrance and l2-norm. Since the one is defined

29 in terms of the other in exactly the way written here, those should	80 in terms of the other in exactly the way written here, those should

30 be trivially true. However, if the implementation changes, then they	81 be trivially true. However, if the implementation changes, then they

31 may become flaky due to floating point fuzz. Really this should be	82 may become flaky due to floating point fuzz. Really this should be

32 replaced by a collection of semantically meaningful tests, i.e.,	83 replaced by a collection of semantically meaningful tests, i.e.,

33 ones that actually look for bugs we might realistically need to	84 ones that actually look for bugs we might realistically need to

34 guard against. At least this test is good for detecting typo-style	85 guard against. At least this test is good for detecting typo-style

35 errors where we try accessing fields/methods that don't exist.	86 errors where we try accessing fields/methods that don't exist.

36 """	87 """

37 model = LogLinearModel(self._Y, self._feature_function, self._weights, 0.1)	88 model = LogLinearModel(self._Y, self._feature_function, self._weights, 0.1)

38 model.ClearAllMemos()	89 model.ClearAllMemos()

39 model = LogLinearModel(self._Y, self._feature_function, self._weights)	90 model = LogLinearModel(self._Y, self._feature_function, self._weights)

40 self.assertListEqual(self._weights, model.weights.tolist())	91 self.assertDictEqual(self._weights, model.weights)

41 self.assertEqual(math.sqrt(model.quadrance), model.l2)	92 self.assertEqual(math.sqrt(model.quadrance), model.l2)

42	93

43 for x in self._X:	94 for x in self._X:

44 self.assertEqual(math.exp(model.LogZ(x)), model.Z(x))	95 self.assertEqual(math.exp(model.LogZ(x)), model.Z(x))

45 model.Expectation(x, lambda y: np.array([1.0]))	96 model.Expectation(x, lambda y: np.array([1.0]))

46 for y in self._Y(x):	97 for y in self._Y(x):

47 model.Features(x)(y)	98 model.Features(x)(y)

48 model.Score(x)(y)	99 model.Score(x)(y)

49 self.assertEqual(	100 self.assertEqual(

50 math.exp(model.LogProbability(x)(y)),	101 math.exp(model.LogProbability(x)(y)),

51 model.Probability(x)(y))	102 model.Probability(x)(y))

OLD	NEW