appengine/findit/crash/loglinear/test/model_test.py - Issue 2625073003: [Predator] Add MetaWeight and MetaFeatureValue to group multiple weights and features together.

Side by Side Diff: appengine/findit/crash/loglinear/test/model_test.py

Issue 2625073003: [Predator] Add MetaWeight and MetaFeatureValue to group multiple weights and features together. (Closed)

Patch Set: Rebase. Created 3 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 # Copyright 2016 The Chromium Authors. All rights reserved.	1 # Copyright 2016 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

	5 import copy

5 import math	6 import math

6 import numpy as np	7 import numpy as np

7	8

8 from crash.loglinear.feature import ChangedFile	9 from crash.loglinear.feature import ChangedFile

9 from crash.loglinear.feature import FeatureValue	10 from crash.loglinear.feature import FeatureValue

10 from crash.loglinear.feature import FeatureFunction	11 from crash.loglinear.feature import WrapperMetaFeature

11 from crash.loglinear.model import LogLinearModel	12 from crash.loglinear.model import LogLinearModel

12 from crash.loglinear.model import UnnormalizedLogLinearModel	13 from crash.loglinear.model import UnnormalizedLogLinearModel

	14 from crash.loglinear.weight import Weight

13 from crash.loglinear.test.loglinear_testcase import LoglinearTestCase	15 from crash.loglinear.test.loglinear_testcase import LoglinearTestCase

14	16

15	17

16 class UnnormalizedLogLinearModelTest(LoglinearTestCase):	18 class UnnormalizedLogLinearModelTest(LoglinearTestCase):

17	19

18 def setUp(self):	20 def setUp(self):

19 super(UnnormalizedLogLinearModelTest, self).setUp()	21 super(UnnormalizedLogLinearModelTest, self).setUp()

20 self.model = UnnormalizedLogLinearModel(self._feature_function,	22 self.model = UnnormalizedLogLinearModel(self._meta_feature,

21 self._weights)	23 self._meta_weight,

	24 0.00001)

22	25

23 def testSingleFeatureScore(self):	26 def testLogZeroish(self):

24 """Test that ``SingleFeatureScore`` returns weighted feature score."""	27 self.assertTrue(self.model.LogZeroish(-float('inf')))

25 for feature in self._feature_list:	28 self.assertFalse(self.model.LogZeroish(2.))

26 feature_value = feature(5)(True)

27 self.assertEqual(

28 self.model.SingleFeatureScore(feature_value),

29 feature_value.value * self.model._weights.get(feature_value.name, 0.))

30

31 def testFormatReasons(self):

32 """Tests ``FormatReasons`` returns a list of formatted reasons."""

33 features = [feature(3)(False) for feature in self._feature_list]

34 self.assertListEqual([(feature.name, self.model.SingleFeatureScore(feature),

35 feature.reason) for feature in features],

36 self.model.FormatReasons(features))

37

38 def testAggregateChangedFilesAggregates(self):

39 """Test that ``AggregateChangedFiles`` does aggregate reasons per file.

40

41 In the main/inner loop of ``AggregateChangedFiles``: if multiple

42 features all blame the same file change, we try to aggregate those

43 reasons so that we only report the file once (with all reasons). None

44 of the other tests here actually check the case where the same file

45 is blamed multiple times, so we check that here.

46

47 In particular, we provide the same ``FeatureValue`` twice, and

48 hence the same ``ChangedFile`` twice; so we should get back a single

49 ``ChangedFile`` but with the ``reasons`` fields concatenated.

50 """

51 file_reason = 'I blame you!'

52 file_blame = ChangedFile(

53 name = 'a.cc',

54 blame_url = None,

55 reasons = [file_reason]

56 )

57

58 feature_value = FeatureValue(

59 name = 'dummy feature',

60 value = 42,

61 reason = 'dummy reason',

62 changed_files = [file_blame]

63 )

64

65 expected_file_blame = file_blame._replace(reasons = [file_reason] * 2)

66

67 self.assertListEqual(

68 [expected_file_blame],

69 self.model.AggregateChangedFiles([feature_value] * 2))

70	29

71	30

72 class LoglinearTest(LoglinearTestCase):	31 class LoglinearTest(LoglinearTestCase):

73	32

74 def testLogLinearModel(self):	33 def testLogLinearModel(self):

75 """An arbitrary test to get 100% code coverage.	34 """An arbitrary test to get 100% code coverage.

76	35

77 Right now this test simply calls every method. The only assertions are	36 Right now this test simply calls every method. The only assertions are

78 that log-domain and normal-domain things are related appropriately;	37 that log-domain and normal-domain things are related appropriately;

79 and similarly for the quadrance and l2-norm. Since the one is defined	38 and similarly for the quadrance and l2-norm. Since the one is defined

80 in terms of the other in exactly the way written here, those should	39 in terms of the other in exactly the way written here, those should

81 be trivially true. However, if the implementation changes, then they	40 be trivially true. However, if the implementation changes, then they

82 may become flaky due to floating point fuzz. Really this should be	41 may become flaky due to floating point fuzz. Really this should be

83 replaced by a collection of semantically meaningful tests, i.e.,	42 replaced by a collection of semantically meaningful tests, i.e.,

84 ones that actually look for bugs we might realistically need to	43 ones that actually look for bugs we might realistically need to

85 guard against. At least this test is good for detecting typo-style	44 guard against. At least this test is good for detecting typo-style

86 errors where we try accessing fields/methods that don't exist.	45 errors where we try accessing fields/methods that don't exist.

87 """	46 """

88 model = LogLinearModel(self._Y, self._feature_function, self._weights, 0.1)	47 model = LogLinearModel(self._Y, self._meta_feature, self._meta_weight)

89 model.ClearAllMemos()	48 model.ClearAllMemos()

90 model = LogLinearModel(self._Y, self._feature_function, self._weights)	49 self.assertEqual(self._meta_weight, model.meta_weight)

91 self.assertDictEqual(self._weights, model.weights)

92 self.assertEqual(math.sqrt(model.quadrance), model.l2)	50 self.assertEqual(math.sqrt(model.quadrance), model.l2)

93	51

94 for x in self._X:	52 for x in self._X:

95 self.assertEqual(math.exp(model.LogZ(x)), model.Z(x))	53 self.assertEqual(math.exp(model.LogZ(x)), model.Z(x))

96 model.Expectation(x, lambda y: np.array([1.0]))	54 model.Expectation(x, lambda y: np.array([1.0]))

97 for y in self._Y(x):	55 for y in self._Y(x):

98 model.Features(x)(y)	56 model.Features(x)(y)

99 model.Score(x)(y)	57 model.Score(x)(y)

100 self.assertEqual(	58 self.assertEqual(

101 math.exp(model.LogProbability(x)(y)),	59 math.exp(model.LogProbability(x)(y)),

102 model.Probability(x)(y))	60 model.Probability(x)(y))

	61

	62 def testMetaWeightSetter(self):

	63 model = LogLinearModel(self._Y, self._meta_feature, self._meta_weight)

	64 new_meta_weight = copy.deepcopy(self._meta_weight)

	65 new_meta_weight['Feature0'] = Weight(2.1)

	66 model.meta_weight = new_meta_weight

	67 self.assertTrue(model.meta_weight == new_meta_weight)

OLD	NEW