appengine/findit/crash/loglinear/changelist_classifier.py - Issue 2625073003: [Predator] Add MetaWeight and MetaFeatureValue to group multiple weights and features together.

Side by Side Diff: appengine/findit/crash/loglinear/changelist_classifier.py

Issue 2625073003: [Predator] Add MetaWeight and MetaFeatureValue to group multiple weights and features together. (Closed)

Patch Set: Rebase. Created 3 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 # Copyright 2016 The Chromium Authors. All rights reserved.	1 # Copyright 2016 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 from collections import defaultdict	5 from collections import defaultdict

6 import logging	6 import logging

7 import math	7 import math

8	8

9 from common.chrome_dependency_fetcher import ChromeDependencyFetcher	9 from common.chrome_dependency_fetcher import ChromeDependencyFetcher

10 from crash import changelist_classifier	10 from crash import changelist_classifier

11 from crash.crash_report_with_dependencies import CrashReportWithDependencies	11 from crash.crash_report_with_dependencies import CrashReportWithDependencies

12 from crash.loglinear.changelist_features import min_distance	12 from crash.loglinear.changelist_features.min_distance import MinDistanceFeature

13 from crash.loglinear.changelist_features import top_frame_index	13 from crash.loglinear.changelist_features.top_frame_index import (

14 from crash.loglinear.feature import FeatureFunction	14 TopFrameIndexFeature)

	15 from crash.loglinear.feature import WrapperMetaFeature

15 from crash.loglinear.model import UnnormalizedLogLinearModel	16 from crash.loglinear.model import UnnormalizedLogLinearModel

16 from crash.stacktrace import CallStack	17 from crash.stacktrace import CallStack

17 from crash.stacktrace import Stacktrace	18 from crash.stacktrace import Stacktrace

18 from crash.suspect import StackInfo	19 from crash.suspect import StackInfo

19	20

20	21

21 class LogLinearChangelistClassifier(object):	22 class LogLinearChangelistClassifier(object):

22 """A ``LogLinearModel``-based implementation of CL classification."""	23 """A ``LogLinearModel``-based implementation of CL classification."""

23	24

24 def __init__(self, get_repository, weights, top_n_frames=7, top_n_suspects=3):	25 def __init__(self, get_repository, meta_feature, meta_weight,

	26 top_n_frames=7, top_n_suspects=3):

25 """Args:	27 """Args:

26 get_repository (callable): a function from DEP urls to ``Repository``	28 get_repository (callable): a function from DEP urls to ``Repository``

27 objects, so we can get changelogs and blame for each dep. Notably,	29 objects, so we can get changelogs and blame for each dep. Notably,

28 to keep the code here generic, we make no assumptions about	30 to keep the code here generic, we make no assumptions about

29 which subclass of ``Repository`` this function returns. Thus,	31 which subclass of ``Repository`` this function returns. Thus,

30 it is up to the caller to decide what class to return and handle	32 it is up to the caller to decide what class to return and handle

31 any other arguments that class may require (e.g., an http client	33 any other arguments that class may require (e.g., an http client

32 for ``GitilesRepository``).	34 for ``GitilesRepository``).

33 weights (dict of float): the weights for the features. The keys of	35 meta_feature (MetaFeature): All features.

34 the dictionary are the names of the feature that weight is	36 meta_weight (MetaWeight): All weights, i.e. the weights for the features.

	37 The keys of the dictionary are the names of the feature that weight is

35 for. We take this argument as a dict rather than as a list so that	38 for. We take this argument as a dict rather than as a list so that

36 callers needn't worry about what order to provide the weights in.	39 callers needn't worry about what order to provide the weights in.

37 top_n_frames (int): how many frames of each callstack to look at.	40 top_n_frames (int): how many frames of each callstack to look at.

38 top_n_suspects (int): maximum number of suspects to return.	41 top_n_suspects (int): maximum number of suspects to return.

39 """	42 """

40 self._dependency_fetcher = ChromeDependencyFetcher(get_repository)	43 self._dependency_fetcher = ChromeDependencyFetcher(get_repository)

41 self._get_repository = get_repository	44 self._get_repository = get_repository

42 self._top_n_frames = top_n_frames	45 self._top_n_frames = top_n_frames

43 self._top_n_suspects = top_n_suspects	46 self._top_n_suspects = top_n_suspects

44	47 self._model = UnnormalizedLogLinearModel(meta_feature, meta_weight)

45 feature_function = FeatureFunction([

46 top_frame_index.TopFrameIndexFeature(top_n_frames),

47 min_distance.MinDistanceFeature(),

48 ])

49

50 self._model = UnnormalizedLogLinearModel(feature_function, weights)

51	48

52 def __call__(self, report):	49 def __call__(self, report):

53 """Finds changelists suspected of being responsible for the crash report.	50 """Finds changelists suspected of being responsible for the crash report.

54	51

55 Args:	52 Args:

56 report (CrashReport): the report to be analyzed.	53 report (CrashReport): the report to be analyzed.

57	54

58 Returns:	55 Returns:

59 List of ``Suspect``s, sorted by probability from highest to lowest.	56 List of ``Suspect``s, sorted by probability from highest to lowest.

60 """	57 """

(...skipping 72 matching lines...) Expand 10 before | Expand all | Expand 10 after
133	130

134 scored_suspects = []	131 scored_suspects = []

135 for suspect in suspects:	132 for suspect in suspects:

136 score = score_given_report(suspect)	133 score = score_given_report(suspect)

137 if self._model.LogZeroish(score):	134 if self._model.LogZeroish(score):

138 logging.debug('Discarding suspect because it has zero probability: %s'	135 logging.debug('Discarding suspect because it has zero probability: %s'

139 % str(suspect.ToDict()))	136 % str(suspect.ToDict()))

140 continue	137 continue

141	138

142 suspect.confidence = score	139 suspect.confidence = score

	140 # features is a ``MetaFeatureValue`` object containing all feature values.

143 features = features_given_report(suspect)	141 features = features_given_report(suspect)

144 suspect.reasons = self._model.FormatReasons(features.itervalues())	142 suspect.reasons = features.reason

145 suspect.changed_files = [	143 suspect.changed_files = [changed_file.ToDict()

146 changed_file.ToDict() for changed_file in	144 for changed_file in features.changed_files]

147 self._model.AggregateChangedFiles(features.itervalues())]

148 scored_suspects.append(suspect)	145 scored_suspects.append(suspect)

149	146

150 scored_suspects.sort(key=lambda suspect: suspect.confidence)	147 scored_suspects.sort(key=lambda suspect: suspect.confidence)

151 return scored_suspects[:self._top_n_suspects]	148 return scored_suspects[:self._top_n_suspects]

OLD	NEW

« no previous file with comments | « appengine/findit/crash/findit_for_chromecrash.py ('k') | appengine/findit/crash/loglinear/feature.py » ('j') | no next file with comments »