appengine/findit/crash/loglinear/changelist_classifier.py - Issue 2608483002: Changed FindSuspects to take a Repository factory, rather than mutating it

Side by Side Diff: appengine/findit/crash/loglinear/changelist_classifier.py

Issue 2608483002: Changed FindSuspects to take a Repository factory, rather than mutating it (Closed)

Patch Set: rebase Created 3 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
1 # Copyright 2016 The Chromium Authors. All rights reserved.	1 # Copyright 2016 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 import logging	5 import logging

6 import math	6 import math

7	7

8 from common import chrome_dependency_fetcher	8 from common.chrome_dependency_fetcher import ChromeDependencyFetcher

9 from crash import changelist_classifier	9 from crash import changelist_classifier

10 from crash.loglinear.changelist_features import min_distance	10 from crash.loglinear.changelist_features import min_distance

11 from crash.loglinear.changelist_features import top_frame_index	11 from crash.loglinear.changelist_features import top_frame_index

12 from crash.loglinear.model import ToFeatureFunction	12 from crash.loglinear.model import ToFeatureFunction

13 from crash.loglinear.model import UnnormalizedLogLinearModel	13 from crash.loglinear.model import UnnormalizedLogLinearModel

14 from crash.stacktrace import CallStack	14 from crash.stacktrace import CallStack

15 from crash.stacktrace import Stacktrace	15 from crash.stacktrace import Stacktrace

16	16

17	17

18 class LogLinearChangelistClassifier(object):	18 class LogLinearChangelistClassifier(object):

19 """A ``LogLinearModel``-based implementation of CL classification."""	19 """A ``LogLinearModel``-based implementation of CL classification."""

20	20

21 def __init__(self, repository, weights, top_n_frames=7, top_n_suspects=3):	21 def __init__(self, repository, get_repository, weights, top_n_frames=7,

	22 top_n_suspects=3):

22 """Args:	23 """Args:

23 repository (Repository): the Git repository for getting CLs to classify.	24 repository (Repository): the Git repository for getting CLs to classify.

	25 get_repository (callable): a function from DEP urls to ``Repository``

	26 objects, so we can get changelogs and blame for each dep. Notably,

	27 to keep the code here generic, we make no assumptions about

	28 which subclass of ``Repository`` this function returns. Thus,

	29 it is up to the caller to decide what class to return and handle

	30 any other arguments that class may require (e.g., an http client

	31 for ``GitilesRepository``).

24 weights (dict of float): the weights for the features. The keys of	32 weights (dict of float): the weights for the features. The keys of

25 the dictionary are the names of the feature that weight is	33 the dictionary are the names of the feature that weight is

26 for. We take this argument as a dict rather than as a list so that	34 for. We take this argument as a dict rather than as a list so that

27 callers needn't worry about what order to provide the weights in.	35 callers needn't worry about what order to provide the weights in.

28 top_n_frames (int): how many frames of each callstack to look at.	36 top_n_frames (int): how many frames of each callstack to look at.

29 top_n_suspects (int): maximum number of suspects to return.	37 top_n_suspects (int): maximum number of suspects to return.

30 """	38 """

31 self._repository = repository	39 self._repository = repository

	40 self._dependency_fetcher = ChromeDependencyFetcher(self._repository)

	41 self._get_repository = get_repository

32 self._top_n_frames = top_n_frames	42 self._top_n_frames = top_n_frames

33 self._top_n_suspects = top_n_suspects	43 self._top_n_suspects = top_n_suspects

34	44

35 feature_function = ToFeatureFunction([	45 feature_function = ToFeatureFunction([

36 top_frame_index.TopFrameIndexFeature(top_n_frames),	46 top_frame_index.TopFrameIndexFeature(top_n_frames),

37 min_distance.MinDistanceFeature(),	47 min_distance.MinDistanceFeature(),

38 ])	48 ])

39	49

40 weight_list = [	50 weight_list = [

41 weights['TopFrameIndex'],	51 weights['TopFrameIndex'],

(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
96 logging.info('ChangelistClassifier.__call__: Regression range %s:%s',	106 logging.info('ChangelistClassifier.__call__: Regression range %s:%s',

97 last_good_version, first_bad_version)	107 last_good_version, first_bad_version)

98	108

99 # We are only interested in the deps in crash stack (the callstack that	109 # We are only interested in the deps in crash stack (the callstack that

100 # caused the crash).	110 # caused the crash).

101 # TODO(wrengr): we may want to receive the crash deps as an argument,	111 # TODO(wrengr): we may want to receive the crash deps as an argument,

102 # so that when this method is called via Findit.FindCulprit, we avoid	112 # so that when this method is called via Findit.FindCulprit, we avoid

103 # doing redundant work creating it.	113 # doing redundant work creating it.

104 stack_deps = changelist_classifier.GetDepsInCrashStack(	114 stack_deps = changelist_classifier.GetDepsInCrashStack(

105 report.stacktrace.crash_stack,	115 report.stacktrace.crash_stack,

106 chrome_dependency_fetcher.ChromeDependencyFetcher(	116 self._dependency_fetcher.GetDependency(

107 self._repository).GetDependency(report.crashed_version,	117 report.crashed_version, report.platform))

108 report.platform))

109	118

110 # Get dep and file to changelogs, stack_info and blame dicts.	119 # Get dep and file to changelogs, stack_info and blame dicts.

111 dep_rolls = chrome_dependency_fetcher.ChromeDependencyFetcher(	120 dep_rolls = self._dependency_fetcher.GetDependencyRollsDict(

112 self._repository).GetDependencyRollsDict(	121 last_good_version, first_bad_version, report.platform)

113 last_good_version, first_bad_version, report.platform)

114	122

115 # Regression of a dep added/deleted (old_revision/new_revision is None) can	123 # Regression of a dep added/deleted (old_revision/new_revision is None) can

116 # not be known for sure and this case rarely happens, so just filter them	124 # not be known for sure and this case rarely happens, so just filter them

117 # out.	125 # out.

118 regression_deps_rolls = {}	126 regression_deps_rolls = {}

119 for dep_path, dep_roll in dep_rolls.iteritems():	127 for dep_path, dep_roll in dep_rolls.iteritems():

120 if not dep_roll.old_revision or not dep_roll.new_revision:	128 if not dep_roll.old_revision or not dep_roll.new_revision:

121 logging.info('Skip %s denpendency %s',	129 logging.info('Skip %s denpendency %s',

122 'added' if dep_roll.new_revision else 'deleted', dep_path)	130 'added' if dep_roll.new_revision else 'deleted', dep_path)

123 continue	131 continue

124 regression_deps_rolls[dep_path] = dep_roll	132 regression_deps_rolls[dep_path] = dep_roll

125	133

126 dep_to_file_to_changelogs, ignore_cls = (	134 dep_to_file_to_changelogs, ignore_cls = (

127 changelist_classifier.GetChangeLogsForFilesGroupedByDeps(	135 changelist_classifier.GetChangeLogsForFilesGroupedByDeps(

128 regression_deps_rolls, stack_deps, self._repository))	136 regression_deps_rolls, stack_deps, self._repository))

129 dep_to_file_to_stack_infos = (	137 dep_to_file_to_stack_infos = (

130 changelist_classifier.GetStackInfosForFilesGroupedByDeps(	138 changelist_classifier.GetStackInfosForFilesGroupedByDeps(

131 report.stacktrace, stack_deps))	139 report.stacktrace, stack_deps))

132	140

133 # Get the possible suspects.	141 # Get the possible suspects.

134 suspects = changelist_classifier.FindSuspects(	142 suspects = changelist_classifier.FindSuspects(

135 dep_to_file_to_changelogs,	143 dep_to_file_to_changelogs,

136 dep_to_file_to_stack_infos,	144 dep_to_file_to_stack_infos,

137 stack_deps,	145 stack_deps,

138 self._repository,	146 self._get_repository,

139 ignore_cls)	147 ignore_cls)

140 if suspects is None:	148 if suspects is None:

141 return []	149 return []

142	150

143 # Score the suspects and organize them for outputting/returning.	151 # Score the suspects and organize them for outputting/returning.

144 features_given_report = self._model.Features(report)	152 features_given_report = self._model.Features(report)

145 score_given_report = self._model.Score(report)	153 score_given_report = self._model.Score(report)

146 scored_suspects = []	154 scored_suspects = []

147 for suspect in suspects:	155 for suspect in suspects:

148 score = score_given_report(suspect)	156 score = score_given_report(suspect)

(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
222 all_changed_files[changed_file.name] = changed_file	230 all_changed_files[changed_file.name] = changed_file

223 continue	231 continue

224	232

225 assert accumulated_changed_file.blame_url == changed_file.blame_url, (	233 assert accumulated_changed_file.blame_url == changed_file.blame_url, (

226 ValueError('Blame URLs do not match: %s != %s'	234 ValueError('Blame URLs do not match: %s != %s'

227 % (accumulated_changed_file.blame_url, changed_file.blame_url)))	235 % (accumulated_changed_file.blame_url, changed_file.blame_url)))

228 accumulated_changed_file.reasons.extend(changed_file.reasons or [])	236 accumulated_changed_file.reasons.extend(changed_file.reasons or [])

229	237

230 return sorted(all_changed_files.values(),	238 return sorted(all_changed_files.values(),

231 key=lambda changed_file: changed_file.name)	239 key=lambda changed_file: changed_file.name)

OLD	NEW