Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1296)

Side by Side Diff: appengine/findit/crash/loglinear/changelist_classifier.py

Issue 2608483002: Changed FindSuspects to take a Repository factory, rather than mutating it (Closed)
Patch Set: rebase Created 3 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 # Copyright 2016 The Chromium Authors. All rights reserved. 1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import logging 5 import logging
6 import math 6 import math
7 7
8 from common import chrome_dependency_fetcher 8 from common.chrome_dependency_fetcher import ChromeDependencyFetcher
9 from crash import changelist_classifier 9 from crash import changelist_classifier
10 from crash.loglinear.changelist_features import min_distance 10 from crash.loglinear.changelist_features import min_distance
11 from crash.loglinear.changelist_features import top_frame_index 11 from crash.loglinear.changelist_features import top_frame_index
12 from crash.loglinear.model import ToFeatureFunction 12 from crash.loglinear.model import ToFeatureFunction
13 from crash.loglinear.model import UnnormalizedLogLinearModel 13 from crash.loglinear.model import UnnormalizedLogLinearModel
14 from crash.stacktrace import CallStack 14 from crash.stacktrace import CallStack
15 from crash.stacktrace import Stacktrace 15 from crash.stacktrace import Stacktrace
16 16
17 17
18 class LogLinearChangelistClassifier(object): 18 class LogLinearChangelistClassifier(object):
19 """A ``LogLinearModel``-based implementation of CL classification.""" 19 """A ``LogLinearModel``-based implementation of CL classification."""
20 20
21 def __init__(self, repository, weights, top_n_frames=7, top_n_suspects=3): 21 def __init__(self, repository, get_repository, weights, top_n_frames=7,
22 top_n_suspects=3):
22 """Args: 23 """Args:
23 repository (Repository): the Git repository for getting CLs to classify. 24 repository (Repository): the Git repository for getting CLs to classify.
25 get_repository (callable): a function from DEP urls to ``Repository``
26 objects, so we can get changelogs and blame for each dep. Notably,
27 to keep the code here generic, we make no assumptions about
28 which subclass of ``Repository`` this function returns. Thus,
29 it is up to the caller to decide what class to return and handle
30 any other arguments that class may require (e.g., an http client
31 for ``GitilesRepository``).
24 weights (dict of float): the weights for the features. The keys of 32 weights (dict of float): the weights for the features. The keys of
25 the dictionary are the names of the feature that weight is 33 the dictionary are the names of the feature that weight is
26 for. We take this argument as a dict rather than as a list so that 34 for. We take this argument as a dict rather than as a list so that
27 callers needn't worry about what order to provide the weights in. 35 callers needn't worry about what order to provide the weights in.
28 top_n_frames (int): how many frames of each callstack to look at. 36 top_n_frames (int): how many frames of each callstack to look at.
29 top_n_suspects (int): maximum number of suspects to return. 37 top_n_suspects (int): maximum number of suspects to return.
30 """ 38 """
31 self._repository = repository 39 self._repository = repository
40 self._dependency_fetcher = ChromeDependencyFetcher(self._repository)
41 self._get_repository = get_repository
32 self._top_n_frames = top_n_frames 42 self._top_n_frames = top_n_frames
33 self._top_n_suspects = top_n_suspects 43 self._top_n_suspects = top_n_suspects
34 44
35 feature_function = ToFeatureFunction([ 45 feature_function = ToFeatureFunction([
36 top_frame_index.TopFrameIndexFeature(top_n_frames), 46 top_frame_index.TopFrameIndexFeature(top_n_frames),
37 min_distance.MinDistanceFeature(), 47 min_distance.MinDistanceFeature(),
38 ]) 48 ])
39 49
40 weight_list = [ 50 weight_list = [
41 weights['TopFrameIndex'], 51 weights['TopFrameIndex'],
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
96 logging.info('ChangelistClassifier.__call__: Regression range %s:%s', 106 logging.info('ChangelistClassifier.__call__: Regression range %s:%s',
97 last_good_version, first_bad_version) 107 last_good_version, first_bad_version)
98 108
99 # We are only interested in the deps in crash stack (the callstack that 109 # We are only interested in the deps in crash stack (the callstack that
100 # caused the crash). 110 # caused the crash).
101 # TODO(wrengr): we may want to receive the crash deps as an argument, 111 # TODO(wrengr): we may want to receive the crash deps as an argument,
102 # so that when this method is called via Findit.FindCulprit, we avoid 112 # so that when this method is called via Findit.FindCulprit, we avoid
103 # doing redundant work creating it. 113 # doing redundant work creating it.
104 stack_deps = changelist_classifier.GetDepsInCrashStack( 114 stack_deps = changelist_classifier.GetDepsInCrashStack(
105 report.stacktrace.crash_stack, 115 report.stacktrace.crash_stack,
106 chrome_dependency_fetcher.ChromeDependencyFetcher( 116 self._dependency_fetcher.GetDependency(
107 self._repository).GetDependency(report.crashed_version, 117 report.crashed_version, report.platform))
108 report.platform))
109 118
110 # Get dep and file to changelogs, stack_info and blame dicts. 119 # Get dep and file to changelogs, stack_info and blame dicts.
111 dep_rolls = chrome_dependency_fetcher.ChromeDependencyFetcher( 120 dep_rolls = self._dependency_fetcher.GetDependencyRollsDict(
112 self._repository).GetDependencyRollsDict( 121 last_good_version, first_bad_version, report.platform)
113 last_good_version, first_bad_version, report.platform)
114 122
115 # Regression of a dep added/deleted (old_revision/new_revision is None) can 123 # Regression of a dep added/deleted (old_revision/new_revision is None) can
116 # not be known for sure and this case rarely happens, so just filter them 124 # not be known for sure and this case rarely happens, so just filter them
117 # out. 125 # out.
118 regression_deps_rolls = {} 126 regression_deps_rolls = {}
119 for dep_path, dep_roll in dep_rolls.iteritems(): 127 for dep_path, dep_roll in dep_rolls.iteritems():
120 if not dep_roll.old_revision or not dep_roll.new_revision: 128 if not dep_roll.old_revision or not dep_roll.new_revision:
121 logging.info('Skip %s denpendency %s', 129 logging.info('Skip %s denpendency %s',
122 'added' if dep_roll.new_revision else 'deleted', dep_path) 130 'added' if dep_roll.new_revision else 'deleted', dep_path)
123 continue 131 continue
124 regression_deps_rolls[dep_path] = dep_roll 132 regression_deps_rolls[dep_path] = dep_roll
125 133
126 dep_to_file_to_changelogs, ignore_cls = ( 134 dep_to_file_to_changelogs, ignore_cls = (
127 changelist_classifier.GetChangeLogsForFilesGroupedByDeps( 135 changelist_classifier.GetChangeLogsForFilesGroupedByDeps(
128 regression_deps_rolls, stack_deps, self._repository)) 136 regression_deps_rolls, stack_deps, self._repository))
129 dep_to_file_to_stack_infos = ( 137 dep_to_file_to_stack_infos = (
130 changelist_classifier.GetStackInfosForFilesGroupedByDeps( 138 changelist_classifier.GetStackInfosForFilesGroupedByDeps(
131 report.stacktrace, stack_deps)) 139 report.stacktrace, stack_deps))
132 140
133 # Get the possible suspects. 141 # Get the possible suspects.
134 suspects = changelist_classifier.FindSuspects( 142 suspects = changelist_classifier.FindSuspects(
135 dep_to_file_to_changelogs, 143 dep_to_file_to_changelogs,
136 dep_to_file_to_stack_infos, 144 dep_to_file_to_stack_infos,
137 stack_deps, 145 stack_deps,
138 self._repository, 146 self._get_repository,
139 ignore_cls) 147 ignore_cls)
140 if suspects is None: 148 if suspects is None:
141 return [] 149 return []
142 150
143 # Score the suspects and organize them for outputting/returning. 151 # Score the suspects and organize them for outputting/returning.
144 features_given_report = self._model.Features(report) 152 features_given_report = self._model.Features(report)
145 score_given_report = self._model.Score(report) 153 score_given_report = self._model.Score(report)
146 scored_suspects = [] 154 scored_suspects = []
147 for suspect in suspects: 155 for suspect in suspects:
148 score = score_given_report(suspect) 156 score = score_given_report(suspect)
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after
222 all_changed_files[changed_file.name] = changed_file 230 all_changed_files[changed_file.name] = changed_file
223 continue 231 continue
224 232
225 assert accumulated_changed_file.blame_url == changed_file.blame_url, ( 233 assert accumulated_changed_file.blame_url == changed_file.blame_url, (
226 ValueError('Blame URLs do not match: %s != %s' 234 ValueError('Blame URLs do not match: %s != %s'
227 % (accumulated_changed_file.blame_url, changed_file.blame_url))) 235 % (accumulated_changed_file.blame_url, changed_file.blame_url)))
228 accumulated_changed_file.reasons.extend(changed_file.reasons or []) 236 accumulated_changed_file.reasons.extend(changed_file.reasons or [])
229 237
230 return sorted(all_changed_files.values(), 238 return sorted(all_changed_files.values(),
231 key=lambda changed_file: changed_file.name) 239 key=lambda changed_file: changed_file.name)
OLD | NEW
« no previous file with comments | « appengine/findit/crash/findit_for_chromecrash.py ('k') | appengine/findit/crash/loglinear/test/changelist_classifier_test.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698