Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(293)

Side by Side Diff: appengine/findit/crash/changelist_classifier.py

Issue 2588513002: [Predator] renamed "Result" to "Suspect" (Closed)
Patch Set: Removing redundant import Created 4 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | appengine/findit/crash/component_classifier.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2016 The Chromium Authors. All rights reserved. 1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import logging 5 import logging
6 from collections import defaultdict 6 from collections import defaultdict
7 from collections import namedtuple 7 from collections import namedtuple
8 8
9 from common import chrome_dependency_fetcher 9 from common import chrome_dependency_fetcher
10 from crash import crash_util 10 from crash import crash_util
11 from crash.results import MatchResults 11 from crash.suspect import Suspect
12 from crash.suspect import Suspects
12 from crash.scorers.aggregated_scorer import AggregatedScorer 13 from crash.scorers.aggregated_scorer import AggregatedScorer
13 from crash.scorers.min_distance import MinDistance 14 from crash.scorers.min_distance import MinDistance
14 from crash.scorers.top_frame_index import TopFrameIndex 15 from crash.scorers.top_frame_index import TopFrameIndex
15 from crash.stacktrace import CallStack 16 from crash.stacktrace import CallStack
16 from crash.stacktrace import Stacktrace 17 from crash.stacktrace import Stacktrace
17 from libs.gitiles.diff import ChangeType 18 from libs.gitiles.diff import ChangeType
18 19
19 20
20 class ChangelistClassifier(namedtuple('ChangelistClassifier', 21 class ChangelistClassifier(namedtuple('ChangelistClassifier',
21 ['repository', 'top_n_frames', 'top_n_results', 'confidence_threshold'])): 22 ['repository', 'top_n_frames', 'top_n_results', 'confidence_threshold'])):
22 __slots__ = () 23 __slots__ = ()
23 24
24 def __new__(cls, repository, 25 def __new__(cls, repository,
25 top_n_frames, top_n_results=3, confidence_threshold=0.999): 26 top_n_frames, top_n_results=3, confidence_threshold=0.999):
26 """Args: 27 """Args:
27 repository (Repository): the Git repository for getting CLs to classify. 28 repository (Repository): the Git repository for getting CLs to classify.
28 top_n_frames (int): how many frames of each callstack to look at. 29 top_n_frames (int): how many frames of each callstack to look at.
29 top_n_results (int): maximum number of results to return. 30 top_n_results (int): maximum number of suspects to return.
30 confidence_threshold (float): In [0,1], above which we only return 31 confidence_threshold (float): In [0,1], above which we only return
31 the first result. 32 the first suspect.
32 """ 33 """
33 return super(cls, ChangelistClassifier).__new__(cls, 34 return super(cls, ChangelistClassifier).__new__(cls,
34 repository, top_n_frames, top_n_results, confidence_threshold) 35 repository, top_n_frames, top_n_results, confidence_threshold)
35 36
36 def __str__(self): # pragma: no cover 37 def __str__(self): # pragma: no cover
37 return ('%s(top_n_frames=%d, top_n_results=%d, confidence_threshold=%g)' 38 return ('%s(top_n_frames=%d, top_n_results=%d, confidence_threshold=%g)'
38 % (self.__class__.__name__, 39 % (self.__class__.__name__,
39 self.top_n_frames, 40 self.top_n_frames,
40 self.top_n_results, 41 self.top_n_results,
41 self.confidence_threshold)) 42 self.confidence_threshold))
42 43
43 def __call__(self, report): 44 def __call__(self, report):
44 """Finds changelists suspected of being responsible for the crash report. 45 """Finds changelists suspected of being responsible for the crash report.
45 46
46 Args: 47 Args:
47 report (CrashReport): the report to be analyzed. 48 report (CrashReport): the report to be analyzed.
48 49
49 Returns: 50 Returns:
50 List of Results, sorted by confidence from highest to lowest. 51 List of ``Suspect``s, sorted by confidence from highest to lowest.
51 """ 52 """
52 if not report.regression_range: 53 if not report.regression_range:
53 logging.warning('ChangelistClassifier.__call__: Missing regression range ' 54 logging.warning('ChangelistClassifier.__call__: Missing regression range '
54 'for report: %s', str(report)) 55 'for report: %s', str(report))
55 return [] 56 return []
56 last_good_version, first_bad_version = report.regression_range 57 last_good_version, first_bad_version = report.regression_range
57 logging.info('ChangelistClassifier.__call__: Regression range %s:%s', 58 logging.info('ChangelistClassifier.__call__: Regression range %s:%s',
58 last_good_version, first_bad_version) 59 last_good_version, first_bad_version)
59 60
60 # Restrict analysis to just the top n frames in each callstack. 61 # Restrict analysis to just the top n frames in each callstack.
(...skipping 28 matching lines...) Expand all
89 continue 90 continue
90 regression_deps_rolls[dep_path] = dep_roll 91 regression_deps_rolls[dep_path] = dep_roll
91 92
92 dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps( 93 dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps(
93 regression_deps_rolls, stack_deps, self.repository) 94 regression_deps_rolls, stack_deps, self.repository)
94 dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps( 95 dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps(
95 stacktrace, stack_deps) 96 stacktrace, stack_deps)
96 97
97 # TODO: argument order is inconsistent from others. Repository should 98 # TODO: argument order is inconsistent from others. Repository should
98 # be last argument. 99 # be last argument.
99 results = FindMatchResults(dep_to_file_to_changelogs, 100 suspects = FindSuspects(dep_to_file_to_changelogs,
100 dep_to_file_to_stack_infos, 101 dep_to_file_to_stack_infos,
101 stack_deps, self.repository, ignore_cls) 102 stack_deps, self.repository, ignore_cls)
102 if not results: 103 if not suspects:
103 return [] 104 return []
104 105
105 # TODO(wrengr): we should be able to do this map/filter/sort in one pass. 106 # Set confidence, reasons, and changed_files.
106 # Set result.confidence, result.reasons and result.changed_files.
107 aggregated_scorer = AggregatedScorer([TopFrameIndex(), MinDistance()]) 107 aggregated_scorer = AggregatedScorer([TopFrameIndex(), MinDistance()])
108 map(aggregated_scorer.Score, results) 108 map(aggregated_scorer.Score, suspects)
109 109
110 # Filter all the 0 confidence results. 110 # Filter all the 0 confidence results.
111 results = filter(lambda r: r.confidence != 0, results) 111 suspects = filter(lambda suspect: suspect.confidence != 0, suspects)
112 if not results: 112 if not suspects:
113 return [] 113 return []
114 114
115 sorted_results = sorted(results, key=lambda r: -r.confidence) 115 suspects.sort(key=lambda suspect: -suspect.confidence)
116 116
117 max_results = (1 if sorted_results[0].confidence > self.confidence_threshold 117 max_results = (1 if suspects[0].confidence > self.confidence_threshold
118 else self.top_n_results) 118 else self.top_n_results)
119 119
120 return sorted_results[:max_results] 120 return suspects[:max_results]
121 121
122 122
123 def GetDepsInCrashStack(crash_stack, crash_deps): 123 def GetDepsInCrashStack(crash_stack, crash_deps):
124 """Gets Dependencies in crash stack.""" 124 """Gets Dependencies in crash stack."""
125 if not crash_stack: 125 if not crash_stack:
126 return {} 126 return {}
127 127
128 stack_deps = {} 128 stack_deps = {}
129 for frame in crash_stack: 129 for frame in crash_stack:
130 if frame.dep_path: 130 if frame.dep_path:
(...skipping 126 matching lines...) Expand 10 before | Expand all | Expand 10 after
257 if frame.dep_path not in stack_deps: 257 if frame.dep_path not in stack_deps:
258 continue 258 continue
259 259
260 dep_to_file_to_stack_infos[frame.dep_path][frame.file_path].append(( 260 dep_to_file_to_stack_infos[frame.dep_path][frame.file_path].append((
261 frame, callstack.priority)) 261 frame, callstack.priority))
262 262
263 return dep_to_file_to_stack_infos 263 return dep_to_file_to_stack_infos
264 264
265 265
266 # TODO(katesonia): Remove the repository argument after the refactoring CL is committed. 266 # TODO(katesonia): Remove the repository argument after the refactoring CL is committed.
267 def FindMatchResults(dep_to_file_to_changelogs, 267 def FindSuspects(dep_to_file_to_changelogs,
268 dep_to_file_to_stack_infos, 268 dep_to_file_to_stack_infos,
269 stack_deps, repository, 269 stack_deps, repository,
270 ignore_cls=None): 270 ignore_cls=None):
271 """Finds results by matching stacktrace and changelogs in regression range. 271 """Finds suspects by matching stacktrace and changelogs in regression range.
272 272
273 This method only applies to those crashes with regression range. 273 This method only applies to those crashes with regression range.
274 274
275 Args: 275 Args:
276 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path 276 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path
277 to ChangeLogs that touched this file. 277 to ChangeLogs that touched this file.
278 dep_to_file_to_stack_infos (dict): Maps dep path to a dict mapping file path 278 dep_to_file_to_stack_infos (dict): Maps dep path to a dict mapping file path
279 to a list of stack information of this file. A file may occur in several 279 to a list of stack information of this file. A file may occur in several
280 frames; one stack info consists of a StackFrame and the callstack priority 280 frames; one stack info consists of a StackFrame and the callstack priority
281 of it. 281 of it.
282 stack_deps (dict): Represents all the dependencies shown in the crash stack. 282 stack_deps (dict): Represents all the dependencies shown in the crash stack.
283 repository (Repository): Repository to get changelogs and blame from. 283 repository (Repository): Repository to get changelogs and blame from.
284 ignore_cls (set): Set of reverted revisions. 284 ignore_cls (set): Set of reverted revisions.
285 285
286 Returns: 286 Returns:
287 A list of MatchResult instances with confidence and reason unset. 287 A list of ``Suspect`` instances with confidence and reason unset.
288 """ 288 """
289 match_results = MatchResults(ignore_cls) 289 suspects = Suspects(ignore_cls)
290 290
291 for dep, file_to_stack_infos in dep_to_file_to_stack_infos.iteritems(): 291 for dep, file_to_stack_infos in dep_to_file_to_stack_infos.iteritems():
292 file_to_changelogs = dep_to_file_to_changelogs[dep] 292 file_to_changelogs = dep_to_file_to_changelogs[dep]
293 293
294 for crashed_file_path, stack_infos in file_to_stack_infos.iteritems(): 294 for crashed_file_path, stack_infos in file_to_stack_infos.iteritems():
295 for touched_file_path, changelogs in file_to_changelogs.iteritems(): 295 for touched_file_path, changelogs in file_to_changelogs.iteritems():
296 if not crash_util.IsSameFilePath(crashed_file_path, touched_file_path): 296 if not crash_util.IsSameFilePath(crashed_file_path, touched_file_path):
297 continue 297 continue
298 298
299 repository.repo_url = stack_deps[dep].repo_url 299 repository.repo_url = stack_deps[dep].repo_url
300 blame = repository.GetBlame(touched_file_path, 300 blame = repository.GetBlame(touched_file_path,
301 stack_deps[dep].revision) 301 stack_deps[dep].revision)
302 302
303 # Generate/update each result(changelog) in changelogs, blame is used 303 # Generate/update each suspect(changelog) in changelogs, blame is used
304 # to calculate distance between touched lines and crashed lines in file. 304 # to calculate distance between touched lines and crashed lines in file.
305 match_results.GenerateMatchResults( 305 suspects.GenerateSuspects(
306 touched_file_path, dep, stack_infos, changelogs, blame) 306 touched_file_path, dep, stack_infos, changelogs, blame)
307 307
308 return match_results.values() 308 return suspects.values()
OLDNEW
« no previous file with comments | « no previous file | appengine/findit/crash/component_classifier.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698