Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(674)

Side by Side Diff: appengine/findit/crash/changelist_classifier.py

Issue 2608483002: Changed FindSuspects to take a Repository factory, rather than mutating it (Closed)
Patch Set: Created 3 years, 12 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | appengine/findit/crash/findit_for_chromecrash.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 # Copyright 2016 The Chromium Authors. All rights reserved. 1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import logging 5 import logging
6 from collections import defaultdict 6 from collections import defaultdict
7 from collections import namedtuple 7 from collections import namedtuple
8 8
9 from common import chrome_dependency_fetcher 9 from common.chrome_dependency_fetcher import ChromeDependencyFetcher
10 from crash import crash_util 10 from crash import crash_util
11 from crash.suspect import StackInfo 11 from crash.suspect import StackInfo
12 from crash.suspect import Suspect 12 from crash.suspect import Suspect
13 from crash.suspect import SuspectMap 13 from crash.suspect import SuspectMap
14 from crash.scorers.aggregated_scorer import AggregatedScorer 14 from crash.scorers.aggregated_scorer import AggregatedScorer
15 from crash.scorers.min_distance import MinDistance 15 from crash.scorers.min_distance import MinDistance
16 from crash.scorers.top_frame_index import TopFrameIndex 16 from crash.scorers.top_frame_index import TopFrameIndex
17 from crash.stacktrace import CallStack 17 from crash.stacktrace import CallStack
18 from crash.stacktrace import Stacktrace 18 from crash.stacktrace import Stacktrace
19 from libs.gitiles.diff import ChangeType 19 from libs.gitiles.diff import ChangeType
20 20
21 21
22 class ChangelistClassifier(namedtuple('ChangelistClassifier', 22 class ChangelistClassifier(namedtuple('ChangelistClassifier',
23 ['repository', 'top_n_results', 'confidence_threshold'])): 23 ['repository', 'get_repository', 'top_n_results', 'confidence_threshold'])):
Sharu Jiang 2016/12/28 00:45:33 The ``get_repository`` here is not actually a fact
Sharu Jiang 2016/12/28 00:45:33 So after we cleaned the mutation in ``get_reposito
wrengr 2016/12/28 18:23:33 The ``get_repository`` argument is indeed (intende
wrengr 2016/12/28 18:23:33 In principle we could... but since the main ``repo
24 __slots__ = () 24 __slots__ = ()
25 25
26 def __new__(cls, repository, top_n_results=3, confidence_threshold=0.999): 26 def __new__(cls, repository, get_repository, top_n_results=3,
27 confidence_threshold=0.999):
27 """Args: 28 """Args:
28 repository (Repository): the Git repository for getting CLs to classify. 29 repository (Repository): the Git repository of the main project
30 we're trying to classify CLs from.
31 get_repository (callable): a function from DEP urls to ``Repository``
32 objects, so we can get changelogs and blame for each dep. Notably,
33 to keep the code here generic, we make no assumptions about
34 which subclass of ``Repository`` this function returns. Thus,
35 it is up to the caller to decide what class to return and handle
36 any other arguments that class may require (e.g., an http client
37 for ``GitilesRepository``).
29 top_n_results (int): maximum number of results to return. 38 top_n_results (int): maximum number of results to return.
30 confidence_threshold (float): In [0,1], above which we only return 39 confidence_threshold (float): In [0,1], above which we only return
31 the first suspect. 40 the first suspect.
32 """ 41 """
33 return super(cls, ChangelistClassifier).__new__( 42 return super(cls, ChangelistClassifier).__new__(
34 cls, repository, top_n_results, confidence_threshold) 43 cls, repository, get_repository, top_n_results, confidence_threshold)
35 44
36 def __str__(self): # pragma: no cover 45 def __str__(self): # pragma: no cover
37 return ('%s(top_n_results=%d, confidence_threshold=%g)' 46 return ('%s(top_n_results=%d, confidence_threshold=%g)'
38 % (self.__class__.__name__, 47 % (self.__class__.__name__,
39 self.top_n_results, 48 self.top_n_results,
40 self.confidence_threshold)) 49 self.confidence_threshold))
41 50
42 def __call__(self, report): 51 def __call__(self, report):
43 """Finds changelists suspected of being responsible for the crash report. 52 """Finds changelists suspected of being responsible for the crash report.
44 53
45 This function assumes the report's stacktrace has already had any necessary 54 This function assumes the report's stacktrace has already had any necessary
46 preprocessing (like filtering or truncating) applied. 55 preprocessing (like filtering or truncating) applied.
47 56
48 Args: 57 Args:
49 report (CrashReport): the report to be analyzed. 58 report (CrashReport): the report to be analyzed.
50 59
51 Returns: 60 Returns:
52 List of ``Suspect``s, sorted by confidence from highest to lowest. 61 List of ``Suspect``s, sorted by confidence from highest to lowest.
53 """ 62 """
54 if not report.regression_range: 63 if not report.regression_range:
55 logging.warning('ChangelistClassifier.__call__: Missing regression range ' 64 logging.warning('ChangelistClassifier.__call__: Missing regression range '
56 'for report: %s', str(report)) 65 'for report: %s', str(report))
57 return [] 66 return []
58 last_good_version, first_bad_version = report.regression_range 67 last_good_version, first_bad_version = report.regression_range
59 logging.info('ChangelistClassifier.__call__: Regression range %s:%s', 68 logging.info('ChangelistClassifier.__call__: Regression range %s:%s',
60 last_good_version, first_bad_version) 69 last_good_version, first_bad_version)
61 70
71 dependency_fetcher = ChromeDependencyFetcher(self.repository)
Sharu Jiang 2016/12/28 00:45:33 I think ``ChromeDependencyFetcher`` should also ta
wrengr 2016/12/28 18:23:33 I'm not sure I follow. Perhaps you could write up
wrengr 2016/12/28 19:09:24 ah, nm. I think I see what you mean now. I'm worki
wrengr 2016/12/28 20:55:52 See https://codereview.chromium.org/2605943002 for
72
62 # We are only interested in the deps in crash stack (the callstack that 73 # We are only interested in the deps in crash stack (the callstack that
63 # caused the crash). 74 # caused the crash).
64 # TODO(wrengr): we may want to receive the crash deps as an argument, 75 # TODO(wrengr): we may want to receive the crash deps as an argument,
65 # so that when this method is called via Findit.FindCulprit, we avoid 76 # so that when this method is called via Findit.FindCulprit, we avoid
66 # doing redundant work creating it. 77 # doing redundant work creating it.
67 stack_deps = GetDepsInCrashStack( 78 stack_deps = GetDepsInCrashStack(
68 report.stacktrace.crash_stack, 79 report.stacktrace.crash_stack,
69 chrome_dependency_fetcher.ChromeDependencyFetcher( 80 dependency_fetcher.GetDependency(
70 self.repository).GetDependency(report.crashed_version, 81 report.crashed_version, report.platform))
71 report.platform))
72 82
73 # Get dep and file to changelogs, stack_info and blame dicts. 83 # Get dep and file to changelogs, stack_info and blame dicts.
74 dep_rolls = chrome_dependency_fetcher.ChromeDependencyFetcher( 84 dep_rolls = dependency_fetcher.GetDependencyRollsDict(
75 self.repository).GetDependencyRollsDict( 85 last_good_version, first_bad_version, report.platform)
76 last_good_version, first_bad_version, report.platform)
77 86
78 # Regression of a dep added/deleted (old_revision/new_revision is None) can 87 # Regression of a dep added/deleted (old_revision/new_revision is None) can
79 # not be known for sure and this case rarely happens, so just filter them 88 # not be known for sure and this case rarely happens, so just filter them
80 # out. 89 # out.
81 regression_deps_rolls = {} 90 regression_deps_rolls = {}
82 for dep_path, dep_roll in dep_rolls.iteritems(): 91 for dep_path, dep_roll in dep_rolls.iteritems():
83 if not dep_roll.old_revision or not dep_roll.new_revision: 92 if not dep_roll.old_revision or not dep_roll.new_revision:
84 logging.info('Skip %s denpendency %s', 93 logging.info('Skip %s denpendency %s',
85 'added' if dep_roll.new_revision else 'deleted', dep_path) 94 'added' if dep_roll.new_revision else 'deleted', dep_path)
86 continue 95 continue
87 regression_deps_rolls[dep_path] = dep_roll 96 regression_deps_rolls[dep_path] = dep_roll
88 97
89 dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps( 98 dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps(
90 regression_deps_rolls, stack_deps, self.repository) 99 regression_deps_rolls, stack_deps, self.repository)
91 dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps( 100 dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps(
92 report.stacktrace, stack_deps) 101 report.stacktrace, stack_deps)
93 102
94 # TODO: argument order is inconsistent from others. Repository should
95 # be last argument.
96 suspects = FindSuspects(dep_to_file_to_changelogs, 103 suspects = FindSuspects(dep_to_file_to_changelogs,
97 dep_to_file_to_stack_infos, 104 dep_to_file_to_stack_infos,
98 stack_deps, self.repository, ignore_cls) 105 stack_deps, self.get_repository, ignore_cls)
99 if not suspects: 106 if not suspects:
100 return [] 107 return []
101 108
102 # Set confidence, reasons, and changed_files. 109 # Set confidence, reasons, and changed_files.
103 aggregated_scorer = AggregatedScorer([TopFrameIndex(), MinDistance()]) 110 aggregated_scorer = AggregatedScorer([TopFrameIndex(), MinDistance()])
104 map(aggregated_scorer.Score, suspects) 111 map(aggregated_scorer.Score, suspects)
105 112
106 # Filter all the 0 confidence results. 113 # Filter all the 0 confidence results.
107 suspects = filter(lambda suspect: suspect.confidence != 0, suspects) 114 suspects = filter(lambda suspect: suspect.confidence != 0, suspects)
108 if not suspects: 115 if not suspects:
(...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after
250 257
251 dep_to_file_to_stack_infos[frame.dep_path][frame.file_path].append( 258 dep_to_file_to_stack_infos[frame.dep_path][frame.file_path].append(
252 StackInfo(frame, callstack.priority)) 259 StackInfo(frame, callstack.priority))
253 260
254 return dep_to_file_to_stack_infos 261 return dep_to_file_to_stack_infos
255 262
256 263
257 # TODO(katesonia): Remove the repository argument after refatoring cl committed. 264 # TODO(katesonia): Remove the repository argument after refatoring cl committed.
258 def FindSuspects(dep_to_file_to_changelogs, 265 def FindSuspects(dep_to_file_to_changelogs,
259 dep_to_file_to_stack_infos, 266 dep_to_file_to_stack_infos,
260 stack_deps, repository, 267 stack_deps, get_repository,
261 ignore_cls=None): 268 ignore_cls=None):
262 """Finds suspects by matching stacktrace and changelogs in regression range. 269 """Finds suspects by matching stacktrace and changelogs in regression range.
263 270
264 This method only applies to those crashes with regression range. 271 This method only applies to those crashes with regression range.
265 272
266 Args: 273 Args:
267 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path 274 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path
268 to ChangeLogs that touched this file. 275 to ChangeLogs that touched this file.
269 dep_to_file_to_stack_infos (dict): Maps dep path to a dict mapping file path 276 dep_to_file_to_stack_infos (dict): Maps dep path to a dict mapping file path
270 to a list of stack information of this file. A file may occur in several 277 to a list of stack information of this file. A file may occur in several
271 frames, one stack info consist of a StackFrame and the callstack priority 278 frames, one stack info consist of a StackFrame and the callstack priority
272 of it. 279 of it.
273 stack_deps (dict): Represents all the dependencies shown in the crash stack. 280 stack_deps (dict): Represents all the dependencies shown in the crash stack.
274 repository (Repository): Repository to get changelogs and blame from. 281 get_repository (callable): a function from urls to ``Repository``
282 objects, so we can get changelogs and blame for each dep.
275 ignore_cls (set): Set of reverted revisions. 283 ignore_cls (set): Set of reverted revisions.
276 284
277 Returns: 285 Returns:
278 A list of ``Suspect`` instances with confidence and reason unset. 286 A list of ``Suspect`` instances with confidence and reason unset.
279 """ 287 """
280 suspects = SuspectMap(ignore_cls) 288 suspects = SuspectMap(ignore_cls)
281 289
282 for dep, file_to_stack_infos in dep_to_file_to_stack_infos.iteritems(): 290 for dep, file_to_stack_infos in dep_to_file_to_stack_infos.iteritems():
283 file_to_changelogs = dep_to_file_to_changelogs[dep] 291 file_to_changelogs = dep_to_file_to_changelogs[dep]
284 292
285 for crashed_file_path, stack_infos in file_to_stack_infos.iteritems(): 293 for crashed_file_path, stack_infos in file_to_stack_infos.iteritems():
286 for touched_file_path, changelogs in file_to_changelogs.iteritems(): 294 for touched_file_path, changelogs in file_to_changelogs.iteritems():
287 if not crash_util.IsSameFilePath(crashed_file_path, touched_file_path): 295 if not crash_util.IsSameFilePath(crashed_file_path, touched_file_path):
288 continue 296 continue
289 297
290 repository.repo_url = stack_deps[dep].repo_url 298 repository = get_repository(stack_deps[dep].repo_url)
291 blame = repository.GetBlame(touched_file_path, 299 blame = repository.GetBlame(touched_file_path,
292 stack_deps[dep].revision) 300 stack_deps[dep].revision)
293 301
294 # Generate/update each suspect(changelog) in changelogs, blame is used 302 # Generate/update each suspect(changelog) in changelogs, blame is used
295 # to calculate distance between touched lines and crashed lines in file. 303 # to calculate distance between touched lines and crashed lines in file.
296 suspects.GenerateSuspects( 304 suspects.GenerateSuspects(
297 touched_file_path, dep, stack_infos, changelogs, blame) 305 touched_file_path, dep, stack_infos, changelogs, blame)
298 306
299 return suspects.values() 307 return suspects.values()
OLDNEW
« no previous file with comments | « no previous file | appengine/findit/crash/findit_for_chromecrash.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698