Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(46)

Side by Side Diff: appengine/findit/crash/changelist_classifier.py

Issue 2605943002: Removing the mutation in the factories for getting dep repositories (Closed)
Patch Set: Added the Factory method to CachedGitilesRepository Created 3 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2016 The Chromium Authors. All rights reserved. 1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import logging 5 import logging
6 from collections import defaultdict 6 from collections import defaultdict
7 from collections import namedtuple 7 from collections import namedtuple
8 8
9 from common.chrome_dependency_fetcher import ChromeDependencyFetcher 9 from common.chrome_dependency_fetcher import ChromeDependencyFetcher
10 from crash import crash_util 10 from crash import crash_util
11 from crash.suspect import StackInfo 11 from crash.suspect import StackInfo
12 from crash.suspect import Suspect 12 from crash.suspect import Suspect
13 from crash.suspect import SuspectMap 13 from crash.suspect import SuspectMap
14 from crash.scorers.aggregated_scorer import AggregatedScorer 14 from crash.scorers.aggregated_scorer import AggregatedScorer
15 from crash.scorers.min_distance import MinDistance 15 from crash.scorers.min_distance import MinDistance
16 from crash.scorers.top_frame_index import TopFrameIndex 16 from crash.scorers.top_frame_index import TopFrameIndex
17 from crash.stacktrace import CallStack 17 from crash.stacktrace import CallStack
18 from crash.stacktrace import Stacktrace 18 from crash.stacktrace import Stacktrace
19 from libs.gitiles.diff import ChangeType 19 from libs.gitiles.diff import ChangeType
20 20
21 21
22 class ChangelistClassifier(namedtuple('ChangelistClassifier', 22 class ChangelistClassifier(namedtuple('ChangelistClassifier',
23 ['repository', 'get_repository', 'top_n_results', 'confidence_threshold'])): 23 ['get_repository', 'top_n_results', 'confidence_threshold'])):
24 __slots__ = () 24 __slots__ = ()
25 25
26 def __new__(cls, repository, get_repository, top_n_results=3, 26 def __new__(cls, get_repository, top_n_results=3, confidence_threshold=0.999):
27 confidence_threshold=0.999):
28 """Args: 27 """Args:
29 repository (Repository): the Git repository of the main project
30 we're trying to classify CLs from.
31 get_repository (callable): a function from DEP urls to ``Repository`` 28 get_repository (callable): a function from DEP urls to ``Repository``
32 objects, so we can get changelogs and blame for each dep. Notably, 29 objects, so we can get changelogs and blame for each dep. Notably,
33 to keep the code here generic, we make no assumptions about 30 to keep the code here generic, we make no assumptions about
34 which subclass of ``Repository`` this function returns. Thus, 31 which subclass of ``Repository`` this function returns. Thus,
35 it is up to the caller to decide what class to return and handle 32 it is up to the caller to decide what class to return and handle
36 any other arguments that class may require (e.g., an http client 33 any other arguments that class may require (e.g., an http client
37 for ``GitilesRepository``). 34 for ``GitilesRepository``).
38 top_n_results (int): maximum number of results to return. 35 top_n_results (int): maximum number of results to return.
39 confidence_threshold (float): In [0,1], above which we only return 36 confidence_threshold (float): In [0,1], above which we only return
40 the first suspect. 37 the first suspect.
41 """ 38 """
42 return super(cls, ChangelistClassifier).__new__( 39 return super(cls, ChangelistClassifier).__new__(
43 cls, repository, get_repository, top_n_results, confidence_threshold) 40 cls, get_repository, top_n_results, confidence_threshold)
44 41
45 def __str__(self): # pragma: no cover 42 def __str__(self): # pragma: no cover
46 return ('%s(top_n_results=%d, confidence_threshold=%g)' 43 return ('%s(top_n_results=%d, confidence_threshold=%g)'
47 % (self.__class__.__name__, 44 % (self.__class__.__name__,
48 self.top_n_results, 45 self.top_n_results,
49 self.confidence_threshold)) 46 self.confidence_threshold))
50 47
51 def __call__(self, report): 48 def __call__(self, report):
52 """Finds changelists suspected of being responsible for the crash report. 49 """Finds changelists suspected of being responsible for the crash report.
53 50
54 This function assumes the report's stacktrace has already had any necessary 51 This function assumes the report's stacktrace has already had any necessary
55 preprocessing (like filtering or truncating) applied. 52 preprocessing (like filtering or truncating) applied.
56 53
57 Args: 54 Args:
58 report (CrashReport): the report to be analyzed. 55 report (CrashReport): the report to be analyzed.
59 56
60 Returns: 57 Returns:
61 List of ``Suspect``s, sorted by confidence from highest to lowest. 58 List of ``Suspect``s, sorted by confidence from highest to lowest.
62 """ 59 """
63 if not report.regression_range: 60 if not report.regression_range:
64 logging.warning('ChangelistClassifier.__call__: Missing regression range ' 61 logging.warning('ChangelistClassifier.__call__: Missing regression range '
65 'for report: %s', str(report)) 62 'for report: %s', str(report))
66 return [] 63 return []
67 last_good_version, first_bad_version = report.regression_range 64 last_good_version, first_bad_version = report.regression_range
68 logging.info('ChangelistClassifier.__call__: Regression range %s:%s', 65 logging.info('ChangelistClassifier.__call__: Regression range %s:%s',
69 last_good_version, first_bad_version) 66 last_good_version, first_bad_version)
70 67
71 dependency_fetcher = ChromeDependencyFetcher(self.repository) 68 dependency_fetcher = ChromeDependencyFetcher(self.get_repository)
72 69
73 # We are only interested in the deps in crash stack (the callstack that 70 # We are only interested in the deps in crash stack (the callstack that
74 # caused the crash). 71 # caused the crash).
75 # TODO(wrengr): we may want to receive the crash deps as an argument, 72 # TODO(wrengr): we may want to receive the crash deps as an argument,
76 # so that when this method is called via Findit.FindCulprit, we avoid 73 # so that when this method is called via Findit.FindCulprit, we avoid
77 # doing redundant work creating it. 74 # doing redundant work creating it.
78 stack_deps = GetDepsInCrashStack( 75 stack_deps = GetDepsInCrashStack(
79 report.stacktrace.crash_stack, 76 report.stacktrace.crash_stack,
80 dependency_fetcher.GetDependency( 77 dependency_fetcher.GetDependency(
81 report.crashed_version, report.platform)) 78 report.crashed_version, report.platform))
82 79
83 # Get dep and file to changelogs, stack_info and blame dicts. 80 # Get dep and file to changelogs, stack_info and blame dicts.
84 dep_rolls = dependency_fetcher.GetDependencyRollsDict( 81 dep_rolls = dependency_fetcher.GetDependencyRollsDict(
85 last_good_version, first_bad_version, report.platform) 82 last_good_version, first_bad_version, report.platform)
86 83
87 # Regression of a dep added/deleted (old_revision/new_revision is None) can 84 # Regression of a dep added/deleted (old_revision/new_revision is None) can
88 # not be known for sure and this case rarely happens, so just filter them 85 # not be known for sure and this case rarely happens, so just filter them
89 # out. 86 # out.
90 regression_deps_rolls = {} 87 regression_deps_rolls = {}
91 for dep_path, dep_roll in dep_rolls.iteritems(): 88 for dep_path, dep_roll in dep_rolls.iteritems():
92 if not dep_roll.old_revision or not dep_roll.new_revision: 89 if not dep_roll.old_revision or not dep_roll.new_revision:
93 logging.info('Skip %s denpendency %s', 90 logging.info('Skip %s denpendency %s',
94 'added' if dep_roll.new_revision else 'deleted', dep_path) 91 'added' if dep_roll.new_revision else 'deleted', dep_path)
95 continue 92 continue
96 regression_deps_rolls[dep_path] = dep_roll 93 regression_deps_rolls[dep_path] = dep_roll
97 94
98 dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps( 95 dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps(
99 regression_deps_rolls, stack_deps, self.repository) 96 regression_deps_rolls, stack_deps, self.get_repository)
100 dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps( 97 dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps(
101 report.stacktrace, stack_deps) 98 report.stacktrace, stack_deps)
102 99
103 suspects = FindSuspects(dep_to_file_to_changelogs, 100 suspects = FindSuspects(dep_to_file_to_changelogs,
104 dep_to_file_to_stack_infos, 101 dep_to_file_to_stack_infos,
105 stack_deps, self.get_repository, ignore_cls) 102 stack_deps, self.get_repository, ignore_cls)
106 if not suspects: 103 if not suspects:
107 return [] 104 return []
108 105
109 # Set confidence, reasons, and changed_files. 106 # Set confidence, reasons, and changed_files.
(...skipping 21 matching lines...) Expand all
131 stack_deps = {} 128 stack_deps = {}
132 for frame in crash_stack: 129 for frame in crash_stack:
133 if frame.dep_path: 130 if frame.dep_path:
134 stack_deps[frame.dep_path] = crash_deps[frame.dep_path] 131 stack_deps[frame.dep_path] = crash_deps[frame.dep_path]
135 132
136 return stack_deps 133 return stack_deps
137 134
138 135
139 # TODO(katesonia): Remove the repository argument after refatoring cl committed. 136 # TODO(katesonia): Remove the repository argument after refatoring cl committed.
140 def GetChangeLogsForFilesGroupedByDeps(regression_deps_rolls, stack_deps, 137 def GetChangeLogsForFilesGroupedByDeps(regression_deps_rolls, stack_deps,
141 repository): 138 get_repository):
142 """Gets a dict containing files touched by changelogs for deps in stack_deps. 139 """Gets a dict containing files touched by changelogs for deps in stack_deps.
143 140
144 Regression ranges for each dep is determined by regression_deps_rolls. 141 Regression ranges for each dep is determined by regression_deps_rolls.
145 Changelogs which were reverted are returned in a reverted_cls set. 142 Changelogs which were reverted are returned in a reverted_cls set.
146 143
147 Args: 144 Args:
148 regression_deps_rolls (dict): Maps dep_path to DependencyRoll in 145 regression_deps_rolls (dict): Maps dep_path to DependencyRoll in
149 regression range. 146 regression range.
150 stack_deps (dict): Represents all the dependencies shown in 147 stack_deps (dict): Represents all the dependencies shown in
151 the crash stack. 148 the crash stack.
152 repository (Repository): Repository to get changelogs from. 149 get_repository (callable): a function from DEP urls to ``Repository``
150 objects, so we can get changelogs and blame for each dep. Notably,
151 to keep the code here generic, we make no assumptions about
152 which subclass of ``Repository`` this function returns. Thus,
153 it is up to the caller to decide what class to return and handle
154 any other arguments that class may require (e.g., an http client
155 for ``GitilesRepository``).
153 156
154 Returns: 157 Returns:
155 A tuple (dep_to_file_to_changelogs, reverted_cls). 158 A tuple (dep_to_file_to_changelogs, reverted_cls).
156 159
157 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path 160 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path
158 to ChangeLogs that touched this file. 161 to ChangeLogs that touched this file.
159 For example: 162 For example:
160 { 163 {
161 'src/': { 164 'src/': {
162 'a.cc': [ 165 'a.cc': [
(...skipping 29 matching lines...) Expand all
192 dep_to_file_to_changelogs = defaultdict(lambda: defaultdict(list)) 195 dep_to_file_to_changelogs = defaultdict(lambda: defaultdict(list))
193 reverted_cls = set() 196 reverted_cls = set()
194 197
195 for dep in stack_deps: 198 for dep in stack_deps:
196 # If a dep is not in regression range, than it cannot be the dep of 199 # If a dep is not in regression range, than it cannot be the dep of
197 # culprits. 200 # culprits.
198 dep_roll = regression_deps_rolls.get(dep) 201 dep_roll = regression_deps_rolls.get(dep)
199 if not dep_roll: 202 if not dep_roll:
200 continue 203 continue
201 204
202 repository.repo_url = dep_roll.repo_url 205 repository = get_repository(dep_roll.repo_url)
203 changelogs = repository.GetChangeLogs(dep_roll.old_revision, 206 changelogs = repository.GetChangeLogs(dep_roll.old_revision,
204 dep_roll.new_revision) 207 dep_roll.new_revision)
205 208
206 for changelog in changelogs or []: 209 for changelog in changelogs or []:
207 # When someone reverts, we need to skip both the CL doing 210 # When someone reverts, we need to skip both the CL doing
208 # the reverting as well as the CL that got reverted. If 211 # the reverting as well as the CL that got reverted. If
209 # ``reverted_revision`` is true, then this CL reverts another one, 212 # ``reverted_revision`` is true, then this CL reverts another one,
210 # so we skip it and save the CL it reverts in ``reverted_cls`` to 213 # so we skip it and save the CL it reverts in ``reverted_cls`` to
211 # be filtered out later. 214 # be filtered out later.
212 if changelog.reverted_revision: 215 if changelog.reverted_revision:
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after
298 repository = get_repository(stack_deps[dep].repo_url) 301 repository = get_repository(stack_deps[dep].repo_url)
299 blame = repository.GetBlame(touched_file_path, 302 blame = repository.GetBlame(touched_file_path,
300 stack_deps[dep].revision) 303 stack_deps[dep].revision)
301 304
302 # Generate/update each suspect(changelog) in changelogs, blame is used 305 # Generate/update each suspect(changelog) in changelogs, blame is used
303 # to calculate distance between touched lines and crashed lines in file. 306 # to calculate distance between touched lines and crashed lines in file.
304 suspects.GenerateSuspects( 307 suspects.GenerateSuspects(
305 touched_file_path, dep, stack_infos, changelogs, blame) 308 touched_file_path, dep, stack_infos, changelogs, blame)
306 309
307 return suspects.values() 310 return suspects.values()
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698