OLD | NEW |
1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 import logging | 5 import logging |
6 from collections import defaultdict | 6 from collections import defaultdict |
7 from collections import namedtuple | 7 from collections import namedtuple |
8 | 8 |
9 from common.chrome_dependency_fetcher import ChromeDependencyFetcher | 9 from common.chrome_dependency_fetcher import ChromeDependencyFetcher |
10 from crash import crash_util | 10 from crash import crash_util |
11 from crash.suspect import StackInfo | 11 from crash.suspect import StackInfo |
12 from crash.suspect import Suspect | 12 from crash.suspect import Suspect |
13 from crash.suspect import SuspectMap | 13 from crash.suspect import SuspectMap |
14 from crash.scorers.aggregated_scorer import AggregatedScorer | 14 from crash.scorers.aggregated_scorer import AggregatedScorer |
15 from crash.scorers.min_distance import MinDistance | 15 from crash.scorers.min_distance import MinDistance |
16 from crash.scorers.top_frame_index import TopFrameIndex | 16 from crash.scorers.top_frame_index import TopFrameIndex |
17 from crash.stacktrace import CallStack | 17 from crash.stacktrace import CallStack |
18 from crash.stacktrace import Stacktrace | 18 from crash.stacktrace import Stacktrace |
19 from libs.gitiles.diff import ChangeType | 19 from libs.gitiles.diff import ChangeType |
20 | 20 |
21 | 21 |
22 class ChangelistClassifier(namedtuple('ChangelistClassifier', | 22 class ChangelistClassifier(namedtuple('ChangelistClassifier', |
23 ['repository', 'get_repository', 'top_n_results', 'confidence_threshold'])): | 23 ['get_repository', 'top_n_results', 'confidence_threshold'])): |
24 __slots__ = () | 24 __slots__ = () |
25 | 25 |
26 def __new__(cls, repository, get_repository, top_n_results=3, | 26 def __new__(cls, get_repository, top_n_results=3, confidence_threshold=0.999): |
27 confidence_threshold=0.999): | |
28 """Args: | 27 """Args: |
29 repository (Repository): the Git repository of the main project | |
30 we're trying to classify CLs from. | |
31 get_repository (callable): a function from DEP urls to ``Repository`` | 28 get_repository (callable): a function from DEP urls to ``Repository`` |
32 objects, so we can get changelogs and blame for each dep. Notably, | 29 objects, so we can get changelogs and blame for each dep. Notably, |
33 to keep the code here generic, we make no assumptions about | 30 to keep the code here generic, we make no assumptions about |
34 which subclass of ``Repository`` this function returns. Thus, | 31 which subclass of ``Repository`` this function returns. Thus, |
35 it is up to the caller to decide what class to return and handle | 32 it is up to the caller to decide what class to return and handle |
36 any other arguments that class may require (e.g., an http client | 33 any other arguments that class may require (e.g., an http client |
37 for ``GitilesRepository``). | 34 for ``GitilesRepository``). |
38 top_n_results (int): maximum number of results to return. | 35 top_n_results (int): maximum number of results to return. |
39 confidence_threshold (float): In [0,1], above which we only return | 36 confidence_threshold (float): In [0,1], above which we only return |
40 the first suspect. | 37 the first suspect. |
41 """ | 38 """ |
42 return super(cls, ChangelistClassifier).__new__( | 39 return super(cls, ChangelistClassifier).__new__( |
43 cls, repository, get_repository, top_n_results, confidence_threshold) | 40 cls, get_repository, top_n_results, confidence_threshold) |
44 | 41 |
45 def __str__(self): # pragma: no cover | 42 def __str__(self): # pragma: no cover |
46 return ('%s(top_n_results=%d, confidence_threshold=%g)' | 43 return ('%s(top_n_results=%d, confidence_threshold=%g)' |
47 % (self.__class__.__name__, | 44 % (self.__class__.__name__, |
48 self.top_n_results, | 45 self.top_n_results, |
49 self.confidence_threshold)) | 46 self.confidence_threshold)) |
50 | 47 |
51 def __call__(self, report): | 48 def __call__(self, report): |
52 """Finds changelists suspected of being responsible for the crash report. | 49 """Finds changelists suspected of being responsible for the crash report. |
53 | 50 |
54 This function assumes the report's stacktrace has already had any necessary | 51 This function assumes the report's stacktrace has already had any necessary |
55 preprocessing (like filtering or truncating) applied. | 52 preprocessing (like filtering or truncating) applied. |
56 | 53 |
57 Args: | 54 Args: |
58 report (CrashReport): the report to be analyzed. | 55 report (CrashReport): the report to be analyzed. |
59 | 56 |
60 Returns: | 57 Returns: |
61 List of ``Suspect``s, sorted by confidence from highest to lowest. | 58 List of ``Suspect``s, sorted by confidence from highest to lowest. |
62 """ | 59 """ |
63 if not report.regression_range: | 60 if not report.regression_range: |
64 logging.warning('ChangelistClassifier.__call__: Missing regression range ' | 61 logging.warning('ChangelistClassifier.__call__: Missing regression range ' |
65 'for report: %s', str(report)) | 62 'for report: %s', str(report)) |
66 return [] | 63 return [] |
67 last_good_version, first_bad_version = report.regression_range | 64 last_good_version, first_bad_version = report.regression_range |
68 logging.info('ChangelistClassifier.__call__: Regression range %s:%s', | 65 logging.info('ChangelistClassifier.__call__: Regression range %s:%s', |
69 last_good_version, first_bad_version) | 66 last_good_version, first_bad_version) |
70 | 67 |
71 dependency_fetcher = ChromeDependencyFetcher(self.repository) | 68 dependency_fetcher = ChromeDependencyFetcher(self.get_repository) |
72 | 69 |
73 # We are only interested in the deps in crash stack (the callstack that | 70 # We are only interested in the deps in crash stack (the callstack that |
74 # caused the crash). | 71 # caused the crash). |
75 # TODO(wrengr): we may want to receive the crash deps as an argument, | 72 # TODO(wrengr): we may want to receive the crash deps as an argument, |
76 # so that when this method is called via Findit.FindCulprit, we avoid | 73 # so that when this method is called via Findit.FindCulprit, we avoid |
77 # doing redundant work creating it. | 74 # doing redundant work creating it. |
78 stack_deps = GetDepsInCrashStack( | 75 stack_deps = GetDepsInCrashStack( |
79 report.stacktrace.crash_stack, | 76 report.stacktrace.crash_stack, |
80 dependency_fetcher.GetDependency( | 77 dependency_fetcher.GetDependency( |
81 report.crashed_version, report.platform)) | 78 report.crashed_version, report.platform)) |
82 | 79 |
83 # Get dep and file to changelogs, stack_info and blame dicts. | 80 # Get dep and file to changelogs, stack_info and blame dicts. |
84 dep_rolls = dependency_fetcher.GetDependencyRollsDict( | 81 dep_rolls = dependency_fetcher.GetDependencyRollsDict( |
85 last_good_version, first_bad_version, report.platform) | 82 last_good_version, first_bad_version, report.platform) |
86 | 83 |
87 # Regression of a dep added/deleted (old_revision/new_revision is None) can | 84 # Regression of a dep added/deleted (old_revision/new_revision is None) can |
88 # not be known for sure and this case rarely happens, so just filter them | 85 # not be known for sure and this case rarely happens, so just filter them |
89 # out. | 86 # out. |
90 regression_deps_rolls = {} | 87 regression_deps_rolls = {} |
91 for dep_path, dep_roll in dep_rolls.iteritems(): | 88 for dep_path, dep_roll in dep_rolls.iteritems(): |
92 if not dep_roll.old_revision or not dep_roll.new_revision: | 89 if not dep_roll.old_revision or not dep_roll.new_revision: |
93 logging.info('Skip %s denpendency %s', | 90 logging.info('Skip %s denpendency %s', |
94 'added' if dep_roll.new_revision else 'deleted', dep_path) | 91 'added' if dep_roll.new_revision else 'deleted', dep_path) |
95 continue | 92 continue |
96 regression_deps_rolls[dep_path] = dep_roll | 93 regression_deps_rolls[dep_path] = dep_roll |
97 | 94 |
98 dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps( | 95 dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps( |
99 regression_deps_rolls, stack_deps, self.repository) | 96 regression_deps_rolls, stack_deps, self.get_repository) |
100 dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps( | 97 dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps( |
101 report.stacktrace, stack_deps) | 98 report.stacktrace, stack_deps) |
102 | 99 |
103 suspects = FindSuspects(dep_to_file_to_changelogs, | 100 suspects = FindSuspects(dep_to_file_to_changelogs, |
104 dep_to_file_to_stack_infos, | 101 dep_to_file_to_stack_infos, |
105 stack_deps, self.get_repository, ignore_cls) | 102 stack_deps, self.get_repository, ignore_cls) |
106 if not suspects: | 103 if not suspects: |
107 return [] | 104 return [] |
108 | 105 |
109 # Set confidence, reasons, and changed_files. | 106 # Set confidence, reasons, and changed_files. |
(...skipping 21 matching lines...) Expand all Loading... |
131 stack_deps = {} | 128 stack_deps = {} |
132 for frame in crash_stack: | 129 for frame in crash_stack: |
133 if frame.dep_path: | 130 if frame.dep_path: |
134 stack_deps[frame.dep_path] = crash_deps[frame.dep_path] | 131 stack_deps[frame.dep_path] = crash_deps[frame.dep_path] |
135 | 132 |
136 return stack_deps | 133 return stack_deps |
137 | 134 |
138 | 135 |
139 # TODO(katesonia): Remove the repository argument after the refactoring CL is committed. | 136 # TODO(katesonia): Remove the repository argument after the refactoring CL is committed. |
140 def GetChangeLogsForFilesGroupedByDeps(regression_deps_rolls, stack_deps, | 137 def GetChangeLogsForFilesGroupedByDeps(regression_deps_rolls, stack_deps, |
141 repository): | 138 get_repository): |
142 """Gets a dict containing files touched by changelogs for deps in stack_deps. | 139 """Gets a dict containing files touched by changelogs for deps in stack_deps. |
143 | 140 |
144 Regression ranges for each dep are determined by regression_deps_rolls. | 141 Regression ranges for each dep are determined by regression_deps_rolls. |
145 Changelogs which were reverted are returned in a reverted_cls set. | 142 Changelogs which were reverted are returned in a reverted_cls set. |
146 | 143 |
147 Args: | 144 Args: |
148 regression_deps_rolls (dict): Maps dep_path to DependencyRoll in | 145 regression_deps_rolls (dict): Maps dep_path to DependencyRoll in |
149 regression range. | 146 regression range. |
150 stack_deps (dict): Represents all the dependencies shown in | 147 stack_deps (dict): Represents all the dependencies shown in |
151 the crash stack. | 148 the crash stack. |
152 repository (Repository): Repository to get changelogs from. | 149 get_repository (callable): a function from DEP urls to ``Repository`` |
| 150 objects, so we can get changelogs and blame for each dep. Notably, |
| 151 to keep the code here generic, we make no assumptions about |
| 152 which subclass of ``Repository`` this function returns. Thus, |
| 153 it is up to the caller to decide what class to return and handle |
| 154 any other arguments that class may require (e.g., an http client |
| 155 for ``GitilesRepository``). |
153 | 156 |
154 Returns: | 157 Returns: |
155 A tuple (dep_to_file_to_changelogs, reverted_cls). | 158 A tuple (dep_to_file_to_changelogs, reverted_cls). |
156 | 159 |
157 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path | 160 dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path |
158 to ChangeLogs that touched this file. | 161 to ChangeLogs that touched this file. |
159 For example: | 162 For example: |
160 { | 163 { |
161 'src/': { | 164 'src/': { |
162 'a.cc': [ | 165 'a.cc': [ |
(...skipping 29 matching lines...) Expand all Loading... |
192 dep_to_file_to_changelogs = defaultdict(lambda: defaultdict(list)) | 195 dep_to_file_to_changelogs = defaultdict(lambda: defaultdict(list)) |
193 reverted_cls = set() | 196 reverted_cls = set() |
194 | 197 |
195 for dep in stack_deps: | 198 for dep in stack_deps: |
196 # If a dep is not in regression range, then it cannot be the dep of | 199 # If a dep is not in regression range, then it cannot be the dep of |
197 # culprits. | 200 # culprits. |
198 dep_roll = regression_deps_rolls.get(dep) | 201 dep_roll = regression_deps_rolls.get(dep) |
199 if not dep_roll: | 202 if not dep_roll: |
200 continue | 203 continue |
201 | 204 |
202 repository.repo_url = dep_roll.repo_url | 205 repository = get_repository(dep_roll.repo_url) |
203 changelogs = repository.GetChangeLogs(dep_roll.old_revision, | 206 changelogs = repository.GetChangeLogs(dep_roll.old_revision, |
204 dep_roll.new_revision) | 207 dep_roll.new_revision) |
205 | 208 |
206 for changelog in changelogs or []: | 209 for changelog in changelogs or []: |
207 # When someone reverts, we need to skip both the CL doing | 210 # When someone reverts, we need to skip both the CL doing |
208 # the reverting as well as the CL that got reverted. If | 211 # the reverting as well as the CL that got reverted. If |
209 # ``reverted_revision`` is true, then this CL reverts another one, | 212 # ``reverted_revision`` is true, then this CL reverts another one, |
210 # so we skip it and save the CL it reverts in ``reverted_cls`` to | 213 # so we skip it and save the CL it reverts in ``reverted_cls`` to |
211 # be filtered out later. | 214 # be filtered out later. |
212 if changelog.reverted_revision: | 215 if changelog.reverted_revision: |
(...skipping 85 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
298 repository = get_repository(stack_deps[dep].repo_url) | 301 repository = get_repository(stack_deps[dep].repo_url) |
299 blame = repository.GetBlame(touched_file_path, | 302 blame = repository.GetBlame(touched_file_path, |
300 stack_deps[dep].revision) | 303 stack_deps[dep].revision) |
301 | 304 |
302 # Generate/update each suspect(changelog) in changelogs, blame is used | 305 # Generate/update each suspect(changelog) in changelogs, blame is used |
303 # to calculate distance between touched lines and crashed lines in file. | 306 # to calculate distance between touched lines and crashed lines in file. |
304 suspects.GenerateSuspects( | 307 suspects.GenerateSuspects( |
305 touched_file_path, dep, stack_infos, changelogs, blame) | 308 touched_file_path, dep, stack_infos, changelogs, blame) |
306 | 309 |
307 return suspects.values() | 310 return suspects.values() |
OLD | NEW |