OLD | NEW |
1 # Copyright 2016 The Chromium Authors. All rights reserved. | 1 # Copyright 2016 The Chromium Authors. All rights reserved. |
2 # Use of this source code is governed by a BSD-style license that can be | 2 # Use of this source code is governed by a BSD-style license that can be |
3 # found in the LICENSE file. | 3 # found in the LICENSE file. |
4 | 4 |
5 import logging | 5 import logging |
6 import math | 6 import math |
7 | 7 |
8 from common.chrome_dependency_fetcher import ChromeDependencyFetcher | 8 from common.chrome_dependency_fetcher import ChromeDependencyFetcher |
9 from crash import changelist_classifier | 9 from crash import changelist_classifier |
10 from crash.loglinear.changelist_features import min_distance | 10 from crash.loglinear.changelist_features import min_distance |
11 from crash.loglinear.changelist_features import top_frame_index | 11 from crash.loglinear.changelist_features import top_frame_index |
12 from crash.loglinear.model import ToFeatureFunction | 12 from crash.loglinear.model import ToFeatureFunction |
13 from crash.loglinear.model import UnnormalizedLogLinearModel | 13 from crash.loglinear.model import UnnormalizedLogLinearModel |
14 from crash.stacktrace import CallStack | 14 from crash.stacktrace import CallStack |
15 from crash.stacktrace import Stacktrace | 15 from crash.stacktrace import Stacktrace |
16 | 16 |
17 | 17 |
18 class LogLinearChangelistClassifier(object): | 18 class LogLinearChangelistClassifier(object): |
19 """A ``LogLinearModel``-based implementation of CL classification.""" | 19 """A ``LogLinearModel``-based implementation of CL classification.""" |
20 | 20 |
21 def __init__(self, repository, get_repository, weights, top_n_frames=7, | 21 def __init__(self, get_repository, weights, top_n_frames=7, top_n_suspects=3): |
22 top_n_suspects=3): | |
23 """Args: | 22 """Args: |
24 repository (Repository): the Git repository for getting CLs to classify. | |
25 get_repository (callable): a function from DEP urls to ``Repository`` | 23 get_repository (callable): a function from DEP urls to ``Repository`` |
26 objects, so we can get changelogs and blame for each dep. Notably, | 24 objects, so we can get changelogs and blame for each dep. Notably, |
27 to keep the code here generic, we make no assumptions about | 25 to keep the code here generic, we make no assumptions about |
28 which subclass of ``Repository`` this function returns. Thus, | 26 which subclass of ``Repository`` this function returns. Thus, |
29 it is up to the caller to decide what class to return and handle | 27 it is up to the caller to decide what class to return and handle |
30 any other arguments that class may require (e.g., an http client | 28 any other arguments that class may require (e.g., an http client |
31 for ``GitilesRepository``). | 29 for ``GitilesRepository``). |
32 weights (dict of float): the weights for the features. The keys of | 30 weights (dict of float): the weights for the features. The keys of |
33 the dictionary are the names of the feature that weight is | 31 the dictionary are the names of the feature that weight is |
34 for. We take this argument as a dict rather than as a list so that | 32 for. We take this argument as a dict rather than as a list so that |
35 callers needn't worry about what order to provide the weights in. | 33 callers needn't worry about what order to provide the weights in. |
36 top_n_frames (int): how many frames of each callstack to look at. | 34 top_n_frames (int): how many frames of each callstack to look at. |
37 top_n_suspects (int): maximum number of suspects to return. | 35 top_n_suspects (int): maximum number of suspects to return. |
38 """ | 36 """ |
39 self._repository = repository | 37 self._dependency_fetcher = ChromeDependencyFetcher(get_repository) |
40 self._dependency_fetcher = ChromeDependencyFetcher(self._repository) | |
41 self._get_repository = get_repository | 38 self._get_repository = get_repository |
42 self._top_n_frames = top_n_frames | 39 self._top_n_frames = top_n_frames |
43 self._top_n_suspects = top_n_suspects | 40 self._top_n_suspects = top_n_suspects |
44 | 41 |
45 feature_function = ToFeatureFunction([ | 42 feature_function = ToFeatureFunction([ |
46 top_frame_index.TopFrameIndexFeature(top_n_frames), | 43 top_frame_index.TopFrameIndexFeature(top_n_frames), |
47 min_distance.MinDistanceFeature(), | 44 min_distance.MinDistanceFeature(), |
48 ]) | 45 ]) |
49 | 46 |
50 weight_list = [ | 47 weight_list = [ |
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
126 regression_deps_rolls = {} | 123 regression_deps_rolls = {} |
127 for dep_path, dep_roll in dep_rolls.iteritems(): | 124 for dep_path, dep_roll in dep_rolls.iteritems(): |
128 if not dep_roll.old_revision or not dep_roll.new_revision: | 125 if not dep_roll.old_revision or not dep_roll.new_revision: |
129 logging.info('Skip %s denpendency %s', | 126 logging.info('Skip %s denpendency %s', |
130 'added' if dep_roll.new_revision else 'deleted', dep_path) | 127 'added' if dep_roll.new_revision else 'deleted', dep_path) |
131 continue | 128 continue |
132 regression_deps_rolls[dep_path] = dep_roll | 129 regression_deps_rolls[dep_path] = dep_roll |
133 | 130 |
134 dep_to_file_to_changelogs, ignore_cls = ( | 131 dep_to_file_to_changelogs, ignore_cls = ( |
135 changelist_classifier.GetChangeLogsForFilesGroupedByDeps( | 132 changelist_classifier.GetChangeLogsForFilesGroupedByDeps( |
136 regression_deps_rolls, stack_deps, self._repository)) | 133 regression_deps_rolls, stack_deps, self._get_repository)) |
137 dep_to_file_to_stack_infos = ( | 134 dep_to_file_to_stack_infos = ( |
138 changelist_classifier.GetStackInfosForFilesGroupedByDeps( | 135 changelist_classifier.GetStackInfosForFilesGroupedByDeps( |
139 report.stacktrace, stack_deps)) | 136 report.stacktrace, stack_deps)) |
140 | 137 |
141 # Get the possible suspects. | 138 # Get the possible suspects. |
142 suspects = changelist_classifier.FindSuspects( | 139 suspects = changelist_classifier.FindSuspects( |
143 dep_to_file_to_changelogs, | 140 dep_to_file_to_changelogs, |
144 dep_to_file_to_stack_infos, | 141 dep_to_file_to_stack_infos, |
145 stack_deps, | 142 stack_deps, |
146 self._get_repository, | 143 self._get_repository, |
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
230 all_changed_files[changed_file.name] = changed_file | 227 all_changed_files[changed_file.name] = changed_file |
231 continue | 228 continue |
232 | 229 |
233 assert accumulated_changed_file.blame_url == changed_file.blame_url, ( | 230 assert accumulated_changed_file.blame_url == changed_file.blame_url, ( |
234 ValueError('Blame URLs do not match: %s != %s' | 231 ValueError('Blame URLs do not match: %s != %s' |
235 % (accumulated_changed_file.blame_url, changed_file.blame_url))) | 232 % (accumulated_changed_file.blame_url, changed_file.blame_url))) |
236 accumulated_changed_file.reasons.extend(changed_file.reasons or []) | 233 accumulated_changed_file.reasons.extend(changed_file.reasons or []) |
237 | 234 |
238 return sorted(all_changed_files.values(), | 235 return sorted(all_changed_files.values(), |
239 key=lambda changed_file: changed_file.name) | 236 key=lambda changed_file: changed_file.name) |
OLD | NEW |