Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(319)

Side by Side Diff: appengine/findit/crash/loglinear/changelist_classifier.py

Issue 2605943002: Removing the mutation in the factories for getting dep repositories (Closed)
Patch Set: Created 3 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 # Copyright 2016 The Chromium Authors. All rights reserved. 1 # Copyright 2016 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import logging 5 import logging
6 import math 6 import math
7 7
8 from common.chrome_dependency_fetcher import ChromeDependencyFetcher 8 from common.chrome_dependency_fetcher import ChromeDependencyFetcher
9 from crash import changelist_classifier 9 from crash import changelist_classifier
10 from crash.loglinear.changelist_features import min_distance 10 from crash.loglinear.changelist_features import min_distance
11 from crash.loglinear.changelist_features import top_frame_index 11 from crash.loglinear.changelist_features import top_frame_index
12 from crash.loglinear.model import ToFeatureFunction 12 from crash.loglinear.model import ToFeatureFunction
13 from crash.loglinear.model import UnnormalizedLogLinearModel 13 from crash.loglinear.model import UnnormalizedLogLinearModel
14 from crash.stacktrace import CallStack 14 from crash.stacktrace import CallStack
15 from crash.stacktrace import Stacktrace 15 from crash.stacktrace import Stacktrace
16 16
17 17
18 class LogLinearChangelistClassifier(object): 18 class LogLinearChangelistClassifier(object):
19 """A ``LogLinearModel``-based implementation of CL classification.""" 19 """A ``LogLinearModel``-based implementation of CL classification."""
20 20
21 def __init__(self, repository, get_repository, weights, top_n_frames=7, 21 def __init__(self, get_repository, weights, top_n_frames=7, top_n_suspects=3):
22 top_n_suspects=3):
23 """Args: 22 """Args:
24 repository (Repository): the Git repository for getting CLs to classify.
25 get_repository (callable): a function from DEP urls to ``Repository`` 23 get_repository (callable): a function from DEP urls to ``Repository``
26 objects, so we can get changelogs and blame for each dep. Notably, 24 objects, so we can get changelogs and blame for each dep. Notably,
27 to keep the code here generic, we make no assumptions about 25 to keep the code here generic, we make no assumptions about
28 which subclass of ``Repository`` this function returns. Thus, 26 which subclass of ``Repository`` this function returns. Thus,
29 it is up to the caller to decide what class to return and handle 27 it is up to the caller to decide what class to return and handle
30 any other arguments that class may require (e.g., an http client 28 any other arguments that class may require (e.g., an http client
31 for ``GitilesRepository``). 29 for ``GitilesRepository``).
32 weights (dict of float): the weights for the features. The keys of 30 weights (dict of float): the weights for the features. The keys of
33 the dictionary are the names of the feature that weight is 31 the dictionary are the names of the feature that weight is
34 for. We take this argument as a dict rather than as a list so that 32 for. We take this argument as a dict rather than as a list so that
35 callers needn't worry about what order to provide the weights in. 33 callers needn't worry about what order to provide the weights in.
36 top_n_frames (int): how many frames of each callstack to look at. 34 top_n_frames (int): how many frames of each callstack to look at.
37 top_n_suspects (int): maximum number of suspects to return. 35 top_n_suspects (int): maximum number of suspects to return.
38 """ 36 """
39 self._repository = repository 37 self._dependency_fetcher = ChromeDependencyFetcher(get_repository)
40 self._dependency_fetcher = ChromeDependencyFetcher(self._repository)
41 self._get_repository = get_repository 38 self._get_repository = get_repository
42 self._top_n_frames = top_n_frames 39 self._top_n_frames = top_n_frames
43 self._top_n_suspects = top_n_suspects 40 self._top_n_suspects = top_n_suspects
44 41
45 feature_function = ToFeatureFunction([ 42 feature_function = ToFeatureFunction([
46 top_frame_index.TopFrameIndexFeature(top_n_frames), 43 top_frame_index.TopFrameIndexFeature(top_n_frames),
47 min_distance.MinDistanceFeature(), 44 min_distance.MinDistanceFeature(),
48 ]) 45 ])
49 46
50 weight_list = [ 47 weight_list = [
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after
126 regression_deps_rolls = {} 123 regression_deps_rolls = {}
127 for dep_path, dep_roll in dep_rolls.iteritems(): 124 for dep_path, dep_roll in dep_rolls.iteritems():
128 if not dep_roll.old_revision or not dep_roll.new_revision: 125 if not dep_roll.old_revision or not dep_roll.new_revision:
129 logging.info('Skip %s denpendency %s', 126 logging.info('Skip %s denpendency %s',
130 'added' if dep_roll.new_revision else 'deleted', dep_path) 127 'added' if dep_roll.new_revision else 'deleted', dep_path)
131 continue 128 continue
132 regression_deps_rolls[dep_path] = dep_roll 129 regression_deps_rolls[dep_path] = dep_roll
133 130
134 dep_to_file_to_changelogs, ignore_cls = ( 131 dep_to_file_to_changelogs, ignore_cls = (
135 changelist_classifier.GetChangeLogsForFilesGroupedByDeps( 132 changelist_classifier.GetChangeLogsForFilesGroupedByDeps(
136 regression_deps_rolls, stack_deps, self._repository)) 133 regression_deps_rolls, stack_deps, self._get_repository))
137 dep_to_file_to_stack_infos = ( 134 dep_to_file_to_stack_infos = (
138 changelist_classifier.GetStackInfosForFilesGroupedByDeps( 135 changelist_classifier.GetStackInfosForFilesGroupedByDeps(
139 report.stacktrace, stack_deps)) 136 report.stacktrace, stack_deps))
140 137
141 # Get the possible suspects. 138 # Get the possible suspects.
142 suspects = changelist_classifier.FindSuspects( 139 suspects = changelist_classifier.FindSuspects(
143 dep_to_file_to_changelogs, 140 dep_to_file_to_changelogs,
144 dep_to_file_to_stack_infos, 141 dep_to_file_to_stack_infos,
145 stack_deps, 142 stack_deps,
146 self._get_repository, 143 self._get_repository,
(...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after
230 all_changed_files[changed_file.name] = changed_file 227 all_changed_files[changed_file.name] = changed_file
231 continue 228 continue
232 229
233 assert accumulated_changed_file.blame_url == changed_file.blame_url, ( 230 assert accumulated_changed_file.blame_url == changed_file.blame_url, (
234 ValueError('Blame URLs do not match: %s != %s' 231 ValueError('Blame URLs do not match: %s != %s'
235 % (accumulated_changed_file.blame_url, changed_file.blame_url))) 232 % (accumulated_changed_file.blame_url, changed_file.blame_url)))
236 accumulated_changed_file.reasons.extend(changed_file.reasons or []) 233 accumulated_changed_file.reasons.extend(changed_file.reasons or [])
237 234
238 return sorted(all_changed_files.values(), 235 return sorted(all_changed_files.values(),
239 key=lambda changed_file: changed_file.name) 236 key=lambda changed_file: changed_file.name)
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698