appengine/findit/crash/loglinear/changelist_classifier.py - Issue 2605943002: Removing the mutation in the factories for getting dep repositories

Side by Side Diff: appengine/findit/crash/loglinear/changelist_classifier.py

Issue 2605943002: Removing the mutation in the factories for getting dep repositories (Closed)

Patch Set: Created 3 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« appengine/findit/crash/crash_pipeline.py ('K') | « appengine/findit/crash/findit_for_clusterfuzz.py ('k') | appengine/findit/crash/loglinear/test/changelist_classifier_test.py » ('j') | appengine/findit/handlers/crash/crash_handler.py » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 # Copyright 2016 The Chromium Authors. All rights reserved.	1 # Copyright 2016 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 import logging	5 import logging

6 import math	6 import math

7	7

8 from common.chrome_dependency_fetcher import ChromeDependencyFetcher	8 from common.chrome_dependency_fetcher import ChromeDependencyFetcher

9 from crash import changelist_classifier	9 from crash import changelist_classifier

10 from crash.loglinear.changelist_features import min_distance	10 from crash.loglinear.changelist_features import min_distance

11 from crash.loglinear.changelist_features import top_frame_index	11 from crash.loglinear.changelist_features import top_frame_index

12 from crash.loglinear.model import ToFeatureFunction	12 from crash.loglinear.model import ToFeatureFunction

13 from crash.loglinear.model import UnnormalizedLogLinearModel	13 from crash.loglinear.model import UnnormalizedLogLinearModel

14 from crash.stacktrace import CallStack	14 from crash.stacktrace import CallStack

15 from crash.stacktrace import Stacktrace	15 from crash.stacktrace import Stacktrace

16	16

17	17

18 class LogLinearChangelistClassifier(object):	18 class LogLinearChangelistClassifier(object):

19 """A ``LogLinearModel``-based implementation of CL classification."""	19 """A ``LogLinearModel``-based implementation of CL classification."""

20	20

21 def __init__(self, repository, get_repository, weights, top_n_frames=7,	21 def __init__(self, get_repository, weights, top_n_frames=7, top_n_suspects=3):

22 top_n_suspects=3):

23 """Args:	22 """Args:

24 repository (Repository): the Git repository for getting CLs to classify.

25 get_repository (callable): a function from DEP urls to ``Repository``	23 get_repository (callable): a function from DEP urls to ``Repository``

26 objects, so we can get changelogs and blame for each dep. Notably,	24 objects, so we can get changelogs and blame for each dep. Notably,

27 to keep the code here generic, we make no assumptions about	25 to keep the code here generic, we make no assumptions about

28 which subclass of ``Repository`` this function returns. Thus,	26 which subclass of ``Repository`` this function returns. Thus,

29 it is up to the caller to decide what class to return and handle	27 it is up to the caller to decide what class to return and handle

30 any other arguments that class may require (e.g., an http client	28 any other arguments that class may require (e.g., an http client

31 for ``GitilesRepository``).	29 for ``GitilesRepository``).

32 weights (dict of float): the weights for the features. The keys of	30 weights (dict of float): the weights for the features. The keys of

33 the dictionary are the names of the feature that weight is	31 the dictionary are the names of the feature that weight is

34 for. We take this argument as a dict rather than as a list so that	32 for. We take this argument as a dict rather than as a list so that

35 callers needn't worry about what order to provide the weights in.	33 callers needn't worry about what order to provide the weights in.

36 top_n_frames (int): how many frames of each callstack to look at.	34 top_n_frames (int): how many frames of each callstack to look at.

37 top_n_suspects (int): maximum number of suspects to return.	35 top_n_suspects (int): maximum number of suspects to return.

38 """	36 """

39 self._repository = repository	37 self._dependency_fetcher = ChromeDependencyFetcher(get_repository)

40 self._dependency_fetcher = ChromeDependencyFetcher(self._repository)

41 self._get_repository = get_repository	38 self._get_repository = get_repository

42 self._top_n_frames = top_n_frames	39 self._top_n_frames = top_n_frames

43 self._top_n_suspects = top_n_suspects	40 self._top_n_suspects = top_n_suspects

44	41

45 feature_function = ToFeatureFunction([	42 feature_function = ToFeatureFunction([

46 top_frame_index.TopFrameIndexFeature(top_n_frames),	43 top_frame_index.TopFrameIndexFeature(top_n_frames),

47 min_distance.MinDistanceFeature(),	44 min_distance.MinDistanceFeature(),

48 ])	45 ])

49	46

50 weight_list = [	47 weight_list = [

(...skipping 75 matching lines...)
126 regression_deps_rolls = {}	123 regression_deps_rolls = {}

127 for dep_path, dep_roll in dep_rolls.iteritems():	124 for dep_path, dep_roll in dep_rolls.iteritems():

128 if not dep_roll.old_revision or not dep_roll.new_revision:	125 if not dep_roll.old_revision or not dep_roll.new_revision:

129 logging.info('Skip %s denpendency %s',	126 logging.info('Skip %s denpendency %s',

130 'added' if dep_roll.new_revision else 'deleted', dep_path)	127 'added' if dep_roll.new_revision else 'deleted', dep_path)

131 continue	128 continue

132 regression_deps_rolls[dep_path] = dep_roll	129 regression_deps_rolls[dep_path] = dep_roll

133	130

134 dep_to_file_to_changelogs, ignore_cls = (	131 dep_to_file_to_changelogs, ignore_cls = (

135 changelist_classifier.GetChangeLogsForFilesGroupedByDeps(	132 changelist_classifier.GetChangeLogsForFilesGroupedByDeps(

136 regression_deps_rolls, stack_deps, self._repository))	133 regression_deps_rolls, stack_deps, self._get_repository))

137 dep_to_file_to_stack_infos = (	134 dep_to_file_to_stack_infos = (

138 changelist_classifier.GetStackInfosForFilesGroupedByDeps(	135 changelist_classifier.GetStackInfosForFilesGroupedByDeps(

139 report.stacktrace, stack_deps))	136 report.stacktrace, stack_deps))

140	137

141 # Get the possible suspects.	138 # Get the possible suspects.

142 suspects = changelist_classifier.FindSuspects(	139 suspects = changelist_classifier.FindSuspects(

143 dep_to_file_to_changelogs,	140 dep_to_file_to_changelogs,

144 dep_to_file_to_stack_infos,	141 dep_to_file_to_stack_infos,

145 stack_deps,	142 stack_deps,

146 self._get_repository,	143 self._get_repository,

(...skipping 83 matching lines...)
230 all_changed_files[changed_file.name] = changed_file	227 all_changed_files[changed_file.name] = changed_file

231 continue	228 continue

232	229

233 assert accumulated_changed_file.blame_url == changed_file.blame_url, (	230 assert accumulated_changed_file.blame_url == changed_file.blame_url, (

234 ValueError('Blame URLs do not match: %s != %s'	231 ValueError('Blame URLs do not match: %s != %s'

235 % (accumulated_changed_file.blame_url, changed_file.blame_url)))	232 % (accumulated_changed_file.blame_url, changed_file.blame_url)))

236 accumulated_changed_file.reasons.extend(changed_file.reasons or [])	233 accumulated_changed_file.reasons.extend(changed_file.reasons or [])

237	234

238 return sorted(all_changed_files.values(),	235 return sorted(all_changed_files.values(),

239 key=lambda changed_file: changed_file.name)	236 key=lambda changed_file: changed_file.name)

OLD	NEW