Chromium Code Reviews| Index: appengine/findit/crash/project_classifier.py |
| diff --git a/appengine/findit/crash/project_classifier.py b/appengine/findit/crash/project_classifier.py |
| index 90a4b84adbf54b4fa2897619ed412c556b46e57c..e121c2f2bf99c6fa4540ea1ead50cb609c98154b 100644 |
| --- a/appengine/findit/crash/project_classifier.py |
| +++ b/appengine/findit/crash/project_classifier.py |
| @@ -2,9 +2,11 @@ |
| # Use of this source code is governed by a BSD-style license that can be |
| # found in the LICENSE file. |
| +import functools |
| import logging |
| from crash.occurrence import RankByOccurrence |
| +from crash.project import Project |
| from crash.type_enums import LanguageType |
| from model.crash.crash_config import CrashConfig |
| @@ -16,76 +18,33 @@ class ProjectClassifier(object): |
| """ |
| # TODO(http://crbug.com/657177): remove dependency on CrashConfig. |
| - def __init__(self): |
| + def __init__(self, projects, top_n_frames, |
| + non_chromium_project_rank_priority=None): |
| super(ProjectClassifier, self).__init__() |
| - self.project_classifier_config = CrashConfig.Get().project_classifier |
| - if self.project_classifier_config: |
| - self.project_classifier_config['host_directories'].sort( |
| - key=lambda host: -len(host.split('/'))) |
| - |
| - # TODO(http://crbug.com/657177): refactor this into a method on Project. |
| - def _GetProjectFromDepPath(self, dep_path): |
| - """Returns the project name from a dep path.""" |
| - if not dep_path: |
| - return '' |
| - |
| - if dep_path == 'src/': |
| - return 'chromium' |
| - |
| - for host_directory in self.project_classifier_config['host_directories']: |
| - if dep_path.startswith(host_directory): |
| - path = dep_path[len(host_directory):] |
| - return 'chromium-%s' % path.split('/')[0].lower() |
| - |
| - # Unknown path, return the whole path as project name. |
| - return 'chromium-%s' % '_'.join(dep_path.split('/')) |
| - |
| - # TODO(http://crbug.com/657177): refactor this into Project.MatchesStackFrame. |
| - def GetClassFromStackFrame(self, frame): |
| - """Determine which project is responsible for this frame.""" |
| - for marker, name in self.project_classifier_config[ |
| - 'function_marker_to_project_name'].iteritems(): |
| - if frame.function.startswith(marker): |
| - return name |
| - |
| - for marker, name in self.project_classifier_config[ |
| - 'file_path_marker_to_project_name'].iteritems(): |
| - if marker in frame.file_path or marker in frame.raw_file_path: |
| - return name |
| - |
| - return self._GetProjectFromDepPath(frame.dep_path) |
| - |
| - # TODO(wrengr): refactor this into a method on Suspect which returns |
| - # the cannonical frame (and documents why it's the one we return). |
| - def GetClassFromSuspect(self, suspect): |
| - """Determine which project is responsible for this suspect.""" |
| - if suspect.file_to_stack_infos: |
| - # file_to_stack_infos is a dict mapping file_path to stack_infos, |
| - # where stack_infos is a list of (frame, callstack_priority) |
| - # pairs. So ``.values()`` returns a list of the stack_infos in an |
| - # arbitrary order; the first ``[0]`` grabs the "first" stack_infos; |
| - # the second ``[0]`` grabs the first pair from the list; and |
| - # the third ``[0]`` grabs the ``frame`` from the pair. |
| - # TODO(wrengr): why is that the right frame to look at? |
| - frame = suspect.file_to_stack_infos.values()[0][0][0] |
| - return self.GetClassFromStackFrame(frame) |
| - |
| - return '' |
| - |
| - def Classify(self, suspects, crash_stack): |
| + self.projects = projects |
| + self.top_n_frames = top_n_frames |
| + self.non_chromium_project_rank_priority = non_chromium_project_rank_priority |
| + |
| + @staticmethod |
| + def _GetTopClass(classes, rank_function=None): |
| + """Gets the highest ranking class among classes.""" |
| + projects = RankByOccurrence(classes, 1, rank_function=rank_function) |
| + |
| + if projects: |
| + return projects[0] |
| + |
| + logging.warning('ProjectClassifier.Classify: no projects found.') |
| + return None |
| + |
| + def ClassifyCallStack(self, crash_stack): |
| """Classify project of a crash. |
| Args: |
| suspects (list of Suspect): culprit suspects. |
| - crash_stack (CallStack): the callstack that caused the crash. |
| Returns: |
| The name of the most-suspected project; or the empty string on failure. |
| """ |
| - if not self.project_classifier_config: |
| - logging.warning('ProjectClassifier.Classify: Empty configuration.') |
| - return None |
| - |
| rank_function = None |
| if crash_stack.language_type == LanguageType.JAVA: |
| def _RankFunctionForJava(occurrence): |
| @@ -96,27 +55,52 @@ class ProjectClassifier(object): |
| if 'chromium' in project_name: |
| index = 0 |
| else: |
| - index = self.project_classifier_config[ |
| - 'non_chromium_project_rank_priority'][project_name] |
| + index = self.non_chromium_project_rank_priority[project_name] |
| return (weight, index) |
| rank_function = _RankFunctionForJava |
| - top_n_frames = self.project_classifier_config['top_n'] |
| - # If ``suspects`` are available, we use the projects from there since |
| - # they're more reliable than the ones from the ``crash_stack``. |
| - if suspects: |
| - classes = map(self.GetClassFromSuspect, suspects[:top_n_frames]) |
| - else: |
| - classes = map(self.GetClassFromStackFrame, |
| - crash_stack.frames[:top_n_frames]) |
| - |
| - # Since we're only going to return the highest-ranked class, might |
| - # as well set ``max_classes`` to 1. |
| - projects = RankByOccurrence(classes, 1, rank_function=rank_function) |
| + # TODO(http://crbug.com/657177): refactor this into |
|
Martin Barbella
2017/01/26 23:59:48
Is there something specific that makes this diffic
Sharu Jiang
2017/01/27 03:20:42
Forgot to remove it; this is what this CL does.
|
| + # Project.MatchesStackFrame. |
| + def _GetClassFromStackFrame(frame): |
| + """Determine which project is responsible for this frame.""" |
| + for project in self.projects: |
| + if project.MatchesStackFrame(frame): |
| + return project.GetName(frame.dep_path) |
| - if projects: |
| - return projects[0] |
| + return None |
| - logging.warning('ProjectClassifier.Classify: no projects found.') |
| - return '' |
| + classes = map(_GetClassFromStackFrame, |
| + crash_stack.frames[:self.top_n_frames]) |
| + |
| + return ProjectClassifier._GetTopClass(classes, rank_function=rank_function) |
| + |
| + def ClassifySuspect(self, suspect): |
| + """Determine which project is responsible for this suspect.""" |
| + if not suspect or not suspect.changelog: |
| + return None |
| + |
| + def _GetClassFromTouchedFile(dep_path, touched_file): |
| + for project in self.projects: |
| + if project.MatchesTouchedFile(dep_path, touched_file): |
| + return project.GetName(dep_path) |
| + |
| + return None |
| + |
| + get_class = functools.partial(_GetClassFromTouchedFile, suspect.dep_path) |
| + classes = map(get_class, suspect.changelog.touched_files) |
| + return ProjectClassifier._GetTopClass(classes, |
| + rank_function=lambda x:-len(x)) |
| + |
| + def ClassifySuspects(self, suspects): |
| + """Classify project of a crash. |
| + |
| + Args: |
| + suspects (list of Suspect): culprit suspects. |
| + crash_stack (CallStack): the callstack that caused the crash. |
| + |
| + Returns: |
| + The name of the most-suspected project; or None if no project is found. |
| + """ |
| + classes = map(self.ClassifySuspect, suspects) |
| + return ProjectClassifier._GetTopClass(classes) |