Chromium Code Reviews| Index: appengine/findit/crash/project_classifier.py |
| diff --git a/appengine/findit/crash/project_classifier.py b/appengine/findit/crash/project_classifier.py |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..e3a4522aa2922754c213f340364658032dfe0aec |
| --- /dev/null |
| +++ b/appengine/findit/crash/project_classifier.py |
| @@ -0,0 +1,99 @@ |
| +# Copyright 2016 The Chromium Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| + |
| +import copy |
| + |
| +from crash import classifier |
| +from crash.type_enums import CallStackLanguageType |
| +from model.crash.crash_config import CrashConfig |
| + |
| + |
| +class ProjectClassifier(classifier.Classifier): |
| + """Determines the project of a crash - (project_name, project_path). |
| + |
| + For example: ('chromium', 'src/'), ('skia', 'src/skia/'), ...etc. |
| + """ |
| + |
| + def __init__(self): |
| + super(ProjectClassifier, self).__init__() |
| + |
| + def _GetCompiledConfig(config): |
| + config = copy.copy(config) |
|
stgao
2016/05/17 21:40:57
Use deepcopy if a copy is really needed.
Sharu Jiang
2016/05/20 23:16:33
No longer needed.
|
| + config['top_n'] = int(config['top_n']) |
|
stgao
2016/05/17 21:40:57
same here.
Sharu Jiang
2016/05/20 23:16:33
Oops.
|
| + return config |
| + |
| + self.config = _GetCompiledConfig(CrashConfig.Get().project_classifier) |
|
stgao
2016/05/17 21:40:57
self.config is not specific, maybe project_classif
Sharu Jiang
2016/05/20 23:16:33
Done.
|
| + |
| + def _GetProjectFromDepPath(self, dep_path): |
| + """Returns the project name from a dep path.""" |
| + if not dep_path: |
| + return '' |
| + |
| + if dep_path == 'src/': |
| + return 'chromium' |
| + |
| + for host_directory in self.config['host_directories']: |
|
stgao
2016/05/17 21:40:57
should the host directories be sorted like the one
Sharu Jiang
2016/05/20 23:16:33
The order of host_directories is exactly the same
stgao
2016/05/21 00:54:03
Are they always sorted in the config?
Sharu Jiang
2016/05/23 23:54:50
I am a little confused, they are always in this or
stgao
2016/05/24 00:10:58
Yes, they are in order in clusterfuzz, but they ar
Sharu Jiang
2016/05/24 22:22:44
Got it, that makes sense.
|
| + if dep_path.startswith(host_directory): |
| + path = dep_path[len(host_directory):] |
| + return 'chromium-%s' % path.split('/')[0].lower() |
| + |
| + # Unknown path, return the whole path as project name. |
| + return 'chromium-%s' % '_'.join(dep_path.split('/')) |
| + |
| + def GetClassFromStackFrame(self, frame): |
| + """Returns a tuple (project_name, project_path) of a StackFrame.""" |
| + for marker, name in self.config[ |
| + 'function_marker_to_project_name'].iteritems(): |
| + if frame.function.startswith(marker): |
| + return name |
| + |
| + for marker, name in self.config[ |
| + 'file_path_marker_to_project_name'].iteritems(): |
| + if marker in frame.file_path or marker in frame.raw_file_path: |
| + return name |
| + |
| + return self._GetProjectFromDepPath(frame.dep_path) |
| + |
| + def GetClassFromResult(self, result): |
| + """Returns (project_name, project_path) of a Result.""" |
| + if result.file_to_stack_infos: |
| + # A file in culprit result should always have its stack_info, namely a |
| + # list of (frame, callstack_priority) pairs. |
| + frame, _ = result.file_to_stack_infos.values()[0][0] |
| + return self.GetClassFromStackFrame(frame) |
| + |
| + return '' |
| + |
| + def Classify(self, results, crash_stack): |
| + """Classify project of a crash. |
| + |
| + Args: |
| + results (list of Result): culprit results. |
| + crash_stack (CallStack): the callstack that caused the crash. |
| + |
| + Returns: |
| + A tuple, project of the crash - (project_name, project_path). |
| + """ |
| + def _GetRankFunction(language_type): |
| + if language_type == CallStackLanguageType.JAVA: |
| + def _RankFunctionForJava(class_occurrences_info): |
| + project_name = class_occurrences_info.name |
| + return (len(class_occurrences_info.occurrences), |
| + 0 if 'chromium' in project_name else self.config[ |
| + 'non_chromium_project_rank_priority'][project_name]) |
| + |
| + return _RankFunctionForJava |
| + |
| + return classifier.DefaultRankFunction |
| + |
| + # Set the max_classes to 1, so the returned projects only has one element. |
| + projects = self._Classify( |
| + results, crash_stack, |
| + self.config['top_n'], 1, |
| + rank_function=_GetRankFunction(crash_stack.language_type)) |
| + |
| + if projects: |
| + return projects[0] |
| + |
| + return '' |