Chromium Code Reviews| Index: appengine/findit/crash/component_classifier.py |
| diff --git a/appengine/findit/crash/component_classifier.py b/appengine/findit/crash/component_classifier.py |
| index 5e6c8287cdbf2260a2c4bad489a6c097d63f70f6..fd027d2f6e684ee60be22872170c28e2896fbb42 100644 |
| --- a/appengine/findit/crash/component_classifier.py |
| +++ b/appengine/findit/crash/component_classifier.py |
| @@ -18,100 +18,82 @@ class ComponentClassifier(object): |
| For example: ['Blink>DOM', 'Blink>HTML']. |
| """ |
| - def __init__(self, components, top_n): |
| + def __init__(self, components, top_n_frames): |
| """Build a classifier for components. |
| Args: |
| components (list of crash.component.Component): the components to |
| check for. |
| - top_n (int): how many frames of the callstack to look at""" |
| - super(ComponentClassifier, self).__init__() |
| - if not components: |
| - logging.warning('Empty configuration for component classifier.') |
| - components = [] # Ensure self.components is not None |
| - self.components = components |
| - self.top_n = top_n |
| - |
| - def GetClassFromStackFrame(self, frame): |
| - """Determine which component is responsible for this frame.""" |
| - for component in self.components: |
| - if component.MatchesStackFrame(frame): |
| - return component.component_name |
| - |
| - return '' |
| - |
| - # TODO(wrengr): refactor this into a method on Suspect which returns |
| - # the cannonical frame (and documents why it's the one we return). |
| - def GetClassFromSuspect(self, suspect): |
| - """Determine which component is responsible for this suspect. |
| - |
| - Note that Findit assumes files that the culprit suspect touched come from |
| - the same component. |
| + top_n_frames (int): how many frames of the callstack to look at |
| """ |
| - if suspect.file_to_stack_infos: |
| - # file_to_stack_infos is a dict mapping file_path to stack_infos, |
| - # where stack_infos is a list of (frame, callstack_priority) |
| - # pairs. So ``.values()`` returns a list of the stack_infos in an |
| - # arbitrary order; the first ``[0]`` grabs the "first" stack_infos; |
| - # the second ``[0]`` grabs the first pair from the list; and |
| - # the third ``[0]`` grabs the ``frame`` from the pair. |
| - # TODO(wrengr): why is that the right frame to look at? |
| - frame = suspect.file_to_stack_infos.values()[0][0][0] |
| - return self.GetClassFromStackFrame(frame) |
| - |
| - return '' |
| + super(ComponentClassifier, self).__init__() |
| + self.components = [] if components is None else components |
|
wrengr
2017/01/30 19:14:18
Can simplify that to ``self.components = component
Sharu Jiang
2017/01/30 22:23:52
Done.
|
| + self.top_n_frames = top_n_frames |
| # TODO(http://crbug.com/657177): return the Component objects |
| # themselves, rather than strings naming them. |
| - def Classify(self, suspects, crash_stack): |
| + def ClassifyCallStack(self, stack, top_n=2): |
| """Classifies component of a crash. |
| Args: |
| - suspects (list of Suspect): Culprit suspects. |
| - crash_stack (CallStack): The callstack that caused the crash. |
| + stack (CallStack): The callstack that caused the crash. |
| + top_n (int): The number of top components for the stack |
|
wrengr
2017/01/30 19:14:18
would help make the code below more legible to cal
Sharu Jiang
2017/01/30 22:23:52
Done.
|
| Returns: |
| List of top 2 components. |
|
wrengr
2017/01/30 19:14:19
Should say returns the top ``top_n`` components
Sharu Jiang
2017/01/30 22:23:52
Done.
|
| """ |
| - # If ``suspects`` are available, we use the components from there since |
| - # they're more reliable than the ones from the ``crash_stack``. |
| - if suspects: |
| - classes = map(self.GetClassFromSuspect, suspects[:self.top_n]) |
| - else: |
| - classes = map(self.GetClassFromStackFrame, |
| - crash_stack.frames[:self.top_n]) |
| - |
| - return RankByOccurrence(classes, 2) |
| - |
| - # TODO(ymzhang): use component of new path as default. RENAME might |
| - # need to return two (old path new path may have different components) |
| - def GetClassFromFileChangeInfo(self, file_change_info): |
| - """Determine which component is responsible for a touched file.""" |
| - if not file_change_info: |
| + def GetComponentFromStackFrame(frame): |
|
wrengr
2017/01/30 19:14:18
Since this doesn't close over any local variables,
Sharu Jiang
2017/01/30 22:23:51
I hide this because I don't want client to use it
|
| + """Determine which component is responsible for this frame.""" |
| + for component in self.components: |
| + if component.MatchesStackFrame(frame): |
| + return component.component_name |
| + |
| return None |
| - for component in self.components: |
| - if (file_change_info.change_type == ChangeType.DELETE): |
| - if component.MatchesFile(file_change_info.old_path): |
| - return component.component_name |
| - else: |
| - if component.MatchesFile(file_change_info.new_path): |
| - return component.component_name |
| + components = map(GetComponentFromStackFrame, |
| + stack.frames[:self.top_n_frames]) |
| + return RankByOccurrence(components, top_n) or None |
|
wrengr
2017/01/30 19:14:18
What's the point of returning ``None`` rather than
Sharu Jiang
2017/01/30 22:23:51
Acknowledged.
|
| - return '' |
| + # TODO(http://crbug.com/657177): return the Component objects |
| + # themselves, rather than strings naming them. |
| + def ClassifySuspects(self, suspects, top_n=2): |
| + """Classifies component of a list of suspects. |
| - def ClassifyChangeLog(self, change_log, top_n=2): |
| - """ Classifies components of a change log. |
| + Args: |
| + suspects (list of Suspect): Culprit suspects. |
|
wrengr
2017/01/30 19:14:19
missing documentation for ``top_n``
Sharu Jiang
2017/01/30 22:23:51
Done.
|
| + |
| + Returns: |
| + List of top 2 components. |
|
wrengr
2017/01/30 19:14:19
should say ``top_n`` since not hard-coded to 2
Sharu Jiang
2017/01/30 22:23:51
Done.
|
| + """ |
| + components = [] |
| + for suspect in suspects: |
| + components.extend(self.ClassifySuspect(suspect)) |
|
wrengr
2017/01/30 19:14:19
Pretty sure this will break whenever ClassifySuspe
Sharu Jiang
2017/01/30 22:23:51
Oops, return [] instead of None.
|
| + |
| + return RankByOccurrence(components, top_n, |
| + rank_function=lambda x: -len(x)) or None |
|
wrengr
2017/01/30 19:14:19
Again, what's the point of returning ``None`` rath
Sharu Jiang
2017/01/30 22:23:52
Done.
|
| + |
| + def ClassifySuspect(self, suspect, top_n=2): |
| + """ Classifies components of a suspect. |
| Args: |
| - change_log: a change log |
| - top_n: number of components assigned to this change log, default is 2 |
| + suspect (Suspect): a change log |
| + top_n (int): number of components assigned to this suspect, default is 2 |
| Returns: |
| List of components |
| """ |
| - if not change_log: |
| + if not suspect or not suspect.changelog: |
| + return None |
| + |
| + def GetComponentFromTouchedFile(touched_file): |
|
wrengr
2017/01/30 19:14:19
Again, would be simpler to float this out. Can be
Sharu Jiang
2017/01/30 22:23:52
This one use local variable suspect, so I'd like t
|
| + """Determine which component is responsible for a touched file.""" |
| + for component in self.components: |
| + if component.MatchesTouchedFile(suspect.dep_path, touched_file): |
| + return component.component_name |
| + |
| return None |
| - classes = map(self.GetClassFromFileChangeInfo, change_log.touched_files) |
| - return RankByOccurrence(classes, top_n, rank_function=lambda x:-len(x)) |
| + components = map(GetComponentFromTouchedFile, |
| + suspect.changelog.touched_files) |
| + return RankByOccurrence(components, top_n, |
| + rank_function=lambda x: -len(x)) or None |