appengine/findit/crash/changelist_classifier.py - Issue 2414523002: [Findit] Reorganizing findit_for_*.py

Unified Diff: appengine/findit/crash/changelist_classifier.py

Issue 2414523002: [Findit] Reorganizing findit_for_*.py (Closed)

Patch Set: Finally fixed the mock tests! Created 4 years, 2 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Index: appengine/findit/crash/changelist_classifier.py

diff --git a/appengine/findit/crash/findit_for_crash.py b/appengine/findit/crash/changelist_classifier.py

similarity index 57%

rename from appengine/findit/crash/findit_for_crash.py

rename to appengine/findit/crash/changelist_classifier.py

index 829a84496acace5f3d7d01386b9f74ae39502481..c96acc1ecae94d7ee1c855002ea3e026616b3c49 100644

--- a/appengine/findit/crash/findit_for_crash.py

+++ b/appengine/findit/crash/changelist_classifier.py

@@ -2,18 +2,113 @@

# Use of this source code is governed by a BSD-style license that can be

# found in the LICENSE file.

+import logging

from collections import defaultdict

+from common import chrome_dependency_fetcher

from common.diff import ChangeType

from common.git_repository import GitRepository

from common.http_client_appengine import HttpClientAppengine

from crash import crash_util

-from crash.stacktrace import CallStack

-from crash.stacktrace import Stacktrace

from crash.results import MatchResults

from crash.scorers.aggregated_scorer import AggregatedScorer

from crash.scorers.min_distance import MinDistance

from crash.scorers.top_frame_index import TopFrameIndex

+from crash.stacktrace import CallStack

+from crash.stacktrace import Stacktrace

+# TODO(wrengr): make this a namedtuple.

+class ChangelistClassifier(object):

+ def __init__(self, repository,

+ top_n_frames, top_n_results=3, confidence_threshold=0.999):

+ """Args:

+ repository (Repository): the Git repository for getting CLs to classify.

+ top_n_frames (int): how many frames of each callstack to look at.

+ top_n_results (int): maximum number of results to return.

+ confidence_threshold (float): In [0,1], above which we only return

+ the first result.

+ """

+ self._repository = repository

+ self.top_n_frames = top_n_frames

+ self.top_n_results = top_n_results

+ self.confidence_threshold = confidence_threshold

+ def __str__(self): # pragma: no cover

+ return ('%s(top_n_frames=%d, top_n_results=%d, confidence_threshold=%g)'

+ % (self.__class__.__name__,

+ self.top_n_frames,

+ self.top_n_results,

+ self.confidence_threshold))

+ def __call__(self, report):

+ """Finds changelists suspected of being responsible for the crash report.

+ Args:

+ report (CrashReport): the report to be analyzed.

+ Returns:

+ List of Results, sorted by confidence from highest to lowest.

+ """

+ if not report.regression_range:

+ logging.warning('ChangelistClassifier.__call__: Missing regression range '

+ 'for report: %s', str(report))

+ return []

+ last_good_version, first_bad_version = report.regression_range

+ logging.info('ChangelistClassifier.__call__: Regression range %s:%s',

+ last_good_version, first_bad_version)

+ # Restrict analysis to just the top n frames in each callstack.

+ # TODO(wrengr): move this to be a Stacktrace method?

+ stacktrace = Stacktrace([

+ CallStack(stack.priority,

+ format_type=stack.format_type,

+ language_type=stack.language_type,

+ frame_list=stack[:self.top_n_frames])

+ for stack in report.stacktrace])

+ # We are only interested in the deps in crash stack (the callstack that

+ # caused the crash).

+ # TODO(wrengr): we may want to receive the crash deps as an argument,

+ # so that when this method is called via Findit.FindCulprit, we avoid

+ # doing redundant work creating it.

+ stack_deps = GetDepsInCrashStack(

+ report.stacktrace.crash_stack,

+ chrome_dependency_fetcher.ChromeDependencyFetcher(self._repository

+ ).GetDependency(report.crashed_version, report.platform))

+ # Get dep and file to changelogs, stack_info and blame dicts.

+ regression_deps_rolls = chrome_dependency_fetcher.ChromeDependencyFetcher(

+ self._repository).GetDependencyRollsDict(

+ last_good_version, first_bad_version, report.platform)

+ dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps(

+ regression_deps_rolls, stack_deps, self._repository)

+ dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps(

+ stacktrace, stack_deps)

+ # TODO: argument order is inconsistent from others. Repository should

+ # be last argument.

+ results = FindMatchResults(dep_to_file_to_changelogs,

+ dep_to_file_to_stack_infos,

+ stack_deps, self._repository, ignore_cls)

+ if not results:

+ return []

+ # TODO(wrengr): we should be able to do this map/filter/sort in one pass.

+ # Set result.confidence, result.reasons and result.changed_files.

+ aggregated_scorer = AggregatedScorer([TopFrameIndex(), MinDistance()])

+ map(aggregated_scorer.Score, results)

+ # Filter all the 0 confidence results.

+ results = filter(lambda r: r.confidence != 0, results)

+ if not results:

+ return []

+ sorted_results = sorted(results, key=lambda r: -r.confidence)

+ max_results = (1 if sorted_results[0].confidence > self.confidence_threshold

+ else self.top_n_results)

+ return sorted_results[:max_results]

def GetDepsInCrashStack(crash_stack, crash_deps):

@@ -35,7 +130,7 @@ def GetChangeLogsForFilesGroupedByDeps(regression_deps_rolls, stack_deps,

"""Gets a dict containing files touched by changelogs for deps in stack_deps.

Regression ranges for each dep is determined by regression_deps_rolls.

- Those changelogs got reverted should be returned in a ignore_cls set.

+ Changelogs which were reverted are returned in a reverted_cls set.

Args:

regression_deps_rolls (dict): Maps dep_path to DependencyRoll in

@@ -45,7 +140,7 @@ def GetChangeLogsForFilesGroupedByDeps(regression_deps_rolls, stack_deps,

repository (Repository): Repository to get changelogs from.

Returns:

- A tuple (dep_to_file_to_changelogs, ignore_cls).

+ A tuple (dep_to_file_to_changelogs, reverted_cls).

dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path

to ChangeLogs that touched this file.

@@ -80,15 +175,16 @@ def GetChangeLogsForFilesGroupedByDeps(regression_deps_rolls, stack_deps,

}

- ignore_cls (set): A set of reverted revisions.

+ reverted_cls (set): A set of reverted revisions.

"""

dep_to_file_to_changelogs = defaultdict(lambda: defaultdict(list))

- ignore_cls = set()

+ reverted_cls = set()

for dep in stack_deps:

# If a dep is not in regression range, than it cannot be the dep of

# culprits.

- if dep not in regression_deps_rolls:

+ dep_roll = regression_deps_rolls.get(dep)

+ if not dep_roll:

continue

dep_roll = regression_deps_rolls[dep]

@@ -98,10 +194,13 @@ def GetChangeLogsForFilesGroupedByDeps(regression_deps_rolls, stack_deps,

dep_roll.new_revision)

for changelog in changelogs:

+ # When someone reverts, we need to skip both the CL doing

+ # the reverting as well as the CL that got reverted. If

+ # |reverted_revision| is true, then this CL reverts another one,

+ # so we skip it and save the CL it reverts in |reverted_cls| to

+ # be filtered out later.

if changelog.reverted_revision:

- # Skip reverting cls and add reverted revisions to ignore_cls to later

- # filter those reverted revisions.

- ignore_cls.add(changelog.reverted_revision)

+ reverted_cls.add(changelog.reverted_revision)

continue

for touched_file in changelog.touched_files:

@@ -110,7 +209,7 @@ def GetChangeLogsForFilesGroupedByDeps(regression_deps_rolls, stack_deps,

dep_to_file_to_changelogs[dep][touched_file.new_path].append(changelog)

- return dep_to_file_to_changelogs, ignore_cls

+ return dep_to_file_to_changelogs, reverted_cls

def GetStackInfosForFilesGroupedByDeps(stacktrace, stack_deps):

@@ -125,8 +224,8 @@ def GetStackInfosForFilesGroupedByDeps(stacktrace, stack_deps):

Returns:

A dict, maps dep path to a dict mapping file path to a list of stack

- inforamtion of this file. A file may occur in several frames, one stack info

- consist of a StackFrame and the callstack priority of it.

+ information of this file. A file may occur in several frames, one

+ stack info consist of a StackFrame and the callstack priority of it.

For example:

{

@@ -165,7 +264,7 @@ def FindMatchResults(dep_to_file_to_changelogs,

dep_to_file_to_changelogs (dict): Maps dep_path to a dict mapping file path

to ChangeLogs that touched this file.

dep_to_file_to_stack_infos (dict): Maps dep path to a dict mapping file path

- to a list of stack inforamtion of this file. A file may occur in several

+ to a list of stack information of this file. A file may occur in several

frames, one stack info consist of a StackFrame and the callstack priority

of it.

stack_deps (dict): Represents all the dependencies shown in the crash stack.

@@ -195,66 +294,3 @@ def FindMatchResults(dep_to_file_to_changelogs,

crashed_file_path, dep, stack_infos, changelogs, blame)

return match_results.values()

-# TODO(katesonia): Remove the repository argument after refatoring cl committed.

-def FindItForCrash(stacktrace, regression_deps_rolls, crashed_deps, top_n,

- repository):

- """Finds culprit results for crash.

- Args:

- stacktrace (Stactrace): Parsed Stactrace object.

- regression_deps_rolls (dict): Maps dep_path to DependencyRoll in

- regression range.

- crashed_deps (dict of Dependencys): Represents all the dependencies of

- crashed revision.

- top_n (int): Top n frames of each stack to be analyzed.

- repository (Repository): Repository to get changelogs and blame from.

- Returns:

- List of Results, sorted by confidence from highest to lowest.

- """

- if not regression_deps_rolls:

- return []

- # Findit will only analyze the top n frames in each callstacks.

- stack_trace = Stacktrace([

- CallStack(stack.priority,

- format_type=stack.format_type,

- language_type=stack.language_type,

- frame_list=stack[:top_n])

- for stack in stacktrace])

- # We are only interested in the deps in crash stack (the callstack that

- # caused the crash).

- stack_deps = GetDepsInCrashStack(stack_trace.crash_stack, crashed_deps)

- # Get dep and file to changelogs, stack_info and blame dicts.

- dep_to_file_to_changelogs, ignore_cls = GetChangeLogsForFilesGroupedByDeps(

- regression_deps_rolls, stack_deps, repository)

- dep_to_file_to_stack_infos = GetStackInfosForFilesGroupedByDeps(

- stack_trace, stack_deps)

- results = FindMatchResults(dep_to_file_to_changelogs,

- dep_to_file_to_stack_infos,

- stack_deps, repository, ignore_cls)

- if not results:

- return []

- aggregated_scorer = AggregatedScorer([TopFrameIndex(), MinDistance()])

- # Set result.confidence, result.reasons and result.changed_files.

- map(aggregated_scorer.Score, results)

- # Filter all the 0 confidence results.

- results = filter(lambda r: r.confidence != 0, results)

- if not results:

- return []

- sorted_results = sorted(results, key=lambda r: -r.confidence)

- if sorted_results[0].confidence > 0.999:

- return sorted_results[:1]

- return sorted_results[:3]

« no previous file with comments | « appengine/findit/crash/azalea.py ('k') | appengine/findit/crash/classifier.py » ('j') | no next file with comments »