appengine/findit/waterfall/build_failure_analysis.py - Issue 838003004: [Findit] Add three sub-pipelines to analyze build failure.

Unified Diff: appengine/findit/waterfall/build_failure_analysis.py

Issue 838003004: [Findit] Add three sub-pipelines to analyze build failure. (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master

Patch Set: Just rebase. Created 5 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: appengine/findit/waterfall/build_failure_analysis.py

diff --git a/appengine/findit/waterfall/build_failure_analysis.py b/appengine/findit/waterfall/build_failure_analysis.py

new file mode 100644

index 0000000000000000000000000000000000000000..5fb030f660f9a25f693f034f3affb2b95859c90b

--- /dev/null

+++ b/appengine/findit/waterfall/build_failure_analysis.py

@@ -0,0 +1,279 @@

+# Use of this source code is governed by a BSD-style license that can be

+# found in the LICENSE file.

+import collections

+import os

+import re

+from common.diff import ChangeType

+from waterfall.failure_signal import FailureSignal

+def _IsSameFile(src_file, file_path):

+ """Guesses if the two files are the same.

+ Args:

+ src_file (str): Full path of a file committed to git repo.

+ file_path (str): Path of a file appearing in a failure log. It might not be

+ a full path.

+ Returns:

+ True if the two files are likely the same, otherwise False. Eg.:

+ True: (chrome/test/base/chrome_process_util.h, base/chrome_process_util.h)

+ True: (a/b/x.cc, a/b/x.cc)

+ False: (c/x.cc, a/b/c/x.cc)

+ """

+ if src_file == file_path:

+ return True

+ return src_file.endswith('/%s' % file_path)

+def _NormalizeObjectFile(file_path):

+ # During compile, a/b/c/file.cc in TARGET will be compiled into object

+ # file a/b/c/TARGET.file.o, thus TARGET needs removing from path.

+ if file_path.startswith('obj/'):

+ file_path = file_path[4:]

+ file_dir = os.path.dirname(file_path)

+ file_name = os.path.basename(file_path)

+ parts = file_name.split('.', 1)

+ if len(parts) == 2 and parts[1].endswith('.o'):

+ file_name = parts[1]

+ return os.path.join(file_dir, file_name).replace(os.sep, '/')

+_COMMON_SUFFIXES = [

+ 'impl',

+ 'browser_tests', 'browser_test', 'browsertest', 'browsertests',

+ 'unittests', 'unittest', 'tests', 'test',

+ 'gcc', 'msvc',

+ 'arm', 'arm64', 'mips', 'portable', 'x86',

+ 'android', 'ios', 'linux', 'mac', 'ozone', 'posix', 'win',

+ 'aura', 'x', 'x11',

+_COMMON_SUFFIX_PATTERNS = [

+ re.compile('.*(_%s)$' % suffix) for suffix in _COMMON_SUFFIXES

+def _StripExtensionAndCommonSuffix(file_path):

+ """Strips extension and common suffixes from file name to guess relation.

+ Examples:

+ file_impl.cc, file_unittest.cc, file_impl_mac.h -> file

+ """

+ file_dir = os.path.dirname(file_path)

+ file_name = os.path.splitext(os.path.basename(file_path))[0]

+ while True:

+ match = None

+ for suffix_patten in _COMMON_SUFFIX_PATTERNS:

+ match = suffix_patten.match(file_name)

+ if match:

+ file_name = file_name[:-len(match.group(1))]

+ break

+ if not match:

+ break

+ return os.path.join(file_dir, file_name).replace(os.sep, '/')

+def _IsRelated(src_file, file_path):

+ """Checks if two files are related.

+ Example of related files:

+ 1. file.h <-> file_impl.cc

+ 2. file_impl.cc <-> file_unittest.cc

+ 3. file_win.cc <-> file_mac.cc

+ 4. a/b/x.cc <-> a/b/y.cc

+ 5. x.h <-> x.cc

+ """

+ if file_path.endswith('.o'):

+ file_path = _NormalizeObjectFile(file_path)

+ if _IsSameFile(_StripExtensionAndCommonSuffix(src_file),

+ _StripExtensionAndCommonSuffix(file_path)):

+ return True

+ # Two file are in the same directory: a/b/x.cc <-> a/b/y.cc

+ # TODO: cause noisy result?

+ src_file_dir = os.path.dirname(src_file)

+ return src_file_dir and src_file_dir == os.path.dirname(file_path)

+class _Justification(object):

+ """Justification for why a CL might be suspected for a build failure.

+ A justification includes:

+ 1. suspect points: for a highly-suspected CL, it is given some suspect points.

+ Eg. a CL is highly suspected if it deleted a .cc file appearing in the

+ compile failure.

+ 2. score: for a likely-suspected CL, it won't get suspect points, but a score.

+ Eg. a CL is just likely suspected if it only changed a related file

+ (x_impl.cc vs. x.h) appearing in a test failure.

+ For a highly-suspected CL, it will get a high score besides suspect points.

+ 3. hints: each hint is a string describing a reason for suspecting a CL and

+ could be shown to the user (eg., "add x_impl.cc").

+ """

+ def __init__(self):

+ self._suspect_points = 0

+ self._score = 0

+ self._hints = []

+ @property

+ def score(self):

+ return self._score

+ def AddFileChange(

+ self, change_action, src_file, file_path, suspect_points, score):

+ """Adds a suspected file change.

+ Args:

+ change_action (str): One of the change types in common.diff.ChangeType.

+ src_file (str): Changed file path in a CL.

+ file_path (str): File path appearing in the failure log.

+ suspect_points (int): Number of suspect points for the file change.

+ score (int): Score number for the file change.

+ """

+ self._suspect_points += suspect_points

+ self._score += score

+ # TODO: make hint more descriptive?

+ if src_file != file_path:

+ self._hints.append(

+ '%s %s (%s)' % (change_action, src_file, file_path))

+ else:

+ self._hints.append('%s %s' % (change_action, src_file))

+ def ToDict(self):

+ return {

+ 'suspect_points': self._suspect_points,

+ 'score': self._score,

+ 'hints': self._hints,

+ }

+def _CheckFile(

+ change_action, src_file, file_path, suspect_points, score, justification):

+ """Checks if the given files are the same or correlated.

+ Args:

+ change_action (str): One of the change types in common.diff.ChangeType.

+ src_file (str): Changed file path in a CL.

+ file_path (str): File path appearing in the failure log.

+ suspect_points (int): Number of suspect points if two files are the same.

+ score (int): Score number if two files are the same.

+ justification (_Justification): An instance of _Justification.

+ """

+ if _IsSameFile(src_file, file_path):

+ justification.AddFileChange(

+ change_action, src_file, file_path, suspect_points, score)

+ elif _IsRelated(src_file, file_path):

+ # For correlated files, do suspect=0 and score=1, because it is just likely,

+ # but not highly, suspected.

+ justification.AddFileChange(

+ change_action, src_file, file_path, 0, 1)

+def _CheckFiles(failure_signal, change_log):

+ """Check files in the given change log of a CL against the failure signal.

+ Args:

+ failure_signal (FailureSignal): The failure signal of a failed step or test.

+ change_log (dict): The change log of a CL as returned by

+ common.change_log.ChangeLog.ToJson(). # TODO(stgao): ToJson -> ToDict.

+ Returns:

+ A dict as returned by _Justification.ToDict() if the CL is suspected for the

+ failure; otherwise None.

+ """

+ justification = _Justification()

+ for file_path, _ in failure_signal.files.iteritems():

+ # TODO(stgao): remove this hack when DEPS parsing is supported.

+ if file_path.startswith('src/'):

+ file_path = file_path[4:]

+ for touched_file in change_log['touched_files']:

+ change_type = touched_file['change_type']

+ if change_type == ChangeType.MODIFY:

+ if _IsSameFile(touched_file['new_path'], file_path):

+ # TODO(stgao): use line number for git blame.

+ justification.AddFileChange(

+ 'modify', touched_file['new_path'], file_path, 0, 1)

+ elif _IsRelated(touched_file['new_path'], file_path):

+ justification.AddFileChange(

+ 'modify', touched_file['new_path'], file_path, 0, 1)

+ if change_type in (ChangeType.ADD, ChangeType.COPY, ChangeType.RENAME):

+ _CheckFile('add', touched_file['new_path'], file_path, 1, 5,

+ justification)

+ if change_type in (ChangeType.DELETE, ChangeType.RENAME):

+ _CheckFile('delete', touched_file['old_path'], file_path, 1, 5,

+ justification)

+ if not justification.score:

+ return None

+ else:

+ return justification.ToDict()

+def AnalyzeBuildFailure(failure_info, change_logs, failure_signals):

+ """Analyze the given failure signals, and figure out culprit CLs.

+ Args:

+ failure_info (dict): Output of pipeline DetectFirstFailurePipeline.

+ change_logs (dict): Output of pipeline PullChangelogPipeline.

+ failure_signals (dict): Output of pipeline ExtractSignalPipeline.

+ Returns:

+ A dict with the following form (could be an empty dict):

+ {

+ 'compile': {

+ 'cl_git_hash': {

+ 'suspect_points': 2,

+ 'score': 11,

+ 'hints': [

+ 'add a/b/x.cc',

+ 'delete a/b/y.cc',

+ 'modify e/f/z.cc',

+ ...

+ ]

+ },

+ ...

+ },

+ ...

+ }

+ """

+ analysis_result = {}

+ if not failure_info['failed']:

+ return analysis_result

+ failed_steps = failure_info['failed_steps']

+ builds = failure_info['builds']

+ for step_name, step_failure_info in failed_steps.iteritems():

+ failure_signal = FailureSignal.FromJson(failure_signals[step_name])

+ failed_build_number = step_failure_info['current_failure']

+ build_number = step_failure_info['first_failure']

+ step_analysis_result = {}

+ while build_number <= failed_build_number:

+ for revision in builds[str(build_number)]['blame_list']:

+ justification_dict = _CheckFiles(failure_signal, change_logs[revision])

+ if justification_dict:

+ step_analysis_result[revision] = justification_dict

+ build_number += 1

+ if step_analysis_result:

+ # TODO(stgao): sorted CLs related to a step failure.

+ analysis_result[step_name] = step_analysis_result

+ return analysis_result

« no previous file with comments | « appengine/findit/model/step.py ('k') | appengine/findit/waterfall/build_failure_analysis_pipelines.py » ('j') | no next file with comments »