appengine/findit/waterfall/build_failure_analysis.py - Issue 838003004: [Findit] Add three sub-pipelines to analyze build failure.

Unified Diff: appengine/findit/waterfall/build_failure_analysis.py

Issue 838003004: [Findit] Add three sub-pipelines to analyze build failure. (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master

Patch Set: Address comments. Created 5 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« appengine/findit/common/git_repository.py ('K') | « appengine/findit/model/step.py ('k') | appengine/findit/waterfall/build_failure_analysis_pipelines.py » ('j') | appengine/findit/waterfall/identify_culprit_pipeline.py » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: appengine/findit/waterfall/build_failure_analysis.py

diff --git a/appengine/findit/waterfall/build_failure_analysis.py b/appengine/findit/waterfall/build_failure_analysis.py

new file mode 100644

index 0000000000000000000000000000000000000000..22ca60e1082b6ebee77f19c3e4524bf8b8d94c51

--- /dev/null

+++ b/appengine/findit/waterfall/build_failure_analysis.py

@@ -0,0 +1,219 @@

+# Use of this source code is governed by a BSD-style license that can be

+# found in the LICENSE file.

+import collections

+import os

+import re

+from common.diff import ChangeType

+from waterfall.failure_signal import FailureSignal

+def _IsSameFile(src_file, file_path):

+ """Guess if the two files are the same.

qyearsley 2015/01/16 22:55:26 The first line of the docstring should use third-p

stgao 2015/01/21 20:29:22 Done for files in this CL. Will do a cleanup of ot

+ File paths from a failure might not be full paths. So match string by postfix.

+ Examples:

+ src/chrome/test/base/chrome_process_util.h <-> base/chrome_process_util.h

+ """

+ if os.path.basename(src_file) != os.path.basename(file_path):

+ return False

+ else:

+ return src_file.endswith(file_path)

+def _NormalizeObjectFile(file_path):

+ # During compile, a/b/c/file.cc in TARGET will be compiled into object

+ # file a/b/c/TARGET.file.o, thus TARGET needs removing from path.

+ if file_path.startswith('obj/'):

+ file_path = file_path[4:]

+ file_dir = os.path.dirname(file_path)

qyearsley 2015/01/16 22:55:25 Probably no need to have visual separation above a

stgao 2015/01/21 20:29:23 Done.

+ file_name = os.path.basename(file_path)

+ parts = file_name.split('.', 1)

+ if len(parts) == 2 and parts[1].endswith('.o'):

+ file_name = parts[1]

+ return os.path.join(file_dir, file_name).replace(os.sep, '/')

+_COMMON_SUFFIXES= [

qyearsley 2015/01/16 22:55:25 Missing a space after "=".

stgao 2015/01/21 20:29:22 Ooops, done.

+ 'impl',

+ 'gcc', 'msvc',

+ 'arm', 'arm64', 'mips', 'portable', 'x86',

+ 'android', 'ios', 'linux', 'mac', 'ozone', 'posix', 'win',

+ 'aura', 'x', 'x11',

+_COMMON_SUFFIX_PATTERNS = [

+ re.compile('.*(\_[^\_]*test)$'),

qyearsley 2015/01/16 22:55:25 This also matches "foo_blahtest", which means the

stgao 2015/01/21 20:29:22 OK. Restrict this pattern to a list of common suff

+] + [

+ re.compile('.*(\_%s)$' % postfix) for postfix in _COMMON_SUFFIXES

+def _StripExtensionAndCommonPostfix(file_path):

+ """Strip extension and common postfixes from file name to guess correlation.

qyearsley 2015/01/16 22:55:26 Strip -> Strips Postfix -> Suffix

stgao 2015/01/21 20:29:22 Done.

+ Examples:

+ file_impl.cc, file_unittest.cc, file_impl_mac.h -> file

+ """

+ file_dir = os.path.dirname(file_path)

+ file_name = os.path.splitext(os.path.basename(file_path))[0]

+ while True:

+ match = None

+ for postfix_patten in _COMMON_SUFFIX_PATTERNS:

qyearsley 2015/01/16 22:55:25 postfix_patten -> suffix_pattern

stgao 2015/01/21 20:29:23 Done.

+ match = postfix_patten.match(file_name)

+ if match:

+ file_name = file_name[:-len(match.group(1))]

+ break

+ if not match:

+ break

+ return os.path.join(file_dir, file_name).replace(os.sep, '/')

+def _IsCorrelated(src_file, file_path):

+ """Check if two files are correlated.

qyearsley 2015/01/16 22:55:25 Maybe just "related", rather than "correlated"?

stgao 2015/01/21 20:29:22 Done.

+ Example of correlated files:

+ 1. file.h <-> file_impl.cc

+ 2. file_impl.cc <-> file_unittest.cc

+ 3. file_win.cc <-> file_mac.cc

+ 4. a/b/x.cc <-> a/b/y.cc

+ 5. x.h <-> x.cc

+ """

+ if file_path.endswith('.o'):

+ file_path = _NormalizeObjectFile(file_path)

+ if _IsSameFile(_StripExtensionAndCommonPostfix(src_file),

+ _StripExtensionAndCommonPostfix(file_path)):

+ return True

+ # Two file are in the same directory: a/b/x.cc <-> a/b/y.cc

+ # TODO: cause noisy result?

+ src_file_dir = os.path.dirname(src_file)

+ return src_file_dir and src_file_dir == os.path.dirname(file_path)

+class _Justification(object):

qyearsley 2015/01/16 22:55:25 What does this class contain/represent? Evidence f

stgao 2015/01/21 20:29:23 Added a docstring to explain its usage, hints, and

+ def __init__(self):

+ self.suspects = 0

+ self.scores = 0

qyearsley 2015/01/16 22:55:26 Using a plural word in a variable name usually sug

stgao 2015/01/21 20:29:22 Done. Explanation of the difference between the s

+ self.hints = list()

qyearsley 2015/01/16 22:55:26 Do these all need to be public? Also, you could us

stgao 2015/01/21 20:29:23 Done.

+ def AddFileChange(self, change_action, src_file, file_path, suspect, score):

+ """Add a suspected file change.

+ Args:

+ change_action (str): The change type (modify/add/delete) of the file.

qyearsley 2015/01/16 22:55:26 Saying what the arg type is is useful, but args se

stgao 2015/01/21 20:29:22 I also get used to the docstring format as you sug

qyearsley 2015/01/27 00:58:48 Alright, that looks good -- if it's in the style g

+ src_file (str): The file changed by a CL.

+ file_path (str): The file showing in the failure log.

+ suspect (int): Suspect points for the file change.

+ score (int): Score for the file change.

+ """

+ self.suspects += suspect

+ self.scores += score

+ # TODO: make hint more descriptive?

+ if src_file != file_path:

+ self.hints.append(

+ '%s %s (%s)' % (change_action, src_file, file_path))

+ else:

+ self.hints.append('%s %s' % (change_action, src_file))

+ def ToDict(self):

+ return {

+ 'suspects': self.suspects,

+ 'scores': self.scores,

+ 'hints': self.hints,

+ }

+def _CheckFile(

+ change_action, src_file, file_path, suspect, score, justification):

+ """Check if the given files are the same or correlated.

+ Args:

+ change_action (str): The change type (modify/add/delete) of the file.

+ src_file (str): The file changed by a CL.

+ file_path (str): The file showing in the failure log.

+ suspect (int): Suspect points if the two files are the same.

+ score (int): Score if the two files are the same.

+ justification (_Justification): An instance of _Justification.

+ """

+ if _IsSameFile(src_file, file_path):

+ justification.AddFileChange(

+ change_action, src_file, file_path, suspect, score)

+ elif _IsCorrelated(src_file, file_path):

+ # For correlated files, do suspect=0 and score=1, because it is not highly

+ # suspected.

+ justification.AddFileChange(

+ change_action, src_file, file_path, 0, 1)

+def CheckFiles(failure_signal, change_log):

qyearsley 2015/01/16 22:55:26 Non-trivial public functions (CheckFiles and Analy

stgao 2015/01/21 20:29:22 Done.

+ justification = _Justification()

+ for file_path, _ in failure_signal.files.iteritems():

+ # TODO(stgao): remove this hack when DEPS parsing is supported.

+ if file_path.startswith('src/'):

+ file_path = file_path[4:]

+ for touched_file in change_log['touched_files']:

+ change_type = touched_file['change_type']

+ if change_type == ChangeType.MODIFY:

+ if _IsSameFile(touched_file['new_path'], file_path):

+ # TODO: use line number for git blame.

+ justification.AddFileChange(

+ 'modify', touched_file['new_path'], file_path, 0, 1)

+ elif _IsCorrelated(touched_file['new_path'], file_path):

+ justification.AddFileChange(

+ 'modify', touched_file['new_path'], file_path, 0, 1)

+ if change_type in (ChangeType.ADD, ChangeType.COPY, ChangeType.RENAME):

+ _CheckFile('add', touched_file['new_path'], file_path, 1, 5,

+ justification)

+ if change_type in (ChangeType.DELETE, ChangeType.RENAME):

+ _CheckFile('delete', touched_file['old_path'], file_path, 1, 5,

+ justification)

+ if not justification.scores:

+ return None

+ else:

+ return justification.ToDict()

+def AnalyzeBuildFailure(failure_info, change_logs, failure_signals):

+ analysis_result = {}

+ if not failure_info['failed']:

+ return analysis_result

+ failed_steps = failure_info['failed_steps']

+ builds = failure_info['builds']

+ for step_name, step_failure_info in failed_steps.iteritems():

+ failure_signal = FailureSignal.FromJson(failure_signals[step_name])

+ failed_build_number = step_failure_info['current_failure']

+ build_number = step_failure_info['first_failure']

+ step_analysis_result = {}

+ while build_number <= failed_build_number:

+ for revision in builds[str(build_number)]['blame_list']:

+ justification = CheckFiles(failure_signal, change_logs[revision])

qyearsley 2015/01/16 22:55:26 Maybe justification_dict, since this (as is) is ei

stgao 2015/01/21 20:29:22 Done.

+ if justification:

+ step_analysis_result[revision] = justification

+ build_number += 1

+ if step_analysis_result:

+ # TODO: sorted CLs related to a step failure.

+ analysis_result[step_name] = step_analysis_result

+ return analysis_result