Chromium Code Reviews| Index: appengine/findit/waterfall/build_failure_analysis.py |
| diff --git a/appengine/findit/waterfall/build_failure_analysis.py b/appengine/findit/waterfall/build_failure_analysis.py |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..22ca60e1082b6ebee77f19c3e4524bf8b8d94c51 |
| --- /dev/null |
| +++ b/appengine/findit/waterfall/build_failure_analysis.py |
| @@ -0,0 +1,219 @@ |
| +# Copyright 2015 The Chromium Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| + |
| +import collections |
| +import os |
| +import re |
| + |
| +from common.diff import ChangeType |
| +from waterfall.failure_signal import FailureSignal |
| + |
| + |
| +def _IsSameFile(src_file, file_path): |
| + """Guess if the two files are the same. |
|
qyearsley
2015/01/16 22:55:26
The first line of the docstring should use third-p
stgao
2015/01/21 20:29:22
Done for files in this CL.
Will do a cleanup of ot
|
| + |
| + File paths from a failure might not be full paths. So match string by postfix. |
| + Examples: |
| + src/chrome/test/base/chrome_process_util.h <-> base/chrome_process_util.h |
| + """ |
| + if os.path.basename(src_file) != os.path.basename(file_path): |
| + return False |
| + else: |
| + return src_file.endswith(file_path) |
| + |
| + |
| +def _NormalizeObjectFile(file_path): |
| + # During compile, a/b/c/file.cc in TARGET will be compiled into object |
| + # file a/b/c/TARGET.file.o, thus TARGET needs removing from path. |
| + if file_path.startswith('obj/'): |
| + file_path = file_path[4:] |
| + |
| + file_dir = os.path.dirname(file_path) |
| + |
|
qyearsley
2015/01/16 22:55:25
Probably no need to have visual separation above a
stgao
2015/01/21 20:29:23
Done.
|
| + file_name = os.path.basename(file_path) |
| + parts = file_name.split('.', 1) |
| + if len(parts) == 2 and parts[1].endswith('.o'): |
| + file_name = parts[1] |
| + |
| + return os.path.join(file_dir, file_name).replace(os.sep, '/') |
| + |
| + |
| +_COMMON_SUFFIXES= [ |
|
qyearsley
2015/01/16 22:55:25
Missing a space after "=".
stgao
2015/01/21 20:29:22
Ooops, done.
|
| + 'impl', |
| + 'gcc', 'msvc', |
| + 'arm', 'arm64', 'mips', 'portable', 'x86', |
| + 'android', 'ios', 'linux', 'mac', 'ozone', 'posix', 'win', |
| + 'aura', 'x', 'x11', |
| +] |
| + |
| +_COMMON_SUFFIX_PATTERNS = [ |
| + re.compile('.*(\_[^\_]*test)$'), |
|
qyearsley
2015/01/16 22:55:25
This also matches "foo_blahtest", which means the
stgao
2015/01/21 20:29:22
OK. Restrict this pattern to a list of common suff
|
| +] + [ |
| + re.compile('.*(\_%s)$' % postfix) for postfix in _COMMON_SUFFIXES |
| +] |
| + |
| + |
| +def _StripExtensionAndCommonPostfix(file_path): |
| + """Strip extension and common postfixes from file name to guess correlation. |
|
qyearsley
2015/01/16 22:55:26
Strip -> Strips
Postfix -> Suffix
stgao
2015/01/21 20:29:22
Done.
|
| + |
| + Examples: |
| + file_impl.cc, file_unittest.cc, file_impl_mac.h -> file |
| + """ |
| + file_dir = os.path.dirname(file_path) |
| + file_name = os.path.splitext(os.path.basename(file_path))[0] |
| + while True: |
| + match = None |
| + for postfix_patten in _COMMON_SUFFIX_PATTERNS: |
|
qyearsley
2015/01/16 22:55:25
postfix_patten -> suffix_pattern
stgao
2015/01/21 20:29:23
Done.
|
| + match = postfix_patten.match(file_name) |
| + if match: |
| + file_name = file_name[:-len(match.group(1))] |
| + break |
| + |
| + if not match: |
| + break |
| + |
| + return os.path.join(file_dir, file_name).replace(os.sep, '/') |
| + |
| + |
| +def _IsCorrelated(src_file, file_path): |
| + """Check if two files are correlated. |
|
qyearsley
2015/01/16 22:55:25
Maybe just "related", rather than "correlated"?
stgao
2015/01/21 20:29:22
Done.
|
| + |
| + Example of correlated files: |
| + 1. file.h <-> file_impl.cc |
| + 2. file_impl.cc <-> file_unittest.cc |
| + 3. file_win.cc <-> file_mac.cc |
| + 4. a/b/x.cc <-> a/b/y.cc |
| + 5. x.h <-> x.cc |
| + """ |
| + if file_path.endswith('.o'): |
| + file_path = _NormalizeObjectFile(file_path) |
| + |
| + if _IsSameFile(_StripExtensionAndCommonPostfix(src_file), |
| + _StripExtensionAndCommonPostfix(file_path)): |
| + return True |
| + |
| + # Two file are in the same directory: a/b/x.cc <-> a/b/y.cc |
| + # TODO: cause noisy result? |
| + src_file_dir = os.path.dirname(src_file) |
| + return src_file_dir and src_file_dir == os.path.dirname(file_path) |
| + |
| + |
| +class _Justification(object): |
|
qyearsley
2015/01/16 22:55:25
What does this class contain/represent? Evidence f
stgao
2015/01/21 20:29:23
Added a docstring to explain its usage, hints, and
|
| + def __init__(self): |
| + self.suspects = 0 |
| + self.scores = 0 |
|
qyearsley
2015/01/16 22:55:26
Using a plural word in a variable name usually sug
stgao
2015/01/21 20:29:22
Done.
Explanation of the difference between the s
|
| + self.hints = list() |
|
qyearsley
2015/01/16 22:55:26
Do these all need to be public? Also, you could us
stgao
2015/01/21 20:29:23
Done.
|
| + |
| + def AddFileChange(self, change_action, src_file, file_path, suspect, score): |
| + """Add a suspected file change. |
| + |
| + Args: |
| + change_action (str): The change type (modify/add/delete) of the file. |
|
qyearsley
2015/01/16 22:55:26
Saying what the arg type is is useful, but args se
stgao
2015/01/21 20:29:22
I also get used to the docstring format as you sug
qyearsley
2015/01/27 00:58:48
Alright, that looks good -- if it's in the style g
|
| + src_file (str): The file changed by a CL. |
| + file_path (str): The file showing in the failure log. |
| + suspect (int): Suspect points for the file change. |
| + score (int): Score for the file change. |
| + """ |
| + self.suspects += suspect |
| + self.scores += score |
| + |
| + # TODO: make hint more descriptive? |
| + if src_file != file_path: |
| + self.hints.append( |
| + '%s %s (%s)' % (change_action, src_file, file_path)) |
| + else: |
| + self.hints.append('%s %s' % (change_action, src_file)) |
| + |
| + def ToDict(self): |
| + return { |
| + 'suspects': self.suspects, |
| + 'scores': self.scores, |
| + 'hints': self.hints, |
| + } |
| + |
| + |
| +def _CheckFile( |
| + change_action, src_file, file_path, suspect, score, justification): |
| + """Check if the given files are the same or correlated. |
| + |
| + Args: |
| + change_action (str): The change type (modify/add/delete) of the file. |
| + src_file (str): The file changed by a CL. |
| + file_path (str): The file showing in the failure log. |
| + suspect (int): Suspect points if the two files are the same. |
| + score (int): Score if the two files are the same. |
| + justification (_Justification): An instance of _Justification. |
| + """ |
| + if _IsSameFile(src_file, file_path): |
| + justification.AddFileChange( |
| + change_action, src_file, file_path, suspect, score) |
| + elif _IsCorrelated(src_file, file_path): |
| + # For correlated files, do suspect=0 and score=1, because it is not highly |
| + # suspected. |
| + justification.AddFileChange( |
| + change_action, src_file, file_path, 0, 1) |
| + |
| + |
| +def CheckFiles(failure_signal, change_log): |
|
qyearsley
2015/01/16 22:55:26
Non-trivial public functions (CheckFiles and Analy
stgao
2015/01/21 20:29:22
Done.
|
| + justification = _Justification() |
| + |
| + for file_path, _ in failure_signal.files.iteritems(): |
| + # TODO(stgao): remove this hack when DEPS parsing is supported. |
| + if file_path.startswith('src/'): |
| + file_path = file_path[4:] |
| + |
| + for touched_file in change_log['touched_files']: |
| + change_type = touched_file['change_type'] |
| + |
| + if change_type == ChangeType.MODIFY: |
| + if _IsSameFile(touched_file['new_path'], file_path): |
| + # TODO: use line number for git blame. |
| + justification.AddFileChange( |
| + 'modify', touched_file['new_path'], file_path, 0, 1) |
| + elif _IsCorrelated(touched_file['new_path'], file_path): |
| + justification.AddFileChange( |
| + 'modify', touched_file['new_path'], file_path, 0, 1) |
| + |
| + if change_type in (ChangeType.ADD, ChangeType.COPY, ChangeType.RENAME): |
| + _CheckFile('add', touched_file['new_path'], file_path, 1, 5, |
| + justification) |
| + |
| + if change_type in (ChangeType.DELETE, ChangeType.RENAME): |
| + _CheckFile('delete', touched_file['old_path'], file_path, 1, 5, |
| + justification) |
| + |
| + if not justification.scores: |
| + return None |
| + else: |
| + return justification.ToDict() |
| + |
| + |
| +def AnalyzeBuildFailure(failure_info, change_logs, failure_signals): |
| + analysis_result = {} |
| + |
| + if not failure_info['failed']: |
| + return analysis_result |
| + |
| + failed_steps = failure_info['failed_steps'] |
| + builds = failure_info['builds'] |
| + for step_name, step_failure_info in failed_steps.iteritems(): |
| + failure_signal = FailureSignal.FromJson(failure_signals[step_name]) |
| + failed_build_number = step_failure_info['current_failure'] |
| + build_number = step_failure_info['first_failure'] |
| + |
| + step_analysis_result = {} |
| + |
| + while build_number <= failed_build_number: |
| + for revision in builds[str(build_number)]['blame_list']: |
| + justification = CheckFiles(failure_signal, change_logs[revision]) |
|
qyearsley
2015/01/16 22:55:26
Maybe justification_dict, since this (as is) is ei
stgao
2015/01/21 20:29:22
Done.
|
| + if justification: |
| + step_analysis_result[revision] = justification |
| + |
| + build_number += 1 |
| + |
| + if step_analysis_result: |
| + # TODO: sorted CLs related to a step failure. |
| + analysis_result[step_name] = step_analysis_result |
| + |
| + return analysis_result |