Index: appengine/findit/waterfall/build_failure_analysis.py |
diff --git a/appengine/findit/waterfall/build_failure_analysis.py b/appengine/findit/waterfall/build_failure_analysis.py |
new file mode 100644 |
index 0000000000000000000000000000000000000000..8db224af8922bc8370f7769d162b37f24c2f2520 |
--- /dev/null |
+++ b/appengine/findit/waterfall/build_failure_analysis.py |
@@ -0,0 +1,192 @@ |
+# Copyright 2015 The Chromium Authors. All rights reserved. |
+# Use of this source code is governed by a BSD-style license that can be |
+# found in the LICENSE file. |
+ |
+import collections |
+import os |
+import re |
+ |
+from common.diff import ChangeType |
+from waterfall.failure_signal import FailureSignal |
+ |
+ |
def IsSameFile(src_file, file_path):
  """Guess whether the two paths refer to the same file.

  File paths extracted from a failure might not be full paths, so the
  comparison is done by postfix.
  Example:
    src/chrome/test/base/chrome_process_util.h <-> base/chrome_process_util.h
  """
  # A postfix match is only meaningful when the final path components agree.
  same_base_name = (
      os.path.basename(src_file) == os.path.basename(file_path))
  return same_base_name and src_file.endswith(file_path)
+ |
+ |
def JoinAsFilePath(file_dir, file_name):
  """Join a directory and a file name with '/' as the separator.

  NOTE(review): '/' is used instead of os.path.join — these paths appear to
  come from build output and always use forward slashes regardless of the
  host platform; confirm before switching to os.path.join.
  """
  return '%s/%s' % (file_dir, file_name) if file_dir else file_name
+ |
+ |
def NormalizeObjectFile(file_path):
  """Map a compiled object file path back to a source-relative path.

  During compile, a/b/c/file.cc in TARGET will be compiled into the object
  file a/b/c/TARGET.file.o, so the TARGET component has to be stripped.
  """
  if file_path.startswith('obj/'):
    file_path = file_path[len('obj/'):]

  file_dir, file_name = os.path.split(file_path)

  # 'TARGET.file.o' -> 'file.o'; a plain 'file.o' is left untouched because
  # the remainder after the first dot ('o') does not itself end with '.o'.
  target, dot, remainder = file_name.partition('.')
  if dot and remainder.endswith('.o'):
    file_name = remainder

  # Join with '/' (not os.path.join): these paths use forward slashes.
  return '%s/%s' % (file_dir, file_name) if file_dir else file_name
qyearsley
2015/01/15 21:15:03
For both uses of JoinAsFilePath in this file, you
stgao
2015/01/16 20:21:38
Done.
Switch to os.path.join, but need to do a re
qyearsley
2015/01/16 22:55:25
Ah, very observant. Sounds good.
|
+ |
+ |
# Common postfixes stripped from file names when guessing correlation between
# files (e.g. file_impl.cc, file_win.cc and file.h share the stem 'file').
COMMON_POSTFIXES = [
    'impl',
    'gcc', 'msvc',
    'arm', 'arm64', 'mips', 'portable', 'x86',
    'android', 'ios', 'linux', 'mac', 'ozone', 'posix', 'win',
    'aura', 'x', 'x11',
]

# Each pattern captures the postfix (including its leading '_') in group(1).
# Raw strings are used: '\_' is an invalid escape sequence ('_' needs no
# escaping in a regex and '\_' raises a SyntaxWarning on modern Python).
COMMON_POSTFIX_PATTERNS = [
    re.compile(r'.*(_[^_]*test)$'),
] + [
    re.compile(r'.*(_%s)$' % postfix) for postfix in COMMON_POSTFIXES
]
qyearsley
2015/01/15 19:10:28
It seems like these four lines should be un-indent
stgao
2015/01/16 20:21:38
Done.
|
+ |
+ |
def StripExtensionAndCommonPostfix(file_path):
  """Strip extension and common postfixes from file name to guess correlation.

  Examples:
    file_impl.cc, file_unittest.cc, file_impl_mac.h -> file
  """
  file_dir = os.path.dirname(file_path)
  file_name = os.path.splitext(os.path.basename(file_path))[0]

  # Keep stripping postfixes until no pattern matches any more, so stacked
  # postfixes like 'file_impl_mac' are fully reduced to 'file'.
  stripped = True
  while stripped:
    stripped = False
    for pattern in COMMON_POSTFIX_PATTERNS:
      matched = pattern.match(file_name)
      if matched:
        file_name = file_name[:-len(matched.group(1))]
        stripped = True
        break

  return JoinAsFilePath(file_dir, file_name)
+ |
+ |
def IsCorrelated(src_file, file_path):
  """Check if two files are correlated.

  Examples:
    1. file.h <-> file_impl.cc
    2. file_impl.cc <-> file_unittest.cc
    3. file_win.cc <-> file_mac.cc
    4. a/b/x.cc <-> a/b/y.cc
    5. x.h <-> x.cc
  """
  # Object files need mapping back to source-relative paths first.
  if file_path.endswith('.o'):
    file_path = NormalizeObjectFile(file_path)

  # Same stem once extensions and common postfixes are removed, e.g.
  # a/b/file_impl.cc <-> a/b/file_unittest.cc.
  stripped_src = StripExtensionAndCommonPostfix(src_file)
  stripped_other = StripExtensionAndCommonPostfix(file_path)
  if IsSameFile(stripped_src, stripped_other):
    return True

  # Both files live in the same (non-empty) directory: a/b/x.cc <-> a/b/y.cc.
  # TODO: cause noisy result?
  src_file_dir = os.path.dirname(src_file)
  return src_file_dir and src_file_dir == os.path.dirname(file_path)
+ |
+ |
def CheckFiles(failure_signal, change_log):
  """Check files touched by a change against files in a failure signal.

  Args:
    failure_signal: object whose .files maps file paths from the failure log
        to extra info (the values are not used here).
    change_log: dict with a 'touched_files' list; each entry is a dict with
        keys 'change_type', 'old_path' and 'new_path'.

  Returns:
    None if no touched file matches or correlates with the failure signal;
    otherwise a dict {'suspects': int, 'scores': int, 'hints': [str]}
    justifying why this change is suspected.
  """
  result = {
      'suspects': 0,
      'scores': 0,
      'hints': [],
  }

  def AddHint(change_action, src_file, file_path):
    # TODO: make hint more descriptive?
    if src_file != file_path:
      result['hints'].append(
          '%s %s (%s)' % (change_action, src_file, file_path))
    else:
      result['hints'].append('%s %s' % (change_action, src_file))

  def CheckFile(change_action, src_file, file_path, suspect, score):
    if IsSameFile(src_file, file_path):
      result['suspects'] += suspect
      result['scores'] += score
      AddHint(change_action, src_file, file_path)
    elif IsCorrelated(src_file, file_path):
      result['scores'] += 1
      AddHint(change_action, src_file, file_path)

  # Only the file paths (keys) matter here; the values are ignored.
  for file_path in failure_signal.files:
    # TODO(stgao): remove this hack when DEPS parsing is supported.
    if file_path.startswith('src/'):
      file_path = file_path[4:]

    for touched_file in change_log['touched_files']:
      change_type = touched_file['change_type']

      if change_type in (ChangeType.ADD, ChangeType.COPY, ChangeType.RENAME):
        CheckFile('add', touched_file['new_path'], file_path, 1, 5)

      if change_type == ChangeType.MODIFY:
        # TODO: use line number for git blame.
        # A modification scores 1 whether the file is identical or merely
        # correlated, so suspect=0 and score=1 reproduce the old special case.
        CheckFile('modify', touched_file['new_path'], file_path, 0, 1)

      if change_type in (ChangeType.DELETE, ChangeType.RENAME):
        CheckFile('delete', touched_file['old_path'], file_path, 1, 5)

  return result if result['scores'] else None
+ |
+ |
def AnalyzeBuildFailure(failure_info, change_logs, failure_signals):
  """Analyze a build failure and justify suspected changes per failed step.

  For every failed step, every revision in the blame lists of the builds from
  the first failure through the current failure is checked against the step's
  failure signal.

  Returns:
    A dict mapping step name -> {revision: justification}; steps (or builds)
    with no suspected revision are omitted. Empty dict if nothing failed.
  """
  analysis_result = {}

  if not failure_info['failed']:
    return analysis_result

  builds = failure_info['builds']
  for step_name, step_failure in failure_info['failed_steps'].items():
    failure_signal = FailureSignal.FromJson(failure_signals[step_name])
    first_failure = step_failure['first_failure']
    last_failure = step_failure['current_failure']

    step_analysis_result = {}
    for build_number in range(first_failure, last_failure + 1):
      for revision in builds[str(build_number)]['blame_list']:
        justification = CheckFiles(failure_signal, change_logs[revision])
        if justification:
          step_analysis_result[revision] = justification

    if step_analysis_result:
      # TODO: sorted CLs related to a step failure.
      analysis_result[step_name] = step_analysis_result

  return analysis_result