appengine/findit/waterfall/build_failure_analysis.py - Issue 838003004: [Findit] Add three sub-pipelines to analyze build failure.

Side by Side Diff: appengine/findit/waterfall/build_failure_analysis.py

Issue 838003004: [Findit] Add three sub-pipelines to analyze build failure. (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master

Patch Set: Just rebase. Created 5 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 # Copyright 2015 The Chromium Authors. All rights reserved.

	2 # Use of this source code is governed by a BSD-style license that can be

	3 # found in the LICENSE file.

	4

	5 import collections

	6 import os

	7 import re

	8

	9 from common.diff import ChangeType

	10 from waterfall.failure_signal import FailureSignal

	11

	12

	13 def _IsSameFile(src_file, file_path):

	14 """Guesses if the two files are the same.

	15

	16 Args:

	17 src_file (str): Full path of a file committed to git repo.

	18 file_path (str): Path of a file appearing in a failure log. It might not be

	19 a full path.

	20

	21 Returns:

	22 True if the two files are likely the same, otherwise False. Eg.:

	23 True: (chrome/test/base/chrome_process_util.h, base/chrome_process_util.h)

	24 True: (a/b/x.cc, a/b/x.cc)

	25 False: (c/x.cc, a/b/c/x.cc)

	26 """

	27 if src_file == file_path:

	28 return True

	29 return src_file.endswith('/%s' % file_path)

	30

	31

	32 def _NormalizeObjectFile(file_path):

	33 # During compile, a/b/c/file.cc in TARGET will be compiled into object

	34 # file a/b/c/TARGET.file.o, thus TARGET needs removing from path.

	35 if file_path.startswith('obj/'):

	36 file_path = file_path[4:]

	37 file_dir = os.path.dirname(file_path)

	38 file_name = os.path.basename(file_path)

	39 parts = file_name.split('.', 1)

	40 if len(parts) == 2 and parts[1].endswith('.o'):

	41 file_name = parts[1]

	42

	43 return os.path.join(file_dir, file_name).replace(os.sep, '/')

	44

	45

	46 _COMMON_SUFFIXES = [

	47 'impl',

	48 'browser_tests', 'browser_test', 'browsertest', 'browsertests',

	49 'unittests', 'unittest', 'tests', 'test',

	50 'gcc', 'msvc',

	51 'arm', 'arm64', 'mips', 'portable', 'x86',

	52 'android', 'ios', 'linux', 'mac', 'ozone', 'posix', 'win',

	53 'aura', 'x', 'x11',

	54 ]

	55

	56 _COMMON_SUFFIX_PATTERNS = [

	57 re.compile('.*(_%s)$' % suffix) for suffix in _COMMON_SUFFIXES

	58 ]

	59

	60

	61 def _StripExtensionAndCommonSuffix(file_path):

	62 """Strips extension and common suffixes from file name to guess relation.

	63

	64 Examples:

	65 file_impl.cc, file_unittest.cc, file_impl_mac.h -> file

	66 """

	67 file_dir = os.path.dirname(file_path)

	68 file_name = os.path.splitext(os.path.basename(file_path))[0]

	69 while True:

	70 match = None

	71 for suffix_patten in _COMMON_SUFFIX_PATTERNS:

	72 match = suffix_patten.match(file_name)

	73 if match:

	74 file_name = file_name[:-len(match.group(1))]

	75 break

	76

	77 if not match:

	78 break

	79

	80 return os.path.join(file_dir, file_name).replace(os.sep, '/')

	81

	82

	83 def _IsRelated(src_file, file_path):

	84 """Checks if two files are related.

	85

	86 Example of related files:

	87 1. file.h <-> file_impl.cc

	88 2. file_impl.cc <-> file_unittest.cc

	89 3. file_win.cc <-> file_mac.cc

	90 4. a/b/x.cc <-> a/b/y.cc

	91 5. x.h <-> x.cc

	92 """

	93 if file_path.endswith('.o'):

	94 file_path = _NormalizeObjectFile(file_path)

	95

	96 if _IsSameFile(_StripExtensionAndCommonSuffix(src_file),

	97 _StripExtensionAndCommonSuffix(file_path)):

	98 return True

	99

	100 # Two file are in the same directory: a/b/x.cc <-> a/b/y.cc

	101 # TODO: cause noisy result?

	102 src_file_dir = os.path.dirname(src_file)

	103 return src_file_dir and src_file_dir == os.path.dirname(file_path)

	104

	105

	106 class _Justification(object):

	107 """Justification for why a CL might be suspected for a build failure.

	108

	109 A justification includes:

	110 1. suspect points: for a highly-suspected CL, it is given some suspect points.

	111 Eg. a CL is highly suspected if it deleted a .cc file appearing in the

	112 compile failure.

	113 2. score: for a likely-suspected CL, it won't get suspect points, but a score.

	114 Eg. a CL is just likely suspected if it only changed a related file

	115 (x_impl.cc vs. x.h) appearing in a test failure.

	116 For a highly-suspected CL, it will get a high score besides suspect points.

	117 3. hints: each hint is a string describing a reason for suspecting a CL and

	118 could be shown to the user (eg., "add x_impl.cc").

	119 """

	120

	121 def __init__(self):

	122 self._suspect_points = 0

	123 self._score = 0

	124 self._hints = []

	125

	126 @property

	127 def score(self):

	128 return self._score

	129

	130 def AddFileChange(

	131 self, change_action, src_file, file_path, suspect_points, score):

	132 """Adds a suspected file change.

	133

	134 Args:

	135 change_action (str): One of the change types in common.diff.ChangeType.

	136 src_file (str): Changed file path in a CL.

	137 file_path (str): File path appearing in the failure log.

	138 suspect_points (int): Number of suspect points for the file change.

	139 score (int): Score number for the file change.

	140 """

	141 self._suspect_points += suspect_points

	142 self._score += score

	143

	144 # TODO: make hint more descriptive?

	145 if src_file != file_path:

	146 self._hints.append(

	147 '%s %s (%s)' % (change_action, src_file, file_path))

	148 else:

	149 self._hints.append('%s %s' % (change_action, src_file))

	150

	151 def ToDict(self):

	152 return {

	153 'suspect_points': self._suspect_points,

	154 'score': self._score,

	155 'hints': self._hints,

	156 }

	157

	158

	159 def _CheckFile(

	160 change_action, src_file, file_path, suspect_points, score, justification):

	161 """Checks if the given files are the same or correlated.

	162

	163 Args:

	164 change_action (str): One of the change types in common.diff.ChangeType.

	165 src_file (str): Changed file path in a CL.

	166 file_path (str): File path appearing in the failure log.

	167 suspect_points (int): Number of suspect points if two files are the same.

	168 score (int): Score number if two files are the same.

	169 justification (_Justification): An instance of _Justification.

	170 """

	171 if _IsSameFile(src_file, file_path):

	172 justification.AddFileChange(

	173 change_action, src_file, file_path, suspect_points, score)

	174 elif _IsRelated(src_file, file_path):

	175 # For correlated files, do suspect=0 and score=1, because it is just likely,

	176 # but not highly, suspected.

	177 justification.AddFileChange(

	178 change_action, src_file, file_path, 0, 1)

	179

	180

	181 def _CheckFiles(failure_signal, change_log):

	182 """Check files in the given change log of a CL against the failure signal.

	183

	184 Args:

	185 failure_signal (FailureSignal): The failure signal of a failed step or test.

	186 change_log (dict): The change log of a CL as returned by

	187 common.change_log.ChangeLog.ToJson(). # TODO(stgao): ToJson -> ToDict.

	188

	189 Returns:

	190 A dict as returned by _Justification.ToDict() if the CL is suspected for the

	191 failure; otherwise None.

	192 """

	193 justification = _Justification()

	194

	195 for file_path, _ in failure_signal.files.iteritems():

	196 # TODO(stgao): remove this hack when DEPS parsing is supported.

	197 if file_path.startswith('src/'):

	198 file_path = file_path[4:]

	199

	200 for touched_file in change_log['touched_files']:

	201 change_type = touched_file['change_type']

	202

	203 if change_type == ChangeType.MODIFY:

	204 if _IsSameFile(touched_file['new_path'], file_path):

	205 # TODO(stgao): use line number for git blame.

	206 justification.AddFileChange(

	207 'modify', touched_file['new_path'], file_path, 0, 1)

	208 elif _IsRelated(touched_file['new_path'], file_path):

	209 justification.AddFileChange(

	210 'modify', touched_file['new_path'], file_path, 0, 1)

	211

	212 if change_type in (ChangeType.ADD, ChangeType.COPY, ChangeType.RENAME):

	213 _CheckFile('add', touched_file['new_path'], file_path, 1, 5,

	214 justification)

	215

	216 if change_type in (ChangeType.DELETE, ChangeType.RENAME):

	217 _CheckFile('delete', touched_file['old_path'], file_path, 1, 5,

	218 justification)

	219

	220 if not justification.score:

	221 return None

	222 else:

	223 return justification.ToDict()

	224

	225

	226 def AnalyzeBuildFailure(failure_info, change_logs, failure_signals):

	227 """Analyze the given failure signals, and figure out culprit CLs.

	228

	229 Args:

	230 failure_info (dict): Output of pipeline DetectFirstFailurePipeline.

	231 change_logs (dict): Output of pipeline PullChangelogPipeline.

	232 failure_signals (dict): Output of pipeline ExtractSignalPipeline.

	233

	234 Returns:

	235 A dict with the following form (could be an empty dict):

	236 {

	237 'compile': {

	238 'cl_git_hash': {

	239 'suspect_points': 2,

	240 'score': 11,

	241 'hints': [

	242 'add a/b/x.cc',

	243 'delete a/b/y.cc',

	244 'modify e/f/z.cc',

	245 ...

	246 ]

	247 },

	248 ...

	249 },

	250 ...

	251 }

	252 """

	253 analysis_result = {}

	254

	255 if not failure_info['failed']:

	256 return analysis_result

	257

	258 failed_steps = failure_info['failed_steps']

	259 builds = failure_info['builds']

	260 for step_name, step_failure_info in failed_steps.iteritems():

	261 failure_signal = FailureSignal.FromJson(failure_signals[step_name])

	262 failed_build_number = step_failure_info['current_failure']

	263 build_number = step_failure_info['first_failure']

	264

	265 step_analysis_result = {}

	266

	267 while build_number <= failed_build_number:

	268 for revision in builds[str(build_number)]['blame_list']:

	269 justification_dict = _CheckFiles(failure_signal, change_logs[revision])

	270 if justification_dict:

	271 step_analysis_result[revision] = justification_dict

	272

	273 build_number += 1

	274

	275 if step_analysis_result:

	276 # TODO(stgao): sorted CLs related to a step failure.

	277 analysis_result[step_name] = step_analysis_result

	278

	279 return analysis_result

OLD	NEW

« no previous file with comments | « appengine/findit/model/step.py ('k') | appengine/findit/waterfall/build_failure_analysis_pipelines.py » ('j') | no next file with comments »