appengine/findit/waterfall/build_failure_analysis.py - Issue 2230103002: [Findit] Pipeline change to save suspected cls to data store.

Side by Side Diff: appengine/findit/waterfall/build_failure_analysis.py

Issue 2230103002: [Findit] Pipeline change to save suspected cls to data store. (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@0808-resubmit-suspected_cl_model

Patch Set: . Created 4 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« appengine/findit/handlers/test/build_failure_test.py ('K') | « appengine/findit/handlers/test/list_analyses_test.py ('k') | appengine/findit/waterfall/build_util.py » ('j') | appengine/findit/waterfall/identify_try_job_culprit_pipeline.py » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 # Copyright 2015 The Chromium Authors. All rights reserved.	1 # Copyright 2015 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 import collections	5 from collections import defaultdict

6 import os	6 import os

7 import re	7 import re

8	8

9 from common.diff import ChangeType	9 from common.diff import ChangeType

10 from common.git_repository import GitRepository	10 from common.git_repository import GitRepository

11 from common.http_client_appengine import HttpClientAppengine as HttpClient	11 from common.http_client_appengine import HttpClientAppengine as HttpClient

12 from waterfall import waterfall_config	12 from waterfall import waterfall_config

13 from waterfall.failure_signal import FailureSignal	13 from waterfall.failure_signal import FailureSignal

14	14

15	15

(...skipping 210 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
226 appearing in the compile failure.	226 appearing in the compile failure.

227 2) If a hint shows that a CL is likely-suspected, the hint is given 1	227 2) If a hint shows that a CL is likely-suspected, the hint is given 1

228 score point. Eg. a CL is just likely suspected if it only changed a	228 score point. Eg. a CL is just likely suspected if it only changed a

229 related file (x_impl.cc vs. x.h) appearing in a failure.	229 related file (x_impl.cc vs. x.h) appearing in a failure.

230 2. hints: each hint is a string describing a reason for suspecting a CL and	230 2. hints: each hint is a string describing a reason for suspecting a CL and

231 could be shown to the user (eg., "added x_impl.cc (and it was in log)").	231 could be shown to the user (eg., "added x_impl.cc (and it was in log)").

232 """	232 """

233	233

234 def __init__(self):	234 def __init__(self):

235 self._score = 0	235 self._score = 0

236 self._hints = collections.defaultdict(int)	236 self._hints = defaultdict(int)

237	237

238 @property	238 @property

239 def score(self):	239 def score(self):

240 return self._score	240 return self._score

241	241

242 def AddFileChange(self, change_action, changed_src_file_path,	242 def AddFileChange(self, change_action, changed_src_file_path,

243 file_path_in_log, score, num_file_name_occurrences,	243 file_path_in_log, score, num_file_name_occurrences,

244 changed_line_numbers=None):	244 changed_line_numbers=None):

245 """Adds a suspected file change.	245 """Adds a suspected file change.

246	246

(...skipping 311 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
558 failure_signal (FailureSignal): The failure signal of a failed step or test.	558 failure_signal (FailureSignal): The failure signal of a failed step or test.

559 change_log (dict): The change log of a CL as returned by	559 change_log (dict): The change log of a CL as returned by

560 common.change_log.ChangeLog.ToDict().	560 common.change_log.ChangeLog.ToDict().

561 deps_info (dict): Output of pipeline ExtractDEPSInfoPipeline.	561 deps_info (dict): Output of pipeline ExtractDEPSInfoPipeline.

562	562

563 Returns:	563 Returns:

564 A dict as returned by _Justification.ToDict() if the CL is suspected for the	564 A dict as returned by _Justification.ToDict() if the CL is suspected for the

565 failure; otherwise None.	565 failure; otherwise None.

566 """	566 """

567 # Use a dict to map each file name of the touched files to their occurrences.	567 # Use a dict to map each file name of the touched files to their occurrences.

568 file_name_occurrences = collections.defaultdict(int)	568 file_name_occurrences = defaultdict(int)

569 for touched_file in change_log['touched_files']:	569 for touched_file in change_log['touched_files']:

570 change_type = touched_file['change_type']	570 change_type = touched_file['change_type']

571 if (change_type in (ChangeType.ADD, ChangeType.COPY,	571 if (change_type in (ChangeType.ADD, ChangeType.COPY,

572 ChangeType.RENAME, ChangeType.MODIFY)):	572 ChangeType.RENAME, ChangeType.MODIFY)):

573 file_name = os.path.basename(touched_file['new_path'])	573 file_name = os.path.basename(touched_file['new_path'])

574 file_name_occurrences[file_name] += 1	574 file_name_occurrences[file_name] += 1

575	575

576 if change_type in (ChangeType.DELETE, ChangeType.RENAME):	576 if change_type in (ChangeType.DELETE, ChangeType.RENAME):

577 file_name = os.path.basename(touched_file['old_path'])	577 file_name = os.path.basename(touched_file['old_path'])

578 file_name_occurrences[file_name] += 1	578 file_name_occurrences[file_name] += 1

(...skipping 14 matching lines...) Expand all Loading...
593	593

594 _CheckFileInDependencyRolls(file_path_in_log, rolls, justification,	594 _CheckFileInDependencyRolls(file_path_in_log, rolls, justification,

595 line_numbers)	595 line_numbers)

596	596

597 if not justification.score:	597 if not justification.score:

598 return None	598 return None

599 else:	599 else:

600 return justification.ToDict()	600 return justification.ToDict()

601	601

602	602

	603 class _CLInfo(object):

	604 """A object of information we need for a suspected CL.

	605

	606 The information is specific to current build.

	607 """

	608 def __init__(self):

	609 self.failures = defaultdict(list)

	610 self.top_score = 0

	611 self.url = None

	612

	613 @property

	614 def not_none_failures(self):
	stgao 2016/09/28 00:13:27: Can we have a better name here too? chanli 2016/09/30 20:41:01 (in reply to stgao): Removed it.
	615 # For steps like compile, there is no test_name,

	616 # to ensure we include them in the dict, used None as placeholder.
	stgao 2016/09/28 00:13:27: Can we use a singleton object like below? `NO_TEST_PLACEHOLDER = object()` chanli 2016/09/30 20:41:01 (in reply to stgao): I don't understand your suggestion... Used another way.
	617 # Now removes those Nones.

	618 failures = {

	619 key: [t for t in self.failures[key] if t] for key in self.failures

	620 }

	621 return failures

	622

	623

	624 def _SaveFailureToMap(

	625 cl_failure_map, new_suspected_cl_dict, step_name, test_name, top_score):

	626 """Saves a failure's info to the cl that caused it."""

	627 cl_key = (

	628 new_suspected_cl_dict['repo_name'], new_suspected_cl_dict['revision'],

	629 new_suspected_cl_dict['commit_position'])

	630

	631 cl_failure_map[cl_key].failures[step_name].append(test_name)

	632 # Ignores the case where in the same build for the same cl,

	633 # we have different scores.

	634 # Not sure if we need to handle it since it should be rare.

	635 cl_failure_map[cl_key].top_score = (

	636 cl_failure_map[cl_key].top_score or top_score)

	637 cl_failure_map[cl_key].url = (

	638 cl_failure_map[cl_key].url or new_suspected_cl_dict['url'])

	639

	640

	641 def _ConvertCLFailureMapToList(cl_failure_map):

	642 suspected_cls = []

	643 for cl_key, cl_info in cl_failure_map.iteritems():

	644 suspected_cl = {}

	645 (suspected_cl['repo_name'], suspected_cl['revision'],

	646 suspected_cl['commit_position']) = cl_key

	647 suspected_cl['url'] = cl_info.url

	648 suspected_cl['failures'] = cl_info.not_none_failures

	649 suspected_cl['top_score'] = cl_info.top_score

	650

	651 suspected_cls.append(suspected_cl)

	652 return suspected_cls

	653

603 def AnalyzeBuildFailure(	654 def AnalyzeBuildFailure(

604 failure_info, change_logs, deps_info, failure_signals):	655 failure_info, change_logs, deps_info, failure_signals):

605 """Analyze the given failure signals, and figure out culprit CLs.	656 """Analyzes the given failure signals, and figure out culprit CLs.

606	657

607 Args:	658 Args:

608 failure_info (dict): Output of pipeline DetectFirstFailurePipeline.	659 failure_info (dict): Output of pipeline DetectFirstFailurePipeline.

609 change_logs (dict): Output of pipeline PullChangelogPipeline.	660 change_logs (dict): Output of pipeline PullChangelogPipeline.

610 deps_info (dict): Output of pipeline ExtractDEPSInfoPipeline.	661 deps_info (dict): Output of pipeline ExtractDEPSInfoPipeline.

611 failure_signals (dict): Output of pipeline ExtractSignalPipeline.	662 failure_signals (dict): Output of pipeline ExtractSignalPipeline.

612	663

613 Returns:	664 Returns:

614 A dict with the following form:	665 A dict with the following form:

615 {	666 {

(...skipping 16 matching lines...) Expand all Loading...
632 'modify e/f/z.cc': 1,	683 'modify e/f/z.cc': 1,

633 ...	684 ...

634 }	685 }

635 },	686 },

636 ...	687 ...

637 ],	688 ],

638 },	689 },

639 ...	690 ...

640 ]	691 ]

641 }	692 }

	693

	694 And a list of suspected_cls format as below:

	695 [

	696 {

	697 'repo_name': 'chromium',

	698 'revision': 'r98_1',

	699 'commit_position': None,

	700 'url': None,

	701 'failures': {

	702 'b': ['Unittest2.Subtest1', 'Unittest3.Subtest2']

	703 },

	704 'top_score': 4

	705 },

	706 ...

	707 ]

642 """	708 """

643 analysis_result = {	709 analysis_result = {

644 'failures': []	710 'failures': []

645 }	711 }

646	712

647 if not failure_info['failed'] or not failure_info['chromium_revision']:	713 if not failure_info['failed'] or not failure_info['chromium_revision']:

648 # Bail out if no failed step or no chromium revision.	714 # Bail out if no failed step or no chromium revision.

649 return analysis_result	715 return analysis_result, []

650	716

651 def CreateCLInfoDict(justification_dict, build_number, change_log):	717 def CreateCLInfoDict(justification_dict, build_number, change_log):

652 # TODO(stgao): remove hard-coded 'chromium' when DEPS file parsing is	718 # TODO(stgao): remove hard-coded 'chromium' when DEPS file parsing is

653 # supported.	719 # supported.

654 cl_info = {	720 cl_info = {

655 'build_number': build_number,	721 'build_number': build_number,

656 'repo_name': 'chromium',	722 'repo_name': 'chromium',

657 'revision': change_log['revision'],	723 'revision': change_log['revision'],

658 'commit_position': change_log.get('commit_position'),	724 'commit_position': change_log.get('commit_position'),

659 'url':	725 'url':

660 change_log.get('code_review_url') or change_log.get('commit_url'),	726 change_log.get('code_review_url') or change_log.get('commit_url'),

661 }	727 }

662	728

663 cl_info.update(justification_dict)	729 cl_info.update(justification_dict)

664 return cl_info	730 return cl_info

665	731

666 failed_steps = failure_info['failed_steps']	732 failed_steps = failure_info['failed_steps']

667 builds = failure_info['builds']	733 builds = failure_info['builds']

668 master_name = failure_info.get('master_name')	734 master_name = failure_info['master_name']

	735

	736 cl_failure_map = defaultdict(_CLInfo)

669	737

670 for step_name, step_failure_info in failed_steps.iteritems():	738 for step_name, step_failure_info in failed_steps.iteritems():

671 is_test_level = step_failure_info.get('tests') is not None	739 is_test_level = step_failure_info.get('tests') is not None

672	740

673 failed_build_number = step_failure_info['current_failure']	741 failed_build_number = step_failure_info['current_failure']

674 if step_failure_info.get('last_pass') is not None:	742 if step_failure_info.get('last_pass') is not None:

675 start_build_number = step_failure_info.get('last_pass') + 1	743 start_build_number = step_failure_info.get('last_pass') + 1

676 else:	744 else:

677 start_build_number = step_failure_info['first_failure']	745 start_build_number = step_failure_info['first_failure']

678 step_analysis_result = {	746 step_analysis_result = {

(...skipping 30 matching lines...) Expand all Loading...
709 test_signal, change_logs[revision], deps_info)	777 test_signal, change_logs[revision], deps_info)

710	778

711 if not justification_dict:	779 if not justification_dict:

712 continue	780 continue

713	781

714 new_suspected_cl_dict = CreateCLInfoDict(	782 new_suspected_cl_dict = CreateCLInfoDict(

715 justification_dict, build_number, change_logs[revision])	783 justification_dict, build_number, change_logs[revision])

716 test_analysis_result['suspected_cls'].append(	784 test_analysis_result['suspected_cls'].append(

717 new_suspected_cl_dict)	785 new_suspected_cl_dict)

718	786

	787 _SaveFailureToMap(

	788 cl_failure_map, new_suspected_cl_dict, step_name, test_name,

	789 max(justification_dict['hints'].values()))

	790

719 # Checks Files on step level using step level signals	791 # Checks Files on step level using step level signals

720 # regardless of test level signals so we can make sure	792 # regardless of test level signals so we can make sure

721 # no duplicate justifications added to the step result.	793 # no duplicate justifications added to the step result.

722 failure_signal = FailureSignal.FromDict(failure_signals[step_name])	794 failure_signal = FailureSignal.FromDict(failure_signals[step_name])

723 justification_dict = _CheckFiles(	795 justification_dict = _CheckFiles(

724 failure_signal, change_logs[revision], deps_info)	796 failure_signal, change_logs[revision], deps_info)

725	797

726 if not justification_dict:	798 if not justification_dict:

727 continue	799 continue

728	800

729 step_analysis_result['suspected_cls'].append(	801 new_suspected_cl_dict = CreateCLInfoDict(

730 CreateCLInfoDict(justification_dict, build_number,	802 justification_dict, build_number, change_logs[revision])

731 change_logs[revision]))	803 step_analysis_result['suspected_cls'].append(new_suspected_cl_dict)

	804

	805 if not is_test_level:

	806 _SaveFailureToMap(

	807 cl_failure_map, new_suspected_cl_dict, step_name, None,

	808 max(justification_dict['hints'].values()))

732	809

733 # TODO(stgao): sort CLs by score.	810 # TODO(stgao): sort CLs by score.

734 analysis_result['failures'].append(step_analysis_result)	811 analysis_result['failures'].append(step_analysis_result)

735	812

736 return analysis_result	813 suspected_cls = _ConvertCLFailureMapToList(cl_failure_map)

	814

	815 return analysis_result, suspected_cls

OLD	NEW