appengine/findit/waterfall/build_failure_analysis.py - Issue 2230103002: [Findit] Pipeline change to save suspected cls to data store.

Side by Side Diff: appengine/findit/waterfall/build_failure_analysis.py

Issue 2230103002: [Findit] Pipeline change to save suspected cls to data store. (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@0808-resubmit-suspected_cl_model

Patch Set: rebase Created 4 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « appengine/findit/handlers/test/list_analyses_test.py ('k') | appengine/findit/waterfall/identify_culprit_pipeline.py » ('j') | appengine/findit/waterfall/identify_try_job_culprit_pipeline.py » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 # Copyright 2015 The Chromium Authors. All rights reserved.	1 # Copyright 2015 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 import collections	5 from collections import defaultdict

6 import os	6 import os

7 import re	7 import re

8	8

9 from common.diff import ChangeType	9 from common.diff import ChangeType

10 from common.git_repository import GitRepository	10 from common.git_repository import GitRepository

11 from common.http_client_appengine import HttpClientAppengine as HttpClient	11 from common.http_client_appengine import HttpClientAppengine as HttpClient

12 from waterfall import waterfall_config	12 from waterfall import waterfall_config

13 from waterfall.failure_signal import FailureSignal	13 from waterfall.failure_signal import FailureSignal

14	14

15	15

(...skipping 210 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
226 appearing in the compile failure.	226 appearing in the compile failure.

227 2) If a hint shows that a CL is likely-suspected, the hint is given 1	227 2) If a hint shows that a CL is likely-suspected, the hint is given 1

228 score point. Eg. a CL is just likely suspected if it only changed a	228 score point. Eg. a CL is just likely suspected if it only changed a

229 related file (x_impl.cc vs. x.h) appearing in a failure.	229 related file (x_impl.cc vs. x.h) appearing in a failure.

230 2. hints: each hint is a string describing a reason for suspecting a CL and	230 2. hints: each hint is a string describing a reason for suspecting a CL and

231 could be shown to the user (eg., "added x_impl.cc (and it was in log)").	231 could be shown to the user (eg., "added x_impl.cc (and it was in log)").

232 """	232 """

233	233

234 def __init__(self):	234 def __init__(self):

235 self._score = 0	235 self._score = 0

236 self._hints = collections.defaultdict(int)	236 self._hints = defaultdict(int)

237	237

238 @property	238 @property

239 def score(self):	239 def score(self):

240 return self._score	240 return self._score

241	241

242 def AddFileChange(self, change_action, changed_src_file_path,	242 def AddFileChange(self, change_action, changed_src_file_path,

243 file_path_in_log, score, num_file_name_occurrences,	243 file_path_in_log, score, num_file_name_occurrences,

244 changed_line_numbers=None):	244 changed_line_numbers=None):

245 """Adds a suspected file change.	245 """Adds a suspected file change.

246	246

(...skipping 311 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
558 failure_signal (FailureSignal): The failure signal of a failed step or test.	558 failure_signal (FailureSignal): The failure signal of a failed step or test.

559 change_log (dict): The change log of a CL as returned by	559 change_log (dict): The change log of a CL as returned by

560 common.change_log.ChangeLog.ToDict().	560 common.change_log.ChangeLog.ToDict().

561 deps_info (dict): Output of pipeline ExtractDEPSInfoPipeline.	561 deps_info (dict): Output of pipeline ExtractDEPSInfoPipeline.

562	562

563 Returns:	563 Returns:

564 A dict as returned by _Justification.ToDict() if the CL is suspected for the	564 A dict as returned by _Justification.ToDict() if the CL is suspected for the

565 failure; otherwise None.	565 failure; otherwise None.

566 """	566 """

567 # Use a dict to map each file name of the touched files to their occurrences.	567 # Use a dict to map each file name of the touched files to their occurrences.

568 file_name_occurrences = collections.defaultdict(int)	568 file_name_occurrences = defaultdict(int)

569 for touched_file in change_log['touched_files']:	569 for touched_file in change_log['touched_files']:

570 change_type = touched_file['change_type']	570 change_type = touched_file['change_type']

571 if (change_type in (ChangeType.ADD, ChangeType.COPY,	571 if (change_type in (ChangeType.ADD, ChangeType.COPY,

572 ChangeType.RENAME, ChangeType.MODIFY)):	572 ChangeType.RENAME, ChangeType.MODIFY)):

573 file_name = os.path.basename(touched_file['new_path'])	573 file_name = os.path.basename(touched_file['new_path'])

574 file_name_occurrences[file_name] += 1	574 file_name_occurrences[file_name] += 1

575	575

576 if change_type in (ChangeType.DELETE, ChangeType.RENAME):	576 if change_type in (ChangeType.DELETE, ChangeType.RENAME):

577 file_name = os.path.basename(touched_file['old_path'])	577 file_name = os.path.basename(touched_file['old_path'])

578 file_name_occurrences[file_name] += 1	578 file_name_occurrences[file_name] += 1

(...skipping 14 matching lines...) Expand all Loading...
593	593

594 _CheckFileInDependencyRolls(file_path_in_log, rolls, justification,	594 _CheckFileInDependencyRolls(file_path_in_log, rolls, justification,

595 line_numbers)	595 line_numbers)

596	596

597 if not justification.score:	597 if not justification.score:

598 return None	598 return None

599 else:	599 else:

600 return justification.ToDict()	600 return justification.ToDict()

601	601

602	602

	603 class _CLInfo(object):

	604 """An object of information we need for a suspected CL.

	605

	606 The information is specific to the current build.

	607 """

	608 def __init__(self):

	609 self.failures = defaultdict(list)

	610 self.top_score = 0

	611 self.url = None

	612

	613 @property

	614 def not_none_failures(self):

	615 failures = {

	616 key: [t for t in self.failures[key] if t] for key in self.failures

	617 }

	618 return failures

	619

	620

	621 def _SaveFailureToMap(

	622 cl_failure_map, new_suspected_cl_dict, step_name, test_name, top_score):

	623 """Saves a failure's info to the cl that caused it."""

	624 cl_key = (

	625 new_suspected_cl_dict['repo_name'], new_suspected_cl_dict['revision'],

	626 new_suspected_cl_dict['commit_position'])

	627

	628 cl_failure_map[cl_key].failures[step_name].append(test_name)

	629 cl_failure_map[cl_key].top_score = (

	630 cl_failure_map[cl_key].top_score or top_score)

	631 cl_failure_map[cl_key].url = (

	632 cl_failure_map[cl_key].url or new_suspected_cl_dict['url'])

	633

	634

	635 def _ConvertCLFailureMapToList(cl_failure_map):

	636 suspected_cls = []

	637 for cl_key, cl_object in cl_failure_map.iteritems():
	stgao 2016/09/23 18:30:21: name nit: cl_object -> cl_info. Matching the class name will improve readability.
	chanli 2016/09/24 00:04:59 (in reply to the 2016/09/23 18:30:21 comment by stgao (slow)): Done.
	638 suspected_cl = {}

	639 (suspected_cl['repo_name'], suspected_cl['revision'],

	640 suspected_cl['commit_position']) = cl_key

	641 suspected_cl['url'] = cl_object.url

	642 suspected_cl['failures'] = cl_object.not_none_failures

	643 suspected_cl['top_score'] = cl_object.top_score

	644

	645 suspected_cls.append(suspected_cl)

	646 return suspected_cls

	647

603 def AnalyzeBuildFailure(	648 def AnalyzeBuildFailure(

604 failure_info, change_logs, deps_info, failure_signals):	649 failure_info, change_logs, deps_info, failure_signals):

605 """Analyze the given failure signals, and figure out culprit CLs.	650 """Analyzes the given failure signals, and figures out culprit CLs.

606	651

607 Args:	652 Args:

608 failure_info (dict): Output of pipeline DetectFirstFailurePipeline.	653 failure_info (dict): Output of pipeline DetectFirstFailurePipeline.

609 change_logs (dict): Output of pipeline PullChangelogPipeline.	654 change_logs (dict): Output of pipeline PullChangelogPipeline.

610 deps_info (dict): Output of pipeline ExtractDEPSInfoPipeline.	655 deps_info (dict): Output of pipeline ExtractDEPSInfoPipeline.

611 failure_signals (dict): Output of pipeline ExtractSignalPipeline.	656 failure_signals (dict): Output of pipeline ExtractSignalPipeline.

612	657

613 Returns:	658 Returns:

614 A dict with the following form:	659 A dict with the following form:

615 {	660 {

(...skipping 16 matching lines...) Expand all Loading...
632 'modify e/f/z.cc': 1,	677 'modify e/f/z.cc': 1,

633 ...	678 ...

634 }	679 }

635 },	680 },

636 ...	681 ...

637 ],	682 ],

638 },	683 },

639 ...	684 ...

640 ]	685 ]

641 }	686 }

	687

	688 And a list of suspected_cls formatted as below:

	689 [

	690 {

	691 'repo_name': 'chromium',

	692 'revision': 'r98_1',

	693 'commit_position': None,

	694 'url': None,

	695 'failures': {

	696 'b': ['Unittest2.Subtest1', 'Unittest3.Subtest2']

	697 },

	698 'top_score': 4

	699 },

	700 ...

	701 ]

642 """	702 """

643 analysis_result = {	703 analysis_result = {

644 'failures': []	704 'failures': []

645 }	705 }

646	706

647 if not failure_info['failed'] or not failure_info['chromium_revision']:	707 if not failure_info['failed'] or not failure_info['chromium_revision']:

648 # Bail out if no failed step or no chromium revision.	708 # Bail out if no failed step or no chromium revision.

649 return analysis_result	709 return analysis_result, []

650	710

651 def CreateCLInfoDict(justification_dict, build_number, change_log):	711 def CreateCLInfoDict(justification_dict, build_number, change_log):

652 # TODO(stgao): remove hard-coded 'chromium' when DEPS file parsing is	712 # TODO(stgao): remove hard-coded 'chromium' when DEPS file parsing is

653 # supported.	713 # supported.

654 cl_info = {	714 cl_info = {

655 'build_number': build_number,	715 'build_number': build_number,

656 'repo_name': 'chromium',	716 'repo_name': 'chromium',

657 'revision': change_log['revision'],	717 'revision': change_log['revision'],

658 'commit_position': change_log.get('commit_position'),	718 'commit_position': change_log.get('commit_position'),

659 'url':	719 'url':

660 change_log.get('code_review_url') or change_log.get('commit_url'),	720 change_log.get('code_review_url') or change_log.get('commit_url'),

661 }	721 }

662	722

663 cl_info.update(justification_dict)	723 cl_info.update(justification_dict)

664 return cl_info	724 return cl_info

665	725

666 failed_steps = failure_info['failed_steps']	726 failed_steps = failure_info['failed_steps']

667 builds = failure_info['builds']	727 builds = failure_info['builds']

668 master_name = failure_info.get('master_name')	728 master_name = failure_info['master_name']

	729

	730 cl_failure_map = defaultdict(_CLInfo)

669	731

670 for step_name, step_failure_info in failed_steps.iteritems():	732 for step_name, step_failure_info in failed_steps.iteritems():

671 is_test_level = step_failure_info.get('tests') is not None	733 is_test_level = step_failure_info.get('tests') is not None

672	734

673 failed_build_number = step_failure_info['current_failure']	735 failed_build_number = step_failure_info['current_failure']

674 if step_failure_info.get('last_pass') is not None:	736 if step_failure_info.get('last_pass') is not None:

675 start_build_number = step_failure_info.get('last_pass') + 1	737 start_build_number = step_failure_info.get('last_pass') + 1

676 else:	738 else:

677 start_build_number = step_failure_info['first_failure']	739 start_build_number = step_failure_info['first_failure']

678 step_analysis_result = {	740 step_analysis_result = {

(...skipping 30 matching lines...) Expand all Loading...
709 test_signal, change_logs[revision], deps_info)	771 test_signal, change_logs[revision], deps_info)

710	772

711 if not justification_dict:	773 if not justification_dict:

712 continue	774 continue

713	775

714 new_suspected_cl_dict = CreateCLInfoDict(	776 new_suspected_cl_dict = CreateCLInfoDict(

715 justification_dict, build_number, change_logs[revision])	777 justification_dict, build_number, change_logs[revision])

716 test_analysis_result['suspected_cls'].append(	778 test_analysis_result['suspected_cls'].append(

717 new_suspected_cl_dict)	779 new_suspected_cl_dict)

718	780

	781 _SaveFailureToMap(

	782 cl_failure_map, new_suspected_cl_dict, step_name, test_name,

	783 max(justification_dict['hints'].values()))

	784

719 # Checks Files on step level using step level signals	785 # Checks Files on step level using step level signals

720 # regardless of test level signals so we can make sure	786 # regardless of test level signals so we can make sure

721 # no duplicate justifications added to the step result.	787 # no duplicate justifications added to the step result.

722 failure_signal = FailureSignal.FromDict(failure_signals[step_name])	788 failure_signal = FailureSignal.FromDict(failure_signals[step_name])

723 justification_dict = _CheckFiles(	789 justification_dict = _CheckFiles(

724 failure_signal, change_logs[revision], deps_info)	790 failure_signal, change_logs[revision], deps_info)

725	791

726 if not justification_dict:	792 if not justification_dict:

727 continue	793 continue

728	794

729 step_analysis_result['suspected_cls'].append(	795 new_suspected_cl_dict = CreateCLInfoDict(

730 CreateCLInfoDict(justification_dict, build_number,	796 justification_dict, build_number, change_logs[revision])

731 change_logs[revision]))	797 step_analysis_result['suspected_cls'].append(new_suspected_cl_dict)

	798

	799 if not is_test_level:

	800 _SaveFailureToMap(

	801 cl_failure_map, new_suspected_cl_dict, step_name, None,

	802 max(justification_dict['hints'].values()))

732	803

733 # TODO(stgao): sort CLs by score.	804 # TODO(stgao): sort CLs by score.

734 analysis_result['failures'].append(step_analysis_result)	805 analysis_result['failures'].append(step_analysis_result)

735	806

736 return analysis_result	807 suspected_cls = _ConvertCLFailureMapToList(cl_failure_map)

	808

	809 return analysis_result, suspected_cls

OLD	NEW