appengine/findit/waterfall/build_failure_analysis.py - Issue 2230103002: [Findit] Pipeline change to save suspected cls to data store.

Side by Side Diff: appengine/findit/waterfall/build_failure_analysis.py

Issue 2230103002: [Findit] Pipeline change to save suspected cls to data store. (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@0808-resubmit-suspected_cl_model

Patch Set: Add todos and make sure analysis.suspected_cls have consistent format. Created 4 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « appengine/findit/handlers/test/list_analyses_test.py ('k') | appengine/findit/waterfall/identify_culprit_pipeline.py » ('j') | appengine/findit/waterfall/suspected_cl_util.py » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 # Copyright 2015 The Chromium Authors. All rights reserved.	1 # Copyright 2015 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 import collections	5 from collections import defaultdict

6 import os	6 import os

7 import re	7 import re

8	8

9 from common.diff import ChangeType	9 from common.diff import ChangeType

10 from common.git_repository import GitRepository	10 from common.git_repository import GitRepository

11 from common.http_client_appengine import HttpClientAppengine as HttpClient	11 from common.http_client_appengine import HttpClientAppengine as HttpClient

12 from waterfall import waterfall_config	12 from waterfall import waterfall_config

13 from waterfall.failure_signal import FailureSignal	13 from waterfall.failure_signal import FailureSignal

14	14

15	15

(...skipping 210 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
226 appearing in the compile failure.	226 appearing in the compile failure.

227 2) If a hint shows that a CL is likely-suspected, the hint is given 1	227 2) If a hint shows that a CL is likely-suspected, the hint is given 1

228 score point. Eg. a CL is just likely suspected if it only changed a	228 score point. Eg. a CL is just likely suspected if it only changed a

229 related file (x_impl.cc vs. x.h) appearing in a failure.	229 related file (x_impl.cc vs. x.h) appearing in a failure.

230 2. hints: each hint is a string describing a reason for suspecting a CL and	230 2. hints: each hint is a string describing a reason for suspecting a CL and

231 could be shown to the user (eg., "added x_impl.cc (and it was in log)").	231 could be shown to the user (eg., "added x_impl.cc (and it was in log)").

232 """	232 """

233	233

234 def __init__(self):	234 def __init__(self):

235 self._score = 0	235 self._score = 0

236 self._hints = collections.defaultdict(int)	236 self._hints = defaultdict(int)

237	237

238 @property	238 @property

239 def score(self):	239 def score(self):

240 return self._score	240 return self._score

241	241

242 def AddFileChange(self, change_action, changed_src_file_path,	242 def AddFileChange(self, change_action, changed_src_file_path,

243 file_path_in_log, score, num_file_name_occurrences,	243 file_path_in_log, score, num_file_name_occurrences,

244 changed_line_numbers=None):	244 changed_line_numbers=None):

245 """Adds a suspected file change.	245 """Adds a suspected file change.

246	246

(...skipping 311 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
558 failure_signal (FailureSignal): The failure signal of a failed step or test.	558 failure_signal (FailureSignal): The failure signal of a failed step or test.

559 change_log (dict): The change log of a CL as returned by	559 change_log (dict): The change log of a CL as returned by

560 common.change_log.ChangeLog.ToDict().	560 common.change_log.ChangeLog.ToDict().

561 deps_info (dict): Output of pipeline ExtractDEPSInfoPipeline.	561 deps_info (dict): Output of pipeline ExtractDEPSInfoPipeline.

562	562

563 Returns:	563 Returns:

564 A dict as returned by _Justification.ToDict() if the CL is suspected for the	564 A dict as returned by _Justification.ToDict() if the CL is suspected for the

565 failure; otherwise None.	565 failure; otherwise None.

566 """	566 """

567 # Use a dict to map each file name of the touched files to their occurrences.	567 # Use a dict to map each file name of the touched files to their occurrences.

568 file_name_occurrences = collections.defaultdict(int)	568 file_name_occurrences = defaultdict(int)

569 for touched_file in change_log['touched_files']:	569 for touched_file in change_log['touched_files']:

570 change_type = touched_file['change_type']	570 change_type = touched_file['change_type']

571 if (change_type in (ChangeType.ADD, ChangeType.COPY,	571 if (change_type in (ChangeType.ADD, ChangeType.COPY,

572 ChangeType.RENAME, ChangeType.MODIFY)):	572 ChangeType.RENAME, ChangeType.MODIFY)):

573 file_name = os.path.basename(touched_file['new_path'])	573 file_name = os.path.basename(touched_file['new_path'])

574 file_name_occurrences[file_name] += 1	574 file_name_occurrences[file_name] += 1

575	575

576 if change_type in (ChangeType.DELETE, ChangeType.RENAME):	576 if change_type in (ChangeType.DELETE, ChangeType.RENAME):

577 file_name = os.path.basename(touched_file['old_path'])	577 file_name = os.path.basename(touched_file['old_path'])

578 file_name_occurrences[file_name] += 1	578 file_name_occurrences[file_name] += 1

(...skipping 14 matching lines...) Expand all Loading...
593	593

594 _CheckFileInDependencyRolls(file_path_in_log, rolls, justification,	594 _CheckFileInDependencyRolls(file_path_in_log, rolls, justification,

595 line_numbers)	595 line_numbers)

596	596

597 if not justification.score:	597 if not justification.score:

598 return None	598 return None

599 else:	599 else:

600 return justification.ToDict()	600 return justification.ToDict()

601	601

602	602

	603 class _CLInfo(object):

	604 """A object of information we need for a suspected CL.

	605

	606 The information is specific to current build.

	607 """

	608 def __init__(self):

	609 self.failures = defaultdict(list)

	610 self.top_score = 0

	611 self.url = None

	612

	613 @property

	614 def not_none_failures(self):

	615 failures = {

	616 key: [t for t in self.failures[key] if t] for key in self.failures

	617 }

	618 return failures

	619

	620

	621 def _SaveFailureToMap(

	622 cl_failure_map, new_suspected_cl_dict, step_name, test_name, top_score):

	623 """Saves a failure's info to the cl that caused it."""

	624 cl_key = (

	625 new_suspected_cl_dict['repo_name'], new_suspected_cl_dict['revision'],

	626 new_suspected_cl_dict['commit_position'])

	627

	628 cl_failure_map[cl_key].failures[step_name].append(test_name)

	629 cl_failure_map[cl_key].top_score = (

	630 cl_failure_map[cl_key].top_score or top_score)

	631 cl_failure_map[cl_key].url = (

	632 cl_failure_map[cl_key].url or new_suspected_cl_dict['url'])

	633

	634

	635 def _ConvertCLFailureMapToList(cl_failure_map):

	636 suspected_cls = []

	637 for cl_key, cl_object in cl_failure_map.iteritems():

	638 suspected_cl = {}

	639 (suspected_cl['repo_name'], suspected_cl['revision'],

	640 suspected_cl['commit_position']) = cl_key

	641 suspected_cl['url'] = cl_object.url

	642 suspected_cl['failures'] = cl_object.not_none_failures

	643 suspected_cl['top_score'] = cl_object.top_score

	644

	645 suspected_cls.append(suspected_cl)

	646 return suspected_cls

	647

603 def AnalyzeBuildFailure(	648 def AnalyzeBuildFailure(

604 failure_info, change_logs, deps_info, failure_signals):	649 failure_info, change_logs, deps_info, failure_signals):

605 """Analyze the given failure signals, and figure out culprit CLs.	650 """Analyze the given failure signals, and figure out culprit CLs.
	Sharu Jiang 2016/09/19 23:17:54: nit: Analyzes
	chanli 2016/09/21 22:04:25: On 2016/09/19 23:17:54, sharu jiang wrote: "nit: Analyzes" -- Done.
606	651

607 Args:	652 Args:

608 failure_info (dict): Output of pipeline DetectFirstFailurePipeline.	653 failure_info (dict): Output of pipeline DetectFirstFailurePipeline.

609 change_logs (dict): Output of pipeline PullChangelogPipeline.	654 change_logs (dict): Output of pipeline PullChangelogPipeline.

610 deps_info (dict): Output of pipeline ExtractDEPSInfoPipeline.	655 deps_info (dict): Output of pipeline ExtractDEPSInfoPipeline.

611 failure_signals (dict): Output of pipeline ExtractSignalPipeline.	656 failure_signals (dict): Output of pipeline ExtractSignalPipeline.

612	657

613 Returns:	658 Returns:

614 A dict with the following form:	659 A dict with the following form:
	Sharu Jiang 2016/09/19 23:17:54: We should update the returns.
	chanli 2016/09/21 22:04:25: On 2016/09/19 23:17:54, sharu jiang wrote: "We should update the returns." -- Done.
615 {	660 {

616 'failures': [	661 'failures': [

617 {	662 {

618 'step_name': 'compile',	663 'step_name': 'compile',

619 'supported': True	664 'supported': True

620 'first_failure': 230,	665 'first_failure': 230,

621 'last_pass': 229,	666 'last_pass': 229,

622 'suspected_cls': [	667 'suspected_cls': [

623 {	668 {

624 'build_number': 230,	669 'build_number': 230,

(...skipping 14 matching lines...) Expand all Loading...
639 ...	684 ...

640 ]	685 ]

641 }	686 }

642 """	687 """

643 analysis_result = {	688 analysis_result = {

644 'failures': []	689 'failures': []

645 }	690 }

646	691

647 if not failure_info['failed'] or not failure_info['chromium_revision']:	692 if not failure_info['failed'] or not failure_info['chromium_revision']:

648 # Bail out if no failed step or no chromium revision.	693 # Bail out if no failed step or no chromium revision.

649 return analysis_result	694 return analysis_result, []

650	695

651 def CreateCLInfoDict(justification_dict, build_number, change_log):	696 def CreateCLInfoDict(justification_dict, build_number, change_log):

652 # TODO(stgao): remove hard-coded 'chromium' when DEPS file parsing is	697 # TODO(stgao): remove hard-coded 'chromium' when DEPS file parsing is

653 # supported.	698 # supported.

654 cl_info = {	699 cl_info = {

655 'build_number': build_number,	700 'build_number': build_number,

656 'repo_name': 'chromium',	701 'repo_name': 'chromium',

657 'revision': change_log['revision'],	702 'revision': change_log['revision'],

658 'commit_position': change_log.get('commit_position'),	703 'commit_position': change_log.get('commit_position'),

659 'url':	704 'url':

660 change_log.get('code_review_url') or change_log.get('commit_url'),	705 change_log.get('code_review_url') or change_log.get('commit_url'),

661 }	706 }

662	707

663 cl_info.update(justification_dict)	708 cl_info.update(justification_dict)

664 return cl_info	709 return cl_info

665	710

666 failed_steps = failure_info['failed_steps']	711 failed_steps = failure_info['failed_steps']

667 builds = failure_info['builds']	712 builds = failure_info['builds']

668 master_name = failure_info.get('master_name')	713 master_name = failure_info['master_name']

	714

	715 cl_failure_map = defaultdict(_CLInfo)

669	716

670 for step_name, step_failure_info in failed_steps.iteritems():	717 for step_name, step_failure_info in failed_steps.iteritems():

671 is_test_level = step_failure_info.get('tests') is not None	718 is_test_level = step_failure_info.get('tests') is not None

672	719

673 failed_build_number = step_failure_info['current_failure']	720 failed_build_number = step_failure_info['current_failure']

674 if step_failure_info.get('last_pass') is not None:	721 if step_failure_info.get('last_pass') is not None:

675 start_build_number = step_failure_info.get('last_pass') + 1	722 start_build_number = step_failure_info.get('last_pass') + 1

676 else:	723 else:

677 start_build_number = step_failure_info['first_failure']	724 start_build_number = step_failure_info['first_failure']

678 step_analysis_result = {	725 step_analysis_result = {

(...skipping 30 matching lines...) Expand all Loading...
709 test_signal, change_logs[revision], deps_info)	756 test_signal, change_logs[revision], deps_info)

710	757

711 if not justification_dict:	758 if not justification_dict:

712 continue	759 continue

713	760

714 new_suspected_cl_dict = CreateCLInfoDict(	761 new_suspected_cl_dict = CreateCLInfoDict(

715 justification_dict, build_number, change_logs[revision])	762 justification_dict, build_number, change_logs[revision])

716 test_analysis_result['suspected_cls'].append(	763 test_analysis_result['suspected_cls'].append(

717 new_suspected_cl_dict)	764 new_suspected_cl_dict)

718	765

	766 _SaveFailureToMap(

	767 cl_failure_map, new_suspected_cl_dict, step_name, test_name,

	768 max(justification_dict['hints'].values()))

	769

719 # Checks Files on step level using step level signals	770 # Checks Files on step level using step level signals

720 # regardless of test level signals so we can make sure	771 # regardless of test level signals so we can make sure

721 # no duplicate justifications added to the step result.	772 # no duplicate justifications added to the step result.

722 failure_signal = FailureSignal.FromDict(failure_signals[step_name])	773 failure_signal = FailureSignal.FromDict(failure_signals[step_name])

723 justification_dict = _CheckFiles(	774 justification_dict = _CheckFiles(

724 failure_signal, change_logs[revision], deps_info)	775 failure_signal, change_logs[revision], deps_info)

725	776

726 if not justification_dict:	777 if not justification_dict:

727 continue	778 continue

728	779

729 step_analysis_result['suspected_cls'].append(	780 new_suspected_cl_dict = CreateCLInfoDict(

730 CreateCLInfoDict(justification_dict, build_number,	781 justification_dict, build_number, change_logs[revision])

731 change_logs[revision]))	782 step_analysis_result['suspected_cls'].append(new_suspected_cl_dict)

	783

	784 if not is_test_level:

	785 _SaveFailureToMap(

	786 cl_failure_map, new_suspected_cl_dict, step_name, None,

	787 max(justification_dict['hints'].values()))

732	788

733 # TODO(stgao): sort CLs by score.	789 # TODO(stgao): sort CLs by score.

734 analysis_result['failures'].append(step_analysis_result)	790 analysis_result['failures'].append(step_analysis_result)

735	791

736 return analysis_result	792 suspected_cls = _ConvertCLFailureMapToList(cl_failure_map)

	793

	794 return analysis_result, suspected_cls

OLD	NEW