Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(39)

Side by Side Diff: appengine/findit/waterfall/build_failure_analysis.py

Issue 2230103002: [Findit] Pipeline change to save suspected cls to data store. (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@0808-resubmit-suspected_cl_model
Patch Set: . Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2015 The Chromium Authors. All rights reserved. 1 # Copyright 2015 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import collections 5 from collections import defaultdict
6 import os 6 import os
7 import re 7 import re
8 8
9 from common.diff import ChangeType 9 from common.diff import ChangeType
10 from common.git_repository import GitRepository 10 from common.git_repository import GitRepository
11 from common.http_client_appengine import HttpClientAppengine as HttpClient 11 from common.http_client_appengine import HttpClientAppengine as HttpClient
12 from waterfall import waterfall_config 12 from waterfall import waterfall_config
13 from waterfall.failure_signal import FailureSignal 13 from waterfall.failure_signal import FailureSignal
14 14
15 15
(...skipping 210 matching lines...) Expand 10 before | Expand all | Expand 10 after
226 appearing in the compile failure. 226 appearing in the compile failure.
227 2) If a hint shows that a CL is likely-suspected, the hint is given 1 227 2) If a hint shows that a CL is likely-suspected, the hint is given 1
228 score point. Eg. a CL is just likely suspected if it only changed a 228 score point. Eg. a CL is just likely suspected if it only changed a
229 related file (x_impl.cc vs. x.h) appearing in a failure. 229 related file (x_impl.cc vs. x.h) appearing in a failure.
230 2. hints: each hint is a string describing a reason for suspecting a CL and 230 2. hints: each hint is a string describing a reason for suspecting a CL and
231 could be shown to the user (eg., "added x_impl.cc (and it was in log)"). 231 could be shown to the user (eg., "added x_impl.cc (and it was in log)").
232 """ 232 """
233 233
234 def __init__(self): 234 def __init__(self):
235 self._score = 0 235 self._score = 0
236 self._hints = collections.defaultdict(int) 236 self._hints = defaultdict(int)
237 237
238 @property 238 @property
239 def score(self): 239 def score(self):
240 return self._score 240 return self._score
241 241
242 def AddFileChange(self, change_action, changed_src_file_path, 242 def AddFileChange(self, change_action, changed_src_file_path,
243 file_path_in_log, score, num_file_name_occurrences, 243 file_path_in_log, score, num_file_name_occurrences,
244 changed_line_numbers=None): 244 changed_line_numbers=None):
245 """Adds a suspected file change. 245 """Adds a suspected file change.
246 246
(...skipping 311 matching lines...) Expand 10 before | Expand all | Expand 10 after
558 failure_signal (FailureSignal): The failure signal of a failed step or test. 558 failure_signal (FailureSignal): The failure signal of a failed step or test.
559 change_log (dict): The change log of a CL as returned by 559 change_log (dict): The change log of a CL as returned by
560 common.change_log.ChangeLog.ToDict(). 560 common.change_log.ChangeLog.ToDict().
561 deps_info (dict): Output of pipeline ExtractDEPSInfoPipeline. 561 deps_info (dict): Output of pipeline ExtractDEPSInfoPipeline.
562 562
563 Returns: 563 Returns:
564 A dict as returned by _Justification.ToDict() if the CL is suspected for the 564 A dict as returned by _Justification.ToDict() if the CL is suspected for the
565 failure; otherwise None. 565 failure; otherwise None.
566 """ 566 """
567 # Use a dict to map each file name of the touched files to their occurrences. 567 # Use a dict to map each file name of the touched files to their occurrences.
568 file_name_occurrences = collections.defaultdict(int) 568 file_name_occurrences = defaultdict(int)
569 for touched_file in change_log['touched_files']: 569 for touched_file in change_log['touched_files']:
570 change_type = touched_file['change_type'] 570 change_type = touched_file['change_type']
571 if (change_type in (ChangeType.ADD, ChangeType.COPY, 571 if (change_type in (ChangeType.ADD, ChangeType.COPY,
572 ChangeType.RENAME, ChangeType.MODIFY)): 572 ChangeType.RENAME, ChangeType.MODIFY)):
573 file_name = os.path.basename(touched_file['new_path']) 573 file_name = os.path.basename(touched_file['new_path'])
574 file_name_occurrences[file_name] += 1 574 file_name_occurrences[file_name] += 1
575 575
576 if change_type in (ChangeType.DELETE, ChangeType.RENAME): 576 if change_type in (ChangeType.DELETE, ChangeType.RENAME):
577 file_name = os.path.basename(touched_file['old_path']) 577 file_name = os.path.basename(touched_file['old_path'])
578 file_name_occurrences[file_name] += 1 578 file_name_occurrences[file_name] += 1
(...skipping 14 matching lines...) Expand all
593 593
594 _CheckFileInDependencyRolls(file_path_in_log, rolls, justification, 594 _CheckFileInDependencyRolls(file_path_in_log, rolls, justification,
595 line_numbers) 595 line_numbers)
596 596
597 if not justification.score: 597 if not justification.score:
598 return None 598 return None
599 else: 599 else:
600 return justification.ToDict() 600 return justification.ToDict()
601 601
602 602
603 class _CLInfo(object):
604 """An object of information we need for a suspected CL.
605
606 The information is specific to the current build.
607 """
608 def __init__(self):
609 self.failures = defaultdict(list)
610 self.top_score = 0
611 self.url = None
612
613 @property
614 def not_none_failures(self):
stgao 2016/09/28 00:13:27 can we have a better name here too?
chanli 2016/09/30 20:41:01 Removed it.
615 # For steps like compile, there is no test_name;
616 # to ensure we include them in the dict, None is used as a placeholder.
stgao 2016/09/28 00:13:27 Can we use a singleton object like below? NO_TEST
chanli 2016/09/30 20:41:01 I don't understand your suggestion... Used another
617 # Now removes those Nones.
618 failures = {
619 key: [t for t in self.failures[key] if t] for key in self.failures
620 }
621 return failures
622
623
624 def _SaveFailureToMap(
625 cl_failure_map, new_suspected_cl_dict, step_name, test_name, top_score):
626 """Saves a failure's info to the cl that caused it."""
627 cl_key = (
628 new_suspected_cl_dict['repo_name'], new_suspected_cl_dict['revision'],
629 new_suspected_cl_dict['commit_position'])
630
631 cl_failure_map[cl_key].failures[step_name].append(test_name)
632 # Ignores the case where in the same build for the same cl,
633 # we have different scores.
634 # Not sure if we need to handle it since it should be rare.
635 cl_failure_map[cl_key].top_score = (
636 cl_failure_map[cl_key].top_score or top_score)
637 cl_failure_map[cl_key].url = (
638 cl_failure_map[cl_key].url or new_suspected_cl_dict['url'])
639
640
641 def _ConvertCLFailureMapToList(cl_failure_map):
642 suspected_cls = []
643 for cl_key, cl_info in cl_failure_map.iteritems():
644 suspected_cl = {}
645 (suspected_cl['repo_name'], suspected_cl['revision'],
646 suspected_cl['commit_position']) = cl_key
647 suspected_cl['url'] = cl_info.url
648 suspected_cl['failures'] = cl_info.not_none_failures
649 suspected_cl['top_score'] = cl_info.top_score
650
651 suspected_cls.append(suspected_cl)
652 return suspected_cls
653
603 def AnalyzeBuildFailure( 654 def AnalyzeBuildFailure(
604 failure_info, change_logs, deps_info, failure_signals): 655 failure_info, change_logs, deps_info, failure_signals):
605 """Analyze the given failure signals, and figure out culprit CLs. 656 """Analyzes the given failure signals, and figures out culprit CLs.
606 657
607 Args: 658 Args:
608 failure_info (dict): Output of pipeline DetectFirstFailurePipeline. 659 failure_info (dict): Output of pipeline DetectFirstFailurePipeline.
609 change_logs (dict): Output of pipeline PullChangelogPipeline. 660 change_logs (dict): Output of pipeline PullChangelogPipeline.
610 deps_info (dict): Output of pipeline ExtractDEPSInfoPipeline. 661 deps_info (dict): Output of pipeline ExtractDEPSInfoPipeline.
611 failure_signals (dict): Output of pipeline ExtractSignalPipeline. 662 failure_signals (dict): Output of pipeline ExtractSignalPipeline.
612 663
613 Returns: 664 Returns:
614 A dict with the following form: 665 A dict with the following form:
615 { 666 {
(...skipping 16 matching lines...) Expand all
632 'modify e/f/z.cc': 1, 683 'modify e/f/z.cc': 1,
633 ... 684 ...
634 } 685 }
635 }, 686 },
636 ... 687 ...
637 ], 688 ],
638 }, 689 },
639 ... 690 ...
640 ] 691 ]
641 } 692 }
693
694 And a list of suspected_cls formatted as below:
695 [
696 {
697 'repo_name': 'chromium',
698 'revision': 'r98_1',
699 'commit_position': None,
700 'url': None,
701 'failures': {
702 'b': ['Unittest2.Subtest1', 'Unittest3.Subtest2']
703 },
704 'top_score': 4
705 },
706 ...
707 ]
642 """ 708 """
643 analysis_result = { 709 analysis_result = {
644 'failures': [] 710 'failures': []
645 } 711 }
646 712
647 if not failure_info['failed'] or not failure_info['chromium_revision']: 713 if not failure_info['failed'] or not failure_info['chromium_revision']:
648 # Bail out if no failed step or no chromium revision. 714 # Bail out if no failed step or no chromium revision.
649 return analysis_result 715 return analysis_result, []
650 716
651 def CreateCLInfoDict(justification_dict, build_number, change_log): 717 def CreateCLInfoDict(justification_dict, build_number, change_log):
652 # TODO(stgao): remove hard-coded 'chromium' when DEPS file parsing is 718 # TODO(stgao): remove hard-coded 'chromium' when DEPS file parsing is
653 # supported. 719 # supported.
654 cl_info = { 720 cl_info = {
655 'build_number': build_number, 721 'build_number': build_number,
656 'repo_name': 'chromium', 722 'repo_name': 'chromium',
657 'revision': change_log['revision'], 723 'revision': change_log['revision'],
658 'commit_position': change_log.get('commit_position'), 724 'commit_position': change_log.get('commit_position'),
659 'url': 725 'url':
660 change_log.get('code_review_url') or change_log.get('commit_url'), 726 change_log.get('code_review_url') or change_log.get('commit_url'),
661 } 727 }
662 728
663 cl_info.update(justification_dict) 729 cl_info.update(justification_dict)
664 return cl_info 730 return cl_info
665 731
666 failed_steps = failure_info['failed_steps'] 732 failed_steps = failure_info['failed_steps']
667 builds = failure_info['builds'] 733 builds = failure_info['builds']
668 master_name = failure_info.get('master_name') 734 master_name = failure_info['master_name']
735
736 cl_failure_map = defaultdict(_CLInfo)
669 737
670 for step_name, step_failure_info in failed_steps.iteritems(): 738 for step_name, step_failure_info in failed_steps.iteritems():
671 is_test_level = step_failure_info.get('tests') is not None 739 is_test_level = step_failure_info.get('tests') is not None
672 740
673 failed_build_number = step_failure_info['current_failure'] 741 failed_build_number = step_failure_info['current_failure']
674 if step_failure_info.get('last_pass') is not None: 742 if step_failure_info.get('last_pass') is not None:
675 start_build_number = step_failure_info.get('last_pass') + 1 743 start_build_number = step_failure_info.get('last_pass') + 1
676 else: 744 else:
677 start_build_number = step_failure_info['first_failure'] 745 start_build_number = step_failure_info['first_failure']
678 step_analysis_result = { 746 step_analysis_result = {
(...skipping 30 matching lines...) Expand all
709 test_signal, change_logs[revision], deps_info) 777 test_signal, change_logs[revision], deps_info)
710 778
711 if not justification_dict: 779 if not justification_dict:
712 continue 780 continue
713 781
714 new_suspected_cl_dict = CreateCLInfoDict( 782 new_suspected_cl_dict = CreateCLInfoDict(
715 justification_dict, build_number, change_logs[revision]) 783 justification_dict, build_number, change_logs[revision])
716 test_analysis_result['suspected_cls'].append( 784 test_analysis_result['suspected_cls'].append(
717 new_suspected_cl_dict) 785 new_suspected_cl_dict)
718 786
787 _SaveFailureToMap(
788 cl_failure_map, new_suspected_cl_dict, step_name, test_name,
789 max(justification_dict['hints'].values()))
790
719 # Checks Files on step level using step level signals 791 # Checks Files on step level using step level signals
720 # regardless of test level signals so we can make sure 792 # regardless of test level signals so we can make sure
721 # no duplicate justifications added to the step result. 793 # no duplicate justifications added to the step result.
722 failure_signal = FailureSignal.FromDict(failure_signals[step_name]) 794 failure_signal = FailureSignal.FromDict(failure_signals[step_name])
723 justification_dict = _CheckFiles( 795 justification_dict = _CheckFiles(
724 failure_signal, change_logs[revision], deps_info) 796 failure_signal, change_logs[revision], deps_info)
725 797
726 if not justification_dict: 798 if not justification_dict:
727 continue 799 continue
728 800
729 step_analysis_result['suspected_cls'].append( 801 new_suspected_cl_dict = CreateCLInfoDict(
730 CreateCLInfoDict(justification_dict, build_number, 802 justification_dict, build_number, change_logs[revision])
731 change_logs[revision])) 803 step_analysis_result['suspected_cls'].append(new_suspected_cl_dict)
804
805 if not is_test_level:
806 _SaveFailureToMap(
807 cl_failure_map, new_suspected_cl_dict, step_name, None,
808 max(justification_dict['hints'].values()))
732 809
733 # TODO(stgao): sort CLs by score. 810 # TODO(stgao): sort CLs by score.
734 analysis_result['failures'].append(step_analysis_result) 811 analysis_result['failures'].append(step_analysis_result)
735 812
736 return analysis_result 813 suspected_cls = _ConvertCLFailureMapToList(cl_failure_map)
814
815 return analysis_result, suspected_cls
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698