Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(155)

Side by Side Diff: appengine/findit/waterfall/build_failure_analysis.py

Issue 2230103002: [Findit] Pipeline change to save suspected cls to data store. (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@0808-resubmit-suspected_cl_model
Patch Set: Self review. Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 # Copyright 2015 The Chromium Authors. All rights reserved. 1 # Copyright 2015 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be 2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file. 3 # found in the LICENSE file.
4 4
5 import collections 5 from collections import defaultdict
6 import os 6 import os
7 import re 7 import re
8 8
9 from common.diff import ChangeType 9 from common.diff import ChangeType
10 from common.git_repository import GitRepository 10 from common.git_repository import GitRepository
11 from common.http_client_appengine import HttpClientAppengine as HttpClient 11 from common.http_client_appengine import HttpClientAppengine as HttpClient
12 from model import analysis_approach_type
13 from waterfall import suspected_cl_util
12 from waterfall import waterfall_config 14 from waterfall import waterfall_config
13 from waterfall.failure_signal import FailureSignal 15 from waterfall.failure_signal import FailureSignal
14 16
15 17
16 def _IsSameFile(changed_src_file_path, file_path_in_log): 18 def _IsSameFile(changed_src_file_path, file_path_in_log):
17 """Guesses if the two files are the same. 19 """Guesses if the two files are the same.
18 20
19 Args: 21 Args:
20 changed_src_file_path (str): Full path of a file committed to git repo. 22 changed_src_file_path (str): Full path of a file committed to git repo.
21 file_path_in_log (str): Path of a file appearing in a failure log. It might 23 file_path_in_log (str): Path of a file appearing in a failure log. It might
(...skipping 204 matching lines...) Expand 10 before | Expand all | Expand 10 after
226 appearing in the compile failure. 228 appearing in the compile failure.
227 2) If a hint shows that a CL is likely-suspected, the hint is given 1 229 2) If a hint shows that a CL is likely-suspected, the hint is given 1
228 score point. Eg. a CL is just likely suspected if it only changed a 230 score point. Eg. a CL is just likely suspected if it only changed a
229 related file (x_impl.cc vs. x.h) appearing in a failure. 231 related file (x_impl.cc vs. x.h) appearing in a failure.
230 2. hints: each hint is a string describing a reason for suspecting a CL and 232 2. hints: each hint is a string describing a reason for suspecting a CL and
231 could be shown to the user (eg., "added x_impl.cc (and it was in log)"). 233 could be shown to the user (eg., "added x_impl.cc (and it was in log)").
232 """ 234 """
233 235
234 def __init__(self): 236 def __init__(self):
235 self._score = 0 237 self._score = 0
236 self._hints = collections.defaultdict(int) 238 self._hints = defaultdict(int)
237 239
238 @property 240 @property
239 def score(self): 241 def score(self):
240 return self._score 242 return self._score
241 243
242 def AddFileChange(self, change_action, changed_src_file_path, 244 def AddFileChange(self, change_action, changed_src_file_path,
243 file_path_in_log, score, num_file_name_occurrences, 245 file_path_in_log, score, num_file_name_occurrences,
244 changed_line_numbers=None): 246 changed_line_numbers=None):
245 """Adds a suspected file change. 247 """Adds a suspected file change.
246 248
(...skipping 311 matching lines...) Expand 10 before | Expand all | Expand 10 after
558 failure_signal (FailureSignal): The failure signal of a failed step or test. 560 failure_signal (FailureSignal): The failure signal of a failed step or test.
559 change_log (dict): The change log of a CL as returned by 561 change_log (dict): The change log of a CL as returned by
560 common.change_log.ChangeLog.ToDict(). 562 common.change_log.ChangeLog.ToDict().
561 deps_info (dict): Output of pipeline ExtractDEPSInfoPipeline. 563 deps_info (dict): Output of pipeline ExtractDEPSInfoPipeline.
562 564
563 Returns: 565 Returns:
564 A dict as returned by _Justification.ToDict() if the CL is suspected for the 566 A dict as returned by _Justification.ToDict() if the CL is suspected for the
565 failure; otherwise None. 567 failure; otherwise None.
566 """ 568 """
567 # Use a dict to map each file name of the touched files to their occurrences. 569 # Use a dict to map each file name of the touched files to their occurrences.
568 file_name_occurrences = collections.defaultdict(int) 570 file_name_occurrences = defaultdict(int)
569 for touched_file in change_log['touched_files']: 571 for touched_file in change_log['touched_files']:
570 change_type = touched_file['change_type'] 572 change_type = touched_file['change_type']
571 if (change_type in (ChangeType.ADD, ChangeType.COPY, 573 if (change_type in (ChangeType.ADD, ChangeType.COPY,
572 ChangeType.RENAME, ChangeType.MODIFY)): 574 ChangeType.RENAME, ChangeType.MODIFY)):
573 file_name = os.path.basename(touched_file['new_path']) 575 file_name = os.path.basename(touched_file['new_path'])
574 file_name_occurrences[file_name] += 1 576 file_name_occurrences[file_name] += 1
575 577
576 if change_type in (ChangeType.DELETE, ChangeType.RENAME): 578 if change_type in (ChangeType.DELETE, ChangeType.RENAME):
577 file_name = os.path.basename(touched_file['old_path']) 579 file_name = os.path.basename(touched_file['old_path'])
578 file_name_occurrences[file_name] += 1 580 file_name_occurrences[file_name] += 1
(...skipping 14 matching lines...) Expand all
593 595
594 _CheckFileInDependencyRolls(file_path_in_log, rolls, justification, 596 _CheckFileInDependencyRolls(file_path_in_log, rolls, justification,
595 line_numbers) 597 line_numbers)
596 598
597 if not justification.score: 599 if not justification.score:
598 return None 600 return None
599 else: 601 else:
600 return justification.ToDict() 602 return justification.ToDict()
601 603
602 604
605 class _CLInfo(object):
lijeffrey 2016/09/12 22:55:11 So how about making this depend on your other cl h
chanli 2016/09/16 21:24:19 Based on our discussion offline, I'll leave it as
606 """An object of information we need for a suspected CL.
607
608 The information is specific to current build.
609 """
610 def __init__(self):
611 self.failures = defaultdict(list)
612 self.top_score = 0
613
614 @property
615 def not_none_failures(self):
stgao 2016/09/15 17:35:12 What does this mean?
chanli 2016/09/16 21:24:19 For compile and other non-swarming steps, we will
stgao 2016/09/23 18:30:21 Why we will have None in the list? Can we avoid th
chanli 2016/09/24 00:04:59 Since I'm using defaultdict here, I need to make s
616 failures = {
617 key: [t for t in self.failures[key] if t] for key in self.failures
618 }
619 return failures
620
621
622 def _SaveFailureToMap(
623 cl_failure_map, new_suspected_cl_dict, step_name, test_name, top_score):
624 """Saves a failure's info to the cl that caused it."""
625 cl_key = (
626 new_suspected_cl_dict['repo_name'], new_suspected_cl_dict['revision'],
627 new_suspected_cl_dict['commit_position'])
628
629 cl_failure_map[cl_key].failures[step_name].append(test_name)
630 cl_failure_map[cl_key].top_score = top_score
stgao 2016/09/15 17:35:12 What if the original score is higher than this one
chanli 2016/09/16 21:24:19 The real problem should be what if the original sc
stgao 2016/09/23 18:30:21 Add a comment for that?
chanli 2016/09/24 00:04:59 Done.
631
632
633 def _SaveSuspectedCLs(
634 cl_failure_map, master_name, builder_name, build_number, failure_type):
635 """Saves suspected CLs to the datastore."""
636 for cl_key, cl_object in cl_failure_map.iteritems():
637 repo_name, revision, commit_position = cl_key
638 suspected_cl_util.UpdateSuspectedCL(
639 repo_name, revision, commit_position, analysis_approach_type.HEURISTIC,
640 master_name, builder_name, build_number, failure_type,
641 cl_object.not_none_failures, cl_object.top_score)
642
643
603 def AnalyzeBuildFailure( 644 def AnalyzeBuildFailure(
604 failure_info, change_logs, deps_info, failure_signals): 645 failure_info, change_logs, deps_info, failure_signals):
605 """Analyze the given failure signals, and figure out culprit CLs. 646 """Analyze the given failure signals, and figure out culprit CLs.
606 647
607 Args: 648 Args:
608 failure_info (dict): Output of pipeline DetectFirstFailurePipeline. 649 failure_info (dict): Output of pipeline DetectFirstFailurePipeline.
609 change_logs (dict): Output of pipeline PullChangelogPipeline. 650 change_logs (dict): Output of pipeline PullChangelogPipeline.
610 deps_info (dict): Output of pipeline ExtractDEPSInfoPipeline. 651 deps_info (dict): Output of pipeline ExtractDEPSInfoPipeline.
611 failure_signals (dict): Output of pipeline ExtractSignalPipeline. 652 failure_signals (dict): Output of pipeline ExtractSignalPipeline.
612 653
(...skipping 45 matching lines...) Expand 10 before | Expand all | Expand 10 after
658 'commit_position': change_log.get('commit_position'), 699 'commit_position': change_log.get('commit_position'),
659 'url': 700 'url':
660 change_log.get('code_review_url') or change_log.get('commit_url'), 701 change_log.get('code_review_url') or change_log.get('commit_url'),
661 } 702 }
662 703
663 cl_info.update(justification_dict) 704 cl_info.update(justification_dict)
664 return cl_info 705 return cl_info
665 706
666 failed_steps = failure_info['failed_steps'] 707 failed_steps = failure_info['failed_steps']
667 builds = failure_info['builds'] 708 builds = failure_info['builds']
668 master_name = failure_info.get('master_name') 709 master_name = failure_info['master_name']
710
711 cl_failure_map = defaultdict(_CLInfo)
669 712
670 for step_name, step_failure_info in failed_steps.iteritems(): 713 for step_name, step_failure_info in failed_steps.iteritems():
671 is_test_level = step_failure_info.get('tests') is not None 714 is_test_level = step_failure_info.get('tests') is not None
672 715
673 failed_build_number = step_failure_info['current_failure'] 716 failed_build_number = step_failure_info['current_failure']
674 if step_failure_info.get('last_pass') is not None: 717 if step_failure_info.get('last_pass') is not None:
675 start_build_number = step_failure_info.get('last_pass') + 1 718 start_build_number = step_failure_info.get('last_pass') + 1
676 else: 719 else:
677 start_build_number = step_failure_info['first_failure'] 720 start_build_number = step_failure_info['first_failure']
678 step_analysis_result = { 721 step_analysis_result = {
(...skipping 30 matching lines...) Expand all
709 test_signal, change_logs[revision], deps_info) 752 test_signal, change_logs[revision], deps_info)
710 753
711 if not justification_dict: 754 if not justification_dict:
712 continue 755 continue
713 756
714 new_suspected_cl_dict = CreateCLInfoDict( 757 new_suspected_cl_dict = CreateCLInfoDict(
715 justification_dict, build_number, change_logs[revision]) 758 justification_dict, build_number, change_logs[revision])
716 test_analysis_result['suspected_cls'].append( 759 test_analysis_result['suspected_cls'].append(
717 new_suspected_cl_dict) 760 new_suspected_cl_dict)
718 761
762 _SaveFailureToMap(
stgao 2016/09/15 17:35:12 I'm wondering if it would be possible and cleaner
chanli 2016/09/16 21:24:19 I actually do datastore operations in a patch here
763 cl_failure_map, new_suspected_cl_dict, step_name, test_name,
764 max(justification_dict.values()))
765
719 # Checks Files on step level using step level signals 766 # Checks Files on step level using step level signals
720 # regardless of test level signals so we can make sure 767 # regardless of test level signals so we can make sure
721 # no duplicate justifications added to the step result. 768 # no duplicate justifications added to the step result.
722 failure_signal = FailureSignal.FromDict(failure_signals[step_name]) 769 failure_signal = FailureSignal.FromDict(failure_signals[step_name])
723 justification_dict = _CheckFiles( 770 justification_dict = _CheckFiles(
724 failure_signal, change_logs[revision], deps_info) 771 failure_signal, change_logs[revision], deps_info)
725 772
726 if not justification_dict: 773 if not justification_dict:
727 continue 774 continue
728 775
729 step_analysis_result['suspected_cls'].append( 776 new_suspected_cl_dict = CreateCLInfoDict(
730 CreateCLInfoDict(justification_dict, build_number, 777 justification_dict, build_number, change_logs[revision])
731 change_logs[revision])) 778 step_analysis_result['suspected_cls'].append(new_suspected_cl_dict)
779
780 if not is_test_level:
781 _SaveFailureToMap(
782 cl_failure_map, new_suspected_cl_dict, step_name, None,
783 max(justification_dict.values()))
732 784
733 # TODO(stgao): sort CLs by score. 785 # TODO(stgao): sort CLs by score.
734 analysis_result['failures'].append(step_analysis_result) 786 analysis_result['failures'].append(step_analysis_result)
735 787
788 # Save suspected_cls to data_store.
789 _SaveSuspectedCLs(
790 cl_failure_map, failure_info['master_name'], failure_info['builder_name'],
791 failure_info['build_number'], failure_info['failure_type'])
792
736 return analysis_result 793 return analysis_result
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698