Chromium Code Reviews| Index: appengine/findit/util_scripts/crash_queries/delta_test/delta_test.py |
| diff --git a/appengine/findit/util_scripts/crash_queries/delta_test/delta_test.py b/appengine/findit/util_scripts/crash_queries/delta_test/delta_test.py |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..05105971ef0eaa7d07321a1a84282c838faab046 |
| --- /dev/null |
| +++ b/appengine/findit/util_scripts/crash_queries/delta_test/delta_test.py |
| @@ -0,0 +1,199 @@ |
| +# Copyright 2016 The Chromium Authors. All rights reserved. |
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| + |
| +import hashlib |
| +import json |
| +import logging |
| +import os |
| +import pickle |
| +import subprocess |
| + |
| +from crash_queries import crash_iterator |
| + |
# Directory, next to this module, under which GenerateResultFileName places
# result files. The module path is made absolute first so the location does
# not depend on the working directory when __file__ is a relative path.
AZALEA_RESULTS_DIRECTORY = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), 'azalea_results')
| + |
| + |
class Delta(object):
  """Stands for delta between two results.

  For every compared field whose values differ between the two results, the
  delta records the pair ``(value1, value2)``. Fields whose values compare
  equal are left out. Both representations are computed lazily on first
  access and cached afterwards.
  """

  def __init__(self, result1, result2, fields):
    """Initializes the delta.

    Args:
      result1 (object): First result; every name in ``fields`` is read from
        it with ``getattr``.
      result2 (object): Second result, read the same way.
      fields (iterable of str): Names of the attributes to compare.
    """
    self._result1 = result1
    self._result2 = result2
    self._fields = fields
    # ``None`` means "not computed yet". An empty dict is a legitimate cached
    # answer (no differences), so emptiness must not signal a cache miss.
    self._delta_dict = None
    self._delta_str_dict = None

  @staticmethod
  def _ToSerializable(value):
    """Returns ``value.ToDict()`` when that is callable, else ``value``."""
    to_dict = getattr(value, 'ToDict', None)
    return to_dict() if callable(to_dict) else value

  @property
  def delta_dict(self):
    """Dict mapping each differing field to its ``(value1, value2)`` pair.

    Each side is converted with its own ``ToDict`` when it has one, so a
    field where only one side is a rich object (e.g. the other is ``None``)
    does not raise.
    """
    if self._delta_dict is None:
      differences = {}
      for field in self._fields:
        value1 = getattr(self._result1, field)
        value2 = getattr(self._result2, field)
        if value1 != value2:
          differences[field] = (self._ToSerializable(value1),
                                self._ToSerializable(value2))
      self._delta_dict = differences

    return self._delta_dict

  @property
  def delta_str_dict(self):
    """Dict mapping each differing field to '<field>: <value1>, <value2>'."""
    if self._delta_str_dict is None:
      self._delta_str_dict = {
          key: '%s: %s, %s' % (key, value1, value2)
          for key, (value1, value2) in self.delta_dict.items()}

    return self._delta_str_dict

  def ToDict(self):
    """Returns the dict representation of the delta (see ``delta_dict``)."""
    return self.delta_dict

  def ToString(self):
    """Returns the per-field delta strings joined by newlines."""
    # Review thread on this method -- wrengr (2016/10/11 22:57:43): "Should
    # also set the __str__ method"; Sharu Jiang (2016/10/12 01:18:20):
    # "Done." __str__ below delegates to this method.
    return '\n'.join(self.delta_str_dict.values())

  def __str__(self):
    return self.ToString()

  def IsEmpty(self):
    """Returns True when no compared field differs."""
    # Review thread on this method -- wrengr (2016/10/11 22:57:43): "Should
    # probably also set the __bool__ method"; Sharu Jiang (2016/10/12
    # 01:18:20): "Done." __bool__ below is the negation of this method.
    return not bool(self.delta_dict)

  def __bool__(self):
    """A delta is truthy exactly when at least one field differs."""
    return not self.IsEmpty()

  # Python 2 looks up __nonzero__ rather than __bool__ for truth testing.
  __nonzero__ = __bool__
| + |
| + |
def GetDeltaFromTwoSetsOfCulprits(set1, set2):
  """Gets the per-crash deltas between two sets of culprit results.

  Args:
    set1 (dict): Maps crash id to culprit for the first run. Each culprit
      must expose a ``fields`` attribute naming the attributes to compare.
    set2 (dict): Maps crash id to culprit for the second run.

  Returns:
    A dict mapping crash id to the ``Delta`` between the two culprits, for
    every crash id present in both sets whose culprits differ. Crash ids
    missing from ``set2`` and crashes with identical culprits are omitted.
  """
  deltas = {}
  # ``items()`` (not ``iteritems()``) keeps this working on Python 2 and 3.
  for crash_id, culprit1 in set1.items():
    # Even when the command are exactly the same, it's possible that one set is
    # loaded from local result file, another is just queried from database,
    # sometimes some crash results would get deleted.
    if crash_id not in set2:
      continue

    delta = Delta(culprit1, set2[crash_id], culprit1.fields)
    if not delta.IsEmpty():
      deltas[crash_id] = delta

  return deltas
| + |
| + |
def GetResult(crashes, git_hash, result_path, verbose=False):
  """Returns the findit results for ``crashes`` computed at ``git_hash``.

  Checks out ``git_hash`` in the repository containing this file. Unless
  ``result_path`` already exists, it then runs ``run-azalea.py`` in a
  subprocess, feeding it the crashes as JSON on stdin; the subprocess is
  expected to write pickled results to ``result_path``. Finally the results
  are loaded from ``result_path``.

  Args:
    crashes (list): A list of crash infos.
    git_hash (str): A git hash of findit repository.
    result_path (str): file path for subprocess to write results on.
    verbose (bool): If True, print all the findit results.

  Return:
    A dict mapping crash id to culprit for every crashes analyzed by
    git_hash version. An empty dict if ``crashes`` is empty or no result file
    exists after the run.

  Raises:
    subprocess.CalledProcessError: If ``git checkout`` fails.
  """
  if not crashes:
    return {}

  if verbose:
    logging.info('\n\n***************************')
    logging.info('Switch to git %s', git_hash)
    logging.info('***************************\n\n')

  # An argument list plus ``cwd`` replaces the former shell string
  # ``cd <dir>; git checkout <hash>``: nothing is interpreted by a shell, and
  # an empty ``dirname`` (relative ``__file__``) can no longer turn ``cd``
  # into a jump to the home directory.
  with open(os.devnull, 'w') as dev_null_handle:
    subprocess.check_call(
        ['git', 'checkout', git_hash],
        cwd=os.path.dirname(os.path.abspath(__file__)),
        stdout=dev_null_handle,
        stderr=dev_null_handle)

  if os.path.exists(result_path):
    logging.info('\nLoading results from %s', result_path)
  else:
    # TODO(katesoina): Implement run-azalea.py.
    command = ['python', 'run-azalea.py', result_path]
    if verbose:
      command.append('-v')
    # Results is a dict with testcase_id as key, and findit results as
    # value.
    p = subprocess.Popen(command, stdin=subprocess.PIPE)
    # The pipe is binary, so the JSON text is encoded before being sent.
    p.communicate(input=json.dumps(crashes).encode('utf-8'))

  if not os.path.exists(result_path):
    logging.info('Fail to get results.')
    return {}

  # NOTE: pickle executes arbitrary code on load; this is only acceptable
  # because the file is produced locally by the subprocess above.
  with open(result_path, 'rb') as f:
    return pickle.load(f)
| + |
| + |
def GenerateResultFileName(*args):
  """Builds the result file path keyed by ``args``.

  The file name is the MD5 hex digest of the pickled ``args`` tuple, and the
  file lives under ``AZALEA_RESULTS_DIRECTORY``.
  """
  # NOTE(review): the digest depends on the exact pickle bytes, so equal
  # arguments presumably need to pickle identically to hit the same file.
  serialized_args = pickle.dumps(args)
  file_name = hashlib.md5(serialized_args).hexdigest()
  return os.path.join(AZALEA_RESULTS_DIRECTORY, file_name)
| + |
| + |
def DeltaEvaluator(git_hash1, git_hash2,
                   client_id, start_date, end_date, batch_size,
                   property_values=None, verbose=False):
  """Evaluates delta between git_hash1 and git_hash2 on a set of Testcases.

  Crashes are fetched in batches; each batch is analyzed at both git hashes
  and the differing culprits are accumulated. The git ref that was checked
  out on entry is restored before returning, even if an error occurs.

  Args:
    git_hash1 (str): A git hash of findit repository.
    git_hash2 (str): A git hash of findit repository.
    client_id (CrashClient): Possible values are 'fracas', 'cracas',
      'clusterfuzz'.
    start_date (str): Run delta test on testcases after (including)
      the start_date, format should be '%Y-%m-%d'.
    end_date (str): Run delta test on testcases before (not including)
      the end_date, format should be '%Y-%m-%d'.
    batch_size (int): The size of crashes that can be queried at one time.
    property_values (dict): Property values to query.
    verbose (bool): If True, print all the findit results.

  Return:
    (deltas, crash_count).
    deltas (dict): Mappings id to delta for each culprit value.
    crash_count (int): Total count of all the crashes.

  Raises:
    subprocess.CalledProcessError: If a git command fails.
  """
  # ``universal_newlines=True`` makes check_output return text on both
  # Python 2 and 3, so the trailing newline can simply be stripped.
  head_ref = subprocess.check_output(
      ['git', 'rev-parse', '--abbrev-ref', 'HEAD'],
      universal_newlines=True).strip()
  if head_ref == 'HEAD':
    # Detached HEAD: the abbreviated name is the literal 'HEAD', and checking
    # that out later would not return to the starting commit. Remember the
    # commit hash instead.
    head_ref = subprocess.check_output(
        ['git', 'rev-parse', 'HEAD'], universal_newlines=True).strip()

  deltas = {}
  crash_count = 0
  try:
    crash_batches = crash_iterator.IterateCrashes(
        client_id,
        property_values=property_values,
        start_date=start_date,
        end_date=end_date,
        batch_size=batch_size,
        batch_run=True)
    for index, crashes in enumerate(crash_batches):
      # One result dict per git hash, in (git_hash1, git_hash2) order. The
      # batch index is part of the file name key so that every batch gets its
      # own result file.
      results = []
      for git_hash in (git_hash1, git_hash2):
        result_path = GenerateResultFileName(client_id, property_values,
                                             start_date, end_date,
                                             batch_size, index, git_hash)
        results.append(GetResult(crashes, git_hash, result_path,
                                 verbose=verbose))

      crash_count += len(crashes)
      deltas.update(GetDeltaFromTwoSetsOfCulprits(*results))

    return deltas, crash_count
  finally:
    # Always switch the working tree back to where it started.
    with open(os.devnull, 'w') as dev_null_handle:
      subprocess.check_call(['git', 'checkout', head_ref],
                            stdout=dev_null_handle,
                            stderr=dev_null_handle)