Chromium Code Reviews| OLD | NEW |
|---|---|
| (Empty) | |
| 1 # Copyright 2016 The Chromium Authors. All rights reserved. | |
| 2 # Use of this source code is governed by a BSD-style license that can be | |
| 3 # found in the LICENSE file. | |
| 4 | |
| 5 import json | |
| 6 import logging | |
| 7 import os | |
| 8 import pickle | |
| 9 import subprocess | |
| 10 | |
| 11 from crash_queries import crash_iterator | |
| 12 from crash_queries.delta_test import delta_util | |
| 13 | |
# Directory, located next to this module, under which the per-batch result
# files used by the delta test are read and written.
AZALEA_RESULTS_DIRECTORY = os.path.join(os.path.dirname(__file__),
                                        'azalea_results')
| 16 | |
| 17 | |
class Delta(object):
  """Stands for delta between two results.

  The delta is computed lazily, the first time ``delta_dict`` is read, by
  comparing the attributes named in ``fields`` on both results. An instance is
  truthy exactly when at least one of those attributes differs.
  """

  def __init__(self, result1, result2, fields):
    """Stores the two results to compare; no comparison happens here.

    Args:
      result1 (object): First result; must expose every attribute in fields.
      result2 (object): Second result; must expose every attribute in fields.
      fields (iterable of str): Names of the attributes to compare.
    """
    self._result1 = result1
    self._result2 = result2
    self._fields = fields
    # None means "not computed yet". An empty dict is a legitimate cached
    # value (no differences), so truthiness cannot be used as the cache test.
    self._delta_dict = None
    self._delta_str_dict = None

  @staticmethod
  def _ToSerializable(value):
    """Returns value.ToDict() if value has a callable ToDict, else value."""
    to_dict = getattr(value, 'ToDict', None)
    return to_dict() if callable(to_dict) else value

  @property
  def delta_dict(self):
    """Dict mapping each differing field to its (value1, value2) pair."""
    if self._delta_dict is not None:
      return self._delta_dict

    delta = {}
    for field in self._fields:
      value1 = getattr(self._result1, field)
      value2 = getattr(self._result2, field)
      if value1 != value2:
        # Convert each side independently: one side may be None or a plain
        # value while the other is an object exposing ToDict.
        delta[field] = (self._ToSerializable(value1),
                        self._ToSerializable(value2))

    self._delta_dict = delta
    return self._delta_dict

  @property
  def delta_str_dict(self):
    """Dict mapping each differing field to 'field: value1, value2'."""
    if self._delta_str_dict is not None:
      return self._delta_str_dict

    # ``items`` works on both Python 2 and 3, unlike ``iteritems``.
    self._delta_str_dict = {
        key: '%s: %s, %s' % (key, value1, value2)
        for key, (value1, value2) in self.delta_dict.items()
    }
    return self._delta_str_dict

  def ToDict(self):
    """Returns the same dict as ``delta_dict``."""
    return self.delta_dict

  def __str__(self):
    """One 'field: value1, value2' line per differing field."""
    return '\n'.join(self.delta_str_dict.values())

  def __bool__(self):
    """True if at least one compared field differs."""
    return bool(self.delta_dict)

  def __nonzero__(self):
    # Python 2 spelling of __bool__.
    return self.__bool__()
| 67 | |
| 68 | |
def GetDeltasFromTwoSetsOfResults(set1, set2):
  """Gets the deltas between two sets of results.

  Args:
    set1 (dict): Maps crash id to a result. Each result must expose a
      ``fields`` attribute naming the attributes to compare.
    set2 (dict): Maps crash id to the result to compare against.

  Returns:
    A dict mapping crash id to the Delta between the two results for that
    crash. Crashes that are missing from set2, or whose results do not
    differ, are left out.
  """
  deltas = {}
  # ``items`` works on both Python 2 and 3, unlike ``iteritems``.
  for crash_id, result1 in set1.items():
    # Even when the commands are exactly the same, it's possible that one set
    # is loaded from a local result file while the other was just queried from
    # the database, and sometimes some crash results get deleted.
    if crash_id not in set2:
      continue

    delta = Delta(result1, set2[crash_id], result1.fields)
    # A Delta is falsy when none of the compared fields differ.
    if delta:
      deltas[crash_id] = delta

  return deltas
| 89 | |
| 90 | |
| 91 def GetResults(crashes, git_hash, result_path, verbose=False): | |
| 92 """Gets findit results for crashes, as computed at the given git hash. | |
| 93 | |
| 94 Args: | |
| 95 crashes (list): A list of crash infos. | |
| 96 git_hash (str): A git hash of findit repository. | |
| 97 result_path (str): file path for subprocess to write results on. | |
| 98 verbose (bool): If True, print all the findit results. | |
| 99 | |
| 100 Return: | |
| 101 A dict mapping crash id to culprit for every crash analyzed by | |
| 102 git_hash version. | |
| 103 """ | |
| 104 if not crashes: | |
| 105 return {} | |
| 106 | |
| 107 if verbose: | |
| 108 logging.info('\n\n***************************') | |
| 109 logging.info('Switch to git %s', git_hash) | |
| 110 logging.info('***************************\n\n') | |
| 111 | |
| 112 dev_null_handle = open(os.devnull, 'w') | |
| 113 subprocess.check_call( | |
| 114 'cd %s; git checkout %s' % (os.path.dirname(__file__), git_hash), | |
| 115 stdout=dev_null_handle, | |
| 116 stderr=dev_null_handle, | |
| 117 shell=True) | |
| 118 | |
| 119 if not os.path.exists(result_path): | |
| 120 # TODO(katesoina): Implement run-azalea.py. | |
|
stgao
2016/10/14 01:40:40
Is the TODO still valid?
Sharu Jiang
2016/10/15 01:24:47
run-azalea.py is not finished yet, however, right,
| |
| 121 command = 'python %s %s' % ('run-azalea.py', result_path) + ( | |
|
stgao
2016/10/14 01:40:40
what's the current working dir? Will it matter her
Sharu Jiang
2016/10/15 01:24:47
In line 114, we make sure the current working dir
stgao
2016/10/20 01:40:20
Is this new process run in the same shell as that
Sharu Jiang
2016/10/20 22:39:06
Yes, the run-azalea.py is in the same dir as this
| |
| 122 ' -v' if verbose else '') | |
|
stgao
2016/10/14 01:40:40
Can we use a argument list instead of a string?
Sharu Jiang
2016/10/15 01:24:47
Done.
| |
| 123 # Results is a dict with testcase_id as key, and findit results as | |
| 124 # value. | |
| 125 p = subprocess.Popen( | |
| 126 command, | |
| 127 stdin=subprocess.PIPE, | |
| 128 shell=True) | |
| 129 | |
| 130 p.communicate(input=json.dumps(crashes)) | |
|
stgao
2016/10/14 01:40:40
Should we pass information through a file instead?
Sharu Jiang
2016/10/15 01:24:47
We can, we can cache the CrashIterator results(bat
stgao
2016/10/20 01:40:20
I don't quite understand this. Maybe we could chat
Sharu Jiang
2016/10/20 22:39:06
Done.
| |
| 131 else: | |
| 132 logging.info('\nLoading results from %s', result_path) | |
| 133 | |
| 134 if not os.path.exists(result_path): | |
| 135 logging.info('Fail to get results.') | |
|
stgao
2016/10/14 01:40:40
Should it be an error or info? Same for other usag
Sharu Jiang
2016/10/15 01:24:47
Done.
| |
| 136 return {} | |
| 137 | |
| 138 with open(result_path) as f: | |
| 139 return pickle.load(f) | |
|
stgao
2016/10/14 01:40:40
Just curious: why we use pickle instead of json?
Sharu Jiang
2016/10/15 01:24:47
The results are a general concept, it can be a obj
stgao
2016/10/20 01:40:20
Acknowledged.
| |
| 140 | |
| 141 return {} | |
| 142 | |
| 143 | |
| 144 def DeltaEvaluator(git_hash1, git_hash2, | |
| 145 client_id, start_date, end_date, batch_size, | |
| 146 property_values=None, verbose=False, app_id=None): | |
| 147 """Evaluates delta between git_hash1 and git_hash2 on a set of Testcases. | |
| 148 | |
| 149 Args: | |
| 150 git_hash1 (str): A git hash of findit repository. | |
| 151 git_hash2 (str): A git hash of findit repository. | |
| 152 start_date (str): Run delta test on testcases after (including) | |
| 153 the start_date, format should be '%Y-%m-%d'. | |
| 154 end_date (str): Run delta test on testcases before (not including) | |
| 155 the end_date, format should be '%Y-%m-%d'. | |
| 156 client_id (CrashClient): Possible values are 'fracas', 'cracas', | |
| 157 'clusterfuzz'. | |
| 158 batch_size (int): Size of a batch that can be queried at one time. | |
| 159 property_values (dict): Property values to query. | |
| 160 batch_size (int): The size of crashes that can be queried at one time. | |
| 161 verbose (bool): If True, print all the findit results. | |
| 162 app_id (str): Appengine app id to query. | |
| 163 Return: | |
| 164 (deltas, crash_count). | |
| 165 deltas (dict): Maps crash id to delta for each culprit value. | |
| 166 crash_count (int): Total count of all the crashes. | |
| 167 """ | |
| 168 head_branch_name = subprocess.check_output( | |
| 169 ['git', 'rev-parse', '--abbrev-ref', 'HEAD']).replace('\n', '') | |
| 170 deltas = {} | |
| 171 try: | |
| 172 crash_count = 0 | |
| 173 for index, crashes in enumerate( | |
| 174 crash_iterator.IterateCrashes(client_id, | |
| 175 property_values=property_values, | |
| 176 start_date=start_date, | |
| 177 end_date=end_date, | |
| 178 batch_size=batch_size, | |
| 179 batch_run=True, | |
| 180 app_id=app_id)): | |
| 181 | |
| 182 results = [] | |
| 183 for git_hash in [git_hash1, git_hash2]: | |
|
stgao
2016/10/14 01:40:40
So for each crash, we switch the checkout twice. I
Sharu Jiang
2016/10/15 01:24:47
This is not for each crash, it is for each batch o
stgao
2016/10/20 01:40:20
sg
wrengr
2016/10/24 17:30:36
It'd still be good (and easy!) to avoid. I can add
Sharu Jiang
2016/10/24 19:14:32
Can we discuss the solution offline then?
The mai
| |
| 184 result_path = os.path.join( | |
| 185 AZALEA_RESULTS_DIRECTORY, delta_util.GenerateResultFileName( | |
| 186 client_id, property_values, start_date, end_date, | |
| 187 batch_size, index, git_hash)) | |
| 188 results.append(GetResults(crashes, git_hash, result_path, | |
| 189 verbose=verbose)) | |
| 190 | |
| 191 crash_count += len(crashes) | |
| 192 deltas.update(GetDeltasFromTwoSetsOfResults(*results)) | |
| 193 | |
| 194 return deltas, crash_count | |
| 195 finally: | |
| 196 dev_null_handle = open(os.devnull, 'w') | |
| 197 subprocess.check_call(['git', 'checkout', head_branch_name], | |
| 198 stdout=dev_null_handle, | |
| 199 stderr=dev_null_handle) | |
| OLD | NEW |