appengine/findit/util_scripts/crash_queries/delta_test/delta_test.py - Issue 2663063007: [Predator] Switch from anonymous dict to CrashData.

Side by Side Diff: appengine/findit/util_scripts/crash_queries/delta_test/delta_test.py

Issue 2663063007: [Predator] Switch from anonymous dict to CrashData. (Closed)

Patch Set: Rebase and fix delta test. Created 3 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« no previous file with comments | « appengine/findit/util_scripts/crash_queries/crash_iterator.py ('k') | appengine/findit/util_scripts/crash_queries/delta_test/run-predator.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 # Copyright 2016 The Chromium Authors. All rights reserved.	1 # Copyright 2016 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 import json	5 import json

6 import os	6 import os

7 import pickle	7 import pickle

8 import subprocess	8 import subprocess

	9 import zlib

9	10

10 from crash_queries import crash_iterator	11 from crash_queries import crash_iterator

11 from crash_queries.delta_test import delta_util	12 from crash_queries.delta_test import delta_util

12	13

13 PREDATOR_RESULTS_DIRECTORY = os.path.join(os.path.dirname(__file__),	14 PREDATOR_RESULTS_DIRECTORY = os.path.join(os.path.dirname(__file__),

14 'predator_results')	15 'predator_results')

15 DELTA_TEST_DIRECTORY = os.path.dirname(__file__)	16 DELTA_TEST_DIRECTORY = os.path.dirname(__file__)

16 CRASH_FIELDS = ['crashed_version', 'stack_trace', 'signature',	17

17 'platform', 'client_id', 'regression_range',	18 # TODO(crbug.com/662540): Add unittests.

18 'customized_data', 'historical_metadata']

19	19

20	20

21 # TODO(crbug.com/662540): Add unittests.

22 class Delta(object): # pragma: no cover.	21 class Delta(object): # pragma: no cover.

23 """Stands for delta between two results.	22 """Stands for delta between two results.

24	23

25 Note, the 2 results should be the same kind and have the same structure.	24 Note, the 2 results should be the same kind and have the same structure.

26 """	25 """

27	26

28 def __init__(self, result1, result2):	27 def __init__(self, result1, result2):

29 self._result1 = result1	28 self._result1 = result1

30 self._result2 = result2	29 self._result2 = result2

31 self._delta_dict = None	30 self._delta_dict = None

(...skipping 59 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
91 def __str__(self):	90 def __str__(self):

92 return '\n'.join(self.delta_str_dict.values())	91 return '\n'.join(self.delta_str_dict.values())

93	92

94 def __bool__(self):	93 def __bool__(self):

95 return bool(self.delta_dict)	94 return bool(self.delta_dict)

96	95

97 def __nonzero__(self):	96 def __nonzero__(self):

98 return self.__bool__()	97 return self.__bool__()

99	98

100	99

101 # TODO(crbug.com/662540): Add unittests.

102 def GetDeltasFromTwoSetsOfResults(set1, set2): # pragma: no cover.	100 def GetDeltasFromTwoSetsOfResults(set1, set2): # pragma: no cover.

103 """Gets delta from two sets of results.	101 """Gets delta from two sets of results.

104	102

105 Set1 and set2 are dicts mapping id to result.	103 Set1 and set2 are dicts mapping id to result.

106 Results are a list of (message, matches, component_name, cr_label)	104 Results are a list of (message, matches, component_name, cr_label)

107 Returns a list of delta results (results1, results2).	105 Returns a list of delta results (results1, results2).

108 """	106 """

109 deltas = {}	107 deltas = {}

110 for result_id, result1 in set1.iteritems():	108 for result_id, result1 in set1.iteritems():

111 # Even when the commands are exactly the same, it's possible that one set is	109 # Even when the commands are exactly the same, it's possible that one set is

112 # loaded from local result file, another is just queried from database,	110 # loaded from local result file, another is just queried from database,

113 # sometimes some crash results would get deleted.	111 # sometimes some crash results would get deleted.

114 if result_id not in set2:	112 if result_id not in set2:

115 continue	113 continue

116	114

117 result2 = set2[result_id]	115 result2 = set2[result_id]

118 if not result1 and not result2:	116 if not result1 and not result2:

119 continue	117 continue

120	118

121 delta = Delta(result1, result2)	119 delta = Delta(result1, result2)

122 if delta:	120 if delta:

123 deltas[result_id] = delta	121 deltas[result_id] = delta

124	122

125 return deltas	123 return deltas

126	124

127	125

128 # TODO(crbug.com/662540): Add unittests.

129 def GetResults(crashes, client_id, app_id, git_hash, result_path,	126 def GetResults(crashes, client_id, app_id, git_hash, result_path,

130 verbose=False): # pragma: no cover.	127 verbose=False): # pragma: no cover.

131 """Returns an evaluator function to compute delta between 2 findit githashes.	128 """Returns an evaluator function to compute delta between 2 findit githashes.

132	129

133 Args:	130 Args:

134 crashes (list): A list of crash infos.	131 crashes (list): A list of ``CrashAnalysis``.

135 client_id (str): Possible values - fracas/cracas/clusterfuzz.	132 client_id (str): Possible values - fracas/cracas/clusterfuzz.

136 app_id (str): Appengine app id to query.	133 app_id (str): Appengine app id to query.

137 git_hash (str): A git hash of findit repository.	134 git_hash (str): A git hash of findit repository.

138 result_path (str): file path for subprocess to write results on.	135 result_path (str): file path for subprocess to write results on.

139 verbose (bool): If True, print all the findit results.	136 verbose (bool): If True, print all the findit results.

140	137

141 Return:	138 Return:

142 A dict mapping crash id to culprit for every crash analyzed by	139 A dict mapping crash id to culprit for every crash analyzed by

143 git_hash version.	140 git_hash version.

144 """	141 """

145 if not crashes:	142 if not crashes:

146 return {}	143 return {}

147	144

148 print '***************************'	145 print '***************************'

149 print 'Switching to git %s' % git_hash	146 print 'Switching to git %s' % git_hash

150 print '***************************'	147 print '***************************'

151 with open(os.devnull, 'w') as null_handle:	148 with open(os.devnull, 'w') as null_handle:

152 subprocess.check_call(	149 subprocess.check_call(

153 'cd %s; git checkout %s' % (DELTA_TEST_DIRECTORY, git_hash),	150 'cd %s; git checkout %s' % (DELTA_TEST_DIRECTORY, git_hash),

154 stdout=null_handle,	151 stdout=null_handle,

155 stderr=null_handle,	152 stderr=null_handle,

156 shell=True)	153 shell=True)

157	154

158 if not os.path.exists(result_path):	155 if not os.path.exists(result_path):

159 args = ['python', 'run-predator.py', result_path, client_id, app_id]	156 # Pass the crashes information to sub-routine ``run-predator`` to compute

	157 # culprit results and write results to ``result_path``.

	158 input_path = os.path.join(PREDATOR_RESULTS_DIRECTORY, 'input')

	159 with open(input_path, 'wb') as f:

	160 f.write(zlib.compress(pickle.dumps(crashes)))

	161

	162 args = ['python', 'run-predator.py', input_path, result_path,

	163 client_id, app_id]

160 if verbose:	164 if verbose:

161 args.append('--verbose')	165 args.append('--verbose')

162 p = subprocess.Popen(args, stdin=subprocess.PIPE)	166 p = subprocess.Popen(args, stdin=subprocess.PIPE)

163 # TODO(katesonia): Cache crashes for crash_iterator and let subprocess read	167 p.communicate()

164 # corresponding cache file instead.

165 #

166 # Pass the crashes information to sub-routine ``run-predator`` to compute

167 # culprit results and write results to ``result_path``.

168 p.communicate(input=json.dumps(crashes))

169 else:	168 else:

170 print '\nLoading results from', result_path	169 print '\nLoading results from', result_path

171	170

172 if not os.path.exists(result_path):	171 if not os.path.exists(result_path):

173 print 'Failed to get results.'	172 print 'Failed to get predator results.'

174 return {}	173 return {}

175	174

176 # Read culprit results from ``result_path``, which is computed by sub-routine	175 # Read culprit results from ``result_path``, which is computed by sub-routine

177 # ``run-predator``.	176 # ``run-predator``.

178 with open(result_path) as f:	177 with open(result_path) as f:

179 return pickle.load(f)	178 return pickle.load(f)

180	179

181 return {}	180 return {}

182	181

183	182

184 # TODO(crbug.com/662540): Add unittests.

185 def DeltaEvaluator(git_hash1, git_hash2,	183 def DeltaEvaluator(git_hash1, git_hash2,

186 client_id, app_id,	184 client_id, app_id,

187 start_date, end_date, batch_size, max_n,	185 start_date, end_date, batch_size, max_n,

188 property_values=None, verbose=False): # pragma: no cover.	186 property_values=None, verbose=False): # pragma: no cover.

189 """Evaluates delta between git_hash1 and git_hash2 on a set of Testcases.	187 """Evaluates delta between git_hash1 and git_hash2 on a set of Testcases.

190	188

191 Args:	189 Args:

192 git_hash1 (str): A git hash of findit repository.	190 git_hash1 (str): A git hash of findit repository.

193 git_hash2 (str): A git hash of findit repository.	191 git_hash2 (str): A git hash of findit repository.

194 start_date (str): Run delta test on testcases after (including)	192 start_date (str): Run delta test on testcases after (including)

(...skipping 14 matching lines...) Expand all Loading...
209 crash_count (int): Total count of all the crashes.	207 crash_count (int): Total count of all the crashes.

210 """	208 """

211 head_branch_name = subprocess.check_output(	209 head_branch_name = subprocess.check_output(

212 ['git', 'rev-parse', '--abbrev-ref', 'HEAD']).replace('\n', '')	210 ['git', 'rev-parse', '--abbrev-ref', 'HEAD']).replace('\n', '')

213 try:	211 try:

214 deltas = {}	212 deltas = {}

215 crash_count = 0	213 crash_count = 0

216 # Iterate batches of crash information.	214 # Iterate batches of crash information.

217 for index, crashes in enumerate(	215 for index, crashes in enumerate(

218 crash_iterator.CachedCrashIterator(client_id, app_id,	216 crash_iterator.CachedCrashIterator(client_id, app_id,

219 fields=CRASH_FIELDS,

220 property_values=property_values,	217 property_values=property_values,

221 start_date=start_date,	218 start_date=start_date,

222 end_date=end_date,	219 end_date=end_date,

223 batch_size=batch_size,	220 batch_size=batch_size,

224 batch_run=True)):	221 batch_run=True)):

225 # Truncate crashes and make it contain at most max_n crashes.	222 # Truncate crashes and make it contain at most max_n crashes.

226 if crash_count + len(crashes) > max_n:	223 if crash_count + len(crashes) > max_n:

227 crashes = crashes[:(max_n - crash_count)]	224 crashes = crashes[:(max_n - crash_count)]

228	225

229 results = []	226 results = []

(...skipping 17 matching lines...) Expand all Loading...
247 deltas.update(batch_deltas)	244 deltas.update(batch_deltas)

248 if crash_count >= max_n:	245 if crash_count >= max_n:

249 break	246 break

250	247

251 return deltas, crash_count	248 return deltas, crash_count

252 finally:	249 finally:

253 with open(os.devnull, 'w') as null_handle:	250 with open(os.devnull, 'w') as null_handle:

254 subprocess.check_call(['git', 'checkout', head_branch_name],	251 subprocess.check_call(['git', 'checkout', head_branch_name],

255 stdout=null_handle,	252 stdout=null_handle,

256 stderr=null_handle)	253 stderr=null_handle)

OLD	NEW