appengine/findit/util_scripts/crash_queries/delta_test/delta_test.py - Issue 2432203003: [Predator] Run predator.

Side by Side Diff: appengine/findit/util_scripts/crash_queries/delta_test/delta_test.py

Issue 2432203003: [Predator] Run predator. (Closed)

Patch Set: Fix flaky BadStatusLine exception. Created 4 years, 1 month ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« appengine/findit/crash/changelist_classifier.py ('K') | « appengine/findit/util_scripts/crash_queries/crash_iterator.py ('k') | appengine/findit/util_scripts/crash_queries/delta_test/delta_util.py » ('j') | appengine/findit/util_scripts/crash_queries/delta_test/delta_util.py » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 # Copyright 2016 The Chromium Authors. All rights reserved.	1 # Copyright 2016 The Chromium Authors. All rights reserved.

2 # Use of this source code is governed by a BSD-style license that can be	2 # Use of this source code is governed by a BSD-style license that can be

3 # found in the LICENSE file.	3 # found in the LICENSE file.

4	4

5 import json	5 import json

6 import logging	6 import logging

7 import os	7 import os

8 import pickle	8 import pickle

9 import subprocess	9 import subprocess

10	10

11 from crash_queries import crash_iterator	11 from crash_queries import crash_iterator

12 from crash_queries.delta_test import delta_util	12 from crash_queries.delta_test import delta_util

13	13

14 AZALEA_RESULTS_DIRECTORY = os.path.join(os.path.dirname(__file__),	14 PREDATOR_RESULTS_DIRECTORY = os.path.join(os.path.dirname(__file__),

15 'azalea_results')	15 'predator_results')

16 DELTA_TEST_DIRECTORY = os.path.dirname(__file__)	16 DELTA_TEST_DIRECTORY = os.path.dirname(__file__)

	17 CRASH_FIELDS = ['crashed_version', 'stack_trace', 'signature',

	18 'platform', 'client_id', 'regression_range',

	19 'customized_data', 'historical_metadata']

	20

17	21

18	22

19 # TODO(crbug.com/662540): Add unittests.	23 # TODO(crbug.com/662540): Add unittests.

20 class Delta(object): # pragma: no cover.	24 class Delta(object): # pragma: no cover.

21 """Stands for delta between two results.	25 """Stands for delta between two results.

22	26

23 Note, the 2 results should be the same kind and have the same structure.	27 Note, the 2 results should be the same kind and have the same structure.

24 """	28 """

25	29

26 def __init__(self, result1, result2, fields):	30 def __init__(self, result1, result2):

27 self._result1 = result1	31 self._result1 = result1

28 self._result2 = result2	32 self._result2 = result2

29 self._fields = fields	33 self._delta_dict = None

30 self._delta_dict = {}	34 self._delta_str_dict = None

31 self._delta_str_dict = {}

32	35

33 @property	36 @property

34 def delta_dict(self):	37 def delta_dict(self):

35 """Dict representation of delta.	38 """Dict representation of delta.

36	39

37 Returns:	40 Returns:

38 A dict. For example, for Culprit result, the delta dict is like below:	41 A dict. For example, for Culprit result, the delta dict is like below:

39 {	42 {

40 'project': 'chromium',	43 'project': 'chromium',

41 'components': ['Blink>API'],	44 'components': ['Blink>API'],

42 'cls': [],	45 'cls': [],

43 'regression_range': ['52.0.1200.1', '52.0.1200.3']	46 'regression_range': ['52.0.1200.1', '52.0.1200.3']

44 }	47 }

45 """	48 """

46 if self._delta_dict:	49 if self._delta_dict:

47 return self._delta_dict	50 return self._delta_dict

48	51

49 for field in self._fields:	52 self._delta_dict = {}

50 value1 = getattr(self._result1, field)	53 result1 = self._result1.ToDicts()[0] if self._result1 else {'found': False}

51 value2 = getattr(self._result2, field)	54 result2 = self._result2.ToDicts()[0] if self._result2 else {'found': False}

	55 keys = (set(result1.keys()) if result1 else set() |

	56 set(result2.keys()) if result2 else set())

	57 for key in keys:

	58 value1 = result1.get(key)

	59 value2 = result2.get(key)

52 if value1 != value2:	60 if value1 != value2:

53 if hasattr(value1, 'ToDict') and callable(value1.ToDict):	61 self._delta_dict[key] = (value1, value2)

54 value1 = value1.ToDict()

55 value2 = value2.ToDict()

56 self._delta_dict[field] = (value1, value2)

57	62

58 return self._delta_dict	63 return self._delta_dict

59	64

60 @property	65 @property

61 def delta_str_dict(self):	66 def delta_str_dict(self):

62 """Converts delta of each field to a string."""	67 """Converts delta of each field to a string."""

63 if self._delta_str_dict:	68 if self._delta_str_dict:

64 return self._delta_str_dict	69 return self._delta_str_dict

65	70

	71 self._delta_str_dict = {}

66 for key, (value1, value2) in self.delta_dict.iteritems():	72 for key, (value1, value2) in self.delta_dict.iteritems():

67 self._delta_str_dict[key] = '%s: %s, %s' % (key, value1, value2)	73 if key == 'suspected_cls':

	74 for value in [value1, value2]:

	75 if not value:

	76 continue

	77

	78 for cl in value:

	79 cl['confidence'] = round(cl['confidence'], 2)

	80 cl.pop('reasons', None)

	81

	82 value1 = json.dumps(value1, indent=4, sort_keys=True)

	83 value2 = json.dumps(value2, indent=4, sort_keys=True)

	84

	85 self._delta_str_dict[key] = '%s 1: %s\n%s 2: %s\n' % (key, value1,

	86 key, value2)

68	87

69 return self._delta_str_dict	88 return self._delta_str_dict

70	89

71 def ToDict(self):	90 def ToDict(self):

72 return self.delta_dict	91 return self.delta_dict

73	92

74 def __str__(self):	93 def __str__(self):

75 return '\n'.join(self.delta_str_dict.values())	94 return '\n'.join(self.delta_str_dict.values())

76	95

77 def __bool__(self):	96 def __bool__(self):

(...skipping 13 matching lines...) Expand all Loading...
91 """	110 """

92 deltas = {}	111 deltas = {}

93 for result_id, result1 in set1.iteritems():	112 for result_id, result1 in set1.iteritems():

94 # Even when the commands are exactly the same, it's possible that one set is	113 # Even when the commands are exactly the same, it's possible that one set is

95 # loaded from local result file, another is just queried from database,	114 # loaded from local result file, another is just queried from database,

96 # sometimes some crash results would get deleted.	115 # sometimes some crash results would get deleted.

97 if result_id not in set2:	116 if result_id not in set2:

98 continue	117 continue

99	118

100 result2 = set2[result_id]	119 result2 = set2[result_id]

101 delta = Delta(result1, result2, result1.fields)	120 if not result1 and not result2:

	121 continue

	122

	123 delta = Delta(result1, result2)

102 if delta:	124 if delta:

103 deltas[result_id] = delta	125 deltas[result_id] = delta

104	126

105 return deltas	127 return deltas

106	128

107	129

108 # TODO(crbug.com/662540): Add unittests.	130 # TODO(crbug.com/662540): Add unittests.

109 def GetResults(crashes, client_id, git_hash, result_path,	131 def GetResults(crashes, client_id, app_id, git_hash, result_path,

110 verbose=False): # pragma: no cover.	132 verbose=False): # pragma: no cover.

111 """Returns an evaluator function to compute delta between 2 findit githashes.	133 """Returns an evaluator function to compute delta between 2 findit githashes.

112	134

113 Args:	135 Args:

114 crashes (list): A list of crash infos.	136 crashes (list): A list of crash infos.

115 client_id (str): Possible values - fracas/cracas/clusterfuzz.	137 client_id (str): Possible values - fracas/cracas/clusterfuzz.

	138 app_id (str): Appengine app id to query.

116 git_hash (str): A git hash of findit repository.	139 git_hash (str): A git hash of findit repository.

117 result_path (str): file path for subprocess to write results on.	140 result_path (str): file path for subprocess to write results on.

118 verbose (bool): If True, print all the findit results.	141 verbose (bool): If True, print all the findit results.

119	142

120 Return:	143 Return:

121 A dict mapping crash id to culprit for every crash analyzed by	144 A dict mapping crash id to culprit for every crash analyzed by

122 git_hash version.	145 git_hash version.

123 """	146 """

124 if not crashes:	147 if not crashes:

125 return {}	148 return {}

126	149

127 if verbose:	150 if verbose:

128 logging.info('\n\n***************************')	151 print '***************************'

129 logging.info('Switching to git %s', git_hash)	152 print 'Switching to git %s' % git_hash

130 logging.info('***************************\n\n')	153 print '***************************\n\n'

131	154

132 with open(os.devnull, 'w') as null_handle:	155 with open(os.devnull, 'w') as null_handle:

133 subprocess.check_call(	156 subprocess.check_call(

134 'cd %s; git checkout %s' % (DELTA_TEST_DIRECTORY, git_hash),	157 'cd %s; git checkout %s' % (DELTA_TEST_DIRECTORY, git_hash),

135 stdout=null_handle,	158 stdout=null_handle,

136 stderr=null_handle,	159 stderr=null_handle,

137 shell=True)	160 shell=True)

138	161

139 if not os.path.exists(result_path):	162 if not os.path.exists(result_path):

140 args = ['python', 'run-predator.py', result_path, '--client', client_id]	163 args = ['python', 'run-predator.py', result_path, client_id, app_id]

141 if verbose:	164 if verbose:

142 args.append('--verbose')	165 args.append('--verbose')

143 p = subprocess.Popen(args, stdin=subprocess.PIPE)	166 p = subprocess.Popen(args, stdin=subprocess.PIPE)

144 # TODO(katesonia): Cache crashes for crash_iterator and let subprocess read	167 # TODO(katesonia): Cache crashes for crash_iterator and let subprocess read

145 # corresponding cache file instead.	168 # corresponding cache file instead.

146 p.communicate(input=json.dumps(crashes))	169 p.communicate(input=json.dumps(crashes))

147 else:	170 else:

148 logging.info('\nLoading results from %s', result_path)	171 print '\nLoading results from', result_path

149	172

150 if not os.path.exists(result_path):	173 if not os.path.exists(result_path):

151 logging.error('Failed to get results.')	174 print 'Failed to get results.'

152 return {}	175 return {}

153	176

154 with open(result_path) as f:	177 with open(result_path) as f:

155 return pickle.load(f)	178 return pickle.load(f)

156	179

157 return {}	180 return {}

158	181

159	182

160 # TODO(crbug.com/662540): Add unittests.	183 # TODO(crbug.com/662540): Add unittests.

161 def DeltaEvaluator(git_hash1, git_hash2,	184 def DeltaEvaluator(git_hash1, git_hash2,

(...skipping 21 matching lines...) Expand all Loading...
183 deltas (dict): Mappings id to delta for each culprit value.	206 deltas (dict): Mappings id to delta for each culprit value.

184 crash_count (int): Total count of all the crashes.	207 crash_count (int): Total count of all the crashes.

185 """	208 """

186 head_branch_name = subprocess.check_output(	209 head_branch_name = subprocess.check_output(

187 ['git', 'rev-parse', '--abbrev-ref', 'HEAD']).replace('\n', '')	210 ['git', 'rev-parse', '--abbrev-ref', 'HEAD']).replace('\n', '')

188 try:	211 try:

189 deltas = {}	212 deltas = {}

190 crash_count = 0	213 crash_count = 0

191 for index, crashes in enumerate(	214 for index, crashes in enumerate(

192 crash_iterator.IterateCrashes(client_id, app_id,	215 crash_iterator.IterateCrashes(client_id, app_id,

	216 fields=CRASH_FIELDS,

193 property_values=property_values,	217 property_values=property_values,

194 start_date=start_date,	218 start_date=start_date,

195 end_date=end_date,	219 end_date=end_date,

196 batch_size=batch_size,	220 batch_size=batch_size,

197 batch_run=True)):	221 batch_run=True)):

198	222

199 results = []	223 results = []

200 for git_hash in [git_hash1, git_hash2]:	224 for git_hash in [git_hash1, git_hash2]:

201 result_path = os.path.join(	225 result_path = os.path.join(

202 AZALEA_RESULTS_DIRECTORY, delta_util.GenerateFileName(	226 PREDATOR_RESULTS_DIRECTORY, delta_util.GenerateFileName(

203 client_id, property_values, start_date, end_date,	227 client_id, property_values, start_date, end_date,

204 batch_size, index, git_hash))	228 batch_size, index, git_hash))

205 results.append(GetResults(crashes, client_id, git_hash, result_path,	229 results.append(GetResults(crashes, client_id, app_id,

	230 git_hash, result_path,

206 verbose=verbose))	231 verbose=verbose))

207	232

208 crash_count += len(crashes)	233 crash_count += len(crashes)

209 deltas.update(GetDeltasFromTwoSetsOfResults(*results))	234 batch_deltas = GetDeltasFromTwoSetsOfResults(*results)

	235 # Print deltas of the current batch.

	236 print '========= Delta of this batch ========='

	237 delta_util.PrintDelta(batch_deltas, len(crashes), app_id)

	238 deltas.update(batch_deltas)

210	239

211 return deltas, crash_count	240 return deltas, crash_count

212 finally:	241 finally:

213 with open(os.devnull, 'w') as null_handle:	242 with open(os.devnull, 'w') as null_handle:

214 subprocess.check_call(['git', 'checkout', head_branch_name],	243 subprocess.check_call(['git', 'checkout', head_branch_name],

215 stdout=null_handle,	244 stdout=null_handle,

216 stderr=null_handle)	245 stderr=null_handle)

OLD	NEW