scripts/slave/recipe_modules/bisect_tester/parse_metric.py - Issue 2247373002: Refactor stages 1, 2 and test_api overhaul.

Side by Side Diff: scripts/slave/recipe_modules/bisect_tester/parse_metric.py

Issue 2247373002: Refactor stages 1, 2 and test_api overhaul. (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/build.git@master

Patch Set: Addressing all early feedback. Created 4 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« scripts/slave/recipe_modules/auto_bisect/test_api.py ('K') | « scripts/slave/recipe_modules/bisect_tester/api.py ('k') | scripts/slave/recipe_modules/bisect_tester/perf_test.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
	(Empty)
1 import math

2 import re

3 from functools import reduce

4

5

def _geom_mean_and_std_dev_from_histogram(histogram):  # pragma: no cover
  """Computes the geometric mean and a spread measure for a histogram.

  Each bucket is represented by the midpoint of its 'low' and 'high' bounds,
  or by 'low' alone when the bucket has no 'high' key. Buckets whose
  representative value is not positive are left out of the logarithm and
  squared-deviation sums, but their counts still add to the total count.

  Args:
    histogram: A dict that may hold a 'buckets' list. Every bucket is a dict
        with 'low', 'count' and optionally 'high'. The dict is not modified.

  Returns:
    A pair (geom_mean, std_dev) where std_dev is the square root of the
    count-weighted mean squared deviation from the geometric mean. Returns
    (0.0, 0.0) when there is no 'buckets' key or the total count is zero.
  """
  # Copied from: https://code.google.com/p/chromium/codesearch#chromium/build/scripts/common/chromium_utils.py&l=222
  # TODO(robertocn): Remove this code duplication from common.chromium_utils
  if 'buckets' not in histogram:
    return 0.0, 0.0

  # Keep (mean, count) pairs locally instead of writing a 'mean' key back
  # into the caller's bucket dicts.
  weighted_means = []
  for bucket in histogram['buckets']:
    if 'high' in bucket:
      mean = (bucket['low'] + bucket['high']) / 2.0
    else:
      mean = bucket['low']
    weighted_means.append((mean, bucket['count']))

  count = sum(bucket_count for _, bucket_count in weighted_means)
  if count == 0:
    return 0.0, 0.0

  # log() is undefined for non-positive values, so those buckets are skipped.
  sum_of_logs = sum(
      math.log(mean) * bucket_count
      for mean, bucket_count in weighted_means if mean > 0)
  geom_mean = math.exp(float(sum_of_logs) / count)

  sum_of_squares = sum(
      (mean - geom_mean) ** 2 * bucket_count
      for mean, bucket_count in weighted_means if mean > 0)
  return geom_mean, math.sqrt(float(sum_of_squares) / count)

31

32

def parse_chartjson_metric(results, metric):  # pragma: no cover
  """Interpret results-chart.json, finding the needed values.

  The chart and trace names in `metric` are compared against the names found
  in `results` after each of the characters : | = / # & , in the latter has
  been replaced by '_'; the first name that matches is used for the lookup.

  Args:
    results: The dictionary parsed from the chartjson file.
    metric: A pair of strings indicating chart and trace names. When both
        names are equal, the trace named 'summary' is looked up instead.

  Returns:
    A triple (valid_values, values, all_results) where valid_values is a
    boolean, values is a list of floating point numbers, and all_results is
    the `results` dictionary that was passed in. For a
    'list_of_scalar_values' trace, values holds the arithmetic mean of the
    trace's values; for a 'histogram' trace it holds the geometric mean.
    (False, [], results) is returned when the metric is not found, the value
    list is empty, or the trace type is neither of the two.
  """
  def escape_chars(original_string):
    return re.sub(r'[\:\|=/#&,]', '_', original_string)

  chart_name, trace_name = metric
  if trace_name == chart_name:
    trace_name = 'summary'
  try:
    charts = results['charts']
    for chart in charts:
      if escape_chars(chart) == chart_name:
        chart_name = chart  # Unescaping
        break
    traces = charts[chart_name]
    for trace in traces:
      if escape_chars(trace) == trace_name:
        trace_name = trace  # Unescaping
        break
    trace_data = traces[trace_name]
    trace_type = trace_data['type']
    if trace_type == 'list_of_scalar_values':
      values = trace_data['values']
      if values:
        # float() keeps the mean exact when every value is an int and the
        # interpreter uses truncating integer division.
        avg_value = [float(sum(values)) / len(values)]
        return True, avg_value, results
    if trace_type == 'histogram':
      return True, [_geom_mean_and_std_dev_from_histogram(
          trace_data)[0]], results
  except KeyError:  # e.g. metric not found
    pass
  return False, [], results

72

73

74 # The following has largely been copied from bisect_perf_regression.py

def parse_metric(out, err, metric):  # pragma: no cover
  """Tries to parse the output in RESULT line format or HISTOGRAM format.

  stdout and stderr are concatenated and searched for RESULT lines first;
  HISTOGRAM lines are only consulted when no RESULT values were found.

  Args:
    out, err: stdout and stderr that may contain the output to be parsed.
        Either may be None or empty.
    metric: The metric as a [<graph>, <trace>] pair of strings.

  Returns:
    A pair (valid_values, values) where valid_values is a boolean and values is
    a list of floating point numbers. (False, []) when there is no output.
  """
  text = (out or '') + (err or '')
  if not text:
    # Nothing to parse. Answer here so that an empty log can never be
    # reported as holding valid values.
    return False, []
  result = _parse_result_values_from_output(metric, text)
  if not result:
    result = _parse_histogram_values_from_output(metric, text)
  return bool(result), result

91

92

93 # TODO: Deprecate the text parsing approach to get results in favor of

94 # chartjson.

def _parse_result_values_from_output(metric, text):  # pragma: no cover
  """Attempts to parse a metric in the format RESULT <graph>: <trace>= ...

  Three value layouts are recognised after the '=' sign, tried in this order
  on every line:
    <value>                  a single number
    [<value>,<value>,...]    a list of numbers, all of which are collected
    {<mean>, <std deviation>}  only the mean is collected

  Args:
    metric: The metric as a [<graph>, <trace>] pair of strings.
    text: The text to parse the metric values from.

  Returns:
    A list of floating point numbers found, [] if none were found or if text
    is empty.
  """
  if not text:
    return []
  # Format is: RESULT <graph>: <trace>= <value> <units>
  metric_re = re.escape('RESULT %s: %s=' % (metric[0], metric[1]))

  # An optionally negative decimal number with any number of digits.
  number_re = r'[-]?(?:\d+(?:\.\d*)?|\.\d+)'

  # The log will be parsed looking for format:
  # <*>RESULT <graph_name>: <trace_name>= <value>
  single_result_re = re.compile(
      metric_re + r'\s*(?P<VALUE>' + number_re + r')')

  # The log will be parsed looking for format:
  # <*>RESULT <graph_name>: <trace_name>= [<value>,value,value,...]
  multi_results_re = re.compile(
      metric_re + r'\s*\[\s*(?P<VALUES>[-\d., ]+)\s*\]')

  # The log will be parsed looking for format:
  # <*>RESULT <graph_name>: <trace_name>= {<mean>, <std deviation>}
  mean_stddev_re = re.compile(
      metric_re +
      r'\s*\{\s*(?P<MEAN>' + number_re + r'),'
      r'\s*(?P<STDDEV>\d+(?:\.\d*)?)\s*\}')

  values_list = []
  for current_line in text.split('\n'):
    # Parse the output from the performance test for the metric we're
    # interested in.
    single_result_match = single_result_re.search(current_line)
    if single_result_match is not None:
      values_list.append(single_result_match.group('VALUE'))
      continue
    multi_results_match = multi_results_re.search(current_line)
    if multi_results_match is not None:
      values_list.extend(multi_results_match.group('VALUES').split(','))
      continue
    mean_stddev_match = mean_stddev_re.search(current_line)
    if mean_stddev_match is not None:
      values_list.append(mean_stddev_match.group('MEAN'))

  list_of_floats = []
  # The list pattern is permissive, so tokens that are not valid numbers are
  # dropped here rather than rejected by the regular expression.
  for v in values_list:
    try:
      list_of_floats.append(float(v))
    except ValueError:
      pass
  return list_of_floats

152

153

def _parse_histogram_values_from_output(metric, text):  # pragma: no cover
  """Attempts to parse a metric in the format HISTOGRAM <graph>: <trace>= ...

  The text following the 'HISTOGRAM <graph>: <trace>= ' marker on a line is
  read as a dictionary with a 'buckets' list. Each bucket contributes the
  midpoint of its 'low' and 'high' bounds, repeated 'count' times. Lines
  whose payload cannot be read are skipped.

  Args:
    metric: The metric as a [<graph>, <trace>] pair of strings.
    text: The text to parse the metric values from.

  Returns:
    A list of floating point numbers found, [] if none were found.
  """
  # Imported locally so this function does not depend on the module's import
  # block.
  import ast
  import json

  if not text:
    return []

  metric_formatted = 'HISTOGRAM %s: %s= ' % (metric[0], metric[1])
  values_list = []

  for current_line in text.split('\n'):
    # Take whatever follows the marker, wherever it sits on the line, so that
    # lines carrying a leading prefix are still understood.
    _, marker, payload = current_line.partition(metric_formatted)
    if not marker:
      continue

    try:
      # NOTE(security): this used to eval() the payload, i.e. run test log
      # output as Python code. Only data literals are accepted now: JSON
      # first, then a plain Python literal.
      try:
        histogram_values = json.loads(payload)
      except ValueError:
        histogram_values = ast.literal_eval(payload.strip())

      for b in histogram_values['buckets']:
        average_for_bucket = float(b['high'] + b['low']) * 0.5
        # Extends the list with N-elements with the average for that bucket.
        values_list.extend([average_for_bucket] * b['count'])
    except (ValueError, SyntaxError, KeyError, TypeError, RuntimeError,
            MemoryError):
      # Best effort: a malformed histogram line is ignored.
      continue

  return values_list

OLD	NEW