Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(375)

Side by Side Diff: scripts/slave/recipe_modules/bisect_tester/parse_metric.py

Issue 2247373002: Refactor stages 1, 2 and test_api overhaul. (Closed) Base URL: https://chromium.googlesource.com/chromium/tools/build.git@master
Patch Set: Addressing all early feedback. Created 4 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 import math
2 import re
3 from functools import reduce
4
5
def _geom_mean_and_std_dev_from_histogram(histogram):  # pragma: no cover
  """Computes a count-weighted geometric mean and deviation for a histogram.

  Each bucket's representative value is the midpoint of 'low' and 'high', or
  just 'low' when the bucket has no 'high'. Buckets whose representative value
  is not positive are ignored, since their logarithm is undefined.

  The input histogram is not modified.

  Args:
    histogram: A dictionary that may contain a 'buckets' list; each bucket is a
        dictionary with 'low', 'count' and optionally 'high' entries.

  Returns:
    A pair (geom_mean, std_dev). std_dev is the square root of the
    count-weighted mean squared distance of the bucket values from geom_mean.
    (0.0, 0.0) is returned when there is no 'buckets' key or when the total
    count of the buckets taken into account is zero.
  """
  # Copied from: https://code.google.com/p/chromium/codesearch#chromium/build/scripts/common/chromium_utils.py&l=222
  # TODO(robertocn): Remove this code duplication from common.chromium_utils
  if 'buckets' not in histogram:
    return 0.0, 0.0

  # (value, count) pairs for the buckets that take part in the computation.
  # They are kept in a local list rather than written back into the bucket
  # dictionaries, so the caller's data is left untouched.
  weighted_values = []
  for bucket in histogram['buckets']:
    if 'high' in bucket:
      value = (bucket['low'] + bucket['high']) / 2.0
    else:
      value = bucket['low']
    if value > 0:
      weighted_values.append((value, bucket['count']))

  count = sum(n for _, n in weighted_values)
  if count == 0:
    return 0.0, 0.0

  sum_of_logs = sum(math.log(value) * n for value, n in weighted_values)
  geom_mean = math.exp(sum_of_logs / count)
  sum_of_squares = sum(
      (value - geom_mean) ** 2 * n for value, n in weighted_values)
  return geom_mean, math.sqrt(sum_of_squares / count)
31
32
def parse_chartjson_metric(results, metric):  # pragma: no cover
  """Interpret results-chart.json, finding the needed values.

  The chart and trace names in `metric` are compared against the escaped form
  of the names found in `results`, and the first match is used. When the trace
  name equals the chart name, the 'summary' trace is looked up instead.

  Args:
    results: The dictionary parsed from the chartjson file.
    metric: A pair of strings indicating chart and trace names.

  Returns:
    A triple (valid_values, values, all_results) where valid_values is a
    boolean, values is a list holding a single floating point number (the
    arithmetic mean for 'list_of_scalar_values' traces, the geometric mean for
    'histogram' traces) or an empty list when nothing was found, and
    all_results is the `results` dictionary that was passed in.
  """
  def escape_chars(original_string):
    return re.sub(r'[\:|=/#&,]', '_', original_string)

  chart_name, trace_name = metric
  if trace_name == chart_name:
    trace_name = 'summary'
  try:
    charts = results['charts']
    for chart in charts:
      if escape_chars(chart) == chart_name:
        chart_name = chart  # Unescaping
        break
    traces = charts[chart_name]
    for trace in traces:
      if escape_chars(trace) == trace_name:
        trace_name = trace  # Unescaping
        break
    trace_data = traces[trace_name]
    trace_type = trace_data['type']
    if trace_type == 'list_of_scalar_values':
      values = trace_data['values']
      if values:
        # Force float division so that integer samples are not truncated by
        # floor division on Python 2.
        avg_value = [float(sum(values)) / len(values)]
        return True, avg_value, results
    if trace_type == 'histogram':
      return True, [_geom_mean_and_std_dev_from_histogram(trace_data)[0]], results
  except KeyError:  # e.g. metric not found
    pass
  return False, [], results
72
73
74 # The following has largely been copied from bisect_perf_regression.py
def parse_metric(out, err, metric):  # pragma: no cover
  """Tries to parse the output in RESULT line format or HISTOGRAM format.

  The RESULT format is tried first; the HISTOGRAM format is only tried when
  the RESULT format yields nothing.

  Args:
    out, err: stdout and stderr that may contain the output to be parsed.
        Either may be None or empty.
    metric: The metric as a [<graph>, <trace>] pair of strings.

  Returns:
    A pair (valid_values, values) where valid_values is a boolean and values is
    a list of floating point numbers. (False, []) is returned when there is no
    output to parse.
  """
  text = (out or '') + (err or '')
  if not text:
    # With no output at all there is nothing to parse, so report "no values"
    # directly instead of relying on how each parser treats empty text.
    return False, []
  result = _parse_result_values_from_output(metric, text)
  if not result:
    result = _parse_histogram_values_from_output(metric, text)
  return bool(result), result
91
92
93 # TODO: Deprecate the text parsing approach to get results in favor of
94 # chartjson.
def _parse_result_values_from_output(metric, text):  # pragma: no cover
  """Attempts to parse a metric in the format RESULT <graph>: <trace>= ...

  Three value layouts are recognized on each line, tried in this order:
    RESULT <graph>: <trace>= <value> <units>
    RESULT <graph>: <trace>= [<value>,<value>,...] <units>
    RESULT <graph>: <trace>= {<mean>, <std deviation>} <units>
  For the last layout only the mean is collected.

  Args:
    metric: The metric as a [<graph>, <trace>] pair of strings.
    text: The text to parse the metric values from.

  Returns:
    A list of floating point numbers found, [] if none were found.
  """
  if not text:
    # Return an empty list (not a sentinel) so that callers testing the
    # truthiness or length of the result see "no values".
    return []
  # Format is: RESULT <graph>: <trace>= <value> <units>
  metric_re = re.escape('RESULT %s: %s=' % (metric[0], metric[1]))

  # The log will be parsed looking for format:
  # <*>RESULT <graph_name>: <trace_name>= <value>
  single_result_re = re.compile(
      metric_re + r'\s*(?P<VALUE>[-]?\d*(\.\d*)?)')

  # The log will be parsed looking for format:
  # <*>RESULT <graph_name>: <trace_name>= [<value>,value,value,...]
  multi_results_re = re.compile(
      metric_re + r'\s*\[\s*(?P<VALUES>[-]?[\d\., ]+)\s*\]')

  # The log will be parsed looking for format:
  # <*>RESULT <graph_name>: <trace_name>= {<mean>, <std deviation>}
  mean_stddev_re = re.compile(
      metric_re +
      r'\s*\{\s*(?P<MEAN>[-]?\d*(\.\d*)?),\s*(?P<STDDEV>\d+(\.\d*)?)\s*\}')

  values_list = []
  for current_line in text.split('\n'):
    # Parse the output from the performance test for the metric we're
    # interested in. The single-value pattern can match an empty string (for
    # instance right before '[' or '{'), so an empty capture means "try the
    # next layout" rather than "found a value".
    single_result_match = single_result_re.search(current_line)
    if single_result_match is not None and single_result_match.group('VALUE'):
      values_list.append(single_result_match.group('VALUE'))
      continue

    multi_results_match = multi_results_re.search(current_line)
    if (multi_results_match is not None and
        multi_results_match.group('VALUES')):
      values_list.extend(multi_results_match.group('VALUES').split(','))
      continue

    mean_stddev_match = mean_stddev_re.search(current_line)
    if mean_stddev_match is not None and mean_stddev_match.group('MEAN'):
      values_list.append(mean_stddev_match.group('MEAN'))

  list_of_floats = []
  # Captures such as '-', '.' or an empty list element are not numbers; they
  # are skipped by attempting the conversion and ignoring the failure.
  for v in values_list:
    try:
      list_of_floats.append(float(v))
    except ValueError:
      pass
  return list_of_floats
152
153
def _parse_histogram_values_from_output(metric, text):  # pragma: no cover
  """Attempts to parse a metric in the format HISTOGRAM <graph>: <trace>= ...

  Whatever follows the 'HISTOGRAM <graph>: <trace>= ' marker on a line is
  evaluated as a dictionary with a 'buckets' list. Each bucket contributes its
  midpoint, (high + low) / 2, repeated 'count' times.

  Args:
    metric: The metric as a [<graph>, <trace>] pair of strings.
    text: The text to parse the metric values from.

  Returns:
    A list of floating point numbers found, [] if none were found.
  """
  if not text:
    return []
  metric_formatted = 'HISTOGRAM %s: %s= ' % (metric[0], metric[1])

  values_list = []
  for current_line in text.split('\n'):
    # Take the text after the marker wherever the marker sits in the line, so
    # that lines carrying a prefix (e.g. a log timestamp) are still parsed.
    _, marker, payload = current_line.partition(metric_formatted)
    if not marker:
      continue

    try:
      # SECURITY NOTE(review): eval() executes arbitrary expressions found in
      # the test output. It is kept to preserve the accepted input syntax;
      # consider json.loads or ast.literal_eval if the output is not trusted.
      histogram_values = eval(payload)

      for b in histogram_values['buckets']:
        average_for_bucket = float(b['high'] + b['low']) * 0.5
        # Extends the list with N-elements with the average for that bucket.
        values_list.extend([average_for_bucket] * b['count'])
    except Exception:
      # Deliberately best-effort: a malformed histogram line is skipped and
      # parsing continues with the next line.
      pass

  return values_list
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698