OLD | NEW |
| (Empty) |
1 import math | |
2 import re | |
3 from functools import reduce | |
4 | |
5 | |
def _geom_mean_and_std_dev_from_histogram(histogram):  # pragma: no cover
  """Computes the geometric mean and a deviation measure for a histogram.

  Every bucket is represented by one value: the midpoint of its 'low' and
  'high' bounds when 'high' is present, otherwise its 'low' bound. That value
  is written back into the bucket under the 'mean' key, so the histogram
  passed in is modified. Buckets whose representative value is not positive
  contribute to neither result.

  Args:
    histogram: A dictionary that may contain a 'buckets' list. Each bucket is
        a dictionary with 'low' and 'count' entries and optionally 'high'.

  Returns:
    A pair (geom_mean, std_dev). geom_mean is the count-weighted geometric
    mean of the bucket values; std_dev is the square root of the
    count-weighted mean squared distance of the bucket values from geom_mean.
    (0.0, 0.0) is returned when there is no 'buckets' key or when the
    contributing buckets have a total count of zero.
  """
  # Copied from: https://code.google.com/p/chromium/codesearch#chromium/build/scripts/common/chromium_utils.py&l=222
  # TODO(robertocn): Remove this code duplication from common.chromium_utils
  if 'buckets' not in histogram:
    return 0.0, 0.0

  buckets = histogram['buckets']

  # First pass: pick each bucket's representative value and accumulate the
  # weighted logarithms needed for the geometric mean.
  total_count = 0
  log_total = 0
  for entry in buckets:
    low = entry['low']
    if 'high' in entry:
      representative = (low + entry['high']) / 2.0
    else:
      representative = low
    entry['mean'] = representative
    if representative > 0:
      log_total += math.log(representative) * entry['count']
      total_count += entry['count']

  if total_count == 0:
    return 0.0, 0.0

  geometric_mean = math.exp(log_total / total_count)

  # Second pass: weighted squared distance from the geometric mean.
  squared_deviation_total = 0
  for entry in buckets:
    if entry['mean'] > 0:
      deviation = entry['mean'] - geometric_mean
      squared_deviation_total += deviation ** 2 * entry['count']
  return geometric_mean, math.sqrt(squared_deviation_total / total_count)
31 | |
32 | |
def parse_chartjson_metric(results, metric):  # pragma: no cover
  """Interprets results-chart.json, finding the needed values.

  The chart and trace names in `metric` are compared against the names in
  `results` after the characters : | = / # & , in the latter have been
  replaced by underscores. When the trace name equals the chart name, the
  'summary' trace is used.

  Args:
    results: The dictionary parsed from the chartjson file.
    metric: A pair of strings indicating chart and trace names.

  Returns:
    A triple (valid_values, values, all_results) where valid_values is a
    boolean, values is a list of floating point numbers, and all_results is
    the `results` dictionary that was passed in. For a trace of type
    'list_of_scalar_values', values holds the arithmetic mean of the trace
    values; for a trace of type 'histogram', it holds the geometric mean of
    the histogram. When the metric cannot be found, or the trace has no
    values, (False, [], results) is returned.
  """
  def escape_chars(original_string):
    return re.sub(r'[\:|=/#&,]', '_', original_string)

  chart_name, trace_name = metric
  if trace_name == chart_name:
    trace_name = 'summary'
  try:
    charts = results['charts']
    for chart in charts:
      if escape_chars(chart) == chart_name:
        chart_name = chart  # Unescaping
        break
    traces = charts[chart_name]
    for trace in traces:
      if escape_chars(trace) == trace_name:
        trace_name = trace  # Unescaping
        break
    trace_data = traces[trace_name]
    trace_type = trace_data['type']
    if trace_type == 'list_of_scalar_values':
      values = trace_data['values']
      if values:
        # float() keeps the mean from being truncated by integer division
        # when every value is an int and '/' is Python 2 floor division.
        return True, [float(sum(values)) / len(values)], results
    if trace_type == 'histogram':
      geom_mean = _geom_mean_and_std_dev_from_histogram(trace_data)[0]
      return True, [geom_mean], results
  except KeyError:  # e.g. metric not found
    pass
  return False, [], results
72 | |
73 | |
74 # The following has largely been copied from bisect_perf_regression.py | |
def parse_metric(out, err, metric):  # pragma: no cover
  """Tries to parse the output in RESULT line format or HISTOGRAM format.

  The RESULT format is tried first; the HISTOGRAM format is only tried when
  no RESULT values were found.

  Args:
    out, err: stdout and stderr that may contain the output to be parsed.
        Either may be None or empty.
    metric: The metric as a [<graph>, <trace>] pair of strings.

  Returns:
    A pair (valid_values, values) where valid_values is a boolean and values is
    a list of floating point numbers. (False, []) is returned when there is no
    output at all or when no values could be parsed from it.
  """
  text = (out or '') + (err or '')
  if not text:
    # With no output there is nothing to parse; report "no values" directly
    # rather than handing an empty string to the parsers.
    return False, []
  values = _parse_result_values_from_output(metric, text)
  if not values:
    values = _parse_histogram_values_from_output(metric, text)
  return bool(values), values
91 | |
92 | |
93 # TODO: Deprecate the text parsing approach to get results in favor of | |
94 # chartjson. | |
def _parse_result_values_from_output(metric, text):  # pragma: no cover
  """Attempts to parse a metric in the format RESULT <graph>: <trace>= ...

  Every line of `text` is searched for "RESULT <graph>: <trace>=" followed by
  one of these value layouts, tried in this order:
    <value>                      a single number
    [<value>,<value>,...]        a list of numbers, all of which are used
    {<mean>, <std deviation>}    of which only the mean is used
  Captured text that cannot be converted to a float is skipped.

  Args:
    metric: The metric as a [<graph>, <trace>] pair of strings.
    text: The text to parse the metric values from. May be empty or None.

  Returns:
    A list of floating point numbers found, [] if none were found.
  """
  if not text:
    # Keep the documented contract: "nothing found" is an empty list.
    return []

  # Format is: RESULT <graph>: <trace>= <value> <units>
  metric_re = re.escape('RESULT %s: %s=' % (metric[0], metric[1]))

  # <*>RESULT <graph_name>: <trace_name>= <value>
  single_result_re = re.compile(
      metric_re + r'\s*(?P<VALUE>[-]?\d*(\.\d*)?)')

  # <*>RESULT <graph_name>: <trace_name>= [<value>,value,value,...]
  multi_results_re = re.compile(
      metric_re + r'\s*\[\s*(?P<VALUES>[-]?[\d\., ]+)\s*\]')

  # <*>RESULT <graph_name>: <trace_name>= {<mean>, <std deviation>}
  mean_stddev_re = re.compile(
      metric_re +
      r'\s*\{\s*(?P<MEAN>[-]?\d*(\.\d*)?),\s*(?P<STDDEV>\d+(\.\d*)?)\s*\}')

  raw_values = []
  for current_line in text.split('\n'):
    # The single-value pattern can match with an empty VALUE (e.g. when a
    # '[' or '{' follows the prefix), hence the truthiness checks on groups.
    match = single_result_re.search(current_line)
    if match is not None and match.group('VALUE'):
      raw_values.append(match.group('VALUE'))
      continue
    match = multi_results_re.search(current_line)
    if match is not None and match.group('VALUES'):
      raw_values.extend(match.group('VALUES').split(','))
      continue
    match = mean_stddev_re.search(current_line)
    if match is not None and match.group('MEAN'):
      raw_values.append(match.group('MEAN'))

  list_of_floats = []
  for raw_value in raw_values:
    # The patterns are permissive (e.g. a lone '-' or '.', or an empty list
    # item), so conversion is attempted and failures are dropped.
    try:
      list_of_floats.append(float(raw_value))
    except ValueError:
      pass
  return list_of_floats
152 | |
153 | |
def _parse_histogram_values_from_output(metric, text):  # pragma: no cover
  """Attempts to parse a metric in the format HISTOGRAM <graph>: <trace>= ...

  The text following "HISTOGRAM <graph>: <trace>= " on a line is expected to
  be a dictionary literal with a 'buckets' list whose items each have 'low',
  'high' and 'count' entries. Lines whose payload does not have that form are
  skipped.

  Args:
    metric: The metric as a [<graph>, <trace>] pair of strings.
    text: The text to parse the metric values from. May be empty or None.

  Returns:
    A list of floating point numbers found, [] if none were found. Each
    bucket contributes the midpoint of its 'low' and 'high' bounds, repeated
    'count' times.
  """
  # Local import: only this helper needs ast.
  import ast

  if not text:
    return []

  metric_formatted = 'HISTOGRAM %s: %s= ' % (metric[0], metric[1])
  values_list = []

  for current_line in text.split('\n'):
    # Take whatever follows the marker, wherever the marker sits in the line,
    # so that a prefix before "HISTOGRAM" does not corrupt the payload.
    _, marker, payload = current_line.partition(metric_formatted)
    if not marker:
      continue

    try:
      # SECURITY: the payload comes from test output, so it is parsed as a
      # plain literal with ast.literal_eval() rather than executed with
      # eval(). Payloads that are not pure literals are rejected.
      histogram_values = ast.literal_eval(payload.strip())

      for bucket in histogram_values['buckets']:
        average_for_bucket = float(bucket['high'] + bucket['low']) * 0.5
        # Extends the list with N-elements with the average for that bucket.
        values_list.extend([average_for_bucket] * bucket['count'])
    except (ValueError, SyntaxError, TypeError, KeyError):
      # Best-effort parsing: a malformed or unexpected payload is skipped.
      continue

  return values_list
OLD | NEW |