 Chromium Code Reviews
 Chromium Code Reviews Issue 2479543002:
  Porting relevant legacy conversion code from performance_lp to src side  (Closed)
    
  
    Issue 2479543002:
  Porting relevant legacy conversion code from performance_lp to src side  (Closed) 
  | OLD | NEW | 
|---|---|
| (Empty) | |
| 1 #!/usr/bin/env python | |
| 2 # Copyright 2015 The Chromium Authors. All rights reserved. | |
| 
eakuefner
2016/11/04 16:58:56
2016?
 
eyaich1
2016/11/04 17:33:03
Done.
 
eyaich1
2016/11/04 17:33:03
Done.
 | |
| 3 # Use of this source code is governed by a BSD-style license that can be | |
| 4 # found in the LICENSE file. | |
| 5 | |
| 6 """ Generates legacy perf dashboard json from non-telemetry based perf tests. | |
| 7 Taken from chromium/build/scripts/slave/performance_log_processor.py, we are | |
| 
eakuefner
2016/11/04 16:58:56
I'd put a link to the revision of performance_log_
 
eyaich1
2016/11/04 17:33:03
Done.
 | |
| 8 pulling out the smallest amount of code still needed to create valid results for | |
| 9 C++ perf tests. | |
| 10 """ | |
| 11 | |
| 12 import collections | |
| 13 import json | |
| 14 import math | |
| 15 import logging | |
| 16 import os | |
| 17 import re | |
| 18 | |
| 19 | |
class LegacyResultsProcessor(object):
  """Class for any log processor expecting standard data to be graphed.

  The log will be parsed looking for any lines of the forms:
    <*>RESULT <graph_name>: <trace_name>= <value> <units>
  or
    <*>RESULT <graph_name>: <trace_name>= [<value>,value,value,...] <units>
  or
    <*>RESULT <graph_name>: <trace_name>= {<mean>, <std deviation>} <units>

  For example,
    *RESULT vm_final_browser: OneTab= 8488 kb
    RESULT startup: ref= [167.00,148.00,146.00,142.00] ms
    RESULT TabCapturePerformance_foo: Capture= {30.7, 1.45} ms

  The leading * is optional; it indicates that the data from that line should
  be considered "important", which may mean for example that it's graphed by
  default.

  If a list of values is given in [], their mean and standard deviation
  (population form, i.e. dividing by the number of values) will be written.
  A trailing comma is permitted in the list of values.

  NOTE: All lines except for RESULT lines are ignored, including the Avg and
  Stddev lines output by Telemetry!

  Any of the <fields> except <value> may be empty, in which case the
  not-terribly-useful defaults will be used. The <graph_name> and <trace_name>
  should not contain any spaces, colons (:) nor equals-signs (=). Furthermore,
  the <trace_name> will be used on the waterfall display, so it should be kept
  short. If the trace_name ends with '_ref', it will be interpreted as a
  reference value, and shown alongside the corresponding main value on the
  waterfall.

  Semantic note: The terms graph and chart are used interchangeably here.
  """

  RESULTS_REGEX = re.compile(r'(?P<IMPORTANT>\*)?RESULT '
                             r'(?P<GRAPH>[^:]*): (?P<TRACE>[^=]*)= '
                             r'(?P<VALUE>[\{\[]?[-\d\., ]+[\}\]]?)('
                             r' ?(?P<UNITS>.+))?')
  # TODO(eyaich): Determine if this format is still used by any perf tests
  HISTOGRAM_REGEX = re.compile(r'(?P<IMPORTANT>\*)?HISTOGRAM '
                               r'(?P<GRAPH>[^:]*): (?P<TRACE>[^=]*)= '
                               r'(?P<VALUE_JSON>{.*})(?P<UNITS>.+)?')

  class Trace(object):
    """Encapsulates data for one trace. Here, this means one point."""

    def __init__(self):
      self.important = False
      self.value = 0.0
      self.stddev = 0.0

    def __str__(self):
      """Returns 'value' or 'value+/-stddev' in human readable form."""
      result = _FormatHumanReadable(self.value)
      if self.stddev:
        result += '+/-%s' % _FormatHumanReadable(self.stddev)
      return result

  class Graph(object):
    """Encapsulates a set of points that should appear on the same graph."""

    def __init__(self):
      self.units = None
      self.traces = {}

    def IsImportant(self):
      """A graph is considered important if any of its traces is important."""
      # values() (rather than itervalues()) works on Python 2 and Python 3.
      return any(trace.important for trace in self.traces.values())

    def BuildTracesDict(self):
      """Returns a dictionary mapping trace names to [value, stddev].

      Both entries of each list are converted to strings.
      """
      return dict((name, [str(trace.value), str(trace.stddev)])
                  for name, trace in self.traces.items())

  def __init__(self):
    # A dict of Graph objects, by name.
    self._graphs = {}
    # A dict mapping output file names to lists of lines in a file.
    self._output = {}
    self._percentiles = [.1, .25, .5, .75, .90, .95, .99]

  def GenerateJsonResults(self, filename):
    """Parses every line of |filename| and returns the graph json string.

    Args:
      filename: path of the test output log to read.

    Returns:
      The string produced by _GenerateGraphJson() once all lines were seen.
    """
    # Iterate over the file object directly so the whole log is never held in
    # memory at once.
    with open(filename) as f:
      for line in f:
        self._ProcessLine(line)
    # After all results have been seen, generate the graph json data.
    return self._GenerateGraphJson()

  def _PrependLog(self, filename, data):
    """Prepends some data to an output file."""
    self._output[filename] = data + self._output.get(filename, [])

  def _ProcessLine(self, line):
    """Processes one result line, and updates the state accordingly.

    Lines matching neither RESULTS_REGEX nor HISTOGRAM_REGEX are ignored.

    Raises:
      Exception: if the line is a HISTOGRAM line, which is not supported.
    """
    results_match = self.RESULTS_REGEX.search(line)
    if results_match:
      self._ProcessResultLine(results_match)
    elif self.HISTOGRAM_REGEX.search(line):
      raise Exception("Error: Histogram results parsing not supported yet")

  def _ProcessResultLine(self, line_match):
    """Processes a line that matches the standard RESULT line format.

    The value is fully parsed before any stored state is touched, so a
    corrupted line is reported and ignored without clobbering the units or
    value recorded by earlier, valid lines for the same graph/trace.

    Args:
      line_match: A MatchObject as returned by re.search.
    """
    match_dict = line_match.groupdict()
    graph_name = match_dict['GRAPH'].strip()
    trace_name = match_dict['TRACE'].strip()
    raw_value = match_dict['VALUE']

    # None means "this line carries no stddev"; the trace then keeps whatever
    # stddev it already had.
    stddev = None
    filedata = {}

    if raw_value.startswith('['):
      # A list of values: compute the mean and standard deviation.
      try:
        value_list = [float(x) for x in raw_value.strip('[],').split(',')]
      except ValueError:
        # Report, but ignore, corrupted data lines. (Lines that are so badly
        # broken that they don't even match the RESULTS_REGEX won't be
        # detected.)
        logging.warning("Bad test output: '%s'", raw_value.strip())
        return
      value, stddev, filedata = self._CalculateStatistics(
          value_list, trace_name)
      assert filedata is not None
    elif raw_value.startswith('{'):
      # An explicit {mean, stddev} pair. A wrong number of entries raises
      # ValueError on unpacking and is treated as a corrupted line too.
      try:
        value, stddev = [float(x) for x in raw_value.strip('{},').split(',')]
      except ValueError:
        logging.warning("Bad test output: '%s'", raw_value.strip())
        return
    else:
      # A single numerical value.
      try:
        value = float(raw_value)
      except ValueError:
        logging.warning("Bad test output: '%s'", raw_value.strip())
        return

    for filename in filedata:
      self._PrependLog(filename, filedata[filename])

    # The line is valid: commit it to the graph/trace state.
    graph = self._graphs.get(graph_name)
    if graph is None:
      graph = self.Graph()
    graph.units = match_dict['UNITS'] or ''

    trace = graph.traces.get(trace_name)
    if trace is None:
      trace = self.Trace()
    trace.value = value
    if stddev is not None:
      trace.stddev = stddev
    trace.important = bool(match_dict['IMPORTANT'])

    graph.traces[trace_name] = trace
    self._graphs[graph_name] = graph

  def _GenerateGraphJson(self):
    """Returns a json string describing every graph seen so far.

    The top-level object maps each graph name to an object with 'traces' and
    'units' entries, plus an 'important' entry when any trace is important.
    """
    charts = {}
    # items() (rather than iteritems()) works on Python 2 and Python 3.
    for graph_name, graph in self._graphs.items():
      graph_dict = collections.OrderedDict([
          ('traces', graph.BuildTracesDict()),
          ('units', str(graph.units)),
      ])

      # Include a sorted list of important trace names if there are any.
      important = sorted(
          name for name, trace in graph.traces.items() if trace.important)
      if important:
        graph_dict['important'] = important

      charts[graph_name] = graph_dict
    return json.dumps(charts)

  # This stays an instance method with an unused trace_name argument so that
  # subclasses can override it, hence the two pylint suppressions below
  # (R0201: method could be a function, W0613: unused argument).
  # pylint: disable=R0201
  # pylint: disable=W0613
  def _CalculateStatistics(self, value_list, trace_name):
    """Returns a tuple with some statistics based on the given value list.

    This method may be overridden by subclasses wanting a different standard
    deviation calculation (or some other sort of error value entirely).

    Args:
      value_list: the list of values to use in the calculation
      trace_name: the trace that produced the data (not used in the base
          implementation, but subclasses may use it)

    Returns:
      A 3-tuple - mean, standard deviation, and a dict which is either
      empty or contains information about some file contents. For an empty
      value_list this is (0.0, 0.0, {}).
    """
    n = len(value_list)
    if n == 0:
      # Must stay a 3-tuple: the caller unpacks three values.
      return 0.0, 0.0, {}
    mean = float(sum(value_list)) / n
    # Population variance: the sum of squared deviations is divided by n.
    variance = sum((element - mean) ** 2 for element in value_list) / n
    stddev = math.sqrt(variance)

    return mean, stddev, {}
| 231 | |
| 232 | |
| 233 def _FormatHumanReadable(number): | |
| 234 """Formats a float into three significant figures, using metric suffixes. | |
| 235 | |
| 236 Only m, k, and M prefixes (for 1/1000, 1000, and 1,000,000) are used. | |
| 237 Examples: | |
| 238 0.0387 => 38.7m | |
| 239 1.1234 => 1.12 | |
| 240 10866 => 10.8k | |
| 241 682851200 => 683M | |
| 242 """ | |
| 243 metric_prefixes = {-3: 'm', 0: '', 3: 'k', 6: 'M'} | |
| 244 scientific = '%.2e' % float(number) # 6.83e+005 | |
| 245 e_idx = scientific.find('e') # 4, or 5 if negative | |
| 246 digits = float(scientific[:e_idx]) # 6.83 | |
| 247 exponent = int(scientific[e_idx + 1:]) # int('+005') = 5 | |
| 248 while exponent % 3: | |
| 249 digits *= 10 | |
| 250 exponent -= 1 | |
| 251 while exponent > 6: | |
| 252 digits *= 10 | |
| 253 exponent -= 1 | |
| 254 while exponent < -3: | |
| 255 digits /= 10 | |
| 256 exponent += 1 | |
| 257 if digits >= 100: | |
| 258 # Don't append a meaningless '.0' to an integer number. | |
| 259 digits = int(digits) | |
| 260 # Exponent is now divisible by 3, between -3 and 6 inclusive: (-3, 0, 3, 6). | |
| 261 return '%s%s' % (digits, metric_prefixes[exponent]) | |
| OLD | NEW |