Chromium Code Reviews| Index: tools/perf/generate_legacy_perf_dashboard_json.py |
| diff --git a/tools/perf/generate_legacy_perf_dashboard_json.py b/tools/perf/generate_legacy_perf_dashboard_json.py |
| new file mode 100755 |
| index 0000000000000000000000000000000000000000..384d10efbf97aee9dca388d688b52cd54bafbfe3 |
| --- /dev/null |
| +++ b/tools/perf/generate_legacy_perf_dashboard_json.py |
| @@ -0,0 +1,261 @@ |
| +#!/usr/bin/env python |
| +# Copyright 2016 The Chromium Authors. All rights reserved. |
|
eakuefner
2016/11/04 16:58:56
2016?
eyaich1
2016/11/04 17:33:03
Done.
eyaich1
2016/11/04 17:33:03
Done.
|
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| + |
| +""" Generates legacy perf dashboard json from non-telemetry based perf tests. |
| +Taken from chromium/build/scripts/slave/performance_log_processor.py, we are |
|
eakuefner
2016/11/04 16:58:56
I'd put a link to the revision of performance_log_
eyaich1
2016/11/04 17:33:03
Done.
|
| +pulling out the smallest amount of code still needed to create valid results for |
| +C++ perf tests. |
| +""" |
| + |
| +import collections |
| +import json |
| +import math |
| +import logging |
| +import os |
| +import re |
| + |
| + |
class LegacyResultsProcessor(object):
  """Log processor that turns standard perf output lines into graph data.

  The log is scanned for lines of the forms:
    <*>RESULT <graph_name>: <trace_name>= <value> <units>
    <*>RESULT <graph_name>: <trace_name>= [<value>,value,value,...] <units>
    <*>RESULT <graph_name>: <trace_name>= {<mean>, <std deviation>} <units>

  For example:
    *RESULT vm_final_browser: OneTab= 8488 kb
    RESULT startup: ref= [167.00,148.00,146.00,142.00] ms
    RESULT TabCapturePerformance_foo: Capture= {30.7, 1.45} ms

  A leading '*' is optional; it marks the data on that line as "important",
  which may mean, for example, that it is graphed by default.

  When several values appear inside [], their mean and (sample) standard
  deviation are recorded; a single value is recorded as-is. A trailing comma
  inside the list is tolerated.

  NOTE: every line that is not a RESULT line is ignored, including the Avg
  and Stddev lines that Telemetry emits!

  Every <field> except <value> may be empty, in which case unhelpful defaults
  apply. <graph_name> and <trace_name> must not contain spaces, colons (:) or
  equals signs (=). <trace_name> appears on the waterfall display, so keep it
  short. A trace name ending in '_ref' is treated as a reference value and is
  shown next to the corresponding main value on the waterfall.

  Semantic note: "graph" and "chart" are used interchangeably here.
  """

  # Named groups: IMPORTANT ('*' or None), GRAPH, TRACE, VALUE (may include
  # surrounding [] or {}), UNITS (may be None).
  RESULTS_REGEX = re.compile(r'(?P<IMPORTANT>\*)?RESULT '
                             r'(?P<GRAPH>[^:]*): (?P<TRACE>[^=]*)= '
                             r'(?P<VALUE>[\{\[]?[-\d\., ]+[\}\]]?)('
                             r' ?(?P<UNITS>.+))?')
  # TODO(eyaich): Determine if this format is still used by any perf tests
  HISTOGRAM_REGEX = re.compile(r'(?P<IMPORTANT>\*)?HISTOGRAM '
                               r'(?P<GRAPH>[^:]*): (?P<TRACE>[^=]*)= '
                               r'(?P<VALUE_JSON>{.*})(?P<UNITS>.+)?')
| + |
class Trace(object):
  """Holds the data for a single trace, i.e. one measured point."""

  def __init__(self):
    # True when the RESULT line carried a leading '*'.
    self.important = False
    self.value = 0.0
    self.stddev = 0.0

  def __str__(self):
    text = _FormatHumanReadable(self.value)
    if self.stddev:
      text = '%s+/-%s' % (text, _FormatHumanReadable(self.stddev))
    return text
| + |
class Graph(object):
  """Encapsulates a set of points that should appear on the same graph."""

  def __init__(self):
    # Units string shared by the traces on this graph ('' when none given).
    self.units = None
    # Maps trace name -> Trace.
    self.traces = {}

  def IsImportant(self):
    """A graph is considered important if any of its traces is important."""
    # Use values() rather than the Python-2-only itervalues() so this code
    # runs under both Python 2 and Python 3.
    for trace in self.traces.values():
      if trace.important:
        return True
    return False

  def BuildTracesDict(self):
    """Returns a dictionary mapping trace names to [value, stddev]."""
    traces_dict = {}
    for name, trace in self.traces.items():
      traces_dict[name] = [str(trace.value), str(trace.stddev)]
    return traces_dict
| + |
| + |
| + def __init__(self): |
|
eakuefner
2016/11/04 16:58:56
Something is weird here. You're missing a class de
eyaich1
2016/11/04 17:33:03
Its the init for the top level class, LegacyResult
|
| + # A dict of Graph objects, by name. |
| + self._graphs = {} |
| + # A dict mapping output file names to lists of lines in a file. |
| + self._output = {} |
| + self._percentiles = [.1, .25, .5, .75, .90, .95, .99] |
| + |
| + |
| + def GenerateJsonResults(self, filename): |
| + # Iterate through the file and process each output line |
| + with open(filename) as f: |
| + for line in f.readlines(): |
| + self._ProcessLine(line) |
| + # After all results have been seen, generate the graph json data |
| + return self._GenerateGraphJson() |
| + |
| + |
| + def _PrependLog(self, filename, data): |
| + """Prepends some data to an output file.""" |
| + self._output[filename] = data + self._output.get(filename, []) |
| + |
| + |
| + def _ProcessLine(self, line): |
| + """Processes one result line, and updates the state accordingly.""" |
| + results_match = self.RESULTS_REGEX.search(line) |
| + histogram_match = self.HISTOGRAM_REGEX.search(line) |
| + if results_match: |
| + self._ProcessResultLine(results_match) |
| + elif histogram_match: |
| + raise Exception("Error: Histogram results parsing not supported yet") |
| + |
| + |
| + def _ProcessResultLine(self, line_match): |
| + """Processes a line that matches the standard RESULT line format. |
| + |
| + Args: |
| + line_match: A MatchObject as returned by re.search. |
| + """ |
| + match_dict = line_match.groupdict() |
| + graph_name = match_dict['GRAPH'].strip() |
| + trace_name = match_dict['TRACE'].strip() |
| + |
| + graph = self._graphs.get(graph_name, self.Graph()) |
| + graph.units = match_dict['UNITS'] or '' |
| + trace = graph.traces.get(trace_name, self.Trace()) |
| + trace.value = match_dict['VALUE'] |
| + trace.important = match_dict['IMPORTANT'] or False |
| + |
| + # Compute the mean and standard deviation for a multiple-valued item, |
|
eakuefner
2016/11/04 16:58:56
Can you just be explicit about this and say "a lis
eyaich1
2016/11/04 17:33:03
Done.
|
| + # or the numerical value of a single-valued item. |
| + if trace.value.startswith('['): |
| + try: |
| + value_list = [float(x) for x in trace.value.strip('[],').split(',')] |
| + except ValueError: |
| + # Report, but ignore, corrupted data lines. (Lines that are so badly |
| + # broken that they don't even match the RESULTS_REGEX won't be |
| + # detected.) |
| + logging.warning("Bad test output: '%s'" % trace.value.strip()) |
| + return |
| + trace.value, trace.stddev, filedata = self._CalculateStatistics( |
| + value_list, trace_name) |
| + assert filedata is not None |
| + for filename in filedata: |
| + self._PrependLog(filename, filedata[filename]) |
| + elif trace.value.startswith('{'): |
| + stripped = trace.value.strip('{},') |
| + try: |
| + trace.value, trace.stddev = [float(x) for x in stripped.split(',')] |
| + except ValueError: |
| + logging.warning("Bad test output: '%s'" % trace.value.strip()) |
| + return |
| + else: |
| + try: |
| + trace.value = float(trace.value) |
| + except ValueError: |
| + logging.warning("Bad test output: '%s'" % trace.value.strip()) |
| + return |
| + |
| + graph.traces[trace_name] = trace |
| + self._graphs[graph_name] = graph |
| + |
| + |
| + def _GenerateGraphJson(self): |
| + """Writes graph json for each graph seen. |
| + """ |
| + charts = {} |
| + for graph_name, graph in self._graphs.iteritems(): |
| + graph_dict = collections.OrderedDict([ |
| + ('traces', graph.BuildTracesDict()), |
| + ('units', str(graph.units)), |
| + ]) |
| + |
| + # Include a sorted list of important trace names if there are any. |
|
eakuefner
2016/11/04 16:58:56
Why do we need to do this?
eyaich1
2016/11/04 17:33:02
We reference it in MakeListOfPoints in results_das
|
| + important = [t for t in graph.traces.keys() if graph.traces[t].important] |
| + if important: |
| + graph_dict['important'] = sorted(important) |
| + |
| + charts[graph_name] = graph_dict |
| + #charts[graph_name] = json.dumps(graph_dict) |
|
eakuefner
2016/11/04 16:58:56
oops?
eyaich1
2016/11/04 17:33:03
Done.
|
| + return json.dumps(charts) |
| + |
| + |
| + # _CalculateStatistics needs to be a member function. |
|
eakuefner
2016/11/04 16:58:56
This sequence of comments is really unclear to me.
eyaich1
2016/11/04 17:33:03
This is copy and pasted from performacne_log_proce
|
| + # pylint: disable=R0201 |
| + # Unused argument value_list. |
| + # pylint: disable=W0613 |
| + def _CalculateStatistics(self, value_list, trace_name): |
| + """Returns a tuple with some statistics based on the given value list. |
| + |
| + This method may be overridden by subclasses wanting a different standard |
| + deviation calcuation (or some other sort of error value entirely). |
| + |
| + Args: |
| + value_list: the list of values to use in the calculation |
| + trace_name: the trace that produced the data (not used in the base |
| + implementation, but subclasses may use it) |
| + |
| + Returns: |
| + A 3-tuple - mean, standard deviation, and a dict which is either |
| + empty or contains information about some file contents. |
| + """ |
| + n = len(value_list) |
| + if n == 0: |
| + return 0.0, 0.0 |
|
eakuefner
2016/11/04 16:58:56
hm, this isn't a 3-tuple
eyaich1
2016/11/04 17:33:02
Done.
|
| + mean = float(sum(value_list)) / n |
| + variance = sum([(element - mean)**2 for element in value_list]) / n |
| + stddev = math.sqrt(variance) |
| + |
| + return mean, stddev, {} |
| + |
| + |
| +def _FormatHumanReadable(number): |
| + """Formats a float into three significant figures, using metric suffixes. |
| + |
| + Only m, k, and M prefixes (for 1/1000, 1000, and 1,000,000) are used. |
| + Examples: |
| + 0.0387 => 38.7m |
| + 1.1234 => 1.12 |
| + 10866 => 10.8k |
| + 682851200 => 683M |
| + """ |
| + metric_prefixes = {-3: 'm', 0: '', 3: 'k', 6: 'M'} |
| + scientific = '%.2e' % float(number) # 6.83e+005 |
| + e_idx = scientific.find('e') # 4, or 5 if negative |
| + digits = float(scientific[:e_idx]) # 6.83 |
| + exponent = int(scientific[e_idx + 1:]) # int('+005') = 5 |
| + while exponent % 3: |
| + digits *= 10 |
| + exponent -= 1 |
| + while exponent > 6: |
| + digits *= 10 |
| + exponent -= 1 |
| + while exponent < -3: |
| + digits /= 10 |
| + exponent += 1 |
| + if digits >= 100: |
| + # Don't append a meaningless '.0' to an integer number. |
| + digits = int(digits) |
| + # Exponent is now divisible by 3, between -3 and 6 inclusive: (-3, 0, 3, 6). |
| + return '%s%s' % (digits, metric_prefixes[exponent]) |