OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/env python |
| 2 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. |
| 5 |
| 6 """ Generates legacy perf dashboard json from non-telemetry based perf tests. |
| 7 Taken from chromium/build/scripts/slave/performance_log_processor.py
| 8 (https://goo.gl/03SQRk) |
| 9 """ |
| 10 |
| 11 import collections |
| 12 import json |
| 13 import math |
| 14 import logging |
| 15 import re |
| 16 |
| 17 |
class LegacyResultsProcessor(object):
  """Class for any log processor expecting standard data to be graphed.

  The log will be parsed looking for any lines of the forms:
    <*>RESULT <graph_name>: <trace_name>= <value> <units>
  or
    <*>RESULT <graph_name>: <trace_name>= [<value>,value,value,...] <units>
  or
    <*>RESULT <graph_name>: <trace_name>= {<mean>, <std deviation>} <units>

  For example,
    *RESULT vm_final_browser: OneTab= 8488 kb
    RESULT startup: ref= [167.00,148.00,146.00,142.00] ms
    RESULT TabCapturePerformance_foo: Capture= {30.7, 1.45} ms

  The leading * is optional; it indicates that the data from that line should
  be considered "important", which may mean for example that it's graphed by
  default.

  If multiple values are given in [], their mean and standard deviation will
  be written; if only one value is given, that will be written. A trailing
  comma is permitted in the list of values.

  NOTE: All lines except for RESULT lines are ignored, including the Avg and
  Stddev lines output by Telemetry!

  Any of the <fields> except <value> may be empty, in which case the
  not-terribly-useful defaults will be used. The <graph_name> and <trace_name>
  should not contain any spaces, colons (:) nor equals-signs (=). Furthermore,
  the <trace_name> will be used on the waterfall display, so it should be kept
  short. If the trace_name ends with '_ref', it will be interpreted as a
  reference value, and shown alongside the corresponding main value on the
  waterfall.

  Semantic note: The terms graph and chart are used interchangeably here.
  """

  RESULTS_REGEX = re.compile(r'(?P<IMPORTANT>\*)?RESULT '
                             r'(?P<GRAPH>[^:]*): (?P<TRACE>[^=]*)= '
                             r'(?P<VALUE>[\{\[]?[-\d\., ]+[\}\]]?)('
                             r' ?(?P<UNITS>.+))?')
  # TODO(eyaich): Determine if this format is still used by any perf tests
  HISTOGRAM_REGEX = re.compile(r'(?P<IMPORTANT>\*)?HISTOGRAM '
                               r'(?P<GRAPH>[^:]*): (?P<TRACE>[^=]*)= '
                               r'(?P<VALUE_JSON>{.*})(?P<UNITS>.+)?')

  def __init__(self):
    # A dict of Graph objects, by name.
    self._graphs = {}
    # A dict mapping output file names to lists of lines in a file.
    self._output = {}
    # Percentile levels; kept for subclasses that compute percentile stats.
    self._percentiles = [.1, .25, .5, .75, .90, .95, .99]

  class Trace(object):
    """Encapsulates data for one trace. Here, this means one point."""

    def __init__(self):
      # Whether the line carried the leading '*' marker (stored as the
      # matched string '*', which is truthy, or False).
      self.important = False
      self.value = 0.0
      self.stddev = 0.0

    def __str__(self):
      result = _FormatHumanReadable(self.value)
      if self.stddev:
        result += '+/-%s' % _FormatHumanReadable(self.stddev)
      return result

  class Graph(object):
    """Encapsulates a set of points that should appear on the same graph."""

    def __init__(self):
      self.units = None
      self.traces = {}

    def IsImportant(self):
      """A graph is considered important if any of its traces is important."""
      # NOTE: .values() (not the Python-2-only .itervalues()) so this runs
      # under both Python 2 and Python 3.
      for trace in self.traces.values():
        if trace.important:
          return True
      return False

    def BuildTracesDict(self):
      """Returns a dictionary mapping trace names to [value, stddev]."""
      traces_dict = {}
      for name, trace in self.traces.items():
        traces_dict[name] = [str(trace.value), str(trace.stddev)]
      return traces_dict


  def GenerateJsonResults(self, filename):
    """Parses every line of |filename| and returns the graph json string."""
    # Iterate through the file and process each output line.  Iterating the
    # file object directly avoids loading the whole file into memory.
    with open(filename) as f:
      for line in f:
        self.ProcessLine(line)
    # After all results have been seen, generate the graph json data
    return self.GenerateGraphJson()


  def _PrependLog(self, filename, data):
    """Prepends some data (a list of lines) to an output file."""
    self._output[filename] = data + self._output.get(filename, [])


  def ProcessLine(self, line):
    """Processes one result line, and updates the state accordingly.

    Lines matching neither RESULT nor HISTOGRAM formats are silently ignored.

    Raises:
      Exception: if the line is in the (unsupported) HISTOGRAM format.
    """
    results_match = self.RESULTS_REGEX.search(line)
    histogram_match = self.HISTOGRAM_REGEX.search(line)
    if results_match:
      self._ProcessResultLine(results_match)
    elif histogram_match:
      raise Exception("Error: Histogram results parsing not supported yet")


  def _ProcessResultLine(self, line_match):
    """Processes a line that matches the standard RESULT line format.

    Args:
      line_match: A MatchObject as returned by re.search.
    """
    match_dict = line_match.groupdict()
    graph_name = match_dict['GRAPH'].strip()
    trace_name = match_dict['TRACE'].strip()

    graph = self._graphs.get(graph_name, self.Graph())
    graph.units = match_dict['UNITS'] or ''
    trace = graph.traces.get(trace_name, self.Trace())
    trace.value = match_dict['VALUE']
    trace.important = match_dict['IMPORTANT'] or False

    # Compute the mean and standard deviation for a list or a histogram,
    # or the numerical value of a scalar value.
    if trace.value.startswith('['):
      try:
        value_list = [float(x) for x in trace.value.strip('[],').split(',')]
      except ValueError:
        # Report, but ignore, corrupted data lines. (Lines that are so badly
        # broken that they don't even match the RESULTS_REGEX won't be
        # detected.)
        logging.warning("Bad test output: '%s'", trace.value.strip())
        return
      trace.value, trace.stddev, filedata = self._CalculateStatistics(
          value_list, trace_name)
      assert filedata is not None
      for filename in filedata:
        self._PrependLog(filename, filedata[filename])
    elif trace.value.startswith('{'):
      stripped = trace.value.strip('{},')
      try:
        trace.value, trace.stddev = [float(x) for x in stripped.split(',')]
      except ValueError:
        logging.warning("Bad test output: '%s'", trace.value.strip())
        return
    else:
      try:
        trace.value = float(trace.value)
      except ValueError:
        logging.warning("Bad test output: '%s'", trace.value.strip())
        return

    graph.traces[trace_name] = trace
    self._graphs[graph_name] = graph


  def GenerateGraphJson(self):
    """Returns a json string with one entry per graph seen."""
    charts = {}
    # NOTE: .items() (not the Python-2-only .iteritems()) so this runs under
    # both Python 2 and Python 3; it also matches BuildTracesDict above.
    for graph_name, graph in self._graphs.items():
      graph_dict = collections.OrderedDict([
        ('traces', graph.BuildTracesDict()),
        ('units', str(graph.units)),
      ])

      # Include a sorted list of important trace names if there are any.
      important = [t for t in graph.traces.keys() if graph.traces[t].important]
      if important:
        graph_dict['important'] = sorted(important)

      charts[graph_name] = graph_dict
    return json.dumps(charts)


  # _CalculateStatistics needs to be a member function.
  # pylint: disable=R0201
  # Unused argument value_list.
  # pylint: disable=W0613
  def _CalculateStatistics(self, value_list, trace_name):
    """Returns a tuple with some statistics based on the given value list.

    This method may be overridden by subclasses wanting a different standard
    deviation calculation (or some other sort of error value entirely).

    Args:
      value_list: the list of values to use in the calculation
      trace_name: the trace that produced the data (not used in the base
          implementation, but subclasses may use it)

    Returns:
      A 3-tuple - mean, standard deviation, and a dict which is either
      empty or contains information about some file contents.
    """
    n = len(value_list)
    if n == 0:
      return 0.0, 0.0, {}
    mean = float(sum(value_list)) / n
    # Population standard deviation (divides by n, not n - 1).
    variance = sum([(element - mean)**2 for element in value_list]) / n
    stddev = math.sqrt(variance)

    return mean, stddev, {}
| 227 |
| 228 |
| 229 def _FormatHumanReadable(number): |
| 230 """Formats a float into three significant figures, using metric suffixes. |
| 231 |
| 232 Only m, k, and M prefixes (for 1/1000, 1000, and 1,000,000) are used. |
| 233 Examples: |
| 234 0.0387 => 38.7m |
| 235 1.1234 => 1.12 |
| 236 10866 => 10.8k |
| 237 682851200 => 683M |
| 238 """ |
| 239 metric_prefixes = {-3: 'm', 0: '', 3: 'k', 6: 'M'} |
| 240 scientific = '%.2e' % float(number) # 6.83e+005 |
| 241 e_idx = scientific.find('e') # 4, or 5 if negative |
| 242 digits = float(scientific[:e_idx]) # 6.83 |
| 243 exponent = int(scientific[e_idx + 1:]) # int('+005') = 5 |
| 244 while exponent % 3: |
| 245 digits *= 10 |
| 246 exponent -= 1 |
| 247 while exponent > 6: |
| 248 digits *= 10 |
| 249 exponent -= 1 |
| 250 while exponent < -3: |
| 251 digits /= 10 |
| 252 exponent += 1 |
| 253 if digits >= 100: |
| 254 # Don't append a meaningless '.0' to an integer number. |
| 255 digits = int(digits) |
| 256 # Exponent is now divisible by 3, between -3 and 6 inclusive: (-3, 0, 3, 6). |
| 257 return '%s%s' % (digits, metric_prefixes[exponent]) |
OLD | NEW |