bench/bench_util.py - Issue 450253003: Restore bench_util.py

Side by Side Diff: bench/bench_util.py

Issue 450253003: Restore bench_util.py (Closed) Base URL: https://skia.googlesource.com/skia.git@master

Patch Set: rebase Created 6 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

OLD	NEW
(Empty)
	1 '''

	2 Created on May 19, 2011

	3

	4 @author: bungeman

	5 '''

	6

	7 import os

	8 import re

	9 import math

	10

	11 # bench representation algorithm constant names

	12 ALGORITHM_AVERAGE = 'avg'

	13 ALGORITHM_MEDIAN = 'med'

	14 ALGORITHM_MINIMUM = 'min'

	15 ALGORITHM_25TH_PERCENTILE = '25th'

	16

	17 # Regular expressions used throughout.

	18 PER_SETTING_RE = '([^\s=]+)(?:=(\S+))?'

	19 SETTINGS_RE = 'skia bench:((?:\s+' + PER_SETTING_RE + ')*)'

	20 BENCH_RE = 'running bench (?:\[\d+ \d+\] )?\s*(\S+)'

	21 TIME_RE = '(?:(\w)msecs = )?\s((?:\d+\.\d+)(?:,\s\d+\.\d+))'

	22 # non-per-tile benches have configs that don't end with ']' or '>'

	23 CONFIG_RE = '(\S+[^\]>]):\s+((?:' + TIME_RE + '\s+)+)'

	24 # per-tile bench lines are in the following format. Note that there are

	25 # non-averaged bench numbers in separate lines, which we ignore now due to

	26 # their inaccuracy.

	27 TILE_RE = (' tile_(\S+): tile \[\d+,\d+\] out of \[\d+,\d+\] <averaged>:'

	28 ' ((?:' + TIME_RE + '\s+)+)')

	29 # for extracting tile layout

	30 TILE_LAYOUT_RE = ' out of \[(\d+),(\d+)\] <averaged>: '

	31

	32 PER_SETTING_RE_COMPILED = re.compile(PER_SETTING_RE)

	33 SETTINGS_RE_COMPILED = re.compile(SETTINGS_RE)

	34 BENCH_RE_COMPILED = re.compile(BENCH_RE)

	35 TIME_RE_COMPILED = re.compile(TIME_RE)

	36 CONFIG_RE_COMPILED = re.compile(CONFIG_RE)

	37 TILE_RE_COMPILED = re.compile(TILE_RE)

	38 TILE_LAYOUT_RE_COMPILED = re.compile(TILE_LAYOUT_RE)

	39

	40 class BenchDataPoint:

	41 """A single data point produced by bench.

	42 """

	43 def __init__(self, bench, config, time_type, time, settings,

	44 tile_layout='', per_tile_values=[], per_iter_time=[]):

	45 # string name of the benchmark to measure

	46 self.bench = bench

	47 # string name of the configurations to run

	48 self.config = config

	49 # type of the timer in string: '' (walltime), 'c' (cpu) or 'g' (gpu)

	50 self.time_type = time_type

	51 # float number of the bench time value

	52 self.time = time

	53 # dictionary of the run settings

	54 self.settings = settings

	55 # how tiles cover the whole picture: '5x3' means 5 columns and 3 rows

	56 self.tile_layout = tile_layout

	57 # list of float for per_tile bench values, if applicable

	58 self.per_tile_values = per_tile_values

	59 # list of float for per-iteration bench time, if applicable

	60 self.per_iter_time = per_iter_time

	61

	62 def __repr__(self):

	63 return "BenchDataPoint(%s, %s, %s, %s, %s)" % (

	64 str(self.bench),

	65 str(self.config),

	66 str(self.time_type),

	67 str(self.time),

	68 str(self.settings),

	69 )

	70

	71 class _ExtremeType(object):

	72 """Instances of this class compare greater or less than other objects."""

	73 def __init__(self, cmpr, rep):

	74 object.__init__(self)

	75 self._cmpr = cmpr

	76 self._rep = rep

	77

	78 def __cmp__(self, other):

	79 if isinstance(other, self.__class__) and other._cmpr == self._cmpr:

	80 return 0

	81 return self._cmpr

	82

	83 def __repr__(self):

	84 return self._rep

	85

	86 Max = _ExtremeType(1, "Max")

	87 Min = _ExtremeType(-1, "Min")

	88

	89 class _ListAlgorithm(object):

	90 """Algorithm for selecting the representation value from a given list.

	91 representation is one of the ALGORITHM_XXX representation types."""

	92 def __init__(self, data, representation=None):

	93 if not representation:

	94 representation = ALGORITHM_AVERAGE # default algorithm

	95 self._data = data

	96 self._len = len(data)

	97 if representation == ALGORITHM_AVERAGE:

	98 self._rep = sum(self._data) / self._len

	99 else:

	100 self._data.sort()

	101 if representation == ALGORITHM_MINIMUM:

	102 self._rep = self._data[0]

	103 else:

	104 # for percentiles, we use the value below which x% of values are

	105 # found, which allows for better detection of quantum behaviors.

	106 if representation == ALGORITHM_MEDIAN:

	107 x = int(round(0.5 * self._len + 0.5))

	108 elif representation == ALGORITHM_25TH_PERCENTILE:

	109 x = int(round(0.25 * self._len + 0.5))

	110 else:

	111 raise Exception("invalid representation algorithm %s!" %

	112 representation)

	113 self._rep = self._data[x - 1]

	114

	115 def compute(self):

	116 return self._rep

	117

	118 def _ParseAndStoreTimes(config_re_compiled, is_per_tile, line, bench,

	119 value_dic, layout_dic):

	120 """Parses given bench time line with regex and adds data to value_dic.

	121

	122 config_re_compiled: precompiled regular expression for parsing the config

	123 line.

	124 is_per_tile: boolean indicating whether this is a per-tile bench.

	125 If so, we add tile layout into layout_dic as well.

	126 line: input string line to parse.

	127 bench: name of bench for the time values.

	128 value_dic: dictionary to store bench values. See bench_dic in parse() below.

	129 layout_dic: dictionary to store tile layouts. See parse() for descriptions.

	130 """

	131

	132 for config in config_re_compiled.finditer(line):

	133 current_config = config.group(1)

	134 tile_layout = ''

	135 if is_per_tile: # per-tile bench, add name prefix

	136 current_config = 'tile_' + current_config

	137 layouts = TILE_LAYOUT_RE_COMPILED.search(line)

	138 if layouts and len(layouts.groups()) == 2:

	139 tile_layout = '%sx%s' % layouts.groups()

	140 times = config.group(2)

	141 for new_time in TIME_RE_COMPILED.finditer(times):

	142 current_time_type = new_time.group(1)

	143 iters = [float(i) for i in

	144 new_time.group(2).strip().split(',')]

	145 value_dic.setdefault(bench, {}).setdefault(

	146 current_config, {}).setdefault(current_time_type, []).append(

	147 iters)

	148 layout_dic.setdefault(bench, {}).setdefault(

	149 current_config, {}).setdefault(current_time_type, tile_layout)

	150

	151 def parse_skp_bench_data(directory, revision, rep, default_settings=None):

	152 """Parses all the skp bench data in the given directory.

	153

	154 Args:

	155 directory: string of path to input data directory.

	156 revision: git hash revision that matches the data to process.

	157 rep: bench representation algorithm, see bench_util.py.

	158 default_settings: dictionary of other run settings. See writer.option() in

	159 bench/benchmain.cpp.

	160

	161 Returns:

	162 A list of BenchDataPoint objects.

	163 """

	164 revision_data_points = []

	165 file_list = os.listdir(directory)

	166 file_list.sort()

	167 for bench_file in file_list:

	168 scalar_type = None

	169 # Scalar type, if any, is in the bench filename after 'scalar_'.

	170 if (bench_file.startswith('bench_' + revision + '_data_')):

	171 if bench_file.find('scalar_') > 0:

	172 components = bench_file.split('_')

	173 scalar_type = components[components.index('scalar') + 1]

	174 else: # Skips non skp bench files.

	175 continue

	176

	177 with open('/'.join([directory, bench_file]), 'r') as file_handle:

	178 settings = dict(default_settings or {})

	179 settings['scalar'] = scalar_type

	180 revision_data_points.extend(parse(settings, file_handle, rep))

	181

	182 return revision_data_points

	183

	184 # TODO(bensong): switch to reading JSON output when available. This way we don't

	185 # need the RE complexities.

	186 def parse(settings, lines, representation=None):

	187 """Parses bench output into a useful data structure.

	188

	189 ({str:str}, __iter__ -> str) -> [BenchDataPoint]

	190 representation is one of the ALGORITHM_XXX types."""

	191

	192 benches = []

	193 current_bench = None

	194 # [bench][config][time_type] -> [[per-iter values]] where per-tile config

	195 # has per-iter value list for each tile [[<tile1_iter1>,<tile1_iter2>,...],

	196 # [<tile2_iter1>,<tile2_iter2>,...],...], while non-per-tile config only

	197 # contains one list of iterations [[iter1, iter2, ...]].

	198 bench_dic = {}

	199 # [bench][config][time_type] -> tile_layout

	200 layout_dic = {}

	201

	202 for line in lines:

	203

	204 # see if this line is a settings line

	205 settingsMatch = SETTINGS_RE_COMPILED.search(line)

	206 if (settingsMatch):

	207 settings = dict(settings)

	208 for settingMatch in PER_SETTING_RE_COMPILED.finditer(settingsMatch.g roup(1)):

	209 if (settingMatch.group(2)):

	210 settings[settingMatch.group(1)] = settingMatch.group(2)

	211 else:

	212 settings[settingMatch.group(1)] = True

	213

	214 # see if this line starts a new bench

	215 new_bench = BENCH_RE_COMPILED.search(line)

	216 if new_bench:

	217 current_bench = new_bench.group(1)

	218

	219 # add configs on this line to the bench_dic

	220 if current_bench:

	221 if line.startswith(' tile_') :

	222 _ParseAndStoreTimes(TILE_RE_COMPILED, True, line, current_bench,

	223 bench_dic, layout_dic)

	224 else:

	225 _ParseAndStoreTimes(CONFIG_RE_COMPILED, False, line,

	226 current_bench, bench_dic, layout_dic)

	227

	228 # append benches to list

	229 for bench in bench_dic:

	230 for config in bench_dic[bench]:

	231 for time_type in bench_dic[bench][config]:

	232 tile_layout = ''

	233 per_tile_values = [] # empty for non-per-tile configs

	234 per_iter_time = [] # empty for per-tile configs

	235 bench_summary = None # a single final bench value

	236 if len(bench_dic[bench][config][time_type]) > 1:

	237 # per-tile config; compute representation for each tile

	238 per_tile_values = [

	239 _ListAlgorithm(iters, representation).compute()

	240 for iters in bench_dic[bench][config][time_type]]

	241 # use sum of each tile representation for total bench value

	242 bench_summary = sum(per_tile_values)

	243 # extract tile layout

	244 tile_layout = layout_dic[bench][config][time_type]

	245 else:

	246 # get the list of per-iteration values

	247 per_iter_time = bench_dic[bench][config][time_type][0]

	248 bench_summary = _ListAlgorithm(

	249 per_iter_time, representation).compute()

	250 benches.append(BenchDataPoint(

	251 bench,

	252 config,

	253 time_type,

	254 bench_summary,

	255 settings,

	256 tile_layout,

	257 per_tile_values,

	258 per_iter_time))

	259

	260 return benches

	261

	262 class LinearRegression:

	263 """Linear regression data based on a set of data points.

	264

	265 ([(Number,Number)])

	266 There must be at least two points for this to make sense."""

	267 def __init__(self, points):

	268 n = len(points)

	269 max_x = Min

	270 min_x = Max

	271

	272 Sx = 0.0

	273 Sy = 0.0

	274 Sxx = 0.0

	275 Sxy = 0.0

	276 Syy = 0.0

	277 for point in points:

	278 x = point[0]

	279 y = point[1]

	280 max_x = max(max_x, x)

	281 min_x = min(min_x, x)

	282

	283 Sx += x

	284 Sy += y

	285 Sxx += x*x

	286 Sxy += x*y

	287 Syy += y*y

	288

	289 denom = nSxx - SxSx

	290 if (denom != 0.0):

	291 B = (nSxy - SxSy) / denom

	292 else:

	293 B = 0.0

	294 a = (1.0/n)(Sy - BSx)

	295

	296 se2 = 0

	297 sB2 = 0

	298 sa2 = 0

	299 if (n >= 3 and denom != 0.0):

	300 se2 = (1.0/(n(n-2)) (nSyy - SySy - BBdenom))

	301 sB2 = (n*se2) / denom

	302 sa2 = sB2 * (1.0/n) * Sxx

	303

	304

	305 self.slope = B

	306 self.intercept = a

	307 self.serror = math.sqrt(max(0, se2))

	308 self.serror_slope = math.sqrt(max(0, sB2))

	309 self.serror_intercept = math.sqrt(max(0, sa2))

	310 self.max_x = max_x

	311 self.min_x = min_x

	312

	313 def __repr__(self):

	314 return "LinearRegression(%s, %s, %s, %s, %s)" % (

	315 str(self.slope),

	316 str(self.intercept),

	317 str(self.serror),

	318 str(self.serror_slope),

	319 str(self.serror_intercept),

	320 )

	321

	322 def find_min_slope(self):

	323 """Finds the minimal slope given one standard deviation."""

	324 slope = self.slope

	325 intercept = self.intercept

	326 error = self.serror

	327 regr_start = self.min_x

	328 regr_end = self.max_x

	329 regr_width = regr_end - regr_start

	330

	331 if slope < 0:

	332 lower_left_y = slope*regr_start + intercept - error

	333 upper_right_y = slope*regr_end + intercept + error

	334 return min(0, (upper_right_y - lower_left_y) / regr_width)

	335

	336 elif slope > 0:

	337 upper_left_y = slope*regr_start + intercept + error

	338 lower_right_y = slope*regr_end + intercept - error

	339 return max(0, (lower_right_y - upper_left_y) / regr_width)

	340

	341 return 0

	342

	343 def CreateRevisionLink(revision_number):

	344 """Returns HTML displaying the given revision number and linking to

	345 that revision's change page at code.google.com, e.g.

	346 http://code.google.com/p/skia/source/detail?r=2056

	347 """

	348 return '<a href="http://code.google.com/p/skia/source/detail?r=%s">%s</a>'%(

	349 revision_number, revision_number)

	350

	351 def main():

	352 foo = [[0.0, 0.0], [0.0, 1.0], [0.0, 2.0], [0.0, 3.0]]

	353 LinearRegression(foo)

	354

	355 if __name__ == "__main__":

	356 main()

OLD	NEW

« no previous file with comments | « no previous file | no next file » | no next file with comments »