'''
Created on May 19, 2011

@author: bungeman
'''

import os
import re
import math

# bench representation algorithm constant names
ALGORITHM_AVERAGE = 'avg'
ALGORITHM_MEDIAN = 'med'
ALGORITHM_MINIMUM = 'min'
ALGORITHM_25TH_PERCENTILE = '25th'

# Regular expressions used throughout.
PER_SETTING_RE = r'([^\s=]+)(?:=(\S+))?'
SETTINGS_RE = r'skia bench:((?:\s+' + PER_SETTING_RE + r')*)'
BENCH_RE = r'running bench (?:\[\d+ \d+\] )?\s*(\S+)'
TIME_RE = r'(?:(\w*)msecs = )?\s*((?:\d+\.\d+)(?:,\s*\d+\.\d+)*)'
# Non-per-tile benches have configs that don't end with ']' or '>'.
CONFIG_RE = r'(\S+[^\]>]):\s+((?:' + TIME_RE + r'\s+)+)'
# Per-tile bench lines are in the following format. Note that there are
# non-averaged bench numbers on separate lines, which we currently ignore due
# to their inaccuracy.
TILE_RE = (r' tile_(\S+): tile \[\d+,\d+\] out of \[\d+,\d+\] <averaged>:'
           r' ((?:' + TIME_RE + r'\s+)+)')
# For extracting the tile layout.
TILE_LAYOUT_RE = r' out of \[(\d+),(\d+)\] <averaged>: '

PER_SETTING_RE_COMPILED = re.compile(PER_SETTING_RE)
SETTINGS_RE_COMPILED = re.compile(SETTINGS_RE)
BENCH_RE_COMPILED = re.compile(BENCH_RE)
TIME_RE_COMPILED = re.compile(TIME_RE)
CONFIG_RE_COMPILED = re.compile(CONFIG_RE)
TILE_RE_COMPILED = re.compile(TILE_RE)
TILE_LAYOUT_RE_COMPILED = re.compile(TILE_LAYOUT_RE)

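# Illustrative sketch only: how TIME_RE splits one time field into a timer
# type and comma-separated per-iteration values. The input string below is
# made up for demonstration and is not taken from real bench output.
#
#   m = TIME_RE_COMPILED.search('cmsecs = 1.23, 1.25')
#   m.group(1)  # 'c' -> cpu timer ('' means walltime, 'g' means gpu)
#   m.group(2)  # '1.23, 1.25' -> per-iteration times, split on ',' later
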
class BenchDataPoint:
    """A single data point produced by bench."""

    def __init__(self, bench, config, time_type, time, settings,
                 tile_layout='', per_tile_values=None, per_iter_time=None):
        # string name of the benchmark to measure
        self.bench = bench
        # string name of the configuration to run
        self.config = config
        # type of the timer in string: '' (walltime), 'c' (cpu) or 'g' (gpu)
        self.time_type = time_type
        # float number of the bench time value
        self.time = time
        # dictionary of the run settings
        self.settings = settings
        # how tiles cover the whole picture: '5x3' means 5 columns and 3 rows
        self.tile_layout = tile_layout
        # list of floats for per-tile bench values, if applicable
        # (defaults are None rather than [] to avoid shared mutable defaults)
        self.per_tile_values = per_tile_values or []
        # list of floats for per-iteration bench times, if applicable
        self.per_iter_time = per_iter_time or []

    def __repr__(self):
        return "BenchDataPoint(%s, %s, %s, %s, %s)" % (
            str(self.bench),
            str(self.config),
            str(self.time_type),
            str(self.time),
            str(self.settings),
        )

class _ExtremeType(object):
    """Instances of this class compare as greater than (Max) or less than
    (Min) any other object."""

    def __init__(self, cmpr, rep):
        object.__init__(self)
        self._cmpr = cmpr
        self._rep = rep

    def __cmp__(self, other):
        if isinstance(other, self.__class__) and other._cmpr == self._cmpr:
            return 0
        return self._cmpr

    def __repr__(self):
        return self._rep

Max = _ExtremeType(1, "Max")
Min = _ExtremeType(-1, "Min")

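# Illustrative sketch only (Python 2 comparison semantics, since __cmp__ is
# used above): Max and Min act as sentinels that any real number displaces.
#
#   max(Min, 5.0)   # 5.0 -- so starting max_x at Min picks up the first x
#   min(Max, 5.0)   # 5.0 -- so starting min_x at Max picks up the first x
#
# LinearRegression below relies on exactly this to track min_x and max_x.
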
class _ListAlgorithm(object):
    """Algorithm for selecting the representation value from a given list.

    representation is one of the ALGORITHM_XXX representation types."""

    def __init__(self, data, representation=None):
        if not representation:
            representation = ALGORITHM_AVERAGE  # default algorithm
        # copy the input so sorting below does not reorder the caller's list
        self._data = list(data)
        self._len = len(data)
        if representation == ALGORITHM_AVERAGE:
            self._rep = sum(self._data) / self._len
        else:
            self._data.sort()
            if representation == ALGORITHM_MINIMUM:
                self._rep = self._data[0]
            else:
                # For percentiles, we use the value below which x% of values
                # are found, which allows for better detection of quantum
                # behaviors.
                if representation == ALGORITHM_MEDIAN:
                    x = int(round(0.5 * self._len + 0.5))
                elif representation == ALGORITHM_25TH_PERCENTILE:
                    x = int(round(0.25 * self._len + 0.5))
                else:
                    raise Exception("invalid representation algorithm %s!" %
                                    representation)
                self._rep = self._data[x - 1]

    def compute(self):
        return self._rep

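# Illustrative sketch only, with made-up numbers, of how the representation
# algorithms pick a value from the same list of per-iteration times:
#
#   _ListAlgorithm([5.0, 1.0, 3.0]).compute()                     # avg -> 3.0
#   _ListAlgorithm([5.0, 1.0, 3.0], ALGORITHM_MINIMUM).compute()  # 1.0
#   _ListAlgorithm([5.0, 1.0, 3.0], ALGORITHM_MEDIAN).compute()   # 3.0
#   _ListAlgorithm([5.0, 1.0, 3.0],
#                  ALGORITHM_25TH_PERCENTILE).compute()           # 1.0
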
def _ParseAndStoreTimes(config_re_compiled, is_per_tile, line, bench,
                        value_dic, layout_dic):
    """Parses the given bench time line and adds its values to value_dic.

    config_re_compiled: precompiled regular expression for parsing the config
        line.
    is_per_tile: boolean indicating whether this is a per-tile bench.
        If so, we add the tile layout to layout_dic as well.
    line: input string line to parse.
    bench: name of the bench for the time values.
    value_dic: dictionary to store bench values. See bench_dic in parse() below.
    layout_dic: dictionary to store tile layouts. See parse() for descriptions.
    """
    for config in config_re_compiled.finditer(line):
        current_config = config.group(1)
        tile_layout = ''
        if is_per_tile:  # per-tile bench, add name prefix
            current_config = 'tile_' + current_config
            layouts = TILE_LAYOUT_RE_COMPILED.search(line)
            if layouts and len(layouts.groups()) == 2:
                tile_layout = '%sx%s' % layouts.groups()
        times = config.group(2)
        for new_time in TIME_RE_COMPILED.finditer(times):
            current_time_type = new_time.group(1)
            iters = [float(i) for i in
                     new_time.group(2).strip().split(',')]
            value_dic.setdefault(bench, {}).setdefault(
                current_config, {}).setdefault(current_time_type, []).append(
                    iters)
            layout_dic.setdefault(bench, {}).setdefault(
                current_config, {}).setdefault(current_time_type, tile_layout)

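# Illustrative sketch only: the nested shape value_dic takes, with
# hypothetical bench/config names and numbers. A per-tile config accumulates
# one inner list per tile (two tiles here); a plain config holds exactly one
# inner list with its per-iteration times.
#
#   {'my_picture.skp':
#       {'tile_256x256': {'c': [[2.9, 3.0], [3.1, 3.2]]},
#        '8888':         {'c': [[14.25, 14.30]]}}}
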
def parse_skp_bench_data(directory, revision, rep, default_settings=None):
    """Parses all the skp bench data in the given directory.

    Args:
      directory: string of path to the input data directory.
      revision: git hash revision that matches the data to process.
      rep: bench representation algorithm; one of the ALGORITHM_XXX constants
          defined above.
      default_settings: dictionary of other run settings. See writer.option()
          in bench/benchmain.cpp.

    Returns:
      A list of BenchDataPoint objects.
    """
    revision_data_points = []
    file_list = os.listdir(directory)
    file_list.sort()
    for bench_file in file_list:
        scalar_type = None
        # Scalar type, if any, is in the bench filename after 'scalar_'.
        if bench_file.startswith('bench_' + revision + '_data_'):
            if bench_file.find('scalar_') > 0:
                components = bench_file.split('_')
                scalar_type = components[components.index('scalar') + 1]
        else:  # Skip non-skp bench files.
            continue

        with open(os.path.join(directory, bench_file), 'r') as file_handle:
            settings = dict(default_settings or {})
            settings['scalar'] = scalar_type
            revision_data_points.extend(parse(settings, file_handle, rep))

    return revision_data_points

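# Example usage (hypothetical directory and revision hash, shown only to
# illustrate the expected arguments):
#
#   points = parse_skp_bench_data('perfdata', 'abcd123',
#                                 ALGORITHM_25TH_PERCENTILE)
#   for p in points:
#       print p.bench, p.config, p.time_type, p.time
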
# TODO(bensong): switch to reading JSON output when available, so that we
# don't need the RE complexities.
def parse(settings, lines, representation=None):
    """Parses bench output into a useful data structure.

    ({str:str}, __iter__ -> str) -> [BenchDataPoint]
    representation is one of the ALGORITHM_XXX types."""

    benches = []
    current_bench = None
    # [bench][config][time_type] -> [[per-iter values]], where a per-tile
    # config has one per-iter value list for each tile:
    # [[<tile1_iter1>, <tile1_iter2>, ...], [<tile2_iter1>, ...], ...],
    # while a non-per-tile config contains a single list of iterations:
    # [[iter1, iter2, ...]].
    bench_dic = {}
    # [bench][config][time_type] -> tile_layout
    layout_dic = {}

    for line in lines:

        # see if this line is a settings line
        settingsMatch = SETTINGS_RE_COMPILED.search(line)
        if settingsMatch:
            settings = dict(settings)
            for settingMatch in PER_SETTING_RE_COMPILED.finditer(
                    settingsMatch.group(1)):
                if settingMatch.group(2):
                    settings[settingMatch.group(1)] = settingMatch.group(2)
                else:
                    settings[settingMatch.group(1)] = True

        # see if this line starts a new bench
        new_bench = BENCH_RE_COMPILED.search(line)
        if new_bench:
            current_bench = new_bench.group(1)

        # add configs on this line to the bench_dic
        if current_bench:
            if line.startswith(' tile_'):
                _ParseAndStoreTimes(TILE_RE_COMPILED, True, line, current_bench,
                                    bench_dic, layout_dic)
            else:
                _ParseAndStoreTimes(CONFIG_RE_COMPILED, False, line,
                                    current_bench, bench_dic, layout_dic)

    # append benches to the list
    for bench in bench_dic:
        for config in bench_dic[bench]:
            for time_type in bench_dic[bench][config]:
                tile_layout = ''
                per_tile_values = []  # empty for non-per-tile configs
                per_iter_time = []  # empty for per-tile configs
                bench_summary = None  # a single final bench value
                if len(bench_dic[bench][config][time_type]) > 1:
                    # per-tile config; compute the representation for each tile
                    per_tile_values = [
                        _ListAlgorithm(iters, representation).compute()
                        for iters in bench_dic[bench][config][time_type]]
                    # use the sum of the tile representations as the total
                    # bench value
                    bench_summary = sum(per_tile_values)
                    # extract the tile layout
                    tile_layout = layout_dic[bench][config][time_type]
                else:
                    # get the list of per-iteration values
                    per_iter_time = bench_dic[bench][config][time_type][0]
                    bench_summary = _ListAlgorithm(
                        per_iter_time, representation).compute()
                benches.append(BenchDataPoint(
                    bench,
                    config,
                    time_type,
                    bench_summary,
                    settings,
                    tile_layout,
                    per_tile_values,
                    per_iter_time))

    return benches

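# Illustrative sketch only, continuing the hypothetical value_dic shape shown
# after _ParseAndStoreTimes: with the default (average) representation and a
# hypothetical '2x1' tile layout, parse() would yield one BenchDataPoint per
# (bench, config, time_type):
#
#   BenchDataPoint('my_picture.skp', 'tile_256x256', 'c', 6.1,  # 2.95 + 3.15
#                  settings, '2x1', [2.95, 3.15], [])
#   BenchDataPoint('my_picture.skp', '8888', 'c', 14.275,
#                  settings, '', [], [14.25, 14.3])
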
class LinearRegression:
    """Linear regression data based on a set of data points.

    ([(Number,Number)])
    There must be at least two points for this to make sense."""

    def __init__(self, points):
        n = len(points)
        max_x = Min
        min_x = Max

        # Accumulate the sums needed for the ordinary least-squares fit.
        Sx = 0.0
        Sy = 0.0
        Sxx = 0.0
        Sxy = 0.0
        Syy = 0.0
        for point in points:
            x = point[0]
            y = point[1]
            max_x = max(max_x, x)
            min_x = min(min_x, x)

            Sx += x
            Sy += y
            Sxx += x*x
            Sxy += x*y
            Syy += y*y

        # Slope B and intercept a of the least-squares line y = B*x + a.
        denom = n*Sxx - Sx*Sx
        if denom != 0.0:
            B = (n*Sxy - Sx*Sy) / denom
        else:
            B = 0.0
        a = (1.0/n)*(Sy - B*Sx)

        # Squared standard errors of the estimate, slope, and intercept; they
        # need at least three points and a non-degenerate x spread.
        se2 = 0
        sB2 = 0
        sa2 = 0
        if n >= 3 and denom != 0.0:
            se2 = (1.0/(n*(n-2)) * (n*Syy - Sy*Sy - B*B*denom))
            sB2 = (n*se2) / denom
            sa2 = sB2 * (1.0/n) * Sxx

        self.slope = B
        self.intercept = a
        self.serror = math.sqrt(max(0, se2))
        self.serror_slope = math.sqrt(max(0, sB2))
        self.serror_intercept = math.sqrt(max(0, sa2))
        self.max_x = max_x
        self.min_x = min_x

    def __repr__(self):
        return "LinearRegression(%s, %s, %s, %s, %s)" % (
            str(self.slope),
            str(self.intercept),
            str(self.serror),
            str(self.serror_slope),
            str(self.serror_intercept),
        )

    def find_min_slope(self):
        """Finds the minimal slope given one standard deviation."""
        slope = self.slope
        intercept = self.intercept
        error = self.serror
        regr_start = self.min_x
        regr_end = self.max_x
        regr_width = regr_end - regr_start

        if slope < 0:
            lower_left_y = slope*regr_start + intercept - error
            upper_right_y = slope*regr_end + intercept + error
            return min(0, (upper_right_y - lower_left_y) / regr_width)

        elif slope > 0:
            upper_left_y = slope*regr_start + intercept + error
            lower_right_y = slope*regr_end + intercept - error
            return max(0, (lower_right_y - upper_left_y) / regr_width)

        return 0

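# Illustrative sketch only, with made-up points: a perfect fit along y = x.
#
#   regr = LinearRegression([(0.0, 0.0), (1.0, 1.0), (2.0, 2.0)])
#   regr.slope       # 1.0
#   regr.intercept   # 0.0
#   regr.serror      # 0.0 (the three points lie exactly on the line)
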
def CreateRevisionLink(revision_number):
    """Returns HTML displaying the given revision number and linking to
    that revision's change page at code.google.com, e.g.
    http://code.google.com/p/skia/source/detail?r=2056
    """
    return '<a href="http://code.google.com/p/skia/source/detail?r=%s">%s</a>'%(
        revision_number, revision_number)

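# For example, CreateRevisionLink(2056) returns
# '<a href="http://code.google.com/p/skia/source/detail?r=2056">2056</a>'.
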
def main():
    # All x values are equal, which exercises the degenerate (denom == 0)
    # path in LinearRegression.__init__.
    foo = [[0.0, 0.0], [0.0, 1.0], [0.0, 2.0], [0.0, 3.0]]
    LinearRegression(foo)

if __name__ == "__main__":
    main()