OLD | NEW |
| (Empty) |
1 ''' | |
2 Created on May 19, 2011 | |
3 | |
4 @author: bungeman | |
5 ''' | |
6 | |
7 import os | |
8 import re | |
9 import math | |
10 | |
11 # bench representation algorithm constant names | |
12 ALGORITHM_AVERAGE = 'avg' | |
13 ALGORITHM_MEDIAN = 'med' | |
14 ALGORITHM_MINIMUM = 'min' | |
15 ALGORITHM_25TH_PERCENTILE = '25th' | |
16 | |
17 # Regular expressions used throughout. | |
18 PER_SETTING_RE = '([^\s=]+)(?:=(\S+))?' | |
19 SETTINGS_RE = 'skia bench:((?:\s+' + PER_SETTING_RE + ')*)' | |
20 BENCH_RE = 'running bench (?:\[\d+ \d+\] )?\s*(\S+)' | |
21 TIME_RE = '(?:(\w*)msecs = )?\s*((?:\d+\.\d+)(?:,\s*\d+\.\d+)*)' | |
22 # non-per-tile benches have configs that don't end with ']' or '>' | |
23 CONFIG_RE = '(\S+[^\]>]):\s+((?:' + TIME_RE + '\s+)+)' | |
24 # per-tile bench lines are in the following format. Note that there are | |
25 # non-averaged bench numbers in separate lines, which we ignore now due to | |
26 # their inaccuracy. | |
27 TILE_RE = (' tile_(\S+): tile \[\d+,\d+\] out of \[\d+,\d+\] <averaged>:' | |
28 ' ((?:' + TIME_RE + '\s+)+)') | |
29 # for extracting tile layout | |
30 TILE_LAYOUT_RE = ' out of \[(\d+),(\d+)\] <averaged>: ' | |
31 | |
32 PER_SETTING_RE_COMPILED = re.compile(PER_SETTING_RE) | |
33 SETTINGS_RE_COMPILED = re.compile(SETTINGS_RE) | |
34 BENCH_RE_COMPILED = re.compile(BENCH_RE) | |
35 TIME_RE_COMPILED = re.compile(TIME_RE) | |
36 CONFIG_RE_COMPILED = re.compile(CONFIG_RE) | |
37 TILE_RE_COMPILED = re.compile(TILE_RE) | |
38 TILE_LAYOUT_RE_COMPILED = re.compile(TILE_LAYOUT_RE) | |
39 | |
40 class BenchDataPoint: | |
41 """A single data point produced by bench. | |
42 """ | |
43 def __init__(self, bench, config, time_type, time, settings, | |
44 tile_layout='', per_tile_values=[], per_iter_time=[]): | |
45 # string name of the benchmark to measure | |
46 self.bench = bench | |
47 # string name of the configurations to run | |
48 self.config = config | |
49 # type of the timer in string: '' (walltime), 'c' (cpu) or 'g' (gpu) | |
50 self.time_type = time_type | |
51 # float number of the bench time value | |
52 self.time = time | |
53 # dictionary of the run settings | |
54 self.settings = settings | |
55 # how tiles cover the whole picture: '5x3' means 5 columns and 3 rows | |
56 self.tile_layout = tile_layout | |
57 # list of float for per_tile bench values, if applicable | |
58 self.per_tile_values = per_tile_values | |
59 # list of float for per-iteration bench time, if applicable | |
60 self.per_iter_time = per_iter_time | |
61 | |
62 def __repr__(self): | |
63 return "BenchDataPoint(%s, %s, %s, %s, %s)" % ( | |
64 str(self.bench), | |
65 str(self.config), | |
66 str(self.time_type), | |
67 str(self.time), | |
68 str(self.settings), | |
69 ) | |
70 | |
71 class _ExtremeType(object): | |
72 """Instances of this class compare greater or less than other objects.""" | |
73 def __init__(self, cmpr, rep): | |
74 object.__init__(self) | |
75 self._cmpr = cmpr | |
76 self._rep = rep | |
77 | |
78 def __cmp__(self, other): | |
79 if isinstance(other, self.__class__) and other._cmpr == self._cmpr: | |
80 return 0 | |
81 return self._cmpr | |
82 | |
83 def __repr__(self): | |
84 return self._rep | |
85 | |
86 Max = _ExtremeType(1, "Max") | |
87 Min = _ExtremeType(-1, "Min") | |
88 | |
89 class _ListAlgorithm(object): | |
90 """Algorithm for selecting the representation value from a given list. | |
91 representation is one of the ALGORITHM_XXX representation types.""" | |
92 def __init__(self, data, representation=None): | |
93 if not representation: | |
94 representation = ALGORITHM_AVERAGE # default algorithm | |
95 self._data = data | |
96 self._len = len(data) | |
97 if representation == ALGORITHM_AVERAGE: | |
98 self._rep = sum(self._data) / self._len | |
99 else: | |
100 self._data.sort() | |
101 if representation == ALGORITHM_MINIMUM: | |
102 self._rep = self._data[0] | |
103 else: | |
104 # for percentiles, we use the value below which x% of values are | |
105 # found, which allows for better detection of quantum behaviors. | |
106 if representation == ALGORITHM_MEDIAN: | |
107 x = int(round(0.5 * self._len + 0.5)) | |
108 elif representation == ALGORITHM_25TH_PERCENTILE: | |
109 x = int(round(0.25 * self._len + 0.5)) | |
110 else: | |
111 raise Exception("invalid representation algorithm %s!" % | |
112 representation) | |
113 self._rep = self._data[x - 1] | |
114 | |
115 def compute(self): | |
116 return self._rep | |
117 | |
118 def _ParseAndStoreTimes(config_re_compiled, is_per_tile, line, bench, | |
119 value_dic, layout_dic): | |
120 """Parses given bench time line with regex and adds data to value_dic. | |
121 | |
122 config_re_compiled: precompiled regular expression for parsing the config | |
123 line. | |
124 is_per_tile: boolean indicating whether this is a per-tile bench. | |
125 If so, we add tile layout into layout_dic as well. | |
126 line: input string line to parse. | |
127 bench: name of bench for the time values. | |
128 value_dic: dictionary to store bench values. See bench_dic in parse() below. | |
129 layout_dic: dictionary to store tile layouts. See parse() for descriptions. | |
130 """ | |
131 | |
132 for config in config_re_compiled.finditer(line): | |
133 current_config = config.group(1) | |
134 tile_layout = '' | |
135 if is_per_tile: # per-tile bench, add name prefix | |
136 current_config = 'tile_' + current_config | |
137 layouts = TILE_LAYOUT_RE_COMPILED.search(line) | |
138 if layouts and len(layouts.groups()) == 2: | |
139 tile_layout = '%sx%s' % layouts.groups() | |
140 times = config.group(2) | |
141 for new_time in TIME_RE_COMPILED.finditer(times): | |
142 current_time_type = new_time.group(1) | |
143 iters = [float(i) for i in | |
144 new_time.group(2).strip().split(',')] | |
145 value_dic.setdefault(bench, {}).setdefault( | |
146 current_config, {}).setdefault(current_time_type, []).append( | |
147 iters) | |
148 layout_dic.setdefault(bench, {}).setdefault( | |
149 current_config, {}).setdefault(current_time_type, tile_layout) | |
150 | |
151 def parse_skp_bench_data(directory, revision, rep, default_settings=None): | |
152 """Parses all the skp bench data in the given directory. | |
153 | |
154 Args: | |
155 directory: string of path to input data directory. | |
156 revision: git hash revision that matches the data to process. | |
157 rep: bench representation algorithm, see bench_util.py. | |
158 default_settings: dictionary of other run settings. See writer.option() in | |
159 bench/benchmain.cpp. | |
160 | |
161 Returns: | |
162 A list of BenchDataPoint objects. | |
163 """ | |
164 revision_data_points = [] | |
165 file_list = os.listdir(directory) | |
166 file_list.sort() | |
167 for bench_file in file_list: | |
168 scalar_type = None | |
169 # Scalar type, if any, is in the bench filename after 'scalar_'. | |
170 if (bench_file.startswith('bench_' + revision + '_data_')): | |
171 if bench_file.find('scalar_') > 0: | |
172 components = bench_file.split('_') | |
173 scalar_type = components[components.index('scalar') + 1] | |
174 else: # Skips non skp bench files. | |
175 continue | |
176 | |
177 with open('/'.join([directory, bench_file]), 'r') as file_handle: | |
178 settings = dict(default_settings or {}) | |
179 settings['scalar'] = scalar_type | |
180 revision_data_points.extend(parse(settings, file_handle, rep)) | |
181 | |
182 return revision_data_points | |
183 | |
184 # TODO(bensong): switch to reading JSON output when available. This way we don't | |
185 # need the RE complexities. | |
186 def parse(settings, lines, representation=None): | |
187 """Parses bench output into a useful data structure. | |
188 | |
189 ({str:str}, __iter__ -> str) -> [BenchDataPoint] | |
190 representation is one of the ALGORITHM_XXX types.""" | |
191 | |
192 benches = [] | |
193 current_bench = None | |
194 # [bench][config][time_type] -> [[per-iter values]] where per-tile config | |
195 # has per-iter value list for each tile [[<tile1_iter1>,<tile1_iter2>,...], | |
196 # [<tile2_iter1>,<tile2_iter2>,...],...], while non-per-tile config only | |
197 # contains one list of iterations [[iter1, iter2, ...]]. | |
198 bench_dic = {} | |
199 # [bench][config][time_type] -> tile_layout | |
200 layout_dic = {} | |
201 | |
202 for line in lines: | |
203 | |
204 # see if this line is a settings line | |
205 settingsMatch = SETTINGS_RE_COMPILED.search(line) | |
206 if (settingsMatch): | |
207 settings = dict(settings) | |
208 for settingMatch in PER_SETTING_RE_COMPILED.finditer(settingsMatch.g
roup(1)): | |
209 if (settingMatch.group(2)): | |
210 settings[settingMatch.group(1)] = settingMatch.group(2) | |
211 else: | |
212 settings[settingMatch.group(1)] = True | |
213 | |
214 # see if this line starts a new bench | |
215 new_bench = BENCH_RE_COMPILED.search(line) | |
216 if new_bench: | |
217 current_bench = new_bench.group(1) | |
218 | |
219 # add configs on this line to the bench_dic | |
220 if current_bench: | |
221 if line.startswith(' tile_') : | |
222 _ParseAndStoreTimes(TILE_RE_COMPILED, True, line, current_bench, | |
223 bench_dic, layout_dic) | |
224 else: | |
225 _ParseAndStoreTimes(CONFIG_RE_COMPILED, False, line, | |
226 current_bench, bench_dic, layout_dic) | |
227 | |
228 # append benches to list | |
229 for bench in bench_dic: | |
230 for config in bench_dic[bench]: | |
231 for time_type in bench_dic[bench][config]: | |
232 tile_layout = '' | |
233 per_tile_values = [] # empty for non-per-tile configs | |
234 per_iter_time = [] # empty for per-tile configs | |
235 bench_summary = None # a single final bench value | |
236 if len(bench_dic[bench][config][time_type]) > 1: | |
237 # per-tile config; compute representation for each tile | |
238 per_tile_values = [ | |
239 _ListAlgorithm(iters, representation).compute() | |
240 for iters in bench_dic[bench][config][time_type]] | |
241 # use sum of each tile representation for total bench value | |
242 bench_summary = sum(per_tile_values) | |
243 # extract tile layout | |
244 tile_layout = layout_dic[bench][config][time_type] | |
245 else: | |
246 # get the list of per-iteration values | |
247 per_iter_time = bench_dic[bench][config][time_type][0] | |
248 bench_summary = _ListAlgorithm( | |
249 per_iter_time, representation).compute() | |
250 benches.append(BenchDataPoint( | |
251 bench, | |
252 config, | |
253 time_type, | |
254 bench_summary, | |
255 settings, | |
256 tile_layout, | |
257 per_tile_values, | |
258 per_iter_time)) | |
259 | |
260 return benches | |
261 | |
262 class LinearRegression: | |
263 """Linear regression data based on a set of data points. | |
264 | |
265 ([(Number,Number)]) | |
266 There must be at least two points for this to make sense.""" | |
267 def __init__(self, points): | |
268 n = len(points) | |
269 max_x = Min | |
270 min_x = Max | |
271 | |
272 Sx = 0.0 | |
273 Sy = 0.0 | |
274 Sxx = 0.0 | |
275 Sxy = 0.0 | |
276 Syy = 0.0 | |
277 for point in points: | |
278 x = point[0] | |
279 y = point[1] | |
280 max_x = max(max_x, x) | |
281 min_x = min(min_x, x) | |
282 | |
283 Sx += x | |
284 Sy += y | |
285 Sxx += x*x | |
286 Sxy += x*y | |
287 Syy += y*y | |
288 | |
289 denom = n*Sxx - Sx*Sx | |
290 if (denom != 0.0): | |
291 B = (n*Sxy - Sx*Sy) / denom | |
292 else: | |
293 B = 0.0 | |
294 a = (1.0/n)*(Sy - B*Sx) | |
295 | |
296 se2 = 0 | |
297 sB2 = 0 | |
298 sa2 = 0 | |
299 if (n >= 3 and denom != 0.0): | |
300 se2 = (1.0/(n*(n-2)) * (n*Syy - Sy*Sy - B*B*denom)) | |
301 sB2 = (n*se2) / denom | |
302 sa2 = sB2 * (1.0/n) * Sxx | |
303 | |
304 | |
305 self.slope = B | |
306 self.intercept = a | |
307 self.serror = math.sqrt(max(0, se2)) | |
308 self.serror_slope = math.sqrt(max(0, sB2)) | |
309 self.serror_intercept = math.sqrt(max(0, sa2)) | |
310 self.max_x = max_x | |
311 self.min_x = min_x | |
312 | |
313 def __repr__(self): | |
314 return "LinearRegression(%s, %s, %s, %s, %s)" % ( | |
315 str(self.slope), | |
316 str(self.intercept), | |
317 str(self.serror), | |
318 str(self.serror_slope), | |
319 str(self.serror_intercept), | |
320 ) | |
321 | |
322 def find_min_slope(self): | |
323 """Finds the minimal slope given one standard deviation.""" | |
324 slope = self.slope | |
325 intercept = self.intercept | |
326 error = self.serror | |
327 regr_start = self.min_x | |
328 regr_end = self.max_x | |
329 regr_width = regr_end - regr_start | |
330 | |
331 if slope < 0: | |
332 lower_left_y = slope*regr_start + intercept - error | |
333 upper_right_y = slope*regr_end + intercept + error | |
334 return min(0, (upper_right_y - lower_left_y) / regr_width) | |
335 | |
336 elif slope > 0: | |
337 upper_left_y = slope*regr_start + intercept + error | |
338 lower_right_y = slope*regr_end + intercept - error | |
339 return max(0, (lower_right_y - upper_left_y) / regr_width) | |
340 | |
341 return 0 | |
342 | |
343 def CreateRevisionLink(revision_number): | |
344 """Returns HTML displaying the given revision number and linking to | |
345 that revision's change page at code.google.com, e.g. | |
346 http://code.google.com/p/skia/source/detail?r=2056 | |
347 """ | |
348 return '<a href="http://code.google.com/p/skia/source/detail?r=%s">%s</a>'%( | |
349 revision_number, revision_number) | |
350 | |
351 def main(): | |
352 foo = [[0.0, 0.0], [0.0, 1.0], [0.0, 2.0], [0.0, 3.0]] | |
353 LinearRegression(foo) | |
354 | |
355 if __name__ == "__main__": | |
356 main() | |
OLD | NEW |