OLD | NEW |
| (Empty) |
1 import subprocess as sub | |
2 import re | |
3 import sys | |
4 import math, functools | |
5 | |
class SiteTimes(object):
    """Simple record holding a site label together with a list of times.

    Attributes:
        site: label of the site; 'UNDEFINED' until a caller sets it.
        times: list of times for that site; empty on creation.
    """

    def __init__(self):
        # Each instance gets its own fresh list, so records never share times.
        self.times = []
        self.site = 'UNDEFINED'
10 | |
# helper function found online here:
# http://code.activestate.com/recipes/511478-finding-the-percentile-of-the-values/
def percentile(N, percent, key=lambda x:x):
    """
    Find the percentile of a list of values.

    The fractional rank k = (len(N) - 1) * percent is computed first. When k
    is a whole number the element at that index is returned; otherwise the
    result is linearly interpolated between the two neighbouring elements.

    @parameter N - is a list of values. Note N MUST BE already sorted.
    @parameter percent - a float value from 0.0 to 1.0.
    @parameter key - optional key function to compute value from each
                     element of N.

    @return - the percentile of the values, or None when N is empty.
    """
    if not N:
        return None
    k = (len(N) - 1) * percent
    f = math.floor(k)
    c = math.ceil(k)
    if f == c:
        # k landed exactly on an element; no interpolation needed.
        return key(N[int(k)])
    # Linear interpolation: each neighbour is weighted by how close k is to
    # it, i.e. by the distance from k to the *other* neighbour.
    d0 = key(N[int(f)]) * (c - k)
    d1 = key(N[int(c)]) * (k - f)
    return d0 + d1
35 | |
def mean(numbers):
    """Return the arithmetic mean of `numbers`.

    @parameter numbers - a non-empty sized collection of numbers.

    @return - sum(numbers) divided by len(numbers).
    @raise AssertionError - when `numbers` is empty.
    """
    count = len(numbers)
    if count == 0:
        # Raised explicitly instead of via `assert` so the guard is still
        # active when Python runs with -O (which strips assert statements).
        raise AssertionError('list should not be empty!')
    return sum(numbers) / count
39 | |
class PageCyclerResultsParser:
    """Reduce page_cycler test output to a single average time.

    The parser looks for the first line starting with '*RESULT times:' and
    treats the line directly above it as the list of sites. Both lines are
    expected to carry a comma-separated list inside square brackets. The
    times list holds one entry per site per iteration, laid out iteration by
    iteration (all sites for iteration 0, then all sites for iteration 1,
    and so on).
    """

    def parse_file(self, outfile = 'out.txt'):
        """Read `outfile` and return parse_results() of its contents.

        @parameter outfile - path of the file holding page_cycler output.

        @return - whatever parse_results() returns for the file contents.
        """
        # output is the output of the page_cycler tests.
        # The context manager closes the handle even if read() fails.
        with open(outfile) as results_file:
            output = results_file.read()
        return self.parse_results(output)

    def parse_results(self, output = ''):
        """Compute the average per-site time from page_cycler output.

        For every site the highest recorded time is discarded and the
        remaining times are averaged with mean(); the value returned is the
        average of those per-site means.

        @parameter output - the full text printed by the page_cycler tests.

        @return - the mean over all sites of each site's trimmed mean time.
        @raise AssertionError - when `output` is empty, the results token is
                 missing or has no line above it, a bracketed list cannot be
                 found, or the number of times is not a multiple of the
                 number of sites. mean() also raises it when a site has a
                 single time, since nothing is left after the highest time
                 is dropped.
        """
        # Checks raise AssertionError explicitly (same type and messages as
        # before) so they stay active under python -O.
        if output == '':
            raise AssertionError('Output cannot be empty!')

        # split it up into lines
        lines = output.split('\n')

        # figure out where the results are...
        # This is our anchor in the text
        token = '*RESULT times:'
        index = None
        for position, line in enumerate(lines):
            if line.startswith(token):
                index = position
                break

        if index is None:
            raise AssertionError(token + ' not found!?')
        if index == 0:
            # Without this guard lines[index - 1] would silently wrap around
            # to the last line of the output.
            raise AssertionError('No sites line found above ' + token)

        # we have a line called times and a line called sites
        sites = self._bracketed_items(lines[index - 1])
        times = self._bracketed_items(lines[index])

        if len(times) % len(sites) != 0:
            raise AssertionError('Times not divisible by sites!')

        # now we have a list called sites and a list called times
        # let's do some statistics on it.
        stList = self._group_times_by_site(sites, times)

        # For debugging use something like this:
        ###for st in stList:
        ###    print st.site
        ###    print st.times

        # now drop the highest time of every site and average the per-site
        # means
        totalTime = 0
        for st in stList:
            sortedTimes = sorted(st.times)
            # drop highest time in the sortedTimes
            sortedTimes.pop()
            # TODO: Perhaps this should be a weighted mean?
            totalTime += mean(sortedTimes)

        return totalTime/len(stList)

    def _bracketed_items(self, line):
        """Return the comma-separated items inside the first [...] of `line`."""
        m = re.search(r'\[(.*?)\]', line)
        if m is None:
            raise AssertionError('No [...] list found in line: ' + line)
        return m.group(1).split(',')

    def _group_times_by_site(self, sites, times):
        """Build one SiteTimes per site, holding that site's times as floats."""
        stList = []
        step = len(sites)
        for ii, site in enumerate(sites):
            st = SiteTimes()
            st.site = site
            # Times are interleaved by site, so every step-th entry starting
            # at offset ii belongs to this site.
            st.times.extend(float(t) for t in times[ii::step])
            stList.append(st)
        return stList
109 | |
# This is how to use this class
###pcrp=PageCyclerResultsParser()
###print pcrp.parse_file('out.txt')
OLD | NEW |