OLD | NEW |
| (Empty) |
1 import argparse | |
2 import collections | |
3 import cherrypy | |
4 import json | |
5 import os | |
6 import time | |
7 | |
# Minimum spacing required between neighbouring entries of each archive
# generation, indexed by generation number. The value is compared against the
# difference of the entries' 'time' fields (see cleanGeneration), so it is in
# whatever unit 'time' uses -- saveData stores a running save counter there.
genDelta = [
    0,
    10,
    100,
    1000,
]

# Number of generations. Derived from genDelta so the two can never drift
# apart (cleanUpArchives indexes genDelta by generation number).
genCount = len(genDelta)

# A generation is only cleaned once it holds more than this many entries.
maxGenLength = 5
17 | |
def cleanGeneration(gen, delta):
    """Remove one entry from a generation, thinning or promoting it.

    Scans neighbouring entries from the front. At the first pair whose 'time'
    values are closer than `delta`, the later entry of the pair is removed
    from `gen`, its archive file is deleted, and None is returned. If every
    pair is at least `delta` apart, the first entry is removed from `gen` and
    returned so the caller can move it to the next generation; its file is
    left on disk.

    Args:
        gen: list of entry dicts carrying 'time' and 'path' keys; mutated in
            place. Presumably ordered oldest first (entries are appended as
            they are created) -- confirm against callers.
        delta: minimum allowed 'time' difference between neighbours.

    Returns:
        The entry popped from the front of `gen`, or None when an entry was
        thinned out instead (or when `gen` is empty).

    Raises:
        OSError: if deleting the archive file fails for any reason other than
            the file already being gone.
    """
    import errno  # function-scope import keeps this change self-contained

    # Nothing to thin or promote; the original raised IndexError here.
    if not gen:
        return None

    for i, (older, newer) in enumerate(zip(gen, gen[1:])):
        if newer['time'] - older['time'] < delta:
            stale = gen.pop(i + 1)
            try:
                os.remove(stale['path'])
            except OSError as err:
                # A file that is already missing is not an error: the goal
                # (file gone) is met, and raising would abort the caller.
                if err.errno != errno.ENOENT:
                    raise
            return None
    return gen.pop(0)
25 | |
26 | |
def cleanUpArchives(archives):
    """Trim every over-full generation, cascading promoted entries onward.

    Generations are visited in order. One holding more than maxGenLength
    entries is passed to cleanGeneration with that generation's genDelta
    spacing. An entry returned from there is appended to the following
    generation, which may in turn be trimmed later in the same pass.

    Args:
        archives: list of generations (lists of entry dicts); mutated in
            place.
    """
    generationTotal = len(archives)
    for level, generation in enumerate(archives):
        if len(generation) <= maxGenLength:
            continue

        promoted = cleanGeneration(generation, genDelta[level])
        if not promoted:
            continue

        olderLevel = level + 1
        # NOTE(review): an entry leaving the last generation is simply
        # dropped here; its archive file is not deleted -- confirm intended.
        if olderLevel < generationTotal:
            archives[olderLevel].append(promoted)
34 | |
def printArchives(archives):
    """Print the 'time' of every archive entry, grouped by generation.

    Output is a '******' header line, then for each generation one line per
    entry followed by a ' -- ' separator line.

    Args:
        archives: list of generations, each a list of dicts with a 'time' key.
    """
    # Single-argument print(...) behaves the same as a Python 2 print
    # statement and is also valid Python 3.
    print('******')
    for generation in archives:
        for entry in generation:
            print(entry['time'])
        print(' -- ')
41 | |
# Running count of archives written by saveData; stored as each entry's 'time'.
archiveIdx = 0


def saveData(service):
    """Write a JSON archive of the service data if it changed since the last one.

    Compares the newest update id of the service with the 'lastId' recorded in
    the most recent entry of generation 0. When there is something new, dumps
    service.getData() to a timestamped file under service.getArchivePath(),
    records the archive in generation 0 and runs cleanUpArchives.

    Args:
        service: object providing getArchives(), getLastUpdateId(),
            getArchivePath() and getData() (see Service).
    """
    global archiveIdx
    archives = service.getArchives()
    lastGeneration = archives[0]

    # -1 means "nothing archived yet"; it matches getLastUpdateId() on a
    # service that has not received any update, so no empty archive is made.
    lastSavedUpdate = -1
    if len(lastGeneration) > 0:
        lastSavedUpdate = lastGeneration[-1]['lastId']
    lastUpdate = service.getLastUpdateId()
    hasChanges = lastSavedUpdate < lastUpdate

    if not hasChanges:
        return

    # '%d' (zero-padded day) replaces the original '%e', which is not a
    # portable strftime directive and yields a space-padded day where it
    # is supported, putting a blank inside the file name.
    timeFormat = '%Y-%m-%d-%H:%M:%S'
    currentTime = time.time()
    # Format the same instant that is stored as 'realtime' below.
    stamp = time.strftime(timeFormat, time.localtime(currentTime))
    path = os.path.join(service.getArchivePath(), 'archive-%s.json' % stamp)
    with open(path, 'w') as outfile:
        json.dump(service.getData(), outfile)
    lastGeneration.append({
        'lastId': lastUpdate,
        'path': path,
        # Logical clock (save counter), not wall-clock time; this is the
        # value the generation spacing in genDelta is measured against.
        'time': archiveIdx,
        'realtime': currentTime,
    })
    archiveIdx += 1

    cleanUpArchives(service.getArchives())
70 | |
class Service(object):
    """CherryPy handler serving an in-memory list of entries and its updates.

    The entry list is loaded from the 'index' JSON file in the data directory.
    Each call to update() replaces one entry (looked up by its 'url') and is
    logged in a bounded queue so clients can poll getupdates() for changes. A
    background task periodically archives the data through saveData.
    """

    exposed = True

    def __init__(self, data_dir):
        """Load the index from `data_dir` and start the periodic archiver.

        Args:
            data_dir: directory containing the 'index' JSON file; an
                'archive' sub-directory is created in it when missing.
        """
        self.data = None
        # Only the most recent 500 updates are kept; older ones fall off.
        self.updates = collections.deque(maxlen=500)
        self.nextUpdateId = 0
        self.idxMap = None
        self.archive_path = os.path.join(data_dir, 'archive')
        if not os.path.exists(self.archive_path):
            os.makedirs(self.archive_path)
        with open(os.path.join(data_dir, 'index')) as inf:
            self.data = json.load(inf)
        self.initIdxMap()

        # One (initially empty) list of archive entries per generation.
        self.archives = [[] for _ in range(genCount)]

        # Calls saveData(self) on a timer with an interval of 2.0 * 60.
        self.saver = cherrypy.process.plugins.BackgroundTask(
            2.0 * 60, saveData, [self])
        self.saver.start()

    def initIdxMap(self):
        """Rebuild the mapping from each entry's 'url' to its index in data."""
        self.idxMap = {
            entry['url']: i for i, entry in enumerate(self.data)
        }

    def getArchivePath(self):
        """Return the directory archives are written to."""
        return self.archive_path

    def getArchives(self):
        """Return the list of archive generations (mutable, not a copy)."""
        return self.archives

    def getData(self):
        """Return the current entry list (mutable, not a copy)."""
        return self.data

    def getDataResponse(self):
        """Return the full data plus the id the next update will receive."""
        return {
            'data': self.data,
            'nextId': self.nextUpdateId,
        }

    def appendUpdate(self, entry, idx):
        """Log an update of the entry at `idx` and advance the update id."""
        self.updates.append(
            {'index': idx, 'id': self.nextUpdateId, 'entry': entry})
        self.nextUpdateId += 1

    def update(self, entry):
        """Replace the stored entry that has the same 'url' as `entry`.

        Raises:
            KeyError: if no stored entry has that 'url'.
        """
        key = entry['url']
        idx = self.idxMap[key]
        self.data[idx] = entry
        self.appendUpdate(entry, idx)
        # NOTE(review): placeholder return value kept as-is; presumably
        # clients ignore it -- confirm before changing.
        return 'sdf'

    @cherrypy.expose
    def getupdates(self, nextId):
        """Return, as JSON, what changed since the client's `nextId`.

        The response holds either 'updates' (the logged updates with id >=
        nextId) or, when some of those have already fallen out of the
        bounded log, 'data' (the full entry list), plus the 'nextId' to
        send on the following poll.

        Args:
            nextId: id of the first update the client has not seen yet
                (string or int; converted with int()).
        """
        data = None
        updates = None
        nextId = int(nextId)
        newNextId = nextId

        if len(self.updates) > 0:
            lastId = self.updates[-1]['id']
            firstId = self.updates[0]['id']
            if firstId > nextId:
                # Client is further behind than the log reaches: resync.
                data = self.data
            elif lastId >= nextId:
                # Ids in the log are consecutive, so this offset is the
                # position of update `nextId`.
                updates = list(self.updates)[nextId - firstId:]
            # Advance in every case. In particular a client that was just
            # sent the full data must not be resynced again on each poll.
            newNextId = lastId + 1
        return json.dumps({'response': {
            'data': data,
            'updates': updates,
            'nextId': newNextId,
        }})

    @cherrypy.expose
    def message(self):
        """Handle a JSON request body of the form {'action': ..., ...}.

        Supported actions are 'getData' and 'update' (which reads the new
        entry from the request's 'data' field). Any other action yields a
        null response.
        """
        cl = cherrypy.request.headers['Content-Length']
        rawbody = cherrypy.request.body.read(int(cl))
        request = json.loads(rawbody)
        action = request['action']
        response = None
        if action == 'getData':
            response = self.getDataResponse()
        elif action == 'update':
            response = self.update(request['data'])
        return json.dumps({'response': response})

    def getLastUpdateId(self):
        """Return the id of the newest update, or -1 when there is none."""
        return self.nextUpdateId - 1
161 | |
162 | |
163 | |
if __name__ == '__main__':
    # Script entry point: build the service from --data-dir and serve it.
    parser = argparse.ArgumentParser()
    # Required: without it Service would fail on a None path with an
    # unhelpful error instead of a usage message.
    parser.add_argument('--data-dir', required=True)
    options = parser.parse_args()
    service = Service(options.data_dir)
    conf = {
        'global': {
            'server.socket_host': '0.0.0.0',
            'server.socket_port': 8081,
        },
        '/': {
            'tools.response_headers.on': True,
            'tools.response_headers.headers': [('Content-Type', 'text/plain')],
        },
        # NOTE(review): hard-coded absolute path; only valid on a machine
        # that has this directory.
        '/foo': {
            'tools.staticdir.on': True,
            'tools.staticdir.dir': '/usr/local/google/code/dom_distiller/foo',
        },
        # Serves the data directory itself under /images.
        '/images': {
            'tools.staticdir.on': True,
            'tools.staticdir.dir': os.path.join(os.getcwd(), options.data_dir),
        }
    }
    cherrypy.quickstart(service, '/', conf)
OLD | NEW |