OLD | NEW |
(Empty) | |
| 1 import argparse |
| 2 import collections |
| 3 import cherrypy |
| 4 import json |
| 5 import os |
| 6 import time |
| 7 |
# Minimum gap required between neighbouring entries of an archive generation,
# indexed by generation number. The gap is measured on the entries' 'time'
# field (see cleanGeneration / cleanUpArchives).
genDelta = [0, 10, 100, 1000]

# Number of generation lists a Service creates.
genCount = 4
# A generation is thinned as soon as it holds more entries than this.
maxGenLength = 5
| 17 |
def cleanGeneration(gen, delta):
    """Thin one archive generation by a single entry.

    Walks neighbouring pairs of ``gen`` in order. For the first pair whose
    'time' values are less than ``delta`` apart, the later entry is removed
    from the list, its file at entry['path'] is deleted, and None is returned.

    If every pair is spaced at least ``delta`` apart, the first entry is
    removed from the list and returned; its file is left on disk.

    Args:
        gen: list of entry dicts with 'time' and 'path' keys; mutated in place.
        delta: minimum allowed difference between neighbouring 'time' values.

    Returns:
        The entry popped from the front of ``gen``, or None when a too-close
        entry was deleted instead.
    """
    for pos in range(1, len(gen)):
        gap = gen[pos]['time'] - gen[pos - 1]['time']
        if gap >= delta:
            continue
        # Too close to its predecessor: discard the later of the two.
        victim = gen.pop(pos)
        os.remove(victim['path'])
        return None
    return gen.pop(0)
| 25 |
| 26 |
def cleanUpArchives(archives):
    """Thin every over-full generation and hand survivors to the next one.

    Generations are visited in index order. Any generation holding more than
    maxGenLength entries is passed to cleanGeneration with that generation's
    genDelta spacing. When cleanGeneration returns an entry, it is appended
    to the following generation, if there is one.

    Args:
        archives: list of generations, each a list of entry dicts; mutated
            in place.
    """
    genTotal = len(archives)
    for level, generation in enumerate(archives):
        if len(generation) <= maxGenLength:
            continue
        promoted = cleanGeneration(generation, genDelta[level])
        if not promoted:
            continue
        olderLevel = level + 1
        # NOTE(review): an entry leaving the last generation is dropped from
        # tracking here but its file is not removed - confirm this is intended.
        if olderLevel < genTotal:
            archives[olderLevel].append(promoted)
| 34 |
def printArchives(archives):
    """Print the 'time' of every archive entry, grouped by generation.

    Output is a '******' header line, then for each generation one line per
    entry followed by a ' -- ' separator line.

    Args:
        archives: list of generations, each a list of dicts with a 'time' key.
    """
    # Single-argument parenthesised print produces the same output under the
    # Python 2 print statement and the Python 3 print function.
    print('******')
    for generation in archives:
        for entry in generation:
            print(entry['time'])
        print(' -- ')
| 41 |
# Running count of archives written by saveData; stored as each entry's 'time'.
archiveIdx = 0


def saveData(service):
    """Write a JSON snapshot of the service data if it changed since the last.

    The newest entry of generation 0 records the update id it was saved at.
    When service.getLastUpdateId() is not greater than that id, nothing is
    written. Otherwise the data is dumped to
    ``<archive path>/archive-<timestamp>.json``, an entry describing the file
    is appended to generation 0, and cleanUpArchives() is run.

    Args:
        service: object providing getArchives(), getLastUpdateId(),
            getArchivePath() and getData().
    """
    global archiveIdx
    archives = service.getArchives()
    lastGeneration = archives[0]
    lastSavedUpdate = -1
    if lastGeneration:
        lastSavedUpdate = lastGeneration[-1]['lastId']
    lastUpdate = service.getLastUpdateId()
    if lastSavedUpdate >= lastUpdate:
        return

    currentTime = time.time()
    # %d (zero-padded day) instead of %e: %e is a non-portable extension that
    # pads single-digit days with a space, which ends up in the filename.
    timeFormat = '%Y-%m-%d-%H:%M:%S'
    # Format the same instant that is recorded as 'realtime' below.
    stamp = time.strftime(timeFormat, time.localtime(currentTime))
    path = os.path.join(service.getArchivePath(), 'archive-%s.json' % stamp)
    with open(path, 'w') as outfile:
        json.dump(service.getData(), outfile)
    lastGeneration.append({
        'lastId': lastUpdate,
        'path': path,
        # Sequence number, not wall-clock time; genDelta spacing is measured
        # against this field.
        'time': archiveIdx,
        'realtime': currentTime,
    })
    archiveIdx += 1

    cleanUpArchives(service.getArchives())
| 70 |
class Service(object):
    """CherryPy handler serving a JSON dataset and a bounded log of updates.

    The dataset is a list loaded from ``<data_dir>/index``; every entry is
    looked up by its 'url' key. Each accepted update gets a sequential id and
    is kept in a deque of at most 500 items so that clients can poll for
    changes. A background task calls saveData(self) every two minutes.
    """

    exposed = True

    def __init__(self, data_dir):
        """Load the dataset, prepare the archive directory, start the saver.

        Args:
            data_dir: directory containing the JSON file 'index'; an 'archive'
                subdirectory is created inside it when missing.
        """
        self.data = None
        self.updates = collections.deque(maxlen=500)
        self.nextUpdateId = 0
        self.idxMap = None
        self.archive_path = os.path.join(data_dir, 'archive')
        if not os.path.exists(self.archive_path):
            os.makedirs(self.archive_path)
        with open(os.path.join(data_dir, 'index')) as inf:
            self.data = json.load(inf)
        self.initIdxMap()

        # One (initially empty) list of archive entries per generation.
        self.archives = []
        for i in range(genCount):
            self.archives.append([])

        self.saver = cherrypy.process.plugins.BackgroundTask(
            2.0 * 60, saveData, [self])
        self.saver.start()

    def initIdxMap(self):
        """Rebuild the mapping from entry['url'] to the entry's list index."""
        self.idxMap = dict()
        for i, entry in enumerate(self.data):
            self.idxMap[entry['url']] = i

    def getArchivePath(self):
        """Return the directory archive snapshots are written to."""
        return self.archive_path

    def getArchives(self):
        """Return the list of archive generations (the live list, not a copy)."""
        return self.archives

    def getData(self):
        """Return the dataset list (the live list, not a copy)."""
        return self.data

    def getDataResponse(self):
        """Return the full dataset plus the id the next update will receive."""
        return {
            'data': self.data,
            'nextId': self.nextUpdateId,
        }

    def appendUpdate(self, entry, idx):
        """Record an update in the bounded log and advance the id counter."""
        self.updates.append(
            {'index': idx, 'id': self.nextUpdateId, 'entry': entry})
        self.nextUpdateId += 1

    def update(self, entry):
        """Replace the stored entry that has the same 'url' and log the change.

        Raises:
            KeyError: if entry['url'] is not present in the dataset.
        """
        key = entry['url']
        idx = self.idxMap[key]
        self.data[idx] = entry
        self.appendUpdate(entry, idx)
        # Literal is part of the response sent to clients; kept unchanged.
        return 'sdf'

    @cherrypy.expose
    def getupdates(self, nextId):
        """Return the updates a client has not seen yet, as a JSON string.

        Args:
            nextId: id of the first update the client is still missing
                (anything accepted by int()).

        Returns:
            JSON of the form {'response': {'data', 'updates', 'nextId'}}:
            - 'data' is the full dataset when the requested id has already
              been dropped from the update log, otherwise null;
            - 'updates' is the list of logged updates from nextId onwards,
              or null when there is nothing new or a full resync was sent;
            - 'nextId' is the id to pass on the next poll.
        """
        data = None
        updates = None
        nextId = int(nextId)
        newNextId = nextId

        if len(self.updates) > 0:
            lastId = self.updates[-1]['id']
            firstId = self.updates[0]['id']
            if firstId > nextId:
                # The client fell behind the log window: send everything and
                # move it past the newest update, otherwise every later poll
                # would fall into this branch again and resend the dataset.
                data = self.data
                newNextId = lastId + 1
            elif lastId >= nextId:
                # Ids in the log are consecutive, so the offset of nextId is
                # its distance from the oldest logged id.
                updates = list(self.updates)[nextId - firstId:]
                newNextId = lastId + 1
        return json.dumps({'response': {
            'data': data,
            'updates': updates,
            'nextId': newNextId,
        }})

    @cherrypy.expose
    def message(self):
        """Dispatch a JSON request body on its 'action' field.

        'getData' answers with getDataResponse(); 'update' applies
        request['data'] via update(). Any other action yields a null
        response. The reply is the JSON string {'response': ...}.
        """
        cl = cherrypy.request.headers['Content-Length']
        rawbody = cherrypy.request.body.read(int(cl))
        request = json.loads(rawbody)
        action = request['action']
        response = None
        if action == 'getData':
            response = self.getDataResponse()
        if action == 'update':
            response = self.update(request['data'])
        return json.dumps({'response': response})

    def getLastUpdateId(self):
        """Return the id of the newest update, or -1 if none was recorded."""
        return self.nextUpdateId - 1
| 161 |
| 162 |
| 163 |
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    # Service() joins paths onto this directory right away, so a missing
    # value is reported by argparse instead of failing inside os.path.join.
    parser.add_argument('--data-dir', required=True)
    options = parser.parse_args()
    service = Service(options.data_dir)
    conf = {
        'global': {
            'server.socket_host': '0.0.0.0',
            'server.socket_port': 8081,
        },
        '/': {
            'tools.response_headers.on': True,
            'tools.response_headers.headers': [('Content-Type', 'text/plain')],
        },
        '/foo': {
            'tools.staticdir.on': True,
            'tools.staticdir.dir': '/usr/local/google/code/dom_distiller/foo',
        },
        '/images': {
            'tools.staticdir.on': True,
            'tools.staticdir.dir': os.path.join(os.getcwd(), options.data_dir),
        }
    }
    cherrypy.quickstart(service, '/', conf)
OLD | NEW |