| OLD | NEW |
| 1 #!/usr/bin/env python |
| 2 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. |
| 5 |
| 1 import argparse | 6 import argparse |
| 2 import collections | 7 import collections |
| 3 import cherrypy | 8 import cherrypy |
| 4 import json | 9 import json |
| 5 import os | 10 import os |
| 11 import sys |
| 6 import time | 12 import time |
| 7 | 13 |
# NOTE(review): one entry per archive generation; presumably consumed by
# cleanUpArchives (not visible in this view) -- confirm the meaning and units.
genDelta = [0, 10, 100, 1000]

# Number of archive generations; Service.__init__ creates one list for each.
genCount = 4
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 50 lastUpdate = service.getLastUpdateId() | 56 lastUpdate = service.getLastUpdateId() |
| 51 hasChanges = lastSavedUpdate < lastUpdate | 57 hasChanges = lastSavedUpdate < lastUpdate |
| 52 | 58 |
| 53 if not hasChanges: | 59 if not hasChanges: |
| 54 return | 60 return |
| 55 | 61 |
| 56 timeFormat = '%Y-%m-%e-%H:%M:%S' | 62 timeFormat = '%Y-%m-%e-%H:%M:%S' |
| 57 currentTime = time.time() | 63 currentTime = time.time() |
| 58 path = os.path.join(service.getArchivePath(), 'archive-%s.json' % time.strftim
e(timeFormat)) | 64 path = os.path.join(service.getArchivePath(), 'archive-%s.json' % time.strftim
e(timeFormat)) |
| 59 with open(path, 'w') as outfile: | 65 with open(path, 'w') as outfile: |
| 60 json.dump(service.getData(), outfile) | 66 json.dump(service.getData(), outfile, indent=2) |
| 67 print 'saved to %s' % (path) |
| 61 lastGeneration.append({ | 68 lastGeneration.append({ |
| 62 'lastId': lastUpdate, | 69 'lastId': lastUpdate, |
| 63 'path': path, | 70 'path': path, |
| 64 'time': archiveIdx, | 71 'time': archiveIdx, |
| 65 'realtime': currentTime, | 72 'realtime': currentTime, |
| 66 }) | 73 }) |
| 67 archiveIdx += 1 | 74 archiveIdx += 1 |
| 68 | 75 |
| 69 cleanUpArchives(service.getArchives()) | 76 cleanUpArchives(service.getArchives()) |
| 70 | 77 |
| 71 class Service(object): | 78 class Service(object): |
| 72 exposed = True | 79 exposed = True |
| 73 | 80 |
def __init__(self, data_dir):
  """Loads the index, replays saved archives and starts the periodic saver.

  Args:
    data_dir: Directory holding the JSON file named 'index'. An 'archive'
        subdirectory is created inside it when it does not exist yet.

  Raises:
    OSError: If the archive directory cannot be created.
    ValueError: If an archive entry's 'index' disagrees with the entry
        loaded from the index file for the same url.
  """
  self.data = None
  self.updates = collections.deque(maxlen=500)
  self.nextUpdateId = 0
  self.idxMap = None
  self.archive_path = os.path.join(data_dir, 'archive')
  # Create first and inspect the failure afterwards: a separate exists()
  # check can race with another process creating the same directory.
  try:
    os.makedirs(self.archive_path)
  except OSError:
    if not os.path.isdir(self.archive_path):
      raise
  with open(os.path.join(data_dir, 'index')) as inf:
    self.data = json.load(inf)
  self.initIdxMap()

  archives = [os.path.join(self.archive_path, name)
              for name in os.listdir(self.archive_path)]
  archives = [path for path in archives if os.path.isfile(path)]
  # os.listdir() returns names in arbitrary order. Replay the oldest archive
  # first so the 'good' value from the most recently written one wins.
  archives.sort(key=lambda path: (os.path.getmtime(path), path))
  for archive in archives:
    with open(archive) as inf:
      saved = json.load(inf)
    for entry in saved:
      url = entry['url']
      if url not in self.idxMap or 'good' not in entry:
        continue
      idx = self.idxMap[url]
      # Explicit raise instead of assert, which is stripped under -O and
      # would let a mismatched archive silently overwrite the index data.
      if self.data[idx]['index'] != entry['index']:
        raise ValueError(
            'archive %s: index mismatch for url %s' % (archive, url))
      self.data[idx]['good'] = entry['good']
      # Parenthesised single argument prints the same text on Python 2 and 3.
      print('%d good = %s' % (idx, entry['good']))

  # One (initially empty) archive list per generation.
  self.archives = [[] for _ in range(genCount)]

  # Run saveData with this service as its argument at a 60-second interval.
  self.saver = cherrypy.process.plugins.BackgroundTask(
      1.0 * 60, saveData, [self])
  self.saver.start()
| 92 | 111 |
def initIdxMap(self):
  """Rebuilds self.idxMap: each entry's 'url' -> its position in self.data.

  When several entries share a url, the last position is the one kept.
  """
  self.idxMap = {}
  self.idxMap.update(
      (record['url'], position) for position, record in enumerate(self.data))
| 97 | 116 |
def getArchivePath(self):
  """Returns the archive directory path stored in self.archive_path."""
  archive_dir = self.archive_path
  return archive_dir
| 100 | 119 |
| (...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 167 options = parser.parse_args() | 186 options = parser.parse_args() |
| 168 service = Service(options.data_dir) | 187 service = Service(options.data_dir) |
| 169 conf = { | 188 conf = { |
| 170 'global': { | 189 'global': { |
| 171 'server.socket_host': '0.0.0.0', | 190 'server.socket_host': '0.0.0.0', |
| 172 'server.socket_port': 8081, | 191 'server.socket_port': 8081, |
| 173 }, | 192 }, |
| 174 '/': { | 193 '/': { |
| 175 'tools.response_headers.on': True, | 194 'tools.response_headers.on': True, |
| 176 'tools.response_headers.headers': [('Content-Type', 'text/plain')], | 195 'tools.response_headers.headers': [('Content-Type', 'text/plain')], |
| 177 }, | |
| 178 '/foo': { | |
| 179 'tools.staticdir.on': True, | 196 'tools.staticdir.on': True, |
| 180 'tools.staticdir.dir': '/usr/local/google/code/dom_distiller/foo', | 197 'tools.staticdir.dir': os.getcwd(), |
| 198 'tools.staticdir.index': 'index.html', |
| 181 }, | 199 }, |
| 182 '/images': { | 200 '/images': { |
| 183 'tools.staticdir.on': True, | 201 'tools.staticdir.on': True, |
| 184 'tools.staticdir.dir': os.path.join(os.getcwd(), options.data_dir), | 202 'tools.staticdir.dir': os.path.join(os.getcwd(), options.data_dir), |
| 203 'tools.expires.on': True, |
| 204 'tools.expires.secs': 60, |
| 185 } | 205 } |
| 186 } | 206 } |
| 187 cherrypy.quickstart(service, '/', conf) | 207 cherrypy.quickstart(service, '', conf) |
| OLD | NEW |