OLD | NEW |
| 1 #!/usr/bin/env python |
| 2 # Copyright 2016 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. |
| 5 |
1 import argparse | 6 import argparse |
2 import collections | 7 import collections |
3 import cherrypy | 8 import cherrypy |
4 import json | 9 import json |
5 import os | 10 import os |
| 11 import sys |
6 import time | 12 import time |
7 | 13 |
# Per-generation values for the archive generations.
# NOTE(review): the code that consumes genDelta is not visible in this view;
# presumably entry N applies to archive generation N -- confirm.
genDelta = [0, 10, 100, 1000]

# Number of archive generations; Service.__init__ creates one (initially
# empty) archive list per generation.
genCount = 4
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
50 lastUpdate = service.getLastUpdateId() | 56 lastUpdate = service.getLastUpdateId() |
51 hasChanges = lastSavedUpdate < lastUpdate | 57 hasChanges = lastSavedUpdate < lastUpdate |
52 | 58 |
53 if not hasChanges: | 59 if not hasChanges: |
54 return | 60 return |
55 | 61 |
56 timeFormat = '%Y-%m-%e-%H:%M:%S' | 62 timeFormat = '%Y-%m-%e-%H:%M:%S' |
57 currentTime = time.time() | 63 currentTime = time.time() |
58 path = os.path.join(service.getArchivePath(), 'archive-%s.json' % time.strftim
e(timeFormat)) | 64 path = os.path.join(service.getArchivePath(), 'archive-%s.json' % time.strftim
e(timeFormat)) |
59 with open(path, 'w') as outfile: | 65 with open(path, 'w') as outfile: |
60 json.dump(service.getData(), outfile) | 66 json.dump(service.getData(), outfile, indent=2) |
| 67 print 'saved to %s' % (path) |
61 lastGeneration.append({ | 68 lastGeneration.append({ |
62 'lastId': lastUpdate, | 69 'lastId': lastUpdate, |
63 'path': path, | 70 'path': path, |
64 'time': archiveIdx, | 71 'time': archiveIdx, |
65 'realtime': currentTime, | 72 'realtime': currentTime, |
66 }) | 73 }) |
67 archiveIdx += 1 | 74 archiveIdx += 1 |
68 | 75 |
69 cleanUpArchives(service.getArchives()) | 76 cleanUpArchives(service.getArchives()) |
70 | 77 |
71 class Service(object): | 78 class Service(object): |
72 exposed = True | 79 exposed = True |
73 | 80 |
def __init__(self, data_dir):
    """Load the index, restore archived 'good' marks and start autosaving.

    Args:
        data_dir: Directory holding the JSON 'index' file. An 'archive'
            sub-directory is created inside it when it does not exist yet.

    Raises:
        IOError/OSError: if the index or an archive file cannot be opened.
        ValueError: if the index or an archive file is not valid JSON.
        AssertionError: if an archived entry's 'index' disagrees with the
            entry currently stored under the same URL.
    """
    self.data = None
    self.updates = collections.deque(maxlen=500)
    self.nextUpdateId = 0
    self.idxMap = None
    self.archive_path = os.path.join(data_dir, 'archive')
    if not os.path.exists(self.archive_path):
        os.makedirs(self.archive_path)
    with open(os.path.join(data_dir, 'index')) as inf:
        self.data = json.load(inf)
    self.initIdxMap()

    # Replay every archive on top of the freshly loaded index so that
    # previously saved 'good' marks survive a restart.  os.listdir() returns
    # names in arbitrary order, so sort the paths: the replay becomes
    # deterministic and, since saveData embeds a timestamp in each archive
    # name, later-named archives are applied last and win.
    archives = sorted(
        os.path.join(self.archive_path, name)
        for name in os.listdir(self.archive_path))
    for archive in archives:
        if not os.path.isfile(archive):
            continue
        with open(archive) as inf:
            last = json.load(inf)
        for saved in last:
            url = saved['url']
            # Only entries that are still in the index and that actually
            # carry a 'good' value are restored.
            if url not in self.idxMap or 'good' not in saved:
                continue
            idx = self.idxMap[url]
            assert self.data[idx]['index'] == saved['index'], (
                'archive %s: index mismatch for %s' % (archive, url))
            self.data[idx]['good'] = saved['good']
            # Single parenthesised argument: same output on Python 2 and 3.
            print("%d good = %s" % (idx, saved['good']))

    # One independent list per archive generation.
    self.archives = [[] for _ in range(genCount)]

    # Run saveData(self) in the background every 60 seconds.
    self.saver = cherrypy.process.plugins.BackgroundTask(
        1.0 * 60, saveData, [self])
    self.saver.start()
92 | 111 |
def initIdxMap(self):
    """Rebuild self.idxMap, mapping each entry's 'url' to its position.

    The map is reset to an empty dict first and then filled from self.data
    in order, so when a URL occurs more than once the last position wins.
    """
    self.idxMap = {}
    self.idxMap.update(
        (record['url'], position)
        for position, record in enumerate(self.data))
97 | 116 |
def getArchivePath(self):
    """Return the archive directory path stored in self.archive_path."""
    archiveDir = self.archive_path
    return archiveDir
100 | 119 |
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
167 options = parser.parse_args() | 186 options = parser.parse_args() |
168 service = Service(options.data_dir) | 187 service = Service(options.data_dir) |
169 conf = { | 188 conf = { |
170 'global': { | 189 'global': { |
171 'server.socket_host': '0.0.0.0', | 190 'server.socket_host': '0.0.0.0', |
172 'server.socket_port': 8081, | 191 'server.socket_port': 8081, |
173 }, | 192 }, |
174 '/': { | 193 '/': { |
175 'tools.response_headers.on': True, | 194 'tools.response_headers.on': True, |
176 'tools.response_headers.headers': [('Content-Type', 'text/plain')], | 195 'tools.response_headers.headers': [('Content-Type', 'text/plain')], |
177 }, | |
178 '/foo': { | |
179 'tools.staticdir.on': True, | 196 'tools.staticdir.on': True, |
180 'tools.staticdir.dir': '/usr/local/google/code/dom_distiller/foo', | 197 'tools.staticdir.dir': os.getcwd(), |
| 198 'tools.staticdir.index': 'index.html', |
181 }, | 199 }, |
182 '/images': { | 200 '/images': { |
183 'tools.staticdir.on': True, | 201 'tools.staticdir.on': True, |
184 'tools.staticdir.dir': os.path.join(os.getcwd(), options.data_dir), | 202 'tools.staticdir.dir': os.path.join(os.getcwd(), options.data_dir), |
| 203 'tools.expires.on': True, |
| 204 'tools.expires.secs': 60, |
185 } | 205 } |
186 } | 206 } |
187 cherrypy.quickstart(service, '/', conf) | 207 cherrypy.quickstart(service, '', conf) |
OLD | NEW |