Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(21)

Side by Side Diff: heuristics/distillable/server.py

Issue 1620043002: Add scripts for distillability modelling (Closed) Base URL: git@github.com:chromium/dom-distiller.git@master
Patch Set: set upstream patchset, identical to patch set 2 Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « heuristics/distillable/index.js ('k') | heuristics/distillable/write_features_csv.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/env python
2 # Copyright 2016 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
1 import argparse 6 import argparse
2 import collections 7 import collections
3 import cherrypy 8 import cherrypy
4 import json 9 import json
5 import os 10 import os
11 import sys
6 import time 12 import time
7 13
8 genDelta = [ 14 genDelta = [
9 0, 15 0,
10 10, 16 10,
11 100, 17 100,
12 1000, 18 1000,
13 ] 19 ]
14 20
15 genCount = 4 21 genCount = 4
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
50 lastUpdate = service.getLastUpdateId() 56 lastUpdate = service.getLastUpdateId()
51 hasChanges = lastSavedUpdate < lastUpdate 57 hasChanges = lastSavedUpdate < lastUpdate
52 58
53 if not hasChanges: 59 if not hasChanges:
54 return 60 return
55 61
56 timeFormat = '%Y-%m-%e-%H:%M:%S' 62 timeFormat = '%Y-%m-%e-%H:%M:%S'
57 currentTime = time.time() 63 currentTime = time.time()
58 path = os.path.join(service.getArchivePath(), 'archive-%s.json' % time.strftime(timeFormat)) 64 path = os.path.join(service.getArchivePath(), 'archive-%s.json' % time.strftime(timeFormat))
59 with open(path, 'w') as outfile: 65 with open(path, 'w') as outfile:
60 json.dump(service.getData(), outfile) 66 json.dump(service.getData(), outfile, indent=2)
67 print 'saved to %s' % (path)
61 lastGeneration.append({ 68 lastGeneration.append({
62 'lastId': lastUpdate, 69 'lastId': lastUpdate,
63 'path': path, 70 'path': path,
64 'time': archiveIdx, 71 'time': archiveIdx,
65 'realtime': currentTime, 72 'realtime': currentTime,
66 }) 73 })
67 archiveIdx += 1 74 archiveIdx += 1
68 75
69 cleanUpArchives(service.getArchives()) 76 cleanUpArchives(service.getArchives())
70 77
71 class Service(object): 78 class Service(object):
72 exposed = True 79 exposed = True
73 80
74 def __init__(self, data_dir): 81 def __init__(self, data_dir):
75 self.data = None 82 self.data = None
76 self.updates = collections.deque(maxlen=500) 83 self.updates = collections.deque(maxlen=500)
77 self.nextUpdateId = 0 84 self.nextUpdateId = 0
78 self.idxMap = None 85 self.idxMap = None
79 self.archive_path = os.path.join(data_dir, 'archive') 86 self.archive_path = os.path.join(data_dir, 'archive')
80 if not os.path.exists(self.archive_path): 87 if not os.path.exists(self.archive_path):
81 os.makedirs(self.archive_path) 88 os.makedirs(self.archive_path)
82 with open(os.path.join(data_dir, 'index')) as inf: 89 with open(os.path.join(data_dir, 'index')) as inf:
83 self.data = json.load(inf) 90 self.data = json.load(inf)
84 self.initIdxMap() 91 self.initIdxMap()
85 92
93 archives = [os.path.join(self.archive_path, f) for f in os.listdir(self.archive_path)]
94 archives = [f for f in archives if os.path.isfile(f)]
95 for archive in archives:
96 with open(archive) as inf:
97 last = json.load(inf)
98 for i in last:
99 url = i['url']
100 if url in self.idxMap and 'good' in i:
101 assert self.data[self.idxMap[url]]['index'] == i['index']
102 self.data[self.idxMap[url]]['good'] = i['good']
103 print "%d good = %s" % (self.idxMap[url], i['good'])
104
86 self.archives = [] 105 self.archives = []
87 for i in range(genCount): 106 for i in range(genCount):
88 self.archives.append([]) 107 self.archives.append([])
89 108
90 self.saver = cherrypy.process.plugins.BackgroundTask(2.0 * 60, saveData, [self]) 109 self.saver = cherrypy.process.plugins.BackgroundTask(1.0 * 60, saveData, [self])
91 self.saver.start() 110 self.saver.start()
92 111
93 def initIdxMap(self): 112 def initIdxMap(self):
94 self.idxMap = dict() 113 self.idxMap = dict()
95 for i, entry in enumerate(self.data): 114 for i, entry in enumerate(self.data):
96 self.idxMap[entry['url']] = i 115 self.idxMap[entry['url']] = i
97 116
98 def getArchivePath(self): 117 def getArchivePath(self):
99 return self.archive_path 118 return self.archive_path
100 119
(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after
167 options = parser.parse_args() 186 options = parser.parse_args()
168 service = Service(options.data_dir) 187 service = Service(options.data_dir)
169 conf = { 188 conf = {
170 'global': { 189 'global': {
171 'server.socket_host': '0.0.0.0', 190 'server.socket_host': '0.0.0.0',
172 'server.socket_port': 8081, 191 'server.socket_port': 8081,
173 }, 192 },
174 '/': { 193 '/': {
175 'tools.response_headers.on': True, 194 'tools.response_headers.on': True,
176 'tools.response_headers.headers': [('Content-Type', 'text/plain')], 195 'tools.response_headers.headers': [('Content-Type', 'text/plain')],
177 },
178 '/foo': {
179 'tools.staticdir.on': True, 196 'tools.staticdir.on': True,
180 'tools.staticdir.dir': '/usr/local/google/code/dom_distiller/foo', 197 'tools.staticdir.dir': os.getcwd(),
198 'tools.staticdir.index': 'index.html',
181 }, 199 },
182 '/images': { 200 '/images': {
183 'tools.staticdir.on': True, 201 'tools.staticdir.on': True,
184 'tools.staticdir.dir': os.path.join(os.getcwd(), options.data_dir), 202 'tools.staticdir.dir': os.path.join(os.getcwd(), options.data_dir),
203 'tools.expires.on': True,
204 'tools.expires.secs': 60,
185 } 205 }
186 } 206 }
187 cherrypy.quickstart(service, '/', conf) 207 cherrypy.quickstart(service, '', conf)
OLDNEW
« no previous file with comments | « heuristics/distillable/index.js ('k') | heuristics/distillable/write_features_csv.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698