heuristics/distillable/server.py - Issue 1620043002: Add scripts for distillability modelling

Side by Side Diff: heuristics/distillable/server.py

Issue 1620043002: Add scripts for distillability modelling (Closed) Base URL: git@github.com:chromium/dom-distiller.git@master

Patch Set: set upstream patchset, identical to patch set 2 Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	1 #!/usr/bin/env python

	2 # Copyright 2016 The Chromium Authors. All rights reserved.

	3 # Use of this source code is governed by a BSD-style license that can be

	4 # found in the LICENSE file.

	5

1 import argparse	6 import argparse

2 import collections	7 import collections

3 import cherrypy	8 import cherrypy

4 import json	9 import json

5 import os	10 import os

	11 import sys

6 import time	12 import time

7	13

8 genDelta = [	14 genDelta = [

9 0,	15 0,

10 10,	16 10,

11 100,	17 100,

12 1000,	18 1000,

13 ]	19 ]

14	20

15 genCount = 4	21 genCount = 4

(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
50 lastUpdate = service.getLastUpdateId()	56 lastUpdate = service.getLastUpdateId()

51 hasChanges = lastSavedUpdate < lastUpdate	57 hasChanges = lastSavedUpdate < lastUpdate

52	58

53 if not hasChanges:	59 if not hasChanges:

54 return	60 return

55	61

56 timeFormat = '%Y-%m-%e-%H:%M:%S'	62 timeFormat = '%Y-%m-%e-%H:%M:%S'

57 currentTime = time.time()	63 currentTime = time.time()

58 path = os.path.join(service.getArchivePath(), 'archive-%s.json' % time.strftime(timeFormat))	64 path = os.path.join(service.getArchivePath(), 'archive-%s.json' % time.strftime(timeFormat))

59 with open(path, 'w') as outfile:	65 with open(path, 'w') as outfile:

60 json.dump(service.getData(), outfile)	66 json.dump(service.getData(), outfile, indent=2)

	67 print 'saved to %s' % (path)

61 lastGeneration.append({	68 lastGeneration.append({

62 'lastId': lastUpdate,	69 'lastId': lastUpdate,

63 'path': path,	70 'path': path,

64 'time': archiveIdx,	71 'time': archiveIdx,

65 'realtime': currentTime,	72 'realtime': currentTime,

66 })	73 })

67 archiveIdx += 1	74 archiveIdx += 1

68	75

69 cleanUpArchives(service.getArchives())	76 cleanUpArchives(service.getArchives())

70	77

71 class Service(object):	78 class Service(object):

72 exposed = True	79 exposed = True

73	80

74 def __init__(self, data_dir):	81 def __init__(self, data_dir):

75 self.data = None	82 self.data = None

76 self.updates = collections.deque(maxlen=500)	83 self.updates = collections.deque(maxlen=500)

77 self.nextUpdateId = 0	84 self.nextUpdateId = 0

78 self.idxMap = None	85 self.idxMap = None

79 self.archive_path = os.path.join(data_dir, 'archive')	86 self.archive_path = os.path.join(data_dir, 'archive')

80 if not os.path.exists(self.archive_path):	87 if not os.path.exists(self.archive_path):

81 os.makedirs(self.archive_path)	88 os.makedirs(self.archive_path)

82 with open(os.path.join(data_dir, 'index')) as inf:	89 with open(os.path.join(data_dir, 'index')) as inf:

83 self.data = json.load(inf)	90 self.data = json.load(inf)

84 self.initIdxMap()	91 self.initIdxMap()

85	92

	93 archives = [os.path.join(self.archive_path, f) for f in os.listdir(self.archive_path)]

	94 archives = [f for f in archives if os.path.isfile(f)]

	95 for archive in archives:

	96 with open(archive) as inf:

	97 last = json.load(inf)

	98 for i in last:

	99 url = i['url']

	100 if url in self.idxMap and 'good' in i:

	101 assert self.data[self.idxMap[url]]['index'] == i['index']

	102 self.data[self.idxMap[url]]['good'] = i['good']

	103 print "%d good = %s" % (self.idxMap[url], i['good'])

	104

86 self.archives = []	105 self.archives = []

87 for i in range(genCount):	106 for i in range(genCount):

88 self.archives.append([])	107 self.archives.append([])

89	108

90 self.saver = cherrypy.process.plugins.BackgroundTask(2.0 * 60, saveData, [self])	109 self.saver = cherrypy.process.plugins.BackgroundTask(1.0 * 60, saveData, [self])

91 self.saver.start()	110 self.saver.start()

92	111

93 def initIdxMap(self):	112 def initIdxMap(self):

94 self.idxMap = dict()	113 self.idxMap = dict()

95 for i, entry in enumerate(self.data):	114 for i, entry in enumerate(self.data):

96 self.idxMap[entry['url']] = i	115 self.idxMap[entry['url']] = i

97	116

98 def getArchivePath(self):	117 def getArchivePath(self):

99 return self.archive_path	118 return self.archive_path

100	119

(...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
167 options = parser.parse_args()	186 options = parser.parse_args()

168 service = Service(options.data_dir)	187 service = Service(options.data_dir)

169 conf = {	188 conf = {

170 'global': {	189 'global': {

171 'server.socket_host': '0.0.0.0',	190 'server.socket_host': '0.0.0.0',

172 'server.socket_port': 8081,	191 'server.socket_port': 8081,

173 },	192 },

174 '/': {	193 '/': {

175 'tools.response_headers.on': True,	194 'tools.response_headers.on': True,

176 'tools.response_headers.headers': [('Content-Type', 'text/plain')],	195 'tools.response_headers.headers': [('Content-Type', 'text/plain')],

177 },

178 '/foo': {

179 'tools.staticdir.on': True,	196 'tools.staticdir.on': True,

180 'tools.staticdir.dir': '/usr/local/google/code/dom_distiller/foo',	197 'tools.staticdir.dir': os.getcwd(),

	198 'tools.staticdir.index': 'index.html',

181 },	199 },

182 '/images': {	200 '/images': {

183 'tools.staticdir.on': True,	201 'tools.staticdir.on': True,

184 'tools.staticdir.dir': os.path.join(os.getcwd(), options.data_dir),	202 'tools.staticdir.dir': os.path.join(os.getcwd(), options.data_dir),

	203 'tools.expires.on': True,

	204 'tools.expires.secs': 60,

185 }	205 }

186 }	206 }

187 cherrypy.quickstart(service, '/', conf)	207 cherrypy.quickstart(service, '', conf)

OLD	NEW

« no previous file with comments | « heuristics/distillable/index.js ('k') | heuristics/distillable/write_features_csv.py » ('j') | no next file with comments »