Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(451)

Side by Side Diff: get_screenshots.py

Issue 1289123002: Merge branch 'master' into heuristics Base URL: git@github.com:chromium/dom-distiller.git@master
Patch Set: Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « get_features.py ('k') | quick_score.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 #!/usr/bin/env python
2 # Copyright 2014 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
5
6 import argparse
7 import json
8 import os
9 import shutil
10 import sys
11 import time
12 import urllib
13
# selenium is an optional third-party dependency; turn a missing install into
# an actionable hint instead of a raw traceback.
try:
    from selenium import webdriver
except ImportError:
    # Only import failures are handled here: a bare `except:` would also
    # swallow KeyboardInterrupt/SystemExit and hide unrelated errors behind
    # the install hint.
    print('ERROR:')
    print('Couldn\'t import webdriver. Please run `sudo ./install-build-deps.sh`.')
    sys.exit(1)
20
# Absolute path of the directory that contains this script.
self_dir = os.path.abspath(os.path.dirname(__file__))


def addBuildtoolsToPath():
    """Prepend `<self_dir>/buildtools` to the PATH environment variable.

    PATH is left untouched when the substring 'buildtools' already occurs
    anywhere in it, so calling this repeatedly does not stack duplicates.

    Raises:
      KeyError: if PATH is not set in the environment.
    """
    currentPath = os.environ['PATH']
    if 'buildtools' in currentPath:
        return
    os.environ['PATH'] = '%s/buildtools:%s' % (self_dir, currentPath)
27
def getDistillerUrl(u):
    """Return the chrome-distiller:// viewer URL for the page at `u`.

    Args:
      u: address of the page to distill.

    Returns:
      A string of the form `chrome-distiller://blah/?url=<encoded u>`, where
      `u` is form-encoded as the `url` query parameter.
    """
    # urlencode lives in `urllib` on Python 2 and in `urllib.parse` on
    # Python 3; importing locally lets this work on either interpreter.
    try:
        from urllib.parse import urlencode
    except ImportError:
        from urllib import urlencode
    return "chrome-distiller://blah/?" + urlencode({'url': u})
31
def newDriver(windowSize=(1600, 5000), pageLoadTimeout=5):
    """Start a Chrome WebDriver session with DOM Distiller enabled.

    Args:
      windowSize: (width, height) passed to `set_window_size`. Defaults to
          the previously hard-coded 1600x5000.
      pageLoadTimeout: value passed to `set_page_load_timeout`. Defaults to
          the previously hard-coded 5.

    Returns:
      The configured `webdriver.Chrome` instance. The caller is responsible
      for calling `quit()` on it.
    """
    chromeOptions = webdriver.ChromeOptions()
    chromeOptions.add_argument('--enable-dom-distiller')
    driver = webdriver.Chrome(chrome_options=chromeOptions)
    try:
        width, height = windowSize
        driver.set_window_size(width, height)
        driver.set_page_load_timeout(pageLoadTimeout)
    except Exception:
        # Don't leave a browser process behind if configuration fails.
        driver.quit()
        raise
    return driver
39
def _prepareOutputDir(outdir, restart, force):
    """Put the output directory in the state the run needs.

    Returns True when the run may proceed; otherwise prints the reason and
    returns False.
    """
    exists = os.path.exists(outdir)
    if restart:
        # Resuming needs the results of the earlier run to be present.
        if not exists:
            print(outdir + ' doesn\'t exist')
            return False
        return True
    if exists:
        if not force:
            print(outdir + ' exists')
            return False
        shutil.rmtree(outdir, ignore_errors=True)
    os.makedirs(outdir)
    return True


def _readUrls(options):
    """Return the URLs to process, or None when no source was given.

    Positional URLs take precedence over --urls-file, which is read as one
    URL per line.
    """
    if options.urls:
        return options.urls
    if options.urls_file:
        with open(options.urls_file) as urlsFile:
            return urlsFile.read().splitlines()
    return None


def _loadPreviousResults(outdir):
    """Load every `*.info` record found directly in `outdir`, ordered by index."""
    results = []
    for name in os.listdir(outdir):
        path = os.path.join(outdir, name)
        if os.path.isfile(path) and os.path.splitext(path)[1] == '.info':
            with open(path) as infofile:
                results.append(json.load(infofile))
    # os.listdir order is arbitrary; sort so the final index is deterministic.
    results.sort(key=lambda entry: entry['index'])
    return results


def _capture(driver, url, index, prefix):
    """Screenshot `url` and its distilled view; return the metadata record."""
    ss = '%s.png' % prefix
    dss = '%s-distilled.png' % prefix
    driver.get(url)
    time.sleep(0.1)
    driver.save_screenshot(ss)
    driver.get(getDistillerUrl(url))
    time.sleep(0.1)
    driver.save_screenshot(dss)
    return {
        'index': index,
        'url': url,
        'screenshot': ss,
        'distilled': dss,
    }


def main(argv):
    """Capture original and distilled screenshots for a list of URLs.

    For the URL at position i, writes `<out>/<i>.png`,
    `<out>/<i>-distilled.png` and a JSON record `<out>/<i>.info`; when done,
    writes the list of all records to `<out>/index`.

    Command-line arguments (in `argv`):
      --out DIR         output directory (required).
      urls              URLs to capture (positional, optional).
      --urls-file FILE  file with one URL per line, used when no positional
                        URLs are given.
      --force           delete and recreate an existing output directory.
      --restart         reuse an existing output directory and continue after
                        the highest index recorded in its `.info` files.

    Returns:
      0 on completion, 1 when the output directory state or the URL source is
      unusable.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--out', required=True)
    parser.add_argument('urls', nargs='*')
    parser.add_argument('--force', action='store_true')
    parser.add_argument('--urls-file')
    parser.add_argument('--restart', action='store_true')
    options = parser.parse_args(argv)

    outdir = options.out
    if not _prepareOutputDir(outdir, options.restart, options.force):
        return 1

    addBuildtoolsToPath()

    files = _readUrls(options)
    if files is None:
        # Neither positional URLs nor --urls-file were supplied.
        print('oh no')
        return 1

    output = []
    startIndex = 0
    if options.restart:
        output = _loadPreviousResults(outdir)
        # A restart over a directory without any .info files starts at 0
        # rather than failing on max() of an empty sequence.
        if output:
            startIndex = max(entry['index'] for entry in output) + 1
        print('%s %s' % ('starting at ', startIndex))

    # Start the browser only once all preliminary work has succeeded, so an
    # error above cannot leave a Chrome process behind.
    driver = newDriver()
    try:
        for i, f in enumerate(files):
            if i < startIndex:
                continue
            prefix = '%s/%d' % (outdir, i)
            try:
                data = _capture(driver, f, i, prefix)
                output.append(data)
                with open('%s.info' % prefix, 'w') as info:
                    json.dump(data, info)
            except Exception as e:
                # Best effort: report the failure, then continue with a fresh
                # browser because the current one may be in a bad state.
                sys.stderr.write('Failed to capture %s: %r\n' % (f, e))
                driver.quit()
                driver = newDriver()
    finally:
        driver.quit()

    with open('%s/index' % outdir, 'w') as index:
        json.dump(output, index)
    return 0
121
if __name__ == '__main__':
    # Run only when executed as a script; main()'s return value becomes the
    # process exit status.
    exitCode = main(sys.argv[1:])
    sys.exit(exitCode)
124
OLDNEW
« no previous file with comments | « get_features.py ('k') | quick_score.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698