OLD | NEW |
| (Empty) |
1 #!/usr/bin/env python | |
2 # Copyright 2014 The Chromium Authors. All rights reserved. | |
3 # Use of this source code is governed by a BSD-style license that can be | |
4 # found in the LICENSE file. | |
5 | |
6 import argparse | |
7 import json | |
8 import os | |
9 import shutil | |
10 import sys | |
11 import time | |
12 import urllib | |
13 | |
# Selenium is a third-party dependency; fail with an actionable message
# instead of a raw traceback when it is not installed.
try:
    from selenium import webdriver
except ImportError:
    # Only ImportError is handled: a bare `except:` would also intercept
    # KeyboardInterrupt/SystemExit and report them as a missing dependency.
    print('ERROR:')
    print('Couldn\'t import webdriver. Please run `sudo ./install-build-deps.sh`.')
    sys.exit(1)

# Absolute path of the directory that contains this script.
self_dir = os.path.abspath(os.path.dirname(__file__))
22 | |
def addBuildtoolsToPath():
    """Prepend `<self_dir>/buildtools` to the PATH environment variable.

    PATH is left untouched when it already contains the text 'buildtools'.
    A missing PATH variable is treated as empty instead of raising KeyError.
    """
    envPath = os.environ.get('PATH', '')
    # NOTE(review): this is a substring match, so any PATH entry containing
    # 'buildtools' suppresses the update -- confirm this is intended.
    if 'buildtools' in envPath:
        return
    buildtoolsDir = os.path.join(self_dir, 'buildtools')
    if envPath:
        os.environ['PATH'] = buildtoolsDir + os.pathsep + envPath
    else:
        # Avoid a trailing separator, which POSIX shells treat as the
        # current directory.
        os.environ['PATH'] = buildtoolsDir
27 | |
def getDistillerUrl(u):
    """Build the chrome-distiller:// URL for the page at `u`.

    Args:
      u: URL string of the page; it is form-encoded and passed as the
        `url` query parameter.

    Returns:
      The string 'chrome-distiller://blah/?url=<encoded u>'.
    """
    # urlencode lives in `urllib` on Python 2 and in `urllib.parse` on
    # Python 3; resolve it here so the helper works on either interpreter.
    try:
        from urllib import urlencode
    except ImportError:
        from urllib.parse import urlencode
    return 'chrome-distiller://blah/?' + urlencode({'url': u})
31 | |
def newDriver(windowSize=(1600, 5000), pageLoadTimeout=5):
    """Start a Chrome WebDriver launched with --enable-dom-distiller.

    Args:
      windowSize: (width, height) pair forwarded to set_window_size().
      pageLoadTimeout: value forwarded to set_page_load_timeout().

    Returns:
      The configured webdriver.Chrome instance.

    Raises:
      Whatever selenium raises while launching or configuring the browser.
      If configuration fails after launch, the browser is quit first.
    """
    chromeOptions = webdriver.ChromeOptions()
    chromeOptions.add_argument('--enable-dom-distiller')
    driver = webdriver.Chrome(chrome_options=chromeOptions)
    try:
        width, height = windowSize
        driver.set_window_size(width, height)
        driver.set_page_load_timeout(pageLoadTimeout)
    except Exception:
        # Do not leak the browser process when configuration fails.
        driver.quit()
        raise
    return driver
39 | |
def main(argv):
    """Screenshot each URL and its chrome-distiller:// rendering.

    For the URL at position i, writes `<out>/<i>.png`,
    `<out>/<i>-distilled.png` and a JSON record `<out>/<i>.info`. After all
    URLs have been attempted, writes `<out>/index`, a JSON list of records.

    Command-line arguments (passed in `argv`, without the program name):
      --out DIR         output directory (required).
      urls              zero or more URLs to process.
      --urls-file FILE  file with one URL per line; used only when no
                        positional URLs are given.
      --force           delete DIR first if it already exists.
      --restart         reuse DIR: load existing `.info` records and resume
                        after the highest recorded index.

    Returns:
      0 after the index has been written. 1 when no URLs were supplied,
      when DIR exists without --force/--restart, or when --restart is given
      but DIR does not exist.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--out', required=True)
    parser.add_argument('urls', nargs='*')
    parser.add_argument('--force', action='store_true')
    parser.add_argument('--urls-file')
    parser.add_argument('--restart', action='store_true')
    options = parser.parse_args(argv)

    # Resolve the URL list before touching the filesystem so that a usage
    # error can never wipe an existing output directory via --force.
    if options.urls:
        files = options.urls
    elif options.urls_file:
        with open(options.urls_file) as u:
            files = u.read().splitlines()
    else:
        print('No URLs given: pass URLs as arguments or use --urls-file.')
        return 1

    outdir = options.out
    if options.restart:
        if not os.path.exists(outdir):
            print(outdir + ' doesn\'t exist')
            return 1
    else:
        if os.path.exists(outdir):
            if not options.force:
                print(outdir + ' exists')
                return 1
            shutil.rmtree(outdir, ignore_errors=True)
        os.makedirs(outdir)

    addBuildtoolsToPath()

    # Load records from a previous run before launching the browser, so a
    # failure here cannot leak a running driver.
    output = []
    startIndex = 0
    if options.restart:
        for name in os.listdir(outdir):
            path = os.path.join(outdir, name)
            if os.path.isfile(path) and os.path.splitext(path)[1] == '.info':
                with open(path) as infofile:
                    output.append(json.load(infofile))
        # With no earlier records there is nothing to skip; max() of an
        # empty sequence would raise ValueError.
        if output:
            startIndex = max(entry['index'] for entry in output) + 1
        print('starting at  %s' % startIndex)

    driver = newDriver()
    try:
        for i, f in enumerate(files):
            if i < startIndex:
                continue
            prefix = '%s/%d' % (outdir, i)
            ss = '%s.png' % prefix
            dss = '%s-distilled.png' % prefix
            try:
                driver.get(f)
                # Short pause before each capture; presumably lets the page
                # settle -- TODO confirm 0.1s is sufficient.
                time.sleep(0.1)
                driver.save_screenshot(ss)
                driver.get(getDistillerUrl(f))
                time.sleep(0.1)
                driver.save_screenshot(dss)
                data = {
                    'index': i,
                    'url': f,
                    'screenshot': ss,
                    'distilled': dss,
                }
                output.append(data)
                with open('%s.info' % prefix, 'w') as info:
                    json.dump(data, info)
            except Exception as e:
                # Best effort: report the failure, then replace the driver
                # with a fresh one and move on to the next URL.
                sys.stderr.write('Failed to capture %s: %s\n' % (f, e))
                driver.quit()
                driver = newDriver()
    finally:
        driver.quit()

    with open('%s/index' % outdir, 'w') as index:
        json.dump(output, index)
    return 0
121 | |
# Script entry point: run main() on the command-line arguments (program name
# stripped) and use its return value as the process exit status.
if __name__ == '__main__':
    exitCode = main(sys.argv[1:])
    sys.exit(exitCode)
124 | |
OLD | NEW |