Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(319)

Unified Diff: get_screenshots.py

Issue 1289123002: Merge branch 'master' into heuristics Base URL: git@github.com:chromium/dom-distiller.git@master
Patch Set: Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « get_features.py ('k') | quick_score.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: get_screenshots.py
diff --git a/get_screenshots.py b/get_screenshots.py
new file mode 100755
index 0000000000000000000000000000000000000000..c4d445ce4ff61cbc62f5254d7452cc0163a6c42e
--- /dev/null
+++ b/get_screenshots.py
@@ -0,0 +1,124 @@
+#!/usr/bin/env python
+# Copyright 2014 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import argparse
+import json
+import os
+import shutil
+import sys
+import time
+import urllib
+
+try:
+  from selenium import webdriver
+except ImportError:
+  # Catch only a missing selenium package: a bare `except:` would also
+  # intercept KeyboardInterrupt/SystemExit and mask unrelated errors behind
+  # the install hint below.
+  print 'ERROR:'
+  print 'Couldn\'t import webdriver. Please run `sudo ./install-build-deps.sh`.'
+  sys.exit(1)
+
+# Absolute path of the directory that contains this script.
+self_dir = os.path.abspath(os.path.dirname(__file__))
+
+def addBuildtoolsToPath():
+  """Prepends <self_dir>/buildtools to the PATH environment variable.
+
+  Nothing is changed when PATH already contains the substring 'buildtools'.
+  An unset or empty PATH is handled: the buildtools directory then becomes
+  the only entry, without a dangling separator.
+  """
+  envPath = os.environ.get('PATH', '')
+  if 'buildtools' in envPath:
+    return
+  buildtools = os.path.join(self_dir, 'buildtools')
+  if envPath:
+    # os.pathsep keeps this correct on platforms whose separator is not ':'.
+    os.environ['PATH'] = buildtools + os.pathsep + envPath
+  else:
+    os.environ['PATH'] = buildtools
+
+def getDistillerUrl(u):
+  """Builds a chrome-distiller:// URL carrying |u| as its `url` parameter.
+
+  Args:
+    u: The page URL; it is URL-encoded before being placed in the query.
+
+  Returns:
+    The string "chrome-distiller://blah/?url=<encoded u>".
+  """
+  query = urllib.urlencode({'url': u})
+  return "chrome-distiller://blah/?" + query
+
+def newDriver():
+  """Launches Chrome through WebDriver with --enable-dom-distiller set.
+
+  Returns:
+    A webdriver.Chrome instance whose window is sized to 1600x5000 and whose
+    page-load timeout is 5 (seconds, per Selenium's set_page_load_timeout).
+  """
+  opts = webdriver.ChromeOptions()
+  opts.add_argument('--enable-dom-distiller')
+  browser = webdriver.Chrome(chrome_options=opts)
+  browser.set_window_size(1600, 5000)
+  browser.set_page_load_timeout(5)
+  return browser
+
+def _loadPreviousInfo(outdir):
+  """Returns the records from outdir's *.info files, sorted by 'index'."""
+  records = []
+  for name in os.listdir(outdir):
+    path = os.path.join(outdir, name)
+    if os.path.isfile(path) and os.path.splitext(path)[1] == '.info':
+      with open(path) as infofile:
+        records.append(json.load(infofile))
+  # os.listdir order is arbitrary; sort so the index file is deterministic.
+  records.sort(key=lambda r: r['index'])
+  return records
+
+def main(argv):
+  """Screenshots each URL and its chrome-distiller:// counterpart.
+
+  For the URL at position i, writes <out>/<i>.png, <out>/<i>-distilled.png
+  and a JSON record <out>/<i>.info. After the loop, all records are dumped
+  as a JSON list to <out>/index.
+
+  Args:
+    argv: Command-line arguments without the program name. Supported:
+      --out DIR (required), positional urls, --urls-file FILE (one URL per
+      line, used only when no positional urls are given), --force (delete
+      an existing DIR first), --restart (keep DIR and resume after the
+      highest index found in its *.info files).
+
+  Returns:
+    0 on success; 1 when no URLs were supplied, when DIR exists without
+    --force, or when --restart is given and DIR does not exist.
+  """
+  parser = argparse.ArgumentParser()
+  parser.add_argument('--out', required=True)
+  parser.add_argument('urls', nargs='*')
+  parser.add_argument('--force', action='store_true')
+  parser.add_argument('--urls-file')
+  parser.add_argument('--restart', action='store_true')
+  options = parser.parse_args(argv)
+
+  # Resolve the URL list first so that a bad invocation fails before the
+  # output directory is created or (with --force) wiped.
+  if options.urls:
+    files = options.urls
+  elif options.urls_file:
+    with open(options.urls_file) as u:
+      files = u.read().splitlines()
+  else:
+    print 'No URLs given: pass them as arguments or use --urls-file'
+    return 1
+
+  outdir = options.out
+  if options.restart:
+    if not os.path.exists(outdir):
+      print outdir + ' doesn\'t exist'
+      return 1
+  else:
+    if os.path.exists(outdir):
+      if not options.force:
+        print outdir + ' exists'
+        return 1
+      shutil.rmtree(outdir, ignore_errors=True)
+    os.makedirs(outdir)
+
+  addBuildtoolsToPath()
+
+  output = []
+  startIndex = 0
+  if options.restart:
+    output = _loadPreviousInfo(outdir)
+    # An output directory without any .info file simply restarts from 0
+    # (max() of an empty list would raise ValueError).
+    if output:
+      startIndex = output[-1]['index'] + 1
+    print 'starting at ', startIndex
+
+  # Launch the browser only once nothing above can fail, so an early error
+  # cannot leave a stray Chrome process behind.
+  driver = newDriver()
+  try:
+    for i, f in enumerate(files):
+      if i < startIndex:
+        continue
+      prefix = '%s/%d' % (outdir, i)
+      ss = '%s.png' % prefix
+      dss = '%s-distilled.png' % prefix
+      try:
+        driver.get(f)
+        time.sleep(0.1)
+        driver.save_screenshot(ss)
+        driver.get(getDistillerUrl(f))
+        time.sleep(0.1)
+        driver.save_screenshot(dss)
+        data = {
+          'index': i,
+          'url': f,
+          'screenshot': ss,
+          'distilled': dss,
+        }
+        # Write the .info file before recording the entry so the index never
+        # lists a page whose .info is missing.
+        with open('%s.info' % prefix, 'w') as info:
+          json.dump(data, info)
+        output.append(data)
+      except Exception as e:
+        # Best effort: report the failure, replace the driver with a fresh
+        # one and move on to the next URL.
+        print 'Failed on %s (index %d): %r' % (f, i, e)
+        driver.quit()
+        driver = newDriver()
+  finally:
+    driver.quit()
+
+  with open('%s/index' % outdir, 'w') as index:
+    json.dump(output, index)
+  return 0
+
+# Script entry point: run main() on the command-line arguments (program name
+# excluded) and use its return value as the process exit status.
+if __name__ == '__main__':
+ sys.exit(main(sys.argv[1:]))
+
« no previous file with comments | « get_features.py ('k') | quick_score.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698