Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname in Settings | Help | Chromium Project | Gerrit Changes | Sign out
(96)

Unified Diff: get_features.py

Issue 1620043002: Add scripts for distillability modelling (Closed) | Base URL: git@github.com:chromium/dom-distiller.git@master
Patch Set: set upstream patchset, identical to patch set 2 | Created 4 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « foo/test.js ('k') | get_screenshots.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
Index: get_features.py
diff --git a/get_features.py b/get_features.py
deleted file mode 100755
index 66a32412160de4747f730fe08a702bb09dffc8f9..0000000000000000000000000000000000000000
--- a/get_features.py
+++ /dev/null
@@ -1,120 +0,0 @@
-#!/usr/bin/env python
-# Copyright 2014 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-import argparse
-import json
-import os
-import shutil
-import sys
-import time
-import urllib
-
-try:
- from selenium import webdriver
-except:
- print 'ERROR:'
- print 'Couldn\'t import webdriver. Please run `sudo ./install-build-deps.sh`.'
- sys.exit(1)
-
-self_dir = os.path.abspath(os.path.dirname(__file__))
-
-def addBuildtoolsToPath():
- envPath = os.environ['PATH']
- if not 'buildtools' in envPath:
- os.environ['PATH'] = '%s/buildtools:%s' % (self_dir, envPath)
-
-def newDriver():
- chromeOptions = webdriver.ChromeOptions()
- chromeOptions.add_argument('--enable-dom-distiller')
- driver = webdriver.Chrome(chrome_options=chromeOptions)
- driver.set_window_size(1600, 5000)
- driver.set_page_load_timeout(20)
- driver.set_script_timeout(30)
- return driver
-
-def main(argv):
- parser = argparse.ArgumentParser()
- parser.add_argument('--out', required=True)
- parser.add_argument('urls', nargs='*')
- parser.add_argument('--force', action='store_true')
- parser.add_argument('--urls-file')
- parser.add_argument('--restart', action='store_true')
- options = parser.parse_args(argv)
-
- outdir = options.out
- if not options.restart:
- if os.path.exists(outdir):
- if not options.force:
- print outdir + ' exists'
- return 1
- shutil.rmtree(outdir, ignore_errors=True)
- os.makedirs(outdir)
- else:
- if not os.path.exists(outdir):
- print outdir + ' doesn\'t exist'
- return 1
-
- addBuildtoolsToPath()
-
- if options.urls:
- files = options.urls
- elif options.urls_file:
- with open(options.urls_file) as u:
- files = u.read().splitlines()
- else:
- print 'oh no'
- return 1
-
- driver = newDriver()
- output = []
- startIndex = 0
- if options.restart:
- prevfiles = [os.path.join(outdir, f) for f in os.listdir(outdir)]
- prevfiles = [f for f in prevfiles if os.path.isfile(f) and os.path.splitext(f)[1] == '.info']
- for f in prevfiles:
- with open(f) as infofile:
- info = json.load(infofile)
- output.append(info)
- startIndex = max([i['index'] for i in output]) + 1
- print 'starting at ', startIndex
-
- feature_extractor = open('extract_features.js').read()
-
- try:
- for i, f in enumerate(files):
- prefix = '%s/%d' % (outdir, i)
- if i < startIndex:
- continue
- try:
- ss = '%s.png' % prefix
- dss = '%s-distilled.png' % prefix
- driver.get(f)
- time.sleep(0.5)
- features = driver.execute_script(feature_extractor)
- data = {
- 'index': i,
- 'url': f,
- 'features': features
- }
- output.append(data)
- with open('%s.features' % prefix, 'w') as info:
- json.dump(data, info)
-
- except Exception as e:
- print e
- driver.quit()
- driver = newDriver()
- pass
-
- finally:
- driver.quit()
-
- with open('%s/features' % outdir, 'w') as index:
- json.dump(output, index)
- return 0
-
-if __name__ == '__main__':
- sys.exit(main(sys.argv[1:]))
-
« no previous file with comments | « foo/test.js ('k') | get_screenshots.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698