| OLD | NEW |
| (Empty) |
| 1 #!/usr/bin/env python | |
| 2 # Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file | |
| 3 # for details. All rights reserved. Use of this source code is governed by a | |
| 4 # BSD-style license that can be found in the LICENSE file. | |
| 5 ''' | |
| 6 This script finds all HTML pages in a folder and downloads all images, replacing | |
| 7 the urls with local ones. | |
| 8 ''' | |
| 9 import os, sys, optparse, subprocess, multiprocessing | |
| 10 from os.path import abspath, basename, dirname, join | |
| 11 | |
# Directory that contains this script.
SWARM_PATH = dirname(abspath(__file__))
# Two directory levels above SWARM_PATH.
CLIENT_PATH = dirname(dirname(SWARM_PATH))
CLIENT_TOOLS_PATH = join(CLIENT_PATH, 'tools')

# Add the client tools directory so we can find htmlconverter.py.
sys.path.append(CLIENT_TOOLS_PATH)
import htmlconverter
# Path of the converter script itself; built with join() like the other
# paths so the separator always matches the host platform.
converter = join(CLIENT_TOOLS_PATH, 'htmlconverter.py')
| 20 | |
# This has to be a top-level function to use with multiprocessing.
def convertImgs(infile):
    """Run htmlconverter.convertForOffline on one HTML file; never raises.

    `infile` is passed as both the first and the second argument of
    convertForOffline (presumably input and output path, i.e. the file is
    rewritten in place -- confirm against htmlconverter). The `verbose` and
    `encode_images` keyword arguments come from the module-level `options`
    object that main() assigns.

    Args:
      infile: path of the HTML file to process.

    Returns:
      None. Prints 'Converted <infile>' on success, or
      'Caught error: <exception>' if anything went wrong.
    """
    try:
        htmlconverter.convertForOffline(
            infile, infile,
            verbose=options.verbose,
            encode_images=options.inline_images)
        print('Converted ' + infile)
    except BaseException as e:
        # Best effort: report the failure and let the caller move on to the
        # next file. NOTE(review): BaseException also swallows
        # KeyboardInterrupt/SystemExit raised inside this call -- confirm
        # that is intended before narrowing it to Exception.
        print('Caught error: %s' % e)
| 32 | |
def Flags():
    """Build the optparse parser for this script's command-line switches.

    Two boolean switches are registered, each False unless present on the
    command line: --inline_images and --verbose.

    Returns:
      The configured optparse.OptionParser; nothing has been parsed yet.
    """
    switches = (
        ("--inline_images",
         "Encode img payloads as data:// URLs rather than local files."),
        ("--verbose", "Print verbose output"),
    )
    flag_parser = optparse.OptionParser()
    for switch, description in switches:
        flag_parser.add_option(
            switch, action="store_true", default=False, help=description)
    return flag_parser
| 45 | |
def main():
    """Process every .html file found under the directory given in argv.

    Parses the command line with Flags(), stores the parsed flags in the
    module-level `options` (read by convertImgs), walks the directory named
    by the first positional argument, and hands each file whose name ends in
    '.html' to convertImgs through a multiprocessing pool.

    Returns:
      1 after printing a usage line when no directory was given or the first
      argument is the literal word 'help'; otherwise None.

    Raises:
      multiprocessing.TimeoutError: if the batch has not finished within
        one hour.
    """
    global options
    parser = Flags()
    options, args = parser.parse_args()
    print("args: %s" % args)
    # Exact match only: a substring test would mistake a directory such as
    # 'helpers/' for a help request. -h/--help are handled by optparse.
    if not args or args[0] == 'help':
        print('Usage: %s DIRECTORY' % basename(sys.argv[0]))
        return 1

    # Named `directory` so the imported os.path.dirname is not shadowed.
    directory = args[0]
    print('Searching directory ' + directory)

    files = []
    for root, _dirs, fnames in os.walk(directory):
        for fname in fnames:
            if fname.endswith('.html'):
                files.append(join(root, fname))

    if not files:
        # Nothing to convert, so do not start any worker processes.
        print('No .html files found in ' + directory)
        return None

    count = 4 * multiprocessing.cpu_count()
    pool = multiprocessing.Pool(processes=count)
    try:
        # Note: need a timeout to get keyboard interrupt due to a Python bug
        pool.map_async(convertImgs, files).get(3600)  # one hour
    finally:
        # Always reap the workers, whether the batch finished, timed out or
        # was interrupted.
        pool.terminate()
        pool.join()
| 68 | |
if __name__ == '__main__':
    # Pass main()'s return value (1 on a usage error, otherwise None) to the
    # shell as the exit status instead of discarding it.
    sys.exit(main())
| OLD | NEW |