| OLD | NEW |
| 1 #!/usr/bin/python | 1 #!/usr/bin/env python |
| 2 # Copyright (c) 2011 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
| 5 | 5 |
| 6 """Downloads web pages with fillable forms after parsing through a set of links. | 6 """Downloads web pages with fillable forms after parsing through a set of links. |
| 7 | 7 |
| 8 Used for collecting web pages with forms. Used as a standalone script. | 8 Used for collecting web pages with forms. Used as a standalone script. |
| 9 This script assumes that it's run from within the same directory in which it's | 9 This script assumes that it's run from within the same directory in which it's |
| 10 checked into. If this script were to be run elsewhere then the path for | 10 checked into. If this script were to be run elsewhere then the path for |
| 11 REGISTER_PAGE_DIR needs to be changed. | 11 REGISTER_PAGE_DIR needs to be changed. |
| (...skipping 703 matching lines...) | (...skipping 703 matching lines...) |
| 715 self.logger.info( | 715 self.logger.info( |
| 716 'URLs that did not return a registration page: %d\n', | 716 'URLs that did not return a registration page: %d\n', |
| 717 urls_not_found_no) | 717 urls_not_found_no) |
| 718 return urls_no - urls_not_found_no | 718 return urls_no - urls_not_found_no |
| 719 else: | 719 else: |
| 720 self.logger.error('Error: no URLs were found.') | 720 self.logger.error('Error: no URLs were found.') |
| 721 return -1 | 721 return -1 |
| 722 | 722 |
| 723 | 723 |
| 724 def main(): | 724 def main(): |
| 725 # Command line options. | |
| 726 usage = 'usage: %prog [options] single_url_or_urls_filename' | 725 usage = 'usage: %prog [options] single_url_or_urls_filename' |
| 727 parser = optparse.OptionParser(usage) | 726 parser = optparse.OptionParser(usage) |
| 728 parser.add_option( | 727 parser.add_option( |
| 729 '-l', '--log_level', metavar='LOG_LEVEL', default='error', | 728 '-l', '--log_level', metavar='LOG_LEVEL', default='error', |
| 730 help='LOG_LEVEL: debug, info, warning or error [default: %default]') | 729 help='LOG_LEVEL: debug, info, warning or error [default: %default]') |
| 731 | 730 |
| 732 (options, args) = parser.parse_args() | 731 (options, args) = parser.parse_args() |
| 733 options.log_level = options.log_level.upper() | 732 options.log_level = options.log_level.upper() |
| 734 if options.log_level not in ['DEBUG', 'INFO', 'WARNING', 'ERROR']: | 733 if options.log_level not in ['DEBUG', 'INFO', 'WARNING', 'ERROR']: |
| 735 print 'Wrong log_level argument.' | 734 print 'Wrong log_level argument.' |
| 736 parser.print_help() | 735 parser.print_help() |
| 737 sys.exit(1) | 736 return 1 |
| 738 options.log_level = getattr(logging, options.log_level) | 737 options.log_level = getattr(logging, options.log_level) |
| 739 | 738 |
| 740 if len(args) != 1: | 739 if len(args) != 1: |
| 741 parser.error('Wrong number of arguments.') | 740 parser.error('Wrong number of arguments.') |
| 742 | 741 |
| 743 logger = logging.getLogger(__name__) | 742 logger = logging.getLogger(__name__) |
| 744 if options.log_level: | 743 if options.log_level: |
| 745 console = logging.StreamHandler() | 744 console = logging.StreamHandler() |
| 746 logger.addHandler(console) | 745 logger.addHandler(console) |
| 747 logger.setLevel(options.log_level) | 746 logger.setLevel(options.log_level) |
| 748 | 747 |
| 749 arg_is_a_file = os.path.isfile(args[0]) | 748 arg_is_a_file = os.path.isfile(args[0]) |
| 750 if arg_is_a_file: | 749 if arg_is_a_file: |
| 751 CrawlerClass = ThreadedCrawler | 750 CrawlerClass = ThreadedCrawler |
| 752 else: | 751 else: |
| 753 CrawlerClass = Crawler | 752 CrawlerClass = Crawler |
| 754 t0 = datetime.datetime.now() | 753 t0 = datetime.datetime.now() |
| 755 c = CrawlerClass(args[0], options.log_level) | 754 c = CrawlerClass(args[0], options.log_level) |
| 756 c.Run() | 755 c.Run() |
| 757 if not arg_is_a_file and c.url_error: | 756 if not arg_is_a_file and c.url_error: |
| 758 logger.error( | 757 logger.error( |
| 759 'ERROR: "%s" is neither a valid filename nor a valid URL' % args[0]) | 758 'ERROR: "%s" is neither a valid filename nor a valid URL' % args[0]) |
| 760 t1 = datetime.datetime.now() | 759 t1 = datetime.datetime.now() |
| 761 delta_t = t1 - t0 | 760 delta_t = t1 - t0 |
| 762 logger.info('Started at: %s\n', t0) | 761 logger.info('Started at: %s\n', t0) |
| 763 logger.info('Ended at: %s\n', t1) | 762 logger.info('Ended at: %s\n', t1) |
| 764 logger.info('Total execution time: %s\n', delta_t) | 763 logger.info('Total execution time: %s\n', delta_t) |
| 764 return 0 |
| 765 | 765 |
| 766 | 766 |
| 767 if __name__ == "__main__": | 767 if __name__ == "__main__": |
| 768 main() | 768 sys.exit(main()) |
| OLD | NEW |