OLD | NEW |
1 #!/usr/bin/python | 1 #!/usr/bin/env python |
2 # Copyright (c) 2011 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2011 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 """Downloads web pages with fillable forms after parsing through a set of links. | 6 """Downloads web pages with fillable forms after parsing through a set of links. |
7 | 7 |
8 Used for collecting web pages with forms. Used as a standalone script. | 8 Used for collecting web pages with forms. Used as a standalone script. |
9 This script assumes that it's run from within the same directory in which it's | 9 This script assumes that it's run from within the same directory in which it's |
10 checked into. If this script were to be run elsewhere then the path for | 10 checked into. If this script were to be run elsewhere then the path for |
11 REGISTER_PAGE_DIR needs to be changed. | 11 REGISTER_PAGE_DIR needs to be changed. |
(...skipping 703 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
715 self.logger.info( | 715 self.logger.info( |
716 'URLs that did not return a registration page: %d\n', | 716 'URLs that did not return a registration page: %d\n', |
717 urls_not_found_no) | 717 urls_not_found_no) |
718 return urls_no - urls_not_found_no | 718 return urls_no - urls_not_found_no |
719 else: | 719 else: |
720 self.logger.error('Error: no URLs were found.') | 720 self.logger.error('Error: no URLs were found.') |
721 return -1 | 721 return -1 |
722 | 722 |
723 | 723 |
724 def main(): | 724 def main(): |
725 # Command line options. | |
726 usage = 'usage: %prog [options] single_url_or_urls_filename' | 725 usage = 'usage: %prog [options] single_url_or_urls_filename' |
727 parser = optparse.OptionParser(usage) | 726 parser = optparse.OptionParser(usage) |
728 parser.add_option( | 727 parser.add_option( |
729 '-l', '--log_level', metavar='LOG_LEVEL', default='error', | 728 '-l', '--log_level', metavar='LOG_LEVEL', default='error', |
730 help='LOG_LEVEL: debug, info, warning or error [default: %default]') | 729 help='LOG_LEVEL: debug, info, warning or error [default: %default]') |
731 | 730 |
732 (options, args) = parser.parse_args() | 731 (options, args) = parser.parse_args() |
733 options.log_level = options.log_level.upper() | 732 options.log_level = options.log_level.upper() |
734 if options.log_level not in ['DEBUG', 'INFO', 'WARNING', 'ERROR']: | 733 if options.log_level not in ['DEBUG', 'INFO', 'WARNING', 'ERROR']: |
735 print 'Wrong log_level argument.' | 734 print 'Wrong log_level argument.' |
736 parser.print_help() | 735 parser.print_help() |
737 sys.exit(1) | 736 return 1 |
738 options.log_level = getattr(logging, options.log_level) | 737 options.log_level = getattr(logging, options.log_level) |
739 | 738 |
740 if len(args) != 1: | 739 if len(args) != 1: |
741 parser.error('Wrong number of arguments.') | 740 parser.error('Wrong number of arguments.') |
742 | 741 |
743 logger = logging.getLogger(__name__) | 742 logger = logging.getLogger(__name__) |
744 if options.log_level: | 743 if options.log_level: |
745 console = logging.StreamHandler() | 744 console = logging.StreamHandler() |
746 logger.addHandler(console) | 745 logger.addHandler(console) |
747 logger.setLevel(options.log_level) | 746 logger.setLevel(options.log_level) |
748 | 747 |
749 arg_is_a_file = os.path.isfile(args[0]) | 748 arg_is_a_file = os.path.isfile(args[0]) |
750 if arg_is_a_file: | 749 if arg_is_a_file: |
751 CrawlerClass = ThreadedCrawler | 750 CrawlerClass = ThreadedCrawler |
752 else: | 751 else: |
753 CrawlerClass = Crawler | 752 CrawlerClass = Crawler |
754 t0 = datetime.datetime.now() | 753 t0 = datetime.datetime.now() |
755 c = CrawlerClass(args[0], options.log_level) | 754 c = CrawlerClass(args[0], options.log_level) |
756 c.Run() | 755 c.Run() |
757 if not arg_is_a_file and c.url_error: | 756 if not arg_is_a_file and c.url_error: |
758 logger.error( | 757 logger.error( |
759 'ERROR: "%s" is neither a valid filename nor a valid URL' % args[0]) | 758 'ERROR: "%s" is neither a valid filename nor a valid URL' % args[0]) |
760 t1 = datetime.datetime.now() | 759 t1 = datetime.datetime.now() |
761 delta_t = t1 - t0 | 760 delta_t = t1 - t0 |
762 logger.info('Started at: %s\n', t0) | 761 logger.info('Started at: %s\n', t0) |
763 logger.info('Ended at: %s\n', t1) | 762 logger.info('Ended at: %s\n', t1) |
764 logger.info('Total execution time: %s\n', delta_t) | 763 logger.info('Total execution time: %s\n', delta_t) |
| 764 return 0 |
765 | 765 |
766 | 766 |
767 if __name__ == "__main__": | 767 if __name__ == "__main__": |
768 main() | 768 sys.exit(main()) |
OLD | NEW |