| OLD | NEW |
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2013 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
| 5 | 5 |
| 6 """Creates a Python telemetry page_set from the specified webpages CSV. | 6 """Creates a Python telemetry page_set from the specified webpages CSV. |
| 7 | 7 |
| 8 This module does the following steps: | 8 This module does the following steps: |
| 9 * Downloads a ZIP from http://s3.amazonaws.com/alexa-static/top-1m.csv.zip | 9 * Downloads a ZIP from http://s3.amazonaws.com/alexa-static/top-1m.csv.zip |
| 10 * Unpacks it and reads its contents in memory. | 10 * Unpacks it and reads its contents in memory. |
| 11 * Writes out multiple Python page sets from the CSV file for the specified | 11 * Writes out multiple Python page sets from the CSV file for the specified |
| 12 number of webpages. | 12 number of webpages. |
| 13 | 13 |
| 14 Sample Usage: | 14 Sample Usage: |
| 15 python create_page_set.py -s 1 -e 10000 | 15 python create_page_set.py -s 1 -e 10000 |
| 16 | 16 |
| 17 Running the above command will create 10000 different page sets. | 17 Running the above command will create 10000 different page sets. |
| 18 """ | 18 """ |
| 19 | 19 |
| 20 __author__ = 'Ravi Mistry' | 20 __author__ = 'Ravi Mistry' |
| 21 | 21 |
| 22 import json |
| 22 import optparse | 23 import optparse |
| 23 import os | 24 import os |
| 24 import urllib | 25 import urllib |
| 25 import zipfile | 26 import zipfile |
| 26 | 27 |
| 27 from StringIO import StringIO | 28 from StringIO import StringIO |
| 28 | 29 |
| 29 | 30 |
| 30 TOP1M_CSV_FILE_NAME = 'top-1m.csv' | 31 TOP1M_CSV_FILE_NAME = 'top-1m.csv' |
| 31 TOP1M_CSV_ZIP_LOCATION = ( | 32 TOP1M_CSV_ZIP_LOCATION = ( |
| (...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 98 qualified_website = website | 99 qualified_website = website |
| 99 else: | 100 else: |
| 100 qualified_website = 'http://www.%s' % website | 101 qualified_website = 'http://www.%s' % website |
| 101 websites.append(qualified_website) | 102 websites.append(qualified_website) |
| 102 | 103 |
| 103 archive_data_file = os.path.join( | 104 archive_data_file = os.path.join( |
| 104 '/', 'b', 'storage', 'webpage_archives', | 105 '/', 'b', 'storage', 'webpage_archives', |
| 105 options.pagesets_type, | 106 options.pagesets_type, |
| 106 'alexa%s-%s.json' % (options.start_number, options.end_number)) | 107 'alexa%s-%s.json' % (options.start_number, options.end_number)) |
| 107 | 108 |
| 108 page_set_content = """ | 109 page_set_content = { |
| 109 # Copyright 2015 The Chromium Authors. All rights reserved. | 110 'user_agent': options.useragent_type, |
| 110 # Use of this source code is governed by a BSD-style license that can be | 111 'archive_data_file': archive_data_file, |
| 111 # found in the LICENSE file. | 112 'urls_list': ','.join(websites), |
| 112 # pylint: disable=W0401,W0614 | |
| 113 | |
| 114 from telemetry import story | |
| 115 from telemetry.page import page as page_module | |
| 116 from telemetry.page import shared_page_state | |
| 117 from page_sets import repaint_helpers | |
| 118 | |
| 119 | |
| 120 class TypicalAlexaPage(page_module.Page): | |
| 121 | |
| 122 def __init__(self, url, page_set): | |
| 123 super(TypicalAlexaPage, self).__init__( | |
| 124 url=url, | |
| 125 page_set=page_set, | |
| 126 shared_page_state_class=shared_page_state.Shared%(user_agent)sPageState) | |
| 127 self.archive_data_file = '%(archive_data_file)s' | |
| 128 | |
| 129 def RunNavigateSteps(self, action_runner): | |
| 130 action_runner.Navigate(self.url) | |
| 131 action_runner.Wait(2) | |
| 132 | |
| 133 def RunPageInteractions(self, action_runner): | |
| 134 repaint_helpers.Repaint(action_runner) | |
| 135 | |
| 136 | |
| 137 class Alexa%(start)s_%(end)sPageSet(story.StorySet): | |
| 138 | |
| 139 def __init__(self): | |
| 140 super(Alexa%(start)s_%(end)sPageSet, self).__init__( | |
| 141 archive_data_file='%(archive_data_file)s') | |
| 142 | |
| 143 urls_list = %(urls_list)s | |
| 144 | |
| 145 for url in urls_list: | |
| 146 self.AddStory(TypicalAlexaPage(url, self)) | |
| 147 """ % { | |
| 148 "user_agent": options.useragent_type.capitalize(), | |
| 149 "archive_data_file": archive_data_file, | |
| 150 "start": options.start_number, | |
| 151 "end": options.end_number, | |
| 152 "urls_list": str(websites), | |
| 153 } | 113 } |
| 154 | 114 |
| 155 # Output the pageset to a file. | 115 # Output the pageset to a file. |
| 156 with open(os.path.join(options.pagesets_output_dir, 'alexa%s_%s.py' % ( | 116 with open(os.path.join(options.pagesets_output_dir, 'alexa%s_%s.py' % ( |
| 157 options.start_number, options.end_number)), | 117 options.start_number, options.end_number)), |
| 158 'w') as outfile: | 118 'w') as outfile: |
| 159 outfile.write(page_set_content) | 119 json.dump(page_set_content, outfile) |
| 160 | |
| OLD | NEW |