OLD | NEW |
1 #!/usr/bin/python | 1 #!/usr/bin/python |
2 | 2 |
3 """ | 3 """ |
4 Copyright 2014 Google Inc. | 4 Copyright 2014 Google Inc. |
5 | 5 |
6 Use of this source code is governed by a BSD-style license that can be | 6 Use of this source code is governed by a BSD-style license that can be |
7 found in the LICENSE file. | 7 found in the LICENSE file. |
8 | 8 |
9 Download actual GM results for a particular builder. | 9 Download actual GM results for a particular builder. |
10 """ | 10 """ |
11 | 11 |
12 # System-level imports | 12 # System-level imports |
13 import contextlib | |
14 import optparse | 13 import optparse |
15 import os | 14 import os |
16 import posixpath | 15 import posixpath |
17 import re | 16 import re |
18 import shutil | |
19 import sys | 17 import sys |
20 import urllib | |
21 import urllib2 | 18 import urllib2 |
22 import urlparse | |
23 | 19 |
24 # Imports from within Skia | 20 # Imports from within Skia |
25 # | 21 # |
26 # We need to add the 'gm' and 'tools' directories, so that we can import | 22 # We need to add the 'gm' and 'tools' directories, so that we can import |
27 # gm_json.py and buildbot_globals.py. | 23 # gm_json.py and buildbot_globals.py. |
28 # | 24 # |
29 # Make sure that these dirs are in the PYTHONPATH, but add them at the *end* | 25 # Make sure that these dirs are in the PYTHONPATH, but add them at the *end* |
30 # so any dirs that are already in the PYTHONPATH will be preferred. | 26 # so any dirs that are already in the PYTHONPATH will be preferred. |
31 # | 27 # |
32 # TODO(epoger): Is it OK for this to depend on the 'tools' dir, given that | 28 # TODO(epoger): Is it OK for this to depend on the 'tools' dir, given that |
33 # the tools dir is dependent on the 'gm' dir (to import gm_json.py)? | 29 # the tools dir is dependent on the 'gm' dir (to import gm_json.py)? |
# Locate the checkout root three directory levels above this file.
#
# __file__ may be a relative path (e.g. when the script is launched as
# "python download_actuals.py" from its own directory).  In that case the
# chain of dirname() calls collapses to '' and the directories below would be
# resolved against whatever the current working directory happens to be.
# Anchoring on the absolute path avoids that.
TRUNK_DIRECTORY = os.path.dirname(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
GM_DIRECTORY = os.path.join(TRUNK_DIRECTORY, 'gm')
TOOLS_DIRECTORY = os.path.join(TRUNK_DIRECTORY, 'tools')

# Append (rather than prepend) so that any directories already present in
# sys.path keep priority over these.
if GM_DIRECTORY not in sys.path:
    sys.path.append(GM_DIRECTORY)
if TOOLS_DIRECTORY not in sys.path:
    sys.path.append(TOOLS_DIRECTORY)
41 import buildbot_globals | 37 import buildbot_globals |
42 import gm_json | 38 import gm_json |
43 | 39 import gs_utils |
44 # Imports from third-party code | 40 import url_utils |
45 APICLIENT_DIRECTORY = os.path.join( | |
46 TRUNK_DIRECTORY, 'third_party', 'externals', 'google-api-python-client') | |
47 if APICLIENT_DIRECTORY not in sys.path: | |
48 sys.path.append(APICLIENT_DIRECTORY) | |
49 from googleapiclient.discovery import build as build_service | |
50 | 41 |
51 | 42 |
52 GM_SUMMARIES_BUCKET = buildbot_globals.Get('gm_summaries_bucket') | 43 GM_SUMMARIES_BUCKET = buildbot_globals.Get('gm_summaries_bucket') |
53 DEFAULT_ACTUALS_BASE_URL = ( | 44 DEFAULT_ACTUALS_BASE_URL = ( |
54 'http://storage.googleapis.com/%s' % GM_SUMMARIES_BUCKET) | 45 'http://storage.googleapis.com/%s' % GM_SUMMARIES_BUCKET) |
55 DEFAULT_JSON_FILENAME = 'actual-results.json' | 46 DEFAULT_JSON_FILENAME = 'actual-results.json' |
56 | 47 |
57 | 48 |
58 class Download(object): | 49 class Download(object): |
59 | 50 |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
98 results_of_this_type = actual_results_dict[result_type] | 89 results_of_this_type = actual_results_dict[result_type] |
99 if not results_of_this_type: | 90 if not results_of_this_type: |
100 continue | 91 continue |
101 for image_name in sorted(results_of_this_type.keys()): | 92 for image_name in sorted(results_of_this_type.keys()): |
102 (test, config) = self._image_filename_re.match(image_name).groups() | 93 (test, config) = self._image_filename_re.match(image_name).groups() |
103 (hash_type, hash_digest) = results_of_this_type[image_name] | 94 (hash_type, hash_digest) = results_of_this_type[image_name] |
104 source_url = gm_json.CreateGmActualUrl( | 95 source_url = gm_json.CreateGmActualUrl( |
105 test_name=test, hash_type=hash_type, hash_digest=hash_digest, | 96 test_name=test, hash_type=hash_type, hash_digest=hash_digest, |
106 gm_actuals_root_url=self._gm_actuals_root_url) | 97 gm_actuals_root_url=self._gm_actuals_root_url) |
107 dest_path = os.path.join(dest_dir, config, test + '.png') | 98 dest_path = os.path.join(dest_dir, config, test + '.png') |
108 # TODO(epoger): To speed this up, we should only download files that | 99 url_utils.copy_contents(source_url=source_url, dest_path=dest_path, |
109 # we don't already have on local disk. | 100 create_subdirs_if_needed=True) |
110 copy_contents(source_url=source_url, dest_path=dest_path, | |
111 create_subdirs_if_needed=True) | |
112 | 101 |
113 | 102 |
def create_filepath_url(filepath):
    """ Returns a file:/// URL pointing at the given filepath on local disk.

    Args:
      filepath: string; path to a file on local disk (may be absolute or
          relative, and the file does not need to exist)

    Returns:
      A file:/// URL pointing at the file.  The path is passed through
      os.path.abspath() first, so the URL always contains an absolute path
      regardless of how filepath was specified.

    Raises:
      An Exception, if filepath is already a URL (i.e. urlparse finds a
      scheme in it).
    """
    if urlparse.urlparse(filepath).scheme:
        raise Exception('"%s" is already a URL' % filepath)
    return urlparse.urljoin(
        'file:', urllib.pathname2url(os.path.abspath(filepath)))


def get_builders_list(summaries_bucket=GM_SUMMARIES_BUCKET):
    """ Returns the list of builders we have actual results for.

    Args:
      summaries_bucket: Google Cloud Storage bucket containing the summary
          JSON files

    Returns:
      The first element of the pair returned by
      gs_utils.list_bucket_contents() for the root of the bucket
      (presumably the directory names, one per builder -- confirm against
      gs_utils).
    """
    # Honor the caller-supplied bucket; listing GM_SUMMARIES_BUCKET
    # unconditionally would make the summaries_bucket parameter a no-op.
    dirs, _ = gs_utils.list_bucket_contents(bucket=summaries_bucket)
    return dirs
140 | |
141 | |
def copy_contents(source_url, dest_path, create_subdirs_if_needed=False):
    """ Copies the full contents of the URL 'source_url' into
    filepath 'dest_path'.

    Args:
      source_url: string; complete URL to read from
      dest_path: string; complete filepath to write to (may be absolute or
          relative)
      create_subdirs_if_needed: boolean; whether to create subdirectories as
          needed to create dest_path

    Raises:
      Some subclass of Exception if unable to read source_url or write
      dest_path.
    """
    # Resolve urlopen locally so the function works whether the interpreter
    # exposes it as urllib.urlopen (Python 2) or urllib.request.urlopen
    # (Python 3).
    try:
        from urllib import urlopen
    except ImportError:
        from urllib.request import urlopen

    if create_subdirs_if_needed:
        dest_dir = os.path.dirname(dest_path)
        # dest_dir is '' when dest_path is a bare filename; there is nothing
        # to create in that case, and os.makedirs('') would raise.
        if dest_dir and not os.path.exists(dest_dir):
            os.makedirs(dest_dir)
    with contextlib.closing(urlopen(source_url)) as source_handle:
        with open(dest_path, 'wb') as dest_handle:
            shutil.copyfileobj(fsrc=source_handle, fdst=dest_handle)
163 | |
164 | |
def gcs_list_bucket_contents(bucket, subdir=None):
    """ Returns files in the Google Cloud Storage bucket as a (dirs, files) tuple.

    Uses the API documented at
    https://developers.google.com/storage/docs/json_api/v1/objects/list

    Args:
      bucket: name of the Google Storage bucket
      subdir: directory within the bucket to list, or None for root directory

    Returns:
      A (dirs, files) tuple of lists.  Every entry has the leading subdir
      prefix removed; entries in dirs also have their trailing slash removed.
    """
    # The GCS command relies on the subdir name (if any) ending with a slash.
    if subdir and not subdir.endswith('/'):
        subdir += '/'
    subdir_length = len(subdir) if subdir else 0

    storage = build_service('storage', 'v1')
    objects = storage.objects()
    # nextPageToken must be part of the field mask; without it the client
    # cannot tell that more pages exist and long listings get truncated.
    request = objects.list(
        bucket=bucket, delimiter='/',
        fields='items(name),prefixes,nextPageToken', prefix=subdir)

    dirs = []
    files = []
    while request is not None:
        results = request.execute()

        # Each page carries two subdicts:
        # prefixes: the full path of every directory within subdir, with
        #           trailing '/'
        # items: property dict for each file object within subdir
        #        (including 'name', which is full path of the object)
        for dir_fullpath in results.get('prefixes', []):
            dir_basename = dir_fullpath[subdir_length:]
            dirs.append(dir_basename[:-1])  # strip trailing slash
        for file_properties in results.get('items', []):
            file_fullpath = file_properties['name']
            files.append(file_fullpath[subdir_length:])

        # list_next() returns None once the last page has been consumed.
        request = objects.list_next(
            previous_request=request, previous_response=results)
    return (dirs, files)
200 | 112 |
201 | 113 |
202 def main(): | 114 def main(): |
203 parser = optparse.OptionParser() | 115 parser = optparse.OptionParser() |
204 required_params = [] | 116 required_params = [] |
205 parser.add_option('--actuals-base-url', | 117 parser.add_option('--actuals-base-url', |
206 action='store', type='string', | 118 action='store', type='string', |
207 default=DEFAULT_ACTUALS_BASE_URL, | 119 default=DEFAULT_ACTUALS_BASE_URL, |
208 help=('Base URL from which to read files containing JSON ' | 120 help=('Base URL from which to read files containing JSON ' |
209 'summaries of actual GM results; defaults to ' | 121 'summaries of actual GM results; defaults to ' |
(...skipping 17 matching lines...) Expand all Loading... |
227 parser.add_option('--json-filename', | 139 parser.add_option('--json-filename', |
228 action='store', type='string', | 140 action='store', type='string', |
229 default=DEFAULT_JSON_FILENAME, | 141 default=DEFAULT_JSON_FILENAME, |
230 help=('JSON summary filename to read for each builder; ' | 142 help=('JSON summary filename to read for each builder; ' |
231 'defaults to "%default".')) | 143 'defaults to "%default".')) |
232 parser.add_option('--list-builders', action='store_true', | 144 parser.add_option('--list-builders', action='store_true', |
233 help=('List all available builders.')) | 145 help=('List all available builders.')) |
234 (params, remaining_args) = parser.parse_args() | 146 (params, remaining_args) = parser.parse_args() |
235 | 147 |
236 if params.list_builders: | 148 if params.list_builders: |
237 dirs, _ = gcs_list_bucket_contents(bucket=GM_SUMMARIES_BUCKET) | 149 print '\n'.join(get_builders_list()) |
238 print '\n'.join(dirs) | |
239 return | 150 return |
240 | 151 |
241 # Make sure all required options were set, | 152 # Make sure all required options were set, |
242 # and that there were no items left over in the command line. | 153 # and that there were no items left over in the command line. |
243 for required_param in required_params: | 154 for required_param in required_params: |
244 if not getattr(params, required_param): | 155 if not getattr(params, required_param): |
245 raise Exception('required option \'%s\' was not set' % required_param) | 156 raise Exception('required option \'%s\' was not set' % required_param) |
246 if len(remaining_args) is not 0: | 157 if len(remaining_args) is not 0: |
247 raise Exception('extra items specified in the command line: %s' % | 158 raise Exception('extra items specified in the command line: %s' % |
248 remaining_args) | 159 remaining_args) |
249 | 160 |
250 downloader = Download(actuals_base_url=params.actuals_base_url) | 161 downloader = Download(actuals_base_url=params.actuals_base_url) |
251 downloader.fetch(builder_name=params.builder, | 162 downloader.fetch(builder_name=params.builder, |
252 dest_dir=params.dest_dir) | 163 dest_dir=params.dest_dir) |
253 | 164 |
254 | 165 |
255 | 166 |
256 if __name__ == '__main__': | 167 if __name__ == '__main__': |
257 main() | 168 main() |
OLD | NEW |