Chromium Code Reviews

Unified Diff: gm/rebaseline_server/download_actuals.py

Issue 310093003: rebaseline_server: download actual-results.json files from GCS instead of SVN (Closed)
Base URL: https://skia.googlesource.com/skia.git@master
Patch Set: combine import_gm and import_tools into fix_pythonpath (created 6 years, 6 months ago)
 #!/usr/bin/python
 
 """
 Copyright 2014 Google Inc.
 
 Use of this source code is governed by a BSD-style license that can be
 found in the LICENSE file.
 
 Download actual GM results for a particular builder.
 """
 
 # System-level imports
-import contextlib
 import optparse
 import os
 import posixpath
 import re
-import shutil
-import sys
-import urllib
 import urllib2
-import urlparse
 
 # Imports from within Skia
-#
-# We need to add the 'gm' and 'tools' directories, so that we can import
-# gm_json.py and buildbot_globals.py.
-#
-# Make sure that these dirs are in the PYTHONPATH, but add them at the *end*
-# so any dirs that are already in the PYTHONPATH will be preferred.
-#
-# TODO(epoger): Is it OK for this to depend on the 'tools' dir, given that
-# the tools dir is dependent on the 'gm' dir (to import gm_json.py)?
-TRUNK_DIRECTORY = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
-GM_DIRECTORY = os.path.join(TRUNK_DIRECTORY, 'gm')
-TOOLS_DIRECTORY = os.path.join(TRUNK_DIRECTORY, 'tools')
-if GM_DIRECTORY not in sys.path:
-  sys.path.append(GM_DIRECTORY)
-if TOOLS_DIRECTORY not in sys.path:
-  sys.path.append(TOOLS_DIRECTORY)
+import fix_pythonpath  # must do this first
+from pyutils import gs_utils
+from pyutils import url_utils
 import buildbot_globals
 import gm_json
 
-# Imports from third-party code
-APICLIENT_DIRECTORY = os.path.join(
-    TRUNK_DIRECTORY, 'third_party', 'externals', 'google-api-python-client')
-if APICLIENT_DIRECTORY not in sys.path:
-  sys.path.append(APICLIENT_DIRECTORY)
-from googleapiclient.discovery import build as build_service
-
 
 GM_SUMMARIES_BUCKET = buildbot_globals.Get('gm_summaries_bucket')
 DEFAULT_ACTUALS_BASE_URL = (
     'http://storage.googleapis.com/%s' % GM_SUMMARIES_BUCKET)
 DEFAULT_JSON_FILENAME = 'actual-results.json'
 
 
 class Download(object):
 
   def __init__(self, actuals_base_url=DEFAULT_ACTUALS_BASE_URL,
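
Note: the sys.path bookkeeping deleted above is presumably what the new "import fix_pythonpath" line centralizes. A minimal sketch of such a helper, based only on the code removed from this file (the real fix_pythonpath module may differ):

    # Hypothetical sketch of a fix_pythonpath-style helper; it mirrors the
    # sys.path setup removed from this file, not the actual module contents.
    import os
    import sys

    TRUNK_DIRECTORY = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
    for subdir in ('gm', 'tools'):
      path = os.path.join(TRUNK_DIRECTORY, subdir)
      if path not in sys.path:
        # Append rather than prepend, so dirs already in PYTHONPATH win.
        sys.path.append(path)
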
(...skipping 37 matching lines...)
       results_of_this_type = actual_results_dict[result_type]
       if not results_of_this_type:
         continue
       for image_name in sorted(results_of_this_type.keys()):
         (test, config) = self._image_filename_re.match(image_name).groups()
         (hash_type, hash_digest) = results_of_this_type[image_name]
         source_url = gm_json.CreateGmActualUrl(
             test_name=test, hash_type=hash_type, hash_digest=hash_digest,
             gm_actuals_root_url=self._gm_actuals_root_url)
         dest_path = os.path.join(dest_dir, config, test + '.png')
-        # TODO(epoger): To speed this up, we should only download files that
-        # we don't already have on local disk.
-        copy_contents(source_url=source_url, dest_path=dest_path,
-                      create_subdirs_if_needed=True)
+        url_utils.copy_contents(source_url=source_url, dest_path=dest_path,
+                                create_subdirs_if_needed=True)
 
 
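
Note: the TODO removed above (only download files we don't already have on local disk) is not reimplemented in the new lines shown here. A hypothetical sketch of that check, reusing the new url_utils.copy_contents call; not part of this CL:

    # Hypothetical optimization from the removed TODO: skip files that already
    # exist locally. This would not detect stale files whose hashes changed.
    if not os.path.exists(dest_path):
      url_utils.copy_contents(source_url=source_url, dest_path=dest_path,
                              create_subdirs_if_needed=True)
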
-def create_filepath_url(filepath):
-  """ Returns a file:/// URL pointing at the given filepath on local disk.
-
-  For now, this is only used by unittests, but I anticipate it being useful
-  in production, as a way for developers to run rebaseline_server over locally
-  generated images.
-
-  TODO(epoger): Move this function, and copy_contents(), into a shared
-  utility module. They are generally useful.
+def get_builders_list(summaries_bucket=GM_SUMMARIES_BUCKET):
+  """ Returns the list of builders we have actual results for.
 
   Args:
-    filepath: string; path to a file on local disk (may be absolute or relative,
-        and the file does not need to exist)
-
-  Returns:
-    A file:/// URL pointing at the file. Regardless of whether filepath was
-    specified as a relative or absolute path, the URL will contain an
-    absolute path to the file.
-
-  Raises:
-    An Exception, if filepath is already a URL.
+    summaries_bucket: Google Cloud Storage bucket containing the summary
+        JSON files
   """
-  if urlparse.urlparse(filepath).scheme:
-    raise Exception('"%s" is already a URL' % filepath)
-  return urlparse.urljoin(
-      'file:', urllib.pathname2url(os.path.abspath(filepath)))
-
-
-def copy_contents(source_url, dest_path, create_subdirs_if_needed=False):
-  """ Copies the full contents of the URL 'source_url' into
-  filepath 'dest_path'.
-
-  Args:
-    source_url: string; complete URL to read from
-    dest_path: string; complete filepath to write to (may be absolute or
-        relative)
-    create_subdirs_if_needed: boolean; whether to create subdirectories as
-        needed to create dest_path
-
-  Raises:
-    Some subclass of Exception if unable to read source_url or write dest_path.
-  """
-  if create_subdirs_if_needed:
-    dest_dir = os.path.dirname(dest_path)
-    if not os.path.exists(dest_dir):
-      os.makedirs(dest_dir)
-  with contextlib.closing(urllib.urlopen(source_url)) as source_handle:
-    with open(dest_path, 'wb') as dest_handle:
-      shutil.copyfileobj(fsrc=source_handle, fdst=dest_handle)
-
-
-def gcs_list_bucket_contents(bucket, subdir=None):
-  """ Returns files in the Google Cloud Storage bucket as a (dirs, files) tuple.
-
-  Uses the API documented at
-  https://developers.google.com/storage/docs/json_api/v1/objects/list
-
-  Args:
-    bucket: name of the Google Storage bucket
-    subdir: directory within the bucket to list, or None for root directory
-  """
-  # The GCS command relies on the subdir name (if any) ending with a slash.
-  if subdir and not subdir.endswith('/'):
-    subdir += '/'
-  subdir_length = len(subdir) if subdir else 0
-
-  storage = build_service('storage', 'v1')
-  command = storage.objects().list(
-      bucket=bucket, delimiter='/', fields='items(name),prefixes',
-      prefix=subdir)
-  results = command.execute()
-
-  # The GCS command returned two subdicts:
-  # prefixes: the full path of every directory within subdir, with trailing '/'
-  # items: property dict for each file object within subdir
-  #   (including 'name', which is full path of the object)
-  dirs = []
-  for dir_fullpath in results.get('prefixes', []):
-    dir_basename = dir_fullpath[subdir_length:]
-    dirs.append(dir_basename[:-1])  # strip trailing slash
-  files = []
-  for file_properties in results.get('items', []):
-    file_fullpath = file_properties['name']
-    file_basename = file_fullpath[subdir_length:]
-    files.append(file_basename)
-  return (dirs, files)
+  dirs, _ = gs_utils.list_bucket_contents(bucket=GM_SUMMARIES_BUCKET)
+  return dirs
 
 
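
Note: a minimal usage sketch of the new get_builders_list() helper together with the existing Download class; the destination directory and the loop are illustrative only, not part of this CL:

    # Hypothetical usage; '/tmp/actuals' is an arbitrary example path.
    downloader = Download()  # defaults to DEFAULT_ACTUALS_BASE_URL
    for builder in get_builders_list():
      downloader.fetch(builder_name=builder,
                       dest_dir=os.path.join('/tmp/actuals', builder))
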
 def main():
   parser = optparse.OptionParser()
   required_params = []
   parser.add_option('--actuals-base-url',
                     action='store', type='string',
                     default=DEFAULT_ACTUALS_BASE_URL,
                     help=('Base URL from which to read files containing JSON '
                           'summaries of actual GM results; defaults to '
(...skipping 17 matching lines...)
   parser.add_option('--json-filename',
                     action='store', type='string',
                     default=DEFAULT_JSON_FILENAME,
                     help=('JSON summary filename to read for each builder; '
                           'defaults to "%default".'))
   parser.add_option('--list-builders', action='store_true',
                     help=('List all available builders.'))
   (params, remaining_args) = parser.parse_args()
 
   if params.list_builders:
-    dirs, _ = gcs_list_bucket_contents(bucket=GM_SUMMARIES_BUCKET)
-    print '\n'.join(dirs)
+    print '\n'.join(get_builders_list())
     return
 
   # Make sure all required options were set,
   # and that there were no items left over in the command line.
   for required_param in required_params:
     if not getattr(params, required_param):
       raise Exception('required option \'%s\' was not set' % required_param)
   if len(remaining_args) is not 0:
     raise Exception('extra items specified in the command line: %s' %
                     remaining_args)
 
   downloader = Download(actuals_base_url=params.actuals_base_url)
   downloader.fetch(builder_name=params.builder,
                    dest_dir=params.dest_dir)
 
 
 
 if __name__ == '__main__':
   main()
