Index: gm/rebaseline_server/download_actuals.py |
diff --git a/gm/rebaseline_server/download_actuals.py b/gm/rebaseline_server/download_actuals.py |
index 2f92898fd4b0451feb3dd1895157f21b51283919..636958be6b545c2251628a3b0f65f2d2db0c25d1 100755 |
--- a/gm/rebaseline_server/download_actuals.py |
+++ b/gm/rebaseline_server/download_actuals.py |
@@ -10,19 +10,44 @@ Download actual GM results for a particular builder. |
""" |
# System-level imports |
+import contextlib |
import optparse |
import os |
import posixpath |
import re |
+import shutil |
+import sys |
+import urllib |
import urllib2 |
+import urlparse |
# Imports from within Skia |
-import fix_pythonpath # must do this first |
-from pyutils import gs_utils |
-from pyutils import url_utils |
+# |
+# We need to add the 'gm' and 'tools' directories, so that we can import |
+# gm_json.py and buildbot_globals.py. |
+# |
+# Make sure that these dirs are in the PYTHONPATH, but add them at the *end* |
+# so any dirs that are already in the PYTHONPATH will be preferred. |
+# |
+# TODO(epoger): Is it OK for this to depend on the 'tools' dir, given that |
+# the tools dir is dependent on the 'gm' dir (to import gm_json.py)? |
+TRUNK_DIRECTORY = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) |
+GM_DIRECTORY = os.path.join(TRUNK_DIRECTORY, 'gm') |
+TOOLS_DIRECTORY = os.path.join(TRUNK_DIRECTORY, 'tools') |
+if GM_DIRECTORY not in sys.path: |
+ sys.path.append(GM_DIRECTORY) |
+if TOOLS_DIRECTORY not in sys.path: |
+ sys.path.append(TOOLS_DIRECTORY) |
import buildbot_globals |
import gm_json |
+# Imports from third-party code |
+APICLIENT_DIRECTORY = os.path.join( |
+ TRUNK_DIRECTORY, 'third_party', 'externals', 'google-api-python-client') |
+if APICLIENT_DIRECTORY not in sys.path: |
+ sys.path.append(APICLIENT_DIRECTORY) |
+from googleapiclient.discovery import build as build_service |
+ |
GM_SUMMARIES_BUCKET = buildbot_globals.Get('gm_summaries_bucket') |
DEFAULT_ACTUALS_BASE_URL = ( |
@@ -80,19 +105,98 @@ class Download(object): |
test_name=test, hash_type=hash_type, hash_digest=hash_digest, |
gm_actuals_root_url=self._gm_actuals_root_url) |
dest_path = os.path.join(dest_dir, config, test + '.png') |
- url_utils.copy_contents(source_url=source_url, dest_path=dest_path, |
- create_subdirs_if_needed=True) |
+ # TODO(epoger): To speed this up, we should only download files that |
+ # we don't already have on local disk. |
+ copy_contents(source_url=source_url, dest_path=dest_path, |
+ create_subdirs_if_needed=True) |
+ |
+ |
+def create_filepath_url(filepath): |
+ """ Returns a file:/// URL pointing at the given filepath on local disk. |
+ For now, this is only used by unittests, but I anticipate it being useful |
+ in production, as a way for developers to run rebaseline_server over locally |
+ generated images. |
-def get_builders_list(summaries_bucket=GM_SUMMARIES_BUCKET): |
- """ Returns the list of builders we have actual results for. |
+ TODO(epoger): Move this function, and copy_contents(), into a shared |
+ utility module. They are generally useful. |
Args: |
- summaries_bucket: Google Cloud Storage bucket containing the summary |
- JSON files |
+ filepath: string; path to a file on local disk (may be absolute or relative, |
+ and the file does not need to exist) |
+ |
+ Returns: |
+ A file:/// URL pointing at the file. Regardless of whether filepath was |
+ specified as a relative or absolute path, the URL will contain an |
+ absolute path to the file. |
+ |
+ Raises: |
+    An Exception if filepath is already a URL. |
""" |
- dirs, _ = gs_utils.list_bucket_contents(bucket=GM_SUMMARIES_BUCKET) |
- return dirs |
+ if urlparse.urlparse(filepath).scheme: |
+ raise Exception('"%s" is already a URL' % filepath) |
+ return urlparse.urljoin( |
+ 'file:', urllib.pathname2url(os.path.abspath(filepath))) |
+ |
+ |
+def copy_contents(source_url, dest_path, create_subdirs_if_needed=False): |
+ """ Copies the full contents of the URL 'source_url' into |
+ filepath 'dest_path'. |
+ |
+ Args: |
+ source_url: string; complete URL to read from |
+ dest_path: string; complete filepath to write to (may be absolute or |
+ relative) |
+ create_subdirs_if_needed: boolean; whether to create subdirectories as |
+ needed to create dest_path |
+ |
+ Raises: |
+ Some subclass of Exception if unable to read source_url or write dest_path. |
+ """ |
+ if create_subdirs_if_needed: |
+ dest_dir = os.path.dirname(dest_path) |
+ if not os.path.exists(dest_dir): |
+ os.makedirs(dest_dir) |
+ with contextlib.closing(urllib.urlopen(source_url)) as source_handle: |
+ with open(dest_path, 'wb') as dest_handle: |
+ shutil.copyfileobj(fsrc=source_handle, fdst=dest_handle) |
+ |
+ |
+def gcs_list_bucket_contents(bucket, subdir=None): |
+  """ Returns the Google Cloud Storage bucket's contents as a (dirs, files) tuple. |
+ |
+ Uses the API documented at |
+ https://developers.google.com/storage/docs/json_api/v1/objects/list |
+ |
+ Args: |
+ bucket: name of the Google Storage bucket |
+ subdir: directory within the bucket to list, or None for root directory |
+ """ |
+ # The GCS command relies on the subdir name (if any) ending with a slash. |
+ if subdir and not subdir.endswith('/'): |
+ subdir += '/' |
+ subdir_length = len(subdir) if subdir else 0 |
+ |
+ storage = build_service('storage', 'v1') |
+ command = storage.objects().list( |
+ bucket=bucket, delimiter='/', fields='items(name),prefixes', |
+ prefix=subdir) |
+ results = command.execute() |
+ |
+ # The GCS command returned two subdicts: |
+ # prefixes: the full path of every directory within subdir, with trailing '/' |
+ # items: property dict for each file object within subdir |
+ # (including 'name', which is full path of the object) |
+ dirs = [] |
+ for dir_fullpath in results.get('prefixes', []): |
+ dir_basename = dir_fullpath[subdir_length:] |
+ dirs.append(dir_basename[:-1]) # strip trailing slash |
+ files = [] |
+ for file_properties in results.get('items', []): |
+ file_fullpath = file_properties['name'] |
+ file_basename = file_fullpath[subdir_length:] |
+ files.append(file_basename) |
+ return (dirs, files) |
def main(): |
@@ -130,7 +234,8 @@ def main(): |
(params, remaining_args) = parser.parse_args() |
if params.list_builders: |
- print '\n'.join(get_builders_list()) |
+ dirs, _ = gcs_list_bucket_contents(bucket=GM_SUMMARIES_BUCKET) |
+ print '\n'.join(dirs) |
return |
# Make sure all required options were set, |