Index: gm/rebaseline_server/download_actuals.py |
diff --git a/gm/rebaseline_server/download_actuals.py b/gm/rebaseline_server/download_actuals.py |
index 2f92898fd4b0451feb3dd1895157f21b51283919..636958be6b545c2251628a3b0f65f2d2db0c25d1 100755 |
--- a/gm/rebaseline_server/download_actuals.py |
+++ b/gm/rebaseline_server/download_actuals.py |
@@ -10,19 +10,44 @@ Download actual GM results for a particular builder. |
""" |
# System-level imports |
+import contextlib |
import optparse |
import os |
import posixpath |
import re |
+import shutil |
+import sys |
+import urllib |
import urllib2 |
+import urlparse |
# Imports from within Skia |
-import fix_pythonpath # must do this first |
-from pyutils import gs_utils |
-from pyutils import url_utils |
+# |
+# We need to add the 'gm' and 'tools' directories, so that we can import |
+# gm_json.py and buildbot_globals.py. |
+# |
+# Make sure that these dirs are in the PYTHONPATH, but add them at the *end* |
+# so any dirs that are already in the PYTHONPATH will be preferred. |
+# |
+# TODO(epoger): Is it OK for this to depend on the 'tools' dir, given that |
+# the tools dir is dependent on the 'gm' dir (to import gm_json.py)? |
+TRUNK_DIRECTORY = os.path.dirname(os.path.dirname(os.path.dirname(__file__))) |
+GM_DIRECTORY = os.path.join(TRUNK_DIRECTORY, 'gm') |
+TOOLS_DIRECTORY = os.path.join(TRUNK_DIRECTORY, 'tools') |
+if GM_DIRECTORY not in sys.path: |
+ sys.path.append(GM_DIRECTORY) |
+if TOOLS_DIRECTORY not in sys.path: |
+ sys.path.append(TOOLS_DIRECTORY) |
import buildbot_globals |
import gm_json |
+# Imports from third-party code |
+APICLIENT_DIRECTORY = os.path.join( |
+ TRUNK_DIRECTORY, 'third_party', 'externals', 'google-api-python-client') |
+if APICLIENT_DIRECTORY not in sys.path: |
+ sys.path.append(APICLIENT_DIRECTORY) |
+from googleapiclient.discovery import build as build_service |
+ |
GM_SUMMARIES_BUCKET = buildbot_globals.Get('gm_summaries_bucket') |
DEFAULT_ACTUALS_BASE_URL = ( |
@@ -80,19 +105,98 @@ class Download(object): |
test_name=test, hash_type=hash_type, hash_digest=hash_digest, |
gm_actuals_root_url=self._gm_actuals_root_url) |
dest_path = os.path.join(dest_dir, config, test + '.png') |
- url_utils.copy_contents(source_url=source_url, dest_path=dest_path, |
- create_subdirs_if_needed=True) |
+ # TODO(epoger): To speed this up, we should only download files that |
+ # we don't already have on local disk. |
+ copy_contents(source_url=source_url, dest_path=dest_path, |
+ create_subdirs_if_needed=True) |
+ |
+ |
+def create_filepath_url(filepath): |
+ """ Returns a file:/// URL pointing at the given filepath on local disk. |
+ For now, this is only used by unittests, but I anticipate it being useful |
+ in production, as a way for developers to run rebaseline_server over locally |
+ generated images. |
-def get_builders_list(summaries_bucket=GM_SUMMARIES_BUCKET): |
- """ Returns the list of builders we have actual results for. |
+ TODO(epoger): Move this function, and copy_contents(), into a shared |
+ utility module. They are generally useful. |
Args: |
- summaries_bucket: Google Cloud Storage bucket containing the summary |
- JSON files |
+ filepath: string; path to a file on local disk (may be absolute or relative, |
+ and the file does not need to exist) |
+ |
+ Returns: |
+ A file:/// URL pointing at the file. Regardless of whether filepath was |
+ specified as a relative or absolute path, the URL will contain an |
+ absolute path to the file. |
+ |
+ Raises: |
+    An Exception if filepath is already a URL. |
""" |
- dirs, _ = gs_utils.list_bucket_contents(bucket=GM_SUMMARIES_BUCKET) |
- return dirs |
+ if urlparse.urlparse(filepath).scheme: |
+ raise Exception('"%s" is already a URL' % filepath) |
+ return urlparse.urljoin( |
+ 'file:', urllib.pathname2url(os.path.abspath(filepath))) |
+ |
+ |
+def copy_contents(source_url, dest_path, create_subdirs_if_needed=False): |
+ """ Copies the full contents of the URL 'source_url' into |
+ filepath 'dest_path'. |
+ |
+ Args: |
+ source_url: string; complete URL to read from |
+ dest_path: string; complete filepath to write to (may be absolute or |
+ relative) |
+ create_subdirs_if_needed: boolean; whether to create subdirectories as |
+ needed to create dest_path |
+ |
+ Raises: |
+ Some subclass of Exception if unable to read source_url or write dest_path. |
+ """ |
+ if create_subdirs_if_needed: |
+ dest_dir = os.path.dirname(dest_path) |
+ if not os.path.exists(dest_dir): |
+ os.makedirs(dest_dir) |
+ with contextlib.closing(urllib.urlopen(source_url)) as source_handle: |
+ with open(dest_path, 'wb') as dest_handle: |
+ shutil.copyfileobj(fsrc=source_handle, fdst=dest_handle) |
+ |
+ |
+def gcs_list_bucket_contents(bucket, subdir=None): |
+  """ Returns the Google Cloud Storage bucket's contents as a (dirs, files) tuple. |
+ |
+ Uses the API documented at |
+ https://developers.google.com/storage/docs/json_api/v1/objects/list |
+ |
+ Args: |
+ bucket: name of the Google Storage bucket |
+ subdir: directory within the bucket to list, or None for root directory |
+ """ |
+ # The GCS command relies on the subdir name (if any) ending with a slash. |
+ if subdir and not subdir.endswith('/'): |
+ subdir += '/' |
+ subdir_length = len(subdir) if subdir else 0 |
+ |
+ storage = build_service('storage', 'v1') |
+ command = storage.objects().list( |
+ bucket=bucket, delimiter='/', fields='items(name),prefixes', |
+ prefix=subdir) |
+ results = command.execute() |
+ |
+ # The GCS command returned two subdicts: |
+ # prefixes: the full path of every directory within subdir, with trailing '/' |
+ # items: property dict for each file object within subdir |
+ # (including 'name', which is full path of the object) |
+ dirs = [] |
+ for dir_fullpath in results.get('prefixes', []): |
+ dir_basename = dir_fullpath[subdir_length:] |
+ dirs.append(dir_basename[:-1]) # strip trailing slash |
+ files = [] |
+ for file_properties in results.get('items', []): |
+ file_fullpath = file_properties['name'] |
+ file_basename = file_fullpath[subdir_length:] |
+ files.append(file_basename) |
+ return (dirs, files) |
def main(): |
@@ -130,7 +234,8 @@ def main(): |
(params, remaining_args) = parser.parse_args() |
if params.list_builders: |
- print '\n'.join(get_builders_list()) |
+ dirs, _ = gcs_list_bucket_contents(bucket=GM_SUMMARIES_BUCKET) |
+ print '\n'.join(dirs) |
return |
# Make sure all required options were set, |