| Index: py/utils/gs_utils.py
|
| diff --git a/py/utils/gs_utils.py b/py/utils/gs_utils.py
|
| index c30295b7ea2f096acec73e0924f21df1a297da87..46c9bf17afe2aa9bc654135f02972e2f57a35320 100755
|
| --- a/py/utils/gs_utils.py
|
| +++ b/py/utils/gs_utils.py
|
| @@ -40,88 +40,110 @@ from boto.s3.bucketlistresultset import BucketListResultSet
|
| from boto.s3.prefix import Prefix
|
|
|
|
|
| -def delete_file(bucket, path):
|
| - """Delete a single file within a GS bucket.
|
| -
|
| - TODO(epoger): what if bucket or path does not exist? Should probably raise
|
| - an exception. Implement, and add a test to exercise this.
|
| -
|
| - Params:
|
| - bucket: GS bucket to delete a file from
|
| - path: full path (Posix-style) of the file within the bucket to delete
|
| - """
|
| - conn = _create_connection()
|
| - b = conn.get_bucket(bucket_name=bucket)
|
| - item = Key(b)
|
| - item.key = path
|
| - item.delete()
|
| -
|
| -
|
| -def upload_file(source_path, dest_bucket, dest_path):
|
| - """Upload contents of a local file to Google Storage.
|
| -
|
| - TODO(epoger): Add the extra parameters provided by upload_file() within
|
| - https://github.com/google/skia-buildbot/blob/master/slave/skia_slave_scripts/utils/old_gs_utils.py ,
|
| - so we can replace that function with this one.
|
| -
|
| - params:
|
| - source_path: full path (local-OS-style) on local disk to read from
|
| - dest_bucket: GCS bucket to copy the file to
|
| - dest_path: full path (Posix-style) within that bucket
|
| - """
|
| - conn = _create_connection()
|
| - b = conn.get_bucket(bucket_name=dest_bucket)
|
| - item = Key(b)
|
| - item.key = dest_path
|
| - item.set_contents_from_filename(filename=source_path)
|
| -
|
| -
|
| -def download_file(source_bucket, source_path, dest_path,
|
| - create_subdirs_if_needed=False):
|
| - """ Downloads a single file from Google Cloud Storage to local disk.
|
| -
|
| - Args:
|
| - source_bucket: GCS bucket to download the file from
|
| - source_path: full path (Posix-style) within that bucket
|
| - dest_path: full path (local-OS-style) on local disk to copy the file to
|
| - create_subdirs_if_needed: boolean; whether to create subdirectories as
|
| - needed to create dest_path
|
| - """
|
| - conn = _create_connection()
|
| - b = conn.get_bucket(bucket_name=source_bucket)
|
| - item = Key(b)
|
| - item.key = source_path
|
| - if create_subdirs_if_needed:
|
| - _makedirs_if_needed(os.path.dirname(dest_path))
|
| - with open(dest_path, 'w') as f:
|
| - item.get_contents_to_file(fp=f)
|
| -
|
| -
|
| -def list_bucket_contents(bucket, subdir=None):
|
| - """ Returns files in the Google Cloud Storage bucket as a (dirs, files) tuple.
|
| -
|
| - Args:
|
| - bucket: name of the Google Storage bucket
|
| - subdir: directory within the bucket to list, or None for root directory
|
| - """
|
| - # The GS command relies on the prefix (if any) ending with a slash.
|
| - prefix = subdir or ''
|
| - if prefix and not prefix.endswith('/'):
|
| - prefix += '/'
|
| - prefix_length = len(prefix) if prefix else 0
|
| -
|
| - conn = _create_connection()
|
| - b = conn.get_bucket(bucket_name=bucket)
|
| - lister = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/')
|
| - dirs = []
|
| - files = []
|
| - for item in lister:
|
| - t = type(item)
|
| - if t is Key:
|
| - files.append(item.key[prefix_length:])
|
| - elif t is Prefix:
|
| - dirs.append(item.name[prefix_length:-1])
|
| - return (dirs, files)
|
| +class GSUtils(object):
|
| + """Utilities for accessing Google Cloud Storage, using the boto library."""
|
| +
|
| + def __init__(self, boto_file_path=os.path.join('~', '.boto')):
|
| + """Constructor.
|
| +
|
| + Params:
|
| + boto_file_path: full path (local-OS-style) on local disk where .boto
|
| + credentials file can be found. An exception is thrown if this file
|
| + is missing.
|
| + TODO(epoger): Change missing-file behavior: allow the caller to
|
| + operate on public files in Google Storage.
|
| + """
|
| + boto_file_path = os.path.expanduser(boto_file_path)
|
| + print 'Reading boto file from %s' % boto_file_path
|
| + boto_dict = _config_file_as_dict(filepath=boto_file_path)
|
| + self._gs_access_key_id = boto_dict['gs_access_key_id']
|
| + self._gs_secret_access_key = boto_dict['gs_secret_access_key']
|
| +
|
| + def delete_file(self, bucket, path):
|
| + """Delete a single file within a GS bucket.
|
| +
|
| + TODO(epoger): what if bucket or path does not exist? Should probably raise
|
| + an exception. Implement, and add a test to exercise this.
|
| +
|
| + Params:
|
| + bucket: GS bucket to delete a file from
|
| + path: full path (Posix-style) of the file within the bucket to delete
|
| + """
|
| + conn = self._create_connection()
|
| + b = conn.get_bucket(bucket_name=bucket)
|
| + item = Key(b)
|
| + item.key = path
|
| + item.delete()
|
| +
|
| + def upload_file(self, source_path, dest_bucket, dest_path):
|
| + """Upload contents of a local file to Google Storage.
|
| +
|
| + TODO(epoger): Add the extra parameters provided by upload_file() within
|
| + https://github.com/google/skia-buildbot/blob/master/slave/skia_slave_scripts/utils/old_gs_utils.py ,
|
| + so we can replace that function with this one.
|
| +
|
| + Params:
|
| + source_path: full path (local-OS-style) on local disk to read from
|
| + dest_bucket: GCS bucket to copy the file to
|
| + dest_path: full path (Posix-style) within that bucket
|
| + """
|
| + conn = self._create_connection()
|
| + b = conn.get_bucket(bucket_name=dest_bucket)
|
| + item = Key(b)
|
| + item.key = dest_path
|
| + item.set_contents_from_filename(filename=source_path)
|
| +
|
| + def download_file(self, source_bucket, source_path, dest_path,
|
| + create_subdirs_if_needed=False):
|
| + """Downloads a single file from Google Cloud Storage to local disk.
|
| +
|
| + Args:
|
| + source_bucket: GCS bucket to download the file from
|
| + source_path: full path (Posix-style) within that bucket
|
| + dest_path: full path (local-OS-style) on local disk to copy the file to
|
| + create_subdirs_if_needed: boolean; whether to create subdirectories as
|
| + needed to create dest_path
|
| + """
|
| + conn = self._create_connection()
|
| + b = conn.get_bucket(bucket_name=source_bucket)
|
| + item = Key(b)
|
| + item.key = source_path
|
| + if create_subdirs_if_needed:
|
| + _makedirs_if_needed(os.path.dirname(dest_path))
|
| + with open(dest_path, 'wb') as f:  # binary mode avoids corrupting downloads
|
| + item.get_contents_to_file(fp=f)
|
| +
|
| + def list_bucket_contents(self, bucket, subdir=None):
|
| + """Returns files in the Google Storage bucket as a (dirs, files) tuple.
|
| +
|
| + Args:
|
| + bucket: name of the Google Storage bucket
|
| + subdir: directory within the bucket to list, or None for root directory
|
| + """
|
| + # Prefix-based GS listing relies on the prefix (if any) ending in a slash.
|
| + prefix = subdir or ''
|
| + if prefix and not prefix.endswith('/'):
|
| + prefix += '/'
|
| + prefix_length = len(prefix)
|
| +
|
| + conn = self._create_connection()
|
| + b = conn.get_bucket(bucket_name=bucket)
|
| + lister = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/')
|
| + dirs = []
|
| + files = []
|
| + for item in lister:
|
| + t = type(item)
|
| + if t is Key:
|
| + files.append(item.key[prefix_length:])
|
| + elif t is Prefix:
|
| + dirs.append(item.name[prefix_length:-1])
|
| + return (dirs, files)
|
| +
|
| + def _create_connection(self):
|
| + """Returns a GSConnection object we can use to access Google Storage."""
|
| + return GSConnection(
|
| + gs_access_key_id=self._gs_access_key_id,
|
| + gs_secret_access_key=self._gs_secret_access_key)
|
|
|
|
|
| def _config_file_as_dict(filepath):
|
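A quick usage sketch of the new GSUtils class, for reviewers trying the
interface end to end. The import path, bucket name, and file paths below are
placeholders, not values taken from this change:

  from gs_utils import GSUtils  # assumes py/utils is on PYTHONPATH

  gs = GSUtils()  # reads credentials from ~/.boto by default
  gs.upload_file(source_path='/tmp/report.txt',
                 dest_bucket='my-bucket',
                 dest_path='reports/report.txt')
  (dirs, files) = gs.list_bucket_contents(bucket='my-bucket', subdir='reports')
  gs.download_file(source_bucket='my-bucket',
                   source_path='reports/report.txt',
                   dest_path='/tmp/report-copy.txt',
                   create_subdirs_if_needed=True)
  gs.delete_file(bucket='my-bucket', path='reports/report.txt')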
| @@ -148,29 +170,8 @@ def _config_file_as_dict(filepath):
|
| return dic
|
|
|
|
|
| -def _create_connection(boto_file_path=os.path.join('~','.boto')):
|
| - """Returns a GSConnection object we can use to access Google Storage.
|
| -
|
| - Params:
|
| - boto_file_path: full path (local-OS-style) on local disk where .boto
|
| - credentials file can be found
|
| -
|
| - TODO(epoger): Change this module to be object-based, where __init__() reads
|
| - the boto file into boto_dict once instead of repeatedly for each operation.
|
| -
|
| - TODO(epoger): if the file does not exist, rather than raising an exception,
|
| - create a GSConnection that can operate on public files.
|
| - """
|
| - boto_file_path = os.path.expanduser(boto_file_path)
|
| - print 'Reading boto file from %s' % boto_file_path
|
| - boto_dict = _config_file_as_dict(filepath=boto_file_path)
|
| - return GSConnection(
|
| - gs_access_key_id=boto_dict['gs_access_key_id'],
|
| - gs_secret_access_key=boto_dict['gs_secret_access_key'])
|
| -
|
| -
|
| def _makedirs_if_needed(path):
|
| - """ Creates a directory (and any parent directories needed), if it does not
|
| + """Creates a directory (and any parent directories needed), if it does not
|
| exist yet.
|
|
|
| Args:
|
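Since credential loading moves from the removed module-level
_create_connection() into GSUtils.__init__(), the .boto file is read exactly
once per instance instead of once per operation. A sketch of what the
constructor expects, assuming the standard boto config layout (placeholder
values; the only keys this change actually reads are gs_access_key_id and
gs_secret_access_key):

  # Assumed ~/.boto layout (standard boto config format; placeholder values):
  #
  #   [Credentials]
  #   gs_access_key_id = GOOG_PLACEHOLDER_ID
  #   gs_secret_access_key = placeholder-secret
  #
  # GSUtils.__init__() only depends on _config_file_as_dict() surfacing the
  # 'gs_access_key_id' and 'gs_secret_access_key' entries from that file.
  gs = GSUtils(boto_file_path='/home/someuser/.boto')  # hypothetical path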
| @@ -188,6 +189,7 @@ def _run_self_test():
|
| remote_dir = 'gs_utils_test/%d' % random.randint(0, sys.maxint)
|
| subdir = 'subdir'
|
| filenames_to_upload = ['file1', 'file2']
|
| + gs = GSUtils()
|
|
|
| # Upload test files to Google Storage.
|
| local_src_dir = tempfile.mkdtemp()
|
| @@ -196,46 +198,47 @@ def _run_self_test():
|
| for filename in filenames_to_upload:
|
| with open(os.path.join(local_src_dir, subdir, filename), 'w') as f:
|
| f.write('contents of %s\n' % filename)
|
| - upload_file(source_path=os.path.join(local_src_dir, subdir, filename),
|
| - dest_bucket=bucket,
|
| - dest_path=posixpath.join(remote_dir, subdir, filename))
|
| + gs.upload_file(source_path=os.path.join(local_src_dir, subdir, filename),
|
| + dest_bucket=bucket,
|
| + dest_path=posixpath.join(remote_dir, subdir, filename))
|
| finally:
|
| shutil.rmtree(local_src_dir)
|
|
|
| # Get a list of the files we uploaded to Google Storage.
|
| - (dirs, files) = list_bucket_contents(
|
| + (dirs, files) = gs.list_bucket_contents(
|
| bucket=bucket, subdir=remote_dir)
|
| - assert dirs == [subdir]
|
| - assert files == []
|
| - (dirs, files) = list_bucket_contents(
|
| + assert dirs == [subdir], '%s == [%s]' % (dirs, subdir)
|
| + assert files == [], '%s == []' % files
|
| + (dirs, files) = gs.list_bucket_contents(
|
| bucket=bucket, subdir=posixpath.join(remote_dir, subdir))
|
| - assert dirs == []
|
| - assert files == filenames_to_upload
|
| + assert dirs == [], '%s == []' % dirs
|
| + assert files == filenames_to_upload, '%s == %s' % (files, filenames_to_upload)
|
|
|
| # Download the files we uploaded to Google Storage, and validate contents.
|
| local_dest_dir = tempfile.mkdtemp()
|
| try:
|
| for filename in filenames_to_upload:
|
| - download_file(source_bucket=bucket,
|
| - source_path=posixpath.join(remote_dir, subdir, filename),
|
| - dest_path=os.path.join(local_dest_dir, subdir, filename),
|
| - create_subdirs_if_needed=True)
|
| + gs.download_file(source_bucket=bucket,
|
| + source_path=posixpath.join(remote_dir, subdir, filename),
|
| + dest_path=os.path.join(local_dest_dir, subdir, filename),
|
| + create_subdirs_if_needed=True)
|
| with open(os.path.join(local_dest_dir, subdir, filename)) as f:
|
| file_contents = f.read()
|
| - assert file_contents == 'contents of %s\n' % filename
|
| + assert file_contents == 'contents of %s\n' % filename, (
|
| + '%s == "contents of %s\n"' % (file_contents, filename))
|
| finally:
|
| shutil.rmtree(local_dest_dir)
|
|
|
| # Delete all the files we uploaded to Google Storage.
|
| for filename in filenames_to_upload:
|
| - delete_file(bucket=bucket,
|
| - path=posixpath.join(remote_dir, subdir, filename))
|
| + gs.delete_file(bucket=bucket,
|
| + path=posixpath.join(remote_dir, subdir, filename))
|
|
|
| # Confirm that we deleted all the files we uploaded to Google Storage.
|
| - (dirs, files) = list_bucket_contents(
|
| + (dirs, files) = gs.list_bucket_contents(
|
| bucket=bucket, subdir=posixpath.join(remote_dir, subdir))
|
| - assert dirs == []
|
| - assert files == []
|
| + assert dirs == [], '%s == []' % dirs
|
| + assert files == [], '%s == []' % files
|
|
|
|
|
| # TODO(epoger): How should we exercise this self-test?
|
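One plausible answer to the TODO above, as a sketch only (this change does not
add it, and the module may already wire this up outside the visible hunks):
run the self-test when the module is invoked directly.

  if __name__ == '__main__':
      _run_self_test()
      print 'All gs_utils self-tests passed.'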
|
|