| Index: py/utils/gs_utils.py
|
| diff --git a/py/utils/gs_utils.py b/py/utils/gs_utils.py
|
| index 031a0c3341d15bc70d628f07ff1b0d94a906de0f..ecbe68c74e656abbba7e5d3fbfd151ef0ae2f854 100755
|
| --- a/py/utils/gs_utils.py
|
| +++ b/py/utils/gs_utils.py
|
| @@ -36,6 +36,7 @@ for import_subdir in ['boto']:
|
| # We need to insert at the beginning of the path, to make sure that our
|
| # imported versions are favored over others that might be in the path.
|
| sys.path.insert(0, import_dirpath)
|
| +from boto.exception import BotoServerError
|
| from boto.gs import acl
|
| from boto.gs.bucket import Bucket
|
| from boto.gs.connection import GSConnection
|
| @@ -44,20 +45,33 @@ from boto.s3.bucketlistresultset import BucketListResultSet
|
| from boto.s3.connection import SubdomainCallingFormat
|
| from boto.s3.prefix import Prefix
|
|
|
| -# Permissions that may be set on each file in Google Storage.
|
| -# See SupportedPermissions in
|
| +# Predefined (aka "canned") ACLs that provide a "base coat" of permissions for
|
| +# each file in Google Storage. See CannedACLStrings in
|
| # https://github.com/boto/boto/blob/develop/boto/gs/acl.py
|
| +# Also see https://developers.google.com/storage/docs/accesscontrol
|
| +PREDEFINED_ACL_AUTHENTICATED_READ = 'authenticated-read'
|
| +PREDEFINED_ACL_BUCKET_OWNER_FULL_CONTROL = 'bucket-owner-full-control'
|
| +PREDEFINED_ACL_BUCKET_OWNER_READ = 'bucket-owner-read'
|
| +PREDEFINED_ACL_PRIVATE = 'private'
|
| +PREDEFINED_ACL_PROJECT_PRIVATE = 'project-private'
|
| +PREDEFINED_ACL_PUBLIC_READ = 'public-read'
|
| +PREDEFINED_ACL_PUBLIC_READ_WRITE = 'public-read-write'
|
| +
|
| +# "Fine-grained" permissions that may be set per user/group on each file in
|
| +# Google Storage. See SupportedPermissions in
|
| +# https://github.com/boto/boto/blob/develop/boto/gs/acl.py
|
| +# Also see https://developers.google.com/storage/docs/accesscontrol
|
| PERMISSION_NONE = None
|
| PERMISSION_OWNER = 'FULL_CONTROL'
|
| PERMISSION_READ = 'READ'
|
| PERMISSION_WRITE = 'WRITE'
|
|
|
| -# Types of identifiers we can use to set ACLs.
|
| +# Types of identifiers we can use to set "fine-grained" ACLs.
|
| ID_TYPE_GROUP_BY_DOMAIN = acl.GROUP_BY_DOMAIN
|
| -ID_TYPE_GROUP_BY_EMAIL = acl.GROUP_BY_EMAIL
|
| -ID_TYPE_GROUP_BY_ID = acl.GROUP_BY_ID
|
| -ID_TYPE_USER_BY_EMAIL = acl.USER_BY_EMAIL
|
| -ID_TYPE_USER_BY_ID = acl.USER_BY_ID
|
| +ID_TYPE_GROUP_BY_EMAIL = acl.GROUP_BY_EMAIL
|
| +ID_TYPE_GROUP_BY_ID = acl.GROUP_BY_ID
|
| +ID_TYPE_USER_BY_EMAIL = acl.USER_BY_EMAIL
|
| +ID_TYPE_USER_BY_ID = acl.USER_BY_ID
|
|
|
| # Which field we get/set in ACL entries, depending on ID_TYPE.
|
| FIELD_BY_ID_TYPE = {
|
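For reference, a minimal sketch of how the two ACL layers combine once the
upload_file() changes below land (the bucket and domain names here are
hypothetical):

    gs = GSUtils(boto_file_path=os.path.expanduser(os.path.join('~','.boto')))
    gs.upload_file(
        source_path='/tmp/report.html', dest_bucket='my-bucket',
        dest_path='reports/report.html',
        # "Base coat": one of the PREDEFINED_ACL_* constants, or None to
        # inherit dest_bucket's default object ACL.
        predefined_acl=PREDEFINED_ACL_PRIVATE,
        # Fine-grained grants layered on top of the predefined ACL.
        fine_grained_acl_list=[
            (ID_TYPE_GROUP_BY_DOMAIN, 'example.com', PERMISSION_READ)])
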
| @@ -120,16 +134,21 @@ class GSUtils(object):
|
| bucket: GS bucket to delete a file from
|
| path: full path (Posix-style) of the file within the bucket to delete
|
| """
|
| - conn = self._create_connection()
|
| - b = conn.get_bucket(bucket_name=bucket)
|
| + b = self._connect_to_bucket(bucket_name=bucket)
|
| item = Key(b)
|
| item.key = path
|
| - item.delete()
|
| + try:
|
| + item.delete()
|
| + except BotoServerError as e:
|
| + e.body = (repr(e.body) +
|
| + ' while deleting bucket=%s, path=%s' % (bucket, path))
|
| + raise
|
|
|
| - def upload_file(self, source_path, dest_bucket, dest_path):
|
| + def upload_file(self, source_path, dest_bucket, dest_path,
|
| + predefined_acl=None, fine_grained_acl_list=None):
|
| """Upload contents of a local file to Google Storage.
|
|
|
| - TODO(epoger): Add the extra parameters provided by upload_file() within
|
| + TODO(epoger): Add the only_if_modified param provided by upload_file() in
|
| https://github.com/google/skia-buildbot/blob/master/slave/skia_slave_scripts/utils/old_gs_utils.py ,
|
| so we can replace that function with this one.
|
|
|
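The annotate-and-reraise pattern above recurs throughout this patch: context
is appended to e.body, and the bare raise re-raises the same exception so its
type and traceback are preserved. If the duplication grows, one hypothetical
way to factor it out (not part of this patch) is a small context manager:

    from contextlib import contextmanager

    from boto.exception import BotoServerError

    @contextmanager
    def _annotate_boto_errors(context_fmt, *args):
      """Append context to any BotoServerError raised inside the block."""
      try:
        yield
      except BotoServerError as e:
        e.body = repr(e.body) + ' while ' + (context_fmt % args)
        raise  # bare raise keeps the original type and traceback

    # Usage, equivalent to the inline try/except in delete_file():
    with _annotate_boto_errors('deleting bucket=%s, path=%s', bucket, path):
      item.delete()
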
| @@ -137,12 +156,96 @@ class GSUtils(object):
|
| source_path: full path (local-OS-style) on local disk to read from
|
| dest_bucket: GCS bucket to copy the file to
|
| dest_path: full path (Posix-style) within that bucket
|
| + predefined_acl: which predefined ACL to apply to the file on Google
|
| + Storage; must be one of the PREDEFINED_ACL_* constants defined above.
|
| + If None, inherits dest_bucket's default object ACL.
|
| + TODO(epoger): add unittests for this param, although it seems to work
|
| + in my manual testing
|
| + fine_grained_acl_list: list of (id_type, id_value, permission) tuples
|
| + to apply to the uploaded file (on top of the predefined_acl),
|
| + or None if predefined_acl is sufficient
|
| """
|
| - conn = self._create_connection()
|
| - b = conn.get_bucket(bucket_name=dest_bucket)
|
| + b = self._connect_to_bucket(bucket_name=dest_bucket)
|
| item = Key(b)
|
| item.key = dest_path
|
| - item.set_contents_from_filename(filename=source_path)
|
| + try:
|
| + item.set_contents_from_filename(filename=source_path,
|
| + policy=predefined_acl)
|
| + except BotoServerError as e:
|
| + e.body = (repr(e.body) +
|
| + ' while uploading source_path=%s to bucket=%s, path=%s' % (
|
| + source_path, dest_bucket, item.key))
|
| + raise
|
| + # TODO(epoger): This may be inefficient, because set_acl() calls
|
| + # _connect_to_bucket() again for each tuple in fine_grained_acl_list.
|
| + # Depending on how expensive that call is, we may want to optimize this.
|
| + for (id_type, id_value, permission) in fine_grained_acl_list or []:
|
| + self.set_acl(
|
| + bucket=dest_bucket, path=item.key,
|
| + id_type=id_type, id_value=id_value, permission=permission)
|
| +
|
| + def upload_dir_contents(self, source_dir, dest_bucket, dest_dir,
|
| + predefined_acl=None, fine_grained_acl_list=None):
|
| + """Recursively upload contents of a local directory to Google Storage.
|
| +
|
| + params:
|
| + source_dir: full path (local-OS-style) on local disk of directory to copy
|
| + contents of
|
| + dest_bucket: GCS bucket to copy the files into
|
| + dest_dir: full path (Posix-style) within that bucket; write the files into
|
| + this directory
|
| + predefined_acl: which predefined ACL to apply to the files on Google
|
| + Storage; must be one of the PREDEFINED_ACL_* constants defined above.
|
| + If None, inherits dest_bucket's default object ACL.
|
| + TODO(epoger): add unittests for this param, although it seems to work
|
| + in my manual testing
|
| + fine_grained_acl_list: list of (id_type, id_value, permission) tuples
|
| + to apply to every file uploaded (on top of the predefined_acl),
|
| + or None if predefined_acl is sufficient
|
| + TODO(epoger): add unittests for this param, although it seems to work
|
| + in my manual testing
|
| +
|
| + The copy operates as a "merge with overwrite": any files in source_dir will
|
| + be "overlaid" on top of the existing content in dest_dir. Existing files
|
| + with the same names will be overwritten.
|
| +
|
| + TODO(epoger): Upload multiple files simultaneously to reduce latency.
|
| +
|
| + TODO(epoger): Add a "noclobber" mode that will not upload any files would
|
| + overwrite existing files in Google Storage.
|
| +
|
| + TODO(epoger): Consider adding a do_compress parameter that would compress
|
| + the file using gzip before upload, and add a "Content-Encoding:gzip" header
|
| + so that HTTP downloads of the file would be unzipped automatically.
|
| + See https://developers.google.com/storage/docs/gsutil/addlhelp/
|
| + WorkingWithObjectMetadata#content-encoding
|
| + """
|
| + b = self._connect_to_bucket(bucket_name=dest_bucket)
|
| + for filename in sorted(os.listdir(source_dir)):
|
| + local_path = os.path.join(source_dir, filename)
|
| + if os.path.isdir(local_path):
|
| + self.upload_dir_contents( # recurse
|
| + source_dir=local_path, dest_bucket=dest_bucket,
|
| + dest_dir=posixpath.join(dest_dir, filename),
|
| + predefined_acl=predefined_acl,
|
| + fine_grained_acl_list=fine_grained_acl_list)
|
| + else:
|
| + item = Key(b)
|
| + item.key = posixpath.join(dest_dir, filename)
|
| + try:
|
| + item.set_contents_from_filename(
|
| + filename=local_path, policy=predefined_acl)
|
| + except BotoServerError as e:
|
| + e.body = (repr(e.body) +
|
| + ' while uploading local_path=%s to bucket=%s, path=%s' % (
|
| + local_path, dest_bucket, item.key))
|
| + raise
|
| + # TODO(epoger): This may be inefficient, because set_acl() calls
|
| + # _connect_to_bucket() again for every file. Depending on how expensive
|
| + # that call is, we may want to optimize this.
|
| + for (id_type, id_value, permission) in fine_grained_acl_list or []:
|
| + self.set_acl(
|
| + bucket=dest_bucket, path=item.key,
|
| + id_type=id_type, id_value=id_value, permission=permission)
|
|
|
| def download_file(self, source_bucket, source_path, dest_path,
|
| create_subdirs_if_needed=False):
|
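A hypothetical call exercising the new directory upload together with both
ACL layers (all names made up):

    gs.upload_dir_contents(
        source_dir='/tmp/results', dest_bucket='my-bucket',
        dest_dir='runs/2014/07',
        predefined_acl=PREDEFINED_ACL_PROJECT_PRIVATE,
        fine_grained_acl_list=[
            (ID_TYPE_GROUP_BY_DOMAIN, 'example.com', PERMISSION_READ)])
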
| @@ -155,14 +258,59 @@ class GSUtils(object):
|
| create_subdirs_if_needed: boolean; whether to create subdirectories as
|
| needed to create dest_path
|
| """
|
| - conn = self._create_connection()
|
| - b = conn.get_bucket(bucket_name=source_bucket)
|
| + b = self._connect_to_bucket(bucket_name=source_bucket)
|
| item = Key(b)
|
| item.key = source_path
|
| if create_subdirs_if_needed:
|
| _makedirs_if_needed(os.path.dirname(dest_path))
|
| with open(dest_path, 'w') as f:
|
| - item.get_contents_to_file(fp=f)
|
| + try:
|
| + item.get_contents_to_file(fp=f)
|
| + except BotoServerError as e:
|
| + e.body = (repr(e.body) +
|
| + ' while downloading bucket=%s, path=%s to local_path=%s' % (
|
| + source_bucket, source_path, dest_path))
|
| + raise
|
| +
|
| + def download_dir_contents(self, source_bucket, source_dir, dest_dir):
|
| + """Recursively download contents of a Google Storage directory to local disk
|
| +
|
| + params:
|
| + source_bucket: GCS bucket to copy the files from
|
| + source_dir: full path (Posix-style) within that bucket; read the files
|
| + from this directory
|
| + dest_dir: full path (local-OS-style) on local disk of directory to copy
|
| + the files into
|
| +
|
| + The copy operates as a "merge with overwrite": any files in source_dir will
|
| + be "overlaid" on top of the existing content in dest_dir. Existing files
|
| + with the same names will be overwritten.
|
| +
|
| + TODO(epoger): Download multiple files simultaneously to reduce latency.
|
| + """
|
| + _makedirs_if_needed(dest_dir)
|
| + b = self._connect_to_bucket(bucket_name=source_bucket)
|
| + (dirs, files) = self.list_bucket_contents(
|
| + bucket=source_bucket, subdir=source_dir)
|
| +
|
| + for filename in files:
|
| + item = Key(b)
|
| + item.key = posixpath.join(source_dir, filename)
|
| + dest_path = os.path.join(dest_dir, filename)
|
| + with open(dest_path, 'w') as f:
|
| + try:
|
| + item.get_contents_to_file(fp=f)
|
| + except BotoServerError as e:
|
| + e.body = (repr(e.body) +
|
| + ' while downloading bucket=%s, path=%s to local_path=%s' % (
|
| + source_bucket, item.key, dest_path))
|
| + raise
|
| +
|
| + for dirname in dirs:
|
| + self.download_dir_contents( # recurse
|
| + source_bucket=source_bucket,
|
| + source_dir=posixpath.join(source_dir, dirname),
|
| + dest_dir=os.path.join(dest_dir, dirname))
|
|
|
| def get_acl(self, bucket, path, id_type, id_value):
|
| """Retrieve partial access permissions on a single file in Google Storage.
|
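And the reverse direction, mirroring the same hypothetical tree back to local
disk; per the "merge with overwrite" semantics above, existing local files
with the same names are overwritten:

    gs.download_dir_contents(source_bucket='my-bucket',
                             source_dir='runs/2014/07',
                             dest_dir='/tmp/results-copy')
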
| @@ -172,6 +320,9 @@ class GSUtils(object):
|
| rights based on *other* id_types (e.g., perhaps they have group access
|
| rights, beyond their individual access rights).
|
|
|
| + TODO(epoger): What if the remote file does not exist? This should probably
|
| + raise an exception in that case.
|
| +
|
| Params:
|
| bucket: GS bucket
|
| path: full path (Posix-style) to the file within that bucket
|
| @@ -184,8 +335,7 @@ class GSUtils(object):
|
| permissions have been set.
|
| """
|
| field = FIELD_BY_ID_TYPE[id_type]
|
| - conn = self._create_connection()
|
| - b = conn.get_bucket(bucket_name=bucket)
|
| + b = self._connect_to_bucket(bucket_name=bucket)
|
| acls = b.get_acl(key_name=path)
|
| matching_entries = [entry for entry in acls.entries.entry_list
|
| if (entry.scope.type == id_type) and
|
| @@ -208,6 +358,9 @@ class GSUtils(object):
|
| If there is already a permission set on this file for this id_type/id_value
|
| combination, this call will overwrite it.
|
|
|
| + TODO(epoger): What if the remote file does not exist? This should probably
|
| + raise an exception in that case.
|
| +
|
| Params:
|
| bucket: GS bucket
|
| path: full path (Posix-style) to the file within that bucket
|
| @@ -231,8 +384,7 @@ class GSUtils(object):
|
| assert PERMISSION_WRITE == get_acl(bucket, path, id_type, id_value)
|
| """
|
| field = FIELD_BY_ID_TYPE[id_type]
|
| - conn = self._create_connection()
|
| - b = conn.get_bucket(bucket_name=bucket)
|
| + b = self._connect_to_bucket(bucket_name=bucket)
|
| acls = b.get_acl(key_name=path)
|
|
|
| # Remove any existing entries that refer to the same id_type/id_value,
|
| @@ -257,6 +409,9 @@ class GSUtils(object):
|
| def list_bucket_contents(self, bucket, subdir=None):
|
| """Returns files in the Google Storage bucket as a (dirs, files) tuple.
|
|
|
| + TODO(epoger): This should raise an exception if subdir does not exist in
|
| + Google Storage; right now, it just returns empty contents.
|
| +
|
| Args:
|
| bucket: name of the Google Storage bucket
|
| subdir: directory within the bucket to list, or None for root directory
|
| @@ -267,8 +422,7 @@ class GSUtils(object):
|
| prefix += '/'
|
| prefix_length = len(prefix) if prefix else 0
|
|
|
| - conn = self._create_connection()
|
| - b = conn.get_bucket(bucket_name=bucket)
|
| + b = self._connect_to_bucket(bucket_name=bucket)
|
| lister = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/')
|
| dirs = []
|
| files = []
|
| @@ -280,6 +434,18 @@ class GSUtils(object):
|
| dirs.append(item.name[prefix_length:-1])
|
| return (dirs, files)
|
|
|
| + def _connect_to_bucket(self, bucket_name):
|
| + """Returns a Bucket object we can use to access a particular bucket in GS.
|
| +
|
| + Params:
|
| + bucket_name: name of the bucket (e.g., 'chromium-skia-gm')
|
| + """
|
| + try:
|
| + return self._create_connection().get_bucket(bucket_name=bucket_name)
|
| + except BotoServerError as e:
|
| + e.body = repr(e.body) + ' while connecting to bucket=%s' % bucket_name
|
| + raise
|
| +
|
| def _create_connection(self):
|
| """Returns a GSConnection object we can use to access Google Storage."""
|
| if self._gs_access_key_id:
|
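list_bucket_contents() returns one level of the hierarchy as a (dirs, files)
tuple, with the prefix and any trailing '/' stripped from each name; a quick
sketch of reading one level (hypothetical names again):

    (dirs, files) = gs.list_bucket_contents(bucket='my-bucket', subdir='runs')
    for dirname in dirs:    # immediate subdirectories of 'runs'
      print 'dir:  %s' % dirname
    for filename in files:  # files directly within 'runs'
      print 'file: %s' % filename
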
| @@ -349,16 +515,26 @@ and write gs://chromium-skia-gm ?
|
| subdir = 'subdir'
|
| filenames_to_upload = ['file1', 'file2']
|
|
|
| - # Upload test files to Google Storage.
|
| + # Upload test files to Google Storage, checking that their fine-grained
|
| + # ACLs were set correctly.
|
| + id_type = ID_TYPE_GROUP_BY_DOMAIN
|
| + id_value = 'chromium.org'
|
| + set_permission = PERMISSION_READ
|
| local_src_dir = tempfile.mkdtemp()
|
| os.mkdir(os.path.join(local_src_dir, subdir))
|
| try:
|
| for filename in filenames_to_upload:
|
| with open(os.path.join(local_src_dir, subdir, filename), 'w') as f:
|
| f.write('contents of %s\n' % filename)
|
| - gs.upload_file(source_path=os.path.join(local_src_dir, subdir, filename),
|
| - dest_bucket=bucket,
|
| - dest_path=posixpath.join(remote_dir, subdir, filename))
|
| + dest_path = posixpath.join(remote_dir, subdir, filename)
|
| + gs.upload_file(
|
| + source_path=os.path.join(local_src_dir, subdir, filename),
|
| + dest_bucket=bucket, dest_path=dest_path,
|
| + fine_grained_acl_list=[(id_type, id_value, set_permission)])
|
| + got_permission = gs.get_acl(bucket=bucket, path=dest_path,
|
| + id_type=id_type, id_value=id_value)
|
| + assert got_permission == set_permission, '%s == %s' % (
|
| + got_permission, set_permission)
|
| finally:
|
| shutil.rmtree(local_src_dir)
|
|
|
| @@ -434,10 +610,68 @@ and write gs://chromium-skia-gm ?
|
| assert files == [], '%s == []' % files
|
|
|
|
|
| +def _test_dir_upload_and_download():
|
| + """Test upload_dir_contents() and download_dir_contents()."""
|
| + try:
|
| + gs = GSUtils(boto_file_path=os.path.expanduser(os.path.join('~','.boto')))
|
| + except:
|
| + print """
|
| +Failed to instantiate GSUtils object with default .boto file path.
|
| +Do you have a ~/.boto file that provides the credentials needed to read
|
| +and write gs://chromium-skia-gm ?
|
| +"""
|
| + raise
|
| +
|
| + bucket = 'chromium-skia-gm'
|
| + remote_dir = 'gs_utils_test/%d' % random.randint(0, sys.maxint)
|
| + subdir = 'subdir'
|
| + filenames = ['file1', 'file2']
|
| +
|
| + # Create directory tree on local disk, and upload it.
|
| + local_src_dir = tempfile.mkdtemp()
|
| + os.mkdir(os.path.join(local_src_dir, subdir))
|
| + try:
|
| + for filename in filenames:
|
| + with open(os.path.join(local_src_dir, subdir, filename), 'w') as f:
|
| + f.write('contents of %s\n' % filename)
|
| + gs.upload_dir_contents(source_dir=local_src_dir, dest_bucket=bucket,
|
| + dest_dir=remote_dir)
|
| + finally:
|
| + shutil.rmtree(local_src_dir)
|
| +
|
| + # Validate the list of the files we uploaded to Google Storage.
|
| + (dirs, files) = gs.list_bucket_contents(
|
| + bucket=bucket, subdir=remote_dir)
|
| + assert dirs == [subdir], '%s == [%s]' % (dirs, subdir)
|
| + assert files == [], '%s == []' % files
|
| + (dirs, files) = gs.list_bucket_contents(
|
| + bucket=bucket, subdir=posixpath.join(remote_dir, subdir))
|
| + assert dirs == [], '%s == []' % dirs
|
| + assert files == filenames, '%s == %s' % (files, filenames)
|
| +
|
| + # Download the directory tree we just uploaded, make sure its contents
|
| + # are what we expect, and then delete the tree in Google Storage.
|
| + local_dest_dir = tempfile.mkdtemp()
|
| + try:
|
| + gs.download_dir_contents(source_bucket=bucket, source_dir=remote_dir,
|
| + dest_dir=local_dest_dir)
|
| + for filename in filenames:
|
| + with open(os.path.join(local_dest_dir, subdir, filename)) as f:
|
| + file_contents = f.read()
|
| + assert file_contents == 'contents of %s\n' % filename, (
|
| + '%s == "contents of %s\n"' % (file_contents, filename))
|
| + finally:
|
| + shutil.rmtree(local_dest_dir)
|
| + for filename in filenames:
|
| + gs.delete_file(bucket=bucket,
|
| + path=posixpath.join(remote_dir, subdir, filename))
|
| +
|
| +
|
| # TODO(epoger): How should we exercise these self-tests?
|
| # See http://skbug.com/2751
|
| if __name__ == '__main__':
|
| _test_public_read()
|
| _test_authenticated_round_trip()
|
| + _test_dir_upload_and_download()
|
| # TODO(epoger): Add _test_unauthenticated_access() to make sure we raise
|
| # an exception when we try to access without needed credentials.
|
|
|
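As the __main__ block shows, these self-tests run whenever this module is
executed directly; the authenticated tests expect a ~/.boto file whose
credentials can read and write gs://chromium-skia-gm.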