Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 #!/usr/bin/python | 1 #!/usr/bin/python |
| 2 | 2 |
| 3 # pylint: disable=C0301 | 3 # pylint: disable=C0301 |
| 4 """ | 4 """ |
| 5 Copyright 2014 Google Inc. | 5 Copyright 2014 Google Inc. |
| 6 | 6 |
| 7 Use of this source code is governed by a BSD-style license that can be | 7 Use of this source code is governed by a BSD-style license that can be |
| 8 found in the LICENSE file. | 8 found in the LICENSE file. |
| 9 | 9 |
| 10 Utilities for accessing Google Cloud Storage, using the boto library. | 10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper |
| 11 for the XML API). | |
| 11 | 12 |
| 12 See http://googlecloudstorage.blogspot.com/2012/09/google-cloud-storage-tutorial-using-boto.html | 13 API/library references: |
| 13 for implementation tips. | 14 - https://developers.google.com/storage/docs/reference-guide |
| 15 - http://googlecloudstorage.blogspot.com/2012/09/google-cloud-storage-tutorial-using-boto.html | |
| 14 """ | 16 """ |
| 15 # pylint: enable=C0301 | 17 # pylint: enable=C0301 |
| 16 | 18 |
| 17 # System-level imports | 19 # System-level imports |
| 18 import errno | 20 import errno |
| 19 import os | 21 import os |
| 20 import posixpath | 22 import posixpath |
| 21 import random | 23 import random |
| 22 import re | 24 import re |
| 23 import shutil | 25 import shutil |
| 24 import sys | 26 import sys |
| 25 import tempfile | 27 import tempfile |
| 26 | 28 |
| 27 # Imports from third-party code | 29 # Imports from third-party code |
| 28 TRUNK_DIRECTORY = os.path.abspath(os.path.join( | 30 TRUNK_DIRECTORY = os.path.abspath(os.path.join( |
| 29 os.path.dirname(__file__), os.pardir, os.pardir)) | 31 os.path.dirname(__file__), os.pardir, os.pardir)) |
| 30 for import_subdir in ['boto']: | 32 for import_subdir in ['boto']: |
| 31 import_dirpath = os.path.join( | 33 import_dirpath = os.path.join( |
| 32 TRUNK_DIRECTORY, 'third_party', 'externals', import_subdir) | 34 TRUNK_DIRECTORY, 'third_party', 'externals', import_subdir) |
| 33 if import_dirpath not in sys.path: | 35 if import_dirpath not in sys.path: |
| 34 # We need to insert at the beginning of the path, to make sure that our | 36 # We need to insert at the beginning of the path, to make sure that our |
| 35 # imported versions are favored over others that might be in the path. | 37 # imported versions are favored over others that might be in the path. |
| 36 sys.path.insert(0, import_dirpath) | 38 sys.path.insert(0, import_dirpath) |
| 39 from boto.gs import acl | |
| 37 from boto.gs.connection import GSConnection | 40 from boto.gs.connection import GSConnection |
| 38 from boto.gs.key import Key | 41 from boto.gs.key import Key |
| 39 from boto.s3.bucketlistresultset import BucketListResultSet | 42 from boto.s3.bucketlistresultset import BucketListResultSet |
| 40 from boto.s3.prefix import Prefix | 43 from boto.s3.prefix import Prefix |
| 41 | 44 |
| 45 # SupportedPermissions as listed in | |
| 46 # https://github.com/boto/boto/blob/develop/boto/gs/acl.py | |
| 47 PERMISSION_OWNER = 'FULL_CONTROL' | |
| 48 PERMISSION_READ = 'READ' | |
| 49 PERMISSION_WRITE = 'WRITE' | |
| 50 | |
| 51 # Types of identifiers we can use to set ACLs. | |
| 52 ID_TYPE_GROUP_BY_DOMAIN = acl.GROUP_BY_DOMAIN | |
| 53 ID_TYPE_GROUP_BY_EMAIL = acl.GROUP_BY_EMAIL | |
| 54 ID_TYPE_GROUP_BY_ID = acl.GROUP_BY_ID | |
| 55 ID_TYPE_USER_BY_EMAIL = acl.USER_BY_EMAIL | |
| 56 ID_TYPE_USER_BY_ID = acl.USER_BY_ID | |
| 57 | |
| 58 # Which field we get/set in ACL entries, depending on ID_TYPE. | |
| 59 FIELD_BY_ID_TYPE = { | |
| 60 ID_TYPE_GROUP_BY_DOMAIN: 'domain', | |
| 61 ID_TYPE_GROUP_BY_EMAIL: 'email_address', | |
| 62 ID_TYPE_GROUP_BY_ID: 'id', | |
| 63 ID_TYPE_USER_BY_EMAIL: 'email_address', | |
| 64 ID_TYPE_USER_BY_ID: 'id', | |
| 65 } | |
| 66 | |
| 42 | 67 |
| 43 class GSUtils(object): | 68 class GSUtils(object): |
| 44 """Utilities for accessing Google Cloud Storage, using the boto library.""" | 69 """Utilities for accessing Google Cloud Storage, using the boto library.""" |
| 45 | 70 |
| 46 def __init__(self, boto_file_path=os.path.join('~','.boto')): | 71 def __init__(self, boto_file_path=os.path.join('~','.boto')): |
| 47 """Constructor. | 72 """Constructor. |
| 48 | 73 |
| 49 Params: | 74 Params: |
| 50 boto_file_path: full path (local-OS-style) on local disk where .boto | 75 boto_file_path: full path (local-OS-style) on local disk where .boto |
| 51 credentials file can be found. An exception is thrown if this file | 76 credentials file can be found. An exception is thrown if this file |
| (...skipping 54 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 106 """ | 131 """ |
| 107 conn = self._create_connection() | 132 conn = self._create_connection() |
| 108 b = conn.get_bucket(bucket_name=source_bucket) | 133 b = conn.get_bucket(bucket_name=source_bucket) |
| 109 item = Key(b) | 134 item = Key(b) |
| 110 item.key = source_path | 135 item.key = source_path |
| 111 if create_subdirs_if_needed: | 136 if create_subdirs_if_needed: |
| 112 _makedirs_if_needed(os.path.dirname(dest_path)) | 137 _makedirs_if_needed(os.path.dirname(dest_path)) |
| 113 with open(dest_path, 'w') as f: | 138 with open(dest_path, 'w') as f: |
| 114 item.get_contents_to_file(fp=f) | 139 item.get_contents_to_file(fp=f) |
| 115 | 140 |
| 141 def add_acl(self, bucket, path, id_type, id_value, permission): | |
| 142 """Add access permissions on a single file in Google Storage. | |
| 143 | |
| 144 After this call, the set of users with access rights will always be >= | |
| 145 the set of users with access rights before the call, because the permissions | |
| 146 are additive. | |
| 147 (E.g., if you add READ permission for a group, but a member of that group | |
| 148 already has WRITE permission, that member will still have WRITE permission.) | |
| 149 TODO(epoger): Do we know that for sure? I *think* that's how it works... | |
| 150 | |
| 151 If there is already a permission set for this id_type/id_value combination, | |
| 152 this call will overwrite it. | |
| 153 | |
| 154 Params: | |
| 155 bucket: GS bucket | |
| 156 path: full path (Posix-style) to the file within that bucket | |
| 157 id_type: must be one of the ID_TYPE_* constants defined above | |
| 158 id_value: add permission for users whose id_type field contains this value | |
| 159 permission: permission to add for users matching id_type/id_value; | |
| 160 must be one of the PERMISSION_* constants defined above | |
| 161 """ | |
| 162 field = FIELD_BY_ID_TYPE[id_type] | |
| 163 conn = self._create_connection() | |
| 164 b = conn.get_bucket(bucket_name=bucket) | |
| 165 acls = b.get_acl(key_name=path) | |
| 166 | |
| 167 # Remove any existing entries that refer to the same id_type/id_value, | |
| 168 # because the API will fail if we try to set more than one. | |
| 169 matching_entries = [entry for entry in acls.entries.entry_list | |
| 170 if (entry.scope.type == id_type) and | |
| 171 (getattr(entry.scope, field) == id_value)] | |
| 172 if matching_entries: | |
| 173 for entry in matching_entries: | |
|
rmistry
2014/07/15 11:26:10
Is it possible to ever get a list of more than one
epoger
2014/07/15 13:20:18
AFAICT we should always get either 0 or 1 matching
| |
| 174 acls.entries.entry_list.remove(entry) | |
| 175 | |
| 176 # Add a new entry to the ACLs. | |
| 177 args = {'type': id_type, 'permission': permission} | |
| 178 args[field] = id_value | |
| 179 new_entry = acl.Entry(**args) | |
| 180 acls.entries.entry_list.append(new_entry) | |
| 181 b.set_acl(acl_or_str=acls, key_name=path) | |
| 182 | |
| 183 def delete_acl(self, bucket, path, id_type, id_value): | |
| 184 """Delete certain access permissions on a single file in Google Storage. | |
| 185 | |
| 186 Various users who match this id_type/id_value pair may still have access | |
| 187 rights to this file after this call, if they have been granted those rights | |
| 188 based on *other* id_types (e.g., perhaps they still have individual user | |
| 189 access rights, even if their group access rights are removed). | |
| 190 | |
| 191 If no permissions have been added for this id_type/id_value, this will | |
| 192 return uneventfully (there will be no exception or other indication of | |
| 193 failure). | |
| 194 | |
| 195 Params: | |
| 196 bucket: GS bucket | |
| 197 path: full path (Posix-style) to the file within that bucket | |
| 198 id_type: must be one of the ID_TYPE_* constants defined above | |
| 199 id_value: delete permissions for users whose id_type field contains this | |
| 200 value | |
| 201 """ | |
| 202 field = FIELD_BY_ID_TYPE[id_type] | |
| 203 conn = self._create_connection() | |
| 204 b = conn.get_bucket(bucket_name=bucket) | |
| 205 acls = b.get_acl(key_name=path) | |
| 206 matching_entries = [entry for entry in acls.entries.entry_list | |
| 207 if (entry.scope.type == id_type) and | |
| 208 (getattr(entry.scope, field) == id_value)] | |
| 209 if matching_entries: | |
| 210 for entry in matching_entries: | |
| 211 acls.entries.entry_list.remove(entry) | |
| 212 b.set_acl(acl_or_str=acls, key_name=path) | |
| 213 | |
| 214 def get_acl(self, bucket, path, id_type, id_value): | |
| 215 """Retrieve partial access permissions on a single file in Google Storage. | |
| 216 | |
| 217 Various users who match this id_type/id_value pair may have access rights | |
| 218 other than that returned by this call, if they have been granted those | |
| 219 rights based on *other* id_types (e.g., perhaps they have group access | |
| 220 rights, beyond their individual access rights). | |
| 221 | |
| 222 Params: | |
| 223 bucket: GS bucket | |
| 224 path: full path (Posix-style) to the file within that bucket | |
| 225 id_type: must be one of the ID_TYPE_* constants defined above | |
| 226 id_value: delete permissions for users whose id_type field contains this | |
|
epoger
2014/07/15 13:34:26
delete -> get
| |
| 227 value | |
| 228 | |
|
epoger
2014/07/15 13:34:26
Add an example
| |
| 229 Returns: the PERMISSION_* constant which has been set for users matching | |
| 230 this id_type/id_value, on this file; or None if no such permissions have | |
| 231 been set. | |
|
rmistry
2014/07/15 11:26:10
[Optional] How about creating a PERMISSION_NONE or
epoger
2014/07/15 13:20:18
I can do that, *if* you think that would also make
| |
| 232 """ | |
| 233 field = FIELD_BY_ID_TYPE[id_type] | |
| 234 conn = self._create_connection() | |
| 235 b = conn.get_bucket(bucket_name=bucket) | |
| 236 acls = b.get_acl(key_name=path) | |
| 237 matching_entries = [entry for entry in acls.entries.entry_list | |
| 238 if (entry.scope.type == id_type) and | |
| 239 (getattr(entry.scope, field) == id_value)] | |
| 240 if matching_entries: | |
| 241 assert len(matching_entries) == 1, '%d == 1' % len(matching_entries) | |
| 242 return matching_entries[0].permission | |
| 243 else: | |
| 244 return None | |
| 245 | |
| 116 def list_bucket_contents(self, bucket, subdir=None): | 246 def list_bucket_contents(self, bucket, subdir=None): |
| 117 """Returns files in the Google Storage bucket as a (dirs, files) tuple. | 247 """Returns files in the Google Storage bucket as a (dirs, files) tuple. |
| 118 | 248 |
| 119 Args: | 249 Args: |
| 120 bucket: name of the Google Storage bucket | 250 bucket: name of the Google Storage bucket |
| 121 subdir: directory within the bucket to list, or None for root directory | 251 subdir: directory within the bucket to list, or None for root directory |
| 122 """ | 252 """ |
| 123 # The GS command relies on the prefix (if any) ending with a slash. | 253 # The GS command relies on the prefix (if any) ending with a slash. |
| 124 prefix = subdir or '' | 254 prefix = subdir or '' |
| 125 if prefix and not prefix.endswith('/'): | 255 if prefix and not prefix.endswith('/'): |
| (...skipping 81 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 207 # Get a list of the files we uploaded to Google Storage. | 337 # Get a list of the files we uploaded to Google Storage. |
| 208 (dirs, files) = gs.list_bucket_contents( | 338 (dirs, files) = gs.list_bucket_contents( |
| 209 bucket=bucket, subdir=remote_dir) | 339 bucket=bucket, subdir=remote_dir) |
| 210 assert dirs == [subdir], '%s == [%s]' % (dirs, subdir) | 340 assert dirs == [subdir], '%s == [%s]' % (dirs, subdir) |
| 211 assert files == [], '%s == []' % files | 341 assert files == [], '%s == []' % files |
| 212 (dirs, files) = gs.list_bucket_contents( | 342 (dirs, files) = gs.list_bucket_contents( |
| 213 bucket=bucket, subdir=posixpath.join(remote_dir, subdir)) | 343 bucket=bucket, subdir=posixpath.join(remote_dir, subdir)) |
| 214 assert dirs == [], '%s == []' % dirs | 344 assert dirs == [], '%s == []' % dirs |
| 215 assert files == filenames_to_upload, '%s == %s' % (files, filenames_to_upload) | 345 assert files == filenames_to_upload, '%s == %s' % (files, filenames_to_upload) |
| 216 | 346 |
| 347 # Manipulate ACLs on one of those files, and verify them. | |
| 348 # TODO(epoger): Test id_types other than ID_TYPE_GROUP_BY_DOMAIN ? | |
| 349 # TODO(epoger): Test setting multiple ACLs on the same file? | |
| 350 id_type = ID_TYPE_GROUP_BY_DOMAIN | |
| 351 id_value = 'google.com' | |
| 352 fullpath = posixpath.join(remote_dir, subdir, filenames_to_upload[0]) | |
| 353 # Make sure ACL is empty to start with ... | |
| 354 gs.delete_acl(bucket=bucket, path=fullpath, | |
| 355 id_type=id_type, id_value=id_value) | |
| 356 permission = gs.get_acl(bucket=bucket, path=fullpath, | |
| 357 id_type=id_type, id_value=id_value) | |
| 358 assert permission == None, '%s == None' % permission | |
| 359 # ... set it to OWNER ... | |
| 360 gs.add_acl(bucket=bucket, path=fullpath, | |
| 361 id_type=id_type, id_value=id_value, permission=PERMISSION_OWNER) | |
| 362 permission = gs.get_acl(bucket=bucket, path=fullpath, | |
| 363 id_type=id_type, id_value=id_value) | |
| 364 assert permission == PERMISSION_OWNER, '%s == %s' % ( | |
| 365 permission, PERMISSION_OWNER) | |
| 366 # ... now set it to READ ... | |
| 367 gs.add_acl(bucket=bucket, path=fullpath, | |
| 368 id_type=id_type, id_value=id_value, permission=PERMISSION_READ) | |
| 369 permission = gs.get_acl(bucket=bucket, path=fullpath, | |
| 370 id_type=id_type, id_value=id_value) | |
| 371 assert permission == PERMISSION_READ, '%s == %s' % ( | |
| 372 permission, PERMISSION_READ) | |
| 373 # ... and clear it again to finish. | |
| 374 gs.delete_acl(bucket=bucket, path=fullpath, | |
| 375 id_type=id_type, id_value=id_value) | |
| 376 permission = gs.get_acl(bucket=bucket, path=fullpath, | |
| 377 id_type=id_type, id_value=id_value) | |
| 378 assert permission == None, '%s == None' % permission | |
| 379 | |
| 217 # Download the files we uploaded to Google Storage, and validate contents. | 380 # Download the files we uploaded to Google Storage, and validate contents. |
| 218 local_dest_dir = tempfile.mkdtemp() | 381 local_dest_dir = tempfile.mkdtemp() |
| 219 try: | 382 try: |
| 220 for filename in filenames_to_upload: | 383 for filename in filenames_to_upload: |
| 221 gs.download_file(source_bucket=bucket, | 384 gs.download_file(source_bucket=bucket, |
| 222 source_path=posixpath.join(remote_dir, subdir, filename), | 385 source_path=posixpath.join(remote_dir, subdir, filename), |
| 223 dest_path=os.path.join(local_dest_dir, subdir, filename), | 386 dest_path=os.path.join(local_dest_dir, subdir, filename), |
| 224 create_subdirs_if_needed=True) | 387 create_subdirs_if_needed=True) |
| 225 with open(os.path.join(local_dest_dir, subdir, filename)) as f: | 388 with open(os.path.join(local_dest_dir, subdir, filename)) as f: |
| 226 file_contents = f.read() | 389 file_contents = f.read() |
| (...skipping 15 matching lines...) Expand all Loading... | |
| 242 | 405 |
| 243 | 406 |
| 244 # TODO(epoger): How should we exercise this self-test? | 407 # TODO(epoger): How should we exercise this self-test? |
| 245 # I avoided using the standard unittest framework, because these Google Storage | 408 # I avoided using the standard unittest framework, because these Google Storage |
| 246 # operations are expensive and require .boto permissions. | 409 # operations are expensive and require .boto permissions. |
| 247 # | 410 # |
| 248 # How can we automatically test this code without wasting too many resources | 411 # How can we automatically test this code without wasting too many resources |
| 249 # or needing .boto permissions? | 412 # or needing .boto permissions? |
| 250 if __name__ == '__main__': | 413 if __name__ == '__main__': |
| 251 _run_self_test() | 414 _run_self_test() |
| OLD | NEW |