Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 #!/usr/bin/python | 1 #!/usr/bin/python |
| 2 | 2 |
| 3 # pylint: disable=C0301 | 3 # pylint: disable=C0301 |
| 4 """ | 4 """ |
| 5 Copyright 2014 Google Inc. | 5 Copyright 2014 Google Inc. |
| 6 | 6 |
| 7 Use of this source code is governed by a BSD-style license that can be | 7 Use of this source code is governed by a BSD-style license that can be |
| 8 found in the LICENSE file. | 8 found in the LICENSE file. |
| 9 | 9 |
| 10 Utilities for accessing Google Cloud Storage, using the boto library. | 10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper |
| 11 for the XML API). | |
| 11 | 12 |
| 12 See http://googlecloudstorage.blogspot.com/2012/09/google-cloud-storage-tutorial -using-boto.html | 13 API/library references: |
| 13 for implementation tips. | 14 - https://developers.google.com/storage/docs/reference-guide |
| 15 - http://googlecloudstorage.blogspot.com/2012/09/google-cloud-storage-tutorial-using-boto.html | |
| 14 """ | 16 """ |
| 15 # pylint: enable=C0301 | 17 # pylint: enable=C0301 |
| 16 | 18 |
| 17 # System-level imports | 19 # System-level imports |
| 18 import errno | 20 import errno |
| 19 import os | 21 import os |
| 20 import posixpath | 22 import posixpath |
| 21 import random | 23 import random |
| 22 import re | 24 import re |
| 23 import shutil | 25 import shutil |
| 24 import sys | 26 import sys |
| 25 import tempfile | 27 import tempfile |
| 26 | 28 |
| 27 # Imports from third-party code | 29 # Imports from third-party code |
| 28 TRUNK_DIRECTORY = os.path.abspath(os.path.join( | 30 TRUNK_DIRECTORY = os.path.abspath(os.path.join( |
| 29 os.path.dirname(__file__), os.pardir, os.pardir)) | 31 os.path.dirname(__file__), os.pardir, os.pardir)) |
| 30 for import_subdir in ['boto']: | 32 for import_subdir in ['boto']: |
| 31 import_dirpath = os.path.join( | 33 import_dirpath = os.path.join( |
| 32 TRUNK_DIRECTORY, 'third_party', 'externals', import_subdir) | 34 TRUNK_DIRECTORY, 'third_party', 'externals', import_subdir) |
| 33 if import_dirpath not in sys.path: | 35 if import_dirpath not in sys.path: |
| 34 # We need to insert at the beginning of the path, to make sure that our | 36 # We need to insert at the beginning of the path, to make sure that our |
| 35 # imported versions are favored over others that might be in the path. | 37 # imported versions are favored over others that might be in the path. |
| 36 sys.path.insert(0, import_dirpath) | 38 sys.path.insert(0, import_dirpath) |
| 39 from boto.gs import acl | |
| 37 from boto.gs.connection import GSConnection | 40 from boto.gs.connection import GSConnection |
| 38 from boto.gs.key import Key | 41 from boto.gs.key import Key |
| 39 from boto.s3.bucketlistresultset import BucketListResultSet | 42 from boto.s3.bucketlistresultset import BucketListResultSet |
| 40 from boto.s3.prefix import Prefix | 43 from boto.s3.prefix import Prefix |
| 41 | 44 |
# Permissions that may be set on each file in Google Storage.
# See SupportedPermissions in
# https://github.com/boto/boto/blob/develop/boto/gs/acl.py
# PERMISSION_NONE is a sentinel meaning "no permission set for this scope",
# not a value accepted by the underlying API.
PERMISSION_NONE = None
PERMISSION_OWNER = 'FULL_CONTROL'
PERMISSION_READ = 'READ'
PERMISSION_WRITE = 'WRITE'

# Types of identifiers we can use to set ACLs; these are re-exported from
# boto's gs.acl module so callers need not import boto directly.
ID_TYPE_GROUP_BY_DOMAIN = acl.GROUP_BY_DOMAIN
ID_TYPE_GROUP_BY_EMAIL = acl.GROUP_BY_EMAIL
ID_TYPE_GROUP_BY_ID = acl.GROUP_BY_ID
ID_TYPE_USER_BY_EMAIL = acl.USER_BY_EMAIL
ID_TYPE_USER_BY_ID = acl.USER_BY_ID

# Which field we get/set in ACL entries, depending on ID_TYPE.
FIELD_BY_ID_TYPE = {
  ID_TYPE_GROUP_BY_DOMAIN: 'domain',
  ID_TYPE_GROUP_BY_EMAIL: 'email_address',
  ID_TYPE_GROUP_BY_ID: 'id',
  ID_TYPE_USER_BY_EMAIL: 'email_address',
  ID_TYPE_USER_BY_ID: 'id',
}
| 68 | |
| 42 | 69 |
| 43 class GSUtils(object): | 70 class GSUtils(object): |
| 44 """Utilities for accessing Google Cloud Storage, using the boto library.""" | 71 """Utilities for accessing Google Cloud Storage, using the boto library.""" |
| 45 | 72 |
| 46 def __init__(self, boto_file_path=os.path.join('~','.boto')): | 73 def __init__(self, boto_file_path=os.path.join('~','.boto')): |
| 47 """Constructor. | 74 """Constructor. |
| 48 | 75 |
| 49 Params: | 76 Params: |
| 50 boto_file_path: full path (local-OS-style) on local disk where .boto | 77 boto_file_path: full path (local-OS-style) on local disk where .boto |
| 51 credentials file can be found. An exception is thrown if this file | 78 credentials file can be found. An exception is thrown if this file |
| (...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 106 """ | 133 """ |
| 107 conn = self._create_connection() | 134 conn = self._create_connection() |
| 108 b = conn.get_bucket(bucket_name=source_bucket) | 135 b = conn.get_bucket(bucket_name=source_bucket) |
| 109 item = Key(b) | 136 item = Key(b) |
| 110 item.key = source_path | 137 item.key = source_path |
| 111 if create_subdirs_if_needed: | 138 if create_subdirs_if_needed: |
| 112 _makedirs_if_needed(os.path.dirname(dest_path)) | 139 _makedirs_if_needed(os.path.dirname(dest_path)) |
| 113 with open(dest_path, 'w') as f: | 140 with open(dest_path, 'w') as f: |
| 114 item.get_contents_to_file(fp=f) | 141 item.get_contents_to_file(fp=f) |
| 115 | 142 |
| 143 def get_acl(self, bucket, path, id_type, id_value): | |
| 144 """Retrieve partial access permissions on a single file in Google Storage. | |
| 145 | |
| 146 Various users who match this id_type/id_value pair may have access rights | |
| 147 other than that returned by this call, if they have been granted those | |
| 148 rights based on *other* id_types (e.g., perhaps they have group access | |
| 149 rights, beyond their individual access rights). | |
| 150 | |
| 151 Params: | |
| 152 bucket: GS bucket | |
| 153 path: full path (Posix-style) to the file within that bucket | |
| 154 id_type: must be one of the ID_TYPE_* constants defined above | |
| 155 id_value: get permissions for users whose id_type field contains this | |
| 156 value | |
| 157 | |
| 158 Returns: the PERMISSION_* constant which has been set for users matching | |
| 159 this id_type/id_value, on this file; or PERMISSION_NONE if no such | |
| 160 permissions have been set. | |
| 161 """ | |
| 162 field = FIELD_BY_ID_TYPE[id_type] | |
| 163 conn = self._create_connection() | |
| 164 b = conn.get_bucket(bucket_name=bucket) | |
| 165 acls = b.get_acl(key_name=path) | |
| 166 matching_entries = [entry for entry in acls.entries.entry_list | |
| 167 if (entry.scope.type == id_type) and | |
| 168 (getattr(entry.scope, field) == id_value)] | |
| 169 if matching_entries: | |
| 170 assert len(matching_entries) == 1, '%d == 1' % len(matching_entries) | |
| 171 return matching_entries[0].permission | |
| 172 else: | |
| 173 return PERMISSION_NONE | |
| 174 | |
| 175 def set_acl(self, bucket, path, id_type, id_value, permission): | |
| 176 """Set partial access permissions on a single file in Google Storage. | |
| 177 | |
| 178 Note that a single set_acl() call will not guarantee what access rights any | |
| 179 given user will have on a given file, because permissions are additive. | |
| 180 (E.g., if you set READ permission for a group, but a member of that group | |
| 181 already has WRITE permission, that member will still have WRITE permission.) | |
| 182 TODO(epoger): Do we know that for sure? I *think* that's how it works... | |
| 183 | |
| 184 If there is already a permission set on this file for this id_type/id_value | |
| 185 combination, this call will overwrite it. | |
| 186 | |
| 187 Params: | |
| 188 bucket: GS bucket | |
| 189 path: full path (Posix-style) to the file within that bucket | |
| 190 id_type: must be one of the ID_TYPE_* constants defined above | |
| 191 id_value: add permission for users whose id_type field contains this value | |
| 192 permission: permission to add for users matching id_type/id_value; | |
| 193 must be one of the PERMISSION_* constants defined above. | |
| 194 If PERMISSION_NONE, then any permissions will be granted to this | |
| 195 particular id_type/id_value will be removed... but, given that | |
| 196 permissions are additive, specific users may still have access rights | |
| 197 based on permissions given to *other* id_type/id_value pairs. | |
| 198 | |
| 199 Example Code: | |
| 200 bucket = 'gs://bucket-name' | |
| 201 path = 'path/to/file' | |
| 202 id_type = ID_TYPE_USER_BY_EMAIL | |
| 203 id_value = 'epoger@google.com' | |
| 204 set_acl(bucket, path, id_type, id_value, PERMISSION_READ) | |
| 205 assert PERMISSION_READ == get_acl(bucket, path, id_type, id_value) | |
| 206 set_acl(bucket, path, id_type, id_value, PERMISSION_WRITE) | |
| 207 assert PERMISSION_WRITE == get_acl(bucket, path, id_type, id_value) | |
| 208 """ | |
| 209 field = FIELD_BY_ID_TYPE[id_type] | |
|
epoger
2014/07/15 19:47:03
I can extract lines 209-218 into a separate functi
| |
| 210 conn = self._create_connection() | |
| 211 b = conn.get_bucket(bucket_name=bucket) | |
| 212 acls = b.get_acl(key_name=path) | |
| 213 | |
| 214 # Remove any existing entries that refer to the same id_type/id_value, | |
| 215 # because the API will fail if we try to set more than one. | |
| 216 matching_entries = [entry for entry in acls.entries.entry_list | |
| 217 if (entry.scope.type == id_type) and | |
| 218 (getattr(entry.scope, field) == id_value)] | |
| 219 if matching_entries: | |
| 220 assert len(matching_entries) == 1, '%d == 1' % len(matching_entries) | |
| 221 acls.entries.entry_list.remove(matching_entries[0]) | |
| 222 | |
| 223 # Add a new entry to the ACLs. | |
| 224 if permission != PERMISSION_NONE: | |
| 225 args = {'type': id_type, 'permission': permission} | |
| 226 args[field] = id_value | |
| 227 new_entry = acl.Entry(**args) | |
| 228 acls.entries.entry_list.append(new_entry) | |
| 229 | |
| 230 # Finally, write back the modified ACLs. | |
| 231 b.set_acl(acl_or_str=acls, key_name=path) | |
| 232 | |
| 116 def list_bucket_contents(self, bucket, subdir=None): | 233 def list_bucket_contents(self, bucket, subdir=None): |
| 117 """Returns files in the Google Storage bucket as a (dirs, files) tuple. | 234 """Returns files in the Google Storage bucket as a (dirs, files) tuple. |
| 118 | 235 |
| 119 Args: | 236 Args: |
| 120 bucket: name of the Google Storage bucket | 237 bucket: name of the Google Storage bucket |
| 121 subdir: directory within the bucket to list, or None for root directory | 238 subdir: directory within the bucket to list, or None for root directory |
| 122 """ | 239 """ |
| 123 # The GS command relies on the prefix (if any) ending with a slash. | 240 # The GS command relies on the prefix (if any) ending with a slash. |
| 124 prefix = subdir or '' | 241 prefix = subdir or '' |
| 125 if prefix and not prefix.endswith('/'): | 242 if prefix and not prefix.endswith('/'): |
| (...skipping 81 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 207 # Get a list of the files we uploaded to Google Storage. | 324 # Get a list of the files we uploaded to Google Storage. |
| 208 (dirs, files) = gs.list_bucket_contents( | 325 (dirs, files) = gs.list_bucket_contents( |
| 209 bucket=bucket, subdir=remote_dir) | 326 bucket=bucket, subdir=remote_dir) |
| 210 assert dirs == [subdir], '%s == [%s]' % (dirs, subdir) | 327 assert dirs == [subdir], '%s == [%s]' % (dirs, subdir) |
| 211 assert files == [], '%s == []' % files | 328 assert files == [], '%s == []' % files |
| 212 (dirs, files) = gs.list_bucket_contents( | 329 (dirs, files) = gs.list_bucket_contents( |
| 213 bucket=bucket, subdir=posixpath.join(remote_dir, subdir)) | 330 bucket=bucket, subdir=posixpath.join(remote_dir, subdir)) |
| 214 assert dirs == [], '%s == []' % dirs | 331 assert dirs == [], '%s == []' % dirs |
| 215 assert files == filenames_to_upload, '%s == %s' % (files, filenames_to_upload) | 332 assert files == filenames_to_upload, '%s == %s' % (files, filenames_to_upload) |
| 216 | 333 |
| 334 # Manipulate ACLs on one of those files, and verify them. | |
| 335 # TODO(epoger): Test id_types other than ID_TYPE_GROUP_BY_DOMAIN ? | |
| 336 # TODO(epoger): Test setting multiple ACLs on the same file? | |
| 337 id_type = ID_TYPE_GROUP_BY_DOMAIN | |
| 338 id_value = 'google.com' | |
| 339 fullpath = posixpath.join(remote_dir, subdir, filenames_to_upload[0]) | |
| 340 # Make sure ACL is empty to start with ... | |
| 341 gs.set_acl(bucket=bucket, path=fullpath, | |
| 342 id_type=id_type, id_value=id_value, permission=PERMISSION_NONE) | |
| 343 permission = gs.get_acl(bucket=bucket, path=fullpath, | |
| 344 id_type=id_type, id_value=id_value) | |
| 345 assert permission == PERMISSION_NONE, '%s == %s' % ( | |
| 346 permission, PERMISSION_NONE) | |
| 347 # ... set it to OWNER ... | |
| 348 gs.set_acl(bucket=bucket, path=fullpath, | |
| 349 id_type=id_type, id_value=id_value, permission=PERMISSION_OWNER) | |
| 350 permission = gs.get_acl(bucket=bucket, path=fullpath, | |
| 351 id_type=id_type, id_value=id_value) | |
| 352 assert permission == PERMISSION_OWNER, '%s == %s' % ( | |
| 353 permission, PERMISSION_OWNER) | |
| 354 # ... now set it to READ ... | |
| 355 gs.set_acl(bucket=bucket, path=fullpath, | |
| 356 id_type=id_type, id_value=id_value, permission=PERMISSION_READ) | |
| 357 permission = gs.get_acl(bucket=bucket, path=fullpath, | |
| 358 id_type=id_type, id_value=id_value) | |
| 359 assert permission == PERMISSION_READ, '%s == %s' % ( | |
| 360 permission, PERMISSION_READ) | |
| 361 # ... and clear it again to finish. | |
| 362 gs.set_acl(bucket=bucket, path=fullpath, | |
| 363 id_type=id_type, id_value=id_value, permission=PERMISSION_NONE) | |
| 364 permission = gs.get_acl(bucket=bucket, path=fullpath, | |
| 365 id_type=id_type, id_value=id_value) | |
| 366 assert permission == PERMISSION_NONE, '%s == %s' % ( | |
| 367 permission, PERMISSION_NONE) | |
| 368 | |
| 217 # Download the files we uploaded to Google Storage, and validate contents. | 369 # Download the files we uploaded to Google Storage, and validate contents. |
| 218 local_dest_dir = tempfile.mkdtemp() | 370 local_dest_dir = tempfile.mkdtemp() |
| 219 try: | 371 try: |
| 220 for filename in filenames_to_upload: | 372 for filename in filenames_to_upload: |
| 221 gs.download_file(source_bucket=bucket, | 373 gs.download_file(source_bucket=bucket, |
| 222 source_path=posixpath.join(remote_dir, subdir, filename), | 374 source_path=posixpath.join(remote_dir, subdir, filename), |
| 223 dest_path=os.path.join(local_dest_dir, subdir, filename), | 375 dest_path=os.path.join(local_dest_dir, subdir, filename), |
| 224 create_subdirs_if_needed=True) | 376 create_subdirs_if_needed=True) |
| 225 with open(os.path.join(local_dest_dir, subdir, filename)) as f: | 377 with open(os.path.join(local_dest_dir, subdir, filename)) as f: |
| 226 file_contents = f.read() | 378 file_contents = f.read() |
| (...skipping 15 matching lines...) Expand all Loading... | |
| 242 | 394 |
| 243 | 395 |
| 244 # TODO(epoger): How should we exercise this self-test? | 396 # TODO(epoger): How should we exercise this self-test? |
| 245 # I avoided using the standard unittest framework, because these Google Storage | 397 # I avoided using the standard unittest framework, because these Google Storage |
| 246 # operations are expensive and require .boto permissions. | 398 # operations are expensive and require .boto permissions. |
| 247 # | 399 # |
| 248 # How can we automatically test this code without wasting too many resources | 400 # How can we automatically test this code without wasting too many resources |
| 249 # or needing .boto permissions? | 401 # or needing .boto permissions? |
| 250 if __name__ == '__main__': | 402 if __name__ == '__main__': |
| 251 _run_self_test() | 403 _run_self_test() |
| OLD | NEW |