Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 #!/usr/bin/python | 1 #!/usr/bin/python |
| 2 | 2 |
| 3 # pylint: disable=C0301 | 3 # pylint: disable=C0301 |
| 4 """ | 4 """ |
| 5 Copyright 2014 Google Inc. | 5 Copyright 2014 Google Inc. |
| 6 | 6 |
| 7 Use of this source code is governed by a BSD-style license that can be | 7 Use of this source code is governed by a BSD-style license that can be |
| 8 found in the LICENSE file. | 8 found in the LICENSE file. |
| 9 | 9 |
| 10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper | 10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper |
| (...skipping 19 matching lines...) Expand all Loading... | |
| 30 TRUNK_DIRECTORY = os.path.abspath(os.path.join( | 30 TRUNK_DIRECTORY = os.path.abspath(os.path.join( |
| 31 os.path.dirname(__file__), os.pardir, os.pardir)) | 31 os.path.dirname(__file__), os.pardir, os.pardir)) |
| 32 for import_subdir in ['boto']: | 32 for import_subdir in ['boto']: |
| 33 import_dirpath = os.path.join( | 33 import_dirpath = os.path.join( |
| 34 TRUNK_DIRECTORY, 'third_party', 'externals', import_subdir) | 34 TRUNK_DIRECTORY, 'third_party', 'externals', import_subdir) |
| 35 if import_dirpath not in sys.path: | 35 if import_dirpath not in sys.path: |
| 36 # We need to insert at the beginning of the path, to make sure that our | 36 # We need to insert at the beginning of the path, to make sure that our |
| 37 # imported versions are favored over others that might be in the path. | 37 # imported versions are favored over others that might be in the path. |
| 38 sys.path.insert(0, import_dirpath) | 38 sys.path.insert(0, import_dirpath) |
| 39 from boto.gs import acl | 39 from boto.gs import acl |
| 40 from boto.gs.bucket import Bucket | |
| 40 from boto.gs.connection import GSConnection | 41 from boto.gs.connection import GSConnection |
| 41 from boto.gs.key import Key | 42 from boto.gs.key import Key |
| 42 from boto.s3.bucketlistresultset import BucketListResultSet | 43 from boto.s3.bucketlistresultset import BucketListResultSet |
| 44 from boto.s3.connection import SubdomainCallingFormat | |
| 43 from boto.s3.prefix import Prefix | 45 from boto.s3.prefix import Prefix |
| 44 | 46 |
| 45 # Permissions that may be set on each file in Google Storage. | 47 # Permissions that may be set on each file in Google Storage. |
| 46 # See SupportedPermissions in | 48 # See SupportedPermissions in |
| 47 # https://github.com/boto/boto/blob/develop/boto/gs/acl.py | 49 # https://github.com/boto/boto/blob/develop/boto/gs/acl.py |
| 48 PERMISSION_NONE = None | 50 PERMISSION_NONE = None |
| 49 PERMISSION_OWNER = 'FULL_CONTROL' | 51 PERMISSION_OWNER = 'FULL_CONTROL' |
| 50 PERMISSION_READ = 'READ' | 52 PERMISSION_READ = 'READ' |
| 51 PERMISSION_WRITE = 'WRITE' | 53 PERMISSION_WRITE = 'WRITE' |
| 52 | 54 |
| 53 # Types of identifiers we can use to set ACLs. | 55 # Types of identifiers we can use to set ACLs. |
| 54 ID_TYPE_GROUP_BY_DOMAIN = acl.GROUP_BY_DOMAIN | 56 ID_TYPE_GROUP_BY_DOMAIN = acl.GROUP_BY_DOMAIN |
| 55 ID_TYPE_GROUP_BY_EMAIL = acl.GROUP_BY_EMAIL | 57 ID_TYPE_GROUP_BY_EMAIL = acl.GROUP_BY_EMAIL |
| 56 ID_TYPE_GROUP_BY_ID = acl.GROUP_BY_ID | 58 ID_TYPE_GROUP_BY_ID = acl.GROUP_BY_ID |
| 57 ID_TYPE_USER_BY_EMAIL = acl.USER_BY_EMAIL | 59 ID_TYPE_USER_BY_EMAIL = acl.USER_BY_EMAIL |
| 58 ID_TYPE_USER_BY_ID = acl.USER_BY_ID | 60 ID_TYPE_USER_BY_ID = acl.USER_BY_ID |
| 59 | 61 |
| 60 # Which field we get/set in ACL entries, depending on ID_TYPE. | 62 # Which field we get/set in ACL entries, depending on ID_TYPE. |
| 61 FIELD_BY_ID_TYPE = { | 63 FIELD_BY_ID_TYPE = { |
| 62 ID_TYPE_GROUP_BY_DOMAIN: 'domain', | 64 ID_TYPE_GROUP_BY_DOMAIN: 'domain', |
| 63 ID_TYPE_GROUP_BY_EMAIL: 'email_address', | 65 ID_TYPE_GROUP_BY_EMAIL: 'email_address', |
| 64 ID_TYPE_GROUP_BY_ID: 'id', | 66 ID_TYPE_GROUP_BY_ID: 'id', |
| 65 ID_TYPE_USER_BY_EMAIL: 'email_address', | 67 ID_TYPE_USER_BY_EMAIL: 'email_address', |
| 66 ID_TYPE_USER_BY_ID: 'id', | 68 ID_TYPE_USER_BY_ID: 'id', |
| 67 } | 69 } |
| 68 | 70 |
| 69 | 71 |
| 72 class AnonymousGSConnection(GSConnection): | |
| 73 """The GSConnection class in the boto library doesn't allow for anonymous | |
| 74 connections (connection without credentials). | |
| 75 """ | |
| 76 def __init__(self): | |
| 77 super(GSConnection, self).__init__( | |
| 78 anon=True, host=GSConnection.DefaultHost, | |
| 79 calling_format=SubdomainCallingFormat(), provider='google', | |
|
rmistry
2014/07/16 13:34:21
Could you explain the provider='google'?
epoger
2014/07/16 13:47:42
Good question. I have tried to do so in patchset
| |
| 80 bucket_class=Bucket) | |
| 81 | |
| 82 | |
| 70 class GSUtils(object): | 83 class GSUtils(object): |
| 71 """Utilities for accessing Google Cloud Storage, using the boto library.""" | 84 """Utilities for accessing Google Cloud Storage, using the boto library.""" |
| 72 | 85 |
| 73 def __init__(self, boto_file_path=os.path.join('~','.boto')): | 86 def __init__(self, boto_file_path=None): |
| 74 """Constructor. | 87 """Constructor. |
| 75 | 88 |
| 76 Params: | 89 Params: |
| 77 boto_file_path: full path (local-OS-style) on local disk where .boto | 90 boto_file_path: full path (local-OS-style) on local disk where .boto |
| 78 credentials file can be found. An exception is thrown if this file | 91 credentials file can be found. If None, then the GSUtils object |
| 79 is missing. | 92 created will be able to access only public files in Google Storage. |
| 80 TODO(epoger): Change missing-file behavior: allow the caller to | 93 |
| 81 operate on public files in Google Storage. | 94 Raises an exception if no file is found at boto_file_path, or if the file |
| 95 found there is malformed. | |
| 82 """ | 96 """ |
| 83 boto_file_path = os.path.expanduser(boto_file_path) | 97 self._gs_access_key_id = None |
| 84 print 'Reading boto file from %s' % boto_file_path | 98 self._gs_secret_access_key = None |
| 85 boto_dict = _config_file_as_dict(filepath=boto_file_path) | 99 if boto_file_path: |
| 86 self._gs_access_key_id = boto_dict['gs_access_key_id'] | 100 print 'Reading boto file from %s' % boto_file_path |
| 87 self._gs_secret_access_key = boto_dict['gs_secret_access_key'] | 101 boto_dict = _config_file_as_dict(filepath=boto_file_path) |
| 102 self._gs_access_key_id = boto_dict['gs_access_key_id'] | |
| 103 self._gs_secret_access_key = boto_dict['gs_secret_access_key'] | |
| 88 | 104 |
| 89 def delete_file(self, bucket, path): | 105 def delete_file(self, bucket, path): |
| 90 """Delete a single file within a GS bucket. | 106 """Delete a single file within a GS bucket. |
| 91 | 107 |
| 92 TODO(epoger): what if bucket or path does not exist? Should probably raise | 108 TODO(epoger): what if bucket or path does not exist? Should probably raise |
| 93 an exception. Implement, and add a test to exercise this. | 109 an exception. Implement, and add a test to exercise this. |
| 94 | 110 |
| 95 Params: | 111 Params: |
| 96 bucket: GS bucket to delete a file from | 112 bucket: GS bucket to delete a file from |
| 97 path: full path (Posix-style) of the file within the bucket to delete | 113 path: full path (Posix-style) of the file within the bucket to delete |
| (...skipping 153 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 251 for item in lister: | 267 for item in lister: |
| 252 t = type(item) | 268 t = type(item) |
| 253 if t is Key: | 269 if t is Key: |
| 254 files.append(item.key[prefix_length:]) | 270 files.append(item.key[prefix_length:]) |
| 255 elif t is Prefix: | 271 elif t is Prefix: |
| 256 dirs.append(item.name[prefix_length:-1]) | 272 dirs.append(item.name[prefix_length:-1]) |
| 257 return (dirs, files) | 273 return (dirs, files) |
| 258 | 274 |
| 259 def _create_connection(self): | 275 def _create_connection(self): |
| 260 """Returns a GSConnection object we can use to access Google Storage.""" | 276 """Returns a GSConnection object we can use to access Google Storage.""" |
| 261 return GSConnection( | 277 if self._gs_access_key_id: |
| 262 gs_access_key_id=self._gs_access_key_id, | 278 return GSConnection( |
| 263 gs_secret_access_key=self._gs_secret_access_key) | 279 gs_access_key_id=self._gs_access_key_id, |
| 264 | 280 gs_secret_access_key=self._gs_secret_access_key) |
| 281 else: | |
| 282 return AnonymousGSConnection() | |
| 265 | 283 |
| 266 def _config_file_as_dict(filepath): | 284 def _config_file_as_dict(filepath): |
| 267 """Reads a boto-style config file into a dict. | 285 """Reads a boto-style config file into a dict. |
| 268 | 286 |
| 269 Parses all lines from the file of this form: key = value | 287 Parses all lines from the file of this form: key = value |
| 270 TODO(epoger): Create unittest. | 288 TODO(epoger): Create unittest. |
| 271 | 289 |
| 272 Params: | 290 Params: |
| 273 filepath: path to config file on local disk | 291 filepath: path to config file on local disk |
| 274 | 292 |
| (...skipping 19 matching lines...) Expand all Loading... | |
| 294 Args: | 312 Args: |
| 295 path: full path of directory to create | 313 path: full path of directory to create |
| 296 """ | 314 """ |
| 297 try: | 315 try: |
| 298 os.makedirs(path) | 316 os.makedirs(path) |
| 299 except OSError as e: | 317 except OSError as e: |
| 300 if e.errno != errno.EEXIST: | 318 if e.errno != errno.EEXIST: |
| 301 raise | 319 raise |
| 302 | 320 |
| 303 | 321 |
| 304 def _run_self_test(): | 322 def _test_public_read(): |
| 323 """Make sure we can read from public files without .boto file credentials.""" | |
| 324 gs = GSUtils() | |
| 325 gs.list_bucket_contents(bucket='chromium-skia-gm-summaries', subdir=None) | |
| 326 | |
| 327 | |
| 328 def _test_authenticated_round_trip(): | |
| 329 try: | |
| 330 gs = GSUtils(boto_file_path=os.path.expanduser(os.path.join('~','.boto'))) | |
| 331 except: | |
| 332 print """ | |
| 333 Failed to instantiate GSUtils object with default .boto file path. | |
| 334 Do you have a ~/.boto file that provides the credentials needed to read | |
| 335 and write gs://chromium-skia-gm ? | |
| 336 """ | |
| 337 raise | |
| 338 | |
| 305 bucket = 'chromium-skia-gm' | 339 bucket = 'chromium-skia-gm' |
| 306 remote_dir = 'gs_utils_test/%d' % random.randint(0, sys.maxint) | 340 remote_dir = 'gs_utils_test/%d' % random.randint(0, sys.maxint) |
| 307 subdir = 'subdir' | 341 subdir = 'subdir' |
| 308 filenames_to_upload = ['file1', 'file2'] | 342 filenames_to_upload = ['file1', 'file2'] |
| 309 gs = GSUtils() | |
| 310 | 343 |
| 311 # Upload test files to Google Storage. | 344 # Upload test files to Google Storage. |
| 312 local_src_dir = tempfile.mkdtemp() | 345 local_src_dir = tempfile.mkdtemp() |
| 313 os.mkdir(os.path.join(local_src_dir, subdir)) | 346 os.mkdir(os.path.join(local_src_dir, subdir)) |
| 314 try: | 347 try: |
| 315 for filename in filenames_to_upload: | 348 for filename in filenames_to_upload: |
| 316 with open(os.path.join(local_src_dir, subdir, filename), 'w') as f: | 349 with open(os.path.join(local_src_dir, subdir, filename), 'w') as f: |
| 317 f.write('contents of %s\n' % filename) | 350 f.write('contents of %s\n' % filename) |
| 318 gs.upload_file(source_path=os.path.join(local_src_dir, subdir, filename), | 351 gs.upload_file(source_path=os.path.join(local_src_dir, subdir, filename), |
| 319 dest_bucket=bucket, | 352 dest_bucket=bucket, |
| (...skipping 66 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 386 gs.delete_file(bucket=bucket, | 419 gs.delete_file(bucket=bucket, |
| 387 path=posixpath.join(remote_dir, subdir, filename)) | 420 path=posixpath.join(remote_dir, subdir, filename)) |
| 388 | 421 |
| 389 # Confirm that we deleted all the files we uploaded to Google Storage. | 422 # Confirm that we deleted all the files we uploaded to Google Storage. |
| 390 (dirs, files) = gs.list_bucket_contents( | 423 (dirs, files) = gs.list_bucket_contents( |
| 391 bucket=bucket, subdir=posixpath.join(remote_dir, subdir)) | 424 bucket=bucket, subdir=posixpath.join(remote_dir, subdir)) |
| 392 assert dirs == [], '%s == []' % dirs | 425 assert dirs == [], '%s == []' % dirs |
| 393 assert files == [], '%s == []' % files | 426 assert files == [], '%s == []' % files |
| 394 | 427 |
| 395 | 428 |
| 396 # TODO(epoger): How should we exercise this self-test? | 429 # TODO(epoger): How should we exercise these self-tests? |
| 397 # I avoided using the standard unittest framework, because these Google Storage | 430 # See http://skbug.com/2751 |
| 398 # operations are expensive and require .boto permissions. | |
| 399 # | |
| 400 # How can we automatically test this code without wasting too many resources | |
| 401 # or needing .boto permissions? | |
| 402 if __name__ == '__main__': | 431 if __name__ == '__main__': |
| 403 _run_self_test() | 432 _test_public_read() |
| 433 _test_authenticated_round_trip() | |
| 434 # TODO(epoger): Add _test_unauthenticated_access() to make sure we raise | |
| 435 # an exception when we try to access without needed credentials. | |
| OLD | NEW |