Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 #!/usr/bin/python | 1 #!/usr/bin/python |
| 2 | 2 |
| 3 # pylint: disable=C0301 | 3 # pylint: disable=C0301 |
| 4 """ | 4 """ |
| 5 Copyright 2014 Google Inc. | 5 Copyright 2014 Google Inc. |
| 6 | 6 |
| 7 Use of this source code is governed by a BSD-style license that can be | 7 Use of this source code is governed by a BSD-style license that can be |
| 8 found in the LICENSE file. | 8 found in the LICENSE file. |
| 9 | 9 |
| 10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper | 10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper |
| (...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 45 from boto.s3.connection import SubdomainCallingFormat | 45 from boto.s3.connection import SubdomainCallingFormat |
| 46 from boto.s3.prefix import Prefix | 46 from boto.s3.prefix import Prefix |
| 47 | 47 |
| 48 # How many files to upload at once, by default. | 48 # How many files to upload at once, by default. |
| 49 # TODO(epoger): Is there a way to compute this intelligently? To some extent | 49 # TODO(epoger): Is there a way to compute this intelligently? To some extent |
| 50 # it is a function of how many cores are on the machine, and how many other | 50 # it is a function of how many cores are on the machine, and how many other |
| 51 # processes it is running; but it's probably more a function of how much time | 51 # processes it is running; but it's probably more a function of how much time |
| 52 # each core sits idle waiting for network I/O to complete. | 52 # each core sits idle waiting for network I/O to complete. |
| 53 DEFAULT_UPLOAD_THREADS = 10 | 53 DEFAULT_UPLOAD_THREADS = 10 |
| 54 | 54 |
| 55 _GS_PREFIX = 'gs://' | 55 GS_PREFIX = 'gs://' |
| 56 | 56 |
| 57 | 57 |
| 58 class AnonymousGSConnection(GSConnection): | 58 class AnonymousGSConnection(GSConnection): |
| 59 """GSConnection class that allows anonymous connections. | 59 """GSConnection class that allows anonymous connections. |
| 60 | 60 |
| 61 The GSConnection class constructor in | 61 The GSConnection class constructor in |
| 62 https://github.com/boto/boto/blob/develop/boto/gs/connection.py doesn't allow | 62 https://github.com/boto/boto/blob/develop/boto/gs/connection.py doesn't allow |
| 63 for anonymous connections (connections without credentials), so we have to | 63 for anonymous connections (connections without credentials), so we have to |
| 64 override it. | 64 override it. |
| 65 """ | 65 """ |
| (...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 125 IF_NEW = 2 # if there is an existing file with the same name, | 125 IF_NEW = 2 # if there is an existing file with the same name, |
| 126 # leave it alone | 126 # leave it alone |
| 127 IF_MODIFIED = 3 # if there is an existing file with the same name and | 127 IF_MODIFIED = 3 # if there is an existing file with the same name and |
| 128 # contents, leave it alone | 128 # contents, leave it alone |
| 129 | 129 |
| 130 def __init__(self, boto_file_path=None): | 130 def __init__(self, boto_file_path=None): |
| 131 """Constructor. | 131 """Constructor. |
| 132 | 132 |
| 133 Params: | 133 Params: |
| 134 boto_file_path: full path (local-OS-style) on local disk where .boto | 134 boto_file_path: full path (local-OS-style) on local disk where .boto |
| 135 credentials file can be found. If None, then the GSUtils object | 135 credentials file can be found. If None, fall back on the |
| 136 created will be able to access only public files in Google Storage. | 136 AWS_CREDENTIAL_FILE environment variable, then look in a set of |
| 137 common paths for the .boto file. If no .boto file is found, then the | |
| 138 GSUtils object created will be able to access only public files in | |
| 139 Google Storage. | |
| 137 | 140 |
| 138 Raises an exception if no file is found at boto_file_path, or if the file | 141 Raises an exception if no file is found at boto_file_path, or if the file |
| 139 found there is malformed. | 142 found there is malformed. |
| 140 """ | 143 """ |
| 141 self._gs_access_key_id = None | 144 self._gs_access_key_id = None |
| 142 self._gs_secret_access_key = None | 145 self._gs_secret_access_key = None |
| 146 if not boto_file_path: | |
| 147 if os.environ.get('AWS_CREDENTIAL_FILE'): | |
| 148 boto_file_path = os.path.expanduser(os.environ['AWS_CREDENTIAL_FILE']) | |
| 149 if not boto_file_path: | |
|
rmistry
2014/10/16 18:09:58
Remove this if since you already have it above.
borenet
2014/10/16 18:12:51
This is only intended to run if AWS_CREDENTIAL_FILE is not set.
| |
| 150 for path in (os.path.join(os.path.expanduser('~'), '.boto'),): | |
| 151 if os.path.isfile(path): | |
| 152 boto_file_path = path | |
| 153 break | |
| 154 | |
| 143 if boto_file_path: | 155 if boto_file_path: |
| 144 print ('Reading boto file from %s' % boto_file_path) | 156 print ('Reading boto file from %s' % boto_file_path) |
| 145 boto_dict = _config_file_as_dict(filepath=boto_file_path) | 157 boto_dict = _config_file_as_dict(filepath=boto_file_path) |
| 146 self._gs_access_key_id = boto_dict['gs_access_key_id'] | 158 self._gs_access_key_id = boto_dict['gs_access_key_id'] |
| 147 self._gs_secret_access_key = boto_dict['gs_secret_access_key'] | 159 self._gs_secret_access_key = boto_dict['gs_secret_access_key'] |
| 160 else: | |
| 161 print >> sys.stderr, 'Warning: no .boto file found.' | |
| 162 | |
| 148 # Which field we get/set in ACL entries, depending on IdType. | 163 # Which field we get/set in ACL entries, depending on IdType. |
| 149 self._field_by_id_type = { | 164 self._field_by_id_type = { |
| 150 self.IdType.GROUP_BY_DOMAIN: 'domain', | 165 self.IdType.GROUP_BY_DOMAIN: 'domain', |
| 151 self.IdType.GROUP_BY_EMAIL: 'email_address', | 166 self.IdType.GROUP_BY_EMAIL: 'email_address', |
| 152 self.IdType.GROUP_BY_ID: 'id', | 167 self.IdType.GROUP_BY_ID: 'id', |
| 153 self.IdType.USER_BY_EMAIL: 'email_address', | 168 self.IdType.USER_BY_EMAIL: 'email_address', |
| 154 self.IdType.USER_BY_ID: 'id', | 169 self.IdType.USER_BY_ID: 'id', |
| 155 } | 170 } |
| 156 | 171 |
| 157 def delete_file(self, bucket, path): | 172 def delete_file(self, bucket, path): |
| (...skipping 417 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 575 dirs = [] | 590 dirs = [] |
| 576 files = [] | 591 files = [] |
| 577 for item in items: | 592 for item in items: |
| 578 t = type(item) | 593 t = type(item) |
| 579 if t is Key: | 594 if t is Key: |
| 580 files.append(item.name[prefix_length:]) | 595 files.append(item.name[prefix_length:]) |
| 581 elif t is Prefix: | 596 elif t is Prefix: |
| 582 dirs.append(item.name[prefix_length:-1]) | 597 dirs.append(item.name[prefix_length:-1]) |
| 583 return (dirs, files) | 598 return (dirs, files) |
| 584 | 599 |
| 600 def does_storage_object_exist(self, bucket, object_name): | |
| 601 """Determines whether an object exists in Google Storage. | |
| 602 | |
| 603 Returns True if it exists else returns False. | |
| 604 """ | |
| 605 b = self._connect_to_bucket(bucket=bucket) | |
| 606 if object_name in b: | |
| 607 return True | |
| 608 dirs, files = self.list_bucket_contents(bucket, object_name) | |
| 609 return bool(dirs or files) | |
| 610 | |
| 585 @staticmethod | 611 @staticmethod |
| 586 def is_gs_url(url): | 612 def is_gs_url(url): |
| 587 """Returns True if url is a legal Google Storage URL ("gs://bucket/file"). | 613 """Returns True if url is a legal Google Storage URL ("gs://bucket/file"). |
| 588 """ | 614 """ |
| 589 try: | 615 try: |
| 590 if url.lower().startswith(_GS_PREFIX) and len(url) > len(_GS_PREFIX): | 616 if url.lower().startswith(GS_PREFIX) and len(url) > len(GS_PREFIX): |
| 591 return url[len(_GS_PREFIX)].isalnum() | 617 return url[len(GS_PREFIX)].isalnum() |
| 592 else: | 618 else: |
| 593 return False | 619 return False |
| 594 except AttributeError: | 620 except AttributeError: |
| 595 return False | 621 return False |
| 596 | 622 |
| 597 @staticmethod | 623 @staticmethod |
| 598 def split_gs_url(url): | 624 def split_gs_url(url): |
| 599 """Returns (bucket, filepath) corresponding to a legal Google Storage URL. | 625 """Returns (bucket, filepath) corresponding to a legal Google Storage URL. |
| 600 | 626 |
| 601 Raises AttributeError if the input URL is not a legal Google Storage URL. | 627 Raises AttributeError if the input URL is not a legal Google Storage URL. |
| 602 """ | 628 """ |
| 603 if not GSUtils.is_gs_url(url): | 629 if not GSUtils.is_gs_url(url): |
| 604 raise AttributeError('"%s" is not a legal Google Storage URL' % url) | 630 raise AttributeError('"%s" is not a legal Google Storage URL' % url) |
| 605 prefix_removed = url[len(_GS_PREFIX):] | 631 prefix_removed = url[len(GS_PREFIX):] |
| 606 pathsep_index = prefix_removed.find('/') | 632 pathsep_index = prefix_removed.find('/') |
| 607 if pathsep_index < 0: | 633 if pathsep_index < 0: |
| 608 return (prefix_removed, '') | 634 return (prefix_removed, '') |
| 609 else: | 635 else: |
| 610 return (prefix_removed[:pathsep_index], | 636 return (prefix_removed[:pathsep_index], |
| 611 prefix_removed[pathsep_index+1:].strip('/')) | 637 prefix_removed[pathsep_index+1:].strip('/')) |
| 612 | 638 |
| 613 def _connect_to_bucket(self, bucket): | 639 def _connect_to_bucket(self, bucket): |
| 614 """Returns a Bucket object we can use to access a particular bucket in GS. | 640 """Returns a Bucket object we can use to access a particular bucket in GS. |
| 615 | 641 |
| (...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 675 | 701 |
| 676 def _get_local_md5(path): | 702 def _get_local_md5(path): |
| 677 """Returns the MD5 hash of a file on local disk.""" | 703 """Returns the MD5 hash of a file on local disk.""" |
| 678 hasher = hashlib.md5() | 704 hasher = hashlib.md5() |
| 679 with open(path, 'rb') as f: | 705 with open(path, 'rb') as f: |
| 680 while True: | 706 while True: |
| 681 data = f.read(64*1024) | 707 data = f.read(64*1024) |
| 682 if not data: | 708 if not data: |
| 683 return hasher.hexdigest() | 709 return hasher.hexdigest() |
| 684 hasher.update(data) | 710 hasher.update(data) |
| OLD | NEW |