OLD | NEW |
---|---|
1 #!/usr/bin/python | 1 #!/usr/bin/python |
2 | 2 |
3 # pylint: disable=C0301 | 3 # pylint: disable=C0301 |
4 """ | 4 """ |
5 Copyright 2014 Google Inc. | 5 Copyright 2014 Google Inc. |
6 | 6 |
7 Use of this source code is governed by a BSD-style license that can be | 7 Use of this source code is governed by a BSD-style license that can be |
8 found in the LICENSE file. | 8 found in the LICENSE file. |
9 | 9 |
10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper | 10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper |
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
45 from boto.s3.connection import SubdomainCallingFormat | 45 from boto.s3.connection import SubdomainCallingFormat |
46 from boto.s3.prefix import Prefix | 46 from boto.s3.prefix import Prefix |
47 | 47 |
48 # How many files to upload at once, by default. | 48 # How many files to upload at once, by default. |
49 # TODO(epoger): Is there a way to compute this intelligently? To some extent | 49 # TODO(epoger): Is there a way to compute this intelligently? To some extent |
50 # it is a function of how many cores are on the machine, and how many other | 50 # it is a function of how many cores are on the machine, and how many other |
51 # processes it is running; but it's probably more a function of how much time | 51 # processes it is running; but it's probably more a function of how much time |
52 # each core sits idle waiting for network I/O to complete. | 52 # each core sits idle waiting for network I/O to complete. |
53 DEFAULT_UPLOAD_THREADS = 10 | 53 DEFAULT_UPLOAD_THREADS = 10 |
54 | 54 |
55 _GS_PREFIX = 'gs://' | 55 GS_PREFIX = 'gs://' |
56 | 56 |
57 | 57 |
58 class AnonymousGSConnection(GSConnection): | 58 class AnonymousGSConnection(GSConnection): |
59 """GSConnection class that allows anonymous connections. | 59 """GSConnection class that allows anonymous connections. |
60 | 60 |
61 The GSConnection class constructor in | 61 The GSConnection class constructor in |
62 https://github.com/boto/boto/blob/develop/boto/gs/connection.py doesn't allow | 62 https://github.com/boto/boto/blob/develop/boto/gs/connection.py doesn't allow |
63 for anonymous connections (connections without credentials), so we have to | 63 for anonymous connections (connections without credentials), so we have to |
64 override it. | 64 override it. |
65 """ | 65 """ |
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
125 IF_NEW = 2 # if there is an existing file with the same name, | 125 IF_NEW = 2 # if there is an existing file with the same name, |
126 # leave it alone | 126 # leave it alone |
127 IF_MODIFIED = 3 # if there is an existing file with the same name and | 127 IF_MODIFIED = 3 # if there is an existing file with the same name and |
128 # contents, leave it alone | 128 # contents, leave it alone |
129 | 129 |
130 def __init__(self, boto_file_path=None): | 130 def __init__(self, boto_file_path=None): |
131 """Constructor. | 131 """Constructor. |
132 | 132 |
133 Params: | 133 Params: |
134 boto_file_path: full path (local-OS-style) on local disk where .boto | 134 boto_file_path: full path (local-OS-style) on local disk where .boto |
135 credentials file can be found. If None, then the GSUtils object | 135 credentials file can be found. If None, fall back on the |
136 created will be able to access only public files in Google Storage. | 136 AWS_CREDENTIAL_FILE environment variable, then look in a set of |
137 common paths for the .boto file. If no .boto file is found, then the | |
138 GSUtils object created will be able to access only public files in | |
139 Google Storage. | |
137 | 140 |
138 Raises an exception if no file is found at boto_file_path, or if the file | 141 Raises an exception if no file is found at boto_file_path, or if the file |
139 found there is malformed. | 142 found there is malformed. |
140 """ | 143 """ |
141 self._gs_access_key_id = None | 144 self._gs_access_key_id = None |
142 self._gs_secret_access_key = None | 145 self._gs_secret_access_key = None |
146 if not boto_file_path: | |
147 if os.environ.get('AWS_CREDENTIAL_FILE'): | |
148 boto_file_path = os.path.expanduser(os.environ['AWS_CREDENTIAL_FILE']) | |
149 if not boto_file_path: | |
rmistry
2014/10/16 18:09:58
Remove this if since you already have it above.
borenet
2014/10/16 18:12:51
This is only intended to run if AWS_CREDENTIAL_FILE is not set.
| |
150 for path in (os.path.join(os.path.expanduser('~'), '.boto'),): | |
151 if os.path.isfile(path): | |
152 boto_file_path = path | |
153 break | |
154 | |
143 if boto_file_path: | 155 if boto_file_path: |
144 print ('Reading boto file from %s' % boto_file_path) | 156 print ('Reading boto file from %s' % boto_file_path) |
145 boto_dict = _config_file_as_dict(filepath=boto_file_path) | 157 boto_dict = _config_file_as_dict(filepath=boto_file_path) |
146 self._gs_access_key_id = boto_dict['gs_access_key_id'] | 158 self._gs_access_key_id = boto_dict['gs_access_key_id'] |
147 self._gs_secret_access_key = boto_dict['gs_secret_access_key'] | 159 self._gs_secret_access_key = boto_dict['gs_secret_access_key'] |
160 else: | |
161 print >> sys.stderr, 'Warning: no .boto file found.' | |
162 | |
148 # Which field we get/set in ACL entries, depending on IdType. | 163 # Which field we get/set in ACL entries, depending on IdType. |
149 self._field_by_id_type = { | 164 self._field_by_id_type = { |
150 self.IdType.GROUP_BY_DOMAIN: 'domain', | 165 self.IdType.GROUP_BY_DOMAIN: 'domain', |
151 self.IdType.GROUP_BY_EMAIL: 'email_address', | 166 self.IdType.GROUP_BY_EMAIL: 'email_address', |
152 self.IdType.GROUP_BY_ID: 'id', | 167 self.IdType.GROUP_BY_ID: 'id', |
153 self.IdType.USER_BY_EMAIL: 'email_address', | 168 self.IdType.USER_BY_EMAIL: 'email_address', |
154 self.IdType.USER_BY_ID: 'id', | 169 self.IdType.USER_BY_ID: 'id', |
155 } | 170 } |
156 | 171 |
157 def delete_file(self, bucket, path): | 172 def delete_file(self, bucket, path): |
(...skipping 417 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
575 dirs = [] | 590 dirs = [] |
576 files = [] | 591 files = [] |
577 for item in items: | 592 for item in items: |
578 t = type(item) | 593 t = type(item) |
579 if t is Key: | 594 if t is Key: |
580 files.append(item.name[prefix_length:]) | 595 files.append(item.name[prefix_length:]) |
581 elif t is Prefix: | 596 elif t is Prefix: |
582 dirs.append(item.name[prefix_length:-1]) | 597 dirs.append(item.name[prefix_length:-1]) |
583 return (dirs, files) | 598 return (dirs, files) |
584 | 599 |
600 def does_storage_object_exist(self, bucket, object_name): | |
601 """Determines whether an object exists in Google Storage. | |
602 | |
603 Returns True if it exists else returns False. | |
604 """ | |
605 b = self._connect_to_bucket(bucket=bucket) | |
606 if object_name in b: | |
607 return True | |
608 dirs, files = self.list_bucket_contents(bucket, object_name) | |
609 return bool(dirs or files) | |
610 | |
585 @staticmethod | 611 @staticmethod |
586 def is_gs_url(url): | 612 def is_gs_url(url): |
587 """Returns True if url is a legal Google Storage URL ("gs://bucket/file"). | 613 """Returns True if url is a legal Google Storage URL ("gs://bucket/file"). |
588 """ | 614 """ |
589 try: | 615 try: |
590 if url.lower().startswith(_GS_PREFIX) and len(url) > len(_GS_PREFIX): | 616 if url.lower().startswith(GS_PREFIX) and len(url) > len(GS_PREFIX): |
591 return url[len(_GS_PREFIX)].isalnum() | 617 return url[len(GS_PREFIX)].isalnum() |
592 else: | 618 else: |
593 return False | 619 return False |
594 except AttributeError: | 620 except AttributeError: |
595 return False | 621 return False |
596 | 622 |
597 @staticmethod | 623 @staticmethod |
598 def split_gs_url(url): | 624 def split_gs_url(url): |
599 """Returns (bucket, filepath) corresponding to a legal Google Storage URL. | 625 """Returns (bucket, filepath) corresponding to a legal Google Storage URL. |
600 | 626 |
601 Raises AttributeError if the input URL is not a legal Google Storage URL. | 627 Raises AttributeError if the input URL is not a legal Google Storage URL. |
602 """ | 628 """ |
603 if not GSUtils.is_gs_url(url): | 629 if not GSUtils.is_gs_url(url): |
604 raise AttributeError('"%s" is not a legal Google Storage URL' % url) | 630 raise AttributeError('"%s" is not a legal Google Storage URL' % url) |
605 prefix_removed = url[len(_GS_PREFIX):] | 631 prefix_removed = url[len(GS_PREFIX):] |
606 pathsep_index = prefix_removed.find('/') | 632 pathsep_index = prefix_removed.find('/') |
607 if pathsep_index < 0: | 633 if pathsep_index < 0: |
608 return (prefix_removed, '') | 634 return (prefix_removed, '') |
609 else: | 635 else: |
610 return (prefix_removed[:pathsep_index], | 636 return (prefix_removed[:pathsep_index], |
611 prefix_removed[pathsep_index+1:].strip('/')) | 637 prefix_removed[pathsep_index+1:].strip('/')) |
612 | 638 |
613 def _connect_to_bucket(self, bucket): | 639 def _connect_to_bucket(self, bucket): |
614 """Returns a Bucket object we can use to access a particular bucket in GS. | 640 """Returns a Bucket object we can use to access a particular bucket in GS. |
615 | 641 |
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
675 | 701 |
676 def _get_local_md5(path): | 702 def _get_local_md5(path): |
677 """Returns the MD5 hash of a file on local disk.""" | 703 """Returns the MD5 hash of a file on local disk.""" |
678 hasher = hashlib.md5() | 704 hasher = hashlib.md5() |
679 with open(path, 'rb') as f: | 705 with open(path, 'rb') as f: |
680 while True: | 706 while True: |
681 data = f.read(64*1024) | 707 data = f.read(64*1024) |
682 if not data: | 708 if not data: |
683 return hasher.hexdigest() | 709 return hasher.hexdigest() |
684 hasher.update(data) | 710 hasher.update(data) |
OLD | NEW |