OLD | NEW |
---|---|
1 #!/usr/bin/python | 1 #!/usr/bin/python |
2 | 2 |
3 # pylint: disable=C0301 | 3 # pylint: disable=C0301 |
4 """ | 4 """ |
5 Copyright 2014 Google Inc. | 5 Copyright 2014 Google Inc. |
6 | 6 |
7 Use of this source code is governed by a BSD-style license that can be | 7 Use of this source code is governed by a BSD-style license that can be |
8 found in the LICENSE file. | 8 found in the LICENSE file. |
9 | 9 |
10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper | 10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper |
(...skipping 83 matching lines...) | |
94 PUBLIC_READ_WRITE = 'public-read-write' | 94 PUBLIC_READ_WRITE = 'public-read-write' |
95 | 95 |
96 class IdType: | 96 class IdType: |
97 """Types of identifiers we can use to set "fine-grained" ACLs.""" | 97 """Types of identifiers we can use to set "fine-grained" ACLs.""" |
98 GROUP_BY_DOMAIN = acl.GROUP_BY_DOMAIN | 98 GROUP_BY_DOMAIN = acl.GROUP_BY_DOMAIN |
99 GROUP_BY_EMAIL = acl.GROUP_BY_EMAIL | 99 GROUP_BY_EMAIL = acl.GROUP_BY_EMAIL |
100 GROUP_BY_ID = acl.GROUP_BY_ID | 100 GROUP_BY_ID = acl.GROUP_BY_ID |
101 USER_BY_EMAIL = acl.USER_BY_EMAIL | 101 USER_BY_EMAIL = acl.USER_BY_EMAIL |
102 USER_BY_ID = acl.USER_BY_ID | 102 USER_BY_ID = acl.USER_BY_ID |
103 | 103 |
104 class UploadIf: | |
105 """Cases in which we will upload a file. | |
106 | |
107 Beware of performance tradeoffs. E.g., if the file is small, the extra | |
108 round trip to check for file existence and/or checksum may take longer than | |
109 just uploading the file.""" | |
110 ALWAYS = 1 # always upload the file | |
111 IF_NEW = 2 # if there is an existing file with the same name, | |
112 # leave it alone | |
113 IF_MODIFIED = 3 # if there is an existing file with the same name and | |
114 # contents, leave it alone | |
104 | 115 |
105 def __init__(self, boto_file_path=None): | 116 def __init__(self, boto_file_path=None): |
106 """Constructor. | 117 """Constructor. |
107 | 118 |
108 Params: | 119 Params: |
109 boto_file_path: full path (local-OS-style) on local disk where .boto | 120 boto_file_path: full path (local-OS-style) on local disk where .boto |
110 credentials file can be found. If None, then the GSUtils object | 121 credentials file can be found. If None, then the GSUtils object |
111 created will be able to access only public files in Google Storage. | 122 created will be able to access only public files in Google Storage. |
112 | 123 |
113 Raises an exception if no file is found at boto_file_path, or if the file | 124 Raises an exception if no file is found at boto_file_path, or if the file |
(...skipping 51 matching lines...) | |
165 if not key: | 176 if not key: |
166 return None | 177 return None |
167 return key.last_modified | 178 return key.last_modified |
168 except BotoServerError, e: | 179 except BotoServerError, e: |
169 e.body = (repr(e.body) + | 180 e.body = (repr(e.body) + |
170 ' while getting attributes of bucket=%s, path=%s' % ( | 181 ' while getting attributes of bucket=%s, path=%s' % ( |
171 bucket, path)) | 182 bucket, path)) |
172 raise | 183 raise |
173 | 184 |
174 def upload_file(self, source_path, dest_bucket, dest_path, | 185 def upload_file(self, source_path, dest_bucket, dest_path, |
175 only_if_modified=False, predefined_acl=None, | 186 upload_if=UploadIf.ALWAYS, |
187 predefined_acl=None, | |
176 fine_grained_acl_list=None): | 188 fine_grained_acl_list=None): |
177 """Upload contents of a local file to Google Storage. | 189 """Upload contents of a local file to Google Storage. |
178 | 190 |
179 params: | 191 params: |
180 source_path: full path (local-OS-style) on local disk to read from | 192 source_path: full path (local-OS-style) on local disk to read from |
181 dest_bucket: GCS bucket to copy the file to | 193 dest_bucket: GCS bucket to copy the file to |
182 dest_path: full path (Posix-style) within that bucket | 194 dest_path: full path (Posix-style) within that bucket |
183 only_if_modified: if True, only upload the file if it would actually | 195 upload_if: one of the UploadIf values, describing in which cases we should |
184 change the content on Google Storage (uploads the file if dest_path | 196 upload the file |
185 does not exist, or if it exists but has different contents than | |
186 source_path). Note that this may take longer than just uploading the | |
187 file without checking first, due to extra round-trips! | |
188 predefined_acl: which predefined ACL to apply to the file on Google | 197 predefined_acl: which predefined ACL to apply to the file on Google |
189 Storage; must be one of the PredefinedACL values defined above. | 198 Storage; must be one of the PredefinedACL values defined above. |
190 If None, inherits dest_bucket's default object ACL. | 199 If None, inherits dest_bucket's default object ACL. |
191 TODO(epoger): add unittests for this param, although it seems to work | 200 TODO(epoger): add unittests for this param, although it seems to work |
192 in my manual testing | 201 in my manual testing |
193 fine_grained_acl_list: list of (id_type, id_value, permission) tuples | 202 fine_grained_acl_list: list of (id_type, id_value, permission) tuples |
194 to apply to the uploaded file (on top of the predefined_acl), | 203 to apply to the uploaded file (on top of the predefined_acl), |
195 or None if predefined_acl is sufficient | 204 or None if predefined_acl is sufficient |
196 """ | 205 """ |
197 b = self._connect_to_bucket(bucket_name=dest_bucket) | 206 b = self._connect_to_bucket(bucket_name=dest_bucket) |
198 | 207 |
199 if only_if_modified: | 208 if upload_if == self.UploadIf.IF_NEW: |
209 old_key = b.get_key(key_name=dest_path) | |
210 if old_key: | |
211 print 'Skipping upload of existing file gs://%s/%s' % ( | |
212 dest_bucket, dest_path) | |
213 return | |
214 elif upload_if == self.UploadIf.IF_MODIFIED: | |
200 old_key = b.get_key(key_name=dest_path) | 215 old_key = b.get_key(key_name=dest_path) |
201 if old_key: | 216 if old_key: |
202 local_md5 = '"%s"' % _get_local_md5(path=source_path) | 217 local_md5 = '"%s"' % _get_local_md5(path=source_path) |
203 if local_md5 == old_key.etag: | 218 if local_md5 == old_key.etag: |
204 print 'Skipping upload of unmodified file %s : %s' % ( | 219 print 'Skipping upload of unmodified file gs://%s/%s : %s' % ( |
205 source_path, local_md5) | 220 dest_bucket, dest_path, local_md5) |
206 return | 221 return |
207 | 222 |
borenet
2014/07/24 14:21:50
I might add an "elif upload_if != self.UploadIf.ALWAYS" ... [rest of comment truncated in extraction]
epoger
2014/07/24 14:49:49
Good idea, thanks! Done.
| |
208 key = Key(b) | 223 key = Key(b) |
209 key.name = dest_path | 224 key.name = dest_path |
210 try: | 225 try: |
211 key.set_contents_from_filename(filename=source_path, | 226 key.set_contents_from_filename(filename=source_path, |
212 policy=predefined_acl) | 227 policy=predefined_acl) |
213 except BotoServerError, e: | 228 except BotoServerError, e: |
214 e.body = (repr(e.body) + | 229 e.body = (repr(e.body) + |
215 ' while uploading source_path=%s to bucket=%s, path=%s' % ( | 230 ' while uploading source_path=%s to bucket=%s, path=%s' % ( |
216 source_path, dest_bucket, key.name)) | 231 source_path, dest_bucket, key.name)) |
217 raise | 232 raise |
(...skipping 321 matching lines...) | |
539 | 554 |
540 def _get_local_md5(path): | 555 def _get_local_md5(path): |
541 """Returns the MD5 hash of a file on local disk.""" | 556 """Returns the MD5 hash of a file on local disk.""" |
542 hasher = hashlib.md5() | 557 hasher = hashlib.md5() |
543 with open(path, 'rb') as f: | 558 with open(path, 'rb') as f: |
544 while True: | 559 while True: |
545 data = f.read(64*1024) | 560 data = f.read(64*1024) |
546 if not data: | 561 if not data: |
547 return hasher.hexdigest() | 562 return hasher.hexdigest() |
548 hasher.update(data) | 563 hasher.update(data) |
OLD | NEW |