Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 #!/usr/bin/python | 1 #!/usr/bin/python |
| 2 | 2 |
| 3 # pylint: disable=C0301 | 3 # pylint: disable=C0301 |
| 4 """ | 4 """ |
| 5 Copyright 2014 Google Inc. | 5 Copyright 2014 Google Inc. |
| 6 | 6 |
| 7 Use of this source code is governed by a BSD-style license that can be | 7 Use of this source code is governed by a BSD-style license that can be |
| 8 found in the LICENSE file. | 8 found in the LICENSE file. |
| 9 | 9 |
| 10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper | 10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper |
| (...skipping 83 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 94 PUBLIC_READ_WRITE = 'public-read-write' | 94 PUBLIC_READ_WRITE = 'public-read-write' |
| 95 | 95 |
class IdType:
  """Identifier types usable when setting "fine-grained" (per-entity) ACLs.

  Each constant simply aliases the corresponding scope-type constant from
  boto's gs acl module, so callers of this wrapper never need to import
  boto directly.
  """
  GROUP_BY_DOMAIN = acl.GROUP_BY_DOMAIN
  GROUP_BY_EMAIL = acl.GROUP_BY_EMAIL
  GROUP_BY_ID = acl.GROUP_BY_ID
  USER_BY_EMAIL = acl.USER_BY_EMAIL
  USER_BY_ID = acl.USER_BY_ID
| 103 | 103 |
class UploadIf:
  """Enumerates the conditions under which upload_file() will write to GCS.

  Beware of performance tradeoffs: for a small file, the extra round trip
  needed to check whether a destination file exists (and possibly compare
  its checksum) may cost more than just uploading it unconditionally.
  """
  ALWAYS = 1       # upload unconditionally
  IF_NEW = 2       # skip the upload if a file with the same name already
                   # exists at the destination
  IF_MODIFIED = 3  # skip the upload only if a file with the same name AND
                   # the same contents already exists at the destination
| 104 | 115 |
| 105 def __init__(self, boto_file_path=None): | 116 def __init__(self, boto_file_path=None): |
| 106 """Constructor. | 117 """Constructor. |
| 107 | 118 |
| 108 Params: | 119 Params: |
| 109 boto_file_path: full path (local-OS-style) on local disk where .boto | 120 boto_file_path: full path (local-OS-style) on local disk where .boto |
| 110 credentials file can be found. If None, then the GSUtils object | 121 credentials file can be found. If None, then the GSUtils object |
| 111 created will be able to access only public files in Google Storage. | 122 created will be able to access only public files in Google Storage. |
| 112 | 123 |
| 113 Raises an exception if no file is found at boto_file_path, or if the file | 124 Raises an exception if no file is found at boto_file_path, or if the file |
| (...skipping 51 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 165 if not key: | 176 if not key: |
| 166 return None | 177 return None |
| 167 return key.last_modified | 178 return key.last_modified |
| 168 except BotoServerError, e: | 179 except BotoServerError, e: |
| 169 e.body = (repr(e.body) + | 180 e.body = (repr(e.body) + |
| 170 ' while getting attributes of bucket=%s, path=%s' % ( | 181 ' while getting attributes of bucket=%s, path=%s' % ( |
| 171 bucket, path)) | 182 bucket, path)) |
| 172 raise | 183 raise |
| 173 | 184 |
| 174 def upload_file(self, source_path, dest_bucket, dest_path, | 185 def upload_file(self, source_path, dest_bucket, dest_path, |
| 175 only_if_modified=False, predefined_acl=None, | 186 upload_if=UploadIf.ALWAYS, |
| 187 predefined_acl=None, | |
| 176 fine_grained_acl_list=None): | 188 fine_grained_acl_list=None): |
| 177 """Upload contents of a local file to Google Storage. | 189 """Upload contents of a local file to Google Storage. |
| 178 | 190 |
| 179 params: | 191 params: |
| 180 source_path: full path (local-OS-style) on local disk to read from | 192 source_path: full path (local-OS-style) on local disk to read from |
| 181 dest_bucket: GCS bucket to copy the file to | 193 dest_bucket: GCS bucket to copy the file to |
| 182 dest_path: full path (Posix-style) within that bucket | 194 dest_path: full path (Posix-style) within that bucket |
| 183 only_if_modified: if True, only upload the file if it would actually | 195 upload_if: one of the UploadIf values, describing in which cases we should |
| 184 change the content on Google Storage (uploads the file if dest_path | 196 upload the file |
| 185 does not exist, or if it exists but has different contents than | |
| 186 source_path). Note that this may take longer than just uploading the | |
| 187 file without checking first, due to extra round-trips! | |
| 188 predefined_acl: which predefined ACL to apply to the file on Google | 197 predefined_acl: which predefined ACL to apply to the file on Google |
| 189 Storage; must be one of the PredefinedACL values defined above. | 198 Storage; must be one of the PredefinedACL values defined above. |
| 190 If None, inherits dest_bucket's default object ACL. | 199 If None, inherits dest_bucket's default object ACL. |
| 191 TODO(epoger): add unittests for this param, although it seems to work | 200 TODO(epoger): add unittests for this param, although it seems to work |
| 192 in my manual testing | 201 in my manual testing |
| 193 fine_grained_acl_list: list of (id_type, id_value, permission) tuples | 202 fine_grained_acl_list: list of (id_type, id_value, permission) tuples |
| 194 to apply to the uploaded file (on top of the predefined_acl), | 203 to apply to the uploaded file (on top of the predefined_acl), |
| 195 or None if predefined_acl is sufficient | 204 or None if predefined_acl is sufficient |
| 196 """ | 205 """ |
| 197 b = self._connect_to_bucket(bucket_name=dest_bucket) | 206 b = self._connect_to_bucket(bucket_name=dest_bucket) |
| 198 | 207 |
| 199 if only_if_modified: | 208 if upload_if == self.UploadIf.IF_NEW: |
| 209 old_key = b.get_key(key_name=dest_path) | |
| 210 if old_key: | |
| 211 print 'Skipping upload of existing file gs://%s/%s' % ( | |
| 212 dest_bucket, dest_path) | |
| 213 return | |
| 214 elif upload_if == self.UploadIf.IF_MODIFIED: | |
| 200 old_key = b.get_key(key_name=dest_path) | 215 old_key = b.get_key(key_name=dest_path) |
| 201 if old_key: | 216 if old_key: |
| 202 local_md5 = '"%s"' % _get_local_md5(path=source_path) | 217 local_md5 = '"%s"' % _get_local_md5(path=source_path) |
| 203 if local_md5 == old_key.etag: | 218 if local_md5 == old_key.etag: |
| 204 print 'Skipping upload of unmodified file %s : %s' % ( | 219 print 'Skipping upload of unmodified file gs://%s/%s : %s' % ( |
| 205 source_path, local_md5) | 220 dest_bucket, dest_path, local_md5) |
| 206 return | 221 return |
| 207 | 222 |
|
borenet
2014/07/24 14:21:50
I might add an "elif upload_if != self.UploadIf.ALWAYS" case (e.g. raising an exception on unknown values).
epoger
2014/07/24 14:49:49
Good idea, thanks! Done.
| |
| 208 key = Key(b) | 223 key = Key(b) |
| 209 key.name = dest_path | 224 key.name = dest_path |
| 210 try: | 225 try: |
| 211 key.set_contents_from_filename(filename=source_path, | 226 key.set_contents_from_filename(filename=source_path, |
| 212 policy=predefined_acl) | 227 policy=predefined_acl) |
| 213 except BotoServerError, e: | 228 except BotoServerError, e: |
| 214 e.body = (repr(e.body) + | 229 e.body = (repr(e.body) + |
| 215 ' while uploading source_path=%s to bucket=%s, path=%s' % ( | 230 ' while uploading source_path=%s to bucket=%s, path=%s' % ( |
| 216 source_path, dest_bucket, key.name)) | 231 source_path, dest_bucket, key.name)) |
| 217 raise | 232 raise |
| (...skipping 321 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 539 | 554 |
| 540 def _get_local_md5(path): | 555 def _get_local_md5(path): |
| 541 """Returns the MD5 hash of a file on local disk.""" | 556 """Returns the MD5 hash of a file on local disk.""" |
| 542 hasher = hashlib.md5() | 557 hasher = hashlib.md5() |
| 543 with open(path, 'rb') as f: | 558 with open(path, 'rb') as f: |
| 544 while True: | 559 while True: |
| 545 data = f.read(64*1024) | 560 data = f.read(64*1024) |
| 546 if not data: | 561 if not data: |
| 547 return hasher.hexdigest() | 562 return hasher.hexdigest() |
| 548 hasher.update(data) | 563 hasher.update(data) |
| OLD | NEW |