Chromium Code Reviews| Index: py/utils/gs_utils.py |
| diff --git a/py/utils/gs_utils.py b/py/utils/gs_utils.py |
| index 1d39ef7f27e17d4dc0a8b7075a40dd41f96040ef..6241ba1e2d156380e91b8a5361be18a39ab1172c 100644 |
| --- a/py/utils/gs_utils.py |
| +++ b/py/utils/gs_utils.py |
| @@ -101,6 +101,17 @@ class GSUtils(object): |
| USER_BY_EMAIL = acl.USER_BY_EMAIL |
| USER_BY_ID = acl.USER_BY_ID |
| + class UploadIf: |
| + """Cases in which we will upload a file. |
| + |
| + Beware of performance tradeoffs. E.g., if the file is small, the extra |
| + round trip to check for file existence and/or checksum may take longer than |
| + just uploading the file.""" |
| + ALWAYS = 1 # always upload the file |
| + IF_NEW = 2 # if there is an existing file with the same name, |
| + # leave it alone |
| + IF_MODIFIED = 3 # if there is an existing file with the same name and |
| + # contents, leave it alone |
| def __init__(self, boto_file_path=None): |
| """Constructor. |
| @@ -172,7 +183,8 @@ class GSUtils(object): |
| raise |
| def upload_file(self, source_path, dest_bucket, dest_path, |
| - only_if_modified=False, predefined_acl=None, |
| + upload_if=UploadIf.ALWAYS, |
| + predefined_acl=None, |
| fine_grained_acl_list=None): |
| """Upload contents of a local file to Google Storage. |
| @@ -180,11 +192,8 @@ class GSUtils(object): |
| source_path: full path (local-OS-style) on local disk to read from |
| dest_bucket: GCS bucket to copy the file to |
| dest_path: full path (Posix-style) within that bucket |
| - only_if_modified: if True, only upload the file if it would actually |
| - change the content on Google Storage (uploads the file if dest_path |
| - does not exist, or if it exists but has different contents than |
| - source_path). Note that this may take longer than just uploading the |
| - file without checking first, due to extra round-trips! |
| + upload_if: one of the UploadIf values, describing in which cases we should |
| + upload the file |
| predefined_acl: which predefined ACL to apply to the file on Google |
| Storage; must be one of the PredefinedACL values defined above. |
| If None, inherits dest_bucket's default object ACL. |
| @@ -196,13 +205,19 @@ class GSUtils(object): |
| """ |
| b = self._connect_to_bucket(bucket_name=dest_bucket) |
| - if only_if_modified: |
| + if upload_if == self.UploadIf.IF_NEW: |
| + old_key = b.get_key(key_name=dest_path) |
| + if old_key: |
| + print 'Skipping upload of existing file gs://%s/%s' % ( |
| + dest_bucket, dest_path) |
| + return |
| + elif upload_if == self.UploadIf.IF_MODIFIED: |
| old_key = b.get_key(key_name=dest_path) |
| if old_key: |
| local_md5 = '"%s"' % _get_local_md5(path=source_path) |
| if local_md5 == old_key.etag: |
| - print 'Skipping upload of unmodified file %s : %s' % ( |
| - source_path, local_md5) |
| + print 'Skipping upload of unmodified file gs://%s/%s : %s' % ( |
| + dest_bucket, dest_path, local_md5) |
| return |
|
borenet
2014/07/24 14:21:50
I might add an "elif upload_if != self.UploadIf.ALWAYS ..." [remainder of comment truncated in this capture]
epoger
2014/07/24 14:49:49
Good idea, thanks! Done.
|
| key = Key(b) |