Index: py/utils/gs_utils.py |
diff --git a/py/utils/gs_utils.py b/py/utils/gs_utils.py |
index 1d39ef7f27e17d4dc0a8b7075a40dd41f96040ef..6241ba1e2d156380e91b8a5361be18a39ab1172c 100644 |
--- a/py/utils/gs_utils.py |
+++ b/py/utils/gs_utils.py |
@@ -101,6 +101,17 @@ class GSUtils(object): |
USER_BY_EMAIL = acl.USER_BY_EMAIL |
USER_BY_ID = acl.USER_BY_ID |
+ class UploadIf: |
+ """Cases in which we will upload a file. |
+ |
+ Beware of performance tradeoffs. E.g., if the file is small, the extra |
+ round trip to check for file existence and/or checksum may take longer than |
+ just uploading the file.""" |
+ ALWAYS = 1 # always upload the file (no pre-upload check) |
+ IF_NEW = 2 # if there is an existing file with the same name, |
+ # leave it alone (contents are not compared) |
+ IF_MODIFIED = 3 # if there is an existing file with the same name and |
+ # contents (MD5 equals its etag), leave it alone |
def __init__(self, boto_file_path=None): |
"""Constructor. |
@@ -172,7 +183,8 @@ class GSUtils(object): |
raise |
def upload_file(self, source_path, dest_bucket, dest_path, |
- only_if_modified=False, predefined_acl=None, |
+ upload_if=UploadIf.ALWAYS, |
+ predefined_acl=None, |
fine_grained_acl_list=None): |
"""Upload contents of a local file to Google Storage. |
@@ -180,11 +192,8 @@ class GSUtils(object): |
source_path: full path (local-OS-style) on local disk to read from |
dest_bucket: GCS bucket to copy the file to |
dest_path: full path (Posix-style) within that bucket |
- only_if_modified: if True, only upload the file if it would actually |
- change the content on Google Storage (uploads the file if dest_path |
- does not exist, or if it exists but has different contents than |
- source_path). Note that this may take longer than just uploading the |
- file without checking first, due to extra round-trips! |
+ upload_if: one of the UploadIf values, describing in which cases we should |
+ upload the file |
predefined_acl: which predefined ACL to apply to the file on Google |
Storage; must be one of the PredefinedACL values defined above. |
If None, inherits dest_bucket's default object ACL. |
@@ -196,13 +205,19 @@ class GSUtils(object): |
""" |
b = self._connect_to_bucket(bucket_name=dest_bucket) |
- if only_if_modified: |
+ if upload_if == self.UploadIf.IF_NEW: |
+ old_key = b.get_key(key_name=dest_path) |
+ if old_key: |
+ print 'Skipping upload of existing file gs://%s/%s' % ( |
+ dest_bucket, dest_path) |
+ return |
+ elif upload_if == self.UploadIf.IF_MODIFIED: |
old_key = b.get_key(key_name=dest_path) |
if old_key: |
local_md5 = '"%s"' % _get_local_md5(path=source_path) |
if local_md5 == old_key.etag: |
- print 'Skipping upload of unmodified file %s : %s' % ( |
- source_path, local_md5) |
+ print 'Skipping upload of unmodified file gs://%s/%s : %s' % ( |
+ dest_bucket, dest_path, local_md5) |
return |
borenet
2014/07/24 14:21:50
I might add an "elif upload_if != self.UploadIf.AL
epoger
2014/07/24 14:49:49
Good idea, thanks! Done.
|
key = Key(b) |