Index: py/utils/gs_utils.py |
diff --git a/py/utils/gs_utils.py b/py/utils/gs_utils.py |
index 7f3b59c058c4e3d44becbd434c6f14a90e31ab92..f53b4d1427bf9078463547fdf7d5751ef437a24d 100644 |
--- a/py/utils/gs_utils.py |
+++ b/py/utils/gs_utils.py |
@@ -160,7 +160,7 @@ class GSUtils(object): |
key.delete() |
except BotoServerError, e: |
e.body = (repr(e.body) + |
- ' while deleting bucket=%s, path=%s' % (b.name, path)) |
+ ' while deleting gs://%s/%s' % (b.name, path)) |
raise |
def get_last_modified_time(self, bucket, path): |
@@ -181,8 +181,7 @@ class GSUtils(object): |
return key.last_modified |
except BotoServerError, e: |
e.body = (repr(e.body) + |
- ' while getting attributes of bucket=%s, path=%s' % ( |
- b.name, path)) |
+ ' while getting attributes of gs://%s/%s' % (b.name, path)) |
raise |
def upload_file(self, source_path, dest_bucket, dest_path, |
@@ -211,6 +210,7 @@ class GSUtils(object): |
WorkingWithObjectMetadata#content-encoding |
""" |
b = self._connect_to_bucket(bucket=dest_bucket) |
+ local_md5 = None # filled in lazily |
if upload_if == self.UploadIf.IF_NEW: |
old_key = b.get_key(key_name=dest_path) |
@@ -221,27 +221,62 @@ class GSUtils(object): |
elif upload_if == self.UploadIf.IF_MODIFIED: |
old_key = b.get_key(key_name=dest_path) |
if old_key: |
- local_md5 = '"%s"' % _get_local_md5(path=source_path) |
- if local_md5 == old_key.etag: |
+ if not local_md5: |
+ local_md5 = _get_local_md5(path=source_path) |
+ if ('"%s"' % local_md5) == old_key.etag: |
print 'Skipping upload of unmodified file gs://%s/%s : %s' % ( |
b.name, dest_path, local_md5) |
return |
elif upload_if != self.UploadIf.ALWAYS: |
raise Exception('unknown value of upload_if: %s' % upload_if) |
- key = Key(b) |
- key.name = dest_path |
+ # Upload the file using a temporary name at first, in case the transfer |
+ # is interrupted partway through. |
+ if not local_md5: |
+ local_md5 = _get_local_md5(path=source_path) |
+ initial_key = Key(b) |
+ initial_key.name = dest_path + '-uploading-' + local_md5 |
try: |
- key.set_contents_from_filename(filename=source_path, |
- policy=predefined_acl) |
+ initial_key.set_contents_from_filename(filename=source_path, |
+ policy=predefined_acl) |
except BotoServerError, e: |
e.body = (repr(e.body) + |
- ' while uploading source_path=%s to bucket=%s, path=%s' % ( |
- source_path, b.name, key.name)) |
+ ' while uploading source_path=%s to gs://%s/%s' % ( |
+ source_path, b.name, initial_key.name)) |
raise |
+ |
+ # Verify that the file contents were uploaded successfully. |
+ # |
+ # TODO(epoger): Check whether the boto library or XML API already do this... |
+ # if so, we may be duplicating effort here, and maybe we don't need to do |
+ # the whole "upload using temporary filename, then rename" thing. |
+ # |
+ # TODO(epoger): Confirm that the etag is set on the server side... |
+ # otherwise, we may just be validating another MD5 hash that was generated |
+ # on the client side before the file was uploaded! |
+ validate_key = b.get_key(key_name=initial_key.name) |
+ if validate_key.etag != ('"%s"' % local_md5): |
+ raise Exception('found wrong MD5 after uploading gs://%s/%s' % ( |
+ b.name, validate_key.name)) |
+ |
+ # Rename the file to its real name. |
+ # |
+ # TODO(epoger): I don't know how long this takes. I wish we could rename |
+ # the key instead, but AFAICT you can't do that. |
+ # Perhaps we could use Key.compose() to create a composite object pointing |
+ # at the original key? |
+ # See https://developers.google.com/storage/docs/composite-objects |
+ final_key = b.copy_key( |
+ new_key_name=dest_path, src_key_name=initial_key.name, |
+ src_bucket_name=b.name, preserve_acl=False) |
+ initial_key.delete() |
+ |
+ # Set ACLs on the file. |
+ # We do this *after* copy_key(), because copy_key's preserve_acl |
+ # functionality would incur a performance hit. |
for (id_type, id_value, permission) in fine_grained_acl_list or []: |
self.set_acl( |
- bucket=b, path=key.name, |
+ bucket=b, path=final_key.name, |
id_type=id_type, id_value=id_value, permission=permission) |
def upload_dir_contents(self, source_dir, dest_bucket, dest_dir, |
@@ -337,7 +372,7 @@ class GSUtils(object): |
key.get_contents_to_file(fp=f) |
except BotoServerError, e: |
e.body = (repr(e.body) + |
- ' while downloading bucket=%s, path=%s to local_path=%s' % ( |
+ ' while downloading gs://%s/%s to local_path=%s' % ( |
b.name, source_path, dest_path)) |
raise |
@@ -371,7 +406,7 @@ class GSUtils(object): |
key.get_contents_to_file(fp=f) |
except BotoServerError, e: |
e.body = (repr(e.body) + |
- ' while downloading bucket=%s, path=%s to local_path=%s' % ( |
+ ' while downloading gs://%s/%s to local_path=%s' % ( |
b.name, key.name, dest_path)) |
raise |