Index: py/utils/gs_utils.py |
diff --git a/py/utils/gs_utils.py b/py/utils/gs_utils.py |
index d49b3fe4f615da04ff9c05a05a7fa55d3e03ebda..1d39ef7f27e17d4dc0a8b7075a40dd41f96040ef 100644 |
--- a/py/utils/gs_utils.py |
+++ b/py/utils/gs_utils.py |
@@ -18,6 +18,7 @@ API/library references: |
# System-level imports |
import errno |
+import hashlib |
import os |
import posixpath |
import re |
@@ -139,27 +140,51 @@ class GSUtils(object): |
path: full path (Posix-style) of the file within the bucket to delete |
""" |
b = self._connect_to_bucket(bucket_name=bucket) |
- item = Key(b) |
- item.key = path |
+ key = Key(b) |
+ key.name = path |
try: |
- item.delete() |
+ key.delete() |
except BotoServerError, e: |
e.body = (repr(e.body) + |
' while deleting bucket=%s, path=%s' % (bucket, path)) |
raise |
+ def get_last_modified_time(self, bucket, path): |
+ """Gets the timestamp of when this file was last modified. |
+ |
+ Params: |
+ bucket: GS bucket in which to look for the file |
+ path: full path (Posix-style) of the file within the bucket to check |
+ |
+ Returns the last modified time, as a freeform string. If the file was not |
+ found, returns None. |
+ """ |
+ b = self._connect_to_bucket(bucket_name=bucket) |
+ try: |
+ key = b.get_key(key_name=path) |
+ if not key: |
+ return None |
+ return key.last_modified |
+ except BotoServerError, e: |
+ e.body = (repr(e.body) + |
+ ' while getting attributes of bucket=%s, path=%s' % ( |
+ bucket, path)) |
+ raise |
+ |
def upload_file(self, source_path, dest_bucket, dest_path, |
- predefined_acl=None, fine_grained_acl_list=None): |
+ only_if_modified=False, predefined_acl=None, |
+ fine_grained_acl_list=None): |
"""Upload contents of a local file to Google Storage. |
- TODO(epoger): Add the only_if_modified param provided by upload_file() in |
- https://github.com/google/skia-buildbot/blob/master/slave/skia_slave_scripts/utils/old_gs_utils.py , |
- so we can replace that function with this one. |
- |
params: |
source_path: full path (local-OS-style) on local disk to read from |
dest_bucket: GCS bucket to copy the file to |
dest_path: full path (Posix-style) within that bucket |
+ only_if_modified: if True, only upload the file if it would actually |
+ change the content on Google Storage (uploads the file if dest_path |
+ does not exist, or if it exists but has different contents than |
+ source_path). Note that this may take longer than just uploading the |
+ file without checking first, due to extra round-trips! |
predefined_acl: which predefined ACL to apply to the file on Google |
Storage; must be one of the PredefinedACL values defined above. |
If None, inherits dest_bucket's default object ACL. |
@@ -170,22 +195,32 @@ class GSUtils(object): |
or None if predefined_acl is sufficient |
""" |
b = self._connect_to_bucket(bucket_name=dest_bucket) |
- item = Key(b) |
- item.key = dest_path |
+ |
+ if only_if_modified: |
+ old_key = b.get_key(key_name=dest_path) |
+ if old_key: |
+ local_md5 = '"%s"' % _get_local_md5(path=source_path) |
+ if local_md5 == old_key.etag: |
+ print 'Skipping upload of unmodified file %s : %s' % ( |
+ source_path, local_md5) |
+ return |
+ |
+ key = Key(b) |
+ key.name = dest_path |
try: |
- item.set_contents_from_filename(filename=source_path, |
- policy=predefined_acl) |
+ key.set_contents_from_filename(filename=source_path, |
+ policy=predefined_acl) |
except BotoServerError, e: |
e.body = (repr(e.body) + |
' while uploading source_path=%s to bucket=%s, path=%s' % ( |
- source_path, dest_bucket, item.key)) |
+ source_path, dest_bucket, key.name)) |
raise |
# TODO(epoger): This may be inefficient, because it calls |
# _connect_to_bucket() again. Depending on how expensive that |
# call is, we may want to optimize this. |
for (id_type, id_value, permission) in fine_grained_acl_list or []: |
self.set_acl( |
- bucket=dest_bucket, path=item.key, |
+ bucket=dest_bucket, path=key.name, |
id_type=id_type, id_value=id_value, permission=permission) |
def upload_dir_contents(self, source_dir, dest_bucket, dest_dir, |
@@ -237,10 +272,10 @@ class GSUtils(object): |
predefined_acl=predefined_acl, |
fine_grained_acl_list=fine_grained_acl_list) |
else: |
- item = Key(b) |
- item.key = remote_path |
+ key = Key(b) |
+ key.name = remote_path |
try: |
- item.set_contents_from_filename( |
+ key.set_contents_from_filename( |
filename=local_path, policy=predefined_acl) |
except BotoServerError, e: |
e.body = (repr(e.body) + |
@@ -267,13 +302,13 @@ class GSUtils(object): |
needed to create dest_path |
""" |
b = self._connect_to_bucket(bucket_name=source_bucket) |
- item = Key(b) |
- item.key = source_path |
+ key = Key(b) |
+ key.name = source_path |
if create_subdirs_if_needed: |
_makedirs_if_needed(os.path.dirname(dest_path)) |
with open(dest_path, 'w') as f: |
try: |
- item.get_contents_to_file(fp=f) |
+ key.get_contents_to_file(fp=f) |
except BotoServerError, e: |
e.body = (repr(e.body) + |
' while downloading bucket=%s, path=%s to local_path=%s' % ( |
@@ -302,16 +337,16 @@ class GSUtils(object): |
bucket=source_bucket, subdir=source_dir) |
for filename in files: |
- item = Key(b) |
- item.key = posixpath.join(source_dir, filename) |
+ key = Key(b) |
+ key.name = posixpath.join(source_dir, filename) |
dest_path = os.path.join(dest_dir, filename) |
with open(dest_path, 'w') as f: |
try: |
- item.get_contents_to_file(fp=f) |
+ key.get_contents_to_file(fp=f) |
except BotoServerError, e: |
e.body = (repr(e.body) + |
' while downloading bucket=%s, path=%s to local_path=%s' % ( |
- source_bucket, item.key, dest_path)) |
+ source_bucket, key.name, dest_path)) |
raise |
for dirname in dirs: |
@@ -431,13 +466,13 @@ class GSUtils(object): |
prefix_length = len(prefix) if prefix else 0 |
b = self._connect_to_bucket(bucket_name=bucket) |
- lister = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/') |
+ items = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/') |
dirs = [] |
files = [] |
- for item in lister: |
+ for item in items: |
t = type(item) |
if t is Key: |
- files.append(item.key[prefix_length:]) |
+ files.append(item.name[prefix_length:]) |
elif t is Prefix: |
dirs.append(item.name[prefix_length:-1]) |
return (dirs, files) |
@@ -500,3 +535,14 @@ def _makedirs_if_needed(path): |
except OSError as e: |
if e.errno != errno.EEXIST: |
raise |
+ |
+ |
+def _get_local_md5(path): |
+ """Returns the MD5 hash of a file on local disk.""" |
+ hasher = hashlib.md5() |
+ with open(path, 'rb') as f: |
+ while True: |
+ data = f.read(64*1024) |
+ if not data: |
+ return hasher.hexdigest() |
+ hasher.update(data) |