Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(904)

Unified Diff: py/utils/gs_utils.py

Issue 411723002: add only_if_modified param to GSUtils.upload_file() (Closed) Base URL: https://skia.googlesource.com/common.git@master
Patch Set: sleep(2) Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | py/utils/gs_utils_manualtest.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: py/utils/gs_utils.py
diff --git a/py/utils/gs_utils.py b/py/utils/gs_utils.py
index d49b3fe4f615da04ff9c05a05a7fa55d3e03ebda..1d39ef7f27e17d4dc0a8b7075a40dd41f96040ef 100644
--- a/py/utils/gs_utils.py
+++ b/py/utils/gs_utils.py
@@ -18,6 +18,7 @@ API/library references:
# System-level imports
import errno
+import hashlib
import os
import posixpath
import re
@@ -139,27 +140,51 @@ class GSUtils(object):
path: full path (Posix-style) of the file within the bucket to delete
"""
b = self._connect_to_bucket(bucket_name=bucket)
- item = Key(b)
- item.key = path
+ key = Key(b)
+ key.name = path
try:
- item.delete()
+ key.delete()
except BotoServerError, e:
e.body = (repr(e.body) +
' while deleting bucket=%s, path=%s' % (bucket, path))
raise
  def get_last_modified_time(self, bucket, path):
    """Gets the timestamp of when this file was last modified.

    Params:
      bucket: GS bucket in which to look for the file
      path: full path (Posix-style) of the file within the bucket to check

    Returns the last modified time, as a freeform string (whatever boto's
    Key.last_modified holds). If the file was not found, returns None.

    Raises BotoServerError (with bucket/path context appended to its body)
    if the server reports an error other than "not found".
    """
    b = self._connect_to_bucket(bucket_name=bucket)
    try:
      # get_key() returns None (rather than raising) when the key does not
      # exist, so a missing file maps cleanly to a None return value.
      key = b.get_key(key_name=path)
      if not key:
        return None
      return key.last_modified
    except BotoServerError, e:
      # Annotate the exception with which bucket/path we were examining
      # before re-raising, so callers can tell what failed.
      e.body = (repr(e.body) +
                ' while getting attributes of bucket=%s, path=%s' % (
                    bucket, path))
      raise
+
def upload_file(self, source_path, dest_bucket, dest_path,
- predefined_acl=None, fine_grained_acl_list=None):
+ only_if_modified=False, predefined_acl=None,
+ fine_grained_acl_list=None):
"""Upload contents of a local file to Google Storage.
- TODO(epoger): Add the only_if_modified param provided by upload_file() in
- https://github.com/google/skia-buildbot/blob/master/slave/skia_slave_scripts/utils/old_gs_utils.py ,
- so we can replace that function with this one.
-
params:
source_path: full path (local-OS-style) on local disk to read from
dest_bucket: GCS bucket to copy the file to
dest_path: full path (Posix-style) within that bucket
+ only_if_modified: if True, only upload the file if it would actually
+ change the content on Google Storage (uploads the file if dest_path
+ does not exist, or if it exists but has different contents than
+ source_path). Note that this may take longer than just uploading the
+ file without checking first, due to extra round-trips!
predefined_acl: which predefined ACL to apply to the file on Google
Storage; must be one of the PredefinedACL values defined above.
If None, inherits dest_bucket's default object ACL.
@@ -170,22 +195,32 @@ class GSUtils(object):
or None if predefined_acl is sufficient
"""
b = self._connect_to_bucket(bucket_name=dest_bucket)
- item = Key(b)
- item.key = dest_path
+
+ if only_if_modified:
+ old_key = b.get_key(key_name=dest_path)
+ if old_key:
+ local_md5 = '"%s"' % _get_local_md5(path=source_path)
+ if local_md5 == old_key.etag:
+ print 'Skipping upload of unmodified file %s : %s' % (
+ source_path, local_md5)
+ return
+
+ key = Key(b)
+ key.name = dest_path
try:
- item.set_contents_from_filename(filename=source_path,
- policy=predefined_acl)
+ key.set_contents_from_filename(filename=source_path,
+ policy=predefined_acl)
except BotoServerError, e:
e.body = (repr(e.body) +
' while uploading source_path=%s to bucket=%s, path=%s' % (
- source_path, dest_bucket, item.key))
+ source_path, dest_bucket, key.name))
raise
# TODO(epoger): This may be inefficient, because it calls
# _connect_to_bucket() again. Depending on how expensive that
# call is, we may want to optimize this.
for (id_type, id_value, permission) in fine_grained_acl_list or []:
self.set_acl(
- bucket=dest_bucket, path=item.key,
+ bucket=dest_bucket, path=key.name,
id_type=id_type, id_value=id_value, permission=permission)
def upload_dir_contents(self, source_dir, dest_bucket, dest_dir,
@@ -237,10 +272,10 @@ class GSUtils(object):
predefined_acl=predefined_acl,
fine_grained_acl_list=fine_grained_acl_list)
else:
- item = Key(b)
- item.key = remote_path
+ key = Key(b)
+ key.name = remote_path
try:
- item.set_contents_from_filename(
+ key.set_contents_from_filename(
filename=local_path, policy=predefined_acl)
except BotoServerError, e:
e.body = (repr(e.body) +
@@ -267,13 +302,13 @@ class GSUtils(object):
needed to create dest_path
"""
b = self._connect_to_bucket(bucket_name=source_bucket)
- item = Key(b)
- item.key = source_path
+ key = Key(b)
+ key.name = source_path
if create_subdirs_if_needed:
_makedirs_if_needed(os.path.dirname(dest_path))
with open(dest_path, 'w') as f:
try:
- item.get_contents_to_file(fp=f)
+ key.get_contents_to_file(fp=f)
except BotoServerError, e:
e.body = (repr(e.body) +
' while downloading bucket=%s, path=%s to local_path=%s' % (
@@ -302,16 +337,16 @@ class GSUtils(object):
bucket=source_bucket, subdir=source_dir)
for filename in files:
- item = Key(b)
- item.key = posixpath.join(source_dir, filename)
+ key = Key(b)
+ key.name = posixpath.join(source_dir, filename)
dest_path = os.path.join(dest_dir, filename)
with open(dest_path, 'w') as f:
try:
- item.get_contents_to_file(fp=f)
+ key.get_contents_to_file(fp=f)
except BotoServerError, e:
e.body = (repr(e.body) +
' while downloading bucket=%s, path=%s to local_path=%s' % (
- source_bucket, item.key, dest_path))
+ source_bucket, key.name, dest_path))
raise
for dirname in dirs:
@@ -431,13 +466,13 @@ class GSUtils(object):
prefix_length = len(prefix) if prefix else 0
b = self._connect_to_bucket(bucket_name=bucket)
- lister = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/')
+ items = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/')
dirs = []
files = []
- for item in lister:
+ for item in items:
t = type(item)
if t is Key:
- files.append(item.key[prefix_length:])
+ files.append(item.name[prefix_length:])
elif t is Prefix:
dirs.append(item.name[prefix_length:-1])
return (dirs, files)
@@ -500,3 +535,14 @@ def _makedirs_if_needed(path):
except OSError as e:
if e.errno != errno.EEXIST:
raise
+
+
+def _get_local_md5(path):
+ """Returns the MD5 hash of a file on local disk."""
+ hasher = hashlib.md5()
+ with open(path, 'rb') as f:
+ while True:
+ data = f.read(64*1024)
+ if not data:
+ return hasher.hexdigest()
+ hasher.update(data)
« no previous file with comments | « no previous file | py/utils/gs_utils_manualtest.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698