Chromium Code Reviews

Unified Diff: py/utils/gs_utils.py

Issue 418503005: GSUtils: allow uploads to happen ALWAYS, IF_NEW, or IF_MODIFIED (Closed)
Base URL: https://skia.googlesource.com/common.git@master
Patch Set: add UploadIf.IF_NEW (created 6 years, 5 months ago)
 #!/usr/bin/python
 
 # pylint: disable=C0301
 """
 Copyright 2014 Google Inc.
 
 Use of this source code is governed by a BSD-style license that can be
 found in the LICENSE file.
 
 Utilities for accessing Google Cloud Storage, using the boto library (wrapper
(...skipping 83 matching lines...)
     PUBLIC_READ_WRITE = 'public-read-write'
 
   class IdType:
     """Types of identifiers we can use to set "fine-grained" ACLs."""
     GROUP_BY_DOMAIN = acl.GROUP_BY_DOMAIN
     GROUP_BY_EMAIL = acl.GROUP_BY_EMAIL
     GROUP_BY_ID = acl.GROUP_BY_ID
     USER_BY_EMAIL = acl.USER_BY_EMAIL
     USER_BY_ID = acl.USER_BY_ID
 
+  class UploadIf:
+    """Cases in which we will upload a file.
+
+    Beware of performance tradeoffs. E.g., if the file is small, the extra
+    round trip to check for file existence and/or checksum may take longer than
+    just uploading the file."""
+    ALWAYS = 1       # always upload the file
+    IF_NEW = 2       # if there is an existing file with the same name,
+                     #   leave it alone
+    IF_MODIFIED = 3  # if there is an existing file with the same name and
+                     #   contents, leave it alone
 
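For context, a minimal usage sketch of the new parameter (assuming the module is importable as gs_utils; the credentials path, bucket, and object paths below are hypothetical):

    from gs_utils import GSUtils

    gs = GSUtils(boto_file_path='/home/user/.boto')  # hypothetical .boto path
    # IF_NEW: upload only if no object already exists at dest_path.
    gs.upload_file(source_path='/tmp/run1.txt', dest_bucket='my-bucket',
                   dest_path='logs/run1.txt',
                   upload_if=GSUtils.UploadIf.IF_NEW)
    # IF_MODIFIED: upload only if the existing object's contents differ from
    # the local file's (costs an extra round trip per call).
    gs.upload_file(source_path='/tmp/run1.txt', dest_bucket='my-bucket',
                   dest_path='logs/run1.txt',
                   upload_if=GSUtils.UploadIf.IF_MODIFIED)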
   def __init__(self, boto_file_path=None):
     """Constructor.
 
     Params:
       boto_file_path: full path (local-OS-style) on local disk where .boto
           credentials file can be found. If None, then the GSUtils object
           created will be able to access only public files in Google Storage.
 
     Raises an exception if no file is found at boto_file_path, or if the file
(...skipping 51 matching lines...)
       if not key:
         return None
       return key.last_modified
     except BotoServerError, e:
       e.body = (repr(e.body) +
                 ' while getting attributes of bucket=%s, path=%s' % (
                     bucket, path))
       raise
 
   def upload_file(self, source_path, dest_bucket, dest_path,
-                  only_if_modified=False, predefined_acl=None,
+                  upload_if=UploadIf.ALWAYS,
+                  predefined_acl=None,
                   fine_grained_acl_list=None):
     """Upload contents of a local file to Google Storage.
 
     params:
       source_path: full path (local-OS-style) on local disk to read from
       dest_bucket: GCS bucket to copy the file to
       dest_path: full path (Posix-style) within that bucket
-      only_if_modified: if True, only upload the file if it would actually
-          change the content on Google Storage (uploads the file if dest_path
-          does not exist, or if it exists but has different contents than
-          source_path). Note that this may take longer than just uploading the
-          file without checking first, due to extra round-trips!
+      upload_if: one of the UploadIf values, describing in which cases we should
+          upload the file
       predefined_acl: which predefined ACL to apply to the file on Google
           Storage; must be one of the PredefinedACL values defined above.
           If None, inherits dest_bucket's default object ACL.
           TODO(epoger): add unittests for this param, although it seems to work
           in my manual testing
       fine_grained_acl_list: list of (id_type, id_value, permission) tuples
           to apply to the uploaded file (on top of the predefined_acl),
           or None if predefined_acl is sufficient
     """
     b = self._connect_to_bucket(bucket_name=dest_bucket)
 
-    if only_if_modified:
+    if upload_if == self.UploadIf.IF_NEW:
+      old_key = b.get_key(key_name=dest_path)
+      if old_key:
+        print 'Skipping upload of existing file gs://%s/%s' % (
+            dest_bucket, dest_path)
+        return
+    elif upload_if == self.UploadIf.IF_MODIFIED:
       old_key = b.get_key(key_name=dest_path)
       if old_key:
         local_md5 = '"%s"' % _get_local_md5(path=source_path)
         if local_md5 == old_key.etag:
-          print 'Skipping upload of unmodified file %s : %s' % (
-              source_path, local_md5)
+          print 'Skipping upload of unmodified file gs://%s/%s : %s' % (
+              dest_bucket, dest_path, local_md5)
           return
 
borenet 2014/07/24 14:21:50: I might add an "elif upload_if != self.UploadIf.ALWAYS" [...]
epoger 2014/07/24 14:49:49: Good idea, thanks! Done.
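A sketch of the guard borenet suggests above; the exact final wording is not visible in this patch set, but the idea is that the if/elif chain in upload_file should fail loudly on unrecognized values instead of falling through to an unconditional upload:

    if upload_if == self.UploadIf.IF_NEW:
      ...  # skip if an object already exists at dest_path (as above)
    elif upload_if == self.UploadIf.IF_MODIFIED:
      ...  # skip if the existing object's etag matches the local MD5
    elif upload_if != self.UploadIf.ALWAYS:
      # Sketch only: reject anything that is not a known UploadIf value.
      raise Exception('unknown value of upload_if: %s' % upload_if)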
     key = Key(b)
     key.name = dest_path
     try:
       key.set_contents_from_filename(filename=source_path,
                                      policy=predefined_acl)
     except BotoServerError, e:
       e.body = (repr(e.body) +
                 ' while uploading source_path=%s to bucket=%s, path=%s' % (
                     source_path, dest_bucket, key.name))
       raise
(...skipping 321 matching lines...)
 
 def _get_local_md5(path):
   """Returns the MD5 hash of a file on local disk."""
   hasher = hashlib.md5()
   with open(path, 'rb') as f:
     while True:
       data = f.read(64*1024)
       if not data:
         return hasher.hexdigest()
       hasher.update(data)
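A usage sketch of this helper (the path is hypothetical). Because the file is read in 64 KiB chunks, memory use stays bounded even for very large files; note that upload_file wraps the returned hex digest in double quotes before comparing it to key.etag, which is why local_md5 is formatted as '"%s"' above:

    digest = _get_local_md5('/tmp/run1.txt')  # hypothetical path
    print 'md5 = %s' % digest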