Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(564)

Side by Side Diff: py/utils/gs_utils.py

Issue 419773003: gs_utils.upload_file(): upload with temp name, verify contents, then rename (Closed) Base URL: https://skia.googlesource.com/common.git@master
Patch Set: minor change to some log messages Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show/Hide Comments ('s')
OLDNEW
1 #!/usr/bin/python 1 #!/usr/bin/python
2 2
3 # pylint: disable=C0301 3 # pylint: disable=C0301
4 """ 4 """
5 Copyright 2014 Google Inc. 5 Copyright 2014 Google Inc.
6 6
7 Use of this source code is governed by a BSD-style license that can be 7 Use of this source code is governed by a BSD-style license that can be
8 found in the LICENSE file. 8 found in the LICENSE file.
9 9
10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper 10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper
(...skipping 142 matching lines...) Expand 10 before | Expand all | Expand 10 after
153 bucket: GS bucket to delete a file from 153 bucket: GS bucket to delete a file from
154 path: full path (Posix-style) of the file within the bucket to delete 154 path: full path (Posix-style) of the file within the bucket to delete
155 """ 155 """
156 b = self._connect_to_bucket(bucket=bucket) 156 b = self._connect_to_bucket(bucket=bucket)
157 key = Key(b) 157 key = Key(b)
158 key.name = path 158 key.name = path
159 try: 159 try:
160 key.delete() 160 key.delete()
161 except BotoServerError, e: 161 except BotoServerError, e:
162 e.body = (repr(e.body) + 162 e.body = (repr(e.body) +
163 ' while deleting bucket=%s, path=%s' % (b.name, path)) 163 ' while deleting gs://%s/%s' % (b.name, path))
164 raise 164 raise
165 165
166 def get_last_modified_time(self, bucket, path): 166 def get_last_modified_time(self, bucket, path):
167 """Gets the timestamp of when this file was last modified. 167 """Gets the timestamp of when this file was last modified.
168 168
169 Params: 169 Params:
170 bucket: GS bucket in which to look for the file 170 bucket: GS bucket in which to look for the file
171 path: full path (Posix-style) of the file within the bucket to check 171 path: full path (Posix-style) of the file within the bucket to check
172 172
173 Returns the last modified time, as a freeform string. If the file was not 173 Returns the last modified time, as a freeform string. If the file was not
174 found, returns None. 174 found, returns None.
175 """ 175 """
176 b = self._connect_to_bucket(bucket=bucket) 176 b = self._connect_to_bucket(bucket=bucket)
177 try: 177 try:
178 key = b.get_key(key_name=path) 178 key = b.get_key(key_name=path)
179 if not key: 179 if not key:
180 return None 180 return None
181 return key.last_modified 181 return key.last_modified
182 except BotoServerError, e: 182 except BotoServerError, e:
183 e.body = (repr(e.body) + 183 e.body = (repr(e.body) +
184 ' while getting attributes of bucket=%s, path=%s' % ( 184 ' while getting attributes of gs://%s/%s' % (b.name, path))
185 b.name, path))
186 raise 185 raise
187 186
188 def upload_file(self, source_path, dest_bucket, dest_path, 187 def upload_file(self, source_path, dest_bucket, dest_path,
189 upload_if=UploadIf.ALWAYS, 188 upload_if=UploadIf.ALWAYS,
190 predefined_acl=None, 189 predefined_acl=None,
191 fine_grained_acl_list=None): 190 fine_grained_acl_list=None):
192 """Upload contents of a local file to Google Storage. 191 """Upload contents of a local file to Google Storage.
193 192
194 params: 193 params:
195 source_path: full path (local-OS-style) on local disk to read from 194 source_path: full path (local-OS-style) on local disk to read from
196 dest_bucket: GS bucket to copy the file to 195 dest_bucket: GS bucket to copy the file to
197 dest_path: full path (Posix-style) within that bucket 196 dest_path: full path (Posix-style) within that bucket
198 upload_if: one of the UploadIf values, describing in which cases we should 197 upload_if: one of the UploadIf values, describing in which cases we should
199 upload the file 198 upload the file
200 predefined_acl: which predefined ACL to apply to the file on Google 199 predefined_acl: which predefined ACL to apply to the file on Google
201 Storage; must be one of the PredefinedACL values defined above. 200 Storage; must be one of the PredefinedACL values defined above.
202 If None, inherits dest_bucket's default object ACL. 201 If None, inherits dest_bucket's default object ACL.
203 fine_grained_acl_list: list of (id_type, id_value, permission) tuples 202 fine_grained_acl_list: list of (id_type, id_value, permission) tuples
204 to apply to the uploaded file (on top of the predefined_acl), 203 to apply to the uploaded file (on top of the predefined_acl),
205 or None if predefined_acl is sufficient 204 or None if predefined_acl is sufficient
206 205
207 TODO(epoger): Consider adding a do_compress parameter that would compress 206 TODO(epoger): Consider adding a do_compress parameter that would compress
208 the file using gzip before upload, and add a "Content-Encoding:gzip" header 207 the file using gzip before upload, and add a "Content-Encoding:gzip" header
209 so that HTTP downloads of the file would be unzipped automatically. 208 so that HTTP downloads of the file would be unzipped automatically.
210 See https://developers.google.com/storage/docs/gsutil/addlhelp/ 209 See https://developers.google.com/storage/docs/gsutil/addlhelp/
211 WorkingWithObjectMetadata#content-encoding 210 WorkingWithObjectMetadata#content-encoding
212 """ 211 """
213 b = self._connect_to_bucket(bucket=dest_bucket) 212 b = self._connect_to_bucket(bucket=dest_bucket)
213 local_md5 = None # filled in lazily
214 214
215 if upload_if == self.UploadIf.IF_NEW: 215 if upload_if == self.UploadIf.IF_NEW:
216 old_key = b.get_key(key_name=dest_path) 216 old_key = b.get_key(key_name=dest_path)
217 if old_key: 217 if old_key:
218 print 'Skipping upload of existing file gs://%s/%s' % ( 218 print 'Skipping upload of existing file gs://%s/%s' % (
219 b.name, dest_path) 219 b.name, dest_path)
220 return 220 return
221 elif upload_if == self.UploadIf.IF_MODIFIED: 221 elif upload_if == self.UploadIf.IF_MODIFIED:
222 old_key = b.get_key(key_name=dest_path) 222 old_key = b.get_key(key_name=dest_path)
223 if old_key: 223 if old_key:
224 local_md5 = '"%s"' % _get_local_md5(path=source_path) 224 if not local_md5:
225 if local_md5 == old_key.etag: 225 local_md5 = _get_local_md5(path=source_path)
226 if ('"%s"' % local_md5) == old_key.etag:
226 print 'Skipping upload of unmodified file gs://%s/%s : %s' % ( 227 print 'Skipping upload of unmodified file gs://%s/%s : %s' % (
227 b.name, dest_path, local_md5) 228 b.name, dest_path, local_md5)
228 return 229 return
229 elif upload_if != self.UploadIf.ALWAYS: 230 elif upload_if != self.UploadIf.ALWAYS:
230 raise Exception('unknown value of upload_if: %s' % upload_if) 231 raise Exception('unknown value of upload_if: %s' % upload_if)
231 232
232 key = Key(b) 233 # Upload the file using a temporary name at first, in case the transfer
233 key.name = dest_path 234 # is interrupted partway through.
235 if not local_md5:
236 local_md5 = _get_local_md5(path=source_path)
237 initial_key = Key(b)
238 initial_key.name = dest_path + '-uploading-' + local_md5
234 try: 239 try:
235 key.set_contents_from_filename(filename=source_path, 240 initial_key.set_contents_from_filename(filename=source_path,
236 policy=predefined_acl) 241 policy=predefined_acl)
237 except BotoServerError, e: 242 except BotoServerError, e:
238 e.body = (repr(e.body) + 243 e.body = (repr(e.body) +
239 ' while uploading source_path=%s to bucket=%s, path=%s' % ( 244 ' while uploading source_path=%s to gs://%s/%s' % (
240 source_path, b.name, key.name)) 245 source_path, b.name, initial_key.name))
241 raise 246 raise
247
248 # Verify that the file contents were uploaded successfully.
249 #
250 # TODO(epoger): Check whether the boto library or XML API already do this...
251 # if so, we may be duplicating effort here, and maybe we don't need to do
252 # the whole "upload using temporary filename, then rename" thing.
253 #
254 # TODO(epoger): Confirm that the etag is set on the server side...
255 # otherwise, we may just be validating another MD5 hash that was generated
256 # on the client side before the file was uploaded!
257 validate_key = b.get_key(key_name=initial_key.name)
258 if validate_key.etag != ('"%s"' % local_md5):
259 raise Exception('found wrong MD5 after uploading gs://%s/%s' % (
260 b.name, validate_key.name))
261
262 # Rename the file to its real name.
263 #
264 # TODO(epoger): I don't know how long this takes. I wish we could rename
265 # the key instead, but AFAICT you can't do that.
266 # Perhaps we could use Key.compose() to create a composite object pointing
267 # at the original key?
268 # See https://developers.google.com/storage/docs/composite-objects
269 final_key = b.copy_key(
270 new_key_name=dest_path, src_key_name=initial_key.name,
271 src_bucket_name=b.name, preserve_acl=False)
272 initial_key.delete()
273
274 # Set ACLs on the file.
275 # We do this *after* copy_key(), because copy_key's preserve_acl
276 # functionality would incur a performance hit.
242 for (id_type, id_value, permission) in fine_grained_acl_list or []: 277 for (id_type, id_value, permission) in fine_grained_acl_list or []:
243 self.set_acl( 278 self.set_acl(
244 bucket=b, path=key.name, 279 bucket=b, path=final_key.name,
245 id_type=id_type, id_value=id_value, permission=permission) 280 id_type=id_type, id_value=id_value, permission=permission)
246 281
247 def upload_dir_contents(self, source_dir, dest_bucket, dest_dir, 282 def upload_dir_contents(self, source_dir, dest_bucket, dest_dir,
248 upload_if=UploadIf.ALWAYS, **kwargs): 283 upload_if=UploadIf.ALWAYS, **kwargs):
249 """Recursively upload contents of a local directory to Google Storage. 284 """Recursively upload contents of a local directory to Google Storage.
250 285
251 params: 286 params:
252 source_dir: full path (local-OS-style) on local disk of directory to copy 287 source_dir: full path (local-OS-style) on local disk of directory to copy
253 contents of 288 contents of
254 dest_bucket: GS bucket to copy the files into 289 dest_bucket: GS bucket to copy the files into
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after
330 b = self._connect_to_bucket(bucket=source_bucket) 365 b = self._connect_to_bucket(bucket=source_bucket)
331 key = Key(b) 366 key = Key(b)
332 key.name = source_path 367 key.name = source_path
333 if create_subdirs_if_needed: 368 if create_subdirs_if_needed:
334 _makedirs_if_needed(os.path.dirname(dest_path)) 369 _makedirs_if_needed(os.path.dirname(dest_path))
335 with open(dest_path, 'w') as f: 370 with open(dest_path, 'w') as f:
336 try: 371 try:
337 key.get_contents_to_file(fp=f) 372 key.get_contents_to_file(fp=f)
338 except BotoServerError, e: 373 except BotoServerError, e:
339 e.body = (repr(e.body) + 374 e.body = (repr(e.body) +
340 ' while downloading bucket=%s, path=%s to local_path=%s' % ( 375 ' while downloading gs://%s/%s to local_path=%s' % (
341 b.name, source_path, dest_path)) 376 b.name, source_path, dest_path))
342 raise 377 raise
343 378
344 def download_dir_contents(self, source_bucket, source_dir, dest_dir): 379 def download_dir_contents(self, source_bucket, source_dir, dest_dir):
345 """Recursively download contents of a Google Storage directory to local disk 380 """Recursively download contents of a Google Storage directory to local disk
346 381
347 params: 382 params:
348 source_bucket: GS bucket to copy the files from 383 source_bucket: GS bucket to copy the files from
349 source_dir: full path (Posix-style) within that bucket; read the files 384 source_dir: full path (Posix-style) within that bucket; read the files
350 from this directory 385 from this directory
(...skipping 13 matching lines...) Expand all
364 399
365 for filename in files: 400 for filename in files:
366 key = Key(b) 401 key = Key(b)
367 key.name = posixpath.join(source_dir, filename) 402 key.name = posixpath.join(source_dir, filename)
368 dest_path = os.path.join(dest_dir, filename) 403 dest_path = os.path.join(dest_dir, filename)
369 with open(dest_path, 'w') as f: 404 with open(dest_path, 'w') as f:
370 try: 405 try:
371 key.get_contents_to_file(fp=f) 406 key.get_contents_to_file(fp=f)
372 except BotoServerError, e: 407 except BotoServerError, e:
373 e.body = (repr(e.body) + 408 e.body = (repr(e.body) +
374 ' while downloading bucket=%s, path=%s to local_path=%s' % ( 409 ' while downloading gs://%s/%s to local_path=%s' % (
375 b.name, key.name, dest_path)) 410 b.name, key.name, dest_path))
376 raise 411 raise
377 412
378 for dirname in dirs: 413 for dirname in dirs:
379 self.download_dir_contents( # recurse 414 self.download_dir_contents( # recurse
380 source_bucket=source_bucket, 415 source_bucket=source_bucket,
381 source_dir=posixpath.join(source_dir, dirname), 416 source_dir=posixpath.join(source_dir, dirname),
382 dest_dir=os.path.join(dest_dir, dirname)) 417 dest_dir=os.path.join(dest_dir, dirname))
383 418
384 def get_acl(self, bucket, path, id_type, id_value): 419 def get_acl(self, bucket, path, id_type, id_value):
(...skipping 183 matching lines...) Expand 10 before | Expand all | Expand 10 after
568 603
569 def _get_local_md5(path): 604 def _get_local_md5(path):
570 """Returns the MD5 hash of a file on local disk.""" 605 """Returns the MD5 hash of a file on local disk."""
571 hasher = hashlib.md5() 606 hasher = hashlib.md5()
572 with open(path, 'rb') as f: 607 with open(path, 'rb') as f:
573 while True: 608 while True:
574 data = f.read(64*1024) 609 data = f.read(64*1024)
575 if not data: 610 if not data:
576 return hasher.hexdigest() 611 return hasher.hexdigest()
577 hasher.update(data) 612 hasher.update(data)
OLDNEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698