Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(320)

Side by Side Diff: py/utils/gs_utils.py

Issue 411723002: add only_if_modified param to GSUtils.upload_file() (Closed) Base URL: https://skia.googlesource.com/common.git@master
Patch Set: sleep(2) Created 6 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | py/utils/gs_utils_manualtest.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/python 1 #!/usr/bin/python
2 2
3 # pylint: disable=C0301 3 # pylint: disable=C0301
4 """ 4 """
5 Copyright 2014 Google Inc. 5 Copyright 2014 Google Inc.
6 6
7 Use of this source code is governed by a BSD-style license that can be 7 Use of this source code is governed by a BSD-style license that can be
8 found in the LICENSE file. 8 found in the LICENSE file.
9 9
10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper 10 Utilities for accessing Google Cloud Storage, using the boto library (wrapper
11 for the XML API). 11 for the XML API).
12 12
13 API/library references: 13 API/library references:
14 - https://developers.google.com/storage/docs/reference-guide 14 - https://developers.google.com/storage/docs/reference-guide
15 - http://googlecloudstorage.blogspot.com/2012/09/google-cloud-storage-tutorial-u sing-boto.html 15 - http://googlecloudstorage.blogspot.com/2012/09/google-cloud-storage-tutorial-u sing-boto.html
16 """ 16 """
17 # pylint: enable=C0301 17 # pylint: enable=C0301
18 18
19 # System-level imports 19 # System-level imports
20 import errno 20 import errno
21 import hashlib
21 import os 22 import os
22 import posixpath 23 import posixpath
23 import re 24 import re
24 import sys 25 import sys
25 26
26 # Imports from third-party code 27 # Imports from third-party code
27 TRUNK_DIRECTORY = os.path.abspath(os.path.join( 28 TRUNK_DIRECTORY = os.path.abspath(os.path.join(
28 os.path.dirname(__file__), os.pardir, os.pardir)) 29 os.path.dirname(__file__), os.pardir, os.pardir))
29 for import_subdir in ['boto']: 30 for import_subdir in ['boto']:
30 import_dirpath = os.path.join( 31 import_dirpath = os.path.join(
(...skipping 101 matching lines...) Expand 10 before | Expand all | Expand 10 after
132 """Delete a single file within a GS bucket. 133 """Delete a single file within a GS bucket.
133 134
134 TODO(epoger): what if bucket or path does not exist? Should probably raise 135 TODO(epoger): what if bucket or path does not exist? Should probably raise
135 an exception. Implement, and add a test to exercise this. 136 an exception. Implement, and add a test to exercise this.
136 137
137 Params: 138 Params:
138 bucket: GS bucket to delete a file from 139 bucket: GS bucket to delete a file from
139 path: full path (Posix-style) of the file within the bucket to delete 140 path: full path (Posix-style) of the file within the bucket to delete
140 """ 141 """
141 b = self._connect_to_bucket(bucket_name=bucket) 142 b = self._connect_to_bucket(bucket_name=bucket)
142 item = Key(b) 143 key = Key(b)
143 item.key = path 144 key.name = path
144 try: 145 try:
145 item.delete() 146 key.delete()
146 except BotoServerError, e: 147 except BotoServerError, e:
147 e.body = (repr(e.body) + 148 e.body = (repr(e.body) +
148 ' while deleting bucket=%s, path=%s' % (bucket, path)) 149 ' while deleting bucket=%s, path=%s' % (bucket, path))
149 raise 150 raise
150 151
152 def get_last_modified_time(self, bucket, path):
153 """Gets the timestamp of when this file was last modified.
154
155 Params:
156 bucket: GS bucket in which to look for the file
157 path: full path (Posix-style) of the file within the bucket to check
158
159 Returns the last modified time, as a freeform string. If the file was not
160 found, returns None.
161 """
162 b = self._connect_to_bucket(bucket_name=bucket)
163 try:
164 key = b.get_key(key_name=path)
165 if not key:
166 return None
167 return key.last_modified
168 except BotoServerError, e:
169 e.body = (repr(e.body) +
170 ' while getting attributes of bucket=%s, path=%s' % (
171 bucket, path))
172 raise
173
  def upload_file(self, source_path, dest_bucket, dest_path,
                  only_if_modified=False, predefined_acl=None,
                  fine_grained_acl_list=None):
    """Upload contents of a local file to Google Storage.

    params:
      source_path: full path (local-OS-style) on local disk to read from
      dest_bucket: GCS bucket to copy the file to
      dest_path: full path (Posix-style) within that bucket
      only_if_modified: if True, only upload the file if it would actually
          change the content on Google Storage (uploads the file if dest_path
          does not exist, or if it exists but has different contents than
          source_path).  Note that this may take longer than just uploading the
          file without checking first, due to extra round-trips!
      predefined_acl: which predefined ACL to apply to the file on Google
          Storage; must be one of the PredefinedACL values defined above.
          If None, inherits dest_bucket's default object ACL.
          TODO(epoger): add unittests for this param, although it seems to work
          in my manual testing
      fine_grained_acl_list: list of (id_type, id_value, permission) tuples
          to apply to the uploaded file (on top of the predefined_acl),
          or None if predefined_acl is sufficient
    """
    b = self._connect_to_bucket(bucket_name=dest_bucket)

    if only_if_modified:
      old_key = b.get_key(key_name=dest_path)
      if old_key:
        # The local MD5 is wrapped in double-quotes to match boto's etag
        # format.  NOTE(review): this assumes the remote etag is a plain MD5
        # digest (true for simple, non-composite uploads) -- confirm if
        # composite/resumable uploads are ever used with this bucket.
        local_md5 = '"%s"' % _get_local_md5(path=source_path)
        if local_md5 == old_key.etag:
          print 'Skipping upload of unmodified file %s : %s' % (
              source_path, local_md5)
          return

    key = Key(b)
    key.name = dest_path
    try:
      key.set_contents_from_filename(filename=source_path,
                                     policy=predefined_acl)
    except BotoServerError, e:
      # Annotate the server error with enough context to identify the request.
      e.body = (repr(e.body) +
                ' while uploading source_path=%s to bucket=%s, path=%s' % (
                    source_path, dest_bucket, key.name))
      raise
    # TODO(epoger): This may be inefficient, because it calls
    # _connect_to_bucket() again.  Depending on how expensive that
    # call is, we may want to optimize this.
    for (id_type, id_value, permission) in fine_grained_acl_list or []:
      self.set_acl(
          bucket=dest_bucket, path=key.name,
          id_type=id_type, id_value=id_value, permission=permission)
190 225
191 def upload_dir_contents(self, source_dir, dest_bucket, dest_dir, 226 def upload_dir_contents(self, source_dir, dest_bucket, dest_dir,
192 predefined_acl=None, fine_grained_acl_list=None): 227 predefined_acl=None, fine_grained_acl_list=None):
193 """Recursively upload contents of a local directory to Google Storage. 228 """Recursively upload contents of a local directory to Google Storage.
194 229
195 params: 230 params:
196 source_dir: full path (local-OS-style) on local disk of directory to copy 231 source_dir: full path (local-OS-style) on local disk of directory to copy
197 contents of 232 contents of
198 dest_bucket: GCS bucket to copy the files into 233 dest_bucket: GCS bucket to copy the files into
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
230 else: 265 else:
231 remote_path = filename 266 remote_path = filename
232 267
233 if os.path.isdir(local_path): 268 if os.path.isdir(local_path):
234 self.upload_dir_contents( # recurse 269 self.upload_dir_contents( # recurse
235 source_dir=local_path, dest_bucket=dest_bucket, 270 source_dir=local_path, dest_bucket=dest_bucket,
236 dest_dir=remote_path, 271 dest_dir=remote_path,
237 predefined_acl=predefined_acl, 272 predefined_acl=predefined_acl,
238 fine_grained_acl_list=fine_grained_acl_list) 273 fine_grained_acl_list=fine_grained_acl_list)
239 else: 274 else:
240 item = Key(b) 275 key = Key(b)
241 item.key = remote_path 276 key.name = remote_path
242 try: 277 try:
243 item.set_contents_from_filename( 278 key.set_contents_from_filename(
244 filename=local_path, policy=predefined_acl) 279 filename=local_path, policy=predefined_acl)
245 except BotoServerError, e: 280 except BotoServerError, e:
246 e.body = (repr(e.body) + 281 e.body = (repr(e.body) +
247 ' while uploading local_path=%s to bucket=%s, path=%s' % ( 282 ' while uploading local_path=%s to bucket=%s, path=%s' % (
248 local_path, dest_bucket, remote_path)) 283 local_path, dest_bucket, remote_path))
249 raise 284 raise
250 # TODO(epoger): This may be inefficient, because it calls 285 # TODO(epoger): This may be inefficient, because it calls
251 # _connect_to_bucket() for every file. Depending on how expensive that 286 # _connect_to_bucket() for every file. Depending on how expensive that
252 # call is, we may want to optimize this. 287 # call is, we may want to optimize this.
253 for (id_type, id_value, permission) in fine_grained_acl_list or []: 288 for (id_type, id_value, permission) in fine_grained_acl_list or []:
254 self.set_acl( 289 self.set_acl(
255 bucket=dest_bucket, path=remote_path, 290 bucket=dest_bucket, path=remote_path,
256 id_type=id_type, id_value=id_value, permission=permission) 291 id_type=id_type, id_value=id_value, permission=permission)
257 292
258 def download_file(self, source_bucket, source_path, dest_path, 293 def download_file(self, source_bucket, source_path, dest_path,
259 create_subdirs_if_needed=False): 294 create_subdirs_if_needed=False):
260 """Downloads a single file from Google Cloud Storage to local disk. 295 """Downloads a single file from Google Cloud Storage to local disk.
261 296
262 Args: 297 Args:
263 source_bucket: GCS bucket to download the file from 298 source_bucket: GCS bucket to download the file from
264 source_path: full path (Posix-style) within that bucket 299 source_path: full path (Posix-style) within that bucket
265 dest_path: full path (local-OS-style) on local disk to copy the file to 300 dest_path: full path (local-OS-style) on local disk to copy the file to
266 create_subdirs_if_needed: boolean; whether to create subdirectories as 301 create_subdirs_if_needed: boolean; whether to create subdirectories as
267 needed to create dest_path 302 needed to create dest_path
268 """ 303 """
269 b = self._connect_to_bucket(bucket_name=source_bucket) 304 b = self._connect_to_bucket(bucket_name=source_bucket)
270 item = Key(b) 305 key = Key(b)
271 item.key = source_path 306 key.name = source_path
272 if create_subdirs_if_needed: 307 if create_subdirs_if_needed:
273 _makedirs_if_needed(os.path.dirname(dest_path)) 308 _makedirs_if_needed(os.path.dirname(dest_path))
274 with open(dest_path, 'w') as f: 309 with open(dest_path, 'w') as f:
275 try: 310 try:
276 item.get_contents_to_file(fp=f) 311 key.get_contents_to_file(fp=f)
277 except BotoServerError, e: 312 except BotoServerError, e:
278 e.body = (repr(e.body) + 313 e.body = (repr(e.body) +
279 ' while downloading bucket=%s, path=%s to local_path=%s' % ( 314 ' while downloading bucket=%s, path=%s to local_path=%s' % (
280 source_bucket, source_path, dest_path)) 315 source_bucket, source_path, dest_path))
281 raise 316 raise
282 317
  def download_dir_contents(self, source_bucket, source_dir, dest_dir):
    """Recursively download contents of a Google Storage directory to local disk

    params:
      source_bucket: GCS bucket to copy the files from
      source_dir: full path (Posix-style) within that bucket; read the files
          from this directory
      dest_dir: full path (local-OS-style) on local disk of directory to copy
          the files into

    The copy operates as a "merge with overwrite": any files in source_dir will
    be "overlaid" on top of the existing content in dest_dir.  Existing files
    with the same names will be overwritten.

    TODO(epoger): Download multiple files simultaneously to reduce latency.
    """
    _makedirs_if_needed(dest_dir)
    b = self._connect_to_bucket(bucket_name=source_bucket)
    # list_bucket_contents() returns basenames relative to source_dir, split
    # into subdirectories and plain files.
    (dirs, files) = self.list_bucket_contents(
        bucket=source_bucket, subdir=source_dir)

    for filename in files:
      key = Key(b)
      key.name = posixpath.join(source_dir, filename)
      dest_path = os.path.join(dest_dir, filename)
      # NOTE(review): opened in text mode ('w'); on platforms that translate
      # line endings this would alter binary content -- consider 'wb'.
      with open(dest_path, 'w') as f:
        try:
          key.get_contents_to_file(fp=f)
        except BotoServerError, e:
          # Annotate the server error with enough context to identify the
          # request.
          e.body = (repr(e.body) +
                    ' while downloading bucket=%s, path=%s to local_path=%s' % (
                        source_bucket, key.name, dest_path))
          raise

    for dirname in dirs:
      self.download_dir_contents(  # recurse
          source_bucket=source_bucket,
          source_dir=posixpath.join(source_dir, dirname),
          dest_dir=os.path.join(dest_dir, dirname))
322 357
323 def get_acl(self, bucket, path, id_type, id_value): 358 def get_acl(self, bucket, path, id_type, id_value):
324 """Retrieve partial access permissions on a single file in Google Storage. 359 """Retrieve partial access permissions on a single file in Google Storage.
(...skipping 99 matching lines...) Expand 10 before | Expand all | Expand 10 after
424 bucket: name of the Google Storage bucket 459 bucket: name of the Google Storage bucket
425 subdir: directory within the bucket to list, or None for root directory 460 subdir: directory within the bucket to list, or None for root directory
426 """ 461 """
427 # The GS command relies on the prefix (if any) ending with a slash. 462 # The GS command relies on the prefix (if any) ending with a slash.
428 prefix = subdir or '' 463 prefix = subdir or ''
429 if prefix and not prefix.endswith('/'): 464 if prefix and not prefix.endswith('/'):
430 prefix += '/' 465 prefix += '/'
431 prefix_length = len(prefix) if prefix else 0 466 prefix_length = len(prefix) if prefix else 0
432 467
433 b = self._connect_to_bucket(bucket_name=bucket) 468 b = self._connect_to_bucket(bucket_name=bucket)
434 lister = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/') 469 items = BucketListResultSet(bucket=b, prefix=prefix, delimiter='/')
435 dirs = [] 470 dirs = []
436 files = [] 471 files = []
437 for item in lister: 472 for item in items:
438 t = type(item) 473 t = type(item)
439 if t is Key: 474 if t is Key:
440 files.append(item.key[prefix_length:]) 475 files.append(item.name[prefix_length:])
441 elif t is Prefix: 476 elif t is Prefix:
442 dirs.append(item.name[prefix_length:-1]) 477 dirs.append(item.name[prefix_length:-1])
443 return (dirs, files) 478 return (dirs, files)
444 479
445 def _connect_to_bucket(self, bucket_name): 480 def _connect_to_bucket(self, bucket_name):
446 """Returns a Bucket object we can use to access a particular bucket in GS. 481 """Returns a Bucket object we can use to access a particular bucket in GS.
447 482
448 Params: 483 Params:
449 bucket_name: name of the bucket (e.g., 'chromium-skia-gm') 484 bucket_name: name of the bucket (e.g., 'chromium-skia-gm')
450 """ 485 """
(...skipping 42 matching lines...) Expand 10 before | Expand all | Expand 10 after
493 exist yet. 528 exist yet.
494 529
495 Args: 530 Args:
496 path: full path of directory to create 531 path: full path of directory to create
497 """ 532 """
498 try: 533 try:
499 os.makedirs(path) 534 os.makedirs(path)
500 except OSError as e: 535 except OSError as e:
501 if e.errno != errno.EEXIST: 536 if e.errno != errno.EEXIST:
502 raise 537 raise
538
539
540 def _get_local_md5(path):
541 """Returns the MD5 hash of a file on local disk."""
542 hasher = hashlib.md5()
543 with open(path, 'rb') as f:
544 while True:
545 data = f.read(64*1024)
546 if not data:
547 return hasher.hexdigest()
548 hasher.update(data)
OLDNEW
« no previous file with comments | « no previous file | py/utils/gs_utils_manualtest.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698