Chromium Code Reviews

Unified Diff: py/utils/gs_utils.py

Issue 424863004: remove all uses of Python's logging module until we resolve http://skbug.com/2784 (Closed)
Base URL: https://skia.googlesource.com/common.git@master
Patch Set: Created 6 years, 4 months ago
 #!/usr/bin/python

 # pylint: disable=C0301
 """
 Copyright 2014 Google Inc.

 Use of this source code is governed by a BSD-style license that can be
 found in the LICENSE file.

 Utilities for accessing Google Cloud Storage, using the boto library (wrapper
 for the XML API).

 API/library references:
 - https://developers.google.com/storage/docs/reference-guide
 - http://googlecloudstorage.blogspot.com/2012/09/google-cloud-storage-tutorial-using-boto.html
 """
 # pylint: enable=C0301

 # System-level imports
 import errno
 import hashlib
-import logging
 import os
 import posixpath
 import Queue
 import re
 import sys
 import threading

 # Imports from third-party code
 TRUNK_DIRECTORY = os.path.abspath(os.path.join(
     os.path.dirname(__file__), os.pardir, os.pardir))
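
The only substantive change in this hunk is the removal of 'import logging'; the rest is context. Note that this module is Python 2 code ('import Queue' here, and the 'print' statements added elsewhere in this patch). Purely as a hedged aside, not part of this patch: if the module were ever ported, the Queue import is the one that would need guarding, since the module was renamed in Python 3:

# Not part of this patch -- a sketch of a 2/3-compatible import, since
# Queue (Python 2) was renamed to queue in Python 3.
try:
  import Queue as queue_lib  # Python 2 name
except ImportError:
  import queue as queue_lib  # Python 3 name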
(...skipping 86 matching lines...)
     take longer than just uploading the file.
     See http://skbug.com/2778 ('gs_utils: when uploading IF_NEW, batch up
     checks for existing files within a single remote directory')
     """
     ALWAYS = 1       # always upload the file
     IF_NEW = 2       # if there is an existing file with the same name,
                      # leave it alone
     IF_MODIFIED = 3  # if there is an existing file with the same name and
                      # contents, leave it alone

-  def __init__(self, boto_file_path=None, logger=None):
+  def __init__(self, boto_file_path=None):
     """Constructor.

     Params:
       boto_file_path: full path (local-OS-style) on local disk where .boto
           credentials file can be found. If None, then the GSUtils object
           created will be able to access only public files in Google Storage.
-      logger: a logging.Logger instance to use for logging output; if None,
-          one will be created with default characteristics

     Raises an exception if no file is found at boto_file_path, or if the file
     found there is malformed.
     """
-    self.logger = logger or logging.getLogger(__name__)
     self._gs_access_key_id = None
     self._gs_secret_access_key = None
     if boto_file_path:
-      self.logger.info('Reading boto file from %s' % boto_file_path)
+      print ('Reading boto file from %s' % boto_file_path)
       boto_dict = _config_file_as_dict(filepath=boto_file_path)
       self._gs_access_key_id = boto_dict['gs_access_key_id']
       self._gs_secret_access_key = boto_dict['gs_secret_access_key']
     # Which field we get/set in ACL entries, depending on IdType.
     self._field_by_id_type = {
         self.IdType.GROUP_BY_DOMAIN: 'domain',
         self.IdType.GROUP_BY_EMAIL: 'email_address',
         self.IdType.GROUP_BY_ID: 'id',
         self.IdType.USER_BY_EMAIL: 'email_address',
         self.IdType.USER_BY_ID: 'id',
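
With the logger parameter gone, the constructor now prints directly to stdout. The credentials still come from _config_file_as_dict(), which is defined in an elided part of this file. As a rough sketch only, assuming the usual 'key = value' layout of .boto files (this is not the actual implementation):

def _config_file_as_dict(filepath):
  """Sketch: parse a .boto-style config file into a {key: value} dict."""
  result = {}
  with open(filepath) as f:
    for line in f:
      line = line.strip()
      # Skip blanks, comments, and section headers like [Credentials].
      if not line or line.startswith('#') or line.startswith('['):
        continue
      if '=' in line:
        key, _, value = line.partition('=')
        result[key.strip()] = value.strip()
  return result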
(...skipping 64 matching lines...)
         so that HTTP downloads of the file would be unzipped automatically.
         See https://developers.google.com/storage/docs/gsutil/addlhelp/
         WorkingWithObjectMetadata#content-encoding
     """
     b = self._connect_to_bucket(bucket=dest_bucket)
     local_md5 = None  # filled in lazily

     if upload_if == self.UploadIf.IF_NEW:
       old_key = b.get_key(key_name=dest_path)
       if old_key:
-        self.logger.info('Skipping upload of existing file gs://%s/%s' % (
+        print ('Skipping upload of existing file gs://%s/%s' % (
             b.name, dest_path))
         return
     elif upload_if == self.UploadIf.IF_MODIFIED:
       old_key = b.get_key(key_name=dest_path)
       if old_key:
         if not local_md5:
           local_md5 = _get_local_md5(path=source_path)
         if ('"%s"' % local_md5) == old_key.etag:
-          self.logger.info(
+          print (
               'Skipping upload of unmodified file gs://%s/%s : %s' % (
                   b.name, dest_path, local_md5))
           return
     elif upload_if != self.UploadIf.ALWAYS:
       raise Exception('unknown value of upload_if: %s' % upload_if)

     # Upload the file using a temporary name at first, in case the transfer
     # is interrupted partway through.
     if not local_md5:
       local_md5 = _get_local_md5(path=source_path)
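
The IF_MODIFIED branch works because, for a non-composite Google Cloud Storage object, the ETag boto reports is the object's hex MD5 digest wrapped in double quotes, so comparing ('"%s"' % local_md5) against old_key.etag is a cheap unmodified-check that avoids re-uploading identical bytes. A self-contained illustration of the same comparison (the function name and paths here are hypothetical, not from this file):

import hashlib

def is_unmodified(local_path, remote_etag):
  # Hash the local file in chunks, mirroring _get_local_md5 below.
  hasher = hashlib.md5()
  with open(local_path, 'rb') as f:
    for chunk in iter(lambda: f.read(64 * 1024), b''):
      hasher.update(chunk)
  # GCS etags for non-composite objects are the quoted hex MD5.
  return ('"%s"' % hasher.hexdigest()) == remote_etag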
(...skipping 103 matching lines...)
           local_md5 = '"%s"' % _get_local_md5(path=os.path.join(
               source_dir, rel_path))
           key = existing_dest_filemap[rel_path]
           if local_md5 == key.etag:
             source_fileset.remove(rel_path)
     else:
       raise Exception('unknown value of upload_if: %s' % upload_if)

     # Upload any files still in source_fileset.
     num_files_to_upload = len(source_fileset)
-    self.logger.info('Uploading %d files, skipping %d ...' % (
+    print ('Uploading %d files, skipping %d ...' % (
         num_files_to_upload, num_files_total - num_files_to_upload))
     if num_files_to_upload == 0:
       return
     if num_threads > num_files_to_upload:
       num_threads = num_files_to_upload

     # Create a work queue with all files that need to be uploaded.
     q = Queue.Queue(maxsize=num_files_to_upload)
     for rel_path in source_fileset:
       q.put(rel_path)

     # Spin up worker threads to read from the task queue.
     def worker():
       while True:
         try:
           rel_path = q.get(block=False)
         except Queue.Empty:
           return  # no more tasks in the queue, so exit
-        self.logger.info('  Uploading file %d/%d: %s' % (
+        print ('  Uploading file %d/%d: %s' % (
             num_files_to_upload - q.qsize(), num_files_to_upload, rel_path))
         self.upload_file(
             source_path=os.path.join(source_dir, rel_path),
             dest_bucket=b,
             dest_path=posixpath.join(dest_dir, rel_path),
             upload_if=self.UploadIf.ALWAYS,
             **kwargs)
         q.task_done()
     for _ in range(num_threads):
       t = threading.Thread(target=worker)
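
The upload pool is a bounded-queue fan-out: every path is enqueued before any worker starts, so the non-blocking q.get() with a Queue.Empty catch is a safe termination condition (the queue can only shrink once workers run). The thread start and queue join land in the elided lines that follow; a self-contained sketch of the whole pattern, under that assumption (Python 2, matching this file):

import Queue      # Python 2; renamed 'queue' in Python 3
import threading

def run_pool(tasks, handle, num_threads=4):
  q = Queue.Queue(maxsize=len(tasks))
  for task in tasks:
    q.put(task)  # fully populate the queue before any worker starts

  def worker():
    while True:
      try:
        task = q.get(block=False)
      except Queue.Empty:
        return  # queue only shrinks from here on, so empty means done
      handle(task)
      q.task_done()

  for _ in range(num_threads):
    threading.Thread(target=worker).start()
  q.join()  # block until task_done() has been called for every task

run_pool(['a.png', 'b.png'], handle=lambda rel_path: None, num_threads=2)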
(...skipping 255 matching lines...)

 def _get_local_md5(path):
   """Returns the MD5 hash of a file on local disk."""
   hasher = hashlib.md5()
   with open(path, 'rb') as f:
     while True:
       data = f.read(64*1024)
       if not data:
         return hasher.hexdigest()
       hasher.update(data)
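
_get_local_md5() reads in 64 KiB chunks, so memory use stays constant regardless of file size. Its return value is bare hex; both upload paths above add the surrounding double quotes before comparing against an etag. Hypothetical usage (the path is made up):

md5 = _get_local_md5(path='/tmp/example.png')
print ('local md5 %s, as etag %s' % (md5, '"%s"' % md5))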