| OLD | NEW | 
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python | 
| 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 
| 3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be | 
| 4 # found in the LICENSE file. | 4 # found in the LICENSE file. | 
| 5 | 5 | 
| 6 """Download files from Google Storage based on SHA1 sums.""" | 6 """Download files from Google Storage based on SHA1 sums.""" | 
| 7 | 7 | 
| 8 | 8 | 
| 9 import hashlib | 9 import hashlib | 
| 10 import optparse | 10 import optparse | 
| 11 import os | 11 import os | 
| 12 import Queue | 12 import Queue | 
| 13 import re | 13 import re | 
|  | 14 import shutil | 
| 14 import stat | 15 import stat | 
| 15 import sys | 16 import sys | 
|  | 17 import tarfile | 
| 16 import threading | 18 import threading | 
| 17 import time | 19 import time | 
| 18 | 20 | 
| 19 import subprocess2 | 21 import subprocess2 | 
| 20 | 22 | 
| 21 | 23 | 
| 22 GSUTIL_DEFAULT_PATH = os.path.join( | 24 GSUTIL_DEFAULT_PATH = os.path.join( | 
| 23     os.path.dirname(os.path.abspath(__file__)), 'gsutil.py') | 25     os.path.dirname(os.path.abspath(__file__)), 'gsutil.py') | 
| 24 # Maps sys.platform to what we actually want to call them. | 26 # Maps sys.platform to what we actually want to call them. | 
| 25 PLATFORM_MAPPING = { | 27 PLATFORM_MAPPING = { | 
| (...skipping 16 matching lines...) Expand all  Loading... | 
| 42   pass | 44   pass | 
| 43 | 45 | 
| 44 | 46 | 
| 45 def GetNormalizedPlatform(): | 47 def GetNormalizedPlatform(): | 
| 46   """Returns the result of sys.platform accounting for cygwin. | 48   """Returns the result of sys.platform accounting for cygwin. | 
| 47   Under cygwin, this will always return "win32" like the native Python.""" | 49   Under cygwin, this will always return "win32" like the native Python.""" | 
| 48   if sys.platform == 'cygwin': | 50   if sys.platform == 'cygwin': | 
| 49     return 'win32' | 51     return 'win32' | 
| 50   return sys.platform | 52   return sys.platform | 
| 51 | 53 | 
| 52 |  | 
| 53 # Common utilities | 54 # Common utilities | 
| 54 class Gsutil(object): | 55 class Gsutil(object): | 
| 55   """Call gsutil with some predefined settings.  This is a convenience object, | 56   """Call gsutil with some predefined settings.  This is a convenience object, | 
| 56   and is also immutable.""" | 57   and is also immutable.""" | 
| 57   def __init__(self, path, boto_path=None, timeout=None, version='4.7'): | 58   def __init__(self, path, boto_path=None, timeout=None, version='4.7'): | 
| 58     if not os.path.exists(path): | 59     if not os.path.exists(path): | 
| 59       raise FileNotFoundError('GSUtil not found in %s' % path) | 60       raise FileNotFoundError('GSUtil not found in %s' % path) | 
| 60     self.path = path | 61     self.path = path | 
| 61     self.timeout = timeout | 62     self.timeout = timeout | 
| 62     self.boto_path = boto_path | 63     self.boto_path = boto_path | 
| (...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 179           work_queue.put( | 180           work_queue.put( | 
| 180               (sha1_match.groups(1)[0], full_path.replace('.sha1', ''))) | 181               (sha1_match.groups(1)[0], full_path.replace('.sha1', ''))) | 
| 181           work_queue_size += 1 | 182           work_queue_size += 1 | 
| 182         else: | 183         else: | 
| 183           if not ignore_errors: | 184           if not ignore_errors: | 
| 184             raise InvalidFileError('No sha1 sum found in %s.' % filename) | 185             raise InvalidFileError('No sha1 sum found in %s.' % filename) | 
| 185           print >> sys.stderr, 'No sha1 sum found in %s.' % filename | 186           print >> sys.stderr, 'No sha1 sum found in %s.' % filename | 
| 186   return work_queue_size | 187   return work_queue_size | 
| 187 | 188 | 
| 188 | 189 | 
|  | 190 def _validate_tar_file(tar, prefix): | 
|  | 191   def _validate(tarinfo): | 
|  | 192     """Returns false if the tarinfo is something we explicitly forbid.""" | 
|  | 193     if tarinfo.issym() or tarinfo.islnk(): | 
|  | 194       return False | 
|  | 195     if '..' in tarinfo.name or not tarinfo.name.startswith(prefix): | 
|  | 196       return False | 
|  | 197     return True | 
|  | 198   return all(map(_validate, tar.getmembers())) | 
|  | 199 | 
| 189 def _downloader_worker_thread(thread_num, q, force, base_url, | 200 def _downloader_worker_thread(thread_num, q, force, base_url, | 
| 190                               gsutil, out_q, ret_codes, verbose): | 201                               gsutil, out_q, ret_codes, verbose, extract, | 
|  | 202                               delete=True): | 
| 191   while True: | 203   while True: | 
| 192     input_sha1_sum, output_filename = q.get() | 204     input_sha1_sum, output_filename = q.get() | 
| 193     if input_sha1_sum is None: | 205     if input_sha1_sum is None: | 
| 194       return | 206       return | 
| 195     if os.path.exists(output_filename) and not force: | 207     if os.path.exists(output_filename) and not force: | 
| 196       if get_sha1(output_filename) == input_sha1_sum: | 208       if get_sha1(output_filename) == input_sha1_sum: | 
| 197         if verbose: | 209         if verbose: | 
| 198           out_q.put( | 210           out_q.put( | 
| 199               '%d> File %s exists and SHA1 matches. Skipping.' % ( | 211               '%d> File %s exists and SHA1 matches. Skipping.' % ( | 
| 200                   thread_num, output_filename)) | 212                   thread_num, output_filename)) | 
| (...skipping 10 matching lines...) Expand all  Loading... | 
| 211       else: | 223       else: | 
| 212         # Other error, probably auth related (bad ~/.boto, etc). | 224         # Other error, probably auth related (bad ~/.boto, etc). | 
| 213         out_q.put('%d> Failed to fetch file %s for %s, skipping. [Err: %s]' % ( | 225         out_q.put('%d> Failed to fetch file %s for %s, skipping. [Err: %s]' % ( | 
| 214             thread_num, file_url, output_filename, err)) | 226             thread_num, file_url, output_filename, err)) | 
| 215         ret_codes.put((1, 'Failed to fetch file %s for %s. [Err: %s]' % ( | 227         ret_codes.put((1, 'Failed to fetch file %s for %s. [Err: %s]' % ( | 
| 216             file_url, output_filename, err))) | 228             file_url, output_filename, err))) | 
| 217       continue | 229       continue | 
| 218     # Fetch the file. | 230     # Fetch the file. | 
| 219     out_q.put('%d> Downloading %s...' % (thread_num, output_filename)) | 231     out_q.put('%d> Downloading %s...' % (thread_num, output_filename)) | 
| 220     try: | 232     try: | 
| 221       os.remove(output_filename)  # Delete the file if it exists already. | 233       if delete: | 
|  | 234         os.remove(output_filename)  # Delete the file if it exists already. | 
| 222     except OSError: | 235     except OSError: | 
| 223       if os.path.exists(output_filename): | 236       if os.path.exists(output_filename): | 
| 224         out_q.put('%d> Warning: deleting %s failed.' % ( | 237         out_q.put('%d> Warning: deleting %s failed.' % ( | 
| 225             thread_num, output_filename)) | 238             thread_num, output_filename)) | 
| 226     code, _, err = gsutil.check_call('cp', file_url, output_filename) | 239     code, _, err = gsutil.check_call('cp', file_url, output_filename) | 
| 227     if code != 0: | 240     if code != 0: | 
| 228       out_q.put('%d> %s' % (thread_num, err)) | 241       out_q.put('%d> %s' % (thread_num, err)) | 
| 229       ret_codes.put((code, err)) | 242       ret_codes.put((code, err)) | 
| 230       continue | 243       continue | 
| 231 | 244 | 
| 232     remote_sha1 = get_sha1(output_filename) | 245     remote_sha1 = get_sha1(output_filename) | 
| 233     if remote_sha1 != input_sha1_sum: | 246     if remote_sha1 != input_sha1_sum: | 
| 234       msg = ('%d> ERROR remote sha1 (%s) does not match expected sha1 (%s).' % | 247       msg = ('%d> ERROR remote sha1 (%s) does not match expected sha1 (%s).' % | 
| 235              (thread_num, remote_sha1, input_sha1_sum)) | 248              (thread_num, remote_sha1, input_sha1_sum)) | 
| 236       out_q.put(msg) | 249       out_q.put(msg) | 
| 237       ret_codes.put((20, msg)) | 250       ret_codes.put((20, msg)) | 
| 238       continue | 251       continue | 
| 239 | 252 | 
|  | 253     if extract: | 
|  | 254       if (not tarfile.is_tarfile(output_filename) | 
|  | 255           or not output_filename.endswith('.tar.gz')): | 
|  | 256         out_q.put('%d> Error: %s is not a tar.gz archive.' % ( | 
|  | 257                   thread_num, output_filename)) | 
|  | 258         ret_codes.put((1, '%s is not a tar.gz archive.' % (output_filename))) | 
|  | 259         continue | 
|  | 260       with tarfile.open(output_filename, 'r:gz') as tar: | 
|  | 261         dirname = os.path.dirname(os.path.abspath(output_filename)) | 
|  | 262         extract_dir = output_filename[0:len(output_filename)-7] | 
|  | 263         if not _validate_tar_file(tar, os.path.basename(extract_dir)): | 
|  | 264           out_q.put('%d> Error: %s contains files outside %s.' % ( | 
|  | 265                     thread_num, output_filename, extract_dir)) | 
|  | 266           ret_codes.put((1, '%s contains invalid entries.' % (output_filename))) | 
|  | 267           continue | 
|  | 268         if os.path.exists(extract_dir): | 
|  | 269           try: | 
|  | 270             shutil.rmtree(extract_dir) | 
|  | 271             out_q.put('%d> Removed %s...' % (thread_num, extract_dir)) | 
|  | 272           except OSError: | 
|  | 273             out_q.put('%d> Warning: Can\'t delete: %s' % ( | 
|  | 274                       thread_num, extract_dir)) | 
|  | 275             ret_codes.put((1, 'Can\'t delete %s.' % (extract_dir))) | 
|  | 276             continue | 
|  | 277         out_q.put('%d> Extracting %d entries from %s to %s' % | 
|  | 278                   (thread_num, len(tar.getmembers()),output_filename, | 
|  | 279                    extract_dir)) | 
|  | 280         tar.extractall(path=dirname) | 
| 240     # Set executable bit. | 281     # Set executable bit. | 
| 241     if sys.platform == 'cygwin': | 282     if sys.platform == 'cygwin': | 
| 242       # Under cygwin, mark all files as executable. The executable flag in | 283       # Under cygwin, mark all files as executable. The executable flag in | 
| 243       # Google Storage will not be set when uploading from Windows, so if | 284       # Google Storage will not be set when uploading from Windows, so if | 
| 244       # this script is running under cygwin and we're downloading an | 285       # this script is running under cygwin and we're downloading an | 
| 245       # executable, it will be unrunnable from inside cygwin without this. | 286       # executable, it will be unrunnable from inside cygwin without this. | 
| 246       st = os.stat(output_filename) | 287       st = os.stat(output_filename) | 
| 247       os.chmod(output_filename, st.st_mode | stat.S_IEXEC) | 288       os.chmod(output_filename, st.st_mode | stat.S_IEXEC) | 
| 248     elif sys.platform != 'win32': | 289     elif sys.platform != 'win32': | 
| 249       # On non-Windows platforms, key off of the custom header | 290       # On non-Windows platforms, key off of the custom header | 
| (...skipping 10 matching lines...) Expand all  Loading... | 
| 260   while True: | 301   while True: | 
| 261     line = output_queue.get() | 302     line = output_queue.get() | 
| 262     # It's plausible we want to print empty lines. | 303     # It's plausible we want to print empty lines. | 
| 263     if line is None: | 304     if line is None: | 
| 264       break | 305       break | 
| 265     print line | 306     print line | 
| 266 | 307 | 
| 267 | 308 | 
| 268 def download_from_google_storage( | 309 def download_from_google_storage( | 
| 269     input_filename, base_url, gsutil, num_threads, directory, recursive, | 310     input_filename, base_url, gsutil, num_threads, directory, recursive, | 
| 270     force, output, ignore_errors, sha1_file, verbose, auto_platform): | 311     force, output, ignore_errors, sha1_file, verbose, auto_platform, extract): | 
| 271   # Start up all the worker threads. | 312   # Start up all the worker threads. | 
| 272   all_threads = [] | 313   all_threads = [] | 
| 273   download_start = time.time() | 314   download_start = time.time() | 
| 274   stdout_queue = Queue.Queue() | 315   stdout_queue = Queue.Queue() | 
| 275   work_queue = Queue.Queue() | 316   work_queue = Queue.Queue() | 
| 276   ret_codes = Queue.Queue() | 317   ret_codes = Queue.Queue() | 
| 277   ret_codes.put((0, None)) | 318   ret_codes.put((0, None)) | 
| 278   for thread_num in range(num_threads): | 319   for thread_num in range(num_threads): | 
| 279     t = threading.Thread( | 320     t = threading.Thread( | 
| 280         target=_downloader_worker_thread, | 321         target=_downloader_worker_thread, | 
| 281         args=[thread_num, work_queue, force, base_url, | 322         args=[thread_num, work_queue, force, base_url, | 
| 282               gsutil, stdout_queue, ret_codes, verbose]) | 323               gsutil, stdout_queue, ret_codes, verbose, extract]) | 
| 283     t.daemon = True | 324     t.daemon = True | 
| 284     t.start() | 325     t.start() | 
| 285     all_threads.append(t) | 326     all_threads.append(t) | 
| 286   printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue]) | 327   printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue]) | 
| 287   printer_thread.daemon = True | 328   printer_thread.daemon = True | 
| 288   printer_thread.start() | 329   printer_thread.start() | 
| 289 | 330 | 
| 290   # Enumerate our work queue. | 331   # Enumerate our work queue. | 
| 291   work_queue_size = enumerate_work_queue( | 332   work_queue_size = enumerate_work_queue( | 
| 292       input_filename, work_queue, directory, recursive, | 333       input_filename, work_queue, directory, recursive, | 
| (...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 360   parser.add_option('-p', '--platform', | 401   parser.add_option('-p', '--platform', | 
| 361                     help='A regular expression that is compared against ' | 402                     help='A regular expression that is compared against ' | 
| 362                          'Python\'s sys.platform. If this option is specified, ' | 403                          'Python\'s sys.platform. If this option is specified, ' | 
| 363                          'the download will happen only if there is a match.') | 404                          'the download will happen only if there is a match.') | 
| 364   parser.add_option('-a', '--auto_platform', | 405   parser.add_option('-a', '--auto_platform', | 
| 365                     action='store_true', | 406                     action='store_true', | 
| 366                     help='Detects if any parent folder of the target matches ' | 407                     help='Detects if any parent folder of the target matches ' | 
| 367                          '(linux|mac|win).  If so, the script will only ' | 408                          '(linux|mac|win).  If so, the script will only ' | 
| 368                          'process files that are in the paths ' | 409                          'process files that are in the paths ' | 
| 369                          'that match the current platform.') | 410                          'that match the current platform.') | 
|  | 411   parser.add_option('-u', '--extract', | 
|  | 412                     action='store_true', | 
|  | 413                     help='Extract a downloaded tar.gz file. ' | 
|  | 414                          'Leaves the tar.gz file around for sha1 verification. ' | 
|  | 415                          'If a directory with the same name as the tar.gz ' | 
|  | 416                          'file already exists, it is deleted (to get a ' | 
|  | 417                          'clean state in case of update.)') | 
| 370   parser.add_option('-v', '--verbose', action='store_true', | 418   parser.add_option('-v', '--verbose', action='store_true', | 
| 371                     help='Output extra diagnostic and progress information.') | 419                     help='Output extra diagnostic and progress information.') | 
| 372 | 420 | 
| 373   (options, args) = parser.parse_args() | 421   (options, args) = parser.parse_args() | 
| 374 | 422 | 
| 375   # Make sure we should run at all based on platform matching. | 423   # Make sure we should run at all based on platform matching. | 
| 376   if options.platform: | 424   if options.platform: | 
| 377     if options.auto_platform: | 425     if options.auto_platform: | 
| 378       parser.error('--platform can not be specified with --auto_platform') | 426       parser.error('--platform can not be specified with --auto_platform') | 
| 379     if not re.match(options.platform, GetNormalizedPlatform()): | 427     if not re.match(options.platform, GetNormalizedPlatform()): | 
| (...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 453   if not options.directory and not options.force and not options.no_resume: | 501   if not options.directory and not options.force and not options.no_resume: | 
| 454     if os.path.exists(options.output): | 502     if os.path.exists(options.output): | 
| 455       parser.error('Output file %s exists and --no_resume is specified.' | 503       parser.error('Output file %s exists and --no_resume is specified.' | 
| 456                    % options.output) | 504                    % options.output) | 
| 457 | 505 | 
| 458   base_url = 'gs://%s' % options.bucket | 506   base_url = 'gs://%s' % options.bucket | 
| 459 | 507 | 
| 460   return download_from_google_storage( | 508   return download_from_google_storage( | 
| 461       input_filename, base_url, gsutil, options.num_threads, options.directory, | 509       input_filename, base_url, gsutil, options.num_threads, options.directory, | 
| 462       options.recursive, options.force, options.output, options.ignore_errors, | 510       options.recursive, options.force, options.output, options.ignore_errors, | 
| 463       options.sha1_file, options.verbose, options.auto_platform) | 511       options.sha1_file, options.verbose, options.auto_platform, | 
|  | 512       options.extract) | 
| 464 | 513 | 
| 465 | 514 | 
| 466 if __name__ == '__main__': | 515 if __name__ == '__main__': | 
| 467   sys.exit(main(sys.argv)) | 516   sys.exit(main(sys.argv)) | 
| OLD | NEW | 
|---|