| OLD | NEW | 
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python | 
| 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 
| 3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be | 
| 4 # found in the LICENSE file. | 4 # found in the LICENSE file. | 
| 5 | 5 | 
| 6 """Download files from Google Storage based on SHA1 sums.""" | 6 """Download files from Google Storage based on SHA1 sums.""" | 
| 7 | 7 | 
| 8 | 8 | 
| 9 import hashlib | 9 import hashlib | 
| 10 import optparse | 10 import optparse | 
| 11 import os | 11 import os | 
| 12 import Queue | 12 import Queue | 
| 13 import re | 13 import re | 
| 14 import shutil |  | 
| 15 import stat | 14 import stat | 
| 16 import sys | 15 import sys | 
| 17 import tarfile |  | 
| 18 import threading | 16 import threading | 
| 19 import time | 17 import time | 
| 20 | 18 | 
| 21 import subprocess2 | 19 import subprocess2 | 
| 22 | 20 | 
| 23 | 21 | 
| 24 GSUTIL_DEFAULT_PATH = os.path.join( | 22 GSUTIL_DEFAULT_PATH = os.path.join( | 
| 25     os.path.dirname(os.path.abspath(__file__)), 'gsutil.py') | 23     os.path.dirname(os.path.abspath(__file__)), 'gsutil.py') | 
| 26 # Maps sys.platform to what we actually want to call them. | 24 # Maps sys.platform to what we actually want to call them. | 
| 27 PLATFORM_MAPPING = { | 25 PLATFORM_MAPPING = { | 
| (...skipping 16 matching lines...) Expand all  Loading... | 
| 44   pass | 42   pass | 
| 45 | 43 | 
| 46 | 44 | 
| 47 def GetNormalizedPlatform(): | 45 def GetNormalizedPlatform(): | 
| 48   """Returns the result of sys.platform accounting for cygwin. | 46   """Returns the result of sys.platform accounting for cygwin. | 
| 49   Under cygwin, this will always return "win32" like the native Python.""" | 47   Under cygwin, this will always return "win32" like the native Python.""" | 
| 50   if sys.platform == 'cygwin': | 48   if sys.platform == 'cygwin': | 
| 51     return 'win32' | 49     return 'win32' | 
| 52   return sys.platform | 50   return sys.platform | 
| 53 | 51 | 
|  | 52 | 
| 54 # Common utilities | 53 # Common utilities | 
| 55 class Gsutil(object): | 54 class Gsutil(object): | 
| 56   """Call gsutil with some predefined settings.  This is a convenience object, | 55   """Call gsutil with some predefined settings.  This is a convenience object, | 
| 57   and is also immutable.""" | 56   and is also immutable.""" | 
| 58   def __init__(self, path, boto_path=None, timeout=None, version='4.7'): | 57   def __init__(self, path, boto_path=None, timeout=None, version='4.7'): | 
| 59     if not os.path.exists(path): | 58     if not os.path.exists(path): | 
| 60       raise FileNotFoundError('GSUtil not found in %s' % path) | 59       raise FileNotFoundError('GSUtil not found in %s' % path) | 
| 61     self.path = path | 60     self.path = path | 
| 62     self.timeout = timeout | 61     self.timeout = timeout | 
| 63     self.boto_path = boto_path | 62     self.boto_path = boto_path | 
| (...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 180           work_queue.put( | 179           work_queue.put( | 
| 181               (sha1_match.groups(1)[0], full_path.replace('.sha1', ''))) | 180               (sha1_match.groups(1)[0], full_path.replace('.sha1', ''))) | 
| 182           work_queue_size += 1 | 181           work_queue_size += 1 | 
| 183         else: | 182         else: | 
| 184           if not ignore_errors: | 183           if not ignore_errors: | 
| 185             raise InvalidFileError('No sha1 sum found in %s.' % filename) | 184             raise InvalidFileError('No sha1 sum found in %s.' % filename) | 
| 186           print >> sys.stderr, 'No sha1 sum found in %s.' % filename | 185           print >> sys.stderr, 'No sha1 sum found in %s.' % filename | 
| 187   return work_queue_size | 186   return work_queue_size | 
| 188 | 187 | 
| 189 | 188 | 
| 190 def _validate_tar_file(tar, prefix): |  | 
| 191   def _validate(tarinfo): |  | 
| 192     """Returns false if the tarinfo is something we explicitly forbid.""" |  | 
| 193     if tarinfo.issym() or tarinfo.islnk(): |  | 
| 194       return False |  | 
| 195     if '..' in tarinfo.name or not tarinfo.name.startswith(prefix): |  | 
| 196       return False |  | 
| 197     return True |  | 
| 198   return all(map(_validate, tar.getmembers())) |  | 
| 199 |  | 
| 200 def _downloader_worker_thread(thread_num, q, force, base_url, | 189 def _downloader_worker_thread(thread_num, q, force, base_url, | 
| 201                               gsutil, out_q, ret_codes, verbose, extract, | 190                               gsutil, out_q, ret_codes, verbose): | 
| 202                               delete=True): |  | 
| 203   while True: | 191   while True: | 
| 204     input_sha1_sum, output_filename = q.get() | 192     input_sha1_sum, output_filename = q.get() | 
| 205     if input_sha1_sum is None: | 193     if input_sha1_sum is None: | 
| 206       return | 194       return | 
| 207     if os.path.exists(output_filename) and not force: | 195     if os.path.exists(output_filename) and not force: | 
| 208       if get_sha1(output_filename) == input_sha1_sum: | 196       if get_sha1(output_filename) == input_sha1_sum: | 
| 209         if verbose: | 197         if verbose: | 
| 210           out_q.put( | 198           out_q.put( | 
| 211               '%d> File %s exists and SHA1 matches. Skipping.' % ( | 199               '%d> File %s exists and SHA1 matches. Skipping.' % ( | 
| 212                   thread_num, output_filename)) | 200                   thread_num, output_filename)) | 
| (...skipping 10 matching lines...) Expand all  Loading... | 
| 223       else: | 211       else: | 
| 224         # Other error, probably auth related (bad ~/.boto, etc). | 212         # Other error, probably auth related (bad ~/.boto, etc). | 
| 225         out_q.put('%d> Failed to fetch file %s for %s, skipping. [Err: %s]' % ( | 213         out_q.put('%d> Failed to fetch file %s for %s, skipping. [Err: %s]' % ( | 
| 226             thread_num, file_url, output_filename, err)) | 214             thread_num, file_url, output_filename, err)) | 
| 227         ret_codes.put((1, 'Failed to fetch file %s for %s. [Err: %s]' % ( | 215         ret_codes.put((1, 'Failed to fetch file %s for %s. [Err: %s]' % ( | 
| 228             file_url, output_filename, err))) | 216             file_url, output_filename, err))) | 
| 229       continue | 217       continue | 
| 230     # Fetch the file. | 218     # Fetch the file. | 
| 231     out_q.put('%d> Downloading %s...' % (thread_num, output_filename)) | 219     out_q.put('%d> Downloading %s...' % (thread_num, output_filename)) | 
| 232     try: | 220     try: | 
| 233       if delete: | 221       os.remove(output_filename)  # Delete the file if it exists already. | 
| 234         os.remove(output_filename)  # Delete the file if it exists already. |  | 
| 235     except OSError: | 222     except OSError: | 
| 236       if os.path.exists(output_filename): | 223       if os.path.exists(output_filename): | 
| 237         out_q.put('%d> Warning: deleting %s failed.' % ( | 224         out_q.put('%d> Warning: deleting %s failed.' % ( | 
| 238             thread_num, output_filename)) | 225             thread_num, output_filename)) | 
| 239     code, _, err = gsutil.check_call('cp', file_url, output_filename) | 226     code, _, err = gsutil.check_call('cp', file_url, output_filename) | 
| 240     if code != 0: | 227     if code != 0: | 
| 241       out_q.put('%d> %s' % (thread_num, err)) | 228       out_q.put('%d> %s' % (thread_num, err)) | 
| 242       ret_codes.put((code, err)) | 229       ret_codes.put((code, err)) | 
| 243 | 230 | 
| 244     if extract: |  | 
| 245       if (not tarfile.is_tarfile(output_filename) |  | 
| 246           or not output_filename.endswith('.tar.gz')): |  | 
| 247         out_q.put('%d> Error: %s is not a tar.gz archive.' % ( |  | 
| 248                   thread_num, output_filename)) |  | 
| 249         ret_codes.put((1, '%s is not a tar.gz archive.' % (output_filename))) |  | 
| 250         continue |  | 
| 251       with tarfile.open(output_filename, 'r:gz') as tar: |  | 
| 252         dirname = os.path.dirname(os.path.abspath(output_filename)) |  | 
| 253         extract_dir = output_filename[0:len(output_filename)-7] |  | 
| 254         if not _validate_tar_file(tar, os.path.basename(extract_dir)): |  | 
| 255           out_q.put('%d> Error: %s contains files outside %s.' % ( |  | 
| 256                     thread_num, output_filename, extract_dir)) |  | 
| 257           ret_codes.put((1, '%s contains invalid entries.' % (output_filename))) |  | 
| 258           continue |  | 
| 259         if os.path.exists(extract_dir): |  | 
| 260           try: |  | 
| 261             shutil.rmtree(extract_dir) |  | 
| 262             out_q.put('%d> Removed %s...' % (thread_num, extract_dir)) |  | 
| 263           except OSError: |  | 
| 264             out_q.put('%d> Warning: Can\'t delete: %s' % ( |  | 
| 265                       thread_num, extract_dir)) |  | 
| 266             ret_codes.put((1, 'Can\'t delete %s.' % (extract_dir))) |  | 
| 267             continue |  | 
| 268         out_q.put('%d> Extracting %d entries from %s to %s' % |  | 
| 269                   (thread_num, len(tar.getmembers()),output_filename, |  | 
| 270                    extract_dir)) |  | 
| 271         tar.extractall(path=dirname) |  | 
| 272     # Set executable bit. | 231     # Set executable bit. | 
| 273     if sys.platform == 'cygwin': | 232     if sys.platform == 'cygwin': | 
| 274       # Under cygwin, mark all files as executable. The executable flag in | 233       # Under cygwin, mark all files as executable. The executable flag in | 
| 275       # Google Storage will not be set when uploading from Windows, so if | 234       # Google Storage will not be set when uploading from Windows, so if | 
| 276       # this script is running under cygwin and we're downloading an | 235       # this script is running under cygwin and we're downloading an | 
| 277       # executable, it will be unrunnable from inside cygwin without this. | 236       # executable, it will be unrunnable from inside cygwin without this. | 
| 278       st = os.stat(output_filename) | 237       st = os.stat(output_filename) | 
| 279       os.chmod(output_filename, st.st_mode | stat.S_IEXEC) | 238       os.chmod(output_filename, st.st_mode | stat.S_IEXEC) | 
| 280     elif sys.platform != 'win32': | 239     elif sys.platform != 'win32': | 
| 281       # On non-Windows platforms, key off of the custom header | 240       # On non-Windows platforms, key off of the custom header | 
| (...skipping 10 matching lines...) Expand all  Loading... | 
| 292   while True: | 251   while True: | 
| 293     line = output_queue.get() | 252     line = output_queue.get() | 
| 294     # It's plausible we want to print empty lines. | 253     # It's plausible we want to print empty lines. | 
| 295     if line is None: | 254     if line is None: | 
| 296       break | 255       break | 
| 297     print line | 256     print line | 
| 298 | 257 | 
| 299 | 258 | 
| 300 def download_from_google_storage( | 259 def download_from_google_storage( | 
| 301     input_filename, base_url, gsutil, num_threads, directory, recursive, | 260     input_filename, base_url, gsutil, num_threads, directory, recursive, | 
| 302     force, output, ignore_errors, sha1_file, verbose, auto_platform, extract): | 261     force, output, ignore_errors, sha1_file, verbose, auto_platform): | 
| 303   # Start up all the worker threads. | 262   # Start up all the worker threads. | 
| 304   all_threads = [] | 263   all_threads = [] | 
| 305   download_start = time.time() | 264   download_start = time.time() | 
| 306   stdout_queue = Queue.Queue() | 265   stdout_queue = Queue.Queue() | 
| 307   work_queue = Queue.Queue() | 266   work_queue = Queue.Queue() | 
| 308   ret_codes = Queue.Queue() | 267   ret_codes = Queue.Queue() | 
| 309   ret_codes.put((0, None)) | 268   ret_codes.put((0, None)) | 
| 310   for thread_num in range(num_threads): | 269   for thread_num in range(num_threads): | 
| 311     t = threading.Thread( | 270     t = threading.Thread( | 
| 312         target=_downloader_worker_thread, | 271         target=_downloader_worker_thread, | 
| 313         args=[thread_num, work_queue, force, base_url, | 272         args=[thread_num, work_queue, force, base_url, | 
| 314               gsutil, stdout_queue, ret_codes, verbose, extract]) | 273               gsutil, stdout_queue, ret_codes, verbose]) | 
| 315     t.daemon = True | 274     t.daemon = True | 
| 316     t.start() | 275     t.start() | 
| 317     all_threads.append(t) | 276     all_threads.append(t) | 
| 318   printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue]) | 277   printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue]) | 
| 319   printer_thread.daemon = True | 278   printer_thread.daemon = True | 
| 320   printer_thread.start() | 279   printer_thread.start() | 
| 321 | 280 | 
| 322   # Enumerate our work queue. | 281   # Enumerate our work queue. | 
| 323   work_queue_size = enumerate_work_queue( | 282   work_queue_size = enumerate_work_queue( | 
| 324       input_filename, work_queue, directory, recursive, | 283       input_filename, work_queue, directory, recursive, | 
| (...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 392   parser.add_option('-p', '--platform', | 351   parser.add_option('-p', '--platform', | 
| 393                     help='A regular expression that is compared against ' | 352                     help='A regular expression that is compared against ' | 
| 394                          'Python\'s sys.platform. If this option is specified, ' | 353                          'Python\'s sys.platform. If this option is specified, ' | 
| 395                          'the download will happen only if there is a match.') | 354                          'the download will happen only if there is a match.') | 
| 396   parser.add_option('-a', '--auto_platform', | 355   parser.add_option('-a', '--auto_platform', | 
| 397                     action='store_true', | 356                     action='store_true', | 
| 398                     help='Detects if any parent folder of the target matches ' | 357                     help='Detects if any parent folder of the target matches ' | 
| 399                          '(linux|mac|win).  If so, the script will only ' | 358                          '(linux|mac|win).  If so, the script will only ' | 
| 400                          'process files that are in the paths that ' | 359                          'process files that are in the paths that ' | 
| 401                          'that matches the current platform.') | 360                          'that matches the current platform.') | 
| 402   parser.add_option('-u', '--extract', |  | 
| 403                     action='store_true', |  | 
| 404                     help='Extract a downloaded tar.gz file. ' |  | 
| 405                          'Leaves the tar.gz file around for sha1 verification' |  | 
| 406                          'If a directory with the same name as the tar.gz ' |  | 
| 407                          'file already exists, is deleted (to get a ' |  | 
| 408                          'clean state in case of update.)') |  | 
| 409   parser.add_option('-v', '--verbose', action='store_true', | 361   parser.add_option('-v', '--verbose', action='store_true', | 
| 410                     help='Output extra diagnostic and progress information.') | 362                     help='Output extra diagnostic and progress information.') | 
| 411 | 363 | 
| 412   (options, args) = parser.parse_args() | 364   (options, args) = parser.parse_args() | 
| 413 | 365 | 
| 414   # Make sure we should run at all based on platform matching. | 366   # Make sure we should run at all based on platform matching. | 
| 415   if options.platform: | 367   if options.platform: | 
| 416     if options.auto_platform: | 368     if options.auto_platform: | 
| 417       parser.error('--platform can not be specified with --auto_platform') | 369       parser.error('--platform can not be specified with --auto_platform') | 
| 418     if not re.match(options.platform, GetNormalizedPlatform()): | 370     if not re.match(options.platform, GetNormalizedPlatform()): | 
| (...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after  Loading... | 
| 492   if not options.directory and not options.force and not options.no_resume: | 444   if not options.directory and not options.force and not options.no_resume: | 
| 493     if os.path.exists(options.output): | 445     if os.path.exists(options.output): | 
| 494       parser.error('Output file %s exists and --no_resume is specified.' | 446       parser.error('Output file %s exists and --no_resume is specified.' | 
| 495                    % options.output) | 447                    % options.output) | 
| 496 | 448 | 
| 497   base_url = 'gs://%s' % options.bucket | 449   base_url = 'gs://%s' % options.bucket | 
| 498 | 450 | 
| 499   return download_from_google_storage( | 451   return download_from_google_storage( | 
| 500       input_filename, base_url, gsutil, options.num_threads, options.directory, | 452       input_filename, base_url, gsutil, options.num_threads, options.directory, | 
| 501       options.recursive, options.force, options.output, options.ignore_errors, | 453       options.recursive, options.force, options.output, options.ignore_errors, | 
| 502       options.sha1_file, options.verbose, options.auto_platform, | 454       options.sha1_file, options.verbose, options.auto_platform) | 
| 503       options.extract) |  | 
| 504 | 455 | 
| 505 | 456 | 
| 506 if __name__ == '__main__': | 457 if __name__ == '__main__': | 
| 507   sys.exit(main(sys.argv)) | 458   sys.exit(main(sys.argv)) | 
| OLD | NEW | 
|---|