| Index: download_from_google_storage.py | 
| diff --git a/download_from_google_storage.py b/download_from_google_storage.py | 
| old mode 100755 | 
| new mode 100644 | 
| index a45f387ce2aeb3a2d9f8f820206ff8e597eb1e5c..d3f74369106f1955c1d5de9bff359df48916a95b | 
| --- a/download_from_google_storage.py | 
| +++ b/download_from_google_storage.py | 
| @@ -11,8 +11,10 @@ import optparse | 
| import os | 
| import Queue | 
| import re | 
| +import shutil | 
| import stat | 
| import sys | 
| +import tarfile | 
| import threading | 
| import time | 
|  | 
| @@ -49,7 +51,6 @@ def GetNormalizedPlatform(): | 
| return 'win32' | 
| return sys.platform | 
|  | 
| - | 
| # Common utilities | 
| class Gsutil(object): | 
| """Call gsutil with some predefined settings.  This is a convenience object, | 
| @@ -186,8 +187,19 @@ def enumerate_work_queue(input_filename, work_queue, directory, | 
| return work_queue_size | 
|  | 
|  | 
| +def _validate_tar_file(tar, prefix): | 
| +  def _validate(tarinfo): | 
| +    """Returns false if the tarinfo is something we explicitly forbid.""" | 
| +    if tarinfo.issym() or tarinfo.islnk(): | 
| +      return False | 
| +    if '..' in tarinfo.name or not tarinfo.name.startswith(prefix): | 
| +      return False | 
| +    return True | 
| +  return all(map(_validate, tar.getmembers())) | 
| + | 
| def _downloader_worker_thread(thread_num, q, force, base_url, | 
| -                              gsutil, out_q, ret_codes, verbose): | 
| +                              gsutil, out_q, ret_codes, verbose, extract, | 
| +                              delete=True): | 
| while True: | 
| input_sha1_sum, output_filename = q.get() | 
| if input_sha1_sum is None: | 
| @@ -218,7 +230,8 @@ def _downloader_worker_thread(thread_num, q, force, base_url, | 
| # Fetch the file. | 
| out_q.put('%d> Downloading %s...' % (thread_num, output_filename)) | 
| try: | 
| -      os.remove(output_filename)  # Delete the file if it exists already. | 
| +      if delete: | 
| +        os.remove(output_filename)  # Delete the file if it exists already. | 
| except OSError: | 
| if os.path.exists(output_filename): | 
| out_q.put('%d> Warning: deleting %s failed.' % ( | 
| @@ -228,6 +241,34 @@ def _downloader_worker_thread(thread_num, q, force, base_url, | 
| out_q.put('%d> %s' % (thread_num, err)) | 
| ret_codes.put((code, err)) | 
|  | 
| +    if extract: | 
| +      if (not tarfile.is_tarfile(output_filename) | 
| +          or not output_filename.endswith('.tar.gz')): | 
| +        out_q.put('%d> Error: %s is not a tar.gz archive.' % ( | 
| +                  thread_num, output_filename)) | 
| +        ret_codes.put((1, '%s is not a tar.gz archive.' % (output_filename))) | 
| +        continue | 
| +      with tarfile.open(output_filename, 'r:gz') as tar: | 
| +        dirname = os.path.dirname(os.path.abspath(output_filename)) | 
| +        extract_dir = output_filename[0:len(output_filename)-7] | 
| +        if not _validate_tar_file(tar, os.path.basename(extract_dir)): | 
| +          out_q.put('%d> Error: %s contains files outside %s.' % ( | 
| +                    thread_num, output_filename, extract_dir)) | 
| +          ret_codes.put((1, '%s contains invalid entries.' % (output_filename))) | 
| +          continue | 
| +        if os.path.exists(extract_dir): | 
| +          try: | 
| +            shutil.rmtree(extract_dir) | 
| +            out_q.put('%d> Removed %s...' % (thread_num, extract_dir)) | 
| +          except OSError: | 
| +            out_q.put('%d> Warning: Can\'t delete: %s' % ( | 
| +                      thread_num, extract_dir)) | 
| +            ret_codes.put((1, 'Can\'t delete %s.' % (extract_dir))) | 
| +            continue | 
| +        out_q.put('%d> Extracting %d entries from %s to %s' % | 
| +                  (thread_num, len(tar.getmembers()),output_filename, | 
| +                   extract_dir)) | 
| +        tar.extractall(path=dirname) | 
| # Set executable bit. | 
| if sys.platform == 'cygwin': | 
| # Under cygwin, mark all files as executable. The executable flag in | 
| @@ -258,7 +299,7 @@ def printer_worker(output_queue): | 
|  | 
| def download_from_google_storage( | 
| input_filename, base_url, gsutil, num_threads, directory, recursive, | 
| -    force, output, ignore_errors, sha1_file, verbose, auto_platform): | 
| +    force, output, ignore_errors, sha1_file, verbose, auto_platform, extract): | 
| # Start up all the worker threads. | 
| all_threads = [] | 
| download_start = time.time() | 
| @@ -270,7 +311,7 @@ def download_from_google_storage( | 
| t = threading.Thread( | 
| target=_downloader_worker_thread, | 
| args=[thread_num, work_queue, force, base_url, | 
| -              gsutil, stdout_queue, ret_codes, verbose]) | 
| +              gsutil, stdout_queue, ret_codes, verbose, extract]) | 
| t.daemon = True | 
| t.start() | 
| all_threads.append(t) | 
| @@ -358,6 +399,13 @@ def main(args): | 
| '(linux|mac|win).  If so, the script will only ' | 
| 'process files that are in the paths that ' | 
| 'that matches the current platform.') | 
| +  parser.add_option('-u', '--extract', | 
| +                    action='store_true', | 
| +                    help='Extract a downloaded tar.gz file. ' | 
| +                         'Leaves the tar.gz file around for sha1 verification' | 
| +                         'If a directory with the same name as the tar.gz ' | 
| +                         'file already exists, is deleted (to get a ' | 
| +                         'clean state in case of update.)') | 
| parser.add_option('-v', '--verbose', action='store_true', | 
| help='Output extra diagnostic and progress information.') | 
|  | 
| @@ -451,7 +499,8 @@ def main(args): | 
| return download_from_google_storage( | 
| input_filename, base_url, gsutil, options.num_threads, options.directory, | 
| options.recursive, options.force, options.output, options.ignore_errors, | 
| -      options.sha1_file, options.verbose, options.auto_platform) | 
| +      options.sha1_file, options.verbose, options.auto_platform, | 
| +      options.extract) | 
|  | 
|  | 
| if __name__ == '__main__': | 
|  |