Chromium Code Reviews | Index: download_from_google_storage.py |
| diff --git a/download_from_google_storage.py b/download_from_google_storage.py |
| old mode 100755 |
| new mode 100644 |
| index a45f387ce2aeb3a2d9f8f820206ff8e597eb1e5c..3bba49498acae0e3920ebd23f29d869afc7b9cdd |
| --- a/download_from_google_storage.py |
| +++ b/download_from_google_storage.py |
| @@ -11,8 +11,10 @@ import optparse |
| import os |
| import Queue |
| import re |
| +import shutil |
| import stat |
| import sys |
| +import tarfile |
| import threading |
| import time |
| @@ -49,7 +51,6 @@ def GetNormalizedPlatform(): |
| return 'win32' |
| return sys.platform |
| - |
| # Common utilities |
| class Gsutil(object): |
| """Call gsutil with some predefined settings. This is a convenience object, |
| @@ -186,8 +187,19 @@ def enumerate_work_queue(input_filename, work_queue, directory, |
| return work_queue_size |
| +def _validate_tar_file(tar, prefix): |
| + def _validate(tarinfo): |
| + """Returns false if the tarinfo is something we explicitly forbid.""" |
| + if tarinfo.issym() or tarinfo.islnk(): |
| + return False |
| + if '..' in tarinfo.name or not tarinfo.name.startswith(prefix): |
| + return False |
| + return True |
| + return all(map(_validate, tar.getmembers())) |
| + |
| def _downloader_worker_thread(thread_num, q, force, base_url, |
| - gsutil, out_q, ret_codes, verbose): |
| + gsutil, out_q, ret_codes, verbose, extract, |
| + delete=True): |
| while True: |
| input_sha1_sum, output_filename = q.get() |
| if input_sha1_sum is None: |
| @@ -218,7 +230,8 @@ def _downloader_worker_thread(thread_num, q, force, base_url, |
| # Fetch the file. |
| out_q.put('%d> Downloading %s...' % (thread_num, output_filename)) |
| try: |
| - os.remove(output_filename) # Delete the file if it exists already. |
| + if delete: |
| + os.remove(output_filename) # Delete the file if it exists already. |
| except OSError: |
| if os.path.exists(output_filename): |
| out_q.put('%d> Warning: deleting %s failed.' % ( |
| @@ -228,6 +241,34 @@ def _downloader_worker_thread(thread_num, q, force, base_url, |
| out_q.put('%d> %s' % (thread_num, err)) |
| ret_codes.put((code, err)) |
| + if extract: |
| + if (not tarfile.is_tarfile(output_filename) |
| + or not output_filename.endswith('.tar.gz')): |
| + out_q.put('%d> Error: %s is not a tar.gz archive.' % ( |
| + thread_num, output_filename)) |
| + ret_codes.put((1, '%s is not a tar.gz archive.' % (output_filename))) |
| + continue |
| + tar = tarfile.open(output_filename, 'r:gz') |
|
hinoka
2015/06/24 18:54:38
with tarfile.open(...) as tar:
ricow1
2015/06/25 06:24:05
Done.
|
| + dirname = os.path.dirname(os.path.abspath(output_filename)) |
| + extract_dir = output_filename[0:len(output_filename)-7] |
| + if not _validate_tar_file(tar, os.path.basename(extract_dir)): |
| + out_q.put('%d> Error: %s contains files outside %s.' % ( |
| + thread_num, output_filename, extract_dir)) |
| + ret_codes.put((1, '%s contains invalid entries.' % (output_filename))) |
| + continue |
| + out_q.put('%d> Extracting %s...' % (thread_num, extract_dir)) |
| + if os.path.exists(extract_dir): |
| + try: |
| + shutil.rmtree(extract_dir) |
| + out_q.put('%d> Removed %s...' % (thread_num, extract_dir)) |
| + except OSError: |
| + out_q.put('%d> Warning: Can\'t delete: %s' % ( |
| + thread_num, extract_dir)) |
| + ret_codes.put((1, 'Can\'t delete %s.' % (extract_dir))) |
| + continue |
| + out_q.put('%d> Extracting %s to %s' % (thread_num, output_filename, |
|
hinoka
2015/06/24 18:54:38
Is this supposed to be in this if block? Also seem… [comment truncated]
ricow1
2015/06/25 06:24:04
Removed printing above, added count
|
| + extract_dir)) |
| + tar.extractall(path=dirname) |
| # Set executable bit. |
| if sys.platform == 'cygwin': |
| # Under cygwin, mark all files as executable. The executable flag in |
| @@ -258,7 +299,7 @@ def printer_worker(output_queue): |
| def download_from_google_storage( |
| input_filename, base_url, gsutil, num_threads, directory, recursive, |
| - force, output, ignore_errors, sha1_file, verbose, auto_platform): |
| + force, output, ignore_errors, sha1_file, verbose, auto_platform, extract): |
| # Start up all the worker threads. |
| all_threads = [] |
| download_start = time.time() |
| @@ -270,7 +311,7 @@ def download_from_google_storage( |
| t = threading.Thread( |
| target=_downloader_worker_thread, |
| args=[thread_num, work_queue, force, base_url, |
| - gsutil, stdout_queue, ret_codes, verbose]) |
| + gsutil, stdout_queue, ret_codes, verbose, extract]) |
| t.daemon = True |
| t.start() |
| all_threads.append(t) |
| @@ -358,6 +399,13 @@ def main(args): |
| '(linux|mac|win). If so, the script will only ' |
| 'process files that are in the paths that ' |
| 'that matches the current platform.') |
| + parser.add_option('-u', '--extract', |
| + action='store_true', |
| + help='Extract a downloaded tar.gz file. ' |
| + 'Leaves the tar.gz file around for sha1 verification' |
| + 'If a directory with the same name as the tar.gz ' |
| + 'file already exists, is deleted (to get a ' |
| + 'clean state in case of update.)') |
| parser.add_option('-v', '--verbose', action='store_true', |
| help='Output extra diagnostic and progress information.') |
| @@ -451,7 +499,8 @@ def main(args): |
| return download_from_google_storage( |
| input_filename, base_url, gsutil, options.num_threads, options.directory, |
| options.recursive, options.force, options.output, options.ignore_errors, |
| - options.sha1_file, options.verbose, options.auto_platform) |
| + options.sha1_file, options.verbose, options.auto_platform, |
| + options.extract) |
| if __name__ == '__main__': |