download_from_google_storage.py - Issue 807463005: Add support for tar.gz archive files to download from download_from_google_storage

Unified Diff: download_from_google_storage.py

Issue 807463005: Add support for tar.gz archive files to download from download_from_google_storage (Closed) Base URL: http://src.chromium.org/svn/trunk/tools/depot_tools/

Patch Set: add some tests Created 5 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: download_from_google_storage.py

diff --git a/download_from_google_storage.py b/download_from_google_storage.py

old mode 100755

new mode 100644

index a45f387ce2aeb3a2d9f8f820206ff8e597eb1e5c..3bba49498acae0e3920ebd23f29d869afc7b9cdd

--- a/download_from_google_storage.py

+++ b/download_from_google_storage.py

@@ -11,8 +11,10 @@ import optparse

import os

import Queue

import re

+import shutil

import stat

import sys

+import tarfile

import threading

import time

@@ -49,7 +51,6 @@ def GetNormalizedPlatform():

return 'win32'

return sys.platform

# Common utilities

class Gsutil(object):

"""Call gsutil with some predefined settings. This is a convenience object,

@@ -186,8 +187,19 @@ def enumerate_work_queue(input_filename, work_queue, directory,

return work_queue_size

+def _validate_tar_file(tar, prefix):

+ def _validate(tarinfo):

+ """Returns false if the tarinfo is something we explicitly forbid."""

+ if tarinfo.issym() or tarinfo.islnk():

+ return False

+ if '..' in tarinfo.name or not tarinfo.name.startswith(prefix):

+ return False

+ return True

+ return all(map(_validate, tar.getmembers()))

def _downloader_worker_thread(thread_num, q, force, base_url,

- gsutil, out_q, ret_codes, verbose):

+ gsutil, out_q, ret_codes, verbose, extract,

+ delete=True):

while True:

input_sha1_sum, output_filename = q.get()

if input_sha1_sum is None:

@@ -218,7 +230,8 @@ def _downloader_worker_thread(thread_num, q, force, base_url,

# Fetch the file.

out_q.put('%d> Downloading %s...' % (thread_num, output_filename))

try:

- os.remove(output_filename) # Delete the file if it exists already.

+ if delete:

+ os.remove(output_filename) # Delete the file if it exists already.

except OSError:

if os.path.exists(output_filename):

out_q.put('%d> Warning: deleting %s failed.' % (

@@ -228,6 +241,34 @@ def _downloader_worker_thread(thread_num, q, force, base_url,

out_q.put('%d> %s' % (thread_num, err))

ret_codes.put((code, err))

+ if extract:

+ if (not tarfile.is_tarfile(output_filename)

+ or not output_filename.endswith('.tar.gz')):

+ out_q.put('%d> Error: %s is not a tar.gz archive.' % (

+ thread_num, output_filename))

+ ret_codes.put((1, '%s is not a tar.gz archive.' % (output_filename)))

+ continue

+ tar = tarfile.open(output_filename, 'r:gz')

hinoka 2015/06/24 18:54:38 with tarfile.open(...) as tar:

ricow1 2015/06/25 06:24:05 Done.

+ dirname = os.path.dirname(os.path.abspath(output_filename))

+ extract_dir = output_filename[0:len(output_filename)-7]

+ if not _validate_tar_file(tar, os.path.basename(extract_dir)):

+ out_q.put('%d> Error: %s contains files outside %s.' % (

+ thread_num, output_filename, extract_dir))

+ ret_codes.put((1, '%s contains invalid entries.' % (output_filename)))

+ continue

+ out_q.put('%d> Extracting %s...' % (thread_num, extract_dir))

+ if os.path.exists(extract_dir):

+ try:

+ shutil.rmtree(extract_dir)

+ out_q.put('%d> Removed %s...' % (thread_num, extract_dir))

+ except OSError:

+ out_q.put('%d> Warning: Can\'t delete: %s' % (

+ thread_num, extract_dir))

+ ret_codes.put((1, 'Can\'t delete %s.' % (extract_dir)))

+ continue

+ out_q.put('%d> Extracting %s to %s' % (thread_num, output_filename,

hinoka 2015/06/24 18:54:38 Is this supposed to be in this if block? Also seem

ricow1 2015/06/25 06:24:04 Removed printing above, added count

+ extract_dir))

+ tar.extractall(path=dirname)

# Set executable bit.

if sys.platform == 'cygwin':

# Under cygwin, mark all files as executable. The executable flag in

@@ -258,7 +299,7 @@ def printer_worker(output_queue):

def download_from_google_storage(

input_filename, base_url, gsutil, num_threads, directory, recursive,

- force, output, ignore_errors, sha1_file, verbose, auto_platform):

+ force, output, ignore_errors, sha1_file, verbose, auto_platform, extract):

# Start up all the worker threads.

all_threads = []

download_start = time.time()

@@ -270,7 +311,7 @@ def download_from_google_storage(

t = threading.Thread(

target=_downloader_worker_thread,

args=[thread_num, work_queue, force, base_url,

- gsutil, stdout_queue, ret_codes, verbose])

+ gsutil, stdout_queue, ret_codes, verbose, extract])

t.daemon = True

t.start()

all_threads.append(t)

@@ -358,6 +399,13 @@ def main(args):

'(linux|mac|win). If so, the script will only '

'process files that are in the paths that '

'that matches the current platform.')

+ parser.add_option('-u', '--extract',

+ action='store_true',

+ help='Extract a downloaded tar.gz file. '

+ 'Leaves the tar.gz file around for sha1 verification'

+ 'If a directory with the same name as the tar.gz '

+ 'file already exists, is deleted (to get a '

+ 'clean state in case of update.)')

parser.add_option('-v', '--verbose', action='store_true',

help='Output extra diagnostic and progress information.')

@@ -451,7 +499,8 @@ def main(args):

return download_from_google_storage(

input_filename, base_url, gsutil, options.num_threads, options.directory,

options.recursive, options.force, options.output, options.ignore_errors,

- options.sha1_file, options.verbose, options.auto_platform)

+ options.sha1_file, options.verbose, options.auto_platform,

+ options.extract)

if __name__ == '__main__':

« no previous file with comments | « no previous file | tests/download_from_google_storage_unittests.py » ('j') | tests/download_from_google_storage_unittests.py » ('J')