download_from_google_storage.py - Issue 807463005: Add support for tar.gz archive files to download from download_from_google_storage

Side by Side Diff: download_from_google_storage.py

Issue 807463005: Add support for tar.gz archive files to download from download_from_google_storage (Closed) Base URL: http://src.chromium.org/svn/trunk/tools/depot_tools/

Patch Set: Created 5 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 #!/usr/bin/env python	1 #!/usr/bin/env python

2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.	2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.

3 # Use of this source code is governed by a BSD-style license that can be	3 # Use of this source code is governed by a BSD-style license that can be

4 # found in the LICENSE file.	4 # found in the LICENSE file.

5	5

6 """Download files from Google Storage based on SHA1 sums."""	6 """Download files from Google Storage based on SHA1 sums."""

7	7

8	8

9 import hashlib	9 import hashlib

10 import optparse	10 import optparse

11 import os	11 import os

12 import Queue	12 import Queue

13 import re	13 import re

	14 import shutil

14 import stat	15 import stat

15 import sys	16 import sys

	17 import tarfile

16 import threading	18 import threading

17 import time	19 import time

18	20

19 import subprocess2	21 import subprocess2

20	22

21	23

22 GSUTIL_DEFAULT_PATH = os.path.join(	24 GSUTIL_DEFAULT_PATH = os.path.join(

23 os.path.dirname(os.path.abspath(__file__)), 'gsutil.py')	25 os.path.dirname(os.path.abspath(__file__)), 'gsutil.py')

24 # Maps sys.platform to what we actually want to call them.	26 # Maps sys.platform to what we actually want to call them.

25 PLATFORM_MAPPING = {	27 PLATFORM_MAPPING = {

(...skipping 171 matching lines...) [Expand 10 before | Expand all | Expand 10 after]
197 (sha1_match.groups(1)[0], full_path.replace('.sha1', '')))	199 (sha1_match.groups(1)[0], full_path.replace('.sha1', '')))

198 work_queue_size += 1	200 work_queue_size += 1

199 else:	201 else:

200 if not ignore_errors:	202 if not ignore_errors:

201 raise InvalidFileError('No sha1 sum found in %s.' % filename)	203 raise InvalidFileError('No sha1 sum found in %s.' % filename)

202 print >> sys.stderr, 'No sha1 sum found in %s.' % filename	204 print >> sys.stderr, 'No sha1 sum found in %s.' % filename

203 return work_queue_size	205 return work_queue_size

204	206

205	207

206 def _downloader_worker_thread(thread_num, q, force, base_url,	208 def _downloader_worker_thread(thread_num, q, force, base_url,

207 gsutil, out_q, ret_codes, verbose):	209 gsutil, out_q, ret_codes, verbose, extract):

208 while True:	210 while True:

209 input_sha1_sum, output_filename = q.get()	211 input_sha1_sum, output_filename = q.get()

210 if input_sha1_sum is None:	212 if input_sha1_sum is None:

211 return	213 return

212 if os.path.exists(output_filename) and not force:	214 if os.path.exists(output_filename) and not force:

213 if get_sha1(output_filename) == input_sha1_sum:	215 if get_sha1(output_filename) == input_sha1_sum:

214 if verbose:	216 if verbose:

215 out_q.put(	217 out_q.put(

216 '%d> File %s exists and SHA1 matches. Skipping.' % (	218 '%d> File %s exists and SHA1 matches. Skipping.' % (

217 thread_num, output_filename))	219 thread_num, output_filename))

(...skipping 12 matching lines...) [Expand all]
230 os.remove(output_filename) # Delete the file if it exists already.	232 os.remove(output_filename) # Delete the file if it exists already.

231 except OSError:	233 except OSError:

232 if os.path.exists(output_filename):	234 if os.path.exists(output_filename):

233 out_q.put('%d> Warning: deleting %s failed.' % (	235 out_q.put('%d> Warning: deleting %s failed.' % (

234 thread_num, output_filename))	236 thread_num, output_filename))

235 code, _, err = gsutil.check_call('cp', file_url, output_filename)	237 code, _, err = gsutil.check_call('cp', file_url, output_filename)

236 if code != 0:	238 if code != 0:

237 out_q.put('%d> %s' % (thread_num, err))	239 out_q.put('%d> %s' % (thread_num, err))

238 ret_codes.put((code, err))	240 ret_codes.put((code, err))

239	241

	242 if extract:

	243 if (not tarfile.is_tarfile(output_filename)

	244 or not output_filename.endswith('tar.gz')):
	hinoka (2015/02/05 18:44:16): Use '.tar.gz' here, to match the expectations on line 251. ricow1 (2015/02/06 11:52:29): Done.
	245 out_q.put('%d> Error: %s is not a tar.gz archive.' % (

	246 thread_num, output_filename))

	247 ret_codes.put((1, '%s is not a tar.gz archive.' % (output_filename)))

	248 continue

	249 tar = tarfile.open(output_filename, 'r:gz')
	hinoka (2015/02/05 18:44:16): Also validate all paths in the tarfile here (only having checks in the upload scripts isn't going to prevent malicious out-of-band uploads). May have to iterate through each file. ricow1 (2015/02/06 11:52:29): Added check.
	250 dirname = os.path.dirname(os.path.abspath(output_filename))

	251 extract_dir = output_filename[0:len(output_filename)-7]

	252 out_q.put('%d> extracting %s...' % (thread_num, extract_dir))

	253 if os.path.exists(extract_dir):

	254 try:

	255 shutil.rmtree(extract_dir)

	256 out_q.put('%d> Removed %s...' % (thread_num, extract_dir))

	257 except OSError:

	258 out_q.put('%d> Warning: Can\'t delete: %s' % (

	259 thread_num, extract_dir))

	260 ret_codes.put((1, 'Can\'t delete %s.' % (extract_dir)))

	261 continue

	262 out_q.put('%d> Extracting %s to %s' % (thread_num, output_filename,

	263 extract_dir))

	264 tar.extractall(path=dirname)

240 # Set executable bit.	265 # Set executable bit.

241 if sys.platform == 'cygwin':	266 if sys.platform == 'cygwin':

242 # Under cygwin, mark all files as executable. The executable flag in	267 # Under cygwin, mark all files as executable. The executable flag in

243 # Google Storage will not be set when uploading from Windows, so if	268 # Google Storage will not be set when uploading from Windows, so if

244 # this script is running under cygwin and we're downloading an	269 # this script is running under cygwin and we're downloading an

245 # executable, it will be unrunnable from inside cygwin without this.	270 # executable, it will be unrunnable from inside cygwin without this.

246 st = os.stat(output_filename)	271 st = os.stat(output_filename)

247 os.chmod(output_filename, st.st_mode \| stat.S_IEXEC)	272 os.chmod(output_filename, st.st_mode \| stat.S_IEXEC)

248 elif sys.platform != 'win32':	273 elif sys.platform != 'win32':

249 # On non-Windows platforms, key off of the custom header	274 # On non-Windows platforms, key off of the custom header

(...skipping 10 matching lines...) [Expand all]
260 while True:	285 while True:

261 line = output_queue.get()	286 line = output_queue.get()

262 # Its plausible we want to print empty lines.	287 # Its plausible we want to print empty lines.

263 if line is None:	288 if line is None:

264 break	289 break

265 print line	290 print line

266	291

267	292

268 def download_from_google_storage(	293 def download_from_google_storage(

269 input_filename, base_url, gsutil, num_threads, directory, recursive,	294 input_filename, base_url, gsutil, num_threads, directory, recursive,

270 force, output, ignore_errors, sha1_file, verbose, auto_platform):	295 force, output, ignore_errors, sha1_file, verbose, auto_platform, extract):

271 # Start up all the worker threads.	296 # Start up all the worker threads.

272 all_threads = []	297 all_threads = []

273 download_start = time.time()	298 download_start = time.time()

274 stdout_queue = Queue.Queue()	299 stdout_queue = Queue.Queue()

275 work_queue = Queue.Queue()	300 work_queue = Queue.Queue()

276 ret_codes = Queue.Queue()	301 ret_codes = Queue.Queue()

277 ret_codes.put((0, None))	302 ret_codes.put((0, None))

278 for thread_num in range(num_threads):	303 for thread_num in range(num_threads):

279 t = threading.Thread(	304 t = threading.Thread(

280 target=_downloader_worker_thread,	305 target=_downloader_worker_thread,

281 args=[thread_num, work_queue, force, base_url,	306 args=[thread_num, work_queue, force, base_url,

282 gsutil, stdout_queue, ret_codes, verbose])	307 gsutil, stdout_queue, ret_codes, verbose, extract])

283 t.daemon = True	308 t.daemon = True

284 t.start()	309 t.start()

285 all_threads.append(t)	310 all_threads.append(t)

286 printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue])	311 printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue])

287 printer_thread.daemon = True	312 printer_thread.daemon = True

288 printer_thread.start()	313 printer_thread.start()

289	314

290 # Enumerate our work queue.	315 # Enumerate our work queue.

291 work_queue_size = enumerate_work_queue(	316 work_queue_size = enumerate_work_queue(

292 input_filename, work_queue, directory, recursive,	317 input_filename, work_queue, directory, recursive,

(...skipping 67 matching lines...) [Expand 10 before | Expand all | Expand 10 after]
360 parser.add_option('-p', '--platform',	385 parser.add_option('-p', '--platform',

361 help='A regular expression that is compared against '	386 help='A regular expression that is compared against '

362 'Python\'s sys.platform. If this option is specified, '	387 'Python\'s sys.platform. If this option is specified, '

363 'the download will happen only if there is a match.')	388 'the download will happen only if there is a match.')

364 parser.add_option('-a', '--auto_platform',	389 parser.add_option('-a', '--auto_platform',

365 action='store_true',	390 action='store_true',

366 help='Detects if any parent folder of the target matches '	391 help='Detects if any parent folder of the target matches '

367 '(linux\|mac\|win). If so, the script will only '	392 '(linux\|mac\|win). If so, the script will only '

368 'process files that are in the paths that '	393 'process files that are in the paths that '

369 'that matches the current platform.')	394 'that matches the current platform.')

	395 parser.add_option('-u', '--extract',

	396 action='store_true',

	397 help='Extract a downloaded tar.gz file after download. '

	398 'Leaves the tar.gz file around for sha verification1'

	399 'If a directory with the same name as the tar.gz '

	400 'file already exists, this is deleted (to get a '

	401 'clean state in case of update.)')

370 parser.add_option('-v', '--verbose', action='store_true',	402 parser.add_option('-v', '--verbose', action='store_true',

371 help='Output extra diagnostic and progress information.')	403 help='Output extra diagnostic and progress information.')

372	404

373 (options, args) = parser.parse_args()	405 (options, args) = parser.parse_args()

374	406

375 # Make sure we should run at all based on platform matching.	407 # Make sure we should run at all based on platform matching.

376 if options.platform:	408 if options.platform:

377 if options.auto_platform:	409 if options.auto_platform:

378 parser.error('--platform can not be specified with --auto_platform')	410 parser.error('--platform can not be specified with --auto_platform')

379 if not re.match(options.platform, GetNormalizedPlatform()):	411 if not re.match(options.platform, GetNormalizedPlatform()):

(...skipping 76 matching lines...) [Expand 10 before | Expand all | Expand 10 after]
456	488

457 # Check we have a valid bucket with valid permissions.	489 # Check we have a valid bucket with valid permissions.

458 if not options.no_auth:	490 if not options.no_auth:

459 code = check_bucket_permissions(base_url, gsutil)	491 code = check_bucket_permissions(base_url, gsutil)

460 if code:	492 if code:

461 return code	493 return code

462	494

463 return download_from_google_storage(	495 return download_from_google_storage(

464 input_filename, base_url, gsutil, options.num_threads, options.directory,	496 input_filename, base_url, gsutil, options.num_threads, options.directory,

465 options.recursive, options.force, options.output, options.ignore_errors,	497 options.recursive, options.force, options.output, options.ignore_errors,

466 options.sha1_file, options.verbose, options.auto_platform)	498 options.sha1_file, options.verbose, options.auto_platform,

	499 options.extract)

467	500

468	501

469 if __name__ == '__main__':	502 if __name__ == '__main__':

470 sys.exit(main(sys.argv))	503 sys.exit(main(sys.argv))

OLD	NEW

« no previous file with comments | « no previous file | upload_to_google_storage.py » ('j') | no next file with comments »