Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(348)

Side by Side Diff: download_from_google_storage.py

Issue 807463005: Add support for tar.gz archive files to download from download_from_google_storage (Closed) Base URL: http://src.chromium.org/svn/trunk/tools/depot_tools/
Patch Set: Created 5 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | upload_to_google_storage.py » ('j') | upload_to_google_storage.py » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 """Download files from Google Storage based on SHA1 sums.""" 6 """Download files from Google Storage based on SHA1 sums."""
7 7
8 8
9 import hashlib 9 import hashlib
10 import optparse 10 import optparse
11 import os 11 import os
12 import Queue 12 import Queue
13 import re 13 import re
14 import shutil
14 import stat 15 import stat
15 import sys 16 import sys
17 import tarfile
16 import threading 18 import threading
17 import time 19 import time
18 20
19 import subprocess2 21 import subprocess2
20 22
21 23
22 GSUTIL_DEFAULT_PATH = os.path.join( 24 GSUTIL_DEFAULT_PATH = os.path.join(
23 os.path.dirname(os.path.abspath(__file__)), 'gsutil.py') 25 os.path.dirname(os.path.abspath(__file__)), 'gsutil.py')
24 # Maps sys.platform to what we actually want to call them. 26 # Maps sys.platform to what we actually want to call them.
25 PLATFORM_MAPPING = { 27 PLATFORM_MAPPING = {
(...skipping 16 matching lines...) Expand all
42 pass 44 pass
43 45
44 46
45 def GetNormalizedPlatform(): 47 def GetNormalizedPlatform():
46 """Returns the result of sys.platform accounting for cygwin. 48 """Returns the result of sys.platform accounting for cygwin.
47 Under cygwin, this will always return "win32" like the native Python.""" 49 Under cygwin, this will always return "win32" like the native Python."""
48 if sys.platform == 'cygwin': 50 if sys.platform == 'cygwin':
49 return 'win32' 51 return 'win32'
50 return sys.platform 52 return sys.platform
51 53
52
53 # Common utilities 54 # Common utilities
54 class Gsutil(object): 55 class Gsutil(object):
55 """Call gsutil with some predefined settings. This is a convenience object, 56 """Call gsutil with some predefined settings. This is a convenience object,
56 and is also immutable.""" 57 and is also immutable."""
57 def __init__(self, path, boto_path, timeout=None, version='4.7'): 58 def __init__(self, path, boto_path, timeout=None, version='4.7'):
58 if not os.path.exists(path): 59 if not os.path.exists(path):
59 raise FileNotFoundError('GSUtil not found in %s' % path) 60 raise FileNotFoundError('GSUtil not found in %s' % path)
60 self.path = path 61 self.path = path
61 self.timeout = timeout 62 self.timeout = timeout
62 self.boto_path = boto_path 63 self.boto_path = boto_path
(...skipping 133 matching lines...) Expand 10 before | Expand all | Expand 10 after
196 work_queue.put( 197 work_queue.put(
197 (sha1_match.groups(1)[0], full_path.replace('.sha1', ''))) 198 (sha1_match.groups(1)[0], full_path.replace('.sha1', '')))
198 work_queue_size += 1 199 work_queue_size += 1
199 else: 200 else:
200 if not ignore_errors: 201 if not ignore_errors:
201 raise InvalidFileError('No sha1 sum found in %s.' % filename) 202 raise InvalidFileError('No sha1 sum found in %s.' % filename)
202 print >> sys.stderr, 'No sha1 sum found in %s.' % filename 203 print >> sys.stderr, 'No sha1 sum found in %s.' % filename
203 return work_queue_size 204 return work_queue_size
204 205
205 206
def _validate_tar_file(tar, prefix):
  """Counts the archive entries that would land outside the |prefix| directory.

  Despite the name, the result is truthy when the archive is INVALID; callers
  treat any non-zero value as "refuse to extract".

  Args:
    tar: An object exposing getnames() that returns the member names of the
        archive (e.g. an open tarfile.TarFile).
    prefix: Name of the single top-level directory every entry must live in.

  Returns:
    The number of entries whose name does not resolve to |prefix| itself or to
    a path below it. 0 means every entry name is acceptable.
  """
  # Local import: tar member names always use '/' separators, so the check must
  # use POSIX path rules even when os.path is the Windows flavour.
  import posixpath

  root = prefix.rstrip('/')
  # Without a real directory name there is nothing to contain the entries in,
  # so every entry is reported as invalid.
  root_is_usable = root not in ('', '.', '..')

  def _is_outside(name):
    if not root_is_usable:
      return True
    # A backslash is a path separator when extracting on Windows but a plain
    # character elsewhere; reject it rather than guess which one applies.
    if '\\' in name:
      return True
    # Collapse '.' and '..' components first so that 'prefix/../../x' is not
    # accepted merely because it starts with the prefix.
    normalized = posixpath.normpath(name)
    # Compare against 'root/' rather than 'root' so that a sibling such as
    # 'rootfoo/x' is not mistaken for something inside 'root'.
    return normalized != root and not normalized.startswith(root + '/')

  # NOTE(review): only member names are checked. Symlink/hardlink targets are
  # not inspected here -- confirm whether archives with links must be rejected.
  return sum(1 for name in tar.getnames() if _is_outside(name))
hinoka 2015/02/06 18:54:35 1. Still prefer any(map(...)) 2. This doesn't look
ricow1 2015/02/10 11:15:45 Added more checks
210
206 def _downloader_worker_thread(thread_num, q, force, base_url, 211 def _downloader_worker_thread(thread_num, q, force, base_url,
207 gsutil, out_q, ret_codes, verbose): 212 gsutil, out_q, ret_codes, verbose, extract):
208 while True: 213 while True:
209 input_sha1_sum, output_filename = q.get() 214 input_sha1_sum, output_filename = q.get()
210 if input_sha1_sum is None: 215 if input_sha1_sum is None:
211 return 216 return
212 if os.path.exists(output_filename) and not force: 217 if os.path.exists(output_filename) and not force:
213 if get_sha1(output_filename) == input_sha1_sum: 218 if get_sha1(output_filename) == input_sha1_sum:
214 if verbose: 219 if verbose:
215 out_q.put( 220 out_q.put(
216 '%d> File %s exists and SHA1 matches. Skipping.' % ( 221 '%d> File %s exists and SHA1 matches. Skipping.' % (
217 thread_num, output_filename)) 222 thread_num, output_filename))
(...skipping 12 matching lines...) Expand all
230 os.remove(output_filename) # Delete the file if it exists already. 235 os.remove(output_filename) # Delete the file if it exists already.
231 except OSError: 236 except OSError:
232 if os.path.exists(output_filename): 237 if os.path.exists(output_filename):
233 out_q.put('%d> Warning: deleting %s failed.' % ( 238 out_q.put('%d> Warning: deleting %s failed.' % (
234 thread_num, output_filename)) 239 thread_num, output_filename))
235 code, _, err = gsutil.check_call('cp', file_url, output_filename) 240 code, _, err = gsutil.check_call('cp', file_url, output_filename)
236 if code != 0: 241 if code != 0:
237 out_q.put('%d> %s' % (thread_num, err)) 242 out_q.put('%d> %s' % (thread_num, err))
238 ret_codes.put((code, err)) 243 ret_codes.put((code, err))
239 244
245 if extract:
246 if (not tarfile.is_tarfile(output_filename)
247 or not output_filename.endswith('.tar.gz')):
248 out_q.put('%d> Error: %s is not a tar.gz archive.' % (
249 thread_num, output_filename))
250 ret_codes.put((1, '%s is not a tar.gz archive.' % (output_filename)))
251 continue
252 tar = tarfile.open(output_filename, 'r:gz')
253 dirname = os.path.dirname(os.path.abspath(output_filename))
254 extract_dir = output_filename[0:len(output_filename)-7]
255 if _validate_tar_file(tar, os.path.basename(extract_dir)):
256 out_q.put('%d> Error: %s contains files outside %s.' % (
257 thread_num, output_filename, extract_dir))
258 ret_codes.put((1, '%s contains invalid entries.' % (output_filename)))
259 continue
260 out_q.put('%d> extracting %s...' % (thread_num, extract_dir))
hinoka 2015/02/06 18:54:35 nit: s/extracting/Extracting/
ricow1 2015/02/10 11:15:45 Done.
261 if os.path.exists(extract_dir):
262 try:
263 shutil.rmtree(extract_dir)
264 out_q.put('%d> Removed %s...' % (thread_num, extract_dir))
265 except OSError:
266 out_q.put('%d> Warning: Can\'t delete: %s' % (
267 thread_num, extract_dir))
268 ret_codes.put((1, 'Can\'t delete %s.' % (extract_dir)))
269 continue
270 out_q.put('%d> Extracting %s to %s' % (thread_num, output_filename,
271 extract_dir))
272 tar.extractall(path=dirname)
240 # Set executable bit. 273 # Set executable bit.
241 if sys.platform == 'cygwin': 274 if sys.platform == 'cygwin':
242 # Under cygwin, mark all files as executable. The executable flag in 275 # Under cygwin, mark all files as executable. The executable flag in
243 # Google Storage will not be set when uploading from Windows, so if 276 # Google Storage will not be set when uploading from Windows, so if
244 # this script is running under cygwin and we're downloading an 277 # this script is running under cygwin and we're downloading an
245 # executable, it will be unrunnable from inside cygwin without this. 278 # executable, it will be unrunnable from inside cygwin without this.
246 st = os.stat(output_filename) 279 st = os.stat(output_filename)
247 os.chmod(output_filename, st.st_mode | stat.S_IEXEC) 280 os.chmod(output_filename, st.st_mode | stat.S_IEXEC)
248 elif sys.platform != 'win32': 281 elif sys.platform != 'win32':
249 # On non-Windows platforms, key off of the custom header 282 # On non-Windows platforms, key off of the custom header
(...skipping 10 matching lines...) Expand all
260 while True: 293 while True:
261 line = output_queue.get() 294 line = output_queue.get()
262 # Its plausible we want to print empty lines. 295 # Its plausible we want to print empty lines.
263 if line is None: 296 if line is None:
264 break 297 break
265 print line 298 print line
266 299
267 300
268 def download_from_google_storage( 301 def download_from_google_storage(
269 input_filename, base_url, gsutil, num_threads, directory, recursive, 302 input_filename, base_url, gsutil, num_threads, directory, recursive,
270 force, output, ignore_errors, sha1_file, verbose, auto_platform): 303 force, output, ignore_errors, sha1_file, verbose, auto_platform, extract):
271 # Start up all the worker threads. 304 # Start up all the worker threads.
272 all_threads = [] 305 all_threads = []
273 download_start = time.time() 306 download_start = time.time()
274 stdout_queue = Queue.Queue() 307 stdout_queue = Queue.Queue()
275 work_queue = Queue.Queue() 308 work_queue = Queue.Queue()
276 ret_codes = Queue.Queue() 309 ret_codes = Queue.Queue()
277 ret_codes.put((0, None)) 310 ret_codes.put((0, None))
278 for thread_num in range(num_threads): 311 for thread_num in range(num_threads):
279 t = threading.Thread( 312 t = threading.Thread(
280 target=_downloader_worker_thread, 313 target=_downloader_worker_thread,
281 args=[thread_num, work_queue, force, base_url, 314 args=[thread_num, work_queue, force, base_url,
282 gsutil, stdout_queue, ret_codes, verbose]) 315 gsutil, stdout_queue, ret_codes, verbose, extract])
283 t.daemon = True 316 t.daemon = True
284 t.start() 317 t.start()
285 all_threads.append(t) 318 all_threads.append(t)
286 printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue]) 319 printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue])
287 printer_thread.daemon = True 320 printer_thread.daemon = True
288 printer_thread.start() 321 printer_thread.start()
289 322
290 # Enumerate our work queue. 323 # Enumerate our work queue.
291 work_queue_size = enumerate_work_queue( 324 work_queue_size = enumerate_work_queue(
292 input_filename, work_queue, directory, recursive, 325 input_filename, work_queue, directory, recursive,
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
360 parser.add_option('-p', '--platform', 393 parser.add_option('-p', '--platform',
361 help='A regular expression that is compared against ' 394 help='A regular expression that is compared against '
362 'Python\'s sys.platform. If this option is specified, ' 395 'Python\'s sys.platform. If this option is specified, '
363 'the download will happen only if there is a match.') 396 'the download will happen only if there is a match.')
364 parser.add_option('-a', '--auto_platform', 397 parser.add_option('-a', '--auto_platform',
365 action='store_true', 398 action='store_true',
366 help='Detects if any parent folder of the target matches ' 399 help='Detects if any parent folder of the target matches '
367 '(linux|mac|win). If so, the script will only ' 400 '(linux|mac|win). If so, the script will only '
368 'process files that are in the paths that ' 401 'process files that are in the paths that '
369 'that matches the current platform.') 402 'that matches the current platform.')
403 parser.add_option('-u', '--extract',
404 action='store_true',
405 help='Extract a downloaded tar.gz file after download. '
hinoka 2015/02/06 18:54:35 s/after download// (its redundant)
ricow1 2015/02/10 11:15:45 Done.
406 'Leaves the tar.gz file around for sha verification1'
hinoka 2015/02/06 18:54:35 s/sha verification1/sha1 verification/
ricow1 2015/02/10 11:15:45 Done.
407 'If a directory with the same name as the tar.gz '
408 'file already exists, this is deleted (to get a '
hinoka 2015/02/06 18:54:35 s/this/it/
ricow1 2015/02/10 11:15:45 Done.
409 'clean state in case of update.)')
370 parser.add_option('-v', '--verbose', action='store_true', 410 parser.add_option('-v', '--verbose', action='store_true',
371 help='Output extra diagnostic and progress information.') 411 help='Output extra diagnostic and progress information.')
372 412
373 (options, args) = parser.parse_args() 413 (options, args) = parser.parse_args()
374 414
375 # Make sure we should run at all based on platform matching. 415 # Make sure we should run at all based on platform matching.
376 if options.platform: 416 if options.platform:
377 if options.auto_platform: 417 if options.auto_platform:
378 parser.error('--platform can not be specified with --auto_platform') 418 parser.error('--platform can not be specified with --auto_platform')
379 if not re.match(options.platform, GetNormalizedPlatform()): 419 if not re.match(options.platform, GetNormalizedPlatform()):
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after
456 496
457 # Check we have a valid bucket with valid permissions. 497 # Check we have a valid bucket with valid permissions.
458 if not options.no_auth: 498 if not options.no_auth:
459 code = check_bucket_permissions(base_url, gsutil) 499 code = check_bucket_permissions(base_url, gsutil)
460 if code: 500 if code:
461 return code 501 return code
462 502
463 return download_from_google_storage( 503 return download_from_google_storage(
464 input_filename, base_url, gsutil, options.num_threads, options.directory, 504 input_filename, base_url, gsutil, options.num_threads, options.directory,
465 options.recursive, options.force, options.output, options.ignore_errors, 505 options.recursive, options.force, options.output, options.ignore_errors,
466 options.sha1_file, options.verbose, options.auto_platform) 506 options.sha1_file, options.verbose, options.auto_platform,
507 options.extract)
467 508
468 509
469 if __name__ == '__main__': 510 if __name__ == '__main__':
470 sys.exit(main(sys.argv)) 511 sys.exit(main(sys.argv))
OLDNEW
« no previous file with comments | « no previous file | upload_to_google_storage.py » ('j') | upload_to_google_storage.py » ('J')

Powered by Google App Engine
This is Rietveld 408576698