Chromium Code Reviews

Unified Diff: download_from_google_storage.py

Issue 1285423002: Reland 0c7d94eb9d9fa388bda0d74405c0928dceea22ff: Add support for tar.gz archive files to download f… (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/tools/depot_tools
Patch Set: Created 5 years, 4 months ago
 #!/usr/bin/env python
 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
 # Use of this source code is governed by a BSD-style license that can be
 # found in the LICENSE file.
 
 """Download files from Google Storage based on SHA1 sums."""
 
 
 import hashlib
 import optparse
 import os
 import Queue
 import re
+import shutil
 import stat
 import sys
+import tarfile
 import threading
 import time
 
 import subprocess2
 
 
 GSUTIL_DEFAULT_PATH = os.path.join(
     os.path.dirname(os.path.abspath(__file__)), 'gsutil.py')
 # Maps sys.platform to what we actually want to call them.
 PLATFORM_MAPPING = {
(...skipping 16 matching lines...)
   pass
 
 
 def GetNormalizedPlatform():
   """Returns the result of sys.platform accounting for cygwin.
   Under cygwin, this will always return "win32" like the native Python."""
   if sys.platform == 'cygwin':
     return 'win32'
   return sys.platform
 
-
 # Common utilities
 class Gsutil(object):
   """Call gsutil with some predefined settings. This is a convenience object,
   and is also immutable."""
   def __init__(self, path, boto_path=None, timeout=None, version='4.7'):
     if not os.path.exists(path):
       raise FileNotFoundError('GSUtil not found in %s' % path)
     self.path = path
     self.timeout = timeout
     self.boto_path = boto_path
(...skipping 116 matching lines...)
         work_queue.put(
             (sha1_match.groups(1)[0], full_path.replace('.sha1', '')))
         work_queue_size += 1
       else:
         if not ignore_errors:
           raise InvalidFileError('No sha1 sum found in %s.' % filename)
         print >> sys.stderr, 'No sha1 sum found in %s.' % filename
   return work_queue_size
 
 
+def _validate_tar_file(tar, prefix):
+  def _validate(tarinfo):
+    """Returns false if the tarinfo is something we explicitly forbid."""
+    if tarinfo.issym() or tarinfo.islnk():
+      return False
+    if '..' in tarinfo.name or not tarinfo.name.startswith(prefix):
+      return False
+    return True
+  return all(map(_validate, tar.getmembers()))
+
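
To illustrate what the new _validate_tar_file helper guards against, here is a small self-contained sketch (not part of the patch; the archive contents are made up): every member must stay under the expected prefix, must not contain '..', and must not be a symlink or hard link, otherwise extraction could write outside the target directory.

  import io
  import tarfile

  def _make_tar(names):
    # Build an in-memory .tar.gz whose members have the given names.
    buf = io.BytesIO()
    with tarfile.open(fileobj=buf, mode='w:gz') as tar:
      for name in names:
        info = tarfile.TarInfo(name)
        data = 'contents'
        info.size = len(data)
        tar.addfile(info, io.BytesIO(data))
    buf.seek(0)
    return tarfile.open(fileobj=buf, mode='r:gz')

  good = _make_tar(['foo/a.txt', 'foo/bar/b.txt'])
  evil = _make_tar(['foo/a.txt', '../escape.txt'])
  print _validate_tar_file(good, 'foo')  # True: everything stays under foo/
  print _validate_tar_file(evil, 'foo')  # False: '..' would escape the extract dir
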
 def _downloader_worker_thread(thread_num, q, force, base_url,
-                              gsutil, out_q, ret_codes, verbose):
+                              gsutil, out_q, ret_codes, verbose, extract,
+                              delete=True):
   while True:
     input_sha1_sum, output_filename = q.get()
     if input_sha1_sum is None:
       return
     if os.path.exists(output_filename) and not force:
       if get_sha1(output_filename) == input_sha1_sum:
         if verbose:
           out_q.put(
               '%d> File %s exists and SHA1 matches. Skipping.' % (
                   thread_num, output_filename))
(...skipping 10 matching lines...)
       else:
         # Other error, probably auth related (bad ~/.boto, etc).
         out_q.put('%d> Failed to fetch file %s for %s, skipping. [Err: %s]' % (
             thread_num, file_url, output_filename, err))
         ret_codes.put((1, 'Failed to fetch file %s for %s. [Err: %s]' % (
             file_url, output_filename, err)))
       continue
     # Fetch the file.
     out_q.put('%d> Downloading %s...' % (thread_num, output_filename))
     try:
-      os.remove(output_filename)  # Delete the file if it exists already.
+      if delete:
+        os.remove(output_filename)  # Delete the file if it exists already.
     except OSError:
       if os.path.exists(output_filename):
         out_q.put('%d> Warning: deleting %s failed.' % (
             thread_num, output_filename))
     code, _, err = gsutil.check_call('cp', file_url, output_filename)
     if code != 0:
       out_q.put('%d> %s' % (thread_num, err))
       ret_codes.put((code, err))
       continue
 
     remote_sha1 = get_sha1(output_filename)
     if remote_sha1 != input_sha1_sum:
       msg = ('%d> ERROR remote sha1 (%s) does not match expected sha1 (%s).' %
              (thread_num, remote_sha1, input_sha1_sum))
       out_q.put(msg)
       ret_codes.put((20, msg))
       continue
 
+    if extract:
+      if (not tarfile.is_tarfile(output_filename)
+          or not output_filename.endswith('.tar.gz')):
+        out_q.put('%d> Error: %s is not a tar.gz archive.' % (
+            thread_num, output_filename))
+        ret_codes.put((1, '%s is not a tar.gz archive.' % (output_filename)))
+        continue
+      with tarfile.open(output_filename, 'r:gz') as tar:
+        dirname = os.path.dirname(os.path.abspath(output_filename))
+        extract_dir = output_filename[0:len(output_filename)-7]
+        if not _validate_tar_file(tar, os.path.basename(extract_dir)):
+          out_q.put('%d> Error: %s contains files outside %s.' % (
+              thread_num, output_filename, extract_dir))
+          ret_codes.put((1, '%s contains invalid entries.' % (output_filename)))
+          continue
+        if os.path.exists(extract_dir):
+          try:
+            shutil.rmtree(extract_dir)
+            out_q.put('%d> Removed %s...' % (thread_num, extract_dir))
+          except OSError:
+            out_q.put('%d> Warning: Can\'t delete: %s' % (
+                thread_num, extract_dir))
+            ret_codes.put((1, 'Can\'t delete %s.' % (extract_dir)))
+            continue
+        out_q.put('%d> Extracting %d entries from %s to %s' %
+                  (thread_num, len(tar.getmembers()), output_filename,
+                   extract_dir))
+        tar.extractall(path=dirname)
     # Set executable bit.
     if sys.platform == 'cygwin':
       # Under cygwin, mark all files as executable. The executable flag in
       # Google Storage will not be set when uploading from Windows, so if
       # this script is running under cygwin and we're downloading an
       # executable, it will be unrunnable from inside cygwin without this.
       st = os.stat(output_filename)
       os.chmod(output_filename, st.st_mode | stat.S_IEXEC)
     elif sys.platform != 'win32':
       # On non-Windows platforms, key off of the custom header
(...skipping 10 matching lines...)
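
To make the path handling in the new extraction branch concrete: the downloaded archive is left in place (so a later run can still verify its SHA1), and its contents are unpacked next to it into a directory named after the archive minus the '.tar.gz' suffix; any stale copy of that directory is removed first. A quick sketch with a hypothetical path, not part of the patch:

  import os

  output_filename = 'src/third_party/foo.tar.gz'  # hypothetical download target
  dirname = os.path.dirname(os.path.abspath(output_filename))
  extract_dir = output_filename[0:len(output_filename)-7]  # strips '.tar.gz'
  print extract_dir  # src/third_party/foo
  # tar.extractall(path=dirname) then recreates that directory, because
  # _validate_tar_file has already checked that every member starts with 'foo'.
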
   while True:
     line = output_queue.get()
     # Its plausible we want to print empty lines.
     if line is None:
       break
     print line
 
 
 def download_from_google_storage(
     input_filename, base_url, gsutil, num_threads, directory, recursive,
-    force, output, ignore_errors, sha1_file, verbose, auto_platform):
+    force, output, ignore_errors, sha1_file, verbose, auto_platform, extract):
   # Start up all the worker threads.
   all_threads = []
   download_start = time.time()
   stdout_queue = Queue.Queue()
   work_queue = Queue.Queue()
   ret_codes = Queue.Queue()
   ret_codes.put((0, None))
   for thread_num in range(num_threads):
     t = threading.Thread(
         target=_downloader_worker_thread,
         args=[thread_num, work_queue, force, base_url,
-              gsutil, stdout_queue, ret_codes, verbose])
+              gsutil, stdout_queue, ret_codes, verbose, extract])
     t.daemon = True
     t.start()
     all_threads.append(t)
   printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue])
   printer_thread.daemon = True
   printer_thread.start()
 
   # Enumerate our work queue.
   work_queue_size = enumerate_work_queue(
       input_filename, work_queue, directory, recursive,
(...skipping 67 matching lines...)
   parser.add_option('-p', '--platform',
                     help='A regular expression that is compared against '
                          'Python\'s sys.platform. If this option is specified, '
                          'the download will happen only if there is a match.')
   parser.add_option('-a', '--auto_platform',
                     action='store_true',
                     help='Detects if any parent folder of the target matches '
                          '(linux|mac|win). If so, the script will only '
                          'process files that are in the paths that '
                          'that matches the current platform.')
+  parser.add_option('-u', '--extract',
+                    action='store_true',
+                    help='Extract a downloaded tar.gz file. '
+                         'Leaves the tar.gz file around for sha1 verification. '
+                         'If a directory with the same name as the tar.gz '
+                         'file already exists, it is deleted (to get a '
+                         'clean state in case of update).')
   parser.add_option('-v', '--verbose', action='store_true',
                     help='Output extra diagnostic and progress information.')
 
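
For context, a typical invocation of the new flag would look something like the line below. The --extract spelling comes from this patch; the -b/--bucket and -s/--sha1_file spellings are assumed from the option destinations used later in main(), and the path is made up:

  download_from_google_storage --extract -b my-bucket -s src/third_party/foo.tar.gz.sha1

This fetches foo.tar.gz next to the .sha1 file, verifies its SHA1, and unpacks it into src/third_party/foo/, leaving the archive on disk so an unchanged hash lets later runs skip the download.
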
   (options, args) = parser.parse_args()
 
   # Make sure we should run at all based on platform matching.
   if options.platform:
     if options.auto_platform:
       parser.error('--platform can not be specified with --auto_platform')
     if not re.match(options.platform, GetNormalizedPlatform()):
(...skipping 73 matching lines...)
   if not options.directory and not options.force and not options.no_resume:
     if os.path.exists(options.output):
       parser.error('Output file %s exists and --no_resume is specified.'
                    % options.output)
 
   base_url = 'gs://%s' % options.bucket
 
   return download_from_google_storage(
       input_filename, base_url, gsutil, options.num_threads, options.directory,
       options.recursive, options.force, options.output, options.ignore_errors,
-      options.sha1_file, options.verbose, options.auto_platform)
+      options.sha1_file, options.verbose, options.auto_platform,
+      options.extract)
 
 
 if __name__ == '__main__':
   sys.exit(main(sys.argv))
