Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
| 5 | 5 |
| 6 """Download files from Google Storage based on SHA1 sums.""" | 6 """Download files from Google Storage based on SHA1 sums.""" |
| 7 | 7 |
| 8 | 8 |
| 9 import hashlib | 9 import hashlib |
| 10 import optparse | 10 import optparse |
| 11 import os | 11 import os |
| 12 import Queue | 12 import Queue |
| 13 import re | 13 import re |
| 14 import shutil | |
| 14 import stat | 15 import stat |
| 15 import sys | 16 import sys |
| 17 import tarfile | |
| 16 import threading | 18 import threading |
| 17 import time | 19 import time |
| 18 | 20 |
| 19 import subprocess2 | 21 import subprocess2 |
| 20 | 22 |
| 21 | 23 |
| 22 GSUTIL_DEFAULT_PATH = os.path.join( | 24 GSUTIL_DEFAULT_PATH = os.path.join( |
| 23 os.path.dirname(os.path.abspath(__file__)), 'gsutil.py') | 25 os.path.dirname(os.path.abspath(__file__)), 'gsutil.py') |
| 24 # Maps sys.platform to what we actually want to call them. | 26 # Maps sys.platform to what we actually want to call them. |
| 25 PLATFORM_MAPPING = { | 27 PLATFORM_MAPPING = { |
| (...skipping 16 matching lines...) | |
| 42 pass | 44 pass |
| 43 | 45 |
| 44 | 46 |
| 45 def GetNormalizedPlatform(): | 47 def GetNormalizedPlatform(): |
| 46 """Returns the result of sys.platform accounting for cygwin. | 48 """Returns the result of sys.platform accounting for cygwin. |
| 47 Under cygwin, this will always return "win32" like the native Python.""" | 49 Under cygwin, this will always return "win32" like the native Python.""" |
| 48 if sys.platform == 'cygwin': | 50 if sys.platform == 'cygwin': |
| 49 return 'win32' | 51 return 'win32' |
| 50 return sys.platform | 52 return sys.platform |
| 51 | 53 |
| 52 | |
| 53 # Common utilities | 54 # Common utilities |
| 54 class Gsutil(object): | 55 class Gsutil(object): |
| 55 """Call gsutil with some predefined settings. This is a convenience object, | 56 """Call gsutil with some predefined settings. This is a convenience object, |
| 56 and is also immutable.""" | 57 and is also immutable.""" |
| 57 def __init__(self, path, boto_path, timeout=None, version='4.7'): | 58 def __init__(self, path, boto_path, timeout=None, version='4.7'): |
| 58 if not os.path.exists(path): | 59 if not os.path.exists(path): |
| 59 raise FileNotFoundError('GSUtil not found in %s' % path) | 60 raise FileNotFoundError('GSUtil not found in %s' % path) |
| 60 self.path = path | 61 self.path = path |
| 61 self.timeout = timeout | 62 self.timeout = timeout |
| 62 self.boto_path = boto_path | 63 self.boto_path = boto_path |
| (...skipping 133 matching lines...) | |
| 196 work_queue.put( | 197 work_queue.put( |
| 197 (sha1_match.groups(1)[0], full_path.replace('.sha1', ''))) | 198 (sha1_match.groups(1)[0], full_path.replace('.sha1', ''))) |
| 198 work_queue_size += 1 | 199 work_queue_size += 1 |
| 199 else: | 200 else: |
| 200 if not ignore_errors: | 201 if not ignore_errors: |
| 201 raise InvalidFileError('No sha1 sum found in %s.' % filename) | 202 raise InvalidFileError('No sha1 sum found in %s.' % filename) |
| 202 print >> sys.stderr, 'No sha1 sum found in %s.' % filename | 203 print >> sys.stderr, 'No sha1 sum found in %s.' % filename |
| 203 return work_queue_size | 204 return work_queue_size |
| 204 | 205 |
| 205 | 206 |
| 207 def _validate_tar_file(tar, prefix): | |
|
hinoka
2015/02/11 00:16:57
Also check for symbolic/hard links when decompress
ricow1
2015/06/19 09:31:27
Changed to your suggestion below
| |
| 208 files = tar.getnames() | |
| 209 if any(map(lambda x: '..' in x, files)): | |
|
hinoka
2015/02/11 00:16:57
how about
def _validate(tarinfo):
"""Returns fa
ricow1
2015/06/19 09:31:27
Done.
| |
| 210 return True | |
| 211 return any(map(lambda x: not x.startswith(prefix), files)) | |
| 212 | |
| 206 def _downloader_worker_thread(thread_num, q, force, base_url, | 213 def _downloader_worker_thread(thread_num, q, force, base_url, |
| 207 gsutil, out_q, ret_codes, verbose): | 214 gsutil, out_q, ret_codes, verbose, extract): |
| 208 while True: | 215 while True: |
| 209 input_sha1_sum, output_filename = q.get() | 216 input_sha1_sum, output_filename = q.get() |
| 210 if input_sha1_sum is None: | 217 if input_sha1_sum is None: |
| 211 return | 218 return |
| 212 if os.path.exists(output_filename) and not force: | 219 if os.path.exists(output_filename) and not force: |
| 213 if get_sha1(output_filename) == input_sha1_sum: | 220 if get_sha1(output_filename) == input_sha1_sum: |
| 214 if verbose: | 221 if verbose: |
| 215 out_q.put( | 222 out_q.put( |
| 216 '%d> File %s exists and SHA1 matches. Skipping.' % ( | 223 '%d> File %s exists and SHA1 matches. Skipping.' % ( |
| 217 thread_num, output_filename)) | 224 thread_num, output_filename)) |
| (...skipping 12 matching lines...) | |
| 230 os.remove(output_filename) # Delete the file if it exists already. | 237 os.remove(output_filename) # Delete the file if it exists already. |
| 231 except OSError: | 238 except OSError: |
| 232 if os.path.exists(output_filename): | 239 if os.path.exists(output_filename): |
| 233 out_q.put('%d> Warning: deleting %s failed.' % ( | 240 out_q.put('%d> Warning: deleting %s failed.' % ( |
| 234 thread_num, output_filename)) | 241 thread_num, output_filename)) |
| 235 code, _, err = gsutil.check_call('cp', file_url, output_filename) | 242 code, _, err = gsutil.check_call('cp', file_url, output_filename) |
| 236 if code != 0: | 243 if code != 0: |
| 237 out_q.put('%d> %s' % (thread_num, err)) | 244 out_q.put('%d> %s' % (thread_num, err)) |
| 238 ret_codes.put((code, err)) | 245 ret_codes.put((code, err)) |
| 239 | 246 |
| 247 if extract: | |
| 248 if (not tarfile.is_tarfile(output_filename) | |
| 249 or not output_filename.endswith('.tar.gz')): | |
| 250 out_q.put('%d> Error: %s is not a tar.gz archive.' % ( | |
| 251 thread_num, output_filename)) | |
| 252 ret_codes.put((1, '%s is not a tar.gz archive.' % (output_filename))) | |
| 253 continue | |
| 254 tar = tarfile.open(output_filename, 'r:gz') | |
| 255 dirname = os.path.dirname(os.path.abspath(output_filename)) | |
| 256 extract_dir = output_filename[0:len(output_filename)-7] | |
| 257 if _validate_tar_file(tar, os.path.basename(extract_dir)): | |
|
hinoka
2015/02/11 00:16:57
"_validate_tar_file" implies it would return True
ricow1
2015/06/19 09:31:27
Done.
| |
| 258 out_q.put('%d> Error: %s contains files outside %s.' % ( | |
| 259 thread_num, output_filename, extract_dir)) | |
| 260 ret_codes.put((1, '%s contains invalid entries.' % (output_filename))) | |
| 261 continue | |
| 262 out_q.put('%d> Extracting %s...' % (thread_num, extract_dir)) | |
| 263 if os.path.exists(extract_dir): | |
| 264 try: | |
| 265 shutil.rmtree(extract_dir) | |
| 266 out_q.put('%d> Removed %s...' % (thread_num, extract_dir)) | |
| 267 except OSError: | |
| 268 out_q.put('%d> Warning: Can\'t delete: %s' % ( | |
| 269 thread_num, extract_dir)) | |
| 270 ret_codes.put((1, 'Can\'t delete %s.' % (extract_dir))) | |
| 271 continue | |
| 272 out_q.put('%d> Extracting %s to %s' % (thread_num, output_filename, | |
| 273 extract_dir)) | |
| 274 tar.extractall(path=dirname) | |
| 240 # Set executable bit. | 275 # Set executable bit. |
| 241 if sys.platform == 'cygwin': | 276 if sys.platform == 'cygwin': |
| 242 # Under cygwin, mark all files as executable. The executable flag in | 277 # Under cygwin, mark all files as executable. The executable flag in |
| 243 # Google Storage will not be set when uploading from Windows, so if | 278 # Google Storage will not be set when uploading from Windows, so if |
| 244 # this script is running under cygwin and we're downloading an | 279 # this script is running under cygwin and we're downloading an |
| 245 # executable, it will be unrunnable from inside cygwin without this. | 280 # executable, it will be unrunnable from inside cygwin without this. |
| 246 st = os.stat(output_filename) | 281 st = os.stat(output_filename) |
| 247 os.chmod(output_filename, st.st_mode | stat.S_IEXEC) | 282 os.chmod(output_filename, st.st_mode | stat.S_IEXEC) |
| 248 elif sys.platform != 'win32': | 283 elif sys.platform != 'win32': |
| 249 # On non-Windows platforms, key off of the custom header | 284 # On non-Windows platforms, key off of the custom header |
| (...skipping 10 matching lines...) | |
| 260 while True: | 295 while True: |
| 261 line = output_queue.get() | 296 line = output_queue.get() |
| 262 # Its plausible we want to print empty lines. | 297 # Its plausible we want to print empty lines. |
| 263 if line is None: | 298 if line is None: |
| 264 break | 299 break |
| 265 print line | 300 print line |
| 266 | 301 |
| 267 | 302 |
| 268 def download_from_google_storage( | 303 def download_from_google_storage( |
| 269 input_filename, base_url, gsutil, num_threads, directory, recursive, | 304 input_filename, base_url, gsutil, num_threads, directory, recursive, |
| 270 force, output, ignore_errors, sha1_file, verbose, auto_platform): | 305 force, output, ignore_errors, sha1_file, verbose, auto_platform, extract): |
| 271 # Start up all the worker threads. | 306 # Start up all the worker threads. |
| 272 all_threads = [] | 307 all_threads = [] |
| 273 download_start = time.time() | 308 download_start = time.time() |
| 274 stdout_queue = Queue.Queue() | 309 stdout_queue = Queue.Queue() |
| 275 work_queue = Queue.Queue() | 310 work_queue = Queue.Queue() |
| 276 ret_codes = Queue.Queue() | 311 ret_codes = Queue.Queue() |
| 277 ret_codes.put((0, None)) | 312 ret_codes.put((0, None)) |
| 278 for thread_num in range(num_threads): | 313 for thread_num in range(num_threads): |
| 279 t = threading.Thread( | 314 t = threading.Thread( |
| 280 target=_downloader_worker_thread, | 315 target=_downloader_worker_thread, |
| 281 args=[thread_num, work_queue, force, base_url, | 316 args=[thread_num, work_queue, force, base_url, |
| 282 gsutil, stdout_queue, ret_codes, verbose]) | 317 gsutil, stdout_queue, ret_codes, verbose, extract]) |
| 283 t.daemon = True | 318 t.daemon = True |
| 284 t.start() | 319 t.start() |
| 285 all_threads.append(t) | 320 all_threads.append(t) |
| 286 printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue]) | 321 printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue]) |
| 287 printer_thread.daemon = True | 322 printer_thread.daemon = True |
| 288 printer_thread.start() | 323 printer_thread.start() |
| 289 | 324 |
| 290 # Enumerate our work queue. | 325 # Enumerate our work queue. |
| 291 work_queue_size = enumerate_work_queue( | 326 work_queue_size = enumerate_work_queue( |
| 292 input_filename, work_queue, directory, recursive, | 327 input_filename, work_queue, directory, recursive, |
| (...skipping 67 matching lines...) | |
| 360 parser.add_option('-p', '--platform', | 395 parser.add_option('-p', '--platform', |
| 361 help='A regular expression that is compared against ' | 396 help='A regular expression that is compared against ' |
| 362 'Python\'s sys.platform. If this option is specified, ' | 397 'Python\'s sys.platform. If this option is specified, ' |
| 363 'the download will happen only if there is a match.') | 398 'the download will happen only if there is a match.') |
| 364 parser.add_option('-a', '--auto_platform', | 399 parser.add_option('-a', '--auto_platform', |
| 365 action='store_true', | 400 action='store_true', |
| 366 help='Detects if any parent folder of the target matches ' | 401 help='Detects if any parent folder of the target matches ' |
| 367 '(linux|mac|win). If so, the script will only ' | 402 '(linux|mac|win). If so, the script will only ' |
| 368 'process files that are in the paths that ' | 403 'process files that are in the paths that ' |
| 369 'that matches the current platform.') | 404 'that matches the current platform.') |
| 405 parser.add_option('-u', '--extract', | |
| 406 action='store_true', | |
| 407 help='Extract a downloaded tar.gz file. ' | |
| 408 'Leaves the tar.gz file around for sha1 verification' | |
| 409 'If a directory with the same name as the tar.gz ' | |
| 410 'file already exists, is deleted (to get a ' | |
| 411 'clean state in case of update.)') | |
| 370 parser.add_option('-v', '--verbose', action='store_true', | 412 parser.add_option('-v', '--verbose', action='store_true', |
| 371 help='Output extra diagnostic and progress information.') | 413 help='Output extra diagnostic and progress information.') |
| 372 | 414 |
| 373 (options, args) = parser.parse_args() | 415 (options, args) = parser.parse_args() |
| 374 | 416 |
| 375 # Make sure we should run at all based on platform matching. | 417 # Make sure we should run at all based on platform matching. |
| 376 if options.platform: | 418 if options.platform: |
| 377 if options.auto_platform: | 419 if options.auto_platform: |
| 378 parser.error('--platform can not be specified with --auto_platform') | 420 parser.error('--platform can not be specified with --auto_platform') |
| 379 if not re.match(options.platform, GetNormalizedPlatform()): | 421 if not re.match(options.platform, GetNormalizedPlatform()): |
| (...skipping 76 matching lines...) | |
| 456 | 498 |
| 457 # Check we have a valid bucket with valid permissions. | 499 # Check we have a valid bucket with valid permissions. |
| 458 if not options.no_auth: | 500 if not options.no_auth: |
| 459 code = check_bucket_permissions(base_url, gsutil) | 501 code = check_bucket_permissions(base_url, gsutil) |
| 460 if code: | 502 if code: |
| 461 return code | 503 return code |
| 462 | 504 |
| 463 return download_from_google_storage( | 505 return download_from_google_storage( |
| 464 input_filename, base_url, gsutil, options.num_threads, options.directory, | 506 input_filename, base_url, gsutil, options.num_threads, options.directory, |
| 465 options.recursive, options.force, options.output, options.ignore_errors, | 507 options.recursive, options.force, options.output, options.ignore_errors, |
| 466 options.sha1_file, options.verbose, options.auto_platform) | 508 options.sha1_file, options.verbose, options.auto_platform, |
| 509 options.extract) | |
| 467 | 510 |
| 468 | 511 |
| 469 if __name__ == '__main__': | 512 if __name__ == '__main__': |
| 470 sys.exit(main(sys.argv)) | 513 sys.exit(main(sys.argv)) |
| OLD | NEW |