| OLD | NEW |
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
| 5 | 5 |
| 6 """Download files from Google Storage based on SHA1 sums.""" | 6 """Download files from Google Storage based on SHA1 sums.""" |
| 7 | 7 |
| 8 | 8 |
| 9 import hashlib | 9 import hashlib |
| 10 import optparse | 10 import optparse |
| 11 import os | 11 import os |
| 12 import Queue | 12 import Queue |
| 13 import re | 13 import re |
| 14 import stat | 14 import stat |
| 15 import sys | 15 import sys |
| 16 import threading | 16 import threading |
| 17 import time | 17 import time |
| 18 | 18 |
| 19 import subprocess2 | 19 import subprocess2 |
| 20 | 20 |
| 21 | 21 |
# Default location of the gsutil script: third_party/gsutil/gsutil, resolved
# relative to the directory that contains this file.
GSUTIL_DEFAULT_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    'third_party', 'gsutil', 'gsutil')
# Maps sys.platform to what we actually want to call them.
PLATFORM_MAPPING = {
    'cygwin': 'win',
    'darwin': 'mac',
    # Python 3.3+ reports plain 'linux'; Python 2 reports 'linux2'. Keep both
    # so a lookup keyed on sys.platform works under either interpreter.
    'linux': 'linux',
    'linux2': 'linux',
    'win32': 'win',
}
| 25 | 32 |
| 26 | 33 |
class FileNotFoundError(IOError):
  """Raised when an input file named on the command line does not exist."""
| 29 | 36 |
| 30 | 37 |
class InvalidFileError(IOError):
  """Raised when a .sha1 file is unusable, e.g. it holds no valid sha1 sum."""
| 33 | 40 |
| 34 | 41 |
class InvalidPlatformError(Exception):
  """Error type for platform-related failures.

  NOTE(review): presumably meant for unsupported or unrecognized platforms;
  confirm where this is raised.
  """
| 44 |
| 45 |
def GetNormalizedPlatform():
  """Returns sys.platform, with 'cygwin' reported as 'win32'.

  Python running under cygwin therefore yields the same value as the native
  Windows Python; every other platform string is passed through untouched.
  """
  platform_name = sys.platform
  return 'win32' if platform_name == 'cygwin' else platform_name
| 41 | 52 |
| 42 | 53 |
| 43 # Common utilities | 54 # Common utilities |
| 44 class Gsutil(object): | 55 class Gsutil(object): |
| (...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 109 if code != 0: | 120 if code != 0: |
| 110 print >> sys.stderr, ls_err | 121 print >> sys.stderr, ls_err |
| 111 if code == 403: | 122 if code == 403: |
| 112 print >> sys.stderr, 'Got error 403 while authenticating to %s.' % base_url | 123 print >> sys.stderr, 'Got error 403 while authenticating to %s.' % base_url |
| 113 print >> sys.stderr, 'Try running "download_from_google_storage --config".' | 124 print >> sys.stderr, 'Try running "download_from_google_storage --config".' |
| 114 elif code == 404: | 125 elif code == 404: |
| 115 print >> sys.stderr, '%s not found.' % base_url | 126 print >> sys.stderr, '%s not found.' % base_url |
| 116 return (base_url, code) | 127 return (base_url, code) |
| 117 | 128 |
| 118 | 129 |
def check_platform(target):
  """Finds a platform name (linux|mac|win) among the components of target.

  Components are examined from the last one (the file or directory name
  itself) up towards the filesystem root, so the match closest to the end of
  the path wins.

  Args:
    target: An absolute path.

  Returns:
    'linux', 'mac' or 'win' if some path component is exactly that string,
    otherwise None.

  Raises:
    AssertionError: if target is not an absolute path.
  """
  assert os.path.isabs(target)
  current = target
  while True:
    parent, name = os.path.split(current)
    if not name:
      # An empty tail means either a trailing separator (keep walking up) or
      # the filesystem root, which os.path.split() returns unchanged.
      if parent == current:
        return None
    elif name in ('linux', 'mac', 'win'):
      return name
    current = parent
| 139 |
| 140 |
def get_sha1(filename):
  """Returns the hexadecimal SHA-1 digest of the contents of filename."""
  digest = hashlib.sha1()
  with open(filename, 'rb') as stream:
    # Hash 1mb at a time so the whole file never has to sit in memory;
    # iter() stops at the first empty read, i.e. at end of file.
    for block in iter(lambda: stream.read(1024 * 1024), b''):
      digest.update(block)
  return digest.hexdigest()
| 129 | 151 |
| 130 | 152 |
| 131 # Download-specific code starts here | 153 # Download-specific code starts here |
| 132 | 154 |
| 133 def enumerate_work_queue(input_filename, work_queue, directory, | 155 def enumerate_work_queue(input_filename, work_queue, directory, |
| 134 recursive, ignore_errors, output, sha1_file): | 156 recursive, ignore_errors, output, sha1_file, |
| 157 auto_platform): |
| 135 if sha1_file: | 158 if sha1_file: |
| 136 if not os.path.exists(input_filename): | 159 if not os.path.exists(input_filename): |
| 137 if not ignore_errors: | 160 if not ignore_errors: |
| 138 raise FileNotFoundError('%s not found.' % input_filename) | 161 raise FileNotFoundError('%s not found.' % input_filename) |
| 139 print >> sys.stderr, '%s not found.' % input_filename | 162 print >> sys.stderr, '%s not found.' % input_filename |
| 140 with open(input_filename, 'rb') as f: | 163 with open(input_filename, 'rb') as f: |
| 141 sha1_match = re.match('^([A-Za-z0-9]{40})$', f.read(1024).rstrip()) | 164 sha1_match = re.match('^([A-Za-z0-9]{40})$', f.read(1024).rstrip()) |
| 142 if sha1_match: | 165 if sha1_match: |
| 143 work_queue.put( | 166 work_queue.put( |
| 144 (sha1_match.groups(1)[0], input_filename.replace('.sha1', ''))) | 167 (sha1_match.groups(1)[0], input_filename.replace('.sha1', ''))) |
| (...skipping 12 matching lines...) Expand all Loading... |
| 157 if not recursive: | 180 if not recursive: |
| 158 for item in dirs[:]: | 181 for item in dirs[:]: |
| 159 dirs.remove(item) | 182 dirs.remove(item) |
| 160 else: | 183 else: |
| 161 for exclude in ['.svn', '.git']: | 184 for exclude in ['.svn', '.git']: |
| 162 if exclude in dirs: | 185 if exclude in dirs: |
| 163 dirs.remove(exclude) | 186 dirs.remove(exclude) |
| 164 for filename in files: | 187 for filename in files: |
| 165 full_path = os.path.join(root, filename) | 188 full_path = os.path.join(root, filename) |
| 166 if full_path.endswith('.sha1'): | 189 if full_path.endswith('.sha1'): |
| 190 if auto_platform: |
| 191 # Skip if the platform does not match. |
| 192 target_platform = check_platform(os.path.abspath(full_path)) |
| 193 if not target_platform: |
| 194 err = ('--auto_platform passed in but no platform name found in ' |
| 195 'the path of %s' % full_path) |
| 196 if not ignore_errors: |
| 197 raise InvalidFileError(err) |
| 198 print >> sys.stderr, err |
| 199 continue |
| 200 current_platform = PLATFORM_MAPPING[sys.platform] |
| 201 if current_platform != target_platform: |
| 202 continue |
| 203 |
| 167 with open(full_path, 'rb') as f: | 204 with open(full_path, 'rb') as f: |
| 168 sha1_match = re.match('^([A-Za-z0-9]{40})$', f.read(1024).rstrip()) | 205 sha1_match = re.match('^([A-Za-z0-9]{40})$', f.read(1024).rstrip()) |
| 169 if sha1_match: | 206 if sha1_match: |
| 170 work_queue.put( | 207 work_queue.put( |
| 171 (sha1_match.groups(1)[0], full_path.replace('.sha1', ''))) | 208 (sha1_match.groups(1)[0], full_path.replace('.sha1', ''))) |
| 172 work_queue_size += 1 | 209 work_queue_size += 1 |
| 173 else: | 210 else: |
| 174 if not ignore_errors: | 211 if not ignore_errors: |
| 175 raise InvalidFileError('No sha1 sum found in %s.' % filename) | 212 raise InvalidFileError('No sha1 sum found in %s.' % filename) |
| 176 print >> sys.stderr, 'No sha1 sum found in %s.' % filename | 213 print >> sys.stderr, 'No sha1 sum found in %s.' % filename |
| (...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 233 while True: | 270 while True: |
| 234 line = output_queue.get() | 271 line = output_queue.get() |
| 235 # Its plausible we want to print empty lines. | 272 # Its plausible we want to print empty lines. |
| 236 if line is None: | 273 if line is None: |
| 237 break | 274 break |
| 238 print line | 275 print line |
| 239 | 276 |
| 240 | 277 |
| 241 def download_from_google_storage( | 278 def download_from_google_storage( |
| 242 input_filename, base_url, gsutil, num_threads, directory, recursive, | 279 input_filename, base_url, gsutil, num_threads, directory, recursive, |
| 243 force, output, ignore_errors, sha1_file, verbose): | 280 force, output, ignore_errors, sha1_file, verbose, auto_platform): |
| 244 # Start up all the worker threads. | 281 # Start up all the worker threads. |
| 245 all_threads = [] | 282 all_threads = [] |
| 246 download_start = time.time() | 283 download_start = time.time() |
| 247 stdout_queue = Queue.Queue() | 284 stdout_queue = Queue.Queue() |
| 248 work_queue = Queue.Queue() | 285 work_queue = Queue.Queue() |
| 249 ret_codes = Queue.Queue() | 286 ret_codes = Queue.Queue() |
| 250 ret_codes.put((0, None)) | 287 ret_codes.put((0, None)) |
| 251 for thread_num in range(num_threads): | 288 for thread_num in range(num_threads): |
| 252 t = threading.Thread( | 289 t = threading.Thread( |
| 253 target=_downloader_worker_thread, | 290 target=_downloader_worker_thread, |
| 254 args=[thread_num, work_queue, force, base_url, | 291 args=[thread_num, work_queue, force, base_url, |
| 255 gsutil, stdout_queue, ret_codes, verbose]) | 292 gsutil, stdout_queue, ret_codes, verbose]) |
| 256 t.daemon = True | 293 t.daemon = True |
| 257 t.start() | 294 t.start() |
| 258 all_threads.append(t) | 295 all_threads.append(t) |
| 259 printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue]) | 296 printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue]) |
| 260 printer_thread.daemon = True | 297 printer_thread.daemon = True |
| 261 printer_thread.start() | 298 printer_thread.start() |
| 262 | 299 |
| 263 # Enumerate our work queue. | 300 # Enumerate our work queue. |
| 264 work_queue_size = enumerate_work_queue( | 301 work_queue_size = enumerate_work_queue( |
| 265 input_filename, work_queue, directory, recursive, | 302 input_filename, work_queue, directory, recursive, |
| 266 ignore_errors, output, sha1_file) | 303 ignore_errors, output, sha1_file, auto_platform) |
| 267 for _ in all_threads: | 304 for _ in all_threads: |
| 268 work_queue.put((None, None)) # Used to tell worker threads to stop. | 305 work_queue.put((None, None)) # Used to tell worker threads to stop. |
| 269 | 306 |
| 270 # Wait for all downloads to finish. | 307 # Wait for all downloads to finish. |
| 271 for t in all_threads: | 308 for t in all_threads: |
| 272 t.join() | 309 t.join() |
| 273 stdout_queue.put(None) | 310 stdout_queue.put(None) |
| 274 printer_thread.join() | 311 printer_thread.join() |
| 275 | 312 |
| 276 # See if we ran into any errors. | 313 # See if we ran into any errors. |
| (...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 327 'to initialize your saved Google Storage ' | 364 'to initialize your saved Google Storage ' |
| 328 'credentials. This will create a read-only ' | 365 'credentials. This will create a read-only ' |
| 329 'credentials file in ~/.boto.depot_tools.') | 366 'credentials file in ~/.boto.depot_tools.') |
| 330 parser.add_option('-n', '--no_auth', action='store_true', | 367 parser.add_option('-n', '--no_auth', action='store_true', |
| 331 help='Skip auth checking. Use if it\'s known that the ' | 368 help='Skip auth checking. Use if it\'s known that the ' |
| 332 'target bucket is a public bucket.') | 369 'target bucket is a public bucket.') |
| 333 parser.add_option('-p', '--platform', | 370 parser.add_option('-p', '--platform', |
| 334 help='A regular expression that is compared against ' | 371 help='A regular expression that is compared against ' |
| 335 'Python\'s sys.platform. If this option is specified, ' | 372 'Python\'s sys.platform. If this option is specified, ' |
| 336 'the download will happen only if there is a match.') | 373 'the download will happen only if there is a match.') |
| 374 parser.add_option('-a', '--auto_platform', |
| 375 action='store_true', |
| 376 help='Detects if any parent folder of the target matches ' |
| 377 '(linux|mac|win). If so, the script will only ' |
| 378 'process files that are in the paths that ' |
| 379 'that matches the current platform.') |
| 337 parser.add_option('-v', '--verbose', action='store_true', | 380 parser.add_option('-v', '--verbose', action='store_true', |
| 338 help='Output extra diagnostic and progress information.') | 381 help='Output extra diagnostic and progress information.') |
| 339 | 382 |
| 340 (options, args) = parser.parse_args() | 383 (options, args) = parser.parse_args() |
| 341 | 384 |
| 342 # Make sure we should run at all based on platform matching. | 385 # Make sure we should run at all based on platform matching. |
| 343 if options.platform: | 386 if options.platform: |
| 387 if options.auto_platform: |
| 388 parser.error('--platform can not be specified with --auto_platform') |
| 344 if not re.match(options.platform, GetNormalizedPlatform()): | 389 if not re.match(options.platform, GetNormalizedPlatform()): |
| 345 if options.verbose: | 390 if options.verbose: |
| 346 print('The current platform doesn\'t match "%s", skipping.' % | 391 print('The current platform doesn\'t match "%s", skipping.' % |
| 347 options.platform) | 392 options.platform) |
| 348 return 0 | 393 return 0 |
| 349 | 394 |
| 350 # Set the boto file to /dev/null if we don't need auth. | 395 # Set the boto file to /dev/null if we don't need auth. |
| 351 if options.no_auth: | 396 if options.no_auth: |
| 352 options.boto = os.devnull | 397 options.boto = os.devnull |
| 353 | 398 |
| (...skipping 17 matching lines...) Expand all Loading... |
| 371 parser.error('Too many targets.') | 416 parser.error('Too many targets.') |
| 372 if not options.bucket: | 417 if not options.bucket: |
| 373 parser.error('Missing bucket. Specify bucket with --bucket.') | 418 parser.error('Missing bucket. Specify bucket with --bucket.') |
| 374 if options.sha1_file and options.directory: | 419 if options.sha1_file and options.directory: |
| 375 parser.error('Both --directory and --sha1_file are specified, ' | 420 parser.error('Both --directory and --sha1_file are specified, ' |
| 376 'can only specify one.') | 421 'can only specify one.') |
| 377 if options.recursive and not options.directory: | 422 if options.recursive and not options.directory: |
| 378 parser.error('--recursive specified but --directory not specified.') | 423 parser.error('--recursive specified but --directory not specified.') |
| 379 if options.output and options.directory: | 424 if options.output and options.directory: |
| 380 parser.error('--directory is specified, so --output has no effect.') | 425 parser.error('--directory is specified, so --output has no effect.') |
| 426 if (not (options.sha1_file or options.directory) |
| 427 and options.auto_platform): |
| 428 parser.error('--auto_platform must be specified with either ' |
| 429 '--sha1_file or --directory') |
| 430 |
| 381 input_filename = args[0] | 431 input_filename = args[0] |
| 382 | 432 |
| 383 # Set output filename if not specified. | 433 # Set output filename if not specified. |
| 384 if not options.output and not options.directory: | 434 if not options.output and not options.directory: |
| 385 if not options.sha1_file: | 435 if not options.sha1_file: |
| 386 # Target is a sha1 sum, so output filename would also be the sha1 sum. | 436 # Target is a sha1 sum, so output filename would also be the sha1 sum. |
| 387 options.output = input_filename | 437 options.output = input_filename |
| 388 elif options.sha1_file: | 438 elif options.sha1_file: |
| 389 # Target is a .sha1 file. | 439 # Target is a .sha1 file. |
| 390 if not input_filename.endswith('.sha1'): | 440 if not input_filename.endswith('.sha1'): |
| (...skipping 12 matching lines...) Expand all Loading... |
| 403 % options.output) | 453 % options.output) |
| 404 | 454 |
| 405 # Check we have a valid bucket with valid permissions. | 455 # Check we have a valid bucket with valid permissions. |
| 406 base_url, code = check_bucket_permissions(options.bucket, gsutil) | 456 base_url, code = check_bucket_permissions(options.bucket, gsutil) |
| 407 if code: | 457 if code: |
| 408 return code | 458 return code |
| 409 | 459 |
| 410 return download_from_google_storage( | 460 return download_from_google_storage( |
| 411 input_filename, base_url, gsutil, options.num_threads, options.directory, | 461 input_filename, base_url, gsutil, options.num_threads, options.directory, |
| 412 options.recursive, options.force, options.output, options.ignore_errors, | 462 options.recursive, options.force, options.output, options.ignore_errors, |
| 413 options.sha1_file, options.verbose) | 463 options.sha1_file, options.verbose, options.auto_platform) |
| 414 | 464 |
| 415 | 465 |
# Script entry point: run main() and hand its return value to the shell as
# the process exit status.
if __name__ == '__main__':
  exit_code = main(sys.argv)
  sys.exit(exit_code)
| OLD | NEW |