Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
| 5 | 5 |
| 6 """Download files from Google Storage based on SHA1 sums.""" | 6 """Download files from Google Storage based on SHA1 sums.""" |
| 7 | 7 |
| 8 | 8 |
| 9 import hashlib | 9 import hashlib |
| 10 import optparse | 10 import optparse |
| 11 import os | 11 import os |
| 12 import Queue | 12 import Queue |
| 13 import re | 13 import re |
| 14 import stat | 14 import stat |
| 15 import sys | 15 import sys |
| 16 import threading | 16 import threading |
| 17 import time | 17 import time |
| 18 | 18 |
| 19 import subprocess2 | 19 import subprocess2 |
| 20 | 20 |
| 21 | 21 |
| 22 GSUTIL_DEFAULT_PATH = os.path.join( | 22 GSUTIL_DEFAULT_PATH = os.path.join( |
| 23 os.path.dirname(os.path.abspath(__file__)), | 23 os.path.dirname(os.path.abspath(__file__)), |
| 24 'third_party', 'gsutil', 'gsutil') | 24 'third_party', 'gsutil', 'gsutil') |
| 25 # Maps sys.platform to what we actually want to call them. | |
| 26 PLATFORM_MAPPING = { | |
| 27 'linux2': 'linux', | |
| 28 'win32': 'win', | |
| 29 'cygwin': 'win', | |
| 30 'darwin': 'mac', | |
|
M-A Ruel
2014/01/17 20:19:21
sort keys
Ryan Tseng
2014/01/17 20:58:37
Done.
| |
| 31 } | |
| 25 | 32 |
| 26 | 33 |
| 27 class FileNotFoundError(IOError): | 34 class FileNotFoundError(IOError): |
| 28 pass | 35 pass |
| 29 | 36 |
| 30 | 37 |
| 31 class InvalidFileError(IOError): | 38 class InvalidFileError(IOError): |
| 32 pass | 39 pass |
| 33 | 40 |
| 34 | 41 |
| 42 class InvalidPlatformError(Exception): | |
| 43 pass | |
| 44 | |
| 45 | |
| 35 def GetNormalizedPlatform(): | 46 def GetNormalizedPlatform(): |
| 36 """Returns the result of sys.platform accounting for cygwin. | 47 """Returns the result of sys.platform accounting for cygwin. |
| 37 Under cygwin, this will always return "win32" like the native Python.""" | 48 Under cygwin, this will always return "win32" like the native Python.""" |
| 38 if sys.platform == 'cygwin': | 49 if sys.platform == 'cygwin': |
| 39 return 'win32' | 50 return 'win32' |
| 40 return sys.platform | 51 return sys.platform |
| 41 | 52 |
| 42 | 53 |
| 43 # Common utilities | 54 # Common utilities |
| 44 class Gsutil(object): | 55 class Gsutil(object): |
| (...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 109 if code != 0: | 120 if code != 0: |
| 110 print >> sys.stderr, ls_err | 121 print >> sys.stderr, ls_err |
| 111 if code == 403: | 122 if code == 403: |
| 112 print >> sys.stderr, 'Got error 403 while authenticating to %s.' % base_url | 123 print >> sys.stderr, 'Got error 403 while authenticating to %s.' % base_url |
| 113 print >> sys.stderr, 'Try running "download_from_google_storage --config".' | 124 print >> sys.stderr, 'Try running "download_from_google_storage --config".' |
| 114 elif code == 404: | 125 elif code == 404: |
| 115 print >> sys.stderr, '%s not found.' % base_url | 126 print >> sys.stderr, '%s not found.' % base_url |
| 116 return (base_url, code) | 127 return (base_url, code) |
| 117 | 128 |
| 118 | 129 |
| 130 def check_platform(target): | |
| 131 """Check if any parent directory of target matches (win|mac|linux).""" | |
|
M-A Ruel
2014/01/17 20:19:21
Checks
Ryan Tseng
2014/01/17 20:58:37
Done.
| |
| 132 if not target: | |
| 133 return None | |
| 134 full_path = os.path.abspath(target) | |
|
M-A Ruel
2014/01/17 20:19:21
what about asserting the path is absolute?
Ryan Tseng
2014/01/17 20:58:37
Done.
| |
| 135 root, target_name = os.path.split(full_path) | |
| 136 if target_name in ('win', 'mac', 'linux'): | |
|
M-A Ruel
2014/01/17 20:19:21
sort
Ryan Tseng
2014/01/17 20:58:37
Done.
| |
| 137 return target_name | |
| 138 return check_platform(root) | |
|
M-A Ruel
2014/01/17 20:19:21
What happens if you call it with 'e:\\' ?
Ryan Tseng
2014/01/17 20:58:37
('e:\\', '')
| |
| 139 | |
| 140 | |
| 119 def get_sha1(filename): | 141 def get_sha1(filename): |
| 120 sha1 = hashlib.sha1() | 142 sha1 = hashlib.sha1() |
| 121 with open(filename, 'rb') as f: | 143 with open(filename, 'rb') as f: |
| 122 while True: | 144 while True: |
| 123 # Read in 1mb chunks, so it doesn't all have to be loaded into memory. | 145 # Read in 1mb chunks, so it doesn't all have to be loaded into memory. |
| 124 chunk = f.read(1024*1024) | 146 chunk = f.read(1024*1024) |
| 125 if not chunk: | 147 if not chunk: |
| 126 break | 148 break |
| 127 sha1.update(chunk) | 149 sha1.update(chunk) |
| 128 return sha1.hexdigest() | 150 return sha1.hexdigest() |
| 129 | 151 |
| 130 | 152 |
| 131 # Download-specific code starts here | 153 # Download-specific code starts here |
| 132 | 154 |
| 133 def enumerate_work_queue(input_filename, work_queue, directory, | 155 def enumerate_work_queue(input_filename, work_queue, directory, |
| 134 recursive, ignore_errors, output, sha1_file): | 156 recursive, ignore_errors, output, sha1_file, |
| 157 auto_platform): | |
| 135 if sha1_file: | 158 if sha1_file: |
| 136 if not os.path.exists(input_filename): | 159 if not os.path.exists(input_filename): |
| 137 if not ignore_errors: | 160 if not ignore_errors: |
| 138 raise FileNotFoundError('%s not found.' % input_filename) | 161 raise FileNotFoundError('%s not found.' % input_filename) |
| 139 print >> sys.stderr, '%s not found.' % input_filename | 162 print >> sys.stderr, '%s not found.' % input_filename |
| 140 with open(input_filename, 'rb') as f: | 163 with open(input_filename, 'rb') as f: |
| 141 sha1_match = re.match('^([A-Za-z0-9]{40})$', f.read(1024).rstrip()) | 164 sha1_match = re.match('^([A-Za-z0-9]{40})$', f.read(1024).rstrip()) |
| 142 if sha1_match: | 165 if sha1_match: |
| 143 work_queue.put( | 166 work_queue.put( |
| 144 (sha1_match.groups(1)[0], input_filename.replace('.sha1', ''))) | 167 (sha1_match.groups(1)[0], input_filename.replace('.sha1', ''))) |
| (...skipping 12 matching lines...) Expand all Loading... | |
| 157 if not recursive: | 180 if not recursive: |
| 158 for item in dirs[:]: | 181 for item in dirs[:]: |
| 159 dirs.remove(item) | 182 dirs.remove(item) |
| 160 else: | 183 else: |
| 161 for exclude in ['.svn', '.git']: | 184 for exclude in ['.svn', '.git']: |
| 162 if exclude in dirs: | 185 if exclude in dirs: |
| 163 dirs.remove(exclude) | 186 dirs.remove(exclude) |
| 164 for filename in files: | 187 for filename in files: |
| 165 full_path = os.path.join(root, filename) | 188 full_path = os.path.join(root, filename) |
| 166 if full_path.endswith('.sha1'): | 189 if full_path.endswith('.sha1'): |
| 190 if auto_platform: | |
| 191 # Skip if the platform does not match. | |
| 192 target_platform = check_platform(full_path) | |
| 193 if not target_platform: | |
| 194 err = ('--auto_platform passed in but no ' | |
|
M-A Ruel
2014/01/17 20:19:21
the wrapping seems excessive, try to extend nearer
Ryan Tseng
2014/01/17 20:58:37
Done.
| |
| 195 'platform name found in the path of %s' | |
| 196 % full_path) | |
| 197 if not ignore_errors: | |
| 198 raise InvalidFileError(err) | |
| 199 print >> sys.stderr, err | |
| 200 continue | |
| 201 current_platform = PLATFORM_MAPPING[sys.platform] | |
| 202 if current_platform != target_platform: | |
| 203 continue | |
| 204 | |
| 167 with open(full_path, 'rb') as f: | 205 with open(full_path, 'rb') as f: |
| 168 sha1_match = re.match('^([A-Za-z0-9]{40})$', f.read(1024).rstrip()) | 206 sha1_match = re.match('^([A-Za-z0-9]{40})$', f.read(1024).rstrip()) |
| 169 if sha1_match: | 207 if sha1_match: |
| 170 work_queue.put( | 208 work_queue.put( |
| 171 (sha1_match.groups(1)[0], full_path.replace('.sha1', ''))) | 209 (sha1_match.groups(1)[0], full_path.replace('.sha1', ''))) |
| 172 work_queue_size += 1 | 210 work_queue_size += 1 |
| 173 else: | 211 else: |
| 174 if not ignore_errors: | 212 if not ignore_errors: |
| 175 raise InvalidFileError('No sha1 sum found in %s.' % filename) | 213 raise InvalidFileError('No sha1 sum found in %s.' % filename) |
| 176 print >> sys.stderr, 'No sha1 sum found in %s.' % filename | 214 print >> sys.stderr, 'No sha1 sum found in %s.' % filename |
| (...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 233 while True: | 271 while True: |
| 234 line = output_queue.get() | 272 line = output_queue.get() |
| 235 # Its plausible we want to print empty lines. | 273 # Its plausible we want to print empty lines. |
| 236 if line is None: | 274 if line is None: |
| 237 break | 275 break |
| 238 print line | 276 print line |
| 239 | 277 |
| 240 | 278 |
| 241 def download_from_google_storage( | 279 def download_from_google_storage( |
| 242 input_filename, base_url, gsutil, num_threads, directory, recursive, | 280 input_filename, base_url, gsutil, num_threads, directory, recursive, |
| 243 force, output, ignore_errors, sha1_file, verbose): | 281 force, output, ignore_errors, sha1_file, verbose, auto_platform): |
| 244 # Start up all the worker threads. | 282 # Start up all the worker threads. |
| 245 all_threads = [] | 283 all_threads = [] |
| 246 download_start = time.time() | 284 download_start = time.time() |
| 247 stdout_queue = Queue.Queue() | 285 stdout_queue = Queue.Queue() |
| 248 work_queue = Queue.Queue() | 286 work_queue = Queue.Queue() |
| 249 ret_codes = Queue.Queue() | 287 ret_codes = Queue.Queue() |
| 250 ret_codes.put((0, None)) | 288 ret_codes.put((0, None)) |
| 251 for thread_num in range(num_threads): | 289 for thread_num in range(num_threads): |
| 252 t = threading.Thread( | 290 t = threading.Thread( |
| 253 target=_downloader_worker_thread, | 291 target=_downloader_worker_thread, |
| 254 args=[thread_num, work_queue, force, base_url, | 292 args=[thread_num, work_queue, force, base_url, |
| 255 gsutil, stdout_queue, ret_codes, verbose]) | 293 gsutil, stdout_queue, ret_codes, verbose]) |
| 256 t.daemon = True | 294 t.daemon = True |
| 257 t.start() | 295 t.start() |
| 258 all_threads.append(t) | 296 all_threads.append(t) |
| 259 printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue]) | 297 printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue]) |
| 260 printer_thread.daemon = True | 298 printer_thread.daemon = True |
| 261 printer_thread.start() | 299 printer_thread.start() |
| 262 | 300 |
| 263 # Enumerate our work queue. | 301 # Enumerate our work queue. |
| 264 work_queue_size = enumerate_work_queue( | 302 work_queue_size = enumerate_work_queue( |
| 265 input_filename, work_queue, directory, recursive, | 303 input_filename, work_queue, directory, recursive, |
| 266 ignore_errors, output, sha1_file) | 304 ignore_errors, output, sha1_file, auto_platform) |
| 267 for _ in all_threads: | 305 for _ in all_threads: |
| 268 work_queue.put((None, None)) # Used to tell worker threads to stop. | 306 work_queue.put((None, None)) # Used to tell worker threads to stop. |
| 269 | 307 |
| 270 # Wait for all downloads to finish. | 308 # Wait for all downloads to finish. |
| 271 for t in all_threads: | 309 for t in all_threads: |
| 272 t.join() | 310 t.join() |
| 273 stdout_queue.put(None) | 311 stdout_queue.put(None) |
| 274 printer_thread.join() | 312 printer_thread.join() |
| 275 | 313 |
| 276 # See if we ran into any errors. | 314 # See if we ran into any errors. |
| (...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 326 help='Alias for "gsutil config". Run this if you want ' | 364 help='Alias for "gsutil config". Run this if you want ' |
| 327 'to initialize your saved Google Storage ' | 365 'to initialize your saved Google Storage ' |
| 328 'credentials.') | 366 'credentials.') |
| 329 parser.add_option('-n', '--no_auth', action='store_true', | 367 parser.add_option('-n', '--no_auth', action='store_true', |
| 330 help='Skip auth checking. Use if it\'s known that the ' | 368 help='Skip auth checking. Use if it\'s known that the ' |
| 331 'target bucket is a public bucket.') | 369 'target bucket is a public bucket.') |
| 332 parser.add_option('-p', '--platform', | 370 parser.add_option('-p', '--platform', |
| 333 help='A regular expression that is compared against ' | 371 help='A regular expression that is compared against ' |
| 334 'Python\'s sys.platform. If this option is specified, ' | 372 'Python\'s sys.platform. If this option is specified, ' |
| 335 'the download will happen only if there is a match.') | 373 'the download will happen only if there is a match.') |
| 374 parser.add_option('-a', '--auto_platform', | |
| 375 help='Detects if any parent folder of the target matches ' | |
| 376 '(win|mac|linux). If so, the script will only ' | |
|
M-A Ruel
2014/01/17 20:19:21
sort
Ryan Tseng
2014/01/17 20:58:37
Done.
| |
| 377 'process files that are in the paths that ' | |
| 378 'that matches the current platform.') | |
| 336 parser.add_option('-v', '--verbose', action='store_true', | 379 parser.add_option('-v', '--verbose', action='store_true', |
| 337 help='Output extra diagnostic and progress information.') | 380 help='Output extra diagnostic and progress information.') |
| 338 | 381 |
| 339 (options, args) = parser.parse_args() | 382 (options, args) = parser.parse_args() |
| 340 | 383 |
| 341 # Make sure we should run at all based on platform matching. | 384 # Make sure we should run at all based on platform matching. |
| 342 if options.platform: | 385 if options.platform: |
| 386 if options.auto_platform: | |
| 387 parser.error('--platform can not be specified with --auto_platform') | |
| 388 return 1 | |
|
M-A Ruel
2014/01/17 20:19:21
parser.error() calls sys.exit(2), no need for return.
Ryan Tseng
2014/01/17 20:58:37
Done.
| |
| 343 if not re.match(options.platform, GetNormalizedPlatform()): | 389 if not re.match(options.platform, GetNormalizedPlatform()): |
| 344 if options.verbose: | 390 if options.verbose: |
| 345 print('The current platform doesn\'t match "%s", skipping.' % | 391 print('The current platform doesn\'t match "%s", skipping.' % |
| 346 options.platform) | 392 options.platform) |
| 347 return 0 | 393 return 0 |
| 348 | 394 |
| 349 # Set the boto file to /dev/null if we don't need auth. | 395 # Set the boto file to /dev/null if we don't need auth. |
| 350 if options.no_auth: | 396 if options.no_auth: |
| 351 options.boto = os.devnull | 397 options.boto = os.devnull |
| 352 | 398 |
| (...skipping 16 matching lines...) Expand all Loading... | |
| 369 parser.error('Too many targets.') | 415 parser.error('Too many targets.') |
| 370 if not options.bucket: | 416 if not options.bucket: |
| 371 parser.error('Missing bucket. Specify bucket with --bucket.') | 417 parser.error('Missing bucket. Specify bucket with --bucket.') |
| 372 if options.sha1_file and options.directory: | 418 if options.sha1_file and options.directory: |
| 373 parser.error('Both --directory and --sha1_file are specified, ' | 419 parser.error('Both --directory and --sha1_file are specified, ' |
| 374 'can only specify one.') | 420 'can only specify one.') |
| 375 if options.recursive and not options.directory: | 421 if options.recursive and not options.directory: |
| 376 parser.error('--recursive specified but --directory not specified.') | 422 parser.error('--recursive specified but --directory not specified.') |
| 377 if options.output and options.directory: | 423 if options.output and options.directory: |
| 378 parser.error('--directory is specified, so --output has no effect.') | 424 parser.error('--directory is specified, so --output has no effect.') |
| 425 if (not (options.sha1_file or options.directory) | |
| 426 and options.auto_platform): | |
| 427 parser.error('--auto_platform must be specified with either ' | |
| 428 '--sha1_file or --directory') | |
| 429 | |
| 379 input_filename = args[0] | 430 input_filename = args[0] |
| 380 | 431 |
| 381 # Set output filename if not specified. | 432 # Set output filename if not specified. |
| 382 if not options.output and not options.directory: | 433 if not options.output and not options.directory: |
| 383 if not options.sha1_file: | 434 if not options.sha1_file: |
| 384 # Target is a sha1 sum, so output filename would also be the sha1 sum. | 435 # Target is a sha1 sum, so output filename would also be the sha1 sum. |
| 385 options.output = input_filename | 436 options.output = input_filename |
| 386 elif options.sha1_file: | 437 elif options.sha1_file: |
| 387 # Target is a .sha1 file. | 438 # Target is a .sha1 file. |
| 388 if not input_filename.endswith('.sha1'): | 439 if not input_filename.endswith('.sha1'): |
| (...skipping 12 matching lines...) Expand all Loading... | |
| 401 % options.output) | 452 % options.output) |
| 402 | 453 |
| 403 # Check we have a valid bucket with valid permissions. | 454 # Check we have a valid bucket with valid permissions. |
| 404 base_url, code = check_bucket_permissions(options.bucket, gsutil) | 455 base_url, code = check_bucket_permissions(options.bucket, gsutil) |
| 405 if code: | 456 if code: |
| 406 return code | 457 return code |
| 407 | 458 |
| 408 return download_from_google_storage( | 459 return download_from_google_storage( |
| 409 input_filename, base_url, gsutil, options.num_threads, options.directory, | 460 input_filename, base_url, gsutil, options.num_threads, options.directory, |
| 410 options.recursive, options.force, options.output, options.ignore_errors, | 461 options.recursive, options.force, options.output, options.ignore_errors, |
| 411 options.sha1_file, options.verbose) | 462 options.sha1_file, options.verbose, options.auto_platform) |
| 412 | 463 |
| 413 | 464 |
| 414 if __name__ == '__main__': | 465 if __name__ == '__main__': |
| 415 sys.exit(main(sys.argv)) | 466 sys.exit(main(sys.argv)) |
| OLD | NEW |