OLD | NEW |
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 """Download files from Google Storage based on SHA1 sums.""" | 6 """Download files from Google Storage based on SHA1 sums.""" |
7 | 7 |
8 | 8 |
9 import hashlib | 9 import hashlib |
10 import optparse | 10 import optparse |
11 import os | 11 import os |
12 import Queue | 12 import Queue |
13 import re | 13 import re |
14 import stat | 14 import stat |
15 import sys | 15 import sys |
16 import threading | 16 import threading |
17 import time | 17 import time |
18 | 18 |
19 import subprocess2 | 19 import subprocess2 |
20 | 20 |
21 | 21 |
# Default location of gsutil: third_party/gsutil/gsutil inside the directory
# that contains this script.
GSUTIL_DEFAULT_PATH = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), 'third_party', 'gsutil',
    'gsutil')
# Maps sys.platform values to the short platform names this script uses for
# platform-specific directories ('linux', 'mac', 'win').
PLATFORM_MAPPING = {
  'cygwin': 'win',
  'darwin': 'mac',
  # Linux is reported under several spellings depending on the interpreter:
  # 'linux' by Python 3.3+, 'linux2' by Python 2.7.3+, and 'linux3' by older
  # interpreters running on a 3.x kernel. Accept all of them so that a lookup
  # by sys.platform does not raise KeyError on Linux.
  'linux': 'linux',
  'linux2': 'linux',
  'linux3': 'linux',
  'win32': 'win',
}
25 | 32 |
26 | 33 |
class FileNotFoundError(IOError):
  """Raised when an expected input file does not exist."""
29 | 36 |
30 | 37 |
class InvalidFileError(IOError):
  """Raised when an input file is unusable, e.g. it contains no SHA1 sum."""
33 | 40 |
34 | 41 |
class InvalidPlatformError(Exception):
  """Error type for platform-related failures."""
  # NOTE(review): no raise site is visible in the reviewed part of this file;
  # confirm where (or whether) this exception is meant to be used.
| 44 |
| 45 |
def GetNormalizedPlatform():
  """Returns sys.platform, with Cygwin reported as "win32".

  A Cygwin Python identifies itself as "cygwin"; that value is folded into
  "win32" so callers see the same name a native Windows Python reports. Any
  other sys.platform value is returned unchanged.
  """
  platform_name = sys.platform
  return 'win32' if platform_name == 'cygwin' else platform_name
41 | 52 |
42 | 53 |
43 # Common utilities | 54 # Common utilities |
44 class Gsutil(object): | 55 class Gsutil(object): |
(...skipping 64 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
109 if code != 0: | 120 if code != 0: |
110 print >> sys.stderr, ls_err | 121 print >> sys.stderr, ls_err |
111 if code == 403: | 122 if code == 403: |
112 print >> sys.stderr, 'Got error 403 while authenticating to %s.' % base_url | 123 print >> sys.stderr, 'Got error 403 while authenticating to %s.' % base_url |
113 print >> sys.stderr, 'Try running "download_from_google_storage --config".' | 124 print >> sys.stderr, 'Try running "download_from_google_storage --config".' |
114 elif code == 404: | 125 elif code == 404: |
115 print >> sys.stderr, '%s not found.' % base_url | 126 print >> sys.stderr, '%s not found.' % base_url |
116 return (base_url, code) | 127 return (base_url, code) |
117 | 128 |
118 | 129 |
def check_platform(target):
  """Finds the platform name (linux|mac|win) embedded in an absolute path.

  The path is walked from its last component up towards the root, and the
  first component that is exactly 'linux', 'mac' or 'win' is returned. The
  final component of target itself is checked too, not only its parents.

  Args:
    target: Absolute path to inspect. A trailing path separator is tolerated.

  Returns:
    'linux', 'mac' or 'win' for the matching component closest to the end of
    the path, or None if no component matches.

  Raises:
    AssertionError: if target is not absolute (the check is skipped when
        Python runs with -O).
  """
  assert os.path.isabs(target)
  remaining = target
  while True:
    parent, component = os.path.split(remaining)
    if component in ('linux', 'mac', 'win'):
      return component
    # os.path.split() returns its input unchanged as the head once only the
    # root is left. Stopping on "no progress" rather than on an empty tail
    # keeps a trailing separator (e.g. '/src/linux/') from ending the search
    # before any component has been examined.
    if parent == remaining:
      return None
    remaining = parent
| 139 |
| 140 |
def get_sha1(filename):
  """Returns the hex SHA1 digest of the contents of the file at filename.

  The file is opened in binary mode and hashed in 1 MB pieces so that it never
  has to be held in memory all at once.
  """
  chunk_size = 1024 * 1024
  digest = hashlib.sha1()
  with open(filename, 'rb') as stream:
    # iter() with a sentinel keeps calling read() until it returns an empty
    # string, which signals end of file.
    for piece in iter(lambda: stream.read(chunk_size), b''):
      digest.update(piece)
  return digest.hexdigest()
129 | 151 |
130 | 152 |
131 # Download-specific code starts here | 153 # Download-specific code starts here |
132 | 154 |
133 def enumerate_work_queue(input_filename, work_queue, directory, | 155 def enumerate_work_queue(input_filename, work_queue, directory, |
134 recursive, ignore_errors, output, sha1_file): | 156 recursive, ignore_errors, output, sha1_file, |
| 157 auto_platform): |
135 if sha1_file: | 158 if sha1_file: |
136 if not os.path.exists(input_filename): | 159 if not os.path.exists(input_filename): |
137 if not ignore_errors: | 160 if not ignore_errors: |
138 raise FileNotFoundError('%s not found.' % input_filename) | 161 raise FileNotFoundError('%s not found.' % input_filename) |
139 print >> sys.stderr, '%s not found.' % input_filename | 162 print >> sys.stderr, '%s not found.' % input_filename |
140 with open(input_filename, 'rb') as f: | 163 with open(input_filename, 'rb') as f: |
141 sha1_match = re.match('^([A-Za-z0-9]{40})$', f.read(1024).rstrip()) | 164 sha1_match = re.match('^([A-Za-z0-9]{40})$', f.read(1024).rstrip()) |
142 if sha1_match: | 165 if sha1_match: |
143 work_queue.put( | 166 work_queue.put( |
144 (sha1_match.groups(1)[0], input_filename.replace('.sha1', ''))) | 167 (sha1_match.groups(1)[0], input_filename.replace('.sha1', ''))) |
(...skipping 12 matching lines...) Expand all Loading... |
157 if not recursive: | 180 if not recursive: |
158 for item in dirs[:]: | 181 for item in dirs[:]: |
159 dirs.remove(item) | 182 dirs.remove(item) |
160 else: | 183 else: |
161 for exclude in ['.svn', '.git']: | 184 for exclude in ['.svn', '.git']: |
162 if exclude in dirs: | 185 if exclude in dirs: |
163 dirs.remove(exclude) | 186 dirs.remove(exclude) |
164 for filename in files: | 187 for filename in files: |
165 full_path = os.path.join(root, filename) | 188 full_path = os.path.join(root, filename) |
166 if full_path.endswith('.sha1'): | 189 if full_path.endswith('.sha1'): |
| 190 if auto_platform: |
| 191 # Skip if the platform does not match. |
| 192 target_platform = check_platform(os.path.abspath(full_path)) |
| 193 if not target_platform: |
| 194 err = ('--auto_platform passed in but no platform name found in ' |
| 195 'the path of %s' % full_path) |
| 196 if not ignore_errors: |
| 197 raise InvalidFileError(err) |
| 198 print >> sys.stderr, err |
| 199 continue |
| 200 current_platform = PLATFORM_MAPPING[sys.platform] |
| 201 if current_platform != target_platform: |
| 202 continue |
| 203 |
167 with open(full_path, 'rb') as f: | 204 with open(full_path, 'rb') as f: |
168 sha1_match = re.match('^([A-Za-z0-9]{40})$', f.read(1024).rstrip()) | 205 sha1_match = re.match('^([A-Za-z0-9]{40})$', f.read(1024).rstrip()) |
169 if sha1_match: | 206 if sha1_match: |
170 work_queue.put( | 207 work_queue.put( |
171 (sha1_match.groups(1)[0], full_path.replace('.sha1', ''))) | 208 (sha1_match.groups(1)[0], full_path.replace('.sha1', ''))) |
172 work_queue_size += 1 | 209 work_queue_size += 1 |
173 else: | 210 else: |
174 if not ignore_errors: | 211 if not ignore_errors: |
175 raise InvalidFileError('No sha1 sum found in %s.' % filename) | 212 raise InvalidFileError('No sha1 sum found in %s.' % filename) |
176 print >> sys.stderr, 'No sha1 sum found in %s.' % filename | 213 print >> sys.stderr, 'No sha1 sum found in %s.' % filename |
(...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
233 while True: | 270 while True: |
234 line = output_queue.get() | 271 line = output_queue.get() |
235 # It's plausible we want to print empty lines. | 272 # It's plausible we want to print empty lines. |
236 if line is None: | 273 if line is None: |
237 break | 274 break |
238 print line | 275 print line |
239 | 276 |
240 | 277 |
241 def download_from_google_storage( | 278 def download_from_google_storage( |
242 input_filename, base_url, gsutil, num_threads, directory, recursive, | 279 input_filename, base_url, gsutil, num_threads, directory, recursive, |
243 force, output, ignore_errors, sha1_file, verbose): | 280 force, output, ignore_errors, sha1_file, verbose, auto_platform): |
244 # Start up all the worker threads. | 281 # Start up all the worker threads. |
245 all_threads = [] | 282 all_threads = [] |
246 download_start = time.time() | 283 download_start = time.time() |
247 stdout_queue = Queue.Queue() | 284 stdout_queue = Queue.Queue() |
248 work_queue = Queue.Queue() | 285 work_queue = Queue.Queue() |
249 ret_codes = Queue.Queue() | 286 ret_codes = Queue.Queue() |
250 ret_codes.put((0, None)) | 287 ret_codes.put((0, None)) |
251 for thread_num in range(num_threads): | 288 for thread_num in range(num_threads): |
252 t = threading.Thread( | 289 t = threading.Thread( |
253 target=_downloader_worker_thread, | 290 target=_downloader_worker_thread, |
254 args=[thread_num, work_queue, force, base_url, | 291 args=[thread_num, work_queue, force, base_url, |
255 gsutil, stdout_queue, ret_codes, verbose]) | 292 gsutil, stdout_queue, ret_codes, verbose]) |
256 t.daemon = True | 293 t.daemon = True |
257 t.start() | 294 t.start() |
258 all_threads.append(t) | 295 all_threads.append(t) |
259 printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue]) | 296 printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue]) |
260 printer_thread.daemon = True | 297 printer_thread.daemon = True |
261 printer_thread.start() | 298 printer_thread.start() |
262 | 299 |
263 # Enumerate our work queue. | 300 # Enumerate our work queue. |
264 work_queue_size = enumerate_work_queue( | 301 work_queue_size = enumerate_work_queue( |
265 input_filename, work_queue, directory, recursive, | 302 input_filename, work_queue, directory, recursive, |
266 ignore_errors, output, sha1_file) | 303 ignore_errors, output, sha1_file, auto_platform) |
267 for _ in all_threads: | 304 for _ in all_threads: |
268 work_queue.put((None, None)) # Used to tell worker threads to stop. | 305 work_queue.put((None, None)) # Used to tell worker threads to stop. |
269 | 306 |
270 # Wait for all downloads to finish. | 307 # Wait for all downloads to finish. |
271 for t in all_threads: | 308 for t in all_threads: |
272 t.join() | 309 t.join() |
273 stdout_queue.put(None) | 310 stdout_queue.put(None) |
274 printer_thread.join() | 311 printer_thread.join() |
275 | 312 |
276 # See if we ran into any errors. | 313 # See if we ran into any errors. |
(...skipping 50 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
327 'to initialize your saved Google Storage ' | 364 'to initialize your saved Google Storage ' |
328 'credentials. This will create a read-only ' | 365 'credentials. This will create a read-only ' |
329 'credentials file in ~/.boto.depot_tools.') | 366 'credentials file in ~/.boto.depot_tools.') |
330 parser.add_option('-n', '--no_auth', action='store_true', | 367 parser.add_option('-n', '--no_auth', action='store_true', |
331 help='Skip auth checking. Use if it\'s known that the ' | 368 help='Skip auth checking. Use if it\'s known that the ' |
332 'target bucket is a public bucket.') | 369 'target bucket is a public bucket.') |
333 parser.add_option('-p', '--platform', | 370 parser.add_option('-p', '--platform', |
334 help='A regular expression that is compared against ' | 371 help='A regular expression that is compared against ' |
335 'Python\'s sys.platform. If this option is specified, ' | 372 'Python\'s sys.platform. If this option is specified, ' |
336 'the download will happen only if there is a match.') | 373 'the download will happen only if there is a match.') |
| 374 parser.add_option('-a', '--auto_platform', |
| 375 action='store_true', |
| 376 help='Detects if any parent folder of the target matches ' |
| 377 '(linux|mac|win). If so, the script will only ' |
| 378 'process files that are in the paths that ' |
| 379 'that matches the current platform.') |
337 parser.add_option('-v', '--verbose', action='store_true', | 380 parser.add_option('-v', '--verbose', action='store_true', |
338 help='Output extra diagnostic and progress information.') | 381 help='Output extra diagnostic and progress information.') |
339 | 382 |
340 (options, args) = parser.parse_args() | 383 (options, args) = parser.parse_args() |
341 | 384 |
342 # Make sure we should run at all based on platform matching. | 385 # Make sure we should run at all based on platform matching. |
343 if options.platform: | 386 if options.platform: |
| 387 if options.auto_platform: |
| 388 parser.error('--platform can not be specified with --auto_platform') |
344 if not re.match(options.platform, GetNormalizedPlatform()): | 389 if not re.match(options.platform, GetNormalizedPlatform()): |
345 if options.verbose: | 390 if options.verbose: |
346 print('The current platform doesn\'t match "%s", skipping.' % | 391 print('The current platform doesn\'t match "%s", skipping.' % |
347 options.platform) | 392 options.platform) |
348 return 0 | 393 return 0 |
349 | 394 |
350 # Set the boto file to /dev/null if we don't need auth. | 395 # Set the boto file to /dev/null if we don't need auth. |
351 if options.no_auth: | 396 if options.no_auth: |
352 options.boto = os.devnull | 397 options.boto = os.devnull |
353 | 398 |
(...skipping 17 matching lines...) Expand all Loading... |
371 parser.error('Too many targets.') | 416 parser.error('Too many targets.') |
372 if not options.bucket: | 417 if not options.bucket: |
373 parser.error('Missing bucket. Specify bucket with --bucket.') | 418 parser.error('Missing bucket. Specify bucket with --bucket.') |
374 if options.sha1_file and options.directory: | 419 if options.sha1_file and options.directory: |
375 parser.error('Both --directory and --sha1_file are specified, ' | 420 parser.error('Both --directory and --sha1_file are specified, ' |
376 'can only specify one.') | 421 'can only specify one.') |
377 if options.recursive and not options.directory: | 422 if options.recursive and not options.directory: |
378 parser.error('--recursive specified but --directory not specified.') | 423 parser.error('--recursive specified but --directory not specified.') |
379 if options.output and options.directory: | 424 if options.output and options.directory: |
380 parser.error('--directory is specified, so --output has no effect.') | 425 parser.error('--directory is specified, so --output has no effect.') |
| 426 if (not (options.sha1_file or options.directory) |
| 427 and options.auto_platform): |
| 428 parser.error('--auto_platform must be specified with either ' |
| 429 '--sha1_file or --directory') |
| 430 |
381 input_filename = args[0] | 431 input_filename = args[0] |
382 | 432 |
383 # Set output filename if not specified. | 433 # Set output filename if not specified. |
384 if not options.output and not options.directory: | 434 if not options.output and not options.directory: |
385 if not options.sha1_file: | 435 if not options.sha1_file: |
386 # Target is a sha1 sum, so output filename would also be the sha1 sum. | 436 # Target is a sha1 sum, so output filename would also be the sha1 sum. |
387 options.output = input_filename | 437 options.output = input_filename |
388 elif options.sha1_file: | 438 elif options.sha1_file: |
389 # Target is a .sha1 file. | 439 # Target is a .sha1 file. |
390 if not input_filename.endswith('.sha1'): | 440 if not input_filename.endswith('.sha1'): |
(...skipping 12 matching lines...) Expand all Loading... |
403 % options.output) | 453 % options.output) |
404 | 454 |
405 # Check we have a valid bucket with valid permissions. | 455 # Check we have a valid bucket with valid permissions. |
406 base_url, code = check_bucket_permissions(options.bucket, gsutil) | 456 base_url, code = check_bucket_permissions(options.bucket, gsutil) |
407 if code: | 457 if code: |
408 return code | 458 return code |
409 | 459 |
410 return download_from_google_storage( | 460 return download_from_google_storage( |
411 input_filename, base_url, gsutil, options.num_threads, options.directory, | 461 input_filename, base_url, gsutil, options.num_threads, options.directory, |
412 options.recursive, options.force, options.output, options.ignore_errors, | 462 options.recursive, options.force, options.output, options.ignore_errors, |
413 options.sha1_file, options.verbose) | 463 options.sha1_file, options.verbose, options.auto_platform) |
414 | 464 |
415 | 465 |
# When executed as a script, run main() and use its return value as the
# process exit status.
if __name__ == '__main__':
  exit_status = main(sys.argv)
  sys.exit(exit_status)
OLD | NEW |