OLD | NEW |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 """Download files from Google Storage based on SHA1 sums.""" | 6 """Download files from Google Storage based on SHA1 sums.""" |
7 | 7 |
8 | 8 |
9 import hashlib | 9 import hashlib |
10 import optparse | 10 import optparse |
11 import os | 11 import os |
12 import Queue | 12 import Queue |
13 import re | 13 import re |
14 import shutil | |
14 import stat | 15 import stat |
15 import sys | 16 import sys |
17 import tarfile | |
16 import threading | 18 import threading |
17 import time | 19 import time |
18 | 20 |
19 import subprocess2 | 21 import subprocess2 |
20 | 22 |
21 | 23 |
22 GSUTIL_DEFAULT_PATH = os.path.join( | 24 GSUTIL_DEFAULT_PATH = os.path.join( |
23 os.path.dirname(os.path.abspath(__file__)), 'gsutil.py') | 25 os.path.dirname(os.path.abspath(__file__)), 'gsutil.py') |
24 # Maps sys.platform values to the names we actually want to use for them. | 26 # Maps sys.platform values to the names we actually want to use for them. |
25 PLATFORM_MAPPING = { | 27 PLATFORM_MAPPING = { |
(...skipping 171 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
197 (sha1_match.groups(1)[0], full_path.replace('.sha1', ''))) | 199 (sha1_match.groups(1)[0], full_path.replace('.sha1', ''))) |
198 work_queue_size += 1 | 200 work_queue_size += 1 |
199 else: | 201 else: |
200 if not ignore_errors: | 202 if not ignore_errors: |
201 raise InvalidFileError('No sha1 sum found in %s.' % filename) | 203 raise InvalidFileError('No sha1 sum found in %s.' % filename) |
202 print >> sys.stderr, 'No sha1 sum found in %s.' % filename | 204 print >> sys.stderr, 'No sha1 sum found in %s.' % filename |
203 return work_queue_size | 205 return work_queue_size |
204 | 206 |
205 | 207 |
206 def _downloader_worker_thread(thread_num, q, force, base_url, | 208 def _downloader_worker_thread(thread_num, q, force, base_url, |
207 gsutil, out_q, ret_codes, verbose): | 209 gsutil, out_q, ret_codes, verbose, extract): |
208 while True: | 210 while True: |
209 input_sha1_sum, output_filename = q.get() | 211 input_sha1_sum, output_filename = q.get() |
210 if input_sha1_sum is None: | 212 if input_sha1_sum is None: |
211 return | 213 return |
212 if os.path.exists(output_filename) and not force: | 214 if os.path.exists(output_filename) and not force: |
213 if get_sha1(output_filename) == input_sha1_sum: | 215 if get_sha1(output_filename) == input_sha1_sum: |
214 if verbose: | 216 if verbose: |
215 out_q.put( | 217 out_q.put( |
216 '%d> File %s exists and SHA1 matches. Skipping.' % ( | 218 '%d> File %s exists and SHA1 matches. Skipping.' % ( |
217 thread_num, output_filename)) | 219 thread_num, output_filename)) |
(...skipping 12 matching lines...) Expand all Loading... | |
230 os.remove(output_filename) # Delete the file if it exists already. | 232 os.remove(output_filename) # Delete the file if it exists already. |
231 except OSError: | 233 except OSError: |
232 if os.path.exists(output_filename): | 234 if os.path.exists(output_filename): |
233 out_q.put('%d> Warning: deleting %s failed.' % ( | 235 out_q.put('%d> Warning: deleting %s failed.' % ( |
234 thread_num, output_filename)) | 236 thread_num, output_filename)) |
235 code, _, err = gsutil.check_call('cp', file_url, output_filename) | 237 code, _, err = gsutil.check_call('cp', file_url, output_filename) |
236 if code != 0: | 238 if code != 0: |
237 out_q.put('%d> %s' % (thread_num, err)) | 239 out_q.put('%d> %s' % (thread_num, err)) |
238 ret_codes.put((code, err)) | 240 ret_codes.put((code, err)) |
239 | 241 |
242 if extract: | |
243 if (not tarfile.is_tarfile(output_filename) | |
244 or not output_filename.endswith('tar.gz')): | |
hinoka
2015/01/20 19:28:52
The second one doesn't seem to be necessary. Eg.
ricow1
2015/01/22 15:46:03
removed second check (although we could easily req
| |
245 out_q.put('%d> Warning: %s is not a tar.gz archive.' % ( | |
hinoka
2015/01/20 19:28:52
s/Warning/Error/
ricow1
2015/01/22 15:46:03
Done.
| |
246 thread_num, output_filename)) | |
247 ret_codes.put((1, '%s is not a tar.gz archive.' % (output_filename))) | |
248 continue | |
249 tar = tarfile.open(output_filename, 'r:gz') | |
250 dirname = os.path.dirname(os.path.abspath(output_filename)) | |
251 extract_dir = output_filename[0:len(output_filename)-7] | |
hinoka
2015/01/20 19:28:52
os.path.splitext()[0]
ricow1
2015/01/22 15:46:03
Not really, assume foobar.tar.gz, that will give m
| |
252 if os.path.exists(extract_dir): | |
253 try: | |
254 shutil.rmtree(extract_dir) | |
255 out_q.put('%d> Removed %s...' % (thread_num, extract_dir)) | |
256 except OSError: | |
257 out_q.put('%d> Warning: Can\'t delete.' % ( | |
258 thread_num, extract_dir)) | |
259 ret_codes.put((1, 'Can\'t delete %s.' % (extract_dir))) | |
260 continue | |
261 out_q.put('%d> Extracting %s to %s' % (thread_num, output_filename, | |
262 extract_dir)) | |
263 tar.extractall(path=dirname) | |
240 # Set executable bit. | 264 # Set executable bit. |
241 if sys.platform == 'cygwin': | 265 if sys.platform == 'cygwin': |
242 # Under cygwin, mark all files as executable. The executable flag in | 266 # Under cygwin, mark all files as executable. The executable flag in |
243 # Google Storage will not be set when uploading from Windows, so if | 267 # Google Storage will not be set when uploading from Windows, so if |
244 # this script is running under cygwin and we're downloading an | 268 # this script is running under cygwin and we're downloading an |
245 # executable, it will be unrunnable from inside cygwin without this. | 269 # executable, it will be unrunnable from inside cygwin without this. |
246 st = os.stat(output_filename) | 270 st = os.stat(output_filename) |
247 os.chmod(output_filename, st.st_mode | stat.S_IEXEC) | 271 os.chmod(output_filename, st.st_mode | stat.S_IEXEC) |
248 elif sys.platform != 'win32': | 272 elif sys.platform != 'win32': |
249 # On non-Windows platforms, key off of the custom header | 273 # On non-Windows platforms, key off of the custom header |
(...skipping 10 matching lines...) Expand all Loading... | |
260 while True: | 284 while True: |
261 line = output_queue.get() | 285 line = output_queue.get() |
262 # It's plausible we want to print empty lines. | 286 # It's plausible we want to print empty lines. |
263 if line is None: | 287 if line is None: |
264 break | 288 break |
265 print line | 289 print line |
266 | 290 |
267 | 291 |
268 def download_from_google_storage( | 292 def download_from_google_storage( |
269 input_filename, base_url, gsutil, num_threads, directory, recursive, | 293 input_filename, base_url, gsutil, num_threads, directory, recursive, |
270 force, output, ignore_errors, sha1_file, verbose, auto_platform): | 294 force, output, ignore_errors, sha1_file, verbose, auto_platform, extract): |
271 # Start up all the worker threads. | 295 # Start up all the worker threads. |
272 all_threads = [] | 296 all_threads = [] |
273 download_start = time.time() | 297 download_start = time.time() |
274 stdout_queue = Queue.Queue() | 298 stdout_queue = Queue.Queue() |
275 work_queue = Queue.Queue() | 299 work_queue = Queue.Queue() |
276 ret_codes = Queue.Queue() | 300 ret_codes = Queue.Queue() |
277 ret_codes.put((0, None)) | 301 ret_codes.put((0, None)) |
278 for thread_num in range(num_threads): | 302 for thread_num in range(num_threads): |
279 t = threading.Thread( | 303 t = threading.Thread( |
280 target=_downloader_worker_thread, | 304 target=_downloader_worker_thread, |
281 args=[thread_num, work_queue, force, base_url, | 305 args=[thread_num, work_queue, force, base_url, |
282 gsutil, stdout_queue, ret_codes, verbose]) | 306 gsutil, stdout_queue, ret_codes, verbose, extract]) |
283 t.daemon = True | 307 t.daemon = True |
284 t.start() | 308 t.start() |
285 all_threads.append(t) | 309 all_threads.append(t) |
286 printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue]) | 310 printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue]) |
287 printer_thread.daemon = True | 311 printer_thread.daemon = True |
288 printer_thread.start() | 312 printer_thread.start() |
289 | 313 |
290 # Enumerate our work queue. | 314 # Enumerate our work queue. |
291 work_queue_size = enumerate_work_queue( | 315 work_queue_size = enumerate_work_queue( |
292 input_filename, work_queue, directory, recursive, | 316 input_filename, work_queue, directory, recursive, |
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
360 parser.add_option('-p', '--platform', | 384 parser.add_option('-p', '--platform', |
361 help='A regular expression that is compared against ' | 385 help='A regular expression that is compared against ' |
362 'Python\'s sys.platform. If this option is specified, ' | 386 'Python\'s sys.platform. If this option is specified, ' |
363 'the download will happen only if there is a match.') | 387 'the download will happen only if there is a match.') |
364 parser.add_option('-a', '--auto_platform', | 388 parser.add_option('-a', '--auto_platform', |
365 action='store_true', | 389 action='store_true', |
366 help='Detects if any parent folder of the target matches ' | 390 help='Detects if any parent folder of the target matches ' |
367 '(linux|mac|win). If so, the script will only ' | 391 '(linux|mac|win). If so, the script will only ' |
368 'process files that are in the paths that ' | 392 'process files that are in the paths that ' |
369 'that matches the current platform.') | 393 'that matches the current platform.') |
394 parser.add_option('-u', '--extract', | |
395 action='store_true', | |
396 help='Extract a downloaded tar.gz file after download. ' | |
397 'Leaves the tar.gz file around for sha verification1' | |
398 'If a directory with the same name as the tar.gz ' | |
399 'file already exists, this is deleted (to get a ' | |
400 'clean state in case of update.)') | |
370 parser.add_option('-v', '--verbose', action='store_true', | 401 parser.add_option('-v', '--verbose', action='store_true', |
371 help='Output extra diagnostic and progress information.') | 402 help='Output extra diagnostic and progress information.') |
372 | 403 |
373 (options, args) = parser.parse_args() | 404 (options, args) = parser.parse_args() |
374 | 405 |
375 # Make sure we should run at all based on platform matching. | 406 # Make sure we should run at all based on platform matching. |
376 if options.platform: | 407 if options.platform: |
377 if options.auto_platform: | 408 if options.auto_platform: |
378 parser.error('--platform can not be specified with --auto_platform') | 409 parser.error('--platform can not be specified with --auto_platform') |
379 if not re.match(options.platform, GetNormalizedPlatform()): | 410 if not re.match(options.platform, GetNormalizedPlatform()): |
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
456 | 487 |
457 # Check we have a valid bucket with valid permissions. | 488 # Check we have a valid bucket with valid permissions. |
458 if not options.no_auth: | 489 if not options.no_auth: |
459 code = check_bucket_permissions(base_url, gsutil) | 490 code = check_bucket_permissions(base_url, gsutil) |
460 if code: | 491 if code: |
461 return code | 492 return code |
462 | 493 |
463 return download_from_google_storage( | 494 return download_from_google_storage( |
464 input_filename, base_url, gsutil, options.num_threads, options.directory, | 495 input_filename, base_url, gsutil, options.num_threads, options.directory, |
465 options.recursive, options.force, options.output, options.ignore_errors, | 496 options.recursive, options.force, options.output, options.ignore_errors, |
466 options.sha1_file, options.verbose, options.auto_platform) | 497 options.sha1_file, options.verbose, options.auto_platform, |
498 options.extract) | |
467 | 499 |
468 | 500 |
469 if __name__ == '__main__': | 501 if __name__ == '__main__': |
470 sys.exit(main(sys.argv)) | 502 sys.exit(main(sys.argv)) |
OLD | NEW |