Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
| 5 | 5 |
| 6 """Download files from Google Storage based on SHA1 sums.""" | 6 """Download files from Google Storage based on SHA1 sums.""" |
| 7 | 7 |
| 8 | 8 |
| 9 import hashlib | 9 import hashlib |
| 10 import optparse | 10 import optparse |
| (...skipping 145 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 156 (sha1_match.groups(1)[0], full_path.replace('.sha1', ''))) | 156 (sha1_match.groups(1)[0], full_path.replace('.sha1', ''))) |
| 157 work_queue_size += 1 | 157 work_queue_size += 1 |
| 158 else: | 158 else: |
| 159 if not ignore_errors: | 159 if not ignore_errors: |
| 160 raise InvalidFileError('No sha1 sum found in %s.' % filename) | 160 raise InvalidFileError('No sha1 sum found in %s.' % filename) |
| 161 print >> sys.stderr, 'No sha1 sum found in %s.' % filename | 161 print >> sys.stderr, 'No sha1 sum found in %s.' % filename |
| 162 return work_queue_size | 162 return work_queue_size |
| 163 | 163 |
| 164 | 164 |
| 165 def _downloader_worker_thread(thread_num, q, force, base_url, | 165 def _downloader_worker_thread(thread_num, q, force, base_url, |
| 166 gsutil, out_q, ret_codes): | 166 gsutil, out_q, ret_codes, verbose): |
| 167 while True: | 167 while True: |
| 168 input_sha1_sum, output_filename = q.get() | 168 input_sha1_sum, output_filename = q.get() |
| 169 if input_sha1_sum is None: | 169 if input_sha1_sum is None: |
| 170 return | 170 return |
| 171 if os.path.exists(output_filename) and not force: | 171 if os.path.exists(output_filename) and not force: |
| 172 if get_sha1(output_filename) == input_sha1_sum: | 172 if get_sha1(output_filename) == input_sha1_sum: |
| 173 out_q.put( | 173 if verbose: |
| 174 '%d> File %s exists and SHA1 matches. Skipping.' % ( | 174 out_q.put( |
| 175 thread_num, output_filename)) | 175 '%d> File %s exists and SHA1 matches. Skipping.' % ( |
| 176 thread_num, output_filename)) | |
| 176 continue | 177 continue |
| 177 # Check if file exists. | 178 # Check if file exists. |
| 178 file_url = '%s/%s' % (base_url, input_sha1_sum) | 179 file_url = '%s/%s' % (base_url, input_sha1_sum) |
| 179 if gsutil.check_call('ls', file_url)[0] != 0: | 180 if gsutil.check_call('ls', file_url)[0] != 0: |
| 180 out_q.put('%d> File %s for %s does not exist, skipping.' % ( | 181 out_q.put('%d> File %s for %s does not exist, skipping.' % ( |
| 181 thread_num, file_url, output_filename)) | 182 thread_num, file_url, output_filename)) |
| 182 ret_codes.put((1, 'File %s for %s does not exist.' % ( | 183 ret_codes.put((1, 'File %s for %s does not exist.' % ( |
| 183 file_url, output_filename))) | 184 file_url, output_filename))) |
| 184 continue | 185 continue |
| 185 # Fetch the file. | 186 # Fetch the file. |
| (...skipping 23 matching lines...) Expand all Loading... | |
| 209 while True: | 210 while True: |
| 210 line = output_queue.get() | 211 line = output_queue.get() |
| 211 # Its plausible we want to print empty lines. | 212 # Its plausible we want to print empty lines. |
| 212 if line is None: | 213 if line is None: |
| 213 break | 214 break |
| 214 print line | 215 print line |
| 215 | 216 |
| 216 | 217 |
| 217 def download_from_google_storage( | 218 def download_from_google_storage( |
| 218 input_filename, base_url, gsutil, num_threads, directory, recursive, | 219 input_filename, base_url, gsutil, num_threads, directory, recursive, |
| 219 force, output, ignore_errors, sha1_file): | 220 force, output, ignore_errors, sha1_file, verbose): |
| 220 # Start up all the worker threads. | 221 # Start up all the worker threads. |
| 221 all_threads = [] | 222 all_threads = [] |
| 222 download_start = time.time() | 223 download_start = time.time() |
| 223 stdout_queue = Queue.Queue() | 224 stdout_queue = Queue.Queue() |
| 224 work_queue = Queue.Queue() | 225 work_queue = Queue.Queue() |
| 225 ret_codes = Queue.Queue() | 226 ret_codes = Queue.Queue() |
| 226 ret_codes.put((0, None)) | 227 ret_codes.put((0, None)) |
| 227 for thread_num in range(num_threads): | 228 for thread_num in range(num_threads): |
| 228 t = threading.Thread( | 229 t = threading.Thread( |
| 229 target=_downloader_worker_thread, | 230 target=_downloader_worker_thread, |
| 230 args=[thread_num, work_queue, force, base_url, | 231 args=[thread_num, work_queue, force, base_url, |
| 231 gsutil, stdout_queue, ret_codes]) | 232 gsutil, stdout_queue, ret_codes, verbose]) |
| 232 t.daemon = True | 233 t.daemon = True |
| 233 t.start() | 234 t.start() |
| 234 all_threads.append(t) | 235 all_threads.append(t) |
| 235 printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue]) | 236 printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue]) |
| 236 printer_thread.daemon = True | 237 printer_thread.daemon = True |
| 237 printer_thread.start() | 238 printer_thread.start() |
| 238 | 239 |
| 239 # Enumerate our work queue. | 240 # Enumerate our work queue. |
| 240 work_queue_size = enumerate_work_queue( | 241 work_queue_size = enumerate_work_queue( |
| 241 input_filename, work_queue, directory, recursive, | 242 input_filename, work_queue, directory, recursive, |
| 242 ignore_errors, output, sha1_file) | 243 ignore_errors, output, sha1_file) |
| 243 for _ in all_threads: | 244 for _ in all_threads: |
| 244 work_queue.put((None, None)) # Used to tell worker threads to stop. | 245 work_queue.put((None, None)) # Used to tell worker threads to stop. |
| 245 | 246 |
| 246 # Wait for all downloads to finish. | 247 # Wait for all downloads to finish. |
| 247 for t in all_threads: | 248 for t in all_threads: |
| 248 t.join() | 249 t.join() |
| 249 stdout_queue.put(None) | 250 stdout_queue.put(None) |
| 250 printer_thread.join() | 251 printer_thread.join() |
| 251 | 252 |
| 252 # See if we ran into any errors. | 253 # See if we ran into any errors. |
| 253 max_ret_code = 0 | 254 max_ret_code = 0 |
| 254 for ret_code, message in ret_codes.queue: | 255 for ret_code, message in ret_codes.queue: |
| 255 max_ret_code = max(ret_code, max_ret_code) | 256 max_ret_code = max(ret_code, max_ret_code) |
| 256 if message: | 257 if message: |
| 257 print >> sys.stderr, message | 258 print >> sys.stderr, message |
| 258 if not max_ret_code: | 259 if verbose and not max_ret_code: |
| 259 print 'Success!' | 260 print 'Success!' |
| 260 | 261 |
| 261 print 'Downloading %d files took %1f second(s)' % ( | 262 if verbose: |
| 262 work_queue_size, time.time() - download_start) | 263 print 'Downloading %d files took %1f second(s)' % ( |
| 264 work_queue_size, time.time() - download_start) | |
| 263 return max_ret_code | 265 return max_ret_code |
| 264 | 266 |
| 265 | 267 |
| 266 def main(args): | 268 def main(args): |
| 267 usage = ('usage: %prog [options] target\n' | 269 usage = ('usage: %prog [options] target\n' |
| 268 'Target must be:\n' | 270 'Target must be:\n' |
| 269 ' (default) a sha1 sum ([A-Za-z0-9]{40}).\n' | 271 ' (default) a sha1 sum ([A-Za-z0-9]{40}).\n' |
| 270 ' (-s or --sha1_file) a .sha1 file, containing a sha1 sum on ' | 272 ' (-s or --sha1_file) a .sha1 file, containing a sha1 sum on ' |
| 271 'the first line.\n' | 273 'the first line.\n' |
| 272 ' (-d or --directory) A directory to scan for .sha1 files.') | 274 ' (-d or --directory) A directory to scan for .sha1 files.') |
| (...skipping 28 matching lines...) Expand all Loading... | |
| 301 help='Alias for "gsutil config". Run this if you want ' | 303 help='Alias for "gsutil config". Run this if you want ' |
| 302 'to initialize your saved Google Storage ' | 304 'to initialize your saved Google Storage ' |
| 303 'credentials.') | 305 'credentials.') |
| 304 parser.add_option('-n', '--no_auth', action='store_true', | 306 parser.add_option('-n', '--no_auth', action='store_true', |
| 305 help='Skip auth checking. Use if it\'s known that the ' | 307 help='Skip auth checking. Use if it\'s known that the ' |
| 306 'target bucket is a public bucket.') | 308 'target bucket is a public bucket.') |
| 307 parser.add_option('-p', '--platform', | 309 parser.add_option('-p', '--platform', |
| 308 help='A regular expression that is compared against ' | 310 help='A regular expression that is compared against ' |
| 309 'Python\'s sys.platform. If this option is specified, ' | 311 'Python\'s sys.platform. If this option is specified, ' |
| 310 'the download will happen only if there is a match.') | 312 'the download will happen only if there is a match.') |
| 313 parser.add_option('-v', '--verbose', default=False, action='store_true', | |
|
> **M-A Ruel** (2013/11/25 19:13:44): default=False is not needed, please remove.
>
> **scottmg** (2013/11/25 19:23:40): Done.

| |
| 314 help='Output extra diagnostic and progress information.') | |
| 311 | 315 |
| 312 (options, args) = parser.parse_args() | 316 (options, args) = parser.parse_args() |
| 313 | 317 |
| 314 # Make sure we should run at all based on platform matching. | 318 # Make sure we should run at all based on platform matching. |
| 315 if options.platform: | 319 if options.platform: |
| 316 if not re.match(options.platform, sys.platform): | 320 if not re.match(options.platform, sys.platform): |
| 317 print('The current platform doesn\'t match "%s", skipping.' % | 321 if options.verbose: |
| 318 options.platform) | 322 print('The current platform doesn\'t match "%s", skipping.' % |
| 323 options.platform) | |
| 319 return 0 | 324 return 0 |
| 320 | 325 |
| 321 # Set the boto file to /dev/null if we don't need auth. | 326 # Set the boto file to /dev/null if we don't need auth. |
| 322 if options.no_auth: | 327 if options.no_auth: |
| 323 options.boto = os.devnull | 328 options.boto = os.devnull |
| 324 | 329 |
| 325 # Make sure we can find a working instance of gsutil. | 330 # Make sure we can find a working instance of gsutil. |
| 326 if os.path.exists(GSUTIL_DEFAULT_PATH): | 331 if os.path.exists(GSUTIL_DEFAULT_PATH): |
| 327 gsutil = Gsutil(GSUTIL_DEFAULT_PATH, boto_path=options.boto) | 332 gsutil = Gsutil(GSUTIL_DEFAULT_PATH, boto_path=options.boto) |
| 328 else: | 333 else: |
| (...skipping 47 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 376 % options.output) | 381 % options.output) |
| 377 | 382 |
| 378 # Check we have a valid bucket with valid permissions. | 383 # Check we have a valid bucket with valid permissions. |
| 379 base_url, code = check_bucket_permissions(options.bucket, gsutil) | 384 base_url, code = check_bucket_permissions(options.bucket, gsutil) |
| 380 if code: | 385 if code: |
| 381 return code | 386 return code |
| 382 | 387 |
| 383 return download_from_google_storage( | 388 return download_from_google_storage( |
| 384 input_filename, base_url, gsutil, options.num_threads, options.directory, | 389 input_filename, base_url, gsutil, options.num_threads, options.directory, |
| 385 options.recursive, options.force, options.output, options.ignore_errors, | 390 options.recursive, options.force, options.output, options.ignore_errors, |
| 386 options.sha1_file) | 391 options.sha1_file, options.verbose) |
| 387 | 392 |
| 388 | 393 |
| 389 if __name__ == '__main__': | 394 if __name__ == '__main__': |
| 390 sys.exit(main(sys.argv)) | 395 sys.exit(main(sys.argv)) |
| OLD | NEW |