OLD | NEW |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 """Download files from Google Storage based on SHA1 sums.""" | 6 """Download files from Google Storage based on SHA1 sums.""" |
7 | 7 |
8 | 8 |
9 import hashlib | 9 import hashlib |
10 import optparse | 10 import optparse |
(...skipping 145 matching lines...) | |
156 (sha1_match.groups(1)[0], full_path.replace('.sha1', ''))) | 156 (sha1_match.groups(1)[0], full_path.replace('.sha1', ''))) |
157 work_queue_size += 1 | 157 work_queue_size += 1 |
158 else: | 158 else: |
159 if not ignore_errors: | 159 if not ignore_errors: |
160 raise InvalidFileError('No sha1 sum found in %s.' % filename) | 160 raise InvalidFileError('No sha1 sum found in %s.' % filename) |
161 print >> sys.stderr, 'No sha1 sum found in %s.' % filename | 161 print >> sys.stderr, 'No sha1 sum found in %s.' % filename |
162 return work_queue_size | 162 return work_queue_size |
163 | 163 |
164 | 164 |
165 def _downloader_worker_thread(thread_num, q, force, base_url, | 165 def _downloader_worker_thread(thread_num, q, force, base_url, |
166 gsutil, out_q, ret_codes): | 166 gsutil, out_q, ret_codes, verbose): |
167 while True: | 167 while True: |
168 input_sha1_sum, output_filename = q.get() | 168 input_sha1_sum, output_filename = q.get() |
169 if input_sha1_sum is None: | 169 if input_sha1_sum is None: |
170 return | 170 return |
171 if os.path.exists(output_filename) and not force: | 171 if os.path.exists(output_filename) and not force: |
172 if get_sha1(output_filename) == input_sha1_sum: | 172 if get_sha1(output_filename) == input_sha1_sum: |
173 out_q.put( | 173 if verbose: |
174 '%d> File %s exists and SHA1 matches. Skipping.' % ( | 174 out_q.put( |
175 thread_num, output_filename)) | 175 '%d> File %s exists and SHA1 matches. Skipping.' % ( |
 | 176 thread_num, output_filename)) |
176 continue | 177 continue |
177 # Check if file exists. | 178 # Check if file exists. |
178 file_url = '%s/%s' % (base_url, input_sha1_sum) | 179 file_url = '%s/%s' % (base_url, input_sha1_sum) |
179 if gsutil.check_call('ls', file_url)[0] != 0: | 180 if gsutil.check_call('ls', file_url)[0] != 0: |
180 out_q.put('%d> File %s for %s does not exist, skipping.' % ( | 181 out_q.put('%d> File %s for %s does not exist, skipping.' % ( |
181 thread_num, file_url, output_filename)) | 182 thread_num, file_url, output_filename)) |
182 ret_codes.put((1, 'File %s for %s does not exist.' % ( | 183 ret_codes.put((1, 'File %s for %s does not exist.' % ( |
183 file_url, output_filename))) | 184 file_url, output_filename))) |
184 continue | 185 continue |
185 # Fetch the file. | 186 # Fetch the file. |
(...skipping 23 matching lines...) | |
209 while True: | 210 while True: |
210 line = output_queue.get() | 211 line = output_queue.get() |
211 # It's plausible we want to print empty lines. | 212 # It's plausible we want to print empty lines. |
212 if line is None: | 213 if line is None: |
213 break | 214 break |
214 print line | 215 print line |
215 | 216 |
216 | 217 |
217 def download_from_google_storage( | 218 def download_from_google_storage( |
218 input_filename, base_url, gsutil, num_threads, directory, recursive, | 219 input_filename, base_url, gsutil, num_threads, directory, recursive, |
219 force, output, ignore_errors, sha1_file): | 220 force, output, ignore_errors, sha1_file, verbose): |
220 # Start up all the worker threads. | 221 # Start up all the worker threads. |
221 all_threads = [] | 222 all_threads = [] |
222 download_start = time.time() | 223 download_start = time.time() |
223 stdout_queue = Queue.Queue() | 224 stdout_queue = Queue.Queue() |
224 work_queue = Queue.Queue() | 225 work_queue = Queue.Queue() |
225 ret_codes = Queue.Queue() | 226 ret_codes = Queue.Queue() |
226 ret_codes.put((0, None)) | 227 ret_codes.put((0, None)) |
227 for thread_num in range(num_threads): | 228 for thread_num in range(num_threads): |
228 t = threading.Thread( | 229 t = threading.Thread( |
229 target=_downloader_worker_thread, | 230 target=_downloader_worker_thread, |
230 args=[thread_num, work_queue, force, base_url, | 231 args=[thread_num, work_queue, force, base_url, |
231 gsutil, stdout_queue, ret_codes]) | 232 gsutil, stdout_queue, ret_codes, verbose]) |
232 t.daemon = True | 233 t.daemon = True |
233 t.start() | 234 t.start() |
234 all_threads.append(t) | 235 all_threads.append(t) |
235 printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue]) | 236 printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue]) |
236 printer_thread.daemon = True | 237 printer_thread.daemon = True |
237 printer_thread.start() | 238 printer_thread.start() |
238 | 239 |
239 # Enumerate our work queue. | 240 # Enumerate our work queue. |
240 work_queue_size = enumerate_work_queue( | 241 work_queue_size = enumerate_work_queue( |
241 input_filename, work_queue, directory, recursive, | 242 input_filename, work_queue, directory, recursive, |
242 ignore_errors, output, sha1_file) | 243 ignore_errors, output, sha1_file) |
243 for _ in all_threads: | 244 for _ in all_threads: |
244 work_queue.put((None, None)) # Used to tell worker threads to stop. | 245 work_queue.put((None, None)) # Used to tell worker threads to stop. |
245 | 246 |
246 # Wait for all downloads to finish. | 247 # Wait for all downloads to finish. |
247 for t in all_threads: | 248 for t in all_threads: |
248 t.join() | 249 t.join() |
249 stdout_queue.put(None) | 250 stdout_queue.put(None) |
250 printer_thread.join() | 251 printer_thread.join() |
251 | 252 |
252 # See if we ran into any errors. | 253 # See if we ran into any errors. |
253 max_ret_code = 0 | 254 max_ret_code = 0 |
254 for ret_code, message in ret_codes.queue: | 255 for ret_code, message in ret_codes.queue: |
255 max_ret_code = max(ret_code, max_ret_code) | 256 max_ret_code = max(ret_code, max_ret_code) |
256 if message: | 257 if message: |
257 print >> sys.stderr, message | 258 print >> sys.stderr, message |
258 if not max_ret_code: | 259 if verbose and not max_ret_code: |
259 print 'Success!' | 260 print 'Success!' |
260 | 261 |
261 print 'Downloading %d files took %1f second(s)' % ( | 262 if verbose: |
262 work_queue_size, time.time() - download_start) | 263 print 'Downloading %d files took %1f second(s)' % ( |
 | 264 work_queue_size, time.time() - download_start) |
263 return max_ret_code | 265 return max_ret_code |
264 | 266 |
265 | 267 |
266 def main(args): | 268 def main(args): |
267 usage = ('usage: %prog [options] target\n' | 269 usage = ('usage: %prog [options] target\n' |
268 'Target must be:\n' | 270 'Target must be:\n' |
269 ' (default) a sha1 sum ([A-Za-z0-9]{40}).\n' | 271 ' (default) a sha1 sum ([A-Za-z0-9]{40}).\n' |
270 ' (-s or --sha1_file) a .sha1 file, containing a sha1 sum on ' | 272 ' (-s or --sha1_file) a .sha1 file, containing a sha1 sum on ' |
271 'the first line.\n' | 273 'the first line.\n' |
272 ' (-d or --directory) A directory to scan for .sha1 files.') | 274 ' (-d or --directory) A directory to scan for .sha1 files.') |
(...skipping 28 matching lines...) | |
301 help='Alias for "gsutil config". Run this if you want ' | 303 help='Alias for "gsutil config". Run this if you want ' |
302 'to initialize your saved Google Storage ' | 304 'to initialize your saved Google Storage ' |
303 'credentials.') | 305 'credentials.') |
304 parser.add_option('-n', '--no_auth', action='store_true', | 306 parser.add_option('-n', '--no_auth', action='store_true', |
305 help='Skip auth checking. Use if it\'s known that the ' | 307 help='Skip auth checking. Use if it\'s known that the ' |
306 'target bucket is a public bucket.') | 308 'target bucket is a public bucket.') |
307 parser.add_option('-p', '--platform', | 309 parser.add_option('-p', '--platform', |
308 help='A regular expression that is compared against ' | 310 help='A regular expression that is compared against ' |
309 'Python\'s sys.platform. If this option is specified, ' | 311 'Python\'s sys.platform. If this option is specified, ' |
310 'the download will happen only if there is a match.') | 312 'the download will happen only if there is a match.') |
 | 313 parser.add_option('-v', '--verbose', default=False, action='store_true', |
 | [Review comment] M-A Ruel, 2013/11/25 19:13:44: default=False is not needed, please remove. |
 | [Reply] scottmg, 2013/11/25 19:23:40: Done. |
 | 314 help='Output extra diagnostic and progress information.') |
311 | 315 |
312 (options, args) = parser.parse_args() | 316 (options, args) = parser.parse_args() |
313 | 317 |
314 # Make sure we should run at all based on platform matching. | 318 # Make sure we should run at all based on platform matching. |
315 if options.platform: | 319 if options.platform: |
316 if not re.match(options.platform, sys.platform): | 320 if not re.match(options.platform, sys.platform): |
317 print('The current platform doesn\'t match "%s", skipping.' % | 321 if options.verbose: |
318 options.platform) | 322 print('The current platform doesn\'t match "%s", skipping.' % |
 | 323 options.platform) |
319 return 0 | 324 return 0 |
320 | 325 |
321 # Set the boto file to /dev/null if we don't need auth. | 326 # Set the boto file to /dev/null if we don't need auth. |
322 if options.no_auth: | 327 if options.no_auth: |
323 options.boto = os.devnull | 328 options.boto = os.devnull |
324 | 329 |
325 # Make sure we can find a working instance of gsutil. | 330 # Make sure we can find a working instance of gsutil. |
326 if os.path.exists(GSUTIL_DEFAULT_PATH): | 331 if os.path.exists(GSUTIL_DEFAULT_PATH): |
327 gsutil = Gsutil(GSUTIL_DEFAULT_PATH, boto_path=options.boto) | 332 gsutil = Gsutil(GSUTIL_DEFAULT_PATH, boto_path=options.boto) |
328 else: | 333 else: |
(...skipping 47 matching lines...) | |
376 % options.output) | 381 % options.output) |
377 | 382 |
378 # Check we have a valid bucket with valid permissions. | 383 # Check we have a valid bucket with valid permissions. |
379 base_url, code = check_bucket_permissions(options.bucket, gsutil) | 384 base_url, code = check_bucket_permissions(options.bucket, gsutil) |
380 if code: | 385 if code: |
381 return code | 386 return code |
382 | 387 |
383 return download_from_google_storage( | 388 return download_from_google_storage( |
384 input_filename, base_url, gsutil, options.num_threads, options.directory, | 389 input_filename, base_url, gsutil, options.num_threads, options.directory, |
385 options.recursive, options.force, options.output, options.ignore_errors, | 390 options.recursive, options.force, options.output, options.ignore_errors, |
386 options.sha1_file) | 391 options.sha1_file, options.verbose) |
387 | 392 |
388 | 393 |
389 if __name__ == '__main__': | 394 if __name__ == '__main__': |
390 sys.exit(main(sys.argv)) | 395 sys.exit(main(sys.argv)) |
OLD | NEW |