OLD | NEW |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 """Download files from Google Storage based on SHA1 sums.""" | 6 """Download files from Google Storage based on SHA1 sums.""" |
7 | 7 |
8 | 8 |
9 import hashlib | 9 import hashlib |
10 import optparse | 10 import optparse |
11 import os | 11 import os |
12 import Queue | 12 import Queue |
13 import re | 13 import re |
14 import stat | 14 import stat |
15 import sys | 15 import sys |
16 import threading | 16 import threading |
17 import time | 17 import time |
18 | 18 |
19 import subprocess2 | 19 import subprocess2 |
20 | 20 |
21 | 21 |
22 GSUTIL_DEFAULT_PATH = os.path.join( | 22 GSUTIL_DEFAULT_PATH = os.path.join( |
23 os.path.dirname(os.path.abspath(__file__)), | 23 os.path.dirname(os.path.abspath(__file__)), |
24 'third_party', 'gsutil', 'gsutil') | 24 'third_party', 'gsutil', 'gsutil') |
25 # Maps sys.platform to what we actually want to call them. | |
26 PLATFORM_MAPPING = { | |
27 'linux2': 'linux', | |
28 'win32': 'win', | |
29 'cygwin': 'win', | |
30 'darwin': 'mac', | |
M-A Ruel
2014/01/17 20:19:21
Sort the dictionary keys alphabetically.
Ryan Tseng
2014/01/17 20:58:37
Done.
| |
31 } | |
25 | 32 |
26 | 33 |
27 class FileNotFoundError(IOError): | 34 class FileNotFoundError(IOError): |
28 pass | 35 pass |
29 | 36 |
30 | 37 |
31 class InvalidFileError(IOError): | 38 class InvalidFileError(IOError): |
32 pass | 39 pass |
33 | 40 |
34 | 41 |
42 class InvalidPlatformError(Exception): | |
43 pass | |
44 | |
45 | |
35 def GetNormalizedPlatform(): | 46 def GetNormalizedPlatform(): |
36 """Returns the result of sys.platform accounting for cygwin. | 47 """Returns the result of sys.platform accounting for cygwin. |
37 Under cygwin, this will always return "win32" like the native Python.""" | 48 Under cygwin, this will always return "win32" like the native Python.""" |
38 if sys.platform == 'cygwin': | 49 if sys.platform == 'cygwin': |
39 return 'win32' | 50 return 'win32' |
40 return sys.platform | 51 return sys.platform |
41 | 52 |
42 | 53 |
43 # Common utilities | 54 # Common utilities |
44 class Gsutil(object): | 55 class Gsutil(object): |
(...skipping 64 matching lines...) | |
109 if code != 0: | 120 if code != 0: |
110 print >> sys.stderr, ls_err | 121 print >> sys.stderr, ls_err |
111 if code == 403: | 122 if code == 403: |
112 print >> sys.stderr, 'Got error 403 while authenticating to %s.' % base_url | 123 print >> sys.stderr, 'Got error 403 while authenticating to %s.' % base_url |
113 print >> sys.stderr, 'Try running "download_from_google_storage --config".' | 124 print >> sys.stderr, 'Try running "download_from_google_storage --config".' |
114 elif code == 404: | 125 elif code == 404: |
115 print >> sys.stderr, '%s not found.' % base_url | 126 print >> sys.stderr, '%s not found.' % base_url |
116 return (base_url, code) | 127 return (base_url, code) |
117 | 128 |
118 | 129 |
130 def check_platform(target): | |
131 """Check if any parent directory of target matches (win|mac|linux).""" | |
M-A Ruel
2014/01/17 20:19:21
The docstring should start with "Checks" rather than "Check".
Ryan Tseng
2014/01/17 20:58:37
Done.
| |
132 if not target: | |
133 return None | |
134 full_path = os.path.abspath(target) | |
M-A Ruel
2014/01/17 20:19:21
what about asserting the path is absolute?
Ryan Tseng
2014/01/17 20:58:37
Done.
| |
135 root, target_name = os.path.split(full_path) | |
136 if target_name in ('win', 'mac', 'linux'): | |
M-A Ruel
2014/01/17 20:19:21
Sort the platform names in this tuple alphabetically.
Ryan Tseng
2014/01/17 20:58:37
Done.
| |
137 return target_name | |
138 return check_platform(root) | |
M-A Ruel
2014/01/17 20:19:21
What happens if you call it with 'e:\\' ?
Ryan Tseng
2014/01/17 20:58:37
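os.path.split('e:\\') returns ('e:\\', ''), so target_name is empty and root stays 'e:\\'; the recursion needs to stop once root no longer changes.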
| |
139 | |
140 | |
119 def get_sha1(filename): | 141 def get_sha1(filename): |
120 sha1 = hashlib.sha1() | 142 sha1 = hashlib.sha1() |
121 with open(filename, 'rb') as f: | 143 with open(filename, 'rb') as f: |
122 while True: | 144 while True: |
123 # Read in 1mb chunks, so it doesn't all have to be loaded into memory. | 145 # Read in 1mb chunks, so it doesn't all have to be loaded into memory. |
124 chunk = f.read(1024*1024) | 146 chunk = f.read(1024*1024) |
125 if not chunk: | 147 if not chunk: |
126 break | 148 break |
127 sha1.update(chunk) | 149 sha1.update(chunk) |
128 return sha1.hexdigest() | 150 return sha1.hexdigest() |
129 | 151 |
130 | 152 |
131 # Download-specific code starts here | 153 # Download-specific code starts here |
132 | 154 |
133 def enumerate_work_queue(input_filename, work_queue, directory, | 155 def enumerate_work_queue(input_filename, work_queue, directory, |
134 recursive, ignore_errors, output, sha1_file): | 156 recursive, ignore_errors, output, sha1_file, |
157 auto_platform): | |
135 if sha1_file: | 158 if sha1_file: |
136 if not os.path.exists(input_filename): | 159 if not os.path.exists(input_filename): |
137 if not ignore_errors: | 160 if not ignore_errors: |
138 raise FileNotFoundError('%s not found.' % input_filename) | 161 raise FileNotFoundError('%s not found.' % input_filename) |
139 print >> sys.stderr, '%s not found.' % input_filename | 162 print >> sys.stderr, '%s not found.' % input_filename |
140 with open(input_filename, 'rb') as f: | 163 with open(input_filename, 'rb') as f: |
141 sha1_match = re.match('^([A-Za-z0-9]{40})$', f.read(1024).rstrip()) | 164 sha1_match = re.match('^([A-Za-z0-9]{40})$', f.read(1024).rstrip()) |
142 if sha1_match: | 165 if sha1_match: |
143 work_queue.put( | 166 work_queue.put( |
144 (sha1_match.groups(1)[0], input_filename.replace('.sha1', ''))) | 167 (sha1_match.groups(1)[0], input_filename.replace('.sha1', ''))) |
(...skipping 12 matching lines...) | |
157 if not recursive: | 180 if not recursive: |
158 for item in dirs[:]: | 181 for item in dirs[:]: |
159 dirs.remove(item) | 182 dirs.remove(item) |
160 else: | 183 else: |
161 for exclude in ['.svn', '.git']: | 184 for exclude in ['.svn', '.git']: |
162 if exclude in dirs: | 185 if exclude in dirs: |
163 dirs.remove(exclude) | 186 dirs.remove(exclude) |
164 for filename in files: | 187 for filename in files: |
165 full_path = os.path.join(root, filename) | 188 full_path = os.path.join(root, filename) |
166 if full_path.endswith('.sha1'): | 189 if full_path.endswith('.sha1'): |
190 if auto_platform: | |
191 # Skip if the platform does not match. | |
192 target_platform = check_platform(full_path) | |
193 if not target_platform: | |
194 err = ('--auto_platform passed in but no ' | |
M-A Ruel
2014/01/17 20:19:21
The wrapping seems excessive; try to extend each line nearer to the line-length limit.
Ryan Tseng
2014/01/17 20:58:37
Done.
| |
195 'platform name found in the path of %s' | |
196 % full_path) | |
197 if not ignore_errors: | |
198 raise InvalidFileError(err) | |
199 print >> sys.stderr, err | |
200 continue | |
201 current_platform = PLATFORM_MAPPING[sys.platform] | |
202 if current_platform != target_platform: | |
203 continue | |
204 | |
167 with open(full_path, 'rb') as f: | 205 with open(full_path, 'rb') as f: |
168 sha1_match = re.match('^([A-Za-z0-9]{40})$', f.read(1024).rstrip()) | 206 sha1_match = re.match('^([A-Za-z0-9]{40})$', f.read(1024).rstrip()) |
169 if sha1_match: | 207 if sha1_match: |
170 work_queue.put( | 208 work_queue.put( |
171 (sha1_match.groups(1)[0], full_path.replace('.sha1', ''))) | 209 (sha1_match.groups(1)[0], full_path.replace('.sha1', ''))) |
172 work_queue_size += 1 | 210 work_queue_size += 1 |
173 else: | 211 else: |
174 if not ignore_errors: | 212 if not ignore_errors: |
175 raise InvalidFileError('No sha1 sum found in %s.' % filename) | 213 raise InvalidFileError('No sha1 sum found in %s.' % filename) |
176 print >> sys.stderr, 'No sha1 sum found in %s.' % filename | 214 print >> sys.stderr, 'No sha1 sum found in %s.' % filename |
(...skipping 56 matching lines...) | |
233 while True: | 271 while True: |
234 line = output_queue.get() | 272 line = output_queue.get() |
235 # Its plausible we want to print empty lines. | 273 # Its plausible we want to print empty lines. |
236 if line is None: | 274 if line is None: |
237 break | 275 break |
238 print line | 276 print line |
239 | 277 |
240 | 278 |
241 def download_from_google_storage( | 279 def download_from_google_storage( |
242 input_filename, base_url, gsutil, num_threads, directory, recursive, | 280 input_filename, base_url, gsutil, num_threads, directory, recursive, |
243 force, output, ignore_errors, sha1_file, verbose): | 281 force, output, ignore_errors, sha1_file, verbose, auto_platform): |
244 # Start up all the worker threads. | 282 # Start up all the worker threads. |
245 all_threads = [] | 283 all_threads = [] |
246 download_start = time.time() | 284 download_start = time.time() |
247 stdout_queue = Queue.Queue() | 285 stdout_queue = Queue.Queue() |
248 work_queue = Queue.Queue() | 286 work_queue = Queue.Queue() |
249 ret_codes = Queue.Queue() | 287 ret_codes = Queue.Queue() |
250 ret_codes.put((0, None)) | 288 ret_codes.put((0, None)) |
251 for thread_num in range(num_threads): | 289 for thread_num in range(num_threads): |
252 t = threading.Thread( | 290 t = threading.Thread( |
253 target=_downloader_worker_thread, | 291 target=_downloader_worker_thread, |
254 args=[thread_num, work_queue, force, base_url, | 292 args=[thread_num, work_queue, force, base_url, |
255 gsutil, stdout_queue, ret_codes, verbose]) | 293 gsutil, stdout_queue, ret_codes, verbose]) |
256 t.daemon = True | 294 t.daemon = True |
257 t.start() | 295 t.start() |
258 all_threads.append(t) | 296 all_threads.append(t) |
259 printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue]) | 297 printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue]) |
260 printer_thread.daemon = True | 298 printer_thread.daemon = True |
261 printer_thread.start() | 299 printer_thread.start() |
262 | 300 |
263 # Enumerate our work queue. | 301 # Enumerate our work queue. |
264 work_queue_size = enumerate_work_queue( | 302 work_queue_size = enumerate_work_queue( |
265 input_filename, work_queue, directory, recursive, | 303 input_filename, work_queue, directory, recursive, |
266 ignore_errors, output, sha1_file) | 304 ignore_errors, output, sha1_file, auto_platform) |
267 for _ in all_threads: | 305 for _ in all_threads: |
268 work_queue.put((None, None)) # Used to tell worker threads to stop. | 306 work_queue.put((None, None)) # Used to tell worker threads to stop. |
269 | 307 |
270 # Wait for all downloads to finish. | 308 # Wait for all downloads to finish. |
271 for t in all_threads: | 309 for t in all_threads: |
272 t.join() | 310 t.join() |
273 stdout_queue.put(None) | 311 stdout_queue.put(None) |
274 printer_thread.join() | 312 printer_thread.join() |
275 | 313 |
276 # See if we ran into any errors. | 314 # See if we ran into any errors. |
(...skipping 49 matching lines...) | |
326 help='Alias for "gsutil config". Run this if you want ' | 364 help='Alias for "gsutil config". Run this if you want ' |
327 'to initialize your saved Google Storage ' | 365 'to initialize your saved Google Storage ' |
328 'credentials.') | 366 'credentials.') |
329 parser.add_option('-n', '--no_auth', action='store_true', | 367 parser.add_option('-n', '--no_auth', action='store_true', |
330 help='Skip auth checking. Use if it\'s known that the ' | 368 help='Skip auth checking. Use if it\'s known that the ' |
331 'target bucket is a public bucket.') | 369 'target bucket is a public bucket.') |
332 parser.add_option('-p', '--platform', | 370 parser.add_option('-p', '--platform', |
333 help='A regular expression that is compared against ' | 371 help='A regular expression that is compared against ' |
334 'Python\'s sys.platform. If this option is specified, ' | 372 'Python\'s sys.platform. If this option is specified, ' |
335 'the download will happen only if there is a match.') | 373 'the download will happen only if there is a match.') |
374 parser.add_option('-a', '--auto_platform', | |
375 help='Detects if any parent folder of the target matches ' | |
376 '(win|mac|linux). If so, the script will only ' | |
M-A Ruel
2014/01/17 20:19:21
Sort the platform names in "(win|mac|linux)" alphabetically.
Ryan Tseng
2014/01/17 20:58:37
Done.
| |
377 'process files that are in the paths that ' | |
378 'that matches the current platform.') | |
336 parser.add_option('-v', '--verbose', action='store_true', | 379 parser.add_option('-v', '--verbose', action='store_true', |
337 help='Output extra diagnostic and progress information.') | 380 help='Output extra diagnostic and progress information.') |
338 | 381 |
339 (options, args) = parser.parse_args() | 382 (options, args) = parser.parse_args() |
340 | 383 |
341 # Make sure we should run at all based on platform matching. | 384 # Make sure we should run at all based on platform matching. |
342 if options.platform: | 385 if options.platform: |
386 if options.auto_platform: | |
387 parser.error('--platform can not be specified with --auto_platform') | |
388 return 1 | |
M-A Ruel
2014/01/17 20:19:21
parser.error() calls sys.exit(2), so there is no need for the return statement after it.
Ryan Tseng
2014/01/17 20:58:37
Done.
| |
343 if not re.match(options.platform, GetNormalizedPlatform()): | 389 if not re.match(options.platform, GetNormalizedPlatform()): |
344 if options.verbose: | 390 if options.verbose: |
345 print('The current platform doesn\'t match "%s", skipping.' % | 391 print('The current platform doesn\'t match "%s", skipping.' % |
346 options.platform) | 392 options.platform) |
347 return 0 | 393 return 0 |
348 | 394 |
349 # Set the boto file to /dev/null if we don't need auth. | 395 # Set the boto file to /dev/null if we don't need auth. |
350 if options.no_auth: | 396 if options.no_auth: |
351 options.boto = os.devnull | 397 options.boto = os.devnull |
352 | 398 |
(...skipping 16 matching lines...) | |
369 parser.error('Too many targets.') | 415 parser.error('Too many targets.') |
370 if not options.bucket: | 416 if not options.bucket: |
371 parser.error('Missing bucket. Specify bucket with --bucket.') | 417 parser.error('Missing bucket. Specify bucket with --bucket.') |
372 if options.sha1_file and options.directory: | 418 if options.sha1_file and options.directory: |
373 parser.error('Both --directory and --sha1_file are specified, ' | 419 parser.error('Both --directory and --sha1_file are specified, ' |
374 'can only specify one.') | 420 'can only specify one.') |
375 if options.recursive and not options.directory: | 421 if options.recursive and not options.directory: |
376 parser.error('--recursive specified but --directory not specified.') | 422 parser.error('--recursive specified but --directory not specified.') |
377 if options.output and options.directory: | 423 if options.output and options.directory: |
378 parser.error('--directory is specified, so --output has no effect.') | 424 parser.error('--directory is specified, so --output has no effect.') |
425 if (not (options.sha1_file or options.directory) | |
426 and options.auto_platform): | |
427 parser.error('--auto_platform must be specified with either ' | |
428 '--sha1_file or --directory') | |
429 | |
379 input_filename = args[0] | 430 input_filename = args[0] |
380 | 431 |
381 # Set output filename if not specified. | 432 # Set output filename if not specified. |
382 if not options.output and not options.directory: | 433 if not options.output and not options.directory: |
383 if not options.sha1_file: | 434 if not options.sha1_file: |
384 # Target is a sha1 sum, so output filename would also be the sha1 sum. | 435 # Target is a sha1 sum, so output filename would also be the sha1 sum. |
385 options.output = input_filename | 436 options.output = input_filename |
386 elif options.sha1_file: | 437 elif options.sha1_file: |
387 # Target is a .sha1 file. | 438 # Target is a .sha1 file. |
388 if not input_filename.endswith('.sha1'): | 439 if not input_filename.endswith('.sha1'): |
(...skipping 12 matching lines...) | |
401 % options.output) | 452 % options.output) |
402 | 453 |
403 # Check we have a valid bucket with valid permissions. | 454 # Check we have a valid bucket with valid permissions. |
404 base_url, code = check_bucket_permissions(options.bucket, gsutil) | 455 base_url, code = check_bucket_permissions(options.bucket, gsutil) |
405 if code: | 456 if code: |
406 return code | 457 return code |
407 | 458 |
408 return download_from_google_storage( | 459 return download_from_google_storage( |
409 input_filename, base_url, gsutil, options.num_threads, options.directory, | 460 input_filename, base_url, gsutil, options.num_threads, options.directory, |
410 options.recursive, options.force, options.output, options.ignore_errors, | 461 options.recursive, options.force, options.output, options.ignore_errors, |
411 options.sha1_file, options.verbose) | 462 options.sha1_file, options.verbose, options.auto_platform) |
412 | 463 |
413 | 464 |
414 if __name__ == '__main__': | 465 if __name__ == '__main__': |
415 sys.exit(main(sys.argv)) | 466 sys.exit(main(sys.argv)) |
OLD | NEW |