OLD | NEW |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. | 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 """Download files from Google Storage based on SHA1 sums.""" | 6 """Download files from Google Storage based on SHA1 sums.""" |
7 | 7 |
8 | 8 |
9 import hashlib | 9 import hashlib |
10 import optparse | 10 import optparse |
11 import os | 11 import os |
12 import Queue | 12 import Queue |
13 import re | 13 import re |
14 import stat | |
14 import sys | 15 import sys |
15 import threading | 16 import threading |
16 import time | 17 import time |
17 | 18 |
18 import subprocess2 | 19 import subprocess2 |
19 | 20 |
20 | 21 |
21 GSUTIL_DEFAULT_PATH = os.path.join( | 22 GSUTIL_DEFAULT_PATH = os.path.join( |
22 os.path.dirname(os.path.abspath(__file__)), | 23 os.path.dirname(os.path.abspath(__file__)), |
23 'third_party', 'gsutil', 'gsutil') | 24 'third_party', 'gsutil', 'gsutil') |
(...skipping 127 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
151 (sha1_match.groups(1)[0], full_path.replace('.sha1', ''))) | 152 (sha1_match.groups(1)[0], full_path.replace('.sha1', ''))) |
152 work_queue_size += 1 | 153 work_queue_size += 1 |
153 else: | 154 else: |
154 if not ignore_errors: | 155 if not ignore_errors: |
155 raise InvalidFileError('No sha1 sum found in %s.' % filename) | 156 raise InvalidFileError('No sha1 sum found in %s.' % filename) |
156 print >> sys.stderr, 'No sha1 sum found in %s.' % filename | 157 print >> sys.stderr, 'No sha1 sum found in %s.' % filename |
157 return work_queue_size | 158 return work_queue_size |
158 | 159 |
159 | 160 |
160 def _downloader_worker_thread(thread_num, q, force, base_url, | 161 def _downloader_worker_thread(thread_num, q, force, base_url, |
161 gsutil, out_q, ret_codes): | 162 gsutil, make_executable, out_q, ret_codes): |
162 while True: | 163 while True: |
163 input_sha1_sum, output_filename = q.get() | 164 input_sha1_sum, output_filename = q.get() |
164 if input_sha1_sum is None: | 165 if input_sha1_sum is None: |
165 return | 166 return |
166 if os.path.exists(output_filename) and not force: | 167 if os.path.exists(output_filename) and not force: |
167 if get_sha1(output_filename) == input_sha1_sum: | 168 if get_sha1(output_filename) == input_sha1_sum: |
168 out_q.put( | 169 out_q.put( |
169 '%d> File %s exists and SHA1 matches. Skipping.' % ( | 170 '%d> File %s exists and SHA1 matches. Skipping.' % ( |
170 thread_num, output_filename)) | 171 thread_num, output_filename)) |
171 continue | 172 continue |
172 # Check if file exists. | 173 # Check if file exists. |
173 file_url = '%s/%s' % (base_url, input_sha1_sum) | 174 file_url = '%s/%s' % (base_url, input_sha1_sum) |
174 if gsutil.check_call('ls', file_url)[0] != 0: | 175 if gsutil.check_call('ls', file_url)[0] != 0: |
175 out_q.put('%d> File %s for %s does not exist, skipping.' % ( | 176 out_q.put('%d> File %s for %s does not exist, skipping.' % ( |
176 thread_num, file_url, output_filename)) | 177 thread_num, file_url, output_filename)) |
177 ret_codes.put((1, 'File %s for %s does not exist.' % ( | 178 ret_codes.put((1, 'File %s for %s does not exist.' % ( |
178 file_url, output_filename))) | 179 file_url, output_filename))) |
179 continue | 180 continue |
180 # Fetch the file. | 181 # Fetch the file. |
181 out_q.put('%d> Downloading %s...' % ( | 182 out_q.put('%d> Downloading %s...' % ( |
182 thread_num, output_filename)) | 183 thread_num, output_filename)) |
183 code, _, err = gsutil.check_call('cp', '-q', file_url, output_filename) | 184 code, _, err = gsutil.check_call('cp', '-q', file_url, output_filename) |
184 if code != 0: | 185 if code != 0: |
185 out_q.put('%d> %s' % (thread_num, err)) | 186 out_q.put('%d> %s' % (thread_num, err)) |
186 ret_codes.put((code, err)) | 187 ret_codes.put((code, err)) |
187 | 188 |
189 # Mark executable if necessary. | |
190 if make_executable and not sys.platform.startswith('win'): | |
Ryan Tseng
2013/10/24 23:42:24
We discussed this offline, but posting a comment f
| |
191 st = os.stat(output_filename) | |
192 os.chmod(output_filename, st.st_mode | stat.S_IEXEC) | |
188 | 193 |
189 def printer_worker(output_queue): | 194 def printer_worker(output_queue): |
190 while True: | 195 while True: |
191 line = output_queue.get() | 196 line = output_queue.get() |
192 # Its plausible we want to print empty lines. | 197 # Its plausible we want to print empty lines. |
193 if line is None: | 198 if line is None: |
194 break | 199 break |
195 print line | 200 print line |
196 | 201 |
197 | 202 |
198 def download_from_google_storage( | 203 def download_from_google_storage( |
199 input_filename, base_url, gsutil, num_threads, directory, recursive, | 204 input_filename, base_url, gsutil, make_executable, num_threads, directory, |
200 force, output, ignore_errors, sha1_file): | 205 recursive, force, output, ignore_errors, sha1_file): |
201 # Start up all the worker threads. | 206 # Start up all the worker threads. |
202 all_threads = [] | 207 all_threads = [] |
203 download_start = time.time() | 208 download_start = time.time() |
204 stdout_queue = Queue.Queue() | 209 stdout_queue = Queue.Queue() |
205 work_queue = Queue.Queue() | 210 work_queue = Queue.Queue() |
206 ret_codes = Queue.Queue() | 211 ret_codes = Queue.Queue() |
207 ret_codes.put((0, None)) | 212 ret_codes.put((0, None)) |
208 for thread_num in range(num_threads): | 213 for thread_num in range(num_threads): |
209 t = threading.Thread( | 214 t = threading.Thread( |
210 target=_downloader_worker_thread, | 215 target=_downloader_worker_thread, |
211 args=[thread_num, work_queue, force, base_url, | 216 args=[thread_num, work_queue, force, base_url, |
212 gsutil, stdout_queue, ret_codes]) | 217 gsutil, make_executable, stdout_queue, ret_codes]) |
213 t.daemon = True | 218 t.daemon = True |
214 t.start() | 219 t.start() |
215 all_threads.append(t) | 220 all_threads.append(t) |
216 printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue]) | 221 printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue]) |
217 printer_thread.daemon = True | 222 printer_thread.daemon = True |
218 printer_thread.start() | 223 printer_thread.start() |
219 | 224 |
220 # Enumerate our work queue. | 225 # Enumerate our work queue. |
221 work_queue_size = enumerate_work_queue( | 226 work_queue_size = enumerate_work_queue( |
222 input_filename, work_queue, directory, recursive, | 227 input_filename, work_queue, directory, recursive, |
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
275 parser.add_option('-d', '--directory', action='store_true', | 280 parser.add_option('-d', '--directory', action='store_true', |
276 help='The target is a directory. ' | 281 help='The target is a directory. ' |
277 'Cannot be used with -s/--sha1_file.') | 282 'Cannot be used with -s/--sha1_file.') |
278 parser.add_option('-s', '--sha1_file', action='store_true', | 283 parser.add_option('-s', '--sha1_file', action='store_true', |
279 help='The target is a file containing a sha1 sum. ' | 284 help='The target is a file containing a sha1 sum. ' |
280 'Cannot be used with -d/--directory.') | 285 'Cannot be used with -d/--directory.') |
281 parser.add_option('-g', '--config', action='store_true', | 286 parser.add_option('-g', '--config', action='store_true', |
282 help='Alias for "gsutil config". Run this if you want ' | 287 help='Alias for "gsutil config". Run this if you want ' |
283 'to initialize your saved Google Storage ' | 288 'to initialize your saved Google Storage ' |
284 'credentials.') | 289 'credentials.') |
290 parser.add_option('', '--platform', | |
Ryan Tseng
2013/10/24 23:42:24
how about '-p'
| |
291 help='A regular expression that is compared against ' | |
292 'Python\'s sys.platform. If this option is specified, ' | |
293 'the download will happen only if there is a match.') | |
294 parser.add_option('', '--make_exec', action='store_true', | |
Ryan Tseng
2013/10/24 23:42:24
how about '-x'
| |
295 help='Mark the file(s) downloaded executable on Posix ' | |
296 'systems.') | |
285 | 297 |
286 (options, args) = parser.parse_args() | 298 (options, args) = parser.parse_args() |
287 # First, make sure we can find a working instance of gsutil. | 299 |
300 # Make sure we should run at all based on platform matching. | |
301 if options.platform: | |
302 platform_regexp = re.compile(options.platform) | |
303 if not platform_regexp.match(sys.platform): | |
304 return 0 # Skip rnuning on this platform | |
Ryan Tseng
2013/10/24 23:42:24
/s/rnuning/running/
Also let's print a helpful mes
| |
305 | |
306 # Make sure we can find a working instance of gsutil. | |
288 if os.path.exists(GSUTIL_DEFAULT_PATH): | 307 if os.path.exists(GSUTIL_DEFAULT_PATH): |
289 gsutil = Gsutil(GSUTIL_DEFAULT_PATH, boto_path=options.boto) | 308 gsutil = Gsutil(GSUTIL_DEFAULT_PATH, boto_path=options.boto) |
290 else: | 309 else: |
291 gsutil = None | 310 gsutil = None |
292 for path in os.environ["PATH"].split(os.pathsep): | 311 for path in os.environ["PATH"].split(os.pathsep): |
293 if os.path.exists(path) and 'gsutil' in os.listdir(path): | 312 if os.path.exists(path) and 'gsutil' in os.listdir(path): |
294 gsutil = Gsutil(os.path.join(path, 'gsutil'), boto_path=options.boto) | 313 gsutil = Gsutil(os.path.join(path, 'gsutil'), boto_path=options.boto) |
295 if not gsutil: | 314 if not gsutil: |
296 parser.error('gsutil not found in %s, bad depot_tools checkout?' % | 315 parser.error('gsutil not found in %s, bad depot_tools checkout?' % |
297 GSUTIL_DEFAULT_PATH) | 316 GSUTIL_DEFAULT_PATH) |
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
336 if os.path.exists(options.output): | 355 if os.path.exists(options.output): |
337 parser.error('Output file %s exists and --no_resume is specified.' | 356 parser.error('Output file %s exists and --no_resume is specified.' |
338 % options.output) | 357 % options.output) |
339 | 358 |
340 # Check we have a valid bucket with valid permissions. | 359 # Check we have a valid bucket with valid permissions. |
341 base_url, code = check_bucket_permissions(options.bucket, gsutil) | 360 base_url, code = check_bucket_permissions(options.bucket, gsutil) |
342 if code: | 361 if code: |
343 return code | 362 return code |
344 | 363 |
345 return download_from_google_storage( | 364 return download_from_google_storage( |
346 input_filename, base_url, gsutil, options.num_threads, options.directory, | 365 input_filename, base_url, gsutil, options.make_exec, options.num_threads, |
347 options.recursive, options.force, options.output, options.ignore_errors, | 366 options.directory, options.recursive, options.force, options.output, |
348 options.sha1_file) | 367 options.ignore_errors, options.sha1_file) |
349 | 368 |
350 | 369 |
351 if __name__ == '__main__': | 370 if __name__ == '__main__': |
352 sys.exit(main(sys.argv)) | 371 sys.exit(main(sys.argv)) |
OLD | NEW |