Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(102)

Side by Side Diff: download_from_google_storage.py

Issue 1209033006: Revert of Add support for tar.gz archive files to download from download_from_google_storage (Closed) Base URL: http://src.chromium.org/svn/trunk/tools/depot_tools/
Patch Set: Created 5 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | tests/download_from_google_storage_unittests.py » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be 3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file. 4 # found in the LICENSE file.
5 5
6 """Download files from Google Storage based on SHA1 sums.""" 6 """Download files from Google Storage based on SHA1 sums."""
7 7
8 8
9 import hashlib 9 import hashlib
10 import optparse 10 import optparse
11 import os 11 import os
12 import Queue 12 import Queue
13 import re 13 import re
14 import shutil
15 import stat 14 import stat
16 import sys 15 import sys
17 import tarfile
18 import threading 16 import threading
19 import time 17 import time
20 18
21 import subprocess2 19 import subprocess2
22 20
23 21
24 GSUTIL_DEFAULT_PATH = os.path.join( 22 GSUTIL_DEFAULT_PATH = os.path.join(
25 os.path.dirname(os.path.abspath(__file__)), 'gsutil.py') 23 os.path.dirname(os.path.abspath(__file__)), 'gsutil.py')
26 # Maps sys.platform to what we actually want to call them. 24 # Maps sys.platform to what we actually want to call them.
27 PLATFORM_MAPPING = { 25 PLATFORM_MAPPING = {
(...skipping 16 matching lines...) Expand all
44 pass 42 pass
45 43
46 44
def GetNormalizedPlatform():
  """Return sys.platform, normalized so cygwin reports as 'win32'.

  Native Python on Windows reports 'win32'; under cygwin Python reports
  'cygwin' instead, so map that back to 'win32' for consistent handling.
  """
  platform = sys.platform
  return 'win32' if platform == 'cygwin' else platform
53 51
52
54 # Common utilities 53 # Common utilities
55 class Gsutil(object): 54 class Gsutil(object):
56 """Call gsutil with some predefined settings. This is a convenience object, 55 """Call gsutil with some predefined settings. This is a convenience object,
57 and is also immutable.""" 56 and is also immutable."""
58 def __init__(self, path, boto_path=None, timeout=None, version='4.7'): 57 def __init__(self, path, boto_path=None, timeout=None, version='4.7'):
59 if not os.path.exists(path): 58 if not os.path.exists(path):
60 raise FileNotFoundError('GSUtil not found in %s' % path) 59 raise FileNotFoundError('GSUtil not found in %s' % path)
61 self.path = path 60 self.path = path
62 self.timeout = timeout 61 self.timeout = timeout
63 self.boto_path = boto_path 62 self.boto_path = boto_path
(...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after
180 work_queue.put( 179 work_queue.put(
181 (sha1_match.groups(1)[0], full_path.replace('.sha1', ''))) 180 (sha1_match.groups(1)[0], full_path.replace('.sha1', '')))
182 work_queue_size += 1 181 work_queue_size += 1
183 else: 182 else:
184 if not ignore_errors: 183 if not ignore_errors:
185 raise InvalidFileError('No sha1 sum found in %s.' % filename) 184 raise InvalidFileError('No sha1 sum found in %s.' % filename)
186 print >> sys.stderr, 'No sha1 sum found in %s.' % filename 185 print >> sys.stderr, 'No sha1 sum found in %s.' % filename
187 return work_queue_size 186 return work_queue_size
188 187
189 188
190 def _validate_tar_file(tar, prefix):
191 def _validate(tarinfo):
192 """Returns false if the tarinfo is something we explicitly forbid."""
193 if tarinfo.issym() or tarinfo.islnk():
194 return False
195 if '..' in tarinfo.name or not tarinfo.name.startswith(prefix):
196 return False
197 return True
198 return all(map(_validate, tar.getmembers()))
199
200 def _downloader_worker_thread(thread_num, q, force, base_url, 189 def _downloader_worker_thread(thread_num, q, force, base_url,
201 gsutil, out_q, ret_codes, verbose, extract, 190 gsutil, out_q, ret_codes, verbose):
202 delete=True):
203 while True: 191 while True:
204 input_sha1_sum, output_filename = q.get() 192 input_sha1_sum, output_filename = q.get()
205 if input_sha1_sum is None: 193 if input_sha1_sum is None:
206 return 194 return
207 if os.path.exists(output_filename) and not force: 195 if os.path.exists(output_filename) and not force:
208 if get_sha1(output_filename) == input_sha1_sum: 196 if get_sha1(output_filename) == input_sha1_sum:
209 if verbose: 197 if verbose:
210 out_q.put( 198 out_q.put(
211 '%d> File %s exists and SHA1 matches. Skipping.' % ( 199 '%d> File %s exists and SHA1 matches. Skipping.' % (
212 thread_num, output_filename)) 200 thread_num, output_filename))
(...skipping 10 matching lines...) Expand all
223 else: 211 else:
224 # Other error, probably auth related (bad ~/.boto, etc). 212 # Other error, probably auth related (bad ~/.boto, etc).
225 out_q.put('%d> Failed to fetch file %s for %s, skipping. [Err: %s]' % ( 213 out_q.put('%d> Failed to fetch file %s for %s, skipping. [Err: %s]' % (
226 thread_num, file_url, output_filename, err)) 214 thread_num, file_url, output_filename, err))
227 ret_codes.put((1, 'Failed to fetch file %s for %s. [Err: %s]' % ( 215 ret_codes.put((1, 'Failed to fetch file %s for %s. [Err: %s]' % (
228 file_url, output_filename, err))) 216 file_url, output_filename, err)))
229 continue 217 continue
230 # Fetch the file. 218 # Fetch the file.
231 out_q.put('%d> Downloading %s...' % (thread_num, output_filename)) 219 out_q.put('%d> Downloading %s...' % (thread_num, output_filename))
232 try: 220 try:
233 if delete: 221 os.remove(output_filename) # Delete the file if it exists already.
234 os.remove(output_filename) # Delete the file if it exists already.
235 except OSError: 222 except OSError:
236 if os.path.exists(output_filename): 223 if os.path.exists(output_filename):
237 out_q.put('%d> Warning: deleting %s failed.' % ( 224 out_q.put('%d> Warning: deleting %s failed.' % (
238 thread_num, output_filename)) 225 thread_num, output_filename))
239 code, _, err = gsutil.check_call('cp', file_url, output_filename) 226 code, _, err = gsutil.check_call('cp', file_url, output_filename)
240 if code != 0: 227 if code != 0:
241 out_q.put('%d> %s' % (thread_num, err)) 228 out_q.put('%d> %s' % (thread_num, err))
242 ret_codes.put((code, err)) 229 ret_codes.put((code, err))
243 230
244 if extract:
245 if (not tarfile.is_tarfile(output_filename)
246 or not output_filename.endswith('.tar.gz')):
247 out_q.put('%d> Error: %s is not a tar.gz archive.' % (
248 thread_num, output_filename))
249 ret_codes.put((1, '%s is not a tar.gz archive.' % (output_filename)))
250 continue
251 with tarfile.open(output_filename, 'r:gz') as tar:
252 dirname = os.path.dirname(os.path.abspath(output_filename))
253 extract_dir = output_filename[0:len(output_filename)-7]
254 if not _validate_tar_file(tar, os.path.basename(extract_dir)):
255 out_q.put('%d> Error: %s contains files outside %s.' % (
256 thread_num, output_filename, extract_dir))
257 ret_codes.put((1, '%s contains invalid entries.' % (output_filename)))
258 continue
259 if os.path.exists(extract_dir):
260 try:
261 shutil.rmtree(extract_dir)
262 out_q.put('%d> Removed %s...' % (thread_num, extract_dir))
263 except OSError:
264 out_q.put('%d> Warning: Can\'t delete: %s' % (
265 thread_num, extract_dir))
266 ret_codes.put((1, 'Can\'t delete %s.' % (extract_dir)))
267 continue
268 out_q.put('%d> Extracting %d entries from %s to %s' %
269 (thread_num, len(tar.getmembers()),output_filename,
270 extract_dir))
271 tar.extractall(path=dirname)
272 # Set executable bit. 231 # Set executable bit.
273 if sys.platform == 'cygwin': 232 if sys.platform == 'cygwin':
274 # Under cygwin, mark all files as executable. The executable flag in 233 # Under cygwin, mark all files as executable. The executable flag in
275 # Google Storage will not be set when uploading from Windows, so if 234 # Google Storage will not be set when uploading from Windows, so if
276 # this script is running under cygwin and we're downloading an 235 # this script is running under cygwin and we're downloading an
277 # executable, it will be unrunnable from inside cygwin without this. 236 # executable, it will be unrunnable from inside cygwin without this.
278 st = os.stat(output_filename) 237 st = os.stat(output_filename)
279 os.chmod(output_filename, st.st_mode | stat.S_IEXEC) 238 os.chmod(output_filename, st.st_mode | stat.S_IEXEC)
280 elif sys.platform != 'win32': 239 elif sys.platform != 'win32':
281 # On non-Windows platforms, key off of the custom header 240 # On non-Windows platforms, key off of the custom header
(...skipping 10 matching lines...) Expand all
292 while True: 251 while True:
293 line = output_queue.get() 252 line = output_queue.get()
294 # Its plausible we want to print empty lines. 253 # Its plausible we want to print empty lines.
295 if line is None: 254 if line is None:
296 break 255 break
297 print line 256 print line
298 257
299 258
300 def download_from_google_storage( 259 def download_from_google_storage(
301 input_filename, base_url, gsutil, num_threads, directory, recursive, 260 input_filename, base_url, gsutil, num_threads, directory, recursive,
302 force, output, ignore_errors, sha1_file, verbose, auto_platform, extract): 261 force, output, ignore_errors, sha1_file, verbose, auto_platform):
303 # Start up all the worker threads. 262 # Start up all the worker threads.
304 all_threads = [] 263 all_threads = []
305 download_start = time.time() 264 download_start = time.time()
306 stdout_queue = Queue.Queue() 265 stdout_queue = Queue.Queue()
307 work_queue = Queue.Queue() 266 work_queue = Queue.Queue()
308 ret_codes = Queue.Queue() 267 ret_codes = Queue.Queue()
309 ret_codes.put((0, None)) 268 ret_codes.put((0, None))
310 for thread_num in range(num_threads): 269 for thread_num in range(num_threads):
311 t = threading.Thread( 270 t = threading.Thread(
312 target=_downloader_worker_thread, 271 target=_downloader_worker_thread,
313 args=[thread_num, work_queue, force, base_url, 272 args=[thread_num, work_queue, force, base_url,
314 gsutil, stdout_queue, ret_codes, verbose, extract]) 273 gsutil, stdout_queue, ret_codes, verbose])
315 t.daemon = True 274 t.daemon = True
316 t.start() 275 t.start()
317 all_threads.append(t) 276 all_threads.append(t)
318 printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue]) 277 printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue])
319 printer_thread.daemon = True 278 printer_thread.daemon = True
320 printer_thread.start() 279 printer_thread.start()
321 280
322 # Enumerate our work queue. 281 # Enumerate our work queue.
323 work_queue_size = enumerate_work_queue( 282 work_queue_size = enumerate_work_queue(
324 input_filename, work_queue, directory, recursive, 283 input_filename, work_queue, directory, recursive,
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
392 parser.add_option('-p', '--platform', 351 parser.add_option('-p', '--platform',
393 help='A regular expression that is compared against ' 352 help='A regular expression that is compared against '
394 'Python\'s sys.platform. If this option is specified, ' 353 'Python\'s sys.platform. If this option is specified, '
395 'the download will happen only if there is a match.') 354 'the download will happen only if there is a match.')
396 parser.add_option('-a', '--auto_platform', 355 parser.add_option('-a', '--auto_platform',
397 action='store_true', 356 action='store_true',
398 help='Detects if any parent folder of the target matches ' 357 help='Detects if any parent folder of the target matches '
399 '(linux|mac|win). If so, the script will only ' 358 '(linux|mac|win). If so, the script will only '
400 'process files that are in the paths that ' 359 'process files that are in the paths that '
401 'that matches the current platform.') 360 'that matches the current platform.')
402 parser.add_option('-u', '--extract',
403 action='store_true',
404 help='Extract a downloaded tar.gz file. '
405 'Leaves the tar.gz file around for sha1 verification'
406 'If a directory with the same name as the tar.gz '
407 'file already exists, is deleted (to get a '
408 'clean state in case of update.)')
409 parser.add_option('-v', '--verbose', action='store_true', 361 parser.add_option('-v', '--verbose', action='store_true',
410 help='Output extra diagnostic and progress information.') 362 help='Output extra diagnostic and progress information.')
411 363
412 (options, args) = parser.parse_args() 364 (options, args) = parser.parse_args()
413 365
414 # Make sure we should run at all based on platform matching. 366 # Make sure we should run at all based on platform matching.
415 if options.platform: 367 if options.platform:
416 if options.auto_platform: 368 if options.auto_platform:
417 parser.error('--platform can not be specified with --auto_platform') 369 parser.error('--platform can not be specified with --auto_platform')
418 if not re.match(options.platform, GetNormalizedPlatform()): 370 if not re.match(options.platform, GetNormalizedPlatform()):
(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after
492 if not options.directory and not options.force and not options.no_resume: 444 if not options.directory and not options.force and not options.no_resume:
493 if os.path.exists(options.output): 445 if os.path.exists(options.output):
494 parser.error('Output file %s exists and --no_resume is specified.' 446 parser.error('Output file %s exists and --no_resume is specified.'
495 % options.output) 447 % options.output)
496 448
497 base_url = 'gs://%s' % options.bucket 449 base_url = 'gs://%s' % options.bucket
498 450
499 return download_from_google_storage( 451 return download_from_google_storage(
500 input_filename, base_url, gsutil, options.num_threads, options.directory, 452 input_filename, base_url, gsutil, options.num_threads, options.directory,
501 options.recursive, options.force, options.output, options.ignore_errors, 453 options.recursive, options.force, options.output, options.ignore_errors,
502 options.sha1_file, options.verbose, options.auto_platform, 454 options.sha1_file, options.verbose, options.auto_platform)
503 options.extract)
504 455
505 456
506 if __name__ == '__main__': 457 if __name__ == '__main__':
507 sys.exit(main(sys.argv)) 458 sys.exit(main(sys.argv))
OLDNEW
« no previous file with comments | « no previous file | tests/download_from_google_storage_unittests.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698