download_from_google_storage.py - Issue 1209033006: Revert of Add support for tar.gz archive files to download from download_from_google_storage

Side by Side Diff: download_from_google_storage.py

Issue 1209033006: Revert of Add support for tar.gz archive files to download from download_from_google_storage (Closed) Base URL: http://src.chromium.org/svn/trunk/tools/depot_tools/

Patch Set: Created 5 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
1 #!/usr/bin/env python	1 #!/usr/bin/env python

2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.	2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.

3 # Use of this source code is governed by a BSD-style license that can be	3 # Use of this source code is governed by a BSD-style license that can be

4 # found in the LICENSE file.	4 # found in the LICENSE file.

5	5

6 """Download files from Google Storage based on SHA1 sums."""	6 """Download files from Google Storage based on SHA1 sums."""

7	7

8	8

9 import hashlib	9 import hashlib

10 import optparse	10 import optparse

11 import os	11 import os

12 import Queue	12 import Queue

13 import re	13 import re

14 import shutil

15 import stat	14 import stat

16 import sys	15 import sys

17 import tarfile

18 import threading	16 import threading

19 import time	17 import time

20	18

21 import subprocess2	19 import subprocess2

22	20

23	21

24 GSUTIL_DEFAULT_PATH = os.path.join(	22 GSUTIL_DEFAULT_PATH = os.path.join(

25 os.path.dirname(os.path.abspath(__file__)), 'gsutil.py')	23 os.path.dirname(os.path.abspath(__file__)), 'gsutil.py')

26 # Maps sys.platform to what we actually want to call them.	24 # Maps sys.platform to what we actually want to call them.

27 PLATFORM_MAPPING = {	25 PLATFORM_MAPPING = {

(...skipping 16 matching lines...) Expand all Loading...
44 pass	42 pass

45	43

46	44

47 def GetNormalizedPlatform():	45 def GetNormalizedPlatform():

48 """Returns the result of sys.platform accounting for cygwin.	46 """Returns the result of sys.platform accounting for cygwin.

49 Under cygwin, this will always return "win32" like the native Python."""	47 Under cygwin, this will always return "win32" like the native Python."""

50 if sys.platform == 'cygwin':	48 if sys.platform == 'cygwin':

51 return 'win32'	49 return 'win32'

52 return sys.platform	50 return sys.platform

53	51

	52

54 # Common utilities	53 # Common utilities

55 class Gsutil(object):	54 class Gsutil(object):

56 """Call gsutil with some predefined settings. This is a convenience object,	55 """Call gsutil with some predefined settings. This is a convenience object,

57 and is also immutable."""	56 and is also immutable."""

58 def __init__(self, path, boto_path=None, timeout=None, version='4.7'):	57 def __init__(self, path, boto_path=None, timeout=None, version='4.7'):

59 if not os.path.exists(path):	58 if not os.path.exists(path):

60 raise FileNotFoundError('GSUtil not found in %s' % path)	59 raise FileNotFoundError('GSUtil not found in %s' % path)

61 self.path = path	60 self.path = path

62 self.timeout = timeout	61 self.timeout = timeout

63 self.boto_path = boto_path	62 self.boto_path = boto_path

(...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
180 work_queue.put(	179 work_queue.put(

181 (sha1_match.groups(1)[0], full_path.replace('.sha1', '')))	180 (sha1_match.groups(1)[0], full_path.replace('.sha1', '')))

182 work_queue_size += 1	181 work_queue_size += 1

183 else:	182 else:

184 if not ignore_errors:	183 if not ignore_errors:

185 raise InvalidFileError('No sha1 sum found in %s.' % filename)	184 raise InvalidFileError('No sha1 sum found in %s.' % filename)

186 print >> sys.stderr, 'No sha1 sum found in %s.' % filename	185 print >> sys.stderr, 'No sha1 sum found in %s.' % filename

187 return work_queue_size	186 return work_queue_size

188	187

189	188

190 def _validate_tar_file(tar, prefix):

191 def _validate(tarinfo):

192 """Returns false if the tarinfo is something we explicitly forbid."""

193 if tarinfo.issym() or tarinfo.islnk():

194 return False

195 if '..' in tarinfo.name or not tarinfo.name.startswith(prefix):

196 return False

197 return True

198 return all(map(_validate, tar.getmembers()))

199

200 def _downloader_worker_thread(thread_num, q, force, base_url,	189 def _downloader_worker_thread(thread_num, q, force, base_url,

201 gsutil, out_q, ret_codes, verbose, extract,	190 gsutil, out_q, ret_codes, verbose):

202 delete=True):

203 while True:	191 while True:

204 input_sha1_sum, output_filename = q.get()	192 input_sha1_sum, output_filename = q.get()

205 if input_sha1_sum is None:	193 if input_sha1_sum is None:

206 return	194 return

207 if os.path.exists(output_filename) and not force:	195 if os.path.exists(output_filename) and not force:

208 if get_sha1(output_filename) == input_sha1_sum:	196 if get_sha1(output_filename) == input_sha1_sum:

209 if verbose:	197 if verbose:

210 out_q.put(	198 out_q.put(

211 '%d> File %s exists and SHA1 matches. Skipping.' % (	199 '%d> File %s exists and SHA1 matches. Skipping.' % (

212 thread_num, output_filename))	200 thread_num, output_filename))

(...skipping 10 matching lines...) Expand all Loading...
223 else:	211 else:

224 # Other error, probably auth related (bad ~/.boto, etc).	212 # Other error, probably auth related (bad ~/.boto, etc).

225 out_q.put('%d> Failed to fetch file %s for %s, skipping. [Err: %s]' % (	213 out_q.put('%d> Failed to fetch file %s for %s, skipping. [Err: %s]' % (

226 thread_num, file_url, output_filename, err))	214 thread_num, file_url, output_filename, err))

227 ret_codes.put((1, 'Failed to fetch file %s for %s. [Err: %s]' % (	215 ret_codes.put((1, 'Failed to fetch file %s for %s. [Err: %s]' % (

228 file_url, output_filename, err)))	216 file_url, output_filename, err)))

229 continue	217 continue

230 # Fetch the file.	218 # Fetch the file.

231 out_q.put('%d> Downloading %s...' % (thread_num, output_filename))	219 out_q.put('%d> Downloading %s...' % (thread_num, output_filename))

232 try:	220 try:

233 if delete:	221 os.remove(output_filename) # Delete the file if it exists already.

234 os.remove(output_filename) # Delete the file if it exists already.

235 except OSError:	222 except OSError:

236 if os.path.exists(output_filename):	223 if os.path.exists(output_filename):

237 out_q.put('%d> Warning: deleting %s failed.' % (	224 out_q.put('%d> Warning: deleting %s failed.' % (

238 thread_num, output_filename))	225 thread_num, output_filename))

239 code, _, err = gsutil.check_call('cp', file_url, output_filename)	226 code, _, err = gsutil.check_call('cp', file_url, output_filename)

240 if code != 0:	227 if code != 0:

241 out_q.put('%d> %s' % (thread_num, err))	228 out_q.put('%d> %s' % (thread_num, err))

242 ret_codes.put((code, err))	229 ret_codes.put((code, err))

243	230

244 if extract:

245 if (not tarfile.is_tarfile(output_filename)

246 or not output_filename.endswith('.tar.gz')):

247 out_q.put('%d> Error: %s is not a tar.gz archive.' % (

248 thread_num, output_filename))

249 ret_codes.put((1, '%s is not a tar.gz archive.' % (output_filename)))

250 continue

251 with tarfile.open(output_filename, 'r:gz') as tar:

252 dirname = os.path.dirname(os.path.abspath(output_filename))

253 extract_dir = output_filename[0:len(output_filename)-7]

254 if not _validate_tar_file(tar, os.path.basename(extract_dir)):

255 out_q.put('%d> Error: %s contains files outside %s.' % (

256 thread_num, output_filename, extract_dir))

257 ret_codes.put((1, '%s contains invalid entries.' % (output_filename)))

258 continue

259 if os.path.exists(extract_dir):

260 try:

261 shutil.rmtree(extract_dir)

262 out_q.put('%d> Removed %s...' % (thread_num, extract_dir))

263 except OSError:

264 out_q.put('%d> Warning: Can\'t delete: %s' % (

265 thread_num, extract_dir))

266 ret_codes.put((1, 'Can\'t delete %s.' % (extract_dir)))

267 continue

268 out_q.put('%d> Extracting %d entries from %s to %s' %

269 (thread_num, len(tar.getmembers()),output_filename,

270 extract_dir))

271 tar.extractall(path=dirname)

272 # Set executable bit.	231 # Set executable bit.

273 if sys.platform == 'cygwin':	232 if sys.platform == 'cygwin':

274 # Under cygwin, mark all files as executable. The executable flag in	233 # Under cygwin, mark all files as executable. The executable flag in

275 # Google Storage will not be set when uploading from Windows, so if	234 # Google Storage will not be set when uploading from Windows, so if

276 # this script is running under cygwin and we're downloading an	235 # this script is running under cygwin and we're downloading an

277 # executable, it will be unrunnable from inside cygwin without this.	236 # executable, it will be unrunnable from inside cygwin without this.

278 st = os.stat(output_filename)	237 st = os.stat(output_filename)

279 os.chmod(output_filename, st.st_mode | stat.S_IEXEC)	238 os.chmod(output_filename, st.st_mode | stat.S_IEXEC)

280 elif sys.platform != 'win32':	239 elif sys.platform != 'win32':

281 # On non-Windows platforms, key off of the custom header	240 # On non-Windows platforms, key off of the custom header

(...skipping 10 matching lines...) Expand all Loading...
292 while True:	251 while True:

293 line = output_queue.get()	252 line = output_queue.get()

294 # Its plausible we want to print empty lines.	253 # Its plausible we want to print empty lines.

295 if line is None:	254 if line is None:

296 break	255 break

297 print line	256 print line

298	257

299	258

300 def download_from_google_storage(	259 def download_from_google_storage(

301 input_filename, base_url, gsutil, num_threads, directory, recursive,	260 input_filename, base_url, gsutil, num_threads, directory, recursive,

302 force, output, ignore_errors, sha1_file, verbose, auto_platform, extract):	261 force, output, ignore_errors, sha1_file, verbose, auto_platform):

303 # Start up all the worker threads.	262 # Start up all the worker threads.

304 all_threads = []	263 all_threads = []

305 download_start = time.time()	264 download_start = time.time()

306 stdout_queue = Queue.Queue()	265 stdout_queue = Queue.Queue()

307 work_queue = Queue.Queue()	266 work_queue = Queue.Queue()

308 ret_codes = Queue.Queue()	267 ret_codes = Queue.Queue()

309 ret_codes.put((0, None))	268 ret_codes.put((0, None))

310 for thread_num in range(num_threads):	269 for thread_num in range(num_threads):

311 t = threading.Thread(	270 t = threading.Thread(

312 target=_downloader_worker_thread,	271 target=_downloader_worker_thread,

313 args=[thread_num, work_queue, force, base_url,	272 args=[thread_num, work_queue, force, base_url,

314 gsutil, stdout_queue, ret_codes, verbose, extract])	273 gsutil, stdout_queue, ret_codes, verbose])

315 t.daemon = True	274 t.daemon = True

316 t.start()	275 t.start()

317 all_threads.append(t)	276 all_threads.append(t)

318 printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue])	277 printer_thread = threading.Thread(target=printer_worker, args=[stdout_queue])

319 printer_thread.daemon = True	278 printer_thread.daemon = True

320 printer_thread.start()	279 printer_thread.start()

321	280

322 # Enumerate our work queue.	281 # Enumerate our work queue.

323 work_queue_size = enumerate_work_queue(	282 work_queue_size = enumerate_work_queue(

324 input_filename, work_queue, directory, recursive,	283 input_filename, work_queue, directory, recursive,

(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
392 parser.add_option('-p', '--platform',	351 parser.add_option('-p', '--platform',

393 help='A regular expression that is compared against '	352 help='A regular expression that is compared against '

394 'Python\'s sys.platform. If this option is specified, '	353 'Python\'s sys.platform. If this option is specified, '

395 'the download will happen only if there is a match.')	354 'the download will happen only if there is a match.')

396 parser.add_option('-a', '--auto_platform',	355 parser.add_option('-a', '--auto_platform',

397 action='store_true',	356 action='store_true',

398 help='Detects if any parent folder of the target matches '	357 help='Detects if any parent folder of the target matches '

399 '(linux|mac|win). If so, the script will only '	358 '(linux|mac|win). If so, the script will only '

400 'process files that are in the paths that '	359 'process files that are in the paths that '

401 'that matches the current platform.')	360 'that matches the current platform.')

402 parser.add_option('-u', '--extract',

403 action='store_true',

404 help='Extract a downloaded tar.gz file. '

405 'Leaves the tar.gz file around for sha1 verification'

406 'If a directory with the same name as the tar.gz '

407 'file already exists, is deleted (to get a '

408 'clean state in case of update.)')

409 parser.add_option('-v', '--verbose', action='store_true',	361 parser.add_option('-v', '--verbose', action='store_true',

410 help='Output extra diagnostic and progress information.')	362 help='Output extra diagnostic and progress information.')

411	363

412 (options, args) = parser.parse_args()	364 (options, args) = parser.parse_args()

413	365

414 # Make sure we should run at all based on platform matching.	366 # Make sure we should run at all based on platform matching.

415 if options.platform:	367 if options.platform:

416 if options.auto_platform:	368 if options.auto_platform:

417 parser.error('--platform can not be specified with --auto_platform')	369 parser.error('--platform can not be specified with --auto_platform')

418 if not re.match(options.platform, GetNormalizedPlatform()):	370 if not re.match(options.platform, GetNormalizedPlatform()):

(...skipping 73 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading...
492 if not options.directory and not options.force and not options.no_resume:	444 if not options.directory and not options.force and not options.no_resume:

493 if os.path.exists(options.output):	445 if os.path.exists(options.output):

494 parser.error('Output file %s exists and --no_resume is specified.'	446 parser.error('Output file %s exists and --no_resume is specified.'

495 % options.output)	447 % options.output)

496	448

497 base_url = 'gs://%s' % options.bucket	449 base_url = 'gs://%s' % options.bucket

498	450

499 return download_from_google_storage(	451 return download_from_google_storage(

500 input_filename, base_url, gsutil, options.num_threads, options.directory,	452 input_filename, base_url, gsutil, options.num_threads, options.directory,

501 options.recursive, options.force, options.output, options.ignore_errors,	453 options.recursive, options.force, options.output, options.ignore_errors,

502 options.sha1_file, options.verbose, options.auto_platform,	454 options.sha1_file, options.verbose, options.auto_platform)

503 options.extract)

504	455

505	456

506 if __name__ == '__main__':	457 if __name__ == '__main__':

507 sys.exit(main(sys.argv))	458 sys.exit(main(sys.argv))

OLD	NEW

« no previous file with comments | « no previous file | tests/download_from_google_storage_unittests.py » ('j') | no next file with comments »