Index: tools/download_and_extract.py |
diff --git a/tools/download_and_extract.py b/tools/download_and_extract.py |
new file mode 100755 |
index 0000000000000000000000000000000000000000..d55ecb8e65b31e3ee6543cab646e5933cde6b47c |
--- /dev/null |
+++ b/tools/download_and_extract.py |
@@ -0,0 +1,480 @@ |
+#!/usr/bin/env python |
+# Copyright 2014 The Chromium Authors. All rights reserved. |
Lei Zhang
2014/04/01 05:10:48
nit: no (c) in new copyright headers.
mithro-old
2014/04/02 04:22:33
Done.
|
+# Use of this source code is governed by a BSD-style license that can be |
+# found in the LICENSE file. |
+# vim: set ts=2 sw=2 et sts=2 ai: |
+ |
+"""Download files for your platform and extract their contents into a directory. |
+ |
+Uses download_from_google_storage for the actual download process. |
+Uses tar to extract the files from the archive. |
+ |
+The input is a list of sha1 files suitable for download_from_google_storage. |
+""" |
+ |
+""" |
+TODO: Replace the other download/extract scripts that exist in the chrome tree. |
+ |
+Replace chrome/installer/linux/sysroot_scripts/install-debian.wheezy.sysroot.py |
+ download_and_extract.py \\ |
+ --default-file-arch Linux \\ |
+ --bucket chrome-linux-sysroot \\ |
+ --extract ????/debian_wheezy_???-sysroot \\ |
+ <sha1 files??> |
+ |
+Replace build/linux/install-arm-sysroot.py |
+ download_and_extract.py \\ |
+ --bucket nativeclient-archive2 \\ |
+ --extract build/linux/arm-sys-root \\ |
+ <sha1 files??> |
+ |
+Replace download part of tools/clang/scripts/update.(py|sh) |
+ download_and_extract.py \\ |
+ --bucket chromium-browser-clang \\ |
+ --extract third_party/llvm-build/Release+Asserts \\ |
+ <sha1 files??> |
+""" |
+ |
+ |
+import optparse |
+import os |
+import os.path |
Lei Zhang
2014/04/01 05:10:48
already covered under import os.
mithro-old
2014/04/02 04:22:33
Fixed.
Old habit from when os.path you had to bac
|
+import re |
+import shutil |
+import subprocess |
+import sys |
+ |
+ |
+def MatchPatterns(mapping, string, error): |
+  """Return the result paired with the first pattern that matches. |
+ |
+  Args: |
+    mapping: Ordered iterable of (regex, result) pairs; the first pair whose |
+      regex is found wins, so order matters. |
+    string: Text to search. The search is case-insensitive and unanchored. |
+    error: %-format template taking one %s, filled in with the input text. |
+ |
+  Returns: |
+    The result element of the first matching pair. |
+ |
+  Raises: |
+    ValueError: If no pattern is found in the text. |
+  """ |
+  for regex, value in mapping: |
+    if re.search(regex, string, re.IGNORECASE) is None: |
+      continue |
+    return value |
+  raise ValueError(error % string) |
+ |
+ |
+def ExtractAndNormalizeArch(string): |
+ """Extract and normalize an architecture string. |
+ >>> # Linux arch / uname output |
+ >>> ExtractAndNormalizeArch('x86_64') |
+ 'amd64' |
+ >>> ExtractAndNormalizeArch('i686') |
+ 'i386' |
+ >>> ExtractAndNormalizeArch('i386') |
+ 'i386' |
+ |
+ >>> # autoconf "platform" tuple |
+ >>> ExtractAndNormalizeArch('i686-pc-linux-gnu') |
+ 'i386' |
+ >>> ExtractAndNormalizeArch('x86_64-unknown-linux-gnu') |
+ 'amd64' |
+ |
+ >>> # platform.machine() |
+ >>> ExtractAndNormalizeArch('i386') |
+ 'i386' |
+ >>> ExtractAndNormalizeArch('x86_64') |
+ 'amd64' |
+ |
+ >>> # GYP Defines |
+ >>> ExtractAndNormalizeArch('target_arch=x64') |
+ 'amd64' |
+ >>> ExtractAndNormalizeArch('target_arch=ia32') |
+ 'i386' |
+ >>> ExtractAndNormalizeArch('x64') |
+ 'amd64' |
+ >>> ExtractAndNormalizeArch('ia32') |
+ 'i386' |
+ |
+ >>> # Empty Arch |
+ >>> ExtractAndNormalizeArch('') |
+ '' |
+ """ |
+ ARCH_MAPPING = [ |
+ # Linux 'arch' outputs |
+ ('i[3456]?86', 'i386'), |
+ ('i86pc', 'i386'), |
+ ('x86_64', 'amd64'), |
+ ('amd64', 'amd64'), |
+ ('mips64', 'mips64'), # Must be before mips |
+ ('mips', 'mips'), |
+ ('arm', 'arm'), |
+ ('aarch', 'arm64'), |
+ # Windows |
+ ('win32', 'i386'), |
+ ('win64', 'amd64'), |
+ # GYP defines |
+ ('ia32', 'i386'), |
+ ('x64', 'amd64'), |
+ # Empty arch |
+ ('^$', ''), |
+ ] |
+ return MatchPatterns( |
+ ARCH_MAPPING, string, |
+ 'Was not able to extract architecture from %s') |
+ |
+ |
+def ExtractAndNormalizeOS(string): |
+ """Extract and normalize an OS string. |
+ |
+ >>> # Used by download_from_storage |
+ >>> # sys.platform |
+ >>> ExtractAndNormalizeOS('linux2') |
+ 'Linux' |
+ >>> ExtractAndNormalizeOS('darwin') |
+ 'Mac' |
+ >>> ExtractAndNormalizeOS('cygwin') |
+ 'Win' |
+ >>> ExtractAndNormalizeOS('win32') |
+ 'Win' |
+ >>> ExtractAndNormalizeOS('win64') |
+ 'Win' |
+ |
+ >>> # platform.system() |
+ >>> ExtractAndNormalizeOS('Linux') |
+ 'Linux' |
+ >>> ExtractAndNormalizeOS('Windows') |
+ 'Win' |
+ |
+ >>> # Used by tools/clang/scripts |
+ >>> # uname -s |
+ >>> ExtractAndNormalizeOS('Linux') |
+ 'Linux' |
+ >>> ExtractAndNormalizeOS('Darwin') |
+ 'Mac' |
+ |
+ >>> # GYP defines |
+ >>> ExtractAndNormalizeOS('win') |
+ 'Win' |
+ >>> ExtractAndNormalizeOS('linux') |
+ 'Linux' |
+ >>> ExtractAndNormalizeOS('mac') |
+ 'Mac' |
+ |
+ >>> # GNU triplets |
+ >>> ExtractAndNormalizeOS('i686-pc-linux-gnu') |
+ 'Linux' |
+ >>> ExtractAndNormalizeOS('x86_64-unknown-linux-gnu') |
+ 'Linux' |
+ >>> ExtractAndNormalizeOS('i586-pc-mingw32') |
+ 'Win' |
+ >>> ExtractAndNormalizeOS('i386-pc-cygwin') |
+ 'Win' |
+ >>> ExtractAndNormalizeOS('i386-pc-win32') |
+ 'Win' |
+ >>> ExtractAndNormalizeOS('x86_64-apple-darwin10') |
+ 'Mac' |
+ """ |
+ PLATFORM_MAPPING = [ |
+ # Mac |
+ ('darwin', 'Mac'), |
+ ('mac', 'Mac'), |
+ # Linux |
+ ('linux.*', 'Linux'), |
+ # Windows |
+ ('cygwin', 'Win'), |
+ ('mingw', 'Win'), |
+ ('win', 'Win'), |
+ ] |
+ return MatchPatterns( |
+ PLATFORM_MAPPING, string, |
+ 'Was not able to extract operating system from %s') |
+ |
+ |
+def GetSystemArch(os_str): |
+ if os_str == 'Linux': |
+ # Try calling arch first, then fall back to uname |
+ try: |
+ return subprocess.check_output(['arch']).strip() |
+ except subprocess.CalledProcessError, e: |
+ # We want the architecture, which is roughly the machine hardware name |
+ # in uname. |
+ # -m, --machine; print the machine hardware name |
+ # These other two are possibilities? |
+ # -p, --processor; print the processor type or 'unknown' |
+ # -i, --hardware-platform; print the hardware platform or 'unknown' |
+ return subprocess.check_output(['uname', '-m']).strip() |
+ else: |
+ # TODO: Make this work under Mac / Windows |
+ return '' |
+ |
+ |
+def FilterFilesByPlatform(files, target_os, target_arch, |
+ file_os_default=None, file_arch_default=''): |
+ """Filter input files to given platform. |
+ |
+ We assume the target arch is the host arch if not overridden. |
+ |
+ >>> clang_style = [ |
+ ... 'abc/Linux_ia32/xxx.sha1', |
+ ... 'abc/Linux_x64/xxx.sha1', |
+ ... 'abc/Mac/xxx.sha1', |
+ ... 'abc/Win/xxx.sha1', |
+ ... ] |
+ >>> FilterFilesByPlatform(clang_style, 'Linux', 'i386') |
+ ['abc/Linux_ia32/xxx.sha1'] |
+ >>> FilterFilesByPlatform(clang_style, 'Linux', 'amd64') |
+ ['abc/Linux_x64/xxx.sha1'] |
+ >>> FilterFilesByPlatform(clang_style, 'Win', '') |
+ ['abc/Win/xxx.sha1'] |
+ >>> FilterFilesByPlatform(clang_style, 'Mac', '') |
+ ['abc/Mac/xxx.sha1'] |
+ |
+ >>> gnu_style = [ |
+ ... 'XXX-i686-pc-linux-gnu.XXX.sha1', |
+ ... 'XXX-x86_64-unknown-linux-gnu.XXX.sha1', |
+ ... 'XXX-i586-pc-mingw32.XXX.sha1', |
+ ... 'XXX-i386-pc-win32.XXX.sha1', |
+ ... 'XXX-x86_64-apple-darwin10.XXX.sha1', |
+ ... ] |
+ >>> FilterFilesByPlatform(gnu_style, 'Linux', 'i386') |
+ ['XXX-i686-pc-linux-gnu.XXX.sha1'] |
+ >>> FilterFilesByPlatform(gnu_style, 'Linux', 'amd64') |
+ ['XXX-x86_64-unknown-linux-gnu.XXX.sha1'] |
+ >>> FilterFilesByPlatform(gnu_style, 'Win', '') |
+ ['XXX-i586-pc-mingw32.XXX.sha1', 'XXX-i386-pc-win32.XXX.sha1'] |
+ >>> FilterFilesByPlatform(gnu_style, 'Mac', '') |
+ ['XXX-x86_64-apple-darwin10.XXX.sha1'] |
+ |
+ >>> simple_no_os = [ |
+ ... 'XXXX_amd64_XXX', |
+ ... 'XXXX_i386_XXX', |
+ ... ] |
+ >>> FilterFilesByPlatform( |
+ ... simple_no_os, 'Linux', 'i386', file_os_default='Linux') |
+ ['XXXX_i386_XXX'] |
+ >>> FilterFilesByPlatform( |
+ ... simple_no_os, 'Linux', 'amd64', file_os_default='Linux') |
+ ['XXXX_amd64_XXX'] |
+ >>> FilterFilesByPlatform( |
+ ... simple_no_os, 'Win', '', file_os_default='Linux') |
+ [] |
+ >>> # Fails when no default is provided and can't extract from filename. |
+ >>> FilterFilesByPlatform(simple_no_os, 'Linux', 'i386') |
+ Traceback (most recent call last): |
+ ... |
+ ValueError: Was not able to extract operating system from XXXX_amd64_XXX |
+ """ |
+ todo = [] |
Lei Zhang
2014/04/01 05:10:48
Would "sha1_paths" or "download_sha1s" be better n
mithro-old
2014/04/02 04:22:33
This function isn't actually specific to .sha1 nor
|
+ for filename in files: |
+ try: |
+ file_os = ExtractAndNormalizeOS(filename) |
+ except ValueError, e: |
+ if file_os_default is None: |
+ raise |
+ file_os = file_os_default |
+ |
+ try: |
+ file_arch = ExtractAndNormalizeArch(filename) |
+ except ValueError: |
+ if file_arch_default is None: |
+ raise |
+ file_arch = file_arch_default |
+ |
+ match = True |
+ if target_os != '' and file_os != '': |
+ match = target_os == file_os |
+ |
+ if target_arch != '' and file_arch != '': |
+ match = match and target_arch == file_arch |
+ |
+ if match: |
+ todo.append(filename) |
+ |
+ return todo |
+ |
+ |
+class StampFile(object): |
+ """Stores a stamp for when an action occured. |
+ |
+ This is normally a revision number or checksum of the input files to the |
+ process. Proper usage of stamping will mean a partial action is never |
+ considered successful. |
+ |
+ Proper usage is; |
+ * /Check/ -- Check the stamp file matches your revision/checksum. |
+ * /Delete/ -- Delete the existing stamp file. |
+ * /Clean up/ -- Clean up any partially failed previous attempt. |
+ * /Do action/ -- Do your work. |
+ * /Set/ -- Set the stamp file to your revision/checksum. |
+ |
+ For example: |
+ >>> stamp = 'mySHA1sum' |
+ >>> action_stamp = StampFile('stamp.action') |
+ >>> # Check stamp |
+ >>> if stamp != action_stamp.get(): # doctest: +SKIP |
+ ... # Delete stamp |
+ ... action_stamp.delete() |
+ ... # Clean up |
+ ... if os.path.exists(filename): |
+ ... os.unlink(filename) |
+ ... # Action |
+ ... do_download(filename) |
+ ... # Set stamp |
+ ... action_stamp.set(stamp) |
+ """ |
+ |
+ def __init__(self, filename): |
+ self.filename = filename |
+ |
+ def get(self): |
+ try: |
+ return file(self.filename, 'r').read() |
+ except IOError, e: |
+ # We use NaN because it is not equal even to itself, so equality checks |
+ # will always fail. |
+ return float('NaN') |
+ |
+ def delete(self): |
+ if os.path.exists(self.filename): |
+ os.unlink(self.filename) |
+ |
+ def set(self, stamp): |
+ if os.path.exists(self.filename): |
+ raise IOError('Stamp file %r currently exist!' % filename) |
+ |
+ f = file(self.filename, 'w') |
+ f.write(stamp) |
+ # Force a fsync so this ends up on disk and other processes can see it. |
+ os.fsync(f) |
+ f.close() |
+ |
+ |
+def main(args): |
+ usage = ( |
+ 'usage: %prog [options] targets\n\n' |
+ 'Downloads and extracts targets which match the platform.\n' |
+ 'Targets must be a list of a .sha1 file, containing a sha1 sum' |
+ 'used by download_from_google_storage tool.') |
+ |
+ parser = optparse.OptionParser(usage) |
+ parser.add_option( |
+ '-b', '--bucket', |
+ help='Google Storage bucket to fetch from.') |
+ parser.add_option( |
+ '-e', '--extract', |
+ help='Directory to extract the downloaded files into.') |
+ parser.add_option( |
+ '-a', '--target-arch', |
+ help='Override architecture to given value.') |
+ parser.add_option( |
+ '', '--default-file-arch', |
+ help='Override input file architecture to given value.') |
+ parser.add_option( |
+ '', '--default-file-os', |
+ help='Override input file operating system to given value.') |
+ parser.add_option( |
+ '', '--self-test', default=False, action="store_true", |
+ help='Run the internal tests.') |
+ |
+ (options, files) = parser.parse_args() |
+ |
+ if options.self_test: |
+ import doctest |
+ return doctest.testmod() |
+ |
+ errors = [] |
+ if not options.bucket: |
+ errors.append('--bucket (-b) is required option.') |
+ |
+ if not options.extract: |
+ errors.append('--extract (-e) is required option.') |
+ |
+ if not files: |
+ errors.append('Need to specify files to download.') |
+ |
+ for filename in files: |
+ if not os.path.exists(filename): |
+ errors.append('File %s does not exist.' % filename) |
+ |
+ if errors: |
+ parser.error('\n '.join(errors)) |
+ |
+ # Figure out which files we want to download. Filter by the current platform |
+ # the tool is being run on. |
+ target_os = ExtractAndNormalizeOS(sys.platform) |
+ |
+ target_arch = None |
+ if options.target_arch: |
+ target_arch = options.target_arch |
+ else: |
+ # Try to get target_arch out of GYP |
+ gyp_target_arch = re.search( |
+ '(target_arch=[^ ]*)', os.environ.get('GYP_DEFINES', '')) |
+ if gyp_target_arch: |
+ target_arch = gyp_target_arch.groups(1) |
+ |
+ if target_arch is None: # '' is a valid target_arch |
+ target_arch = GetSystemArch(target_os) |
+ |
+ target_arch = ExtractAndNormalizeArch(target_arch) |
+ |
+ todo = FilterFilesByPlatform( |
+ files, |
+ target_os, |
+ target_arch, |
+ file_os_default=options.default_file_os, |
+ file_arch_default=options.default_file_arch) |
+ |
+ if len(todo) == 0: |
+ print 'No files to download.' |
+ return 0 |
+ |
+ elif len(todo) > 1: |
+ # TODO: Support downloading and extracting multiple files. |
+ parser.error( |
+ 'Matched multiple files on this platform!\n' + '\n'.join(todo)) |
+ return 1 |
+ |
+ # Process the files |
+ for filename in todo: |
+ filename = os.path.abspath(filename) |
+ |
+ basename, ext = os.path.splitext(filename) |
+ assert ext == '.sha1', 'Input filename %s does not end in .sha1' % filename |
+ sha1 = file(filename, 'r').read().strip() |
+ |
+ # Download the tarball |
+ download_stamp = StampFile('%s.stamp.download' % basename) |
+ if sha1 != download_stamp.get(): |
+ print "Downloading", basename |
+ download_stamp.delete() |
+ if os.path.exists(basename): |
+ os.unlink(basename) |
+ |
+ subprocess.check_call([ |
+ 'download_from_google_storage', |
+ '--no_resume', |
+ '--no_auth', |
+ '--bucket', options.bucket, |
+ '-s', filename]) |
+ |
+ download_stamp.set(sha1) |
+ |
+ # Extract the tarball |
+ extract_stamp = StampFile('%s.stamp.untar' % basename) |
+ if sha1 != extract_stamp.get(): |
+ print "Extracting", basename |
+ extract_stamp.delete() |
+ if os.path.exists(options.extract): |
+ shutil.rmtree(options.extract) |
+ |
+ taroptions = 'xf' |
Lei Zhang
2014/04/01 05:10:48
you can tar axf any.common.tar.extension.
a = --a
mithro-old
2014/04/02 04:22:33
Done.
Removed the file type detection and using a
|
+ if basename.endswith('.bz2'): |
+ taroptions += 'j' |
+ if basename.endswith('.gz'): |
+ taroptions += 'z' |
+ if basename.endswith('.xz'): |
+ taroptions += 'J' |
+ |
+ os.makedirs(options.extract) |
+ # TODO: Use https://docs.python.org/2/library/tarfile.html rather than |
Lei Zhang
2014/04/01 05:10:48
I was just going to mention this. :)
Lei Zhang
2014/04/01 05:10:48
nit: TODO(username), same above.
mithro-old
2014/04/02 04:22:33
I did a search and replace for "TODO:" to "TODO(mi
mithro-old
2014/04/02 04:22:33
If we want to support windows then using tarfile i
Lei Zhang
2014/04/02 07:22:28
Erm. If you really can't commit to finishing up th
Lei Zhang
2014/04/02 07:22:28
This is also a bit troublesome. Since it's in tool
|
+ # tar cmdline. |
+ subprocess.check_call( |
+ ['tar', taroptions, basename], cwd=options.extract) |
+ |
+ extract_stamp.set(sha1) |
+ |
+ return 0 |
+ |
+ |
+# Script entry point: pass the complete argv (program name included) to main() |
+# and use its return value as the process exit status. |
+if __name__ == '__main__': |
+  sys.exit(main(sys.argv)) |