Chromium Code Reviews| Index: tools/download_and_extract.py |
| diff --git a/tools/download_and_extract.py b/tools/download_and_extract.py |
| new file mode 100755 |
| index 0000000000000000000000000000000000000000..d55ecb8e65b31e3ee6543cab646e5933cde6b47c |
| --- /dev/null |
| +++ b/tools/download_and_extract.py |
| @@ -0,0 +1,480 @@ |
| +#!/usr/bin/env python |
| +# Copyright (c) 2014 The Chromium Authors. All rights reserved. |
|
Lei Zhang
2014/04/01 05:10:48
nit: no (c) in new copyright headers.
mithro-old
2014/04/02 04:22:33
Done.
|
| +# Use of this source code is governed by a BSD-style license that can be |
| +# found in the LICENSE file. |
| +# vim: set ts=2 sw=2 et sts=2 ai: |
| + |
| +"""Download files for your platform and extract their contents into a directory. |
| + |
| +Uses download_from_google_storage for the actual download process. |
| +Uses tar to extract the files from the archive. |
| + |
| +The input is a list of sha1 files suitable for download_from_google_storage. |
| +""" |
| + |
| +""" |
| +TODO: Replace the other download/extract scripts that exist in the chrome tree. |
| + |
| +Replace chrome/installer/linux/sysroot_scripts/install-debian.wheezy.sysroot.py |
| + download_and_extract.py \\ |
| + --default-file-os Linux \\ |
| + --bucket chrome-linux-sysroot \\ |
| + --extract ????/debian_wheezy_???-sysroot \\ |
| + <sha1 files??> |
| + |
| +Replace build/linux/install-arm-sysroot.py |
| + download_and_extract.py \\ |
| + --bucket nativeclient-archive2 \\ |
| + --extract build/linux/arm-sys-root \\ |
| + <sha1 files??> |
| + |
| +Replace download part of tools/clang/scripts/update.(py|sh) |
| + download_and_extract.py \\ |
| + --bucket chromium-browser-clang \\ |
| + --extract third_party/llvm-build/Release+Asserts \\ |
| + <sha1 files??> |
| +""" |
| + |
| + |
| +import optparse |
| +import os |
| +import os.path |
|
Lei Zhang
2014/04/01 05:10:48
already covered under import os.
mithro-old
2014/04/02 04:22:33
Fixed.
Old habit from when os.path you had to bac
|
| +import re |
| +import shutil |
| +import subprocess |
| +import sys |
| + |
| + |
def MatchPatterns(mapping, string, error):
  """Return the result paired with the first pattern found in |string|.

  Args:
    mapping: Ordered iterable of (regex pattern, result) pairs. Earlier
        entries take priority over later ones.
    string: Text to search. Patterns are searched case-insensitively and may
        match anywhere in the text.
    error: '%'-style message template, formatted with |string| when nothing
        matches.

  Returns:
    The result belonging to the first pattern that matches.

  Raises:
    ValueError: If none of the patterns match.
  """
  # A private sentinel keeps falsy results (such as '') distinguishable from
  # "nothing matched".
  no_match = object()
  candidates = (
      value for regex, value in mapping
      if re.search(regex, string, re.I))
  found = next(candidates, no_match)
  if found is no_match:
    raise ValueError(error % string)
  return found
| + |
| + |
def ExtractAndNormalizeArch(string):
  """Extract and normalize an architecture string.

  The first entry of the pattern table that is found anywhere in |string|
  (case-insensitively) decides the result, so a more specific pattern must be
  listed before any shorter pattern it contains.

  Args:
    string: Text naming an architecture, e.g. `arch` / `uname -m` output, a
        GNU triplet, a GYP define or a file name. May be empty.

  Returns:
    One of 'i386', 'amd64', 'mips64', 'mips', 'arm64' or 'arm', or '' when
    |string| is empty.

  Raises:
    ValueError: If |string| is non-empty and names no known architecture.

  >>> # Linux arch / uname output
  >>> ExtractAndNormalizeArch('x86_64')
  'amd64'
  >>> ExtractAndNormalizeArch('i686')
  'i386'
  >>> ExtractAndNormalizeArch('i386')
  'i386'

  >>> # autoconf "platform" tuple
  >>> ExtractAndNormalizeArch('i686-pc-linux-gnu')
  'i386'
  >>> ExtractAndNormalizeArch('x86_64-unknown-linux-gnu')
  'amd64'

  >>> # platform.machine()
  >>> ExtractAndNormalizeArch('i386')
  'i386'
  >>> ExtractAndNormalizeArch('x86_64')
  'amd64'

  >>> # GYP Defines
  >>> ExtractAndNormalizeArch('target_arch=x64')
  'amd64'
  >>> ExtractAndNormalizeArch('target_arch=ia32')
  'i386'
  >>> ExtractAndNormalizeArch('x64')
  'amd64'
  >>> ExtractAndNormalizeArch('ia32')
  'i386'

  >>> # ARM variants
  >>> ExtractAndNormalizeArch('armv7l')
  'arm'
  >>> ExtractAndNormalizeArch('arm64')
  'arm64'
  >>> ExtractAndNormalizeArch('aarch64')
  'arm64'

  >>> # Empty Arch
  >>> ExtractAndNormalizeArch('')
  ''
  """
  ARCH_MAPPING = [
      # Linux 'arch' outputs
      ('i[3456]?86', 'i386'),
      ('i86pc', 'i386'),
      ('x86_64', 'amd64'),
      ('amd64', 'amd64'),
      ('mips64', 'mips64'),  # Must be before mips
      ('mips', 'mips'),
      # Must be before arm, otherwise the already-normalized name 'arm64'
      # would be downgraded to 'arm'.
      ('arm64', 'arm64'),
      ('arm', 'arm'),
      ('aarch', 'arm64'),
      # Windows
      ('win32', 'i386'),
      ('win64', 'amd64'),
      # GYP defines
      ('ia32', 'i386'),
      ('x64', 'amd64'),
      # Empty arch
      ('^$', ''),
  ]
  return MatchPatterns(
      ARCH_MAPPING, string,
      'Was not able to extract architecture from %s')
| + |
| + |
def ExtractAndNormalizeOS(string):
  """Extract and normalize an OS string.

  Args:
    string: Text naming an operating system, e.g. sys.platform, `uname -s`
        output, a GYP define, a GNU triplet or a file name.

  Returns:
    'Mac', 'Linux' or 'Win', taken from the first known pattern found
    anywhere in |string| (case-insensitively).

  Raises:
    ValueError: If no known operating system is found in |string|.

  >>> # Used by download_from_storage
  >>> # sys.platform
  >>> ExtractAndNormalizeOS('linux2')
  'Linux'
  >>> ExtractAndNormalizeOS('darwin')
  'Mac'
  >>> ExtractAndNormalizeOS('cygwin')
  'Win'
  >>> ExtractAndNormalizeOS('win32')
  'Win'
  >>> ExtractAndNormalizeOS('win64')
  'Win'

  >>> # platform.system()
  >>> ExtractAndNormalizeOS('Linux')
  'Linux'
  >>> ExtractAndNormalizeOS('Windows')
  'Win'

  >>> # Used by tools/clang/scripts
  >>> # uname -s
  >>> ExtractAndNormalizeOS('Linux')
  'Linux'
  >>> ExtractAndNormalizeOS('Darwin')
  'Mac'

  >>> # GYP defines
  >>> ExtractAndNormalizeOS('win')
  'Win'
  >>> ExtractAndNormalizeOS('linux')
  'Linux'
  >>> ExtractAndNormalizeOS('mac')
  'Mac'

  >>> # GNU triplets
  >>> ExtractAndNormalizeOS('i686-pc-linux-gnu')
  'Linux'
  >>> ExtractAndNormalizeOS('x86_64-unknown-linux-gnu')
  'Linux'
  >>> ExtractAndNormalizeOS('i586-pc-mingw32')
  'Win'
  >>> ExtractAndNormalizeOS('i386-pc-cygwin')
  'Win'
  >>> ExtractAndNormalizeOS('i386-pc-win32')
  'Win'
  >>> ExtractAndNormalizeOS('x86_64-apple-darwin10')
  'Mac'
  """
  # Order matters: 'darwin' contains 'win', so the Mac entries have to be
  # tried before the Windows ones.
  os_patterns = (
      ('darwin', 'Mac'),
      ('mac', 'Mac'),
      ('linux.*', 'Linux'),
      ('cygwin', 'Win'),
      ('mingw', 'Win'),
      ('win', 'Win'),
  )
  failure_message = 'Was not able to extract operating system from %s'
  return MatchPatterns(os_patterns, string, failure_message)
| + |
| + |
def GetSystemArch(os_str):
  """Return the raw (not yet normalized) architecture string of the host.

  Args:
    os_str: Normalized OS name, compared against 'Linux'.

  Returns:
    On Linux, the stripped output of `arch`, or of `uname -m` when `arch`
    cannot be run or exits with an error. For any other OS, the empty string.

  Raises:
    OSError, subprocess.CalledProcessError: If the `uname -m` fallback fails
        as well.
  """
  if os_str != 'Linux':
    # TODO(mithro): Make this work under Mac / Windows
    return ''

  # Try calling arch first, then fall back to uname.
  try:
    return subprocess.check_output(['arch']).strip()
  except (OSError, subprocess.CalledProcessError):
    # OSError covers a missing `arch` binary; CalledProcessError covers a
    # non-zero exit status.
    #
    # We want the architecture, which is roughly the machine hardware name
    # in uname.
    #  -m, --machine; print the machine hardware name
    # These other two are possibilities?
    #  -p, --processor; print the processor type or 'unknown'
    #  -i, --hardware-platform; print the hardware platform or 'unknown'
    return subprocess.check_output(['uname', '-m']).strip()
| + |
| + |
def FilterFilesByPlatform(files, target_os, target_arch,
                          file_os_default=None, file_arch_default=''):
  """Filter input files to given platform.

  The OS and architecture of every file are extracted from its name. An empty
  value on either side of a comparison (target or file) acts as a wildcard.

  Args:
    files: Iterable of file names to filter.
    target_os: Normalized OS to keep, or '' to accept any OS.
    target_arch: Normalized architecture to keep, or '' to accept any.
    file_os_default: OS assumed for names no OS can be extracted from. None
        makes such names an error.
    file_arch_default: Architecture assumed for names no architecture can be
        extracted from. None makes such names an error.

  Returns:
    List of the names in |files| that match the target, in input order.

  Raises:
    ValueError: If extraction fails for a name and the matching default is
        None.

  >>> clang_style = [
  ...   'abc/Linux_ia32/xxx.sha1',
  ...   'abc/Linux_x64/xxx.sha1',
  ...   'abc/Mac/xxx.sha1',
  ...   'abc/Win/xxx.sha1',
  ... ]
  >>> FilterFilesByPlatform(clang_style, 'Linux', 'i386')
  ['abc/Linux_ia32/xxx.sha1']
  >>> FilterFilesByPlatform(clang_style, 'Linux', 'amd64')
  ['abc/Linux_x64/xxx.sha1']
  >>> FilterFilesByPlatform(clang_style, 'Win', '')
  ['abc/Win/xxx.sha1']
  >>> FilterFilesByPlatform(clang_style, 'Mac', '')
  ['abc/Mac/xxx.sha1']

  >>> gnu_style = [
  ...   'XXX-i686-pc-linux-gnu.XXX.sha1',
  ...   'XXX-x86_64-unknown-linux-gnu.XXX.sha1',
  ...   'XXX-i586-pc-mingw32.XXX.sha1',
  ...   'XXX-i386-pc-win32.XXX.sha1',
  ...   'XXX-x86_64-apple-darwin10.XXX.sha1',
  ... ]
  >>> FilterFilesByPlatform(gnu_style, 'Linux', 'i386')
  ['XXX-i686-pc-linux-gnu.XXX.sha1']
  >>> FilterFilesByPlatform(gnu_style, 'Linux', 'amd64')
  ['XXX-x86_64-unknown-linux-gnu.XXX.sha1']
  >>> FilterFilesByPlatform(gnu_style, 'Win', '')
  ['XXX-i586-pc-mingw32.XXX.sha1', 'XXX-i386-pc-win32.XXX.sha1']
  >>> FilterFilesByPlatform(gnu_style, 'Mac', '')
  ['XXX-x86_64-apple-darwin10.XXX.sha1']

  >>> simple_no_os = [
  ...   'XXXX_amd64_XXX',
  ...   'XXXX_i386_XXX',
  ... ]
  >>> FilterFilesByPlatform(
  ...   simple_no_os, 'Linux', 'i386', file_os_default='Linux')
  ['XXXX_i386_XXX']
  >>> FilterFilesByPlatform(
  ...   simple_no_os, 'Linux', 'amd64', file_os_default='Linux')
  ['XXXX_amd64_XXX']
  >>> FilterFilesByPlatform(
  ...   simple_no_os, 'Win', '', file_os_default='Linux')
  []
  >>> # Fails when no default is provided and can't extract from filename.
  >>> FilterFilesByPlatform(simple_no_os, 'Linux', 'i386')
  Traceback (most recent call last):
    ...
  ValueError: Was not able to extract operating system from XXXX_amd64_XXX
  """

  def Detect(extractor, name, fallback):
    # Run |extractor| on |name|; use |fallback| when extraction fails, unless
    # the fallback is None, in which case the failure propagates.
    try:
      return extractor(name)
    except ValueError:
      if fallback is None:
        raise
      return fallback

  def Compatible(wanted, found):
    # An empty string on either side means "don't care".
    return wanted == '' or found == '' or wanted == found

  matched = []
  for name in files:
    # Both properties are always extracted (OS first), so a name that cannot
    # be classified is reported even when the other property already differs.
    name_os = Detect(ExtractAndNormalizeOS, name, file_os_default)
    name_arch = Detect(ExtractAndNormalizeArch, name, file_arch_default)
    if Compatible(target_os, name_os) and Compatible(target_arch, name_arch):
      matched.append(name)
  return matched
| + |
| + |
class StampFile(object):
  """Stores a stamp for when an action occurred.

  This is normally a revision number or checksum of the input files to the
  process. Proper usage of stamping will mean a partial action is never
  considered successful.

  Proper usage is:
   * /Check/      -- Check the stamp file matches your revision/checksum.
   * /Delete/     -- Delete the existing stamp file.
   * /Clean up/   -- Clean up any partially failed previous attempt.
   * /Do action/  -- Do your work.
   * /Set/        -- Set the stamp file to your revision/checksum.

  For example:
  >>> stamp = 'mySHA1sum'
  >>> action_stamp = StampFile('stamp.action')
  >>> # Check stamp
  >>> if stamp != action_stamp.get():  # doctest: +SKIP
  ...   # Delete stamp
  ...   action_stamp.delete()
  ...   # Clean up
  ...   if os.path.exists(filename):
  ...     os.unlink(filename)
  ...   # Action
  ...   do_download(filename)
  ...   # Set stamp
  ...   action_stamp.set(stamp)
  """

  def __init__(self, filename):
    """Remember the stamp file path; nothing is read or written here.

    Args:
      filename: Path of the file holding the stamp.
    """
    self.filename = filename

  def get(self):
    """Return the stored stamp.

    Returns:
      The full contents of the stamp file, or NaN when the file cannot be
      opened or read.
    """
    try:
      with open(self.filename, 'r') as stamp_file:
        return stamp_file.read()
    except IOError:
      # We use NaN because it is not equal even to itself, so equality checks
      # will always fail.
      return float('NaN')

  def delete(self):
    """Remove the stamp file if it exists."""
    if os.path.exists(self.filename):
      os.unlink(self.filename)

  def set(self, stamp):
    """Write |stamp| to a stamp file that must not exist yet.

    Args:
      stamp: String to store.

    Raises:
      IOError: If the stamp file already exists (delete() it first) or
          cannot be written.
    """
    if os.path.exists(self.filename):
      raise IOError('Stamp file %r currently exist!' % self.filename)

    with open(self.filename, 'w') as stamp_file:
      stamp_file.write(stamp)
      # Flush Python's own buffer first; fsync only pushes what has already
      # reached the OS. Together they force the stamp onto disk so other
      # processes can see it.
      stamp_file.flush()
      os.fsync(stamp_file.fileno())
| + |
| + |
def main(args):
  """Download and extract the .sha1 targets that match this platform.

  Args:
    args: Full command line including the program name, as in sys.argv.

  Returns:
    Exit status for the process: 0 on success or when no input file matches
    the platform; with --self-test, 1 if any doctest failed, else 0.
    Invalid command lines do not return: optparse reports the error and
    exits.

  Raises:
    subprocess.CalledProcessError: If download_from_google_storage or tar
        exits with an error.
  """
  usage = (
      'usage: %prog [options] targets\n\n'
      'Downloads and extracts targets which match the platform.\n'
      'Targets must be a list of a .sha1 file, containing a sha1 sum '
      'used by download_from_google_storage tool.')

  parser = optparse.OptionParser(usage)
  parser.add_option(
      '-b', '--bucket',
      help='Google Storage bucket to fetch from.')
  parser.add_option(
      '-e', '--extract',
      help='Directory to extract the downloaded files into.')
  parser.add_option(
      '-a', '--target-arch',
      help='Override architecture to given value.')
  # Defaults to '' (any architecture), the same default FilterFilesByPlatform
  # uses; a None here would turn every file name without an architecture in
  # it into a ValueError.
  parser.add_option(
      '', '--default-file-arch', default='',
      help='Override input file architecture to given value.')
  parser.add_option(
      '', '--default-file-os',
      help='Override input file operating system to given value.')
  parser.add_option(
      '', '--self-test', default=False, action="store_true",
      help='Run the internal tests.')

  # Parse the arguments we were given (minus the program name) instead of
  # silently falling back to sys.argv.
  (options, files) = parser.parse_args(args[1:])

  if options.self_test:
    import doctest
    # testmod() returns (failed, attempted); handing that tuple to sys.exit()
    # would exit non-zero even when every test passed.
    failed, _ = doctest.testmod()
    return 1 if failed else 0

  errors = []
  if not options.bucket:
    errors.append('--bucket (-b) is required option.')

  if not options.extract:
    errors.append('--extract (-e) is required option.')

  if not files:
    errors.append('Need to specify files to download.')

  for filename in files:
    if not os.path.exists(filename):
      errors.append('File %s does not exist.' % filename)

  if errors:
    parser.error('\n  '.join(errors))

  # Figure out which files we want to download. Filter by the current platform
  # the tool is being run on.
  target_os = ExtractAndNormalizeOS(sys.platform)

  target_arch = None
  if options.target_arch:
    target_arch = options.target_arch
  else:
    # Try to get target_arch out of GYP
    gyp_target_arch = re.search(
        '(target_arch=[^ ]*)', os.environ.get('GYP_DEFINES', ''))
    if gyp_target_arch:
      # group(1) is the matched text; groups(1) would be a tuple.
      target_arch = gyp_target_arch.group(1)

  if target_arch is None:  # '' is a valid target_arch
    target_arch = GetSystemArch(target_os)

  target_arch = ExtractAndNormalizeArch(target_arch)

  todo = FilterFilesByPlatform(
      files,
      target_os,
      target_arch,
      file_os_default=options.default_file_os,
      file_arch_default=options.default_file_arch)

  if not todo:
    print('No files to download.')
    return 0

  if len(todo) > 1:
    # TODO(mithro): Support downloading and extracting multiple files.
    # parser.error() exits the process, so nothing after it runs.
    parser.error(
        'Matched multiple files on this platform!\n' + '\n'.join(todo))

  # Process the files
  for filename in todo:
    filename = os.path.abspath(filename)

    basename, ext = os.path.splitext(filename)
    # Explicit check rather than assert, which is stripped under -O.
    if ext != '.sha1':
      parser.error('Input filename %s does not end in .sha1' % filename)
    with open(filename, 'r') as sha1_file:
      sha1 = sha1_file.read().strip()

    # Download the tarball
    download_stamp = StampFile('%s.stamp.download' % basename)
    if sha1 != download_stamp.get():
      print('Downloading %s' % basename)
      download_stamp.delete()
      if os.path.exists(basename):
        os.unlink(basename)

      subprocess.check_call([
          'download_from_google_storage',
          '--no_resume',
          '--no_auth',
          '--bucket', options.bucket,
          '-s', filename])

      download_stamp.set(sha1)

    # Extract the tarball
    extract_stamp = StampFile('%s.stamp.untar' % basename)
    if sha1 != extract_stamp.get():
      print('Extracting %s' % basename)
      extract_stamp.delete()
      # Start from an empty directory so a partial previous extraction can
      # never be mistaken for a complete one.
      if os.path.exists(options.extract):
        shutil.rmtree(options.extract)

      taroptions = 'xf'
      if basename.endswith('.bz2'):
        taroptions += 'j'
      if basename.endswith('.gz'):
        taroptions += 'z'
      if basename.endswith('.xz'):
        taroptions += 'J'

      os.makedirs(options.extract)
      # TODO(mithro): Use https://docs.python.org/2/library/tarfile.html
      # rather than tar cmdline.
      subprocess.check_call(
          ['tar', taroptions, basename], cwd=options.extract)

      extract_stamp.set(sha1)

  return 0
| + |
| + |
| +if __name__ == '__main__': |
| + sys.exit(main(sys.argv)) |