Index: build_tools/cygtar.py |
diff --git a/build_tools/cygtar.py b/build_tools/cygtar.py |
new file mode 100755 |
index 0000000000000000000000000000000000000000..2a7c104cdc14c27c231e1a292b436a1480e8872e |
--- /dev/null |
+++ b/build_tools/cygtar.py |
@@ -0,0 +1,424 @@ |
+#!/usr/bin/python |
+# Copyright (c) 2012 The Native Client Authors. All rights reserved. |
+# Use of this source code is governed by a BSD-style license that can be |
+# found in the LICENSE file. |
+ |
+# This file was copied, unmodified, from the native_client repository: |
+# https://chromium.googlesource.com/native_client/src/native_client/+/master/build/cygtar.py |
+ |
+import glob |
+import hashlib |
+import optparse |
+import os |
+import posixpath |
+import shutil |
+import subprocess |
+import stat |
+import sys |
+import tarfile |
+ |
+"""A Cygwin aware version compress/extract object. |
+ |
+This module supports creating and unpacking a tarfile on all platforms. For |
+Cygwin, Mac, and Linux, it will use the standard tarfile implementation. For |
+Win32 it will detect Cygwin style symlinks as it archives and convert them to |
+symlinks. |
+ |
+For Win32, it is unfortunate that os.stat does not return a FileID in the ino |
+field which would allow us to correctly determine which files are hardlinks, so |
+instead we assume that any files in the archive that are an exact match are |
+hardlinks to the same data. |
+ |
+We know they are not Symlinks because we are using Cygwin style symlinks only, |
+which appear to Win32 a normal file. |
+ |
+All paths stored and retrieved from a TAR file are expected to be POSIX style, |
+Win32 style paths will be rejected. |
+ |
+NOTE: |
+ All paths represent by the tarfile and all API functions are POSIX style paths |
+ except for CygTar.Add which assumes a Native path. |
+""" |
+ |
+ |
+def ToNativePath(native_path): |
+ """Convert to a posix style path if this is win32.""" |
+ if sys.platform == 'win32': |
+ return native_path.replace('/', '\\') |
+ return native_path |
+ |
+ |
+def IsCygwinSymlink(symtext): |
+ """Return true if the provided text looks like a Cygwin symlink.""" |
+ return symtext[:12] == '!<symlink>\xff\xfe' |
+ |
+ |
+def SymDatToPath(symtext): |
+ """Convert a Cygwin style symlink data to a relative path.""" |
+ return ''.join([ch for ch in symtext[12:] if ch != '\x00']) |
+ |
+ |
+def PathToSymDat(filepath): |
+ """Convert a filepath to cygwin style symlink data.""" |
+ symtag = '!<symlink>\xff\xfe' |
+ unipath = ''.join([ch + '\x00' for ch in filepath]) |
+ strterm = '\x00\x00' |
+ return symtag + unipath + strterm |
+ |
+ |
+def CreateWin32Link(filepath, targpath, verbose): |
+ """Create a link on Win32 if possible |
+ |
+ Uses mklink to create a link (hardlink or junction) if possible. On failure, |
+ it will assume mklink is unavailible and copy the file instead. Future calls |
+ will not attempt to use mklink.""" |
+ |
+ targ_is_dir = os.path.isdir(targpath) |
+ |
+ call_mklink = False |
+ if targ_is_dir and CreateWin32Link.try_junction: |
+ # Creating a link to a directory will fail, but a junction (which is more |
+ # like a symlink) will work. |
+ mklink_flag = '/J' |
+ call_mklink = True |
+ elif not targ_is_dir and CreateWin32Link.try_hardlink: |
+ mklink_flag = '/H' |
+ call_mklink = True |
+ |
+ # Assume an error, if subprocess succeeds, then it should return 0 |
+ err = 1 |
+ if call_mklink: |
+ try: |
+ cmd = ['cmd', '/C', 'mklink %s %s %s' % ( |
+ mklink_flag, ToNativePath(filepath), ToNativePath(targpath))] |
+ err = subprocess.call(cmd, |
+ stdout = open(os.devnull, 'wb'), |
+ stderr = open(os.devnull, 'wb')) |
+ except EnvironmentError: |
+ if targ_is_dir: |
+ CreateWin32Link.try_junction = False |
+ else: |
+ CreateWin32Link.try_hardlink = False |
+ |
+ # If we failed to create a link, then just copy it. We wrap this in a |
+ # retry for Windows which often has stale file lock issues. |
+ if err or not os.path.exists(filepath): |
+ if targ_is_dir and verbose: |
+ print 'Failed to create junction %s -> %s. Copying instead.\n' % ( |
+ filepath, targpath) |
+ |
+ for cnt in range(1,4): |
+ try: |
+ if targ_is_dir: |
+ shutil.copytree(targpath, filepath) |
+ else: |
+ shutil.copyfile(targpath, filepath) |
+ return False |
+ except EnvironmentError: |
+ if verbose: |
+ print 'Try %d: Failed hardlink %s -> %s\n' % (cnt, filepath, targpath) |
+ if verbose: |
+ print 'Giving up.' |
+ |
+CreateWin32Link.try_hardlink = True |
+CreateWin32Link.try_junction = True |
+ |
+ |
+ |
+def ComputeFileHash(filepath): |
+ """Generate a sha1 hash for the file at the given path.""" |
+ sha1 = hashlib.sha1() |
+ with open(filepath, 'rb') as fp: |
+ sha1.update(fp.read()) |
+ return sha1.hexdigest() |
+ |
+ |
+def ReadableSizeOf(num): |
+ """Convert to a human readable number.""" |
+ if num < 1024.0: |
+ return '[%5dB]' % num |
+ for x in ['B','K','M','G','T']: |
+ if num < 1024.0: |
+ return '[%5.1f%s]' % (num, x) |
+ num /= 1024.0 |
+ return '[%dT]' % int(num) |
+ |
+ |
+class CygTar(object): |
+ """ CygTar is an object which represents a Win32 and Cygwin aware tarball.""" |
+ def __init__(self, filename, mode='r', verbose=False): |
+ self.size_map = {} |
+ self.file_hashes = {} |
+ # Set errorlevel=1 so that fatal errors actually raise! |
+ if 'r' in mode: |
+ self.read_file = open(filename, 'rb') |
+ self.read_filesize = os.path.getsize(filename) |
+ self.tar = tarfile.open(mode=mode, fileobj=self.read_file, errorlevel=1) |
+ else: |
+ self.read_file = None |
+ self.read_filesize = 0 |
+ self.tar = tarfile.open(filename, mode=mode, errorlevel=1) |
+ self.verbose = verbose |
+ |
+ def __DumpInfo(self, tarinfo): |
+ """Prints information on a single object in the tarball.""" |
+ typeinfo = '?' |
+ lnk = '' |
+ if tarinfo.issym(): |
+ typeinfo = 'S' |
+ lnk = '-> ' + tarinfo.linkname |
+ if tarinfo.islnk(): |
+ typeinfo = 'H' |
+ lnk = '-> ' + tarinfo.linkname |
+ if tarinfo.isdir(): |
+ typeinfo = 'D' |
+ if tarinfo.isfile(): |
+ typeinfo = 'F' |
+ reable_size = ReadableSizeOf(tarinfo.size) |
+ print '%s %s : %s %s' % (reable_size, typeinfo, tarinfo.name, lnk) |
+ return tarinfo |
+ |
+ def __AddFile(self, tarinfo, fileobj=None): |
+ """Add a file to the archive.""" |
+ if self.verbose: |
+ self.__DumpInfo(tarinfo) |
+ self.tar.addfile(tarinfo, fileobj) |
+ |
+ def __AddLink(self, tarinfo, linktype, linkpath): |
+ """Add a Win32 symlink or hardlink to the archive.""" |
+ tarinfo.linkname = linkpath |
+ tarinfo.type = linktype |
+ tarinfo.size = 0 |
+ self.__AddFile(tarinfo) |
+ |
+ def Add(self, filepath, prefix=None): |
+ """Add path filepath to the archive which may be Native style. |
+ |
+ Add files individually recursing on directories. For POSIX we use |
+ tarfile.addfile directly on symlinks and hardlinks. For files, we must |
+ check if they are duplicates which we convert to hardlinks or symlinks |
+ which we convert from a file to a symlink in the tarfile. All other files |
+ are added as a standard file. |
+ """ |
+ |
+ # At this point tarinfo.name will contain a POSIX style path regardless |
+ # of the original filepath. |
+ tarinfo = self.tar.gettarinfo(filepath) |
+ if prefix: |
+ tarinfo.name = posixpath.join(prefix, tarinfo.name) |
+ |
+ if sys.platform == 'win32': |
+ # On win32 os.stat() always claims that files are world writable |
+ # which means that unless we remove this bit here we end up with |
+ # world writables files in the archive, which is almost certainly |
+ # not intended. |
+ tarinfo.mode &= ~stat.S_IWOTH |
+ tarinfo.mode &= ~stat.S_IWGRP |
+ |
+ # If we want cygwin to be able to extract this archive and use |
+ # executables and dll files we need to mark all the archive members as |
+ # executable. This is essentially what happens anyway when the |
+ # archive is extracted on win32. |
+ tarinfo.mode |= stat.S_IXUSR | stat.S_IXOTH | stat.S_IXGRP |
+ |
+ # If this a symlink or hardlink, add it |
+ if tarinfo.issym() or tarinfo.islnk(): |
+ tarinfo.size = 0 |
+ self.__AddFile(tarinfo) |
+ return True |
+ |
+ # If it's a directory, then you want to recurse into it |
+ if tarinfo.isdir(): |
+ self.__AddFile(tarinfo) |
+ native_files = glob.glob(os.path.join(filepath, '*')) |
+ for native_file in native_files: |
+ if not self.Add(native_file, prefix): return False |
+ return True |
+ |
+ # At this point we only allow addition of "FILES" |
+ if not tarinfo.isfile(): |
+ print 'Failed to add non real file: %s' % filepath |
+ return False |
+ |
+ # Now check if it is a Cygwin style link disguised as a file. |
+ # We go ahead and check on all platforms just in case we are tar'ing a |
+ # mount shared with windows. |
+ if tarinfo.size <= 524: |
+ with open(filepath) as fp: |
+ symtext = fp.read() |
+ if IsCygwinSymlink(symtext): |
+ self.__AddLink(tarinfo, tarfile.SYMTYPE, SymDatToPath(symtext)) |
+ return True |
+ |
+ # Otherwise, check if its a hardlink by seeing if it matches any unique |
+ # hash within the list of hashed files for that file size. |
+ nodelist = self.size_map.get(tarinfo.size, []) |
+ |
+ # If that size bucket is empty, add this file, no need to get the hash until |
+ # we get a bucket collision for the first time.. |
+ if not nodelist: |
+ self.size_map[tarinfo.size] = [filepath] |
+ with open(filepath, 'rb') as fp: |
+ self.__AddFile(tarinfo, fp) |
+ return True |
+ |
+ # If the size collides with anything, we'll need to check hashes. We assume |
+ # no hash collisions for SHA1 on a given bucket, since the number of files |
+ # in a bucket over possible SHA1 values is near zero. |
+ newhash = ComputeFileHash(filepath) |
+ self.file_hashes[filepath] = newhash |
+ |
+ for oldname in nodelist: |
+ oldhash = self.file_hashes.get(oldname, None) |
+ if not oldhash: |
+ oldhash = ComputeFileHash(oldname) |
+ self.file_hashes[oldname] = oldhash |
+ |
+ if oldhash == newhash: |
+ self.__AddLink(tarinfo, tarfile.LNKTYPE, oldname) |
+ return True |
+ |
+ # Otherwise, we missed, so add it to the bucket for this size |
+ self.size_map[tarinfo.size].append(filepath) |
+ with open(filepath, 'rb') as fp: |
+ self.__AddFile(tarinfo, fp) |
+ return True |
+ |
+ def Extract(self): |
+ """Extract the tarfile to the current directory.""" |
+ if self.verbose: |
+ sys.stdout.write('|' + ('-' * 48) + '|\n') |
+ sys.stdout.flush() |
+ dots_outputted = 0 |
+ |
+ win32_symlinks = {} |
+ for m in self.tar: |
+ if self.verbose: |
+ cnt = self.read_file.tell() |
+ curdots = cnt * 50 / self.read_filesize |
+ if dots_outputted < curdots: |
+ for dot in xrange(dots_outputted, curdots): |
+ sys.stdout.write('.') |
+ sys.stdout.flush() |
+ dots_outputted = curdots |
+ |
+ # For hardlinks in Windows, we try to use mklink, and instead copy on |
+ # failure. |
+ if m.islnk() and sys.platform == 'win32': |
+ CreateWin32Link(m.name, m.linkname, self.verbose) |
+ # On Windows we treat symlinks as if they were hard links. |
+ # Proper Windows symlinks supported by everything can be made with |
+ # mklink, but only by an Administrator. The older toolchains are |
+ # built with Cygwin, so they could use Cygwin-style symlinks; but |
+ # newer toolchains do not use Cygwin, and nothing else on the system |
+ # understands Cygwin-style symlinks, so avoid them. |
+ elif m.issym() and sys.platform == 'win32': |
+ # For a hard link, the link target (m.linkname) always appears |
+ # in the archive before the link itself (m.name), so the links |
+ # can just be made on the fly. However, a symlink might well |
+ # appear in the archive before its target file, so there would |
+ # not yet be any file to hard-link to. Hence, we have to collect |
+ # all the symlinks and create them in dependency order at the end. |
+ linkname = m.linkname |
+ if not posixpath.isabs(linkname): |
+ linkname = posixpath.join(posixpath.dirname(m.name), linkname) |
+ linkname = posixpath.normpath(linkname) |
+ win32_symlinks[posixpath.normpath(m.name)] = linkname |
+ # Otherwise, extract normally. |
+ else: |
+ self.tar.extract(m) |
+ |
+ win32_symlinks_left = win32_symlinks.items() |
+ while win32_symlinks_left: |
+ this_symlink = win32_symlinks_left.pop(0) |
+ name, linkname = this_symlink |
+ if linkname in win32_symlinks: |
+ # The target is itself a symlink not yet created. |
+ # Wait for it to come 'round on the guitar. |
+ win32_symlinks_left.append(this_symlink) |
+ else: |
+ del win32_symlinks[name] |
+ CreateWin32Link(name, linkname, self.verbose) |
+ |
+ if self.verbose: |
+ sys.stdout.write('\n') |
+ sys.stdout.flush() |
+ |
+ def List(self): |
+ """List the set of objects in the tarball.""" |
+ for tarinfo in self.tar: |
+ self.__DumpInfo(tarinfo) |
+ |
+ def Close(self): |
+ self.tar.close() |
+ if self.read_file is not None: |
+ self.read_file.close() |
+ self.read_file = None |
+ self.read_filesize = 0 |
+ |
+ |
+def Main(args): |
+ parser = optparse.OptionParser() |
+ # Modes |
+ parser.add_option('-c', '--create', help='Create a tarball.', |
+ action='store_const', const='c', dest='action', default='') |
+ parser.add_option('-x', '--extract', help='Extract a tarball.', |
+ action='store_const', const='x', dest='action') |
+ parser.add_option('-t', '--list', help='List sources in tarball.', |
+ action='store_const', const='t', dest='action') |
+ |
+ # Compression formats |
+ parser.add_option('-j', '--bzip2', help='Create a bz2 tarball.', |
+ action='store_const', const=':bz2', dest='format', default='') |
+ parser.add_option('-z', '--gzip', help='Create a gzip tarball.', |
+ action='store_const', const=':gz', dest='format', ) |
+ # Misc |
+ parser.add_option('-v', '--verbose', help='Use verbose output.', |
+ action='store_true', dest='verbose', default=False) |
+ parser.add_option('-f', '--file', help='Name of tarball.', |
+ dest='filename', default='') |
+ parser.add_option('-C', '--directory', help='Change directory.', |
+ dest='cd', default='') |
+ parser.add_option('--prefix', help='Subdirectory prefix for all paths') |
+ |
+ options, args = parser.parse_args(args[1:]) |
+ if not options.action: |
+ parser.error('Expecting compress or extract') |
+ if not options.filename: |
+ parser.error('Expecting a filename') |
+ |
+ if options.action in ['c'] and not args: |
+ parser.error('Expecting list of sources to add') |
+ if options.action in ['x', 't'] and args: |
+ parser.error('Unexpected source list on extract') |
+ |
+ if options.action == 'c': |
+ mode = 'w' + options.format |
+ else: |
+ mode = 'r'+ options.format |
+ |
+ tar = CygTar(options.filename, mode, verbose=options.verbose) |
+ if options.cd: |
+ os.chdir(options.cd) |
+ |
+ if options.action == 't': |
+ tar.List() |
+ return 0 |
+ |
+ if options.action == 'x': |
+ tar.Extract() |
+ return 0 |
+ |
+ if options.action == 'c': |
+ for filepath in args: |
+ if not tar.Add(filepath, options.prefix): |
+ return -1 |
+ tar.Close() |
+ return 0 |
+ |
+ parser.error('Missing action c, t, or x.') |
+ return -1 |
+ |
+ |
+if __name__ == '__main__': |
+ sys.exit(Main(sys.argv)) |