OLD | NEW |
(Empty) | |
| 1 #!/usr/bin/python |
| 2 # Copyright (c) 2012 The Native Client Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. |
| 5 |
| 6 # This file was copied, unmodified, from the native_client repository: |
| 7 # https://chromium.googlesource.com/native_client/src/native_client/+/master/bui
ld/cygtar.py |
| 8 |
| 9 import glob |
| 10 import hashlib |
| 11 import optparse |
| 12 import os |
| 13 import posixpath |
| 14 import shutil |
| 15 import subprocess |
| 16 import stat |
| 17 import sys |
| 18 import tarfile |
| 19 |
| 20 """A Cygwin aware version compress/extract object. |
| 21 |
| 22 This module supports creating and unpacking a tarfile on all platforms. For |
| 23 Cygwin, Mac, and Linux, it will use the standard tarfile implementation. For |
| 24 Win32 it will detect Cygwin style symlinks as it archives and convert them to |
| 25 symlinks. |
| 26 |
| 27 For Win32, it is unfortunate that os.stat does not return a FileID in the ino |
| 28 field which would allow us to correctly determine which files are hardlinks, so |
| 29 instead we assume that any files in the archive that are an exact match are |
| 30 hardlinks to the same data. |
| 31 |
| 32 We know they are not Symlinks because we are using Cygwin style symlinks only, |
| 33 which appear to Win32 as a normal file. |
| 34 |
| 35 All paths stored and retrieved from a TAR file are expected to be POSIX style, |
| 36 Win32 style paths will be rejected. |
| 37 |
| 38 NOTE: |
| 39 All paths represented by the tarfile and all API functions are POSIX style paths |
| 40 except for CygTar.Add which assumes a Native path. |
| 41 """ |
| 42 |
| 43 |
def ToNativePath(native_path):
  """Return the path with native separators.

  On win32 every forward slash is replaced with a backslash; on every other
  platform the path is returned untouched.
  """
  on_windows = sys.platform == 'win32'
  return native_path.replace('/', '\\') if on_windows else native_path
| 49 |
| 50 |
def IsCygwinSymlink(symtext):
  """Tell whether the text starts with the Cygwin symlink marker."""
  cygwin_tag = '!<symlink>\xff\xfe'
  return symtext[:len(cygwin_tag)] == cygwin_tag
| 54 |
| 55 |
def SymDatToPath(symtext):
  """Recover the link target stored in Cygwin style symlink data.

  The first 12 characters (the symlink marker) are skipped and every NUL
  character in the remainder is dropped.
  """
  payload = symtext[12:]
  return payload.replace('\x00', '')
| 59 |
| 60 |
def PathToSymDat(filepath):
  """Build Cygwin style symlink data that points at filepath.

  The result is the symlink marker, then each character of filepath followed
  by a NUL, then a two-NUL terminator.
  """
  pieces = ['!<symlink>\xff\xfe']
  for ch in filepath:
    pieces.append(ch)
    pieces.append('\x00')
  pieces.append('\x00\x00')
  return ''.join(pieces)
| 67 |
| 68 |
def CreateWin32Link(filepath, targpath, verbose):
  """Create filepath as a link to targpath on Win32, copying as a fallback.

  Runs `mklink` through cmd to create a junction (/J) when targpath is a
  directory, or a hardlink (/H) otherwise.  If the command cannot be launched
  at all, that kind of link is assumed to be unavailable and is not attempted
  again by later calls.  Whenever no link ends up at filepath, targpath is
  copied there instead, with up to three attempts.

  Args:
    filepath: Path of the link to create.
    targpath: Path of the existing file or directory to link to.
    verbose: If true, print a message for each failure.

  Returns:
    False if the target was copied.  None if a link was created, or if every
    copy attempt failed.
  """
  targ_is_dir = os.path.isdir(targpath)

  # Pick the mklink flavour, unless an earlier call found it unusable.
  mklink_flag = None
  if targ_is_dir:
    # Creating a link to a directory will fail, but a junction (which is more
    # like a symlink) will work.
    if CreateWin32Link.try_junction:
      mklink_flag = '/J'
  elif CreateWin32Link.try_hardlink:
    mklink_flag = '/H'

  # Assume an error; if subprocess succeeds, then it should return 0.
  err = 1
  if mklink_flag:
    # NOTE(review): the paths are interpolated unquoted into the command
    # string, so paths containing spaces or cmd metacharacters will not work.
    cmd = ['cmd', '/C', 'mklink %s %s %s' % (
        mklink_flag, ToNativePath(filepath), ToNativePath(targpath))]
    try:
      # A single devnull handle is shared by both streams and closed as soon
      # as the command finishes.
      with open(os.devnull, 'wb') as devnull:
        err = subprocess.call(cmd, stdout=devnull, stderr=devnull)
    except EnvironmentError:
      if targ_is_dir:
        CreateWin32Link.try_junction = False
      else:
        CreateWin32Link.try_hardlink = False

  if not err and os.path.exists(filepath):
    return None

  # If we failed to create a link, then just copy it.  We wrap this in a
  # retry for Windows which often has stale file lock issues.
  if targ_is_dir and verbose:
    print('Failed to create junction %s -> %s. Copying instead.\n' % (
        filepath, targpath))

  for cnt in range(1, 4):
    try:
      if targ_is_dir:
        shutil.copytree(targpath, filepath)
      else:
        shutil.copyfile(targpath, filepath)
      return False
    except EnvironmentError:
      if verbose:
        print('Try %d: Failed hardlink %s -> %s\n' % (cnt, filepath, targpath))
  if verbose:
    print('Giving up.')
  return None


# Whether mklink is still worth trying for each kind of link.  Cleared by
# CreateWin32Link when launching mklink raises an error.
CreateWin32Link.try_hardlink = True
CreateWin32Link.try_junction = True
| 125 |
| 126 |
| 127 |
def ComputeFileHash(filepath):
  """Generate a sha1 hash for the file at the given path.

  The file is read in fixed-size chunks so that large files do not have to be
  held in memory all at once.

  Args:
    filepath: Path of the file to hash.

  Returns:
    The SHA1 digest of the file contents as a hexadecimal string.
  """
  sha1 = hashlib.sha1()
  with open(filepath, 'rb') as fp:
    for chunk in iter(lambda: fp.read(1024 * 1024), b''):
      sha1.update(chunk)
  return sha1.hexdigest()
| 134 |
| 135 |
def ReadableSizeOf(num):
  """Convert a byte count to a short human readable string.

  Values below 1024 are shown as whole bytes.  Larger values are scaled by
  1024 per step and shown with one decimal in K, M, G or T.  Values of 1024T
  or more are shown as a whole number of T.

  Args:
    num: Size in bytes.

  Returns:
    The size formatted inside square brackets, e.g. '[  1.5K]'.
  """
  if num < 1024.0:
    return '[%5dB]' % num
  for unit in ('K', 'M', 'G', 'T'):
    num /= 1024.0
    if num < 1024.0:
      return '[%5.1f%s]' % (num, unit)
  # num is already expressed in T here; it must not be scaled down again.
  return '[%dT]' % int(num)
| 145 |
| 146 |
class CygTar(object):
  """A Win32 and Cygwin aware tarball.

  Wraps a tarfile.TarFile.  When adding, Cygwin style symlink files are
  stored as symlinks and files with identical contents are stored as
  hardlinks.  When extracting on win32, hardlinks and symlinks are recreated
  with CreateWin32Link.
  """

  def __init__(self, filename, mode='r', verbose=False):
    """Open the tarball at filename.

    Args:
      filename: Path of the archive.
      mode: A tarfile.open() mode string such as 'r', 'r:gz' or 'w:bz2'.
      verbose: If true, print each member as it is added and show progress
        while extracting.
    """
    # File size -> native paths of the files stored as regular members.
    self.size_map = {}
    # Native path -> SHA1 hex digest, computed lazily on a size collision.
    self.file_hashes = {}
    # Native path -> name of the archive member stored for that file.
    self.archive_names = {}
    self.verbose = verbose
    # Set errorlevel=1 so that fatal errors actually raise!
    if 'r' in mode:
      # The file is opened here so that Extract can report progress from the
      # read position.
      self.read_file = open(filename, 'rb')
      try:
        self.read_filesize = os.path.getsize(filename)
        self.tar = tarfile.open(mode=mode, fileobj=self.read_file,
                                errorlevel=1)
      except Exception:
        # Do not leak the handle when the archive cannot be opened.
        self.read_file.close()
        raise
    else:
      self.read_file = None
      self.read_filesize = 0
      self.tar = tarfile.open(filename, mode=mode, errorlevel=1)

  def __DumpInfo(self, tarinfo):
    """Print information on a single object in the tarball.

    Returns:
      The tarinfo that was passed in.
    """
    typeinfo = '?'
    lnk = ''
    if tarinfo.issym():
      typeinfo = 'S'
      lnk = '-> ' + tarinfo.linkname
    if tarinfo.islnk():
      typeinfo = 'H'
      lnk = '-> ' + tarinfo.linkname
    if tarinfo.isdir():
      typeinfo = 'D'
    if tarinfo.isfile():
      typeinfo = 'F'
    readable_size = ReadableSizeOf(tarinfo.size)
    print('%s %s : %s %s' % (readable_size, typeinfo, tarinfo.name, lnk))
    return tarinfo

  def __AddFile(self, tarinfo, fileobj=None):
    """Add a member to the archive, printing it first when verbose."""
    if self.verbose:
      self.__DumpInfo(tarinfo)
    self.tar.addfile(tarinfo, fileobj)

  def __AddLink(self, tarinfo, linktype, linkpath):
    """Add tarinfo to the archive as a link of linktype pointing at linkpath."""
    tarinfo.linkname = linkpath
    tarinfo.type = linktype
    tarinfo.size = 0
    self.__AddFile(tarinfo)

  def Add(self, filepath, prefix=None):
    """Add path filepath to the archive which may be Native style.

    Add files individually recursing on directories.  For POSIX we use
    tarfile.addfile directly on symlinks and hardlinks.  For files, we must
    check if they are duplicates which we convert to hardlinks or symlinks
    which we convert from a file to a symlink in the tarfile.  All other files
    are added as a standard file.

    Args:
      filepath: Native path of the file or directory to add.
      prefix: Optional POSIX directory prepended to every member name.

    Returns:
      True on success, False if something could not be added.
    """
    # At this point tarinfo.name will contain a POSIX style path regardless
    # of the original filepath.
    tarinfo = self.tar.gettarinfo(filepath)
    if tarinfo is None:
      # gettarinfo() returns None for file types tarfile cannot represent.
      print('Failed to add non real file: %s' % filepath)
      return False
    if prefix:
      tarinfo.name = posixpath.join(prefix, tarinfo.name)

    if sys.platform == 'win32':
      # On win32 os.stat() always claims that files are world writable
      # which means that unless we remove this bit here we end up with
      # world writable files in the archive, which is almost certainly
      # not intended.
      tarinfo.mode &= ~stat.S_IWOTH
      tarinfo.mode &= ~stat.S_IWGRP

      # If we want cygwin to be able to extract this archive and use
      # executables and dll files we need to mark all the archive members as
      # executable.  This is essentially what happens anyway when the
      # archive is extracted on win32.
      # NOTE(review): kept inside the win32 branch, as the comment above
      # implies - confirm against the original indentation.
      tarinfo.mode |= stat.S_IXUSR | stat.S_IXOTH | stat.S_IXGRP

    # If this is a symlink or hardlink, add it.
    if tarinfo.issym() or tarinfo.islnk():
      tarinfo.size = 0
      self.__AddFile(tarinfo)
      return True

    # If it's a directory, then you want to recurse into it.
    if tarinfo.isdir():
      self.__AddFile(tarinfo)
      # NOTE(review): the '*' pattern does not match names starting with a
      # dot, so hidden entries are not archived.
      for native_file in glob.glob(os.path.join(filepath, '*')):
        if not self.Add(native_file, prefix):
          return False
      return True

    # At this point we only allow addition of "FILES".
    if not tarinfo.isfile():
      print('Failed to add non real file: %s' % filepath)
      return False

    # Now check if it is a Cygwin style link disguised as a file.
    # We go ahead and check on all platforms just in case we are tar'ing a
    # mount shared with windows.
    if tarinfo.size <= 524:
      # Binary mode: the symlink data holds NUL and 0xff/0xfe bytes, which
      # text mode on Windows may translate or truncate.
      with open(filepath, 'rb') as fp:
        symtext = fp.read()
      if IsCygwinSymlink(symtext):
        self.__AddLink(tarinfo, tarfile.SYMTYPE, SymDatToPath(symtext))
        return True

    # Otherwise, check if it's a hardlink by seeing if it matches any unique
    # hash within the list of hashed files for that file size.
    nodelist = self.size_map.setdefault(tarinfo.size, [])

    # If the size collides with anything, we'll need to check hashes.  We
    # assume no hash collisions for SHA1 on a given bucket, since the number
    # of files in a bucket over possible SHA1 values is near zero.  An empty
    # bucket needs no hash until the first collision.
    if nodelist:
      newhash = ComputeFileHash(filepath)
      self.file_hashes[filepath] = newhash

      for oldname in nodelist:
        oldhash = self.file_hashes.get(oldname, None)
        if not oldhash:
          oldhash = ComputeFileHash(oldname)
          self.file_hashes[oldname] = oldhash

        if oldhash == newhash:
          # A hardlink must name the member already stored in the archive,
          # which differs from the native path when a prefix or Windows
          # separators are involved.
          target = self.archive_names.get(oldname, oldname)
          self.__AddLink(tarinfo, tarfile.LNKTYPE, target)
          return True

    # No match, so store the contents and remember the file for later
    # duplicate detection.
    nodelist.append(filepath)
    self.archive_names[filepath] = tarinfo.name
    with open(filepath, 'rb') as fp:
      self.__AddFile(tarinfo, fp)
    return True

  def Extract(self):
    """Extract the tarfile to the current directory.

    NOTE(review): member paths are not validated before extraction, so this
    should only be used on trusted archives.

    Raises:
      tarfile.ExtractError: on win32, if symlinks in the archive refer to
        each other in a cycle and therefore cannot be recreated.
    """
    if self.verbose:
      sys.stdout.write('|' + ('-' * 48) + '|\n')
      sys.stdout.flush()
    dots_outputted = 0
    is_win32 = sys.platform == 'win32'

    win32_symlinks = {}
    for m in self.tar:
      # Progress is the read position within the archive file, scaled to 50
      # dots.  Skipped when the size is unknown to avoid dividing by zero.
      if self.verbose and self.read_filesize:
        cnt = self.read_file.tell()
        curdots = cnt * 50 // self.read_filesize
        if dots_outputted < curdots:
          sys.stdout.write('.' * (curdots - dots_outputted))
          sys.stdout.flush()
          dots_outputted = curdots

      # For hardlinks in Windows, we try to use mklink, and instead copy on
      # failure.
      if is_win32 and m.islnk():
        CreateWin32Link(m.name, m.linkname, self.verbose)
      # On Windows we treat symlinks as if they were hard links.
      # Proper Windows symlinks supported by everything can be made with
      # mklink, but only by an Administrator.  The older toolchains are
      # built with Cygwin, so they could use Cygwin-style symlinks; but
      # newer toolchains do not use Cygwin, and nothing else on the system
      # understands Cygwin-style symlinks, so avoid them.
      elif is_win32 and m.issym():
        # For a hard link, the link target (m.linkname) always appears
        # in the archive before the link itself (m.name), so the links
        # can just be made on the fly.  However, a symlink might well
        # appear in the archive before its target file, so there would
        # not yet be any file to hard-link to.  Hence, we have to collect
        # all the symlinks and create them in dependency order at the end.
        linkname = m.linkname
        if not posixpath.isabs(linkname):
          linkname = posixpath.join(posixpath.dirname(m.name), linkname)
        linkname = posixpath.normpath(linkname)
        win32_symlinks[posixpath.normpath(m.name)] = linkname
      # Otherwise, extract normally.
      else:
        self.tar.extract(m)

    # Create the collected symlinks, deferring any whose target is itself a
    # symlink that has not been created yet.
    pending = list(win32_symlinks.items())
    while pending:
      deferred = []
      for name, linkname in pending:
        if linkname in win32_symlinks:
          # The target is itself a symlink not yet created.
          deferred.append((name, linkname))
        else:
          del win32_symlinks[name]
          CreateWin32Link(name, linkname, self.verbose)
      if len(deferred) == len(pending):
        # A full pass made no progress: the remaining links point at each
        # other (or at themselves) and can never be resolved.
        raise tarfile.ExtractError(
            'Cannot resolve cyclic symlinks: %s' %
            ', '.join(sorted(name for name, _ in deferred)))
      pending = deferred

    if self.verbose:
      sys.stdout.write('\n')
      sys.stdout.flush()

  def List(self):
    """List the set of objects in the tarball."""
    for tarinfo in self.tar:
      self.__DumpInfo(tarinfo)

  def Close(self):
    """Close the tarball and, in read mode, the underlying file."""
    self.tar.close()
    if self.read_file is not None:
      self.read_file.close()
      self.read_file = None
      self.read_filesize = 0
| 358 |
| 359 |
def Main(args):
  """Command line entry point: create, extract or list a tarball.

  Args:
    args: The full argument vector, including the program name at index 0.

  Returns:
    0 on success, -1 if a source could not be added while creating.  Invalid
    command lines are reported through the option parser, which exits.
  """
  parser = optparse.OptionParser()
  # Modes
  parser.add_option('-c', '--create', help='Create a tarball.',
                    action='store_const', const='c', dest='action', default='')
  parser.add_option('-x', '--extract', help='Extract a tarball.',
                    action='store_const', const='x', dest='action')
  parser.add_option('-t', '--list', help='List sources in tarball.',
                    action='store_const', const='t', dest='action')

  # Compression formats
  parser.add_option('-j', '--bzip2', help='Create a bz2 tarball.',
                    action='store_const', const=':bz2', dest='format',
                    default='')
  parser.add_option('-z', '--gzip', help='Create a gzip tarball.',
                    action='store_const', const=':gz', dest='format')
  # Misc
  parser.add_option('-v', '--verbose', help='Use verbose output.',
                    action='store_true', dest='verbose', default=False)
  parser.add_option('-f', '--file', help='Name of tarball.',
                    dest='filename', default='')
  parser.add_option('-C', '--directory', help='Change directory.',
                    dest='cd', default='')
  parser.add_option('--prefix', help='Subdirectory prefix for all paths')

  options, args = parser.parse_args(args[1:])
  if not options.action:
    parser.error('Expecting an action: -c, -x, or -t')
  if not options.filename:
    parser.error('Expecting a filename')

  if options.action == 'c' and not args:
    parser.error('Expecting list of sources to add')
  if options.action in ['x', 't'] and args:
    parser.error('Unexpected source list on extract')

  if options.action == 'c':
    mode = 'w' + options.format
  else:
    mode = 'r' + options.format

  # The archive is opened before changing directory, so a relative filename
  # is resolved against the original working directory.
  tar = CygTar(options.filename, mode, verbose=options.verbose)
  try:
    if options.cd:
      os.chdir(options.cd)

    if options.action == 't':
      tar.List()
    elif options.action == 'x':
      tar.Extract()
    else:
      # The only remaining action is 'c'.
      for filepath in args:
        if not tar.Add(filepath, options.prefix):
          return -1
    return 0
  finally:
    # Always release the archive, whichever action ran and however it ended.
    tar.Close()
| 421 |
| 422 |
# Run as a script: hand the command line to Main and exit with its result.
if __name__ == '__main__':
  exit_status = Main(sys.argv)
  sys.exit(exit_status)
OLD | NEW |