| OLD | NEW |
| 1 #!/usr/bin/python | 1 #!/usr/bin/python |
| 2 # Copyright (c) 2012 The Native Client Authors. All rights reserved. | 2 # Copyright (c) 2012 The Native Client Authors. All rights reserved. |
| 3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
| 4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
| 5 | 5 |
| 6 import glob | 6 import glob |
| 7 import hashlib | 7 import hashlib |
| 8 import optparse | 8 import optparse |
| 9 import os | 9 import os |
| 10 import posixpath |
| 10 import shutil | 11 import shutil |
| 11 import subprocess | 12 import subprocess |
| 12 import stat | 13 import stat |
| 13 import sys | 14 import sys |
| 14 import tarfile | 15 import tarfile |
| 15 | 16 |
| 16 """A Cygwin aware version compress/extract object. | 17 """A Cygwin aware version compress/extract object. |
| 17 | 18 |
| 18 This module supports creating and unpacking a tarfile on all platforms. For | 19 This module supports creating and unpacking a tarfile on all platforms. For |
| 19 Cygwin, Mac, and Linux, it will use the standard tarfile implementation. For | 20 Cygwin, Mac, and Linux, it will use the standard tarfile implementation. For |
| (...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 122 return False | 123 return False |
| 123 except EnvironmentError: | 124 except EnvironmentError: |
| 124 print 'Try %d: Failed hardlink %s -> %s\n' % (cnt, filepath, targpath) | 125 print 'Try %d: Failed hardlink %s -> %s\n' % (cnt, filepath, targpath) |
| 125 print 'Giving up.' | 126 print 'Giving up.' |
| 126 return try_mklink | 127 return try_mklink |
| 127 | 128 |
| 128 | 129 |
| 129 def ComputeFileHash(filepath): | 130 def ComputeFileHash(filepath): |
| 130 """Generate a sha1 hash for the file at the given path.""" | 131 """Generate a sha1 hash for the file at the given path.""" |
| 131 sha1 = hashlib.sha1() | 132 sha1 = hashlib.sha1() |
| 132 sha1.update(open(filepath, 'rb').read()) | 133 with open(filepath, 'rb') as fp: |
| 134 sha1.update(fp.read()) |
| 133 return sha1.hexdigest() | 135 return sha1.hexdigest() |
| 134 | 136 |
| 135 | 137 |
| 136 def ReadableSizeOf(num): | 138 def ReadableSizeOf(num): |
| 137 """Convert to a human readable number.""" | 139 """Convert to a human readable number.""" |
| 138 if num < 1024.0: | 140 if num < 1024.0: |
| 139 return '[%5dB]' % num | 141 return '[%5dB]' % num |
| 140 for x in ['B','K','M','G','T']: | 142 for x in ['B','K','M','G','T']: |
| 141 if num < 1024.0: | 143 if num < 1024.0: |
| 142 return '[%5.1f%s]' % (num, x) | 144 return '[%5.1f%s]' % (num, x) |
| 143 num /= 1024.0 | 145 num /= 1024.0 |
| 144 return '[%dT]' % int(num) | 146 return '[%dT]' % int(num) |
| 145 | 147 |
| 146 | 148 |
| 147 class CygTar(object): | 149 class CygTar(object): |
| 148 """ CygTar is an object which represents a Win32 and Cygwin aware tarball.""" | 150 """ CygTar is an object which represents a Win32 and Cygwin aware tarball.""" |
| 149 def __init__(self, filename, mode='r', verbose=False): | 151 def __init__(self, filename, mode='r', verbose=False): |
| 150 self.size_map = {} | 152 self.size_map = {} |
| 153 self.file_hashes = {} |
| 151 # Set errorlevel=1 so that fatal errors actually raise! | 154 # Set errorlevel=1 so that fatal errors actually raise! |
| 152 self.tar = tarfile.open(filename, mode, errorlevel=1) | 155 self.tar = tarfile.open(filename, mode, errorlevel=1) |
| 153 self.verbose = verbose | 156 self.verbose = verbose |
| 154 | 157 |
| 155 def __DumpInfo(self, tarinfo): | 158 def __DumpInfo(self, tarinfo): |
| 156 """Prints information on a single object in the tarball.""" | 159 """Prints information on a single object in the tarball.""" |
| 157 typeinfo = '?' | 160 typeinfo = '?' |
| 158 lnk = '' | 161 lnk = '' |
| 159 if tarinfo.issym(): | 162 if tarinfo.issym(): |
| 160 typeinfo = 'S' | 163 typeinfo = 'S' |
| (...skipping 15 matching lines...) Expand all Loading... |
| 176 self.__DumpInfo(tarinfo) | 179 self.__DumpInfo(tarinfo) |
| 177 self.tar.addfile(tarinfo, fileobj) | 180 self.tar.addfile(tarinfo, fileobj) |
| 178 | 181 |
| 179 def __AddLink(self, tarinfo, linktype, linkpath): | 182 def __AddLink(self, tarinfo, linktype, linkpath): |
| 180 """Add a Win32 symlink or hardlink to the archive.""" | 183 """Add a Win32 symlink or hardlink to the archive.""" |
| 181 tarinfo.linkname = linkpath | 184 tarinfo.linkname = linkpath |
| 182 tarinfo.type = linktype | 185 tarinfo.type = linktype |
| 183 tarinfo.size = 0 | 186 tarinfo.size = 0 |
| 184 self.__AddFile(tarinfo) | 187 self.__AddFile(tarinfo) |
| 185 | 188 |
| 186 def Add(self, filepath): | 189 def Add(self, filepath, prefix=None): |
| 187 """Add path filepath to the archive which may be Native style. | 190 """Add path filepath to the archive which may be Native style. |
| 188 | 191 |
| 189 Add files individually recursing on directories. For POSIX we use | 192 Add files individually recursing on directories. For POSIX we use |
| 190 tarfile.addfile directly on symlinks and hardlinks. For files, we | 193 tarfile.addfile directly on symlinks and hardlinks. For files, we |
| 191 must check if they are duplicates which we convert to hardlinks | 194 must check if they are duplicates which we convert to hardlinks |
| 192 or Cygwin style symlinks which we convert from a file to a symlink | 195 or Cygwin style symlinks which we convert from a file to a symlink |
| 193 in the tarfile. All other files are added as a standard file. | 196 in the tarfile. All other files are added as a standard file. |
| 194 """ | 197 """ |
| 195 | 198 |
| 196 # At this point tarinfo.name will contain a POSIX style path regardless | 199 # At this point tarinfo.name will contain a POSIX style path regardless |
| 197 # of the original filepath. | 200 # of the original filepath. |
| 198 tarinfo = self.tar.gettarinfo(filepath) | 201 tarinfo = self.tar.gettarinfo(filepath) |
| 202 if prefix: |
| 203 tarinfo.name = posixpath.join(prefix, tarinfo.name) |
| 204 |
| 199 if sys.platform == 'win32': | 205 if sys.platform == 'win32': |
| 200 # On win32 os.stat() always claims that files are world writable | 206 # On win32 os.stat() always claims that files are world writable |
| 201 # which means that unless we remove this bit here we end up with | 207 # which means that unless we remove this bit here we end up with |
| 202 # world writable files in the archive, which is almost certainly | 208 # world writable files in the archive, which is almost certainly |
| 203 # not indented. | 209 # not intended. |
| 204 tarinfo.mode &= ~stat.S_IWOTH | 210 tarinfo.mode &= ~stat.S_IWOTH |
| 205 tarinfo.mode &= ~stat.S_IWGRP | 211 tarinfo.mode &= ~stat.S_IWGRP |
| 206 | 212 |
| 207 # If this is a symlink or hardlink, add it | 213 # If this is a symlink or hardlink, add it |
| 208 if tarinfo.issym() or tarinfo.islnk(): | 214 if tarinfo.issym() or tarinfo.islnk(): |
| 209 tarinfo.size = 0 | 215 tarinfo.size = 0 |
| 210 self.__AddFile(tarinfo) | 216 self.__AddFile(tarinfo) |
| 211 return True | 217 return True |
| 212 | 218 |
| 213 # If it's a directory, then you want to recurse into it | 219 # If it's a directory, then you want to recurse into it |
| 214 if tarinfo.isdir(): | 220 if tarinfo.isdir(): |
| 215 self.__AddFile(tarinfo) | 221 self.__AddFile(tarinfo) |
| 216 native_files = glob.glob(os.path.join(filepath, '*')) | 222 native_files = glob.glob(os.path.join(filepath, '*')) |
| 217 for native_file in native_files: | 223 for native_file in native_files: |
| 218 if not self.Add(native_file): return False | 224 if not self.Add(native_file, prefix): return False |
| 219 return True | 225 return True |
| 220 | 226 |
| 221 # At this point we only allow addition of "FILES" | 227 # At this point we only allow addition of "FILES" |
| 222 if not tarinfo.isfile(): | 228 if not tarinfo.isfile(): |
| 223 print 'Failed to add non real file: %s' % filepath | 229 print 'Failed to add non real file: %s' % filepath |
| 224 return False | 230 return False |
| 225 | 231 |
| 226 # Now check if it is a Cygwin style link disguised as a file. | 232 # Now check if it is a Cygwin style link disguised as a file. |
| 227 # We go ahead and check on all platforms just in case we are tar'ing a | 233 # We go ahead and check on all platforms just in case we are tar'ing a |
| 228 # mount shared with windows. | 234 # mount shared with windows. |
| 229 if tarinfo.size <= 524: | 235 if tarinfo.size <= 524: |
| 230 symtext = open(tarinfo.name).read() | 236 with open(filepath) as fp: |
| 237 symtext = fp.read() |
| 231 if IsCygwinSymlink(symtext): | 238 if IsCygwinSymlink(symtext): |
| 232 self.__AddLink(tarinfo, tarfile.SYMTYPE, SymDatToPath(symtext)) | 239 self.__AddLink(tarinfo, tarfile.SYMTYPE, SymDatToPath(symtext)) |
| 233 return True | 240 return True |
| 234 | 241 |
| 235 # Otherwise, check if it's a hardlink by seeing if it matches any unique | 242 # Otherwise, check if it's a hardlink by seeing if it matches any unique |
| 236 # hash within the list of hashed files for that file size. | 243 # hash within the list of hashed files for that file size. |
| 237 nodelist = self.size_map.get(tarinfo.size, []) | 244 nodelist = self.size_map.get(tarinfo.size, []) |
| 238 | 245 |
| 239 # If that size bucket is empty, add this file, no need to get the hash until | 246 # If that size bucket is empty, add this file, no need to get the hash until |
| 240 # we get a bucket collision for the first time. | 247 # we get a bucket collision for the first time. |
| 241 if not nodelist: | 248 if not nodelist: |
| 242 self.size_map[tarinfo.size] = [(tarinfo.name, None)] | 249 self.size_map[tarinfo.size] = [filepath] |
| 243 fp = open(tarinfo.name, 'rb') | 250 with open(filepath, 'rb') as fp: |
| 244 self.__AddFile(tarinfo, fp) | 251 self.__AddFile(tarinfo, fp) |
| 245 fp.close() | |
| 246 return True | 252 return True |
| 247 | 253 |
| 248 # If the size collides with anything, we'll need to check hashes. We assume | 254 # If the size collides with anything, we'll need to check hashes. We assume |
| 249 # no hash collisions for SHA1 on a given bucket, since the number of files | 255 # no hash collisions for SHA1 on a given bucket, since the number of files |
| 250 # in a bucket over possible SHA1 values is near zero. | 256 # in a bucket over possible SHA1 values is near zero. |
| 251 newhash = ComputeFileHash(tarinfo.name) | 257 newhash = ComputeFileHash(filepath) |
| 252 for (oldname, oldhash) in nodelist: | 258 self.file_hashes[filepath] = newhash |
| 253 # if this is the first collision, we may need to compute the hash | 259 |
| 254 # for this first node. | 260 for oldname in nodelist: |
| 255 if oldhash is None: | 261 oldhash = self.file_hashes.get(oldname, None) |
| 262 if not oldhash: |
| 256 oldhash = ComputeFileHash(oldname) | 263 oldhash = ComputeFileHash(oldname) |
| 264 self.file_hashes[oldname] = oldhash |
| 257 | 265 |
| 258 if oldhash == newhash: | 266 if oldhash == newhash: |
| 259 self.__AddLink(tarinfo, tarfile.LNKTYPE, oldname) | 267 self.__AddLink(tarinfo, tarfile.LNKTYPE, oldname) |
| 260 return True | 268 return True |
| 261 | 269 |
| 262 # Otherwise, we missed, so add it to the bucket for this size | 270 # Otherwise, we missed, so add it to the bucket for this size |
| 263 self.size_map[tarinfo.size].append((tarinfo.name, newhash)) | 271 self.size_map[tarinfo.size].append(filepath) |
| 264 fp = open(tarinfo.name, 'rb') | 272 with open(filepath, 'rb') as fp: |
| 265 self.__AddFile(tarinfo, fp) | 273 self.__AddFile(tarinfo, fp) |
| 266 fp.close() | |
| 267 return True | 274 return True |
| 268 | 275 |
| 269 def Extract(self): | 276 def Extract(self): |
| 270 """Extract the tarfile to the current directory.""" | 277 """Extract the tarfile to the current directory.""" |
| 271 try_mklink = True | 278 try_mklink = True |
| 272 div = float(len(self.tar.getmembers())) / 50.0 | 279 div = float(len(self.tar.getmembers())) / 50.0 |
| 273 dots = 0 | 280 dots = 0 |
| 274 cnt = 0 | 281 cnt = 0 |
| 275 | 282 |
| 276 if self.verbose: | 283 if self.verbose: |
| (...skipping 49 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 326 action='store_const', const=':bz2', dest='format', default='') | 333 action='store_const', const=':bz2', dest='format', default='') |
| 327 parser.add_option('-z', '--gzip', help='Create a gzip tarball.', | 334 parser.add_option('-z', '--gzip', help='Create a gzip tarball.', |
| 328 action='store_const', const=':gz', dest='format', ) | 335 action='store_const', const=':gz', dest='format', ) |
| 329 # Misc | 336 # Misc |
| 330 parser.add_option('-v', '--verbose', help='Use verbose output.', | 337 parser.add_option('-v', '--verbose', help='Use verbose output.', |
| 331 action='store_true', dest='verbose', default=False) | 338 action='store_true', dest='verbose', default=False) |
| 332 parser.add_option('-f', '--file', help='Name of tarball.', | 339 parser.add_option('-f', '--file', help='Name of tarball.', |
| 333 dest='filename', default='') | 340 dest='filename', default='') |
| 334 parser.add_option('-C', '--directory', help='Change directory.', | 341 parser.add_option('-C', '--directory', help='Change directory.', |
| 335 dest='cd', default='') | 342 dest='cd', default='') |
| 343 parser.add_option('--prefix', help='Subdirectory prefix for all paths') |
| 336 | 344 |
| 337 options, args = parser.parse_args(args[1:]) | 345 options, args = parser.parse_args(args[1:]) |
| 338 if not options.action: | 346 if not options.action: |
| 339 parser.error('Expecting compress or extract') | 347 parser.error('Expecting compress or extract') |
| 340 if not options.filename: | 348 if not options.filename: |
| 341 parser.error('Expecting a filename') | 349 parser.error('Expecting a filename') |
| 342 | 350 |
| 343 if options.action in ['c'] and not args: | 351 if options.action in ['c'] and not args: |
| 344 parser.error('Expecting list of sources to add') | 352 parser.error('Expecting list of sources to add') |
| 345 if options.action in ['x', 't'] and args: | 353 if options.action in ['x', 't'] and args: |
| (...skipping 11 matching lines...) Expand all Loading... |
| 357 if options.action == 't': | 365 if options.action == 't': |
| 358 tar.List() | 366 tar.List() |
| 359 return 0 | 367 return 0 |
| 360 | 368 |
| 361 if options.action == 'x': | 369 if options.action == 'x': |
| 362 tar.Extract() | 370 tar.Extract() |
| 363 return 0 | 371 return 0 |
| 364 | 372 |
| 365 if options.action == 'c': | 373 if options.action == 'c': |
| 366 for filepath in args: | 374 for filepath in args: |
| 367 if not tar.Add(filepath): | 375 if not tar.Add(filepath, options.prefix): |
| 368 return -1 | 376 return -1 |
| 369 tar.Close() | 377 tar.Close() |
| 370 return 0 | 378 return 0 |
| 371 | 379 |
| 372 parser.error('Missing action c, t, or x.') | 380 parser.error('Missing action c, t, or x.') |
| 373 return -1 | 381 return -1 |
| 374 | 382 |
| 375 | 383 |
| 376 if __name__ == '__main__': | 384 if __name__ == '__main__': |
| 377 sys.exit(Main(sys.argv)) | 385 sys.exit(Main(sys.argv)) |
| OLD | NEW |