OLD | NEW |
1 #!/usr/bin/python | 1 #!/usr/bin/python |
2 # Copyright (c) 2012 The Native Client Authors. All rights reserved. | 2 # Copyright (c) 2012 The Native Client Authors. All rights reserved. |
3 # Use of this source code is governed by a BSD-style license that can be | 3 # Use of this source code is governed by a BSD-style license that can be |
4 # found in the LICENSE file. | 4 # found in the LICENSE file. |
5 | 5 |
6 import glob | 6 import glob |
7 import hashlib | 7 import hashlib |
8 import optparse | 8 import optparse |
9 import os | 9 import os |
| 10 import posixpath |
10 import shutil | 11 import shutil |
11 import subprocess | 12 import subprocess |
12 import stat | 13 import stat |
13 import sys | 14 import sys |
14 import tarfile | 15 import tarfile |
15 | 16 |
16 """A Cygwin aware version compress/extract object. | 17 """A Cygwin aware version compress/extract object. |
17 | 18 |
18 This module supports creating and unpacking a tarfile on all platforms. For | 19 This module supports creating and unpacking a tarfile on all platforms. For |
19 Cygwin, Mac, and Linux, it will use the standard tarfile implementation. For | 20 Cygwin, Mac, and Linux, it will use the standard tarfile implementation. For |
(...skipping 102 matching lines...) |
122 return False | 123 return False |
123 except EnvironmentError: | 124 except EnvironmentError: |
124 print 'Try %d: Failed hardlink %s -> %s\n' % (cnt, filepath, targpath) | 125 print 'Try %d: Failed hardlink %s -> %s\n' % (cnt, filepath, targpath) |
125 print 'Giving up.' | 126 print 'Giving up.' |
126 return try_mklink | 127 return try_mklink |
127 | 128 |
128 | 129 |
129 def ComputeFileHash(filepath): | 130 def ComputeFileHash(filepath): |
130 """Generate a sha1 hash for the file at the given path.""" | 131 """Generate a sha1 hash for the file at the given path.""" |
131 sha1 = hashlib.sha1() | 132 sha1 = hashlib.sha1() |
132 sha1.update(open(filepath, 'rb').read()) | 133 with open(filepath, 'rb') as fp: |
| 134 sha1.update(fp.read()) |
133 return sha1.hexdigest() | 135 return sha1.hexdigest() |
134 | 136 |
135 | 137 |
136 def ReadableSizeOf(num): | 138 def ReadableSizeOf(num): |
137 """Convert to a human readable number.""" | 139 """Convert to a human readable number.""" |
138 if num < 1024.0: | 140 if num < 1024.0: |
139 return '[%5dB]' % num | 141 return '[%5dB]' % num |
140 for x in ['B','K','M','G','T']: | 142 for x in ['B','K','M','G','T']: |
141 if num < 1024.0: | 143 if num < 1024.0: |
142 return '[%5.1f%s]' % (num, x) | 144 return '[%5.1f%s]' % (num, x) |
143 num /= 1024.0 | 145 num /= 1024.0 |
144 return '[%dT]' % int(num) | 146 return '[%dT]' % int(num) |
145 | 147 |
146 | 148 |
147 class CygTar(object): | 149 class CygTar(object): |
148 """ CygTar is an object which represents a Win32 and Cygwin aware tarball.""" | 150 """ CygTar is an object which represents a Win32 and Cygwin aware tarball.""" |
149 def __init__(self, filename, mode='r', verbose=False): | 151 def __init__(self, filename, mode='r', verbose=False): |
150 self.size_map = {} | 152 self.size_map = {} |
| 153 self.file_hashes = {} |
151 # Set errorlevel=1 so that fatal errors actually raise! | 154 # Set errorlevel=1 so that fatal errors actually raise! |
152 self.tar = tarfile.open(filename, mode, errorlevel=1) | 155 self.tar = tarfile.open(filename, mode, errorlevel=1) |
153 self.verbose = verbose | 156 self.verbose = verbose |
154 | 157 |
155 def __DumpInfo(self, tarinfo): | 158 def __DumpInfo(self, tarinfo): |
156 """Prints information on a single object in the tarball.""" | 159 """Prints information on a single object in the tarball.""" |
157 typeinfo = '?' | 160 typeinfo = '?' |
158 lnk = '' | 161 lnk = '' |
159 if tarinfo.issym(): | 162 if tarinfo.issym(): |
160 typeinfo = 'S' | 163 typeinfo = 'S' |
(...skipping 15 matching lines...) |
176 self.__DumpInfo(tarinfo) | 179 self.__DumpInfo(tarinfo) |
177 self.tar.addfile(tarinfo, fileobj) | 180 self.tar.addfile(tarinfo, fileobj) |
178 | 181 |
179 def __AddLink(self, tarinfo, linktype, linkpath): | 182 def __AddLink(self, tarinfo, linktype, linkpath): |
180 """Add a Win32 symlink or hardlink to the archive.""" | 183 """Add a Win32 symlink or hardlink to the archive.""" |
181 tarinfo.linkname = linkpath | 184 tarinfo.linkname = linkpath |
182 tarinfo.type = linktype | 185 tarinfo.type = linktype |
183 tarinfo.size = 0 | 186 tarinfo.size = 0 |
184 self.__AddFile(tarinfo) | 187 self.__AddFile(tarinfo) |
185 | 188 |
186 def Add(self, filepath): | 189 def Add(self, filepath, prefix=None): |
187 """Add path filepath to the archive which may be Native style. | 190 """Add path filepath to the archive which may be Native style. |
188 | 191 |
189 Add files individually recursing on directories. For POSIX we use | 192 Add files individually recursing on directories. For POSIX we use |
190 tarfile.addfile directly on symlinks and hardlinks. For files, we | 193 tarfile.addfile directly on symlinks and hardlinks. For files, we |
191 must check if they are duplicates which we convert to hardlinks | 194 must check if they are duplicates which we convert to hardlinks |
192 or Cygwin style symlinks which we convert from a file to a symlink | 195 or Cygwin style symlinks which we convert from a file to a symlink |
193 in the tarfile. All other files are added as a standard file. | 196 in the tarfile. All other files are added as a standard file. |
194 """ | 197 """ |
195 | 198 |
196 # At this point tarinfo.name will contain a POSIX style path regardless | 199 # At this point tarinfo.name will contain a POSIX style path regardless |
197 # of the original filepath. | 200 # of the original filepath. |
198 tarinfo = self.tar.gettarinfo(filepath) | 201 tarinfo = self.tar.gettarinfo(filepath) |
| 202 if prefix: |
| 203 tarinfo.name = posixpath.join(prefix, tarinfo.name) |
| 204 |
199 if sys.platform == 'win32': | 205 if sys.platform == 'win32': |
200 # On win32 os.stat() always claims that files are world writable | 206 # On win32 os.stat() always claims that files are world writable |
201 # which means that unless we remove this bit here we end up with | 207 # which means that unless we remove this bit here we end up with |
202 # world writable files in the archive, which is almost certainly | 208 # world writable files in the archive, which is almost certainly |
203 # not indented. | 209 # not intended. |
204 tarinfo.mode &= ~stat.S_IWOTH | 210 tarinfo.mode &= ~stat.S_IWOTH |
205 tarinfo.mode &= ~stat.S_IWGRP | 211 tarinfo.mode &= ~stat.S_IWGRP |
206 | 212 |
207 # If this is a symlink or hardlink, add it | 213 # If this is a symlink or hardlink, add it |
208 if tarinfo.issym() or tarinfo.islnk(): | 214 if tarinfo.issym() or tarinfo.islnk(): |
209 tarinfo.size = 0 | 215 tarinfo.size = 0 |
210 self.__AddFile(tarinfo) | 216 self.__AddFile(tarinfo) |
211 return True | 217 return True |
212 | 218 |
213 # If it's a directory, then you want to recurse into it | 219 # If it's a directory, then you want to recurse into it |
214 if tarinfo.isdir(): | 220 if tarinfo.isdir(): |
215 self.__AddFile(tarinfo) | 221 self.__AddFile(tarinfo) |
216 native_files = glob.glob(os.path.join(filepath, '*')) | 222 native_files = glob.glob(os.path.join(filepath, '*')) |
217 for native_file in native_files: | 223 for native_file in native_files: |
218 if not self.Add(native_file): return False | 224 if not self.Add(native_file, prefix): return False |
219 return True | 225 return True |
220 | 226 |
221 # At this point we only allow addition of "FILES" | 227 # At this point we only allow addition of "FILES" |
222 if not tarinfo.isfile(): | 228 if not tarinfo.isfile(): |
223 print 'Failed to add non real file: %s' % filepath | 229 print 'Failed to add non real file: %s' % filepath |
224 return False | 230 return False |
225 | 231 |
226 # Now check if it is a Cygwin style link disguised as a file. | 232 # Now check if it is a Cygwin style link disguised as a file. |
227 # We go ahead and check on all platforms just in case we are tar'ing a | 233 # We go ahead and check on all platforms just in case we are tar'ing a |
228 # mount shared with windows. | 234 # mount shared with windows. |
229 if tarinfo.size <= 524: | 235 if tarinfo.size <= 524: |
230 symtext = open(tarinfo.name).read() | 236 with open(filepath) as fp: |
| 237 symtext = fp.read() |
231 if IsCygwinSymlink(symtext): | 238 if IsCygwinSymlink(symtext): |
232 self.__AddLink(tarinfo, tarfile.SYMTYPE, SymDatToPath(symtext)) | 239 self.__AddLink(tarinfo, tarfile.SYMTYPE, SymDatToPath(symtext)) |
233 return True | 240 return True |
234 | 241 |
235 # Otherwise, check if it's a hardlink by seeing if it matches any unique | 242 # Otherwise, check if it's a hardlink by seeing if it matches any unique |
236 # hash within the list of hashed files for that file size. | 243 # hash within the list of hashed files for that file size. |
237 nodelist = self.size_map.get(tarinfo.size, []) | 244 nodelist = self.size_map.get(tarinfo.size, []) |
238 | 245 |
239 # If that size bucket is empty, add this file, no need to get the hash until | 246 # If that size bucket is empty, add this file, no need to get the hash until |
240 # we get a bucket collision for the first time. | 247 # we get a bucket collision for the first time. |
241 if not nodelist: | 248 if not nodelist: |
242 self.size_map[tarinfo.size] = [(tarinfo.name, None)] | 249 self.size_map[tarinfo.size] = [filepath] |
243 fp = open(tarinfo.name, 'rb') | 250 with open(filepath, 'rb') as fp: |
244 self.__AddFile(tarinfo, fp) | 251 self.__AddFile(tarinfo, fp) |
245 fp.close() | |
246 return True | 252 return True |
247 | 253 |
248 # If the size collides with anything, we'll need to check hashes. We assume | 254 # If the size collides with anything, we'll need to check hashes. We assume |
249 # no hash collisions for SHA1 on a given bucket, since the number of files | 255 # no hash collisions for SHA1 on a given bucket, since the number of files |
250 # in a bucket over possible SHA1 values is near zero. | 256 # in a bucket over possible SHA1 values is near zero. |
251 newhash = ComputeFileHash(tarinfo.name) | 257 newhash = ComputeFileHash(filepath) |
252 for (oldname, oldhash) in nodelist: | 258 self.file_hashes[filepath] = newhash |
253 # if this is the first collision, we may need to compute the hash | 259 |
254 # for this first node. | 260 for oldname in nodelist: |
255 if oldhash is None: | 261 oldhash = self.file_hashes.get(oldname, None) |
| 262 if not oldhash: |
256 oldhash = ComputeFileHash(oldname) | 263 oldhash = ComputeFileHash(oldname) |
| 264 self.file_hashes[oldname] = oldhash |
257 | 265 |
258 if oldhash == newhash: | 266 if oldhash == newhash: |
259 self.__AddLink(tarinfo, tarfile.LNKTYPE, oldname) | 267 self.__AddLink(tarinfo, tarfile.LNKTYPE, oldname) |
260 return True | 268 return True |
261 | 269 |
262 # Otherwise, we missed, so add it to the bucket for this size | 270 # Otherwise, we missed, so add it to the bucket for this size |
263 self.size_map[tarinfo.size].append((tarinfo.name, newhash)) | 271 self.size_map[tarinfo.size].append(filepath) |
264 fp = open(tarinfo.name, 'rb') | 272 with open(filepath, 'rb') as fp: |
265 self.__AddFile(tarinfo, fp) | 273 self.__AddFile(tarinfo, fp) |
266 fp.close() | |
267 return True | 274 return True |
268 | 275 |
269 def Extract(self): | 276 def Extract(self): |
270 """Extract the tarfile to the current directory.""" | 277 """Extract the tarfile to the current directory.""" |
271 try_mklink = True | 278 try_mklink = True |
272 div = float(len(self.tar.getmembers())) / 50.0 | 279 div = float(len(self.tar.getmembers())) / 50.0 |
273 dots = 0 | 280 dots = 0 |
274 cnt = 0 | 281 cnt = 0 |
275 | 282 |
276 if self.verbose: | 283 if self.verbose: |
(...skipping 49 matching lines...) |
326 action='store_const', const=':bz2', dest='format', default='') | 333 action='store_const', const=':bz2', dest='format', default='') |
327 parser.add_option('-z', '--gzip', help='Create a gzip tarball.', | 334 parser.add_option('-z', '--gzip', help='Create a gzip tarball.', |
328 action='store_const', const=':gz', dest='format', ) | 335 action='store_const', const=':gz', dest='format', ) |
329 # Misc | 336 # Misc |
330 parser.add_option('-v', '--verbose', help='Use verbose output.', | 337 parser.add_option('-v', '--verbose', help='Use verbose output.', |
331 action='store_true', dest='verbose', default=False) | 338 action='store_true', dest='verbose', default=False) |
332 parser.add_option('-f', '--file', help='Name of tarball.', | 339 parser.add_option('-f', '--file', help='Name of tarball.', |
333 dest='filename', default='') | 340 dest='filename', default='') |
334 parser.add_option('-C', '--directory', help='Change directory.', | 341 parser.add_option('-C', '--directory', help='Change directory.', |
335 dest='cd', default='') | 342 dest='cd', default='') |
| 343 parser.add_option('--prefix', help='Subdirectory prefix for all paths') |
336 | 344 |
337 options, args = parser.parse_args(args[1:]) | 345 options, args = parser.parse_args(args[1:]) |
338 if not options.action: | 346 if not options.action: |
339 parser.error('Expecting compress or extract') | 347 parser.error('Expecting compress or extract') |
340 if not options.filename: | 348 if not options.filename: |
341 parser.error('Expecting a filename') | 349 parser.error('Expecting a filename') |
342 | 350 |
343 if options.action in ['c'] and not args: | 351 if options.action in ['c'] and not args: |
344 parser.error('Expecting list of sources to add') | 352 parser.error('Expecting list of sources to add') |
345 if options.action in ['x', 't'] and args: | 353 if options.action in ['x', 't'] and args: |
(...skipping 11 matching lines...) |
357 if options.action == 't': | 365 if options.action == 't': |
358 tar.List() | 366 tar.List() |
359 return 0 | 367 return 0 |
360 | 368 |
361 if options.action == 'x': | 369 if options.action == 'x': |
362 tar.Extract() | 370 tar.Extract() |
363 return 0 | 371 return 0 |
364 | 372 |
365 if options.action == 'c': | 373 if options.action == 'c': |
366 for filepath in args: | 374 for filepath in args: |
367 if not tar.Add(filepath): | 375 if not tar.Add(filepath, options.prefix): |
368 return -1 | 376 return -1 |
369 tar.Close() | 377 tar.Close() |
370 return 0 | 378 return 0 |
371 | 379 |
372 parser.error('Missing action c, t, or x.') | 380 parser.error('Missing action c, t, or x.') |
373 return -1 | 381 return -1 |
374 | 382 |
375 | 383 |
376 if __name__ == '__main__': | 384 if __name__ == '__main__': |
377 sys.exit(Main(sys.argv)) | 385 sys.exit(Main(sys.argv)) |
OLD | NEW |