| OLD | NEW |
| (Empty) |
| 1 # -*- test-case-name: twisted.python.test.test_zipstream -*- | |
| 2 # Copyright (c) 2001-2008 Twisted Matrix Laboratories. | |
| 3 # See LICENSE for details. | |
| 4 | |
| 5 """ | |
| 6 An incremental approach to unzipping files. This allows you to unzip a little | |
| 7 bit of a file at a time, which means you can report progress as a file unzips. | |
| 8 """ | |
| 9 | |
| 10 import warnings | |
| 11 import zipfile | |
| 12 import os.path | |
| 13 import zlib | |
| 14 import struct | |
| 15 | |
# Number of bytes occupied by the fixed-size part of a per-entry file header,
# computed from the struct format string the zipfile module uses to parse it.
_fileHeaderSize = struct.calcsize(zipfile.structFileHeader)
| 17 | |
class ChunkingZipFile(zipfile.ZipFile):
    """
    A L{zipfile.ZipFile} which can additionally hand out, via L{readfile}, a
    file-like object for any single entry so that the entry can be read a
    piece at a time.
    """

    def readfile(self, name):
        """
        Open the archive member called C{name} for incremental reading.

        @param name: the name of an entry in this archive.

        @return: a file-like object for the entry's data: a L{ZipFileEntry}
            for stored entries, a L{DeflatedZipFileEntry} for deflated ones.

        @raise RuntimeError: if the archive was not opened in mode "r" or
            "a", or if it has already been closed.
        @raise zipfile.BadZipfile: if the entry's header has a bad magic
            number, if the name in the header disagrees with the directory,
            or if the compression method is not supported.
        """
        if self.mode != "r" and self.mode != "a":
            raise RuntimeError('read() requires mode "r" or "a"')
        archive = self.fp
        if not archive:
            raise RuntimeError(
                "Attempt to read ZIP archive that was already closed")
        info = self.getinfo(name)

        # Jump to this member's header and verify its signature.
        archive.seek(info.header_offset, 0)
        rawHeader = archive.read(_fileHeaderSize)
        if rawHeader[0:4] != zipfile.stringFileHeader:
            raise zipfile.BadZipfile("Bad magic number for file header")

        fields = struct.unpack(zipfile.structFileHeader, rawHeader)
        headerName = archive.read(fields[zipfile._FH_FILENAME_LENGTH])

        # Consume the variable-length extra field, if there is one, so the
        # file position is left just past the header.
        extraLength = fields[zipfile._FH_EXTRA_FIELD_LENGTH]
        if extraLength:
            archive.read(extraLength)

        if headerName != info.orig_filename:
            raise zipfile.BadZipfile(
                'File name in directory "%s" and header "%s" differ.' % (
                    info.orig_filename, headerName))

        method = info.compress_type
        if method == zipfile.ZIP_STORED:
            entryType = ZipFileEntry
        elif method == zipfile.ZIP_DEFLATED:
            entryType = DeflatedZipFileEntry
        else:
            raise zipfile.BadZipfile(
                "Unsupported compression method %d for file %s" %
                (info.compress_type, name))
        return entryType(self, info.compress_size)
| 60 | |
| 61 | |
| 62 | |
class _FileEntry(object):
    """
    Abstract superclass of both compressed and uncompressed variants of
    file-like objects within a zip archive.  Subclasses are expected to
    provide C{read}; the line-oriented methods here are built on top of it.

    @ivar chunkingZipFile: a chunking zip file.
    @type chunkingZipFile: L{ChunkingZipFile}

    @ivar length: The number of bytes within the zip file that represent this
        file.  (This is the size on disk, not the number of decompressed bytes
        which will result from reading it.)

    @ivar fp: the underlying file object (that contains pkzip data).  Do not
        touch this, please.  It will quite likely move or go away.

    @ivar closed: File-like 'closed' attribute; False before this file has
        been closed, True after.
    @type closed: L{bool}

    @ivar finished: An older, broken synonym for 'closed'.  Do not touch this,
        please.
    @type finished: L{int}
    """
    def __init__(self, chunkingZipFile, length):
        """
        Create a L{_FileEntry} from a L{ChunkingZipFile}.

        @param chunkingZipFile: the archive this entry belongs to.
        @param length: the number of bytes this entry occupies in the archive.
        """
        self.chunkingZipFile = chunkingZipFile
        self.fp = self.chunkingZipFile.fp
        self.length = length
        self.finished = 0
        self.closed = False


    def isatty(self):
        """
        Returns false because zip files should not be ttys
        """
        return False


    def close(self):
        """
        Close self (file-like object).  Closing an already-closed entry is a
        no-op, as it is for ordinary file objects.
        """
        if self.closed:
            # 'fp' was already dropped by the first close(); deleting it
            # again would raise AttributeError.
            return
        self.closed = True
        self.finished = 1
        del self.fp


    def readline(self):
        """
        Read a line.

        @return: the data up to and including the next newline, or up to the
            end of the entry if no newline remains; empty at end of file.
        """
        pieces = []
        while True:
            byte = self.read(1)
            if not byte:
                break
            pieces.append(byte)
            if byte == "\n":
                break
        # Joining once avoids rebuilding the string for every byte read.
        return "".join(pieces)


    def next(self):
        """
        Implement next as file does (like readline, except raises StopIteration
        at EOF)
        """
        nextline = self.readline()
        if nextline:
            return nextline
        raise StopIteration()

    # Python 3 spelling of the iterator protocol; without it, iterating over
    # an entry (and therefore readlines()) only works on Python 2.
    __next__ = next


    def readlines(self):
        """
        Returns a list of all the lines
        """
        return list(self)


    def xreadlines(self):
        """
        Returns an iterator (so self)
        """
        return self


    def __iter__(self):
        """
        Returns an iterator (so self)
        """
        return self
| 155 | |
| 156 | |
| 157 | |
class ZipFileEntry(_FileEntry):
    """
    File-like object used to read an uncompressed entry in a ZipFile

    @ivar readBytes: the number of bytes of this entry read so far.
    """

    def __init__(self, chunkingZipFile, length):
        """
        Create an entry reader positioned at the start of the entry.

        @param chunkingZipFile: the archive this entry belongs to.
        @param length: the number of bytes this entry occupies in the archive.
        """
        _FileEntry.__init__(self, chunkingZipFile, length)
        self.readBytes = 0


    def tell(self):
        """
        Return the number of bytes of this entry which have been read.
        """
        return self.readBytes


    def read(self, n=None):
        """
        Read up to C{n} bytes of this entry.

        @param n: the maximum number of bytes to return.  C{None} or a
            negative number means everything which has not been read yet.

        @return: the data read; empty once the entry is exhausted or closed.
        """
        remaining = self.length - self.readBytes
        if n is None or n < 0:
            # A negative size must not reach the archive's file object: it
            # would read to the end of the whole archive, past this entry.
            n = remaining
        if n == 0 or self.finished:
            return ''
        data = self.chunkingZipFile.fp.read(min(n, remaining))
        self.readBytes += len(data)
        if self.readBytes == self.length or len(data) < n:
            self.finished = 1
        return data
| 183 | |
| 184 | |
| 185 | |
class DeflatedZipFileEntry(_FileEntry):
    """
    File-like object which inflates a deflated ZipFile entry as it is read.

    @ivar returnedBytes: count of decompressed bytes handed back to callers.
    @ivar readBytes: count of compressed bytes pulled from the archive by
        sized reads.
    @ivar decomp: the zlib decompression object used for this entry.
    @ivar buffer: decompressed data not yet returned to the caller.
    """

    def __init__(self, chunkingZipFile, length):
        """
        Set up the decompressor and counters for a fresh entry.
        """
        _FileEntry.__init__(self, chunkingZipFile, length)
        self.returnedBytes = 0
        self.readBytes = 0
        self.decomp = zlib.decompressobj(-15)
        self.buffer = ""


    def tell(self):
        """
        Report how many decompressed bytes have been returned so far.
        """
        return self.returnedBytes


    def read(self, n=None):
        """
        Return decompressed data from this entry.

        @param n: the number of decompressed bytes wanted, or C{None} to
            get everything that is left in one go.

        @return: the decompressed data; empty once the entry is finished.
        """
        if self.finished:
            return ""

        inflate = self.decomp.decompress

        if n is None:
            # Everything at once: buffered leftovers, the rest of the
            # compressed stream, then the decompressor's final output.
            pieces = [
                self.buffer,
                inflate(self.chunkingZipFile.fp.read(
                    self.length - self.readBytes)),
                inflate("Z"),
                self.decomp.flush(),
            ]
            self.buffer = ""
            self.finished = 1
            everything = "".join(pieces)
            self.returnedBytes += len(everything)
            return everything

        # Keep inflating compressed input until enough output is buffered.
        while len(self.buffer) < n:
            compressed = self.chunkingZipFile.fp.read(
                min(n, 1024, self.length - self.readBytes))
            self.readBytes += len(compressed)
            if compressed:
                self.buffer += inflate(compressed)
                continue
            # No compressed input left: return whatever remains and finish.
            tail = self.buffer + inflate("Z") + self.decomp.flush()
            self.finished = 1
            self.buffer = ""
            self.returnedBytes += len(tail)
            return tail

        head = self.buffer[:n]
        self.buffer = self.buffer[n:]
        self.returnedBytes += len(head)
        return head
| 238 | |
| 239 | |
| 240 | |
def unzip(filename, directory=".", overwrite=0):
    """
    Extract a whole zip archive in a single call.

    @param filename: the name of the zip file
    @param directory: the directory into which the files will be
        extracted
    @param overwrite: if on, overwrite files when they exist.  You can
        still get an error if you try to create a directory over a file
        with the same name or vice-versa.
    """
    # Run the incremental extractor to completion, ignoring its progress
    # values.
    extraction = unzipIter(filename, directory, overwrite)
    for _remaining in extraction:
        continue
| 254 | |
# Mask applied to a zip entry's external_attr; a non-zero result means the
# entry is treated as a directory rather than a file.
DIR_BIT = 16

def unzipIter(filename, directory='.', overwrite=0):
    """
    Return a generator for the zipfile.  This implementation will yield
    after every file.

    The value it yields is the number of files left to unzip.

    @param filename: the name of the zip file
    @param directory: the directory into which the files will be
        extracted; it is created if it does not exist.
    @param overwrite: if on, overwrite files when they exist.  Existing
        files are otherwise left untouched.
    """
    # NOTE(review): entry names are joined onto C{directory} unchecked, so an
    # archive from an untrusted source could name paths outside of it.
    zf = zipfile.ZipFile(filename, 'r')
    try:
        names = zf.namelist()
        if not os.path.exists(directory):
            os.makedirs(directory)
        remaining = len(names)
        for entry in names:
            remaining -= 1
            isdir = zf.getinfo(entry).external_attr & DIR_BIT
            f = os.path.join(directory, entry)
            if isdir:
                # overwrite flag only applies to files
                if not os.path.exists(f):
                    os.makedirs(f)
            else:
                # create the directory the file will be in first,
                # since we can't guarantee it exists
                fdir = os.path.split(f)[0]
                if not os.path.exists(fdir):
                    os.makedirs(fdir)
                if overwrite or not os.path.exists(f):
                    outfile = open(f, 'wb')
                    try:
                        outfile.write(zf.read(entry))
                    finally:
                        outfile.close()
            yield remaining
    finally:
        # Runs on exhaustion, on error, and when the generator is closed.
        zf.close()
| 288 | |
| 289 | |
def countZipFileChunks(filename, chunksize):
    """
    Predict the number of chunks that will be extracted from the entire
    zipfile, given chunksize blocks.

    @param filename: the name of the zip file
    @param chunksize: the maximum size of each chunk.

    @return: the sum of L{countFileChunks} over every entry in the archive.
    """
    zf = ChunkingZipFile(filename)
    try:
        totalchunks = 0
        for info in zf.infolist():
            totalchunks += countFileChunks(info, chunksize)
        return totalchunks
    finally:
        # Only the directory listing is needed; release the file handle.
        zf.close()
| 300 | |
| 301 | |
def countFileChunks(zipinfo, chunksize):
    """
    Work out how many pieces of at most C{chunksize} bytes the entry
    described by C{zipinfo} divides into.

    @param zipinfo: a L{zipfile.ZipInfo} instance describing an entry in a zip
        archive to be counted.
    @param chunksize: the maximum size of each chunk.

    @return: the number of chunks present in the zip file.  (Even an empty file
        counts as one chunk.)
    @rtype: L{int}
    """
    size = zipinfo.file_size
    chunks = size // chunksize
    # A partial trailing chunk still counts as a chunk.
    if size % chunksize > 0:
        chunks += 1
    if not chunks:
        return 1
    return chunks
| 317 | |
| 318 | |
def countZipFileEntries(filename):
    """
    Count the number of entries in a zip archive.  (Don't use this function.)

    Calling this emits a L{DeprecationWarning}.

    @param filename: The filename of a zip archive.
    @type filename: L{str}

    @return: the number of names listed in the archive.
    """
    warnings.warn("countZipFileEntries is deprecated.",
                  DeprecationWarning, 2)
    zf = zipfile.ZipFile(filename)
    try:
        return len(zf.namelist())
    finally:
        # Do not leave the archive's file handle open after counting.
        zf.close()
| 330 | |
| 331 | |
def unzipIterChunky(filename, directory='.', overwrite=0,
                    chunksize=4096):
    """
    Return a generator for the zipfile.  This implementation will yield after
    every chunksize uncompressed bytes, or at the end of a file, whichever
    comes first.

    The value it yields is the number of chunks left to unzip.

    @param filename: the name of the zip file
    @param directory: the directory into which the files will be
        extracted; it is created if it does not exist.
    @param overwrite: if on, overwrite files when they exist.  Existing
        files are otherwise skipped, but still counted off C{remaining}.
    @param chunksize: the maximum number of bytes requested per read, and
        hence written between two yields.
    """
    czf = ChunkingZipFile(filename, 'r')
    try:
        if not os.path.exists(directory):
            os.makedirs(directory)
        remaining = countZipFileChunks(filename, chunksize)

        for info in czf.infolist():
            entry = info.filename
            isdir = info.external_attr & DIR_BIT
            f = os.path.join(directory, entry)
            if isdir:
                # overwrite flag only applies to files
                if not os.path.exists(f):
                    os.makedirs(f)
                remaining -= 1
                yield remaining
            else:
                # create the directory the file will be in first,
                # since we can't guarantee it exists
                fdir = os.path.split(f)[0]
                if not os.path.exists(fdir):
                    os.makedirs(fdir)
                if overwrite or not os.path.exists(f):
                    outfile = open(f, 'wb')
                    try:
                        fp = czf.readfile(entry)
                        if info.file_size == 0:
                            # An empty file still counts as one chunk.
                            remaining -= 1
                            yield remaining
                        while fp.tell() < info.file_size:
                            hunk = fp.read(chunksize)
                            outfile.write(hunk)
                            remaining -= 1
                            yield remaining
                    finally:
                        outfile.close()
                else:
                    # Skipped file: account for all of its chunks at once.
                    remaining -= countFileChunks(info, chunksize)
                    yield remaining
    finally:
        # Runs on exhaustion, on error, and when the generator is closed.
        czf.close()
| OLD | NEW |