OLD | NEW |
| (Empty) |
1 # -*- test-case-name: twisted.python.test.test_zipstream -*- | |
2 # Copyright (c) 2001-2008 Twisted Matrix Laboratories. | |
3 # See LICENSE for details. | |
4 | |
5 """ | |
6 An incremental approach to unzipping files. This allows you to unzip a little | |
7 bit of a file at a time, which means you can report progress as a file unzips. | |
8 """ | |
9 | |
10 import warnings | |
11 import zipfile | |
12 import os.path | |
13 import zlib | |
14 import struct | |
15 | |
# Number of bytes occupied by the fixed-size part of a zip local file header,
# as described by the struct format the stdlib zipfile module uses.
_fileHeaderSize = struct.calcsize(zipfile.structFileHeader)


class ChunkingZipFile(zipfile.ZipFile):
    """
    A L{zipfile.ZipFile} which can also hand out, through L{readfile}, a
    file-like object for an individual entry so that the entry can be read a
    piece at a time.
    """

    def readfile(self, name):
        """
        Return a file-like object for the archive entry called C{name}.

        The underlying archive file is positioned just past the entry's local
        header, so the returned object reads the entry's data directly from
        the shared file pointer.

        @param name: the name of an entry in this archive.

        @return: a L{ZipFileEntry} for a stored entry, or a
            L{DeflatedZipFileEntry} for a deflated one.

        @raise RuntimeError: if the archive was not opened in mode "r" or
            "a", or has already been closed.
        @raise zipfile.BadZipfile: if the local header is malformed, names a
            different file than the central directory does, or uses a
            compression method other than stored or deflated.
        """
        if self.mode != "r" and self.mode != "a":
            raise RuntimeError('read() requires mode "r" or "a"')
        if not self.fp:
            raise RuntimeError(
                "Attempt to read ZIP archive that was already closed")
        info = self.getinfo(name)

        # Jump to the entry's local header and check its signature.
        self.fp.seek(info.header_offset, 0)
        rawHeader = self.fp.read(_fileHeaderSize)
        if rawHeader[0:4] != zipfile.stringFileHeader:
            raise zipfile.BadZipfile("Bad magic number for file header")

        header = struct.unpack(zipfile.structFileHeader, rawHeader)
        headerName = self.fp.read(header[zipfile._FH_FILENAME_LENGTH])

        # The extra field is not used; read it only to skip over it.
        extraLength = header[zipfile._FH_EXTRA_FIELD_LENGTH]
        if extraLength:
            self.fp.read(extraLength)

        if headerName != info.orig_filename:
            raise zipfile.BadZipfile(
                'File name in directory "%s" and header "%s" differ.' % (
                    info.orig_filename, headerName))

        method = info.compress_type
        if method == zipfile.ZIP_STORED:
            return ZipFileEntry(self, info.compress_size)
        if method == zipfile.ZIP_DEFLATED:
            return DeflatedZipFileEntry(self, info.compress_size)
        raise zipfile.BadZipfile(
            "Unsupported compression method %d for file %s" %
            (method, name))
60 | |
61 | |
62 | |
63 class _FileEntry(object): | |
64 """ | |
65 Abstract superclass of both compressed and uncompressed variants of | |
66 file-like objects within a zip archive. | |
67 | |
68 @ivar chunkingZipFile: a chunking zip file. | |
69 @type chunkingZipFile: L{ChunkingZipFile} | |
70 | |
71 @ivar length: The number of bytes within the zip file that represent this | |
72 file. (This is the size on disk, not the number of decompressed bytes | |
73 which will result from reading it.) | |
74 | |
75 @ivar fp: the underlying file object (that contains pkzip data). Do not | |
76 touch this, please. It will quite likely move or go away. | |
77 | |
78 @ivar closed: File-like 'closed' attribute; True before this file has been | |
79 closed, False after. | |
80 @type closed: L{bool} | |
81 | |
82 @ivar finished: An older, broken synonym for 'closed'. Do not touch this, | |
83 please. | |
84 @type finished: L{int} | |
85 """ | |
86 def __init__(self, chunkingZipFile, length): | |
87 """ | |
88 Create a L{_FileEntry} from a L{ChunkingZipFile}. | |
89 """ | |
90 self.chunkingZipFile = chunkingZipFile | |
91 self.fp = self.chunkingZipFile.fp | |
92 self.length = length | |
93 self.finished = 0 | |
94 self.closed = False | |
95 | |
96 | |
97 def isatty(self): | |
98 """ | |
99 Returns false because zip files should not be ttys | |
100 """ | |
101 return False | |
102 | |
103 | |
104 def close(self): | |
105 """ | |
106 Close self (file-like object) | |
107 """ | |
108 self.closed = True | |
109 self.finished = 1 | |
110 del self.fp | |
111 | |
112 | |
113 def readline(self): | |
114 """ | |
115 Read a line. | |
116 """ | |
117 bytes = "" | |
118 for byte in iter(lambda : self.read(1), ""): | |
119 bytes += byte | |
120 if byte == "\n": | |
121 break | |
122 return bytes | |
123 | |
124 | |
125 def next(self): | |
126 """ | |
127 Implement next as file does (like readline, except raises StopIteration | |
128 at EOF) | |
129 """ | |
130 nextline = self.readline() | |
131 if nextline: | |
132 return nextline | |
133 raise StopIteration() | |
134 | |
135 | |
136 def readlines(self): | |
137 """ | |
138 Returns a list of all the lines | |
139 """ | |
140 return list(self) | |
141 | |
142 | |
143 def xreadlines(self): | |
144 """ | |
145 Returns an iterator (so self) | |
146 """ | |
147 return self | |
148 | |
149 | |
150 def __iter__(self): | |
151 """ | |
152 Returns an iterator (so self) | |
153 """ | |
154 return self | |
155 | |
156 | |
157 | |
class ZipFileEntry(_FileEntry):
    """
    File-like object used to read an uncompressed entry in a ZipFile

    @ivar readBytes: the number of bytes of this entry read so far; this is
        also the value reported by L{tell}.
    @type readBytes: L{int}
    """

    def __init__(self, chunkingZipFile, length):
        """
        @param chunkingZipFile: the archive this entry belongs to.
        @param length: the number of bytes the entry occupies in the archive.
        """
        _FileEntry.__init__(self, chunkingZipFile, length)
        self.readBytes = 0


    def tell(self):
        """
        Return the number of bytes read from this entry so far.
        """
        return self.readBytes


    def read(self, n=None):
        """
        Read up to C{n} bytes of the entry.

        @param n: the maximum number of bytes to return.  C{None} or a
            negative number means "everything that is left in this entry".

        @return: the bytes read; an empty string once the entry is exhausted
            or closed.
        """
        remaining = self.length - self.readBytes
        # A negative size must not be handed to the archive's read(): there
        # it means "read to end of file", which would run past this entry
        # into whatever follows it in the archive.
        if n is None or n < 0:
            n = remaining
        if n == 0 or self.finished:
            return ''
        data = self.chunkingZipFile.fp.read(min(n, remaining))
        self.readBytes += len(data)
        # Done when the whole entry has been consumed, or when the archive
        # gave back less than was asked for.
        if self.readBytes == self.length or len(data) < n:
            self.finished = 1
        return data
183 | |
184 | |
185 | |
class DeflatedZipFileEntry(_FileEntry):
    """
    File-like object used to read a deflated entry in a ZipFile

    @ivar returnedBytes: the number of decompressed bytes handed back to the
        caller so far; this is the value reported by L{tell}.
    @ivar readBytes: the number of compressed bytes consumed from the archive
        so far.
    @ivar decomp: the zlib decompression object, created with a window size
        argument of -15.
    @ivar buffer: decompressed data that has been produced but not yet
        returned to the caller.
    """

    def __init__(self, chunkingZipFile, length):
        """
        @param chunkingZipFile: the archive this entry belongs to.
        @param length: the number of compressed bytes the entry occupies in
            the archive.
        """
        _FileEntry.__init__(self, chunkingZipFile, length)
        self.returnedBytes = 0
        self.readBytes = 0
        self.decomp = zlib.decompressobj(-15)
        self.buffer = ""


    def tell(self):
        """
        Return the number of decompressed bytes returned so far.
        """
        return self.returnedBytes


    def read(self, n=None):
        """
        Read and decompress up to C{n} bytes of the entry.

        @param n: the maximum number of decompressed bytes to return.
            C{None} or a negative number means "everything that is left".

        @return: the decompressed bytes; an empty string once the entry is
            exhausted or closed.
        """
        if self.finished:
            return ""

        if n is None or n < 0:
            # Read-all: pull in every remaining compressed byte at once.
            # (A negative size used to fall through to the sliced path
            # below, where buffer[:n] silently dropped data.)
            compressed = self.chunkingZipFile.fp.read(
                self.length - self.readBytes)
            self.readBytes += len(compressed)
            # NOTE(review): the extra "Z" fed in before flush() is kept from
            # the original code; presumably it is the pad byte zlib needs at
            # the end of a headerless deflate stream -- confirm.
            result = "".join([
                self.buffer,
                self.decomp.decompress(compressed),
                self.decomp.decompress("Z"),
                self.decomp.flush()])
            self.buffer = ""
            self.finished = 1
            self.returnedBytes += len(result)
            return result

        # Sized read: keep decompressing small pieces until the buffer can
        # satisfy the request or the compressed input runs out.
        while len(self.buffer) < n:
            data = self.chunkingZipFile.fp.read(
                min(n, 1024, self.length - self.readBytes))
            self.readBytes += len(data)
            if not data:
                # Compressed input exhausted: drain the decompressor and
                # hand back whatever is left, even if shorter than n.
                result = (self.buffer
                          + self.decomp.decompress("Z")
                          + self.decomp.flush())
                self.finished = 1
                self.buffer = ""
                self.returnedBytes += len(result)
                return result
            self.buffer += self.decomp.decompress(data)

        result = self.buffer[:n]
        self.buffer = self.buffer[n:]
        self.returnedBytes += len(result)
        return result
238 | |
239 | |
240 | |
def unzip(filename, directory=".", overwrite=0):
    """
    Extract every entry of a zip archive in one go.

    This simply runs L{unzipIter} to completion and discards the progress
    values it yields.

    @param filename: the name of the zip file
    @param directory: the directory into which the files will be
        extracted
    @param overwrite: if on, overwrite files when they exist.  You can
        still get an error if you try to create a directory over a file
        with the same name or vice-versa.
    """
    progress = unzipIter(filename, directory, overwrite)
    for _remaining in progress:
        # Nothing to do per entry; the extraction happens inside the
        # generator as it is advanced.
        pass
254 | |
# Mask applied to a zip entry's external_attr to tell whether the entry is a
# directory.
DIR_BIT = 16


def unzipIter(filename, directory='.', overwrite=0):
    """
    Return a generator for the zipfile.  This implementation will yield
    after every file.

    The value it yields is the number of files left to unzip.

    @param filename: the name of the zip file
    @param directory: the directory into which the files will be
        extracted; it is created if it does not exist.
    @param overwrite: if on, overwrite files when they exist.  Existing
        files are otherwise left untouched.

    NOTE(review): entry names are joined onto C{directory} as-is, so an
    archive from an untrusted source could name paths outside of it.
    """
    zf = zipfile.ZipFile(filename, 'r')
    try:
        names = zf.namelist()
        if not os.path.exists(directory):
            os.makedirs(directory)
        remaining = len(names)
        for entry in names:
            remaining -= 1
            isdir = zf.getinfo(entry).external_attr & DIR_BIT
            f = os.path.join(directory, entry)
            if isdir:
                # overwrite flag only applies to files
                if not os.path.exists(f):
                    os.makedirs(f)
            else:
                # create the directory the file will be in first,
                # since we can't guarantee it exists.  (This must be the
                # parent directory, not the file's own path, or the file
                # could never be written.)
                fdir = os.path.split(f)[0]
                if not os.path.exists(fdir):
                    os.makedirs(fdir)
                if overwrite or not os.path.exists(f):
                    outfile = open(f, 'wb')
                    try:
                        outfile.write(zf.read(entry))
                    finally:
                        outfile.close()
            yield remaining
    finally:
        # Release the archive whether extraction finished, failed, or the
        # generator was abandoned part-way.
        zf.close()
288 | |
289 | |
def countZipFileChunks(filename, chunksize):
    """
    Predict the number of chunks that will be extracted from the entire
    zipfile, given chunksize blocks.

    @param filename: the name of the zip file
    @param chunksize: the size, in uncompressed bytes, of one chunk.

    @return: the sum of L{countFileChunks} over every entry in the archive.
    """
    zf = ChunkingZipFile(filename)
    try:
        return sum([countFileChunks(info, chunksize)
                    for info in zf.infolist()])
    finally:
        # Only the archive's directory was needed; do not leave it open.
        zf.close()
300 | |
301 | |
def countFileChunks(zipinfo, chunksize):
    """
    Work out how many chunks of C{chunksize} bytes one archive entry will be
    split into.

    @param zipinfo: a L{zipfile.ZipInfo} instance describing an entry in a zip
        archive to be counted; only its C{file_size} is used.
    @param chunksize: the size of one chunk.

    @return: the number of chunks present in the zip file.  (Even an empty file
        counts as one chunk.)
    @rtype: L{int}
    """
    size = zipinfo.file_size
    wholeChunks = size // chunksize
    # A partial chunk at the end still counts as a chunk.
    if size % chunksize > 0:
        total = wholeChunks + 1
    else:
        total = wholeChunks
    if total:
        return total
    return 1
317 | |
318 | |
def countZipFileEntries(filename):
    """
    Count the number of entries in a zip archive.  (Don't use this function.)

    A L{DeprecationWarning} is issued on every call.

    @param filename: The filename of a zip archive.
    @type filename: L{str}

    @return: the number of names listed in the archive.
    @rtype: L{int}
    """
    warnings.warn("countZipFileEntries is deprecated.",
                  DeprecationWarning, 2)
    zf = zipfile.ZipFile(filename)
    try:
        return len(zf.namelist())
    finally:
        # Only the name list was needed; do not leave the archive open.
        zf.close()
330 | |
331 | |
def unzipIterChunky(filename, directory='.', overwrite=0,
                    chunksize=4096):
    """
    Return a generator for the zipfile.  This implementation will yield after
    every chunksize uncompressed bytes, or at the end of a file, whichever
    comes first.

    The value it yields is the number of chunks left to unzip.

    @param filename: the name of the zip file
    @param directory: the directory into which the files will be
        extracted; it is created if it does not exist.
    @param overwrite: if on, overwrite files when they exist.  Existing
        files are otherwise skipped, but still counted off in one step.
    @param chunksize: the maximum number of bytes requested from an entry
        between two yields.
    """
    czf = ChunkingZipFile(filename, 'r')
    try:
        if not os.path.exists(directory):
            os.makedirs(directory)
        remaining = countZipFileChunks(filename, chunksize)
        names = czf.namelist()
        infos = czf.infolist()

        for entry, info in zip(names, infos):
            isdir = info.external_attr & DIR_BIT
            f = os.path.join(directory, entry)
            if isdir:
                # overwrite flag only applies to files
                if not os.path.exists(f):
                    os.makedirs(f)
                remaining -= 1
                yield remaining
            else:
                # create the directory the file will be in first,
                # since we can't guarantee it exists.  (This must be the
                # parent directory, not the file's own path, or the file
                # could never be written.)
                fdir = os.path.split(f)[0]
                if not os.path.exists(fdir):
                    os.makedirs(fdir)
                if overwrite or not os.path.exists(f):
                    outfile = open(f, 'wb')
                    try:
                        fp = czf.readfile(entry)
                        if info.file_size == 0:
                            # An empty file still counts as one chunk.
                            remaining -= 1
                            yield remaining
                        while fp.tell() < info.file_size:
                            hunk = fp.read(chunksize)
                            outfile.write(hunk)
                            remaining -= 1
                            yield remaining
                    finally:
                        outfile.close()
                else:
                    # Skipped file: account for all of its chunks at once.
                    remaining -= countFileChunks(info, chunksize)
                    yield remaining
    finally:
        # Release the archive whether extraction finished, failed, or the
        # generator was abandoned part-way.
        czf.close()
OLD | NEW |