OLD | NEW |
---|---|
(Empty) | |
1 # Copyright 2016 The LUCI Authors. All rights reserved. | |
2 # Use of this source code is governed under the Apache License, Version 2.0 | |
3 # that can be found in the LICENSE file. | |
4 | |
5 import collections | |
6 import doctest | |
7 import os | |
8 import shutil | |
9 import stat | |
10 import struct | |
11 | |
# Global header written at the very start of every ar archive.
AR_MAGIC_START = '!<arch>\n'
# Two byte terminator that ends every per-member header.
AR_MAGIC_BIT = '\x60\n'
# Byte written after a member whose on-disk data length is odd.
AR_PADDING = '\n'

# Names of the header variants this module distinguishes.
AR_FORMAT_SIMPLE = 'Simple Format'
AR_FORMAT_BSD = '4.4BSD Format'
AR_FORMAT_SYSV = 'System V / GNU Format'

# Metadata used by ArInfo.fromdefault() in place of values from stat().
AR_DEFAULT_MTIME = 1447140471
AR_DEFAULT_UID = 1000
AR_DEFAULT_GID = 1000
# Regular file (S_IFREG) with permissions rw-r-----.  Spelled with the `0o`
# prefix, which is accepted by Python 2.6+ and Python 3; the legacy `0100640`
# spelling is a syntax error on Python 3.
AR_DEFAULT_MODE = 0o100640

# Fixed 60 byte member header: name, mtime, uid, gid, mode, size, magic.
_ArInfoStruct = struct.Struct('16s 12s 6s 6s 8s 10s 2s')
# Simple format names are space padded, so they cannot contain a space.
_ArFormatSimpleForbidden = ' '
M-A Ruel
2016/06/22 14:21:31
use single quote thoroughly.
I don't think it's wo
mithro
2016/06/23 07:10:54
Done.
| |
27 | |
# Plain record holding the metadata of one archive member; ArInfo subclasses
# it to add parsing and serialisation.
_ArInfoBase = collections.namedtuple(
    'ArInfo', 'format name size mtime uid gid mode')
M-A Ruel
2016/06/22 14:21:32
please add empty lines between.
mithro
2016/06/23 07:10:54
Done.
| |
class ArInfo(_ArInfoBase):
    """An ArInfo object represents one member in an ArFile.

    It does *not* contain the file's data.

    The fields are `format`, `name`, `size`, `mtime`, `uid`, `gid` and `mode`.
    `size` is the length of the member's content only; `datasize` is the
    length recorded in the header, which for the BSD format also counts the
    file name stored in front of the content.
    """

    @staticmethod
    def _format(path, arformat=None):
        u"""Return the ar format to use for a member named `path`.

        Allow forcing the format to a given type
        >>> assert ArInfo._format('a') == AR_FORMAT_SIMPLE
        >>> assert ArInfo._format(u'\u2603') == AR_FORMAT_SIMPLE
        >>> assert ArInfo._format('a', AR_FORMAT_BSD) == AR_FORMAT_BSD

        Certain file paths require the BSD format
        >>> assert ArInfo._format('f f') == AR_FORMAT_BSD
        >>> assert ArInfo._format('123456789abcdef..') == AR_FORMAT_BSD

        >>> ArInfo._format('123456789abcdef..', AR_FORMAT_SIMPLE)
        Traceback (most recent call last):
            ...
        IOError: File name too long for format!

        >>> ArInfo._format('f f', AR_FORMAT_SIMPLE)
        Traceback (most recent call last):
            ...
        IOError: File name contains forbidden character for format!
        """
        # Lengths are checked on the encoded name because that is what ends
        # up in the archive.
        if isinstance(path, unicode):
            path = path.encode('utf-8')

        # Each entry is (name cannot be stored in the simple format, error to
        # raise when the simple format was explicitly requested).
        checks = (
            (path.startswith('#1/'),
             'File name starts with special for format!'),
            (len(path) >= 16,
             'File name too long for format!'),
            (_ArFormatSimpleForbidden in path,
             'File name contains forbidden character for format!'),
        )
        for needsbsd, message in checks:
            if not needsbsd:
                continue
            if arformat is None:
                arformat = AR_FORMAT_BSD
            elif arformat == AR_FORMAT_SIMPLE:
                raise IOError(message)

        return AR_FORMAT_SIMPLE if arformat is None else arformat

    @property
    def _encodedname(self):
        """The member name as the byte string stored in the archive."""
        name = self.name
        if isinstance(name, unicode):
            name = name.encode('utf-8')
        return name

    @property
    def needspadding(self):
        """True when a padding byte must follow this member's data.

        >>> ArInfo(AR_FORMAT_SIMPLE, '', 10, 0, 0, 0, 0).needspadding
        False
        >>> ArInfo(AR_FORMAT_SIMPLE, '', 11, 0, 0, 0, 0).needspadding
        True
        >>> ArInfo(AR_FORMAT_BSD, 'a', 10, 0, 0, 0, 0).needspadding
        True
        >>> ArInfo(AR_FORMAT_BSD, 'ab', 10, 0, 0, 0, 0).needspadding
        False
        >>> ArInfo(AR_FORMAT_BSD, 'ab', 11, 0, 0, 0, 0).needspadding
        True
        >>> ArInfo(AR_FORMAT_BSD, 'ab', 12, 0, 0, 0, 0).needspadding
        False
        """
        return self.datasize % 2 != 0

    @property
    def datasize(self):
        """Number of bytes that follow the header for this member.

        For the BSD format this includes the encoded file name.

        Raises ValueError for a format this module cannot size.

        >>> ArInfo(AR_FORMAT_SIMPLE, '', 1, 0, 0, 0, 0).datasize
        1
        >>> ArInfo(AR_FORMAT_SIMPLE, '', 10, 0, 0, 0, 0).datasize
        10
        >>> ArInfo(AR_FORMAT_BSD, '', 1, 0, 0, 0, 0).datasize
        1
        >>> ArInfo(AR_FORMAT_BSD, 'a', 1, 0, 0, 0, 0).datasize
        2
        >>> ArInfo(AR_FORMAT_BSD, '', 10, 0, 0, 0, 0).datasize
        10
        >>> ArInfo(AR_FORMAT_BSD, 'abc', 10, 0, 0, 0, 0).datasize
        13
        """
        if self.format == AR_FORMAT_SIMPLE:
            return self.size
        if self.format == AR_FORMAT_BSD:
            # Count encoded bytes, not characters, so the value agrees with
            # what tofileobj() writes for non-ASCII names.
            return len(self._encodedname) + self.size
        # An explicit raise (rather than an assert) survives `python -O`.
        raise ValueError('Unknown format %r' % self.format)

    @classmethod
    def fromfileobj(cls, fileobj, fullparse=True):
        """Create and return a ArInfo object from fileobj.

        Reads one member header (and, for the BSD format, the file name that
        follows it), leaving fileobj positioned at the member's content.

        When fullparse is false the mtime, uid, gid and mode fields are left
        as the raw, unparsed header strings.

        Returns None when fileobj is at end of file.

        Raises IOError if the buffer is invalid, and SystemError for the
        unsupported System V / GNU format.
        """
        buf = fileobj.read(_ArInfoStruct.size)
        if not buf:
            return None

        if len(buf) < _ArInfoStruct.size:
            raise IOError(
                'not enough data for header, got %r, needed %r' % (
                    len(buf), _ArInfoStruct.size))

        name, mtime, uid, gid, mode, datasize, magic = (
            _ArInfoStruct.unpack(buf))

        # Check the terminator before trusting any other field, so that a
        # misaligned or corrupt header is reported as such.
        if magic != AR_MAGIC_BIT:
            raise IOError('file magic invalid, got %r, needed %r' % (
                magic, AR_MAGIC_BIT))

        try:
            datasize = int(datasize)
            if fullparse:
                mtime = int(mtime)
                uid = int(uid)
                gid = int(gid)
                mode = int(mode, 8)
        except ValueError:
            raise IOError('invalid numeric field in header: %r' % buf)

        if name.startswith('#1/'):
            arformat = AR_FORMAT_BSD

            try:
                filenamesize = int(name[3:])
            except ValueError:
                raise IOError('invalid file name length: %r' % name[3:])
            # A negative length would make read() consume the whole stream,
            # and a length above datasize would give a negative file size.
            if filenamesize < 0 or filenamesize > datasize:
                raise IOError('invalid file name length: %r' % name[3:])

            filename = fileobj.read(filenamesize)
            if len(filename) != filenamesize:
                raise IOError(
                    'not enough data for filename, got %r, needed %r' % (
                        len(filename), filenamesize))

            filesize = datasize - filenamesize

        elif name.startswith('/'):
            arformat = AR_FORMAT_SYSV
            raise SystemError('%s format is not supported.' % arformat)

        else:
            arformat = AR_FORMAT_SIMPLE
            filename = name.strip()
            filesize = datasize

        return cls(
            arformat, filename.decode('utf-8'), filesize, mtime, uid, gid,
            mode)

    @classmethod
    def frompath(cls, path, arformat=None):
        """Return an ArInfo object from a file path for information.

        The metadata is taken from os.stat(path) and `path` itself is used as
        the member name.

        Raises IOError if path is not a regular file, or if `path` cannot be
        stored in the requested arformat.
        """
        st = os.stat(path)

        # stat.S_ISREG() takes the mode bits, not the whole stat result.
        if not stat.S_ISREG(st.st_mode):
            raise IOError('Only work on regular files.')

        return cls(
            cls._format(path, arformat), path,
            st.st_size, st.st_mtime, st.st_uid, st.st_gid, st.st_mode)

    @classmethod
    def fromdefault(cls, path, size, arformat=None):
        """Return an ArInfo object using name and size (with defaults elsewhere).

        Only a file's name and content are needed to create the ArInfo, all of
        the modification time, user, group and mode information will be set to
        default values. This means that you don't need to perform an expensive
        stat of the file.

        >>> ai = ArInfo.fromdefault('abc123', 10)
        >>> ai.name
        'abc123'
        >>> ai.size
        10
        >>> assert ai.mtime == AR_DEFAULT_MTIME
        >>> assert ai.uid == AR_DEFAULT_UID
        >>> assert ai.gid == AR_DEFAULT_GID
        >>> assert ai.mode == AR_DEFAULT_MODE
        """
        return cls(
            cls._format(path, arformat), path, size,
            AR_DEFAULT_MTIME, AR_DEFAULT_UID, AR_DEFAULT_GID, AR_DEFAULT_MODE)

    def tofileobj(self, fileobj):
        """Write this member's header to fileobj.

        For the BSD format the file name is written directly after the
        header. The member's content and padding are not written.

        Raises IOError if the name does not fit the simple format and
        ValueError if the format is not one this module can write. Nothing is
        written to fileobj in either case.
        """
        name = self._encodedname
        # Raises for unknown formats before anything has been written.
        datasize = self.datasize

        # File name, 16 bytes
        if self.format == AR_FORMAT_SIMPLE:
            if len(name) >= 16:
                raise IOError('File name too long for format!')
            namefield = '%-16s' % name
        else:
            # BSD variant: the field holds the length of the name, which is
            # itself stored after the header.
            namefield = '#1/%-13i' % len(name)

        pieces = [
            namefield,
            # Modtime, 12 bytes
            '%-12i' % self.mtime,
            # Owner ID, 6 bytes
            '%-6i' % self.uid,
            # Group ID, 6 bytes
            '%-6i' % self.gid,
            # File mode, 8 bytes
            '%-8o' % self.mode,
            # File size, 10 bytes
            '%-10s' % datasize,
            # File magic, 2 bytes
            AR_MAGIC_BIT,
        ]

        # Filename - BSD variant
        if self.format == AR_FORMAT_BSD:
            pieces.append(name)

        fileobj.write(''.join(pieces))
243 | |
244 | |
class ArFileReader(object):
    """Read an ar archive from the given input buffer.

    The archive's global magic is consumed and checked on construction.
    Iterating yields `(ArInfo, fileobj)` pairs; for each pair the consumer
    must either read exactly `ArInfo.size` bytes from fileobj or leave it
    untouched.
    """

    def __init__(self, fileobj, fullparse=True):
        self.fullparse = fullparse
        self.fileobj = fileobj

        header = self.fileobj.read(len(AR_MAGIC_START))
        if header == AR_MAGIC_START:
            return
        raise IOError(
            'Not an ar file, invalid magic, got %r, wanted %r.' % (
                header, AR_MAGIC_START))

    def __iter__(self):
        while True:
            if self.fileobj.closed:
                raise IOError('Tried to read after the file closed.')
            member = ArInfo.fromfileobj(self.fileobj, self.fullparse)
            if member is None:
                # End of archive.
                return

            before = self.fileobj.tell()
            yield member, self.fileobj
            consumed = self.fileobj.tell() - before

            if not consumed:
                # If the reader didn't touch the input buffer, seek past the
                # file.
                self.fileobj.seek(member.size, os.SEEK_CUR)
            elif consumed != member.size:
                raise IOError(
                    'Wrong amount of data read from fileobj! '
                    'got %i, wanted %i' % (consumed, member.size))

            if not member.needspadding:
                continue
            pad = self.fileobj.read(len(AR_PADDING))
            if pad != AR_PADDING:
                raise IOError(
                    'incorrect padding, got %r, wanted %r' % (
                        pad, AR_PADDING))

    def close(self):
        """Close the archive.

        Will close the input buffer.
        """
        self.fileobj.close()
291 | |
292 | |
class ArFileWriter(object):
    """Write an ar archive to the given output buffer.

    The archive's global magic is written on construction.
    """

    # Upper bound on the number of bytes held in memory while copying a
    # member's content.
    _COPY_CHUNK_SIZE = 64 * 1024

    def __init__(self, fileobj):
        self.fileobj = fileobj
        self.fileobj.write(AR_MAGIC_START)

    def addfile(self, arinfo, fileobj=None):
        """Append one member to the archive.

        Args:
          arinfo: ArInfo describing the member; its header is written first.
          fileobj: file object to read the member's content from. Exactly
              `arinfo.size` bytes are copied. May be omitted for empty
              members.

        Raises:
          ValueError: fileobj was not supplied for a non-empty member.
          IOError: fileobj ran out of data before `arinfo.size` bytes were
              copied. The header has already been written at that point.
        """
        if not fileobj and arinfo.size:
            raise ValueError(
                'Need to supply fileobj if file is non-zero in size.')

        arinfo.tofileobj(self.fileobj)
        if fileobj:
            # shutil.copyfileobj() is not used here: its third argument is a
            # buffer size, not a limit on the amount copied, so it would copy
            # to EOF regardless of the size recorded in the header.
            remaining = arinfo.size
            while remaining:
                chunk = fileobj.read(min(remaining, self._COPY_CHUNK_SIZE))
                if not chunk:
                    raise IOError(
                        'Not enough data in fileobj, missing %i of %i bytes'
                        % (remaining, arinfo.size))
                self.fileobj.write(chunk)
                remaining -= len(chunk)

        if arinfo.needspadding:
            self.fileobj.write(AR_PADDING)

    def flush(self):
        """Flush the output buffer."""
        self.fileobj.flush()

    def close(self):
        """Close the archive.

        Will close the output buffer.
        """
        self.fileobj.close()
320 | |
321 | |
def is_arfile(name):
    """Return True if the file at path `name` starts with the ar magic."""
    wanted = AR_MAGIC_START
    # file() rather than open(): this module defines its own open().
    with file(name, 'rb') as handle:
        found = handle.read(len(wanted))
    return found == wanted
325 | |
326 | |
# pylint: disable=redefined-builtin
def open(name=None, mode='r', fileobj=None):
    """Open an ar archive for reading or writing.

    Exactly one of `name` and `fileobj` must be given.

    Args:
      name: path of the archive to open.
      mode: 'r' or 'rb' to get an ArFileReader, 'w' or 'wb' to get an
          ArFileWriter. Archives are always opened in binary mode, so the
          'b' is optional.
      fileobj: an already open file object, which must be in binary mode.

    Returns:
      An ArFileReader or ArFileWriter wrapping the file.

    Raises:
      ValueError: the arguments are inconsistent, the mode is unknown, or
          fileobj is not open in binary mode.
    """
    if name is None and fileobj is None:
        raise ValueError('Nothing to open!')
    if name is not None and fileobj is not None:
        raise ValueError('Provided both a file name and file object!')

    # Validate the mode before touching the filesystem so that a bad mode
    # can neither leak an open file nor create or truncate one.
    kind = mode.replace('b', '')
    if kind not in ('r', 'w'):
        raise ValueError('Unknown file mode.')

    openedhere = name is not None
    if openedhere:
        # file() rather than open(): this function shadows the builtin.
        fileobj = file(name, kind + 'b')
    elif 'b' not in fileobj.mode:
        raise ValueError('File object not open in binary mode.')

    try:
        if kind == 'r':
            return ArFileReader(fileobj)
        return ArFileWriter(fileobj)
    except Exception:
        # Don't leak a file we opened ourselves if the archive is rejected;
        # a caller supplied fileobj stays the caller's responsibility.
        if openedhere:
            fileobj.close()
        raise
346 | |
347 | |
# When executed as a script, run the doctests embedded in this module's
# docstrings.
if __name__ == '__main__':
    doctest.testmod()
OLD | NEW |