Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1359)

Side by Side Diff: client/libs/arfile/arfile.py

Issue 2049523004: luci-py: Tools for working with BSD style ar archives. (Closed) Base URL: https://github.com/luci/luci-py.git@master
Patch Set: Adding missing unicode tests. Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # Copyright 2016 The LUCI Authors. All rights reserved.
2 # Use of this source code is governed under the Apache License, Version 2.0
3 # that can be found in the LICENSE file.
4
5 import collections
6 import doctest
7 import os
8 import shutil
9 import stat
10 import struct
11
12 AR_MAGIC_START = '!<arch>\n'
13 AR_MAGIC_BIT = '\x60\n'
14 AR_PADDING = '\n'
15
16 AR_FORMAT_SIMPLE = 'Simple Format'
17 AR_FORMAT_BSD = '4.4BSD Format'
18 AR_FORMAT_SYSV = 'System V / GNU Format'
19
20 AR_DEFAULT_MTIME = 1447140471
21 AR_DEFAULT_UID = 1000
22 AR_DEFAULT_GID = 1000
23 AR_DEFAULT_MODE = 0100640 # 100640 -- Octal
24
25 _ArInfoStruct = struct.Struct('16s 12s 6s 6s 8s 10s 2s')
26 _ArFormatSimpleForbidden = " "
M-A Ruel 2016/06/22 14:21:31 use single quote thoroughly. I don't think it's wo
mithro 2016/06/23 07:10:54 Done.
27
28 _ArInfoBase = collections.namedtuple('ArInfo', [
29 'format', 'name', 'size', 'mtime', 'uid', 'gid', 'mode'])
M-A Ruel 2016/06/22 14:21:32 please add empty lines between.
mithro 2016/06/23 07:10:54 Done.
class ArInfo(_ArInfoBase):
  """An ArInfo object represents one member in an ArFile.

  It does *not* contain the file's data.
  """

  @staticmethod
  def _format(path, arformat=None):
    u"""Return the ar format to use for storing a member named path.

    When arformat is None the simplest format able to hold the name is chosen.
    When arformat is given it is returned unchanged, except that requesting
    AR_FORMAT_SIMPLE for a name it cannot hold raises IOError.

    Allow forcing the format to a given type
    >>> assert ArInfo._format('a') == AR_FORMAT_SIMPLE
    >>> assert ArInfo._format(u'\u2603') == AR_FORMAT_SIMPLE
    >>> assert ArInfo._format('a', AR_FORMAT_BSD) == AR_FORMAT_BSD

    Certain file paths require the BSD format
    >>> assert ArInfo._format('f f') == AR_FORMAT_BSD
    >>> assert ArInfo._format('123456789abcdef..') == AR_FORMAT_BSD

    >>> ArInfo._format('123456789abcdef..', AR_FORMAT_SIMPLE)
    Traceback (most recent call last):
        ...
    IOError: File name too long for format!

    >>> ArInfo._format('f f', AR_FORMAT_SIMPLE)
    Traceback (most recent call last):
        ...
    IOError: File name contains forbidden character for format!
    """
    if isinstance(path, unicode):
      path = path.encode('utf-8')

    # First reason (if any) why the simple format cannot hold this name. The
    # order matters: it decides which message is reported.
    if path.startswith('#1/'):
      problem = 'File name starts with special for format!'
    elif len(path) >= 16:
      problem = 'File name too long for format!'
    elif _ArFormatSimpleForbidden in path:
      problem = 'File name contains forbidden character for format!'
    else:
      problem = None

    if problem and arformat == AR_FORMAT_SIMPLE:
      raise IOError(problem)

    if arformat is None:
      arformat = AR_FORMAT_BSD if problem else AR_FORMAT_SIMPLE
    return arformat

  @property
  def _name_bytes(self):
    """The member name as the UTF-8 byte string stored in the archive."""
    name = self.name
    if isinstance(name, unicode):
      name = name.encode('utf-8')
    return name

  @property
  def needspadding(self):
    """True when the stored data has odd length and so needs a padding byte.

    >>> ArInfo(AR_FORMAT_SIMPLE, '', 10, 0, 0, 0, 0).needspadding
    False
    >>> ArInfo(AR_FORMAT_SIMPLE, '', 11, 0, 0, 0, 0).needspadding
    True
    >>> ArInfo(AR_FORMAT_BSD, 'a', 10, 0, 0, 0, 0).needspadding
    True
    >>> ArInfo(AR_FORMAT_BSD, 'ab', 10, 0, 0, 0, 0).needspadding
    False
    >>> ArInfo(AR_FORMAT_BSD, 'ab', 11, 0, 0, 0, 0).needspadding
    True
    >>> ArInfo(AR_FORMAT_BSD, 'ab', 12, 0, 0, 0, 0).needspadding
    False
    """
    return self.datasize % 2 != 0

  @property
  def datasize(self):
    """Number of bytes stored after the header for this member.

    For the BSD format this includes the file name, counted in encoded bytes
    because that is what tofileobj() writes.

    >>> ArInfo(AR_FORMAT_SIMPLE, '', 1, 0, 0, 0, 0).datasize
    1
    >>> ArInfo(AR_FORMAT_SIMPLE, '', 10, 0, 0, 0, 0).datasize
    10
    >>> ArInfo(AR_FORMAT_BSD, '', 1, 0, 0, 0, 0).datasize
    1
    >>> ArInfo(AR_FORMAT_BSD, 'a', 1, 0, 0, 0, 0).datasize
    2
    >>> ArInfo(AR_FORMAT_BSD, '', 10, 0, 0, 0, 0).datasize
    10
    >>> ArInfo(AR_FORMAT_BSD, 'abc', 10, 0, 0, 0, 0).datasize
    13
    >>> ArInfo(AR_FORMAT_BSD, u'\u2603', 1, 0, 0, 0, 0).datasize
    4
    """
    # Compare by value: an equal format string that is not the very same
    # object as the module constant must still be recognised.
    if self.format == AR_FORMAT_SIMPLE:
      return self.size
    if self.format == AR_FORMAT_BSD:
      return len(self._name_bytes) + self.size
    # Raised explicitly (not via `assert`) so the check survives `python -O`.
    raise AssertionError('Unknown format %r' % (self.format,))

  @classmethod
  def fromfileobj(cls, fileobj, fullparse=True):
    """Create and return an ArInfo object from fileobj.

    Reads one member header (and, for the BSD format, the file name that
    follows it), leaving fileobj positioned at the start of the member data.

    Args:
      fileobj: binary file-like object positioned at a member header.
      fullparse: when true, mtime, uid, gid and mode are converted to
        integers; otherwise they are returned as the raw header fields. The
        default is kept because this is part of the public interface.

    Returns:
      An ArInfo, or None when fileobj is already at end of file.

    Raises:
      IOError: if the header or file name is truncated or invalid.
      SystemError: if the member uses the unsupported System V / GNU format.
    """
    buf = fileobj.read(_ArInfoStruct.size)
    if not buf:
      return None

    if len(buf) < _ArInfoStruct.size:
      raise IOError(
          'not enough data for header, got %r, needed %r' % (
              len(buf), _ArInfoStruct.size))

    name, mtime, uid, gid, mode, datasize, magic = _ArInfoStruct.unpack(buf)

    # Validate the terminator first so that nothing else is read from the
    # stream on the strength of a buffer that is not a member header.
    if magic != AR_MAGIC_BIT:
      raise IOError('file magic invalid, got %r, needed %r' % (
          magic, AR_MAGIC_BIT))

    try:
      datasize = int(datasize)
      if fullparse:
        mtime = int(mtime)
        uid = int(uid)
        gid = int(gid)
        mode = int(mode, 8)
    except ValueError:
      raise IOError('invalid numeric field in header: %r' % (buf,))

    if name.startswith('#1/'):
      arformat = AR_FORMAT_BSD

      try:
        filenamesize = int(name[3:])
      except ValueError:
        raise IOError('invalid file name length: %r' % name[3:])
      # A negative length would make read() consume the rest of the stream; a
      # length above datasize would yield a negative file size.
      if filenamesize < 0 or filenamesize > datasize:
        raise IOError('invalid file name length: %r' % name[3:])

      filename = fileobj.read(filenamesize)
      if len(filename) != filenamesize:
        raise IOError(
            'not enough data for filename, got %r, needed %r' % (
                len(filename), filenamesize))

      filesize = datasize - filenamesize

    elif name.startswith('/'):
      arformat = AR_FORMAT_SYSV
      raise SystemError('%s format is not supported.' % arformat)

    else:
      arformat = AR_FORMAT_SIMPLE
      filename = name.strip()
      filesize = datasize

    return cls(
        arformat, filename.decode('utf-8'), filesize, mtime, uid, gid, mode)

  @classmethod
  def frompath(cls, path, arformat=None):
    """Return an ArInfo object from a file path for information.

    Raises:
      IOError: if path is not a regular file, or its name does not fit the
        requested format.
      OSError: if path cannot be stat'ed.
    """
    st = os.stat(path)

    # S_ISREG expects the mode bits, not the whole stat result.
    if not stat.S_ISREG(st.st_mode):
      raise IOError('Only work on regular files.')

    return cls(
        cls._format(path, arformat), path,
        st.st_size, st.st_mtime, st.st_uid, st.st_gid, st.st_mode)

  @classmethod
  def fromdefault(cls, path, size, arformat=None):
    """Return an ArInfo object using name and size (with defaults elsewhere).

    Only a file's name and content are needed to create the ArInfo, all of the
    modification time, user, group and mode information will be set to default
    values. This means that you don't need to perform an expensive stat of the
    file.

    >>> ai = ArInfo.fromdefault('abc123', 10)
    >>> ai.name
    'abc123'
    >>> ai.size
    10
    >>> assert ai.mtime == AR_DEFAULT_MTIME
    >>> assert ai.uid == AR_DEFAULT_UID
    >>> assert ai.gid == AR_DEFAULT_GID
    >>> assert ai.mode == AR_DEFAULT_MODE
    """
    return cls(
        cls._format(path, arformat), path, size,
        AR_DEFAULT_MTIME, AR_DEFAULT_UID, AR_DEFAULT_GID, AR_DEFAULT_MODE)

  def tofileobj(self, fileobj):
    """Write this member's header to fileobj.

    For the BSD format the file name is written right after the header. The
    member's data is not written. Nothing is written if the header turns out
    to be invalid.

    Raises:
      IOError: if the format is not writable, the name does not fit the
        simple format, or a field overflows its fixed width.
    """
    name = self._name_bytes

    # File name, 16 bytes
    if self.format == AR_FORMAT_SIMPLE:
      if len(name) >= 16:
        raise IOError('File name too long for format!')
      namefield = '%-16s' % name
    elif self.format == AR_FORMAT_BSD:
      namefield = '#1/%-13s' % str(len(name))
    else:
      raise IOError('Cannot write unsupported format %r' % (self.format,))

    header = ''.join([
        namefield,
        '%-12i' % self.mtime,      # Modtime, 12 bytes
        '%-6i' % self.uid,         # Owner ID, 6 bytes
        '%-6i' % self.gid,         # Group ID, 6 bytes
        '%-8o' % self.mode,        # File mode, 8 bytes
        '%-10s' % self.datasize,   # File size, 10 bytes
        AR_MAGIC_BIT,              # File magic, 2 bytes
    ])
    # %-Ns pads but never truncates, so an oversized value would silently
    # shift every following field.
    if len(header) != _ArInfoStruct.size:
      raise IOError(
          'header field too wide, got %r bytes, needed %r' % (
              len(header), _ArInfoStruct.size))

    fileobj.write(header)

    # Filename - BSD variant
    if self.format == AR_FORMAT_BSD:
      fileobj.write(name)
243
244
class ArFileReader(object):
  """Iterate over the members of an ar archive read from an input buffer."""

  def __init__(self, fileobj, fullparse=True):
    """Remember the input buffer and consume the global archive magic.

    Raises IOError if fileobj does not start with AR_MAGIC_START.
    """
    self.fullparse = fullparse
    self.fileobj = fileobj

    found = self.fileobj.read(len(AR_MAGIC_START))
    if found != AR_MAGIC_START:
      raise IOError(
          'Not an ar file, invalid magic, got %r, wanted %r.' % (
              found, AR_MAGIC_START))

  def __iter__(self):
    """Yield (ArInfo, fileobj) for each member in turn.

    The consumer must either read exactly the member's data from the yielded
    fileobj or leave it untouched; anything else raises IOError on the next
    step, as does iterating once the file is closed.
    """
    while not self.fileobj.closed:
      member = ArInfo.fromfileobj(self.fileobj, self.fullparse)
      if member is None:
        return

      before = self.fileobj.tell()
      yield member, self.fileobj
      consumed = self.fileobj.tell() - before

      if consumed == 0:
        # The consumer ignored this member: skip over its data.
        self.fileobj.seek(member.size, os.SEEK_CUR)
      elif consumed != member.size:
        raise IOError(
            'Wrong amount of data read from fileobj! got %i, wanted %i' % (
                consumed, member.size))

      if not member.needspadding:
        continue
      pad = self.fileobj.read(len(AR_PADDING))
      if pad != AR_PADDING:
        raise IOError(
            'incorrect padding, got %r, wanted %r' % (
                pad, AR_PADDING))

    # The loop condition only fails once the underlying file is closed.
    raise IOError('Tried to read after the file closed.')

  def close(self):
    """Close the archive.

    Will close the input buffer.
    """
    self.fileobj.close()
291
292
class ArFileWriter(object):
  """Write an ar archive to the given output buffer."""

  # Upper bound on how much member data is held in memory at once.
  _COPY_CHUNK = 64 * 1024

  def __init__(self, fileobj):
    """Remember the output buffer and write the global archive magic."""
    self.fileobj = fileobj
    self.fileobj.write(AR_MAGIC_START)

  def addfile(self, arinfo, fileobj=None):
    """Append one member: its header, arinfo.size bytes of data, padding.

    Args:
      arinfo: ArInfo describing the member.
      fileobj: binary file-like object to read the member data from. May be
        omitted only when arinfo.size is zero.

    Raises:
      ValueError: if fileobj is missing for a non-empty member.
      IOError: if fileobj runs out before arinfo.size bytes were read.
    """
    if fileobj is None and arinfo.size:
      raise ValueError('Need to supply fileobj if file is non-zero in size.')

    arinfo.tofileobj(self.fileobj)

    if fileobj is not None:
      # Copy exactly arinfo.size bytes. shutil.copyfileobj() cannot be used
      # for this: its third argument is a buffer size, not a byte limit, so it
      # would copy the whole source and contradict the size in the header.
      remaining = arinfo.size
      while remaining:
        chunk = fileobj.read(min(remaining, self._COPY_CHUNK))
        if not chunk:
          raise IOError(
              'Not enough data in fileobj, got %i, wanted %i' % (
                  arinfo.size - remaining, arinfo.size))
        self.fileobj.write(chunk)
        remaining -= len(chunk)

    if arinfo.needspadding:
      self.fileobj.write(AR_PADDING)

  def flush(self):
    """Flush the output buffer."""
    self.fileobj.flush()

  def close(self):
    """Close the archive.

    Will close the output buffer.
    """
    self.fileobj.close()
320
321
def is_arfile(name):
  """Return True if the file at path `name` starts with the ar magic."""
  wanted = AR_MAGIC_START
  # `file` rather than `open`: this module defines its own `open`.
  with file(name, 'rb') as handle:
    head = handle.read(len(wanted))
  return head == wanted
325
326
# pylint: disable=redefined-builtin
def open(name=None, mode='r', fileobj=None):
  """Open an ar archive for reading or writing.

  Exactly one of name and fileobj must be given.

  Args:
    name: path of the archive to open.
    mode: 'r' or 'rb' to read, 'w' or 'wb' to write. Files are always opened
      in binary mode.
    fileobj: an already open binary file-like object to use instead of name.

  Returns:
    An ArFileReader or an ArFileWriter, depending on mode.

  Raises:
    ValueError: if the arguments are inconsistent, the mode is unknown, or
      fileobj reports a non-binary mode.
    IOError: if the file cannot be opened or is not an ar archive.
  """
  if name is None and fileobj is None:
    raise ValueError('Nothing to open!')
  if name is not None and fileobj is not None:
    raise ValueError('Provided both a file name and file object!')

  # Accept the mode with or without 'b' and validate it before touching the
  # file system, so a bad mode can neither create nor leak a file.
  basemode = mode.replace('b', '')
  if basemode not in ('r', 'w'):
    raise ValueError('Unknown file mode.')

  opened_here = name is not None
  if opened_here:
    # `file` rather than `open`: this function shadows the builtin.
    fileobj = file(name, basemode + 'b')
  else:
    # In-memory buffers have no `mode` attribute; only reject objects that
    # explicitly report a non-binary mode.
    objmode = getattr(fileobj, 'mode', None)
    if isinstance(objmode, basestring) and 'b' not in objmode:
      raise ValueError('File object not open in binary mode.')

  try:
    if basemode == 'r':
      return ArFileReader(fileobj)
    return ArFileWriter(fileobj)
  except Exception:
    # Do not leak a handle created here when the archive is rejected.
    if opened_here:
      fileobj.close()
    raise
346
347
# Running the module as a script executes the doctests embedded in the
# docstrings above.
if __name__ == '__main__':
  doctest.testmod()
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698