Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(205)

Side by Side Diff: client/libs/arfile/arfile.py

Issue 2049523004: luci-py: Tools for working with BSD style ar archives. (Closed) Base URL: https://github.com/luci/luci-py.git@master
Patch Set: Fixing for Marc's review. Created 4 years, 6 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 # Copyright 2016 The LUCI Authors. All rights reserved.
2 # Use of this source code is governed under the Apache License, Version 2.0
3 # that can be found in the LICENSE file.
4
5 import collections
6 import doctest
7 import os
8 import shutil
9 import stat
10 import struct
11
# Signature found at the very start of every ar archive.
AR_MAGIC_START = '!<arch>\n'
# Two byte terminator that closes every member header.
AR_MAGIC_BIT = '\x60\n'
# Single byte written after a member whose stored data has an odd length.
AR_PADDING = '\n'

# Human readable identifiers for the header flavours this module knows about.
AR_FORMAT_SIMPLE = 'Simple Format'
AR_FORMAT_BSD = '4.4BSD Format'
AR_FORMAT_SYSV = 'System V / GNU Format'

# Fixed metadata used when a member is created without stat()'ing a file.
AR_DEFAULT_MTIME = 1447140471
AR_DEFAULT_UID = 1000
AR_DEFAULT_GID = 1000
# Spelled with the 0o prefix so the literal parses on Python 2.6+ and 3.
AR_DEFAULT_MODE = 0o100640  # Regular file, rw-r-----

# Fixed width header layout: name, mtime, uid, gid, mode, size, magic.
_ArInfoStruct = struct.Struct('16s 12s 6s 6s 8s 10s 2s')
# Characters that cannot appear in a name stored in the simple format.
_ArFormatSimpleForbidden = " "
27
_ArInfoBase = collections.namedtuple('ArInfo', [
    'format', 'name', 'size', 'mtime', 'uid', 'gid', 'mode'])


class ArInfo(_ArInfoBase):
  """An ArInfo object represents one member in an ArFile.

  It does *not* contain the file's data.

  Fields:
    format: one of the AR_FORMAT_* constants.
    name: the member's file name.
    size: number of bytes of file content (excluding any inline name).
    mtime, uid, gid, mode: metadata stored in the member header.
  """

  @staticmethod
  def _format(path, arformat=None):
    """Pick (or validate) the header format to use for a file name.

    Allow forcing the format to a given type
    >>> assert ArInfo._format('a') == AR_FORMAT_SIMPLE
    >>> assert ArInfo._format('a', AR_FORMAT_BSD) == AR_FORMAT_BSD

    Certain file paths require the BSD format
    >>> assert ArInfo._format('f f') == AR_FORMAT_BSD
    >>> assert ArInfo._format('123456789abcdef..') == AR_FORMAT_BSD

    >>> ArInfo._format('123456789abcdef..', AR_FORMAT_SIMPLE)
    Traceback (most recent call last):
        ...
    IOError: File name too long for format!

    >>> ArInfo._format('f f', AR_FORMAT_SIMPLE)
    Traceback (most recent call last):
        ...
    IOError: File name contains forbidden character for format!
    """
    # Work out the first reason (if any) the name cannot use the simple format.
    if path.startswith("#1/"):
      problem = 'File name starts with special for format!'
    elif len(path) >= 16:
      problem = 'File name too long for format!'
    elif _ArFormatSimpleForbidden in path:
      problem = 'File name contains forbidden character for format!'
    else:
      problem = None

    if arformat is None:
      # No preference given: use the simple format whenever the name allows it.
      return AR_FORMAT_BSD if problem else AR_FORMAT_SIMPLE

    if problem and arformat == AR_FORMAT_SIMPLE:
      raise IOError(problem)

    return arformat

  @property
  def needspadding(self):
    """True when the stored data has an odd length and needs a padding byte.

    >>> ArInfo(AR_FORMAT_SIMPLE, '', 10, 0, 0, 0, 0).needspadding
    False
    >>> ArInfo(AR_FORMAT_SIMPLE, '', 11, 0, 0, 0, 0).needspadding
    True
    >>> ArInfo(AR_FORMAT_BSD, 'a', 10, 0, 0, 0, 0).needspadding
    True
    >>> ArInfo(AR_FORMAT_BSD, 'ab', 10, 0, 0, 0, 0).needspadding
    False
    >>> ArInfo(AR_FORMAT_BSD, 'ab', 11, 0, 0, 0, 0).needspadding
    True
    >>> ArInfo(AR_FORMAT_BSD, 'ab', 12, 0, 0, 0, 0).needspadding
    False
    """
    return self.datasize % 2 != 0

  @property
  def datasize(self):
    """Number of bytes following the header (inline name plus content).

    >>> ArInfo(AR_FORMAT_SIMPLE, '', 1, 0, 0, 0, 0).datasize
    1
    >>> ArInfo(AR_FORMAT_SIMPLE, '', 10, 0, 0, 0, 0).datasize
    10
    >>> ArInfo(AR_FORMAT_BSD, '', 1, 0, 0, 0, 0).datasize
    1
    >>> ArInfo(AR_FORMAT_BSD, 'a', 1, 0, 0, 0, 0).datasize
    2
    >>> ArInfo(AR_FORMAT_BSD, '', 10, 0, 0, 0, 0).datasize
    10
    >>> ArInfo(AR_FORMAT_BSD, 'abc', 10, 0, 0, 0, 0).datasize
    13
    """
    # Compare by value: identity of equal strings is not guaranteed.
    if self.format == AR_FORMAT_SIMPLE:
      return self.size
    elif self.format == AR_FORMAT_BSD:
      return len(self.name)+self.size
    # Raised explicitly so the check survives running python with -O.
    raise AssertionError('Unknown format %r' % self.format)

  @classmethod
  def fromfileobj(cls, fileobj, fullparse=True):
    """Create and return a ArInfo object from fileobj.

    Reads one member header (and, for the BSD format, the inline file name)
    leaving fileobj positioned at the start of the member's content.

    Args:
      fileobj: file like object positioned at the start of a member header.
      fullparse: when false, mtime/uid/gid/mode are left as the raw header
          strings rather than being converted to integers.

    Returns:
      An ArInfo, or None if fileobj has no more data.

    Raises:
      IOError: if the buffer is invalid.
      SystemError: if the member uses the (unsupported) System V format.
    """
    buf = fileobj.read(_ArInfoStruct.size)
    if not buf:
      return None

    if len(buf) < _ArInfoStruct.size:
      raise IOError(
          'not enough data for header, got %r, needed %r' % (
              len(buf), _ArInfoStruct.size))

    name, mtime, uid, gid, mode, datasize, magic = _ArInfoStruct.unpack(buf)

    # Malformed numbers are reported as IOError, as documented above.
    try:
      datasize = int(datasize)
      if fullparse:
        mtime = int(mtime)
        uid = int(uid)
        gid = int(gid)
        mode = int(mode, 8)
    except ValueError:
      raise IOError('invalid numeric field in header: %r' % buf)

    if name.startswith('#1/'):
      arformat = AR_FORMAT_BSD

      try:
        filenamesize = int(name[3:])
      except ValueError:
        raise IOError('invalid file name length: %r' % name[3:])

      filename = fileobj.read(filenamesize)
      if len(filename) != filenamesize:
        raise IOError(
            'not enough data for filename, got %r, needed %r' % (
                len(filename), filenamesize))

      # The stored size covers the inline name as well as the content.
      filesize = datasize - filenamesize
      if filesize < 0:
        raise IOError(
            'file name length %r is larger than data size %r' % (
                filenamesize, datasize))

    elif name.startswith('/'):
      arformat = AR_FORMAT_SYSV
      raise SystemError('%s format is not supported.' % arformat)

    else:
      arformat = AR_FORMAT_SIMPLE
      filename = name.strip()
      filesize = datasize

    if magic != AR_MAGIC_BIT:
      raise IOError('file magic invalid, got %r, needed %r' % (
          magic, AR_MAGIC_BIT))

    return cls(arformat, filename, filesize, mtime, uid, gid, mode)

  @classmethod
  def frompath(cls, path, arformat=None):
    """Return an ArInfo object from a file path for information.

    Args:
      path: path of an existing regular file; also used as the member name.
      arformat: optional AR_FORMAT_* constant to force.

    Raises:
      IOError: if path is not a regular file, or the name does not fit the
          requested format.
    """
    st = os.stat(path)

    # S_ISREG() takes the mode bits, not the whole stat result.
    if not stat.S_ISREG(st.st_mode):
      raise IOError('Only work on regular files.')

    return cls(
        cls._format(path, arformat), path,
        st.st_size, st.st_mtime, st.st_uid, st.st_gid, st.st_mode)

  @classmethod
  def fromdefault(cls, path, size, arformat=None):
    """Return an ArInfo object using name and size (with defaults elsewhere).

    Only a file's name and content are needed to create the ArInfo, all of the
    modification time, user, group and mode information will be set to default
    values. This means that you don't need to perform an expensive stat of the
    file.

    >>> ai = ArInfo.fromdefault('abc123', 10)
    >>> ai.name
    'abc123'
    >>> ai.size
    10
    >>> assert ai.mtime == AR_DEFAULT_MTIME
    >>> assert ai.uid == AR_DEFAULT_UID
    >>> assert ai.gid == AR_DEFAULT_GID
    >>> assert ai.mode == AR_DEFAULT_MODE
    """
    return cls(
        cls._format(path, arformat), path, size,
        AR_DEFAULT_MTIME, AR_DEFAULT_UID, AR_DEFAULT_GID, AR_DEFAULT_MODE)

  def tofileobj(self, fileobj):
    """Write this member's header (and inline BSD file name) to fileobj.

    Nothing is written if the header cannot be produced.

    Raises:
      AssertionError: if the format is unknown.
      IOError: if a field is too wide for its fixed width header slot.
    """
    # File name, 16 bytes
    if self.format == AR_FORMAT_SIMPLE:
      assert len(self.name) < 16
      namefield = '%-16s' % self.name
    elif self.format == AR_FORMAT_BSD:
      namefield = '#1/%-13s' % str(len(self.name))
    else:
      raise AssertionError('Unknown format %r' % self.format)

    header = ''.join([
        namefield,
        # Modtime, 12 bytes
        '%-12i' % self.mtime,
        # Owner ID, 6 bytes
        '%-6i' % self.uid,
        # Group ID, 6 bytes
        '%-6i' % self.gid,
        # File mode, 8 bytes
        '%-8o' % self.mode,
        # File size, 10 bytes
        '%-10s' % self.datasize,
        # File magic, 2 bytes
        AR_MAGIC_BIT,
    ])
    # A value wider than its slot would shift every later field.
    if len(header) != _ArInfoStruct.size:
      raise IOError(
          'header fields do not fit, got %r bytes, needed %r' % (
              len(header), _ArInfoStruct.size))
    fileobj.write(header)

    # Filename - BSD variant
    if self.format == AR_FORMAT_BSD:
      fileobj.write(self.name)
237
238
class ArFileReader(object):
  """Iterate over the members of an ar archive held in a file object."""

  def __init__(self, fileobj, fullparse=True):
    """Check the archive signature at the current position of fileobj.

    Raises IOError when the signature does not match.
    """
    self.fullparse = fullparse
    self.fileobj = fileobj

    signature = self.fileobj.read(len(AR_MAGIC_START))
    if signature == AR_MAGIC_START:
      return
    raise IOError(
        'Not an ar file, invalid magic, got %r, wanted %r.' % (
            signature, AR_MAGIC_START))

  def __iter__(self):
    """Yield (ArInfo, fileobj) pairs, one per archive member.

    Between iterations the consumer must have read either all of the member's
    content from fileobj or none of it; anything else raises IOError.
    """
    while True:
      if self.fileobj.closed:
        raise IOError('Tried to read after the file closed.')

      member = ArInfo.fromfileobj(self.fileobj, self.fullparse)
      if member is None:
        return

      before = self.fileobj.tell()
      yield member, self.fileobj
      consumed = self.fileobj.tell() - before

      if consumed == 0:
        # Untouched by the consumer, so step over the content ourselves.
        self.fileobj.seek(member.size, os.SEEK_CUR)
      elif consumed != member.size:
        raise IOError(
            'Wrong amount of data read from fileobj! got %i, wanted %i' % (
                consumed, member.size))

      if not member.needspadding:
        continue

      filler = self.fileobj.read(len(AR_PADDING))
      if filler != AR_PADDING:
        raise IOError(
            'incorrect padding, got %r, wanted %r' % (
                filler, AR_PADDING))

  def close(self):
    """Close the archive by closing the underlying file object."""
    self.fileobj.close()
285
286
class ArFileWriter(object):
  """Write an ar archive to the given output buffer."""

  # Upper bound on how much member content is held in memory at once.
  _COPY_CHUNK_SIZE = 64 * 1024

  def __init__(self, fileobj):
    """Start a new archive by writing the ar signature to fileobj."""
    self.fileobj = fileobj
    self.fileobj.write(AR_MAGIC_START)

  def addfile(self, arinfo, fileobj=None):
    """Append one member to the archive.

    Args:
      arinfo: ArInfo describing the member; arinfo.size bytes are stored.
      fileobj: file like object to read the content from. May be omitted only
          when arinfo.size is zero.

    Raises:
      ValueError: if content is required but no fileobj was given.
      IOError: if fileobj runs out of data before arinfo.size bytes were read.
    """
    if fileobj is None and arinfo.size != 0:
      raise ValueError('Need to supply fileobj if file is non-zero in size.')

    arinfo.tofileobj(self.fileobj)

    if fileobj is not None:
      # Copy exactly arinfo.size bytes. shutil.copyfileobj()'s third argument
      # is a buffer size, not a byte count, so it cannot enforce the length
      # that was just written into the header.
      remaining = arinfo.size
      while remaining > 0:
        chunk = fileobj.read(min(remaining, self._COPY_CHUNK_SIZE))
        if not chunk:
          raise IOError(
              'Not enough data in fileobj! got %i, wanted %i' % (
                  arinfo.size - remaining, arinfo.size))
        self.fileobj.write(chunk)
        remaining -= len(chunk)

    if arinfo.needspadding:
      self.fileobj.write(AR_PADDING)

  def flush(self):
    """Flush the output buffer."""
    self.fileobj.flush()

  def close(self):
    """Close the archive.

    Will close the output buffer."""
    self.fileobj.close()
314
315
def is_arfile(name):
  """Return True if the file at path `name` starts with the ar signature."""
  handle = file(name, 'rb')
  try:
    signature = handle.read(len(AR_MAGIC_START))
  finally:
    handle.close()
  return signature == AR_MAGIC_START
319
320
# pylint: disable=redefined-builtin
def open(name=None, mode='r', fileobj=None):
  """Open an ar archive for reading or writing.

  Exactly one of name and fileobj must be supplied.

  Args:
    name: path of the archive to open.
    mode: 'r' or 'rb' to read, 'w' or 'wb' to write. Files opened by name are
        always opened in binary mode.
    fileobj: an already open binary file object to use instead of name.

  Returns:
    An ArFileReader (read modes) or an ArFileWriter (write modes).

  Raises:
    ValueError: on a bad combination of arguments, an unknown mode, or a
        file object that reports a non binary mode.
  """
  if name is None and fileobj is None:
    raise ValueError('Nothing to open!')

  if name is not None and fileobj is not None:
    raise ValueError('Provided both a file name and file object!')

  # Treat 'r'/'rb' and 'w'/'wb' alike. Validating before touching the file
  # system means a bad mode can neither create nor leak a file.
  basemode = mode.replace('b', '')
  if basemode not in ('r', 'w'):
    raise ValueError('Unknown file mode.')

  opened_here = name is not None
  if opened_here:
    fileobj = file(name, basemode + 'b')

  try:
    # In-memory buffers may not expose a mode attribute; only reject objects
    # that explicitly report a text mode.
    filemode = getattr(fileobj, 'mode', None)
    if isinstance(filemode, str) and 'b' not in filemode:
      raise ValueError('File object not open in binary mode.')

    if basemode == 'r':
      return ArFileReader(fileobj)
    return ArFileWriter(fileobj)
  except Exception:
    # Do not leak a file that the caller never got a handle to.
    if opened_here:
      fileobj.close()
    raise
340
341
if __name__ == '__main__':
  # Running the module as a script executes the doctests in its docstrings.
  doctest.testmod()
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698