OLD | NEW |
(Empty) | |
| 1 # Copyright 2016 The LUCI Authors. All rights reserved. |
| 2 # Use of this source code is governed under the Apache License, Version 2.0 |
| 3 # that can be found in the LICENSE file. |
| 4 |
| 5 import collections |
| 6 import doctest |
| 7 import os |
| 8 import shutil |
| 9 import stat |
| 10 import struct |
| 11 |
# Global header found at the very start of every ar archive.
AR_MAGIC_START = '!<arch>\n'
# Two byte terminator that ends every per-member header.
AR_MAGIC_BIT = '\x60\n'
# Pad byte emitted after a member whose on-disk data length is odd.
AR_PADDING = '\n'

# Human readable identifiers for the header flavours this module knows about.
AR_FORMAT_SIMPLE = 'Simple Format'
AR_FORMAT_BSD = '4.4BSD Format'
AR_FORMAT_SYSV = 'System V / GNU Format'

# Metadata used by ArInfo.fromdefault so that no stat() call is needed.
AR_DEFAULT_MTIME = 1447140471
AR_DEFAULT_UID = 1000
AR_DEFAULT_GID = 1000
# Regular file, rw-r-----. Spelled with the `0o` prefix because the legacy
# `0100640` form is a syntax error on Python 3 (`0o` works on 2.6+ too).
AR_DEFAULT_MODE = 0o100640

# Fixed width member header: name, mtime, uid, gid, mode, size, magic.
_ArInfoStruct = struct.Struct('16s 12s 6s 6s 8s 10s 2s')
# Characters a file name may not contain when using AR_FORMAT_SIMPLE.
_ArFormatSimpleForbidden = " "
| 27 |
_ArInfoBase = collections.namedtuple('ArInfo', [
    'format', 'name', 'size', 'mtime', 'uid', 'gid', 'mode'])


class ArInfo(_ArInfoBase):
  """An ArInfo object represents one member in an ar archive.

  It does *not* contain the file's data.

  Fields:
    format: One of the AR_FORMAT_* constants.
    name: Name of the member.
    size: Size in bytes of the member's content. For AR_FORMAT_BSD this
      excludes the file name that is stored in front of the content.
    mtime: Modification time.
    uid: Numeric owner id.
    gid: Numeric group id.
    mode: File mode bits (stored in octal in the header).
  """

  @staticmethod
  def _format(path, arformat=None):
    """Return the header format to use for a member named `path`.

    When `arformat` is None the simplest format able to represent the name is
    chosen. When `arformat` is given it is validated against the name.

    Raises:
      IOError: if AR_FORMAT_SIMPLE was requested but cannot hold `path`.

    Allow forcing the format to a given type
    >>> assert ArInfo._format('a') == AR_FORMAT_SIMPLE
    >>> assert ArInfo._format('a', AR_FORMAT_BSD) == AR_FORMAT_BSD

    Certain file paths require the BSD format
    >>> assert ArInfo._format('f f') == AR_FORMAT_BSD
    >>> assert ArInfo._format('123456789abcdef..') == AR_FORMAT_BSD
    >>> assert ArInfo._format('#1/a') == AR_FORMAT_BSD

    >>> ArInfo._format('123456789abcdef..', AR_FORMAT_SIMPLE)
    Traceback (most recent call last):
        ...
    IOError: File name too long for format!

    >>> ArInfo._format('f f', AR_FORMAT_SIMPLE)
    Traceback (most recent call last):
        ...
    IOError: File name contains forbidden character for format!
    """
    # '#1/' in the name field is how a BSD header announces an out-of-line
    # file name, so a simple name may not start with it.
    if path.startswith("#1/"):
      if arformat is None:
        arformat = AR_FORMAT_BSD
      elif arformat == AR_FORMAT_SIMPLE:
        raise IOError('File name starts with special for format!')

    # The name field of the header is 16 bytes wide.
    if len(path) >= 16:
      if arformat is None:
        arformat = AR_FORMAT_BSD
      elif arformat == AR_FORMAT_SIMPLE:
        raise IOError('File name too long for format!')

    if _ArFormatSimpleForbidden in path:
      if arformat is None:
        arformat = AR_FORMAT_BSD
      elif arformat == AR_FORMAT_SIMPLE:
        raise IOError('File name contains forbidden character for format!')

    if arformat is None:
      arformat = AR_FORMAT_SIMPLE

    return arformat

  @property
  def needspadding(self):
    """True when the member's on-disk data has an odd length.

    >>> ArInfo(AR_FORMAT_SIMPLE, '', 10, 0, 0, 0, 0).needspadding
    False
    >>> ArInfo(AR_FORMAT_SIMPLE, '', 11, 0, 0, 0, 0).needspadding
    True
    >>> ArInfo(AR_FORMAT_BSD, 'a', 10, 0, 0, 0, 0).needspadding
    True
    >>> ArInfo(AR_FORMAT_BSD, 'ab', 10, 0, 0, 0, 0).needspadding
    False
    >>> ArInfo(AR_FORMAT_BSD, 'ab', 11, 0, 0, 0, 0).needspadding
    True
    >>> ArInfo(AR_FORMAT_BSD, 'ab', 12, 0, 0, 0, 0).needspadding
    False
    """
    return self.datasize % 2 != 0

  @property
  def datasize(self):
    """Number of bytes recorded in the header's size field.

    For the BSD format this includes the file name, which is stored in front
    of the content.

    Raises:
      AssertionError: if the format is neither simple nor BSD.

    >>> ArInfo(AR_FORMAT_SIMPLE, '', 1, 0, 0, 0, 0).datasize
    1
    >>> ArInfo(AR_FORMAT_SIMPLE, '', 10, 0, 0, 0, 0).datasize
    10
    >>> ArInfo(AR_FORMAT_BSD, '', 1, 0, 0, 0, 0).datasize
    1
    >>> ArInfo(AR_FORMAT_BSD, 'a', 1, 0, 0, 0, 0).datasize
    2
    >>> ArInfo(AR_FORMAT_BSD, '', 10, 0, 0, 0, 0).datasize
    10
    >>> ArInfo(AR_FORMAT_BSD, 'abc', 10, 0, 0, 0, 0).datasize
    13
    """
    # Compare by value: an equal string that is not the very same object as
    # the module constant must still be recognised.
    if self.format == AR_FORMAT_SIMPLE:
      return self.size
    if self.format == AR_FORMAT_BSD:
      return len(self.name) + self.size
    # Raised explicitly so the check survives `python -O`.
    raise AssertionError('Unknown format %r' % self.format)

  @classmethod
  def fromfileobj(cls, fileobj, fullparse=True):
    """Read one member header from fileobj and return it as an ArInfo.

    For the BSD format the file name stored directly after the header is
    consumed as well.

    Args:
      fileobj: File object to read the header from.
      fullparse: When true, mtime, uid, gid and mode are converted to
        integers; otherwise they are kept as the raw header fields.

    Returns:
      An ArInfo, or None if fileobj has no more data.

    Raises:
      IOError: if the header is truncated, its magic is wrong, or its sizes
        are invalid.
      ValueError: if a numeric header field cannot be parsed by int().
      SystemError: if the member uses the System V / GNU format.
    """
    buf = fileobj.read(_ArInfoStruct.size)
    if not buf:
      return None

    if len(buf) < _ArInfoStruct.size:
      raise IOError(
          'not enough data for header, got %r, needed %r' % (
              len(buf), _ArInfoStruct.size))

    name, mtime, uid, gid, mode, datasize, magic = _ArInfoStruct.unpack(buf)

    # Check the terminator before trusting any other field, so a corrupt
    # header is rejected before a (possibly huge) file name is read.
    if magic != AR_MAGIC_BIT:
      raise IOError('file magic invalid, got %r, needed %r' % (
          magic, AR_MAGIC_BIT))

    datasize = int(datasize)
    if fullparse:
      mtime = int(mtime)
      uid = int(uid)
      gid = int(gid)
      mode = int(mode, 8)

    if name.startswith('#1/'):
      arformat = AR_FORMAT_BSD

      try:
        filenamesize = int(name[3:])
      except ValueError:
        raise IOError('invalid file name length: %r' % name[3:])
      # read() with a negative count would slurp the rest of the file.
      if filenamesize < 0:
        raise IOError('invalid file name length: %r' % name[3:])

      filename = fileobj.read(filenamesize)
      if len(filename) != filenamesize:
        raise IOError(
            'not enough data for filename, got %r, needed %r' % (
                len(filename), filenamesize))

      # The size field covers the file name plus the content.
      filesize = datasize - filenamesize

    elif name.startswith('/'):
      raise SystemError('%s format is not supported.' % AR_FORMAT_SYSV)

    else:
      arformat = AR_FORMAT_SIMPLE
      filename = name.strip()
      filesize = datasize

    # A negative size would make readers seek backwards in the archive.
    if filesize < 0:
      raise IOError('invalid data size: %r' % datasize)

    return cls(arformat, filename, filesize, mtime, uid, gid, mode)

  @classmethod
  def frompath(cls, path, arformat=None):
    """Return an ArInfo describing the regular file at `path`.

    Args:
      path: Path of the file to stat; also used as the member name.
      arformat: Optional AR_FORMAT_* constant to force.

    Raises:
      IOError: if `path` is not a regular file, or the requested format
        cannot represent the name.
    """
    st = os.stat(path)

    # S_ISREG expects the mode bits, not the whole stat result.
    if not stat.S_ISREG(st.st_mode):
      raise IOError('Only work on regular files.')

    return cls(
        cls._format(path, arformat), path,
        st.st_size, st.st_mtime, st.st_uid, st.st_gid, st.st_mode)

  @classmethod
  def fromdefault(cls, path, size, arformat=None):
    """Return an ArInfo object using name and size (with defaults elsewhere).

    Only a file's name and content are needed to create the ArInfo, all of the
    modification time, user, group and mode information will be set to default
    values. This means that you don't need to perform an expensive stat of the
    file.

    >>> ai = ArInfo.fromdefault('abc123', 10)
    >>> ai.name
    'abc123'
    >>> ai.size
    10
    >>> assert ai.mtime == AR_DEFAULT_MTIME
    >>> assert ai.uid == AR_DEFAULT_UID
    >>> assert ai.gid == AR_DEFAULT_GID
    >>> assert ai.mode == AR_DEFAULT_MODE
    """
    return cls(
        cls._format(path, arformat), path, size,
        AR_DEFAULT_MTIME, AR_DEFAULT_UID, AR_DEFAULT_GID, AR_DEFAULT_MODE)

  def tofileobj(self, fileobj):
    """Write this member's header to fileobj.

    For the BSD format the file name is written directly after the header.
    Nothing is written if the header cannot be encoded.

    Raises:
      AssertionError: if the format is unknown, or a simple format name does
        not fit in the name field.
      IOError: if a field is too wide for its fixed width column.
    """
    # File name, 16 bytes
    if self.format == AR_FORMAT_SIMPLE:
      assert len(self.name) < 16
      namefield = '%-16s' % self.name
    elif self.format == AR_FORMAT_BSD:
      namefield = '#1/%-13s' % str(len(self.name))
    else:
      raise AssertionError('Unknown format %r' % self.format)

    header = ''.join([
        namefield,
        # Modtime, 12 bytes
        '%-12i' % self.mtime,
        # Owner ID, 6 bytes
        '%-6i' % self.uid,
        # Group ID, 6 bytes
        '%-6i' % self.gid,
        # File mode, 8 bytes
        '%-8o' % self.mode,
        # File size, 10 bytes
        '%-10s' % self.datasize,
        # File magic, 2 bytes
        AR_MAGIC_BIT,
    ])

    # The % formats pad but never truncate, so an oversized value (e.g. a
    # seven digit uid) would shift every following field.
    if len(header) != _ArInfoStruct.size:
      raise IOError(
          'header field too large, got %r bytes, needed %r' % (
              len(header), _ArInfoStruct.size))

    fileobj.write(header)

    # Filename - BSD variant
    if self.format == AR_FORMAT_BSD:
      fileobj.write(self.name)
| 237 |
| 238 |
class ArFileReader(object):
  """Iterate over the members of an ar archive read from a file object."""

  def __init__(self, fileobj, fullparse=True):
    """Check the archive's global header and remember the file object.

    Args:
      fileobj: File object positioned at the start of the archive.
      fullparse: Passed through to ArInfo.fromfileobj for every member.

    Raises:
      IOError: if the data does not start with AR_MAGIC_START.
    """
    self.fileobj = fileobj
    self.fullparse = fullparse

    header = self.fileobj.read(len(AR_MAGIC_START))
    if header != AR_MAGIC_START:
      message = 'Not an ar file, invalid magic, got %r, wanted %r.' % (
          header, AR_MAGIC_START)
      raise IOError(message)

  def __iter__(self):
    """Yield an (ArInfo, fileobj) pair for every member of the archive.

    The consumer may either leave fileobj untouched, in which case the
    member's content is skipped, or read exactly the member's size from it.

    Raises:
      IOError: if the file object was closed, the consumer read a partial
        member, or the padding byte after a member is wrong.
    """
    while not self.fileobj.closed:
      member = ArInfo.fromfileobj(self.fileobj, self.fullparse)
      if member is None:
        return

      before = self.fileobj.tell()
      yield member, self.fileobj
      consumed = self.fileobj.tell() - before

      if consumed not in (0, member.size):
        raise IOError(
            'Wrong amount of data read from fileobj! got %i, wanted %i' % (
                consumed, member.size))
      # The consumer ignored the content, so step over it ourselves.
      if consumed == 0:
        self.fileobj.seek(member.size, os.SEEK_CUR)

      if member.needspadding:
        pad = self.fileobj.read(len(AR_PADDING))
        if pad != AR_PADDING:
          raise IOError(
              'incorrect padding, got %r, wanted %r' % (
                  pad, AR_PADDING))

    raise IOError('Tried to read after the file closed.')

  def close(self):
    """Close the archive by closing the underlying file object."""
    self.fileobj.close()
| 285 |
| 286 |
class ArFileWriter(object):
  """Write an ar archive to the given output buffer."""

  # Upper bound on how much member content is held in memory at once.
  _COPY_CHUNK_SIZE = 64 * 1024

  def __init__(self, fileobj):
    """Remember the output file object and write the global ar header."""
    self.fileobj = fileobj
    self.fileobj.write(AR_MAGIC_START)

  def addfile(self, arinfo, fileobj=None):
    """Append one member to the archive.

    Args:
      arinfo: ArInfo describing the member; its tofileobj() writes the header.
      fileobj: File object supplying the member's content. Exactly
        arinfo.size bytes are read from it. May be omitted for empty members.

    Raises:
      ValueError: if arinfo.size is non-zero and no fileobj was supplied.
      IOError: if fileobj runs out of data before arinfo.size bytes were
        copied.
    """
    if not fileobj and arinfo.size != 0:
      raise ValueError('Need to supply fileobj if file is non-zero in size.')

    arinfo.tofileobj(self.fileobj)
    if fileobj:
      self._copy_exactly(fileobj, arinfo.size)

    if arinfo.needspadding:
      self.fileobj.write(AR_PADDING)

  def _copy_exactly(self, source, size):
    """Copy exactly `size` bytes from source into the archive.

    shutil.copyfileobj is not used because its length argument is a buffer
    size rather than a limit: it always drains the whole source, which would
    not match the size recorded in the member header.
    """
    remaining = size
    while remaining > 0:
      chunk = source.read(min(remaining, self._COPY_CHUNK_SIZE))
      if not chunk:
        raise IOError(
            'not enough data for file, got %r, needed %r' % (
                size - remaining, size))
      self.fileobj.write(chunk)
      remaining -= len(chunk)

  def flush(self):
    """Flush the output buffer."""
    self.fileobj.flush()

  def close(self):
    """Close the archive.

    Will close the output buffer."""
    self.fileobj.close()
| 314 |
| 315 |
def is_arfile(name):
  """Return True if the file at path `name` starts with the ar global header.

  Only the first len(AR_MAGIC_START) bytes of the file are read.
  """
  magic_length = len(AR_MAGIC_START)
  with file(name, 'rb') as candidate:
    leading = candidate.read(magic_length)
  return leading == AR_MAGIC_START
| 319 |
| 320 |
# pylint: disable=redefined-builtin
def open(name=None, mode='r', fileobj=None):
  """Open an ar archive for reading or writing.

  Exactly one of `name` and `fileobj` must be given.

  Args:
    name: Path of the archive to open.
    mode: 'r' or 'rb' to read, 'w' or 'wb' to write. Files opened by name are
      always opened in binary mode.
    fileobj: An already open file object; it must expose a `mode` attribute
      containing 'b'.

  Returns:
    An ArFileReader for read modes, an ArFileWriter for write modes.

  Raises:
    ValueError: if the arguments are inconsistent, the mode is unknown, or
      fileobj is not open in binary mode.
  """
  if name is None and fileobj is None:
    raise ValueError('Nothing to open!')
  if name is not None and fileobj is not None:
    raise ValueError('Provided both a file name and file object!')

  # Binary access is implied, so 'r' and 'rb' (likewise 'w' and 'wb') mean the
  # same thing. Validate before touching the filesystem so that a bad mode
  # neither creates/truncates a file nor leaks an open handle.
  direction = mode.replace('b', '')
  if direction not in ('r', 'w'):
    raise ValueError('Unknown file mode.')

  if name is not None:
    fileobj = file(name, direction + 'b')

  if 'b' not in fileobj.mode:
    raise ValueError('File object not open in binary mode.')

  if direction == 'r':
    return ArFileReader(fileobj)
  return ArFileWriter(fileobj)
| 340 |
| 341 |
# When executed as a script, run the doctests embedded in this module's
# docstrings.
if __name__ == '__main__':
  doctest.testmod()
OLD | NEW |