Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(146)

Unified Diff: client/libs/arfile/arfile.py

Issue 2049523004: luci-py: Tools for working with BSD style ar archives. (Closed) Base URL: https://github.com/luci/luci-py.git@master
Patch Set: Actually fix the unicode problem properly. Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « client/libs/arfile/__init__.py ('k') | client/libs/arfile/arfile_test.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: client/libs/arfile/arfile.py
diff --git a/client/libs/arfile/arfile.py b/client/libs/arfile/arfile.py
new file mode 100644
index 0000000000000000000000000000000000000000..a38794657fb0e0bf433623bb393ef82896f8aafd
--- /dev/null
+++ b/client/libs/arfile/arfile.py
@@ -0,0 +1,353 @@
+# Copyright 2016 The LUCI Authors. All rights reserved.
+# Use of this source code is governed under the Apache License, Version 2.0
+# that can be found in the LICENSE file.
+
+import collections
+import doctest
+import os
+import shutil
+import stat
+import struct
+
# Global header that every ar archive starts with.
AR_MAGIC_START = '!<arch>\n'
# Two byte terminator found at the end of every member header.
AR_MAGIC_BIT = '\x60\n'
# Byte appended after a member whose on-disk data length is odd.
AR_PADDING = '\n'

# Archive flavours. These 1-tuples are sentinels: the rest of the module
# compares them by identity (`is`), never by value.
AR_FORMAT_SIMPLE = ('Simple Format',)
AR_FORMAT_BSD = ('4.4BSD Format',)
AR_FORMAT_SYSV = ('System V / GNU Format',)

# Metadata used by ArInfo.fromdefault when the real file is not stat'ed.
AR_DEFAULT_MTIME = 1447140471
AR_DEFAULT_UID = 1000
AR_DEFAULT_GID = 1000
# Regular file, rw-r-----. Spelled with the `0o` prefix, which is accepted by
# Python 2.6+ as well as Python 3 (the bare `0100640` form is Python 2 only).
AR_DEFAULT_MODE = 0o100640

# Fixed width member header: name, mtime, uid, gid, mode, size, magic.
_ArInfoStruct = struct.Struct('16s 12s 6s 6s 8s 10s 2s')

_ArInfoBase = collections.namedtuple('ArInfo', [
    'format', 'name', 'size', 'mtime', 'uid', 'gid', 'mode'])
+
class ArInfo(_ArInfoBase):
  """An ArInfo object represents one member in an ArFile.

  It does *not* contain the file's data.
  """

  @staticmethod
  def _format(path, arformat):
    u"""Work out which archive format is needed to store `path`.

    Allow forcing the format to a given type
    >>> assert ArInfo._format('a', None) == AR_FORMAT_SIMPLE
    >>> assert ArInfo._format(u'\u2603', None) == AR_FORMAT_SIMPLE
    >>> assert ArInfo._format('a', AR_FORMAT_BSD) == AR_FORMAT_BSD

    Certain file paths require the BSD format
    >>> assert ArInfo._format('f f', None) == AR_FORMAT_BSD
    >>> assert ArInfo._format('123456789abcdef..', None) == AR_FORMAT_BSD
    >>> assert ArInfo._format('/abs', None) == AR_FORMAT_BSD

    >>> ArInfo._format('123456789abcdef..', AR_FORMAT_SIMPLE)
    Traceback (most recent call last):
        ...
    IOError: File name too long for format!

    >>> ArInfo._format('f f', AR_FORMAT_SIMPLE)
    Traceback (most recent call last):
        ...
    IOError: File name contains forbidden character for format!
    """
    # The limits apply to the bytes stored in the archive, not to characters.
    if isinstance(path, unicode):
      path = path.encode('utf-8')

    # Each entry is (name cannot be stored in the simple format, error to raise
    # when the simple format was explicitly requested anyway). A leading '/'
    # is included because fromfileobj treats such a header name as System V.
    bsd_only = (
        (path.startswith(('#1/', '/')),
         'File name starts with special for format!'),
        (len(path) >= 16,
         'File name too long for format!'),
        (' ' in path,
         'File name contains forbidden character for format!'),
    )
    for required, message in bsd_only:
      if not required:
        continue
      if arformat is None:
        arformat = AR_FORMAT_BSD
      elif arformat is AR_FORMAT_SIMPLE:
        raise IOError(message)

    if arformat is None:
      arformat = AR_FORMAT_SIMPLE

    return arformat

  @property
  def _encodedname(self):
    """The member name as the UTF-8 byte string stored in the archive."""
    name = self.name
    if isinstance(name, unicode):
      name = name.encode('utf-8')
    return name

  @property
  def needspadding(self):
    """True when a padding byte follows this member's data.

    >>> ArInfo(AR_FORMAT_SIMPLE, '', 10, 0, 0, 0, 0).needspadding
    False
    >>> ArInfo(AR_FORMAT_SIMPLE, '', 11, 0, 0, 0, 0).needspadding
    True
    >>> ArInfo(AR_FORMAT_BSD, 'a', 10, 0, 0, 0, 0).needspadding
    True
    >>> ArInfo(AR_FORMAT_BSD, 'ab', 10, 0, 0, 0, 0).needspadding
    False
    >>> ArInfo(AR_FORMAT_BSD, 'ab', 11, 0, 0, 0, 0).needspadding
    True
    >>> ArInfo(AR_FORMAT_BSD, 'ab', 12, 0, 0, 0, 0).needspadding
    False
    """
    return self.datasize % 2 != 0

  @property
  def datasize(self):
    """Number of bytes following the header (BSD: file name plus file data).

    >>> ArInfo(AR_FORMAT_SIMPLE, '', 1, 0, 0, 0, 0).datasize
    1
    >>> ArInfo(AR_FORMAT_SIMPLE, '', 10, 0, 0, 0, 0).datasize
    10
    >>> ArInfo(AR_FORMAT_BSD, '', 1, 0, 0, 0, 0).datasize
    1
    >>> ArInfo(AR_FORMAT_BSD, 'a', 1, 0, 0, 0, 0).datasize
    2
    >>> ArInfo(AR_FORMAT_BSD, '', 10, 0, 0, 0, 0).datasize
    10
    >>> ArInfo(AR_FORMAT_BSD, 'abc', 10, 0, 0, 0, 0).datasize
    13
    """
    if self.format is AR_FORMAT_SIMPLE:
      return self.size
    if self.format is AR_FORMAT_BSD:
      # Must be the encoded length so that it agrees with what tofileobj
      # writes; a unicode name can be shorter in characters than in bytes.
      return len(self._encodedname) + self.size
    # Raised explicitly so the check survives `python -O`.
    raise AssertionError('Unknown format %r' % (self.format,))

  @classmethod
  def fromfileobj(cls, fileobj, fullparse=True):
    """Create and return a ArInfo object from fileobj.

    Reads one member header (and, for the BSD format, the file name that
    follows it) leaving fileobj positioned at the start of the file data.

    Args:
      fileobj: Binary file like object positioned at a member header.
      fullparse: When false the mtime, uid, gid and mode fields are left as the
          raw header strings instead of being converted to integers.

    Returns:
      The ArInfo, or None when fileobj has no more data.

    Raises:
      IOError: if the buffer is invalid.
      SystemError: if the member uses the (unsupported) System V format.
    """
    buf = fileobj.read(_ArInfoStruct.size)
    if not buf:
      return None

    if len(buf) < _ArInfoStruct.size:
      raise IOError(
          'not enough data for header, got %r, needed %r' % (
              len(buf), _ArInfoStruct.size))

    name, mtime, uid, gid, mode, datasize, magic = _ArInfoStruct.unpack(buf)

    try:
      datasize = int(datasize)
      if fullparse:
        mtime = int(mtime)
        uid = int(uid)
        gid = int(gid)
        mode = int(mode, 8)
    except ValueError:
      # Keep the documented contract: a bad header is always an IOError.
      raise IOError('invalid numeric field in header: %r' % buf)

    if name.startswith('#1/'):
      arformat = AR_FORMAT_BSD

      try:
        filenamesize = int(name[3:])
      except ValueError:
        raise IOError('invalid file name length: %r' % name[3:])

      filename = fileobj.read(filenamesize)
      if len(filename) != filenamesize:
        raise IOError(
            'not enough data for filename, got %r, needed %r' % (
                len(filename), filenamesize))

      # In the BSD format the header size covers the name and the data.
      filesize = datasize - filenamesize
      if filesize < 0:
        raise IOError(
            'file name length %r larger than data size %r' % (
                filenamesize, datasize))

    elif name.startswith('/'):
      arformat = AR_FORMAT_SYSV
      raise SystemError('%s format is not supported.' % arformat)

    else:
      arformat = AR_FORMAT_SIMPLE
      filename = name.strip()
      filesize = datasize

    if magic != AR_MAGIC_BIT:
      raise IOError('file magic invalid, got %r, needed %r' % (
          magic, AR_MAGIC_BIT))

    return cls(
        arformat, filename.decode('utf-8'), filesize, mtime, uid, gid, mode)

  @classmethod
  def frompath(cls, path, arformat=None, cwd=None):
    """Return an ArInfo object from a file path for information.

    Args:
      path: Path of the file; also used verbatim as the member name.
      arformat: Format to force, or None to pick one based on the name.
      cwd: Optional directory that `path` is joined to before the stat.

    Raises:
      IOError: if the path is not a regular file, or the name cannot be stored
          in the requested format.
    """
    fp = os.path.join(cwd, path) if cwd else path
    st = os.stat(fp)

    if not stat.S_ISREG(st.st_mode):
      raise IOError('Only work on regular files.')

    return cls(
        cls._format(path, arformat), path,
        st.st_size, st.st_mtime, st.st_uid, st.st_gid, st.st_mode)

  @classmethod
  def fromdefault(cls, path, size, arformat=None):
    """Return an ArInfo object using name and size (with defaults elsewhere).

    Only a file's name and content are needed to create the ArInfo, all of the
    modification time, user, group and mode information will be set to default
    values. This means that you don't need to perform an expensive stat the
    file.

    >>> ai = ArInfo.fromdefault('abc123', 10)
    >>> ai.name
    'abc123'
    >>> ai.size
    10
    >>> assert ai.mtime == AR_DEFAULT_MTIME
    >>> assert ai.uid == AR_DEFAULT_UID
    >>> assert ai.gid == AR_DEFAULT_GID
    >>> assert ai.mode == AR_DEFAULT_MODE
    """
    return cls(
        cls._format(path, arformat), path, size,
        AR_DEFAULT_MTIME, AR_DEFAULT_UID, AR_DEFAULT_GID, AR_DEFAULT_MODE)

  def tofileobj(self, fileobj):
    """Write an ArInfo object to file like object.

    The header is assembled and validated first, so nothing is written when
    the member cannot be represented.

    Raises:
      IOError: if the name is too long for the simple format, or a field does
          not fit in its fixed width header column.
      AssertionError: if the format is neither simple nor BSD.
    """
    name = self._encodedname

    # File name, 16 bytes
    if self.format is AR_FORMAT_SIMPLE:
      if len(name) >= 16:
        raise IOError('File name too long for format!')
      namefield = '%-16s' % name
      datasize = self.size
    elif self.format is AR_FORMAT_BSD:
      # The real name is stored after the header and counted in the size.
      namefield = '#1/%-13s' % str(len(name))
      datasize = self.size + len(name)
    else:
      raise AssertionError('Unknown format %r' % (self.format,))

    header = ''.join((
        namefield,
        # Modtime, 12 bytes
        '%-12i' % self.mtime,
        # Owner ID, 6 bytes
        '%-6i' % self.uid,
        # Group ID, 6 bytes
        '%-6i' % self.gid,
        # File mode, 8 bytes
        '%-8o' % self.mode,
        # File size, 10 bytes
        '%-10s' % datasize,
        # File magic, 2 bytes
        AR_MAGIC_BIT,
    ))
    # '%-Ni' pads but never truncates, so an oversized value (for example a
    # seven digit uid) would shift every following field.
    if len(header) != _ArInfoStruct.size:
      raise IOError(
          'header field too wide, header is %r bytes, needed %r' % (
              len(header), _ArInfoStruct.size))
    fileobj.write(header)

    # Filename - BSD variant
    if self.format is AR_FORMAT_BSD:
      fileobj.write(name)
+
+
class ArFileReader(object):
  """Read an ar archive from the given input buffer.

  Iterating over the reader yields `(ArInfo, fileobj)` pairs. For each pair
  the consumer must either read exactly `ArInfo.size` bytes from fileobj or
  not touch it at all, in which case the member's data is skipped with a seek.

  The reader can be used as a context manager, which closes the input buffer
  on exit.
  """

  def __init__(self, fileobj, fullparse=True):
    """Check the archive magic at the current position of fileobj.

    Args:
      fileobj: Binary file like object positioned at the start of the archive.
      fullparse: Passed through to ArInfo.fromfileobj for every member.

    Raises:
      IOError: if the data does not start with the ar magic.
    """
    self.fullparse = fullparse
    self.fileobj = fileobj

    magic = self.fileobj.read(len(AR_MAGIC_START))
    if magic != AR_MAGIC_START:
      raise IOError(
          'Not an ar file, invalid magic, got %r, wanted %r.' % (
              magic, AR_MAGIC_START))

  def __enter__(self):
    return self

  def __exit__(self, exc_type, exc_value, traceback):
    # Returns None so that an exception raised in the `with` body propagates.
    self.close()

  def __iter__(self):
    while True:
      if self.fileobj.closed:
        raise IOError('Tried to read after the file closed.')
      ai = ArInfo.fromfileobj(self.fileobj, self.fullparse)
      if not ai:
        return

      # Measure how much the consumer reads while the generator is suspended.
      start = self.fileobj.tell()
      yield ai, self.fileobj
      end = self.fileobj.tell()

      read = end - start
      # If the reader didn't touch the input buffer, seek past the file.
      if not read:
        self.fileobj.seek(ai.size, os.SEEK_CUR)
      elif read != ai.size:
        raise IOError(
            'Wrong amount of data read from fileobj! got %i, wanted %i' % (
                read, ai.size))

      if ai.needspadding:
        padding = self.fileobj.read(len(AR_PADDING))
        if padding != AR_PADDING:
          raise IOError(
              'incorrect padding, got %r, wanted %r' % (
                  padding, AR_PADDING))

  def close(self):
    """Close the archive.

    Will close the input buffer.
    """
    self.fileobj.close()
+
+
class ArFileWriter(object):
  """Write an ar archive to the given output buffer.

  The writer can be used as a context manager, which closes the output buffer
  on exit.
  """

  # Upper bound on how much member data is held in memory at once.
  _COPY_CHUNK_SIZE = 64 * 1024

  def __init__(self, fileobj):
    """Start a new archive by writing the ar magic to fileobj."""
    self.fileobj = fileobj
    self.fileobj.write(AR_MAGIC_START)

  def __enter__(self):
    return self

  def __exit__(self, exc_type, exc_value, traceback):
    # Returns None so that an exception raised in the `with` body propagates.
    self.close()

  def addfile(self, arinfo, fileobj=None):
    """Append one member to the archive.

    Args:
      arinfo: ArInfo describing the member; `arinfo.size` bytes are stored.
      fileobj: File like object to read the member's data from. May be omitted
          only when the member is empty.

    Raises:
      ValueError: if fileobj is missing for a non-empty member.
      IOError: if fileobj runs out of data before `arinfo.size` bytes were
          copied. The header has already been written at that point.
    """
    if fileobj is None and arinfo.size:
      raise ValueError('Need to supply fileobj if file is non-zero in size.')

    arinfo.tofileobj(self.fileobj)

    # Copy exactly arinfo.size bytes. shutil.copyfileobj is not suitable here:
    # its third argument is the chunk size and it always copies until EOF.
    remaining = arinfo.size
    while remaining > 0:
      chunk = fileobj.read(min(remaining, self._COPY_CHUNK_SIZE))
      if not chunk:
        raise IOError(
            'not enough data for file, got %r, needed %r' % (
                arinfo.size - remaining, arinfo.size))
      self.fileobj.write(chunk)
      remaining -= len(chunk)

    if arinfo.needspadding:
      self.fileobj.write(AR_PADDING)

  def flush(self):
    """Flush the output buffer."""
    self.fileobj.flush()

  def close(self):
    """Close the archive.

    Will close the output buffer.
    """
    self.fileobj.close()
+
+
def is_arfile(name):
  """Return True when the file at path `name` starts with the ar magic."""
  magic_length = len(AR_MAGIC_START)
  with file(name, 'rb') as candidate:
    leading_bytes = candidate.read(magic_length)
  return leading_bytes == AR_MAGIC_START
+
+
# pylint: disable=redefined-builtin
def open(name=None, mode='r', fileobj=None):
  """Open an ar archive for reading or writing.

  Exactly one of `name` and `fileobj` must be given.

  Args:
    name: Path of the archive to open.
    mode: 'r' or 'rb' to read, 'w' or 'wb' to write. Archives are always
        handled in binary, so the 'b' is optional.
    fileobj: Already open file like object to use instead of `name`. If it has
        a `mode` attribute it must contain 'b'; objects without one (such as
        in-memory buffers) are assumed to be binary.

  Returns:
    An ArFileReader or an ArFileWriter, depending on `mode`.

  Raises:
    ValueError: if the arguments are inconsistent or the mode is unknown.
  """
  if name is None and fileobj is None:
    raise ValueError('Nothing to open!')

  if name is not None and fileobj is not None:
    raise ValueError('Provided both a file name and file object!')

  # Validate before touching the filesystem so that a bad mode can neither
  # leak a file handle nor truncate an existing file.
  if mode not in ('r', 'rb', 'w', 'wb'):
    raise ValueError('Unknown file mode.')
  reading = mode.startswith('r')

  opened_here = name is not None
  if opened_here:
    fileobj = file(name, 'rb' if reading else 'wb')

  filemode = getattr(fileobj, 'mode', None)
  if filemode is not None and 'b' not in filemode:
    raise ValueError('File object not open in binary mode.')

  try:
    if reading:
      return ArFileReader(fileobj)
    return ArFileWriter(fileobj)
  except Exception:
    # Only clean up what this function opened; a caller supplied fileobj
    # stays under the caller's control.
    if opened_here:
      fileobj.close()
    raise
+
+
if '__main__' == __name__:
  # Executed as a script: run the doctests embedded in this module.
  doctest.testmod()
« no previous file with comments | « client/libs/arfile/__init__.py ('k') | client/libs/arfile/arfile_test.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698