| OLD | NEW |
| (Empty) |
| 1 # Copyright 2013 The Chromium Authors. All rights reserved. | |
| 2 # Use of this source code is governed by a BSD-style license that can be | |
| 3 # found in the LICENSE file. | |
| 4 | |
| 5 """Utilities to work with importable python zip packages.""" | |
| 6 | |
import atexit
import collections
import cStringIO as StringIO
import io
import os
import pkgutil
import re
import sys
import tempfile
import threading
import zipfile
import zipimport
| 18 | |
| 19 | |
# Regular expressions (applied with re.match to bare file and directory names)
# for entries that add_directory excludes from a package by default.
EXCLUDE_LIST = (
  r'\..*',     # Hidden files and directories (including .svn and .git).
  r'.*\.pyc$', # Precompiled python files: they depend on the python version
  r'.*\.pyo$', # and the zip package should not be version-dependent.
)


# Paths of temporary files created by extract_resource. They are deleted by
# cleanup_extracted_resources, which runs as an atexit hook.
_extracted_files = []
# Guards every access to |_extracted_files|.
_extracted_files_lock = threading.Lock()
| 35 | |
| 36 | |
class ZipPackageError(RuntimeError):
  """Raised when a zip package can't be assembled from the given entries."""
| 39 | |
| 40 | |
class ZipPackage(object):
  """A set of files that can be zipped to file on disk or into memory buffer.

  Entries are only recorded by the add_* methods; file contents are read when
  one of the zip_into_* methods is called.

  Usage:
    package = ZipPackage(root)
    package.add_file(os.path.join(root, 'some_file.py'), '__main__.py')
    package.add_directory(os.path.join(root, 'some_directory'))
    package.add_buffer('generated.py', 'any string here')

    buf = package.zip_into_buffer()
    package.zip_into_file('my_zip.zip')
  """

  # Entry backed by a file on disk; |abs_path| is its normalized absolute path.
  _FileRef = collections.namedtuple('_FileRef', ['abs_path'])
  # Entry backed by an in-memory string; |buffer| is the file body.
  _BufferRef = collections.namedtuple('_BufferRef', ['buffer'])

  def __init__(self, root):
    """Initializes new empty ZipPackage.

    All files added to the package should live under the |root|. It will also
    be used when calculating relative paths of files in the package.

    |root| must be an absolute path. It is stored normalized and with a
    trailing path separator.
    """
    assert os.path.isabs(root), root
    # add_file and add_directory normalize their argument before the prefix
    # check against |self.root|, so the root has to be normalized the same way.
    # Otherwise a root spelled as '/a/b/../c' or '/a//b' rejects its own files.
    self.root = os.path.normpath(root).rstrip(os.sep) + os.sep
    # Maps path in archive (always '/'-separated) -> _FileRef or _BufferRef.
    self._items = {}

  @property
  def files(self):
    """Files added to the package as a list of relative paths in zip."""
    return list(self._items)

  def add_file(self, absolute_path, archive_path=None):
    """Adds a single file to the package.

    |archive_path| is a relative path in archive for this file, by default it's
    equal to |absolute_path| taken relative to |root|. In that case
    |absolute_path| must be in a |root| subtree.

    If |archive_path| is given, |absolute_path| can point to any file.

    Raises ZipPackageError if the file is outside of |root| (when it matters),
    is missing, is not a regular file, or its archive path is invalid or
    already taken.
    """
    assert os.path.isabs(absolute_path), absolute_path
    absolute_path = os.path.normpath(absolute_path)
    # If |archive_path| is not given, ensure that |absolute_path| is under root.
    if not archive_path and not absolute_path.startswith(self.root):
      raise ZipPackageError(
          'Path %s is not inside root %s' % (absolute_path, self.root))
    if not os.path.exists(absolute_path):
      raise ZipPackageError('No such file: %s' % absolute_path)
    if not os.path.isfile(absolute_path):
      raise ZipPackageError('Object %s is not a regular file' % absolute_path)
    archive_path = archive_path or absolute_path[len(self.root):]
    self._add_entry(archive_path, ZipPackage._FileRef(absolute_path))

  def add_python_file(self, absolute_path, archive_path=None):
    """Adds a single python file to the package.

    Recognizes *.pyc and *.pyo files and adds corresponding *.py file instead.

    Raises ZipPackageError if |absolute_path| has any other extension than
    .py, .pyc or .pyo, or for any reason add_file raises it.
    """
    base, ext = os.path.splitext(absolute_path)
    if ext in ('.pyc', '.pyo'):
      absolute_path = base + '.py'
    elif ext != '.py':
      raise ZipPackageError('Not a python file: %s' % absolute_path)
    self.add_file(absolute_path, archive_path)

  def add_directory(self, absolute_path, archive_path=None,
                    exclude=EXCLUDE_LIST):
    """Recursively adds all files from given directory to the package.

    |archive_path| is a relative path in archive for this directory, by default
    it's equal to |absolute_path| taken relative to |root|. In that case
    |absolute_path| must be in |root| subtree.

    If |archive_path| is given, |absolute_path| can point to any directory.

    |exclude| defines a list of regular expressions for file names to exclude
    from the package. They are applied with re.match to bare file and directory
    names; an excluded directory is skipped together with its whole subtree.

    Only non-empty directories will be actually added to the package.

    Raises ZipPackageError if the directory is outside of |root| (when it
    matters), is missing, is not a directory, or if adding any of its files
    fails.
    """
    assert os.path.isabs(absolute_path), absolute_path
    absolute_path = os.path.normpath(absolute_path).rstrip(os.sep) + os.sep
    # If |archive_path| is not given, ensure that |absolute_path| is under root.
    if not archive_path and not absolute_path.startswith(self.root):
      raise ZipPackageError(
          'Path %s is not inside root %s' % (absolute_path, self.root))
    if not os.path.exists(absolute_path):
      raise ZipPackageError('No such directory: %s' % absolute_path)
    if not os.path.isdir(absolute_path):
      raise ZipPackageError('Object %s is not a directory' % absolute_path)

    # Precompile regular expressions.
    exclude_regexps = [re.compile(r) for r in exclude]

    def should_exclude(name):
      """Returns True if |name| should be excluded from the package."""
      return any(r.match(name) for r in exclude_regexps)

    archive_path = archive_path or absolute_path[len(self.root):]
    for cur_dir, dirs, files in os.walk(absolute_path):
      # Prune excluded directories in place so os.walk doesn't descend there.
      dirs[:] = [d for d in dirs if not should_exclude(d)]
      # Add all non-excluded files.
      for name in files:
        if should_exclude(name):
          continue
        absolute = os.path.join(cur_dir, name)
        assert absolute.startswith(absolute_path), absolute
        relative = absolute[len(absolute_path):]
        self.add_file(absolute, os.path.join(archive_path, relative))

  def add_buffer(self, archive_path, buf):
    """Adds a contents of the given string |buf| to the package as a file.

    |archive_path| is a path in archive for this file.

    Raises ZipPackageError if |archive_path| is invalid or already taken.
    """
    # Only 'str' is allowed here, no 'unicode'
    assert isinstance(buf, str)
    self._add_entry(archive_path, ZipPackage._BufferRef(buf))

  def zip_into_buffer(self, compress=True):
    """Zips added files into in-memory zip file and returns it as byte string.

    |compress| selects deflate compression if True, plain storing otherwise.
    """
    # io.BytesIO is the binary in-memory stream available on both python 2.6+
    # and python 3, unlike cStringIO.
    with io.BytesIO() as stream:
      self._zip_into_stream(stream, compress)
      return stream.getvalue()

  def zip_into_file(self, path, compress=True):
    """Zips added files into a file on disk.

    |path| is opened for binary writing, an existing file is overwritten.
    |compress| selects deflate compression if True, plain storing otherwise.
    """
    with open(path, 'wb') as stream:
      self._zip_into_stream(stream, compress)

  def _add_entry(self, archive_path, ref):
    """Adds new zip package entry.

    |archive_path| is a relative path of the entry in the archive, |ref| is
    a _FileRef or a _BufferRef with its contents.

    Raises ZipPackageError if the path has empty, '.' or '..' components or if
    there's already an entry with such path.
    """
    # Always use forward slashes in zip.
    archive_path = archive_path.replace(os.sep, '/')
    # Ensure there are no suspicious components in the path. An empty component
    # also covers empty paths, absolute paths and trailing slashes. This is
    # raised explicitly (not asserted) so it is enforced under 'python -O' too.
    if any(p in ('', '.', '..') for p in archive_path.split('/')):
      raise ZipPackageError('Invalid path in archive: %s' % archive_path)
    # Ensure there's no file overwrites.
    if archive_path in self._items:
      raise ZipPackageError('Duplicated entry: %s' % archive_path)
    self._items[archive_path] = ref

  def _zip_into_stream(self, stream, compress):
    """Zips files added so far into some output stream.

    |stream| is a writable binary file-like object. |compress| selects deflate
    compression if True, plain storing otherwise.

    Some measures are taken to guarantee that final zip depends only on the
    content of added files:
      * File modification time is not stored.
      * Entries are sorted by file name in archive.
    """
    compression = zipfile.ZIP_DEFLATED if compress else zipfile.ZIP_STORED
    zip_file = zipfile.ZipFile(stream, 'w', compression)
    try:
      for archive_path in sorted(self._items):
        ref = self._items[archive_path]
        # ZipInfo is built by hand (instead of using ZipFile.write) so that it
        # keeps its default timestamp rather than the file's mtime.
        info = zipfile.ZipInfo(filename=archive_path)
        info.compress_type = compression
        # Pin the creator system to 3 (Unix) regardless of the host platform.
        info.create_system = 3
        if isinstance(ref, ZipPackage._FileRef):
          # Keep file mode bits, they live in the upper 16 bits of the field.
          info.external_attr = (os.stat(ref.abs_path).st_mode & 0xFFFF) << 16
          with open(ref.abs_path, 'rb') as f:
            buf = f.read()
        elif isinstance(ref, ZipPackage._BufferRef):
          buf = ref.buffer
        else:
          # Raised explicitly: a stripped assert would silently reuse |buf|
          # left over from the previous entry.
          raise AssertionError('Unexpected type %s' % (ref,))
        zip_file.writestr(info, buf)
    finally:
      zip_file.close()
| 213 | |
| 214 | |
def get_module_zip_archive(module):
  """Returns path to the zip package |module| was loaded from, or None.

  None is returned when the loader of |module| is not a zipimporter.
  """
  module_loader = pkgutil.get_loader(module)
  if isinstance(module_loader, zipimport.zipimporter):
    # 'archive' attribute is documented only for python 2.7, but it appears to
    # be there at least since python 2.5.2.
    return module_loader.archive
  return None
| 223 | |
| 224 | |
def is_zipped_module(module):
  """Returns True if |module| was loaded from a zip package, else False."""
  archive = get_module_zip_archive(module)
  return bool(archive)
| 228 | |
| 229 | |
def get_main_script_path():
  """Returns path to the zip file if running from zip, else path to __main__.

  That is the path of the file passed to python for execution, as in
  'python <main_script>', with executable zip packages taken into account.

  The path is relative to the current directory at the time the process was
  started. Returns None if __main__ has no __file__ attribute.
  """
  main_module = sys.modules['__main__']
  archive = get_module_zip_archive(main_module)
  if archive:
    return archive
  # If running from interactive console __file__ is not defined.
  return getattr(main_module, '__file__', None)
| 241 | |
| 242 | |
def extract_resource(package, resource):
  """Returns real file system path to a |resource| file from a |package|.

  If it's inside a zip package, will extract it first into temp file created
  with tempfile.mkstemp. Such file is readable and writable only by the creating
  user ID. Extracted files are deleted by cleanup_extracted_resources, which is
  registered as an atexit hook when the first file is extracted.

  |package| is a python module object that represents a package.
  |resource| should be a relative filename, using '/' as the path separator.

  Raises ValueError if no such resource.
  """
  # For regular non-zip packages just construct the path next to the package.
  if not is_zipped_module(package):
    path = os.path.join(os.path.dirname(package.__file__),
                        resource.replace('/', os.sep))
    if not os.path.exists(path):
      raise ValueError('No such resource in %s: %s' % (package, resource))
    return path

  # For zipped packages extract the resource into a temp file.
  try:
    data = pkgutil.get_data(package.__name__, resource)
  except IOError:
    # zipimporter.get_data raises IOError if the file is not in the archive.
    # Report it as ValueError, same as a missing resource of a regular package.
    data = None
  if data is None:
    raise ValueError('No such resource in zipped %s: %s' % (package, resource))

  fd, path = tempfile.mkstemp()

  # Register it for removal when process dies. Done before writing, so that the
  # temp file is cleaned up even if the write below fails.
  with _extracted_files_lock:
    _extracted_files.append(path)
    # First extracted file -> register atexit hook that cleans them all.
    if len(_extracted_files) == 1:
      atexit.register(cleanup_extracted_resources)

  # |data| is raw bytes: write it in binary mode to avoid newline translation.
  with os.fdopen(fd, 'wb') as stream:
    stream.write(data)

  return path
| 280 | |
| 281 | |
def cleanup_extracted_resources():
  """Deletes all temporary files created by extract_resource.

  Executed as atexit hook. Files that can't be deleted are skipped.
  """
  with _extracted_files_lock:
    while _extracted_files:
      path = _extracted_files.pop()
      try:
        os.remove(path)
      except OSError:
        # Best effort: the file may be already gone or not removable.
        continue
| OLD | NEW |