OLD | NEW |
| (Empty) |
1 # Copyright 2013 The Chromium Authors. All rights reserved. | |
2 # Use of this source code is governed by a BSD-style license that can be | |
3 # found in the LICENSE file. | |
4 | |
5 """Utilities to work with importable python zip packages.""" | |
6 | |
7 import atexit | |
8 import collections | |
9 import cStringIO as StringIO | |
10 import os | |
11 import pkgutil | |
12 import re | |
13 import sys | |
14 import tempfile | |
15 import threading | |
16 import zipfile | |
17 import zipimport | |
18 | |
19 | |
20 # Glob patterns for files to exclude from a package by default. | |
21 EXCLUDE_LIST = ( | |
22 # Ignore hidden files (including .svn and .git). | |
23 r'\..*', | |
24 | |
25 # Ignore precompiled python files since they depend on python version and we | |
26 # don't want zip package to be version-depended. | |
27 r'.*\.pyc$', | |
28 r'.*\.pyo$', | |
29 ) | |
30 | |
31 | |
32 # Temporary files extracted by extract_resource. Removed in atexit hook. | |
33 _extracted_files = [] | |
34 _extracted_files_lock = threading.Lock() | |
35 | |
36 | |
class ZipPackageError(RuntimeError):
  """Raised when a zip package can't be put together."""
39 | |
40 | |
class ZipPackage(object):
  """A set of files that can be zipped to file on disk or into memory buffer.

  Entries are only registered by the add_* methods; file contents are read
  from disk when the package is actually zipped.

  Usage:
    package = ZipPackage(root)
    package.add_file(os.path.join(root, 'some_file.py'), '__main__.py')
    package.add_directory(os.path.join(root, 'some_directory'))
    package.add_buffer('generated.py', 'any string here')

    buf = package.zip_into_buffer()
    package.zip_into_file('my_zip.zip')
  """

  # Entry backed by a file on disk, read lazily at zip time.
  _FileRef = collections.namedtuple('_FileRef', ['abs_path'])
  # Entry backed by an in-memory string.
  _BufferRef = collections.namedtuple('_BufferRef', ['buffer'])

  def __init__(self, root):
    """Initializes new empty ZipPackage.

    All files added to the package should live under the |root|. It will also
    be used when calculating relative paths of files in the package.

    |root| must be an absolute path.
    """
    assert os.path.isabs(root), root
    # Keep exactly one trailing separator so that the startswith() checks below
    # match whole path components only ('/a/b/' is not a prefix of '/a/bc').
    self.root = root.rstrip(os.sep) + os.sep
    # Maps path in archive (with forward slashes) -> _FileRef or _BufferRef.
    self._items = {}

  @property
  def files(self):
    """Files added to the package as a list of relative paths in zip."""
    return list(self._items)

  def add_file(self, absolute_path, archive_path=None):
    """Adds a single file to the package.

    |archive_path| is a relative path in archive for this file, by default it's
    equal to |absolute_path| taken relative to |root|. In that case
    |absolute_path| must be in a |root| subtree.

    If |archive_path| is given, |absolute_path| can point to any file.

    Raises ZipPackageError if the file is missing, is not a regular file, is
    outside of |root| (when |archive_path| is omitted) or if the resulting
    archive path is already taken.
    """
    assert os.path.isabs(absolute_path), absolute_path
    absolute_path = os.path.normpath(absolute_path)
    # If |archive_path| is not given, ensure that |absolute_path| is under root.
    if not archive_path and not absolute_path.startswith(self.root):
      raise ZipPackageError(
          'Path %s is not inside root %s' % (absolute_path, self.root))
    if not os.path.exists(absolute_path):
      raise ZipPackageError('No such file: %s' % absolute_path)
    if not os.path.isfile(absolute_path):
      raise ZipPackageError('Object %s is not a regular file' % absolute_path)
    archive_path = archive_path or absolute_path[len(self.root):]
    self._add_entry(archive_path, ZipPackage._FileRef(absolute_path))

  def add_python_file(self, absolute_path, archive_path=None):
    """Adds a single python file to the package.

    Recognizes *.pyc and *.pyo files and adds corresponding *.py file instead.

    Raises ZipPackageError if the extension is none of .py, .pyc, .pyo, plus
    everything add_file raises.
    """
    base, ext = os.path.splitext(absolute_path)
    if ext in ('.pyc', '.pyo'):
      absolute_path = base + '.py'
    elif ext != '.py':
      raise ZipPackageError('Not a python file: %s' % absolute_path)
    self.add_file(absolute_path, archive_path)

  def add_directory(self, absolute_path, archive_path=None,
                    exclude=EXCLUDE_LIST):
    """Recursively adds all files from given directory to the package.

    |archive_path| is a relative path in archive for this directory, by default
    it's equal to |absolute_path| taken relative to |root|. In that case
    |absolute_path| must be in |root| subtree.

    If |archive_path| is given, |absolute_path| can point to any directory.

    |exclude| defines a list of regular expressions for file names to exclude
    from the package. They are applied with re.match to bare file and directory
    names; an excluded directory is skipped together with all its content.

    Only non-empty directories will be actually added to the package.

    Raises ZipPackageError if the directory is missing, is not a directory, is
    outside of |root| (when |archive_path| is omitted), plus everything
    add_file raises for individual files.
    """
    assert os.path.isabs(absolute_path), absolute_path
    absolute_path = os.path.normpath(absolute_path).rstrip(os.sep) + os.sep
    # If |archive_path| is not given, ensure that |path| is under root.
    if not archive_path and not absolute_path.startswith(self.root):
      raise ZipPackageError(
          'Path %s is not inside root %s' % (absolute_path, self.root))
    if not os.path.exists(absolute_path):
      raise ZipPackageError('No such directory: %s' % absolute_path)
    if not os.path.isdir(absolute_path):
      raise ZipPackageError('Object %s is not a directory' % absolute_path)

    # Precompile regular expressions.
    exclude_regexps = [re.compile(r) for r in exclude]

    def should_exclude(name):
      """Returns True if |name| should be excluded from the package."""
      return any(r.match(name) for r in exclude_regexps)

    archive_path = archive_path or absolute_path[len(self.root):]
    for cur_dir, dirs, files in os.walk(absolute_path):
      # Add all non-excluded files.
      for name in files:
        if should_exclude(name):
          continue
        absolute = os.path.join(cur_dir, name)
        assert absolute.startswith(absolute_path), absolute
        relative = absolute[len(absolute_path):]
        self.add_file(absolute, os.path.join(archive_path, relative))
      # Prune excluded directories in place: os.walk (top-down) only descends
      # into whatever is left in |dirs|.
      dirs[:] = [d for d in dirs if not should_exclude(d)]

  def add_buffer(self, archive_path, buf):
    """Adds a contents of the given string |buf| to the package as a file.

    |archive_path| is a path in archive for this file.

    Raises ZipPackageError if |archive_path| is malformed or already taken.
    """
    # Only 'str' is allowed here, no 'unicode'
    assert isinstance(buf, str)
    self._add_entry(archive_path, ZipPackage._BufferRef(buf))

  def zip_into_buffer(self, compress=True):
    """Zips added files into in-memory zip file and returns it as str.

    Entries are deflated if |compress| is True and stored as is otherwise.
    """
    stream = StringIO.StringIO()
    try:
      self._zip_into_stream(stream, compress)
      return stream.getvalue()
    finally:
      stream.close()

  def zip_into_file(self, path, compress=True):
    """Zips added files into a file on disk.

    |path| is overwritten if it exists. Entries are deflated if |compress| is
    True and stored as is otherwise.
    """
    with open(path, 'wb') as stream:
      self._zip_into_stream(stream, compress)

  def _add_entry(self, archive_path, ref):
    """Adds new zip package entry.

    |ref| is a _FileRef or a _BufferRef that describes where to get the data.

    Raises ZipPackageError if |archive_path| has empty, '.' or '..' components
    or if such entry already exists.
    """
    # Always use forward slashes in zip.
    archive_path = archive_path.replace(os.sep, '/')
    # Ensure there are no suspicious components in the path. This is an
    # explicit check rather than an assert so it survives 'python -O'.
    if any(p in ('', '.', '..') for p in archive_path.split('/')):
      raise ZipPackageError('Invalid path in archive: %s' % archive_path)
    # Ensure there's no file overwrites.
    if archive_path in self._items:
      raise ZipPackageError('Duplicated entry: %s' % archive_path)
    self._items[archive_path] = ref

  def _zip_into_stream(self, stream, compress):
    """Zips files added so far into some output stream.

    Some measures are taken to guarantee that final zip depends only on the
    content of added files:
      * File modification time is not stored.
      * Entries are sorted by file name in archive.
    """
    compression = zipfile.ZIP_DEFLATED if compress else zipfile.ZIP_STORED
    zip_file = zipfile.ZipFile(stream, 'w', compression)
    try:
      for archive_path in sorted(self._items):
        ref = self._items[archive_path]
        # ZipInfo is built by hand (no date_time passed) so that the timestamp
        # of the file on disk never ends up in the archive.
        info = zipfile.ZipInfo(filename=archive_path)
        info.compress_type = compression
        # 3 is the zip format's host system code for Unix.
        info.create_system = 3
        if isinstance(ref, ZipPackage._FileRef):
          # File mode bits go into the upper 16 bits of external_attr.
          info.external_attr = (os.stat(ref.abs_path).st_mode & 0xFFFF) << 16
          with open(ref.abs_path, 'rb') as f:
            buf = f.read()
        elif isinstance(ref, ZipPackage._BufferRef):
          buf = ref.buffer
        else:
          # Internal invariant. Raised explicitly so it is not stripped by -O,
          # which would silently reuse |buf| of a previous entry.
          raise AssertionError('Unexpected type %s' % (ref,))
        zip_file.writestr(info, buf)
    finally:
      zip_file.close()
213 | |
214 | |
def get_module_zip_archive(module):
  """Returns path to a zip package |module| was loaded from, or None.

  None is returned whenever the module's loader is not a zipimporter.
  """
  module_loader = pkgutil.get_loader(module)
  if isinstance(module_loader, zipimport.zipimporter):
    # 'archive' property is documented only for python 2.7, but it appears to
    # be there at least since python 2.5.2.
    return module_loader.archive
  return None
223 | |
224 | |
def is_zipped_module(module):
  """Tells whether |module| was imported from a zip package."""
  archive = get_module_zip_archive(module)
  return True if archive else False
228 | |
229 | |
def get_main_script_path():
  """Returns path to the zip package being executed, else path to __main__.

  This is the file that was handed to the interpreter, as in
  'python <main_script>', where <main_script> may be an executable zip package.

  The result is None when __main__ has no __file__ attribute, as is the case
  in an interactive console. The path is relative to the directory the process
  was started in.
  """
  main_module = sys.modules['__main__']
  zip_archive = get_module_zip_archive(main_module)
  if zip_archive:
    return zip_archive
  # Not running from a zip (or its path is empty): fall back to __file__.
  return getattr(main_module, '__file__', None)
241 | |
242 | |
def extract_resource(package, resource):
  """Returns real file system path to a |resource| file from a |package|.

  If it's inside a zip package, will extract it first into temp file created
  with tempfile.mkstemp. Such file is readable and writable only by the creating
  user ID. It is removed by cleanup_extracted_resources, which is registered as
  an atexit hook when the first resource is extracted.

  |package| is a python module object that represents a package.
  |resource| should be a relative filename, using '/' as the path separator.

  Raises ValueError if no such resource.
  """
  # For regular non-zip packages just construct an absolute path.
  if not is_zipped_module(package):
    # NOTE(review): this relies on package.__file__ being an absolute path;
    # nothing here enforces it.
    path = os.path.join(os.path.dirname(package.__file__),
                        resource.replace('/', os.sep))
    if not os.path.exists(path):
      raise ValueError('No such resource in %s: %s' % (package, resource))
    return path

  # For zipped packages extract the resource into a temp file.
  try:
    data = pkgutil.get_data(package.__name__, resource)
  except IOError:
    # zipimporter.get_data reports a missing file with IOError; translate it
    # into the documented ValueError below.
    data = None
  if data is None:
    raise ValueError('No such resource in zipped %s: %s' % (package, resource))
  fd, path = tempfile.mkstemp()

  # Register it for removal when process dies. This is done before writing so
  # that the temp file is still cleaned up if the write below fails.
  with _extracted_files_lock:
    _extracted_files.append(path)
    # First extracted file -> register atexit hook that cleans them all.
    if len(_extracted_files) == 1:
      atexit.register(cleanup_extracted_resources)

  # Resource data is raw bytes: binary mode keeps it intact on platforms that
  # translate line endings in text mode.
  with os.fdopen(fd, 'wb') as stream:
    stream.write(data)

  return path
280 | |
281 | |
def cleanup_extracted_resources():
  """Deletes every temporary file produced by extract_resource.

  Registered by extract_resource as an atexit hook. Files that can't be removed
  are silently skipped.
  """
  with _extracted_files_lock:
    while _extracted_files:
      temp_path = _extracted_files.pop()
      try:
        os.remove(temp_path)
      except OSError:
        # Best effort: the file may be already gone or not removable.
        pass
OLD | NEW |