OLD | NEW |
| (Empty) |
1 """File wrangling.""" | |
2 | |
3 from coverage.backward import to_string | |
4 from coverage.misc import CoverageException | |
5 import fnmatch, os, os.path, re, sys | |
6 | |
class FileLocator(object):
    """Understand how filenames work.

    A locator remembers the current directory at construction time, so that
    filenames can later be made relative to it, and it caches the results of
    canonicalizing filenames.

    """

    def __init__(self):
        # The absolute path to our current directory, with a trailing
        # separator so that prefix tests only match whole directory names.
        self.relative_dir = os.path.normcase(abs_file(os.curdir) + os.sep)

        # Cache of results of calling the canonical_filename() method, to
        # avoid duplicating work.  Keys are the filenames exactly as they
        # were passed in by the caller.
        self.canonical_filename_cache = {}

    def relative_filename(self, filename):
        """Return the relative form of `filename`.

        The filename will be relative to the current directory when the
        `FileLocator` was constructed.  Filenames outside of that directory
        are returned unchanged.

        """
        # Compare case-normalized forms, but slice the original so that the
        # caller gets back the spelling they passed in.
        fnorm = os.path.normcase(filename)
        if fnorm.startswith(self.relative_dir):
            filename = filename[len(self.relative_dir):]
        return filename

    def canonical_filename(self, filename):
        """Return a canonical filename for `filename`.

        An absolute path with no redundant components and normalized case.

        A relative `filename` is looked for in the current directory and then
        in each entry of `sys.path`; the first existing candidate is the one
        that gets canonicalized.  Results are cached per `filename`.

        """
        if filename not in self.canonical_filename_cache:
            # Keep `filename` untouched: it is the cache key.  Rebinding it
            # to the resolved candidate would store the result under a key
            # the caller never asks for, defeating the cache.
            found = filename
            if not os.path.isabs(filename):
                for path in [os.curdir] + sys.path:
                    if path is None:
                        continue
                    candidate = os.path.join(path, filename)
                    if os.path.exists(candidate):
                        found = candidate
                        break
            self.canonical_filename_cache[filename] = abs_file(found)
        return self.canonical_filename_cache[filename]

    def get_zip_data(self, filename):
        """Get data from `filename` if it is a zip file path.

        Returns the string data read from the zip file, or None if no zip file
        could be found or `filename` isn't in it.  The data returned will be
        an empty string if the file is empty.

        """
        import zipimport
        markers = ['.zip'+os.sep, '.egg'+os.sep]
        for marker in markers:
            if marker in filename:
                # Split only at the first marker: everything after it is the
                # path inside the archive, even if it contains the marker
                # text again.
                parts = filename.split(marker, 1)
                try:
                    # marker[:-1] drops the trailing separator, leaving the
                    # archive's own extension.
                    zi = zipimport.zipimporter(parts[0]+marker[:-1])
                except zipimport.ZipImportError:
                    continue
                try:
                    data = zi.get_data(parts[1])
                except IOError:
                    continue
                return to_string(data)
        return None
72 | |
73 | |
if sys.platform == 'win32':

    def actual_path(path):
        """Return `path` spelled with the case the file system actually uses.

        Each component is looked up in a listing of its parent directory and
        replaced by the entry that matches it case-insensitively.  Results,
        and directory listings, are cached on the function object.

        """
        try:
            return actual_path.cache[path]
        except KeyError:
            pass

        parent, leaf = os.path.split(path)
        if not leaf:
            # Nothing after the last separator: the head is all there is.
            resolved = parent
        elif not parent:
            # A bare name, with no directory to look it up in.
            resolved = leaf
        else:
            # Fix the case of the parent first, then find our own entry.
            parent = actual_path(parent)
            try:
                entries = actual_path.list_cache[parent]
            except KeyError:
                try:
                    entries = os.listdir(parent)
                except OSError:
                    # An unreadable or missing directory has no entries.
                    entries = []
                actual_path.list_cache[parent] = entries
            wanted = os.path.normcase(leaf)
            for entry in entries:
                if os.path.normcase(entry) == wanted:
                    leaf = entry
                    break
            resolved = os.path.join(parent, leaf)

        actual_path.cache[path] = resolved
        return resolved

    # path -> correctly-cased path
    actual_path.cache = {}
    # directory -> list of names in that directory
    actual_path.list_cache = {}

else:
    def actual_path(filename):
        """Return `filename` unchanged: no case fixing off Windows."""
        return filename
112 | |
def abs_file(filename):
    """Return the absolute normalized form of `filename`.

    The name is passed through `os.path.realpath`, then `os.path.abspath`,
    and finally through `actual_path`.

    """
    resolved = os.path.realpath(filename)
    absolute = os.path.abspath(resolved)
    return actual_path(absolute)
118 | |
119 | |
def prep_patterns(patterns):
    """Prepare the file patterns for use in a `FnmatchMatcher`.

    A pattern that begins with a wildcard (``*`` or ``?``) is kept exactly
    as given.  Any other pattern is made absolute with `abs_file`.

    If `patterns` is None (or otherwise empty), an empty list is returned.

    """
    if not patterns:
        return []

    prepped = []
    for pat in patterns:
        wildcard_first = pat.startswith("*") or pat.startswith("?")
        if not wildcard_first:
            pat = abs_file(pat)
        prepped.append(pat)
    return prepped
138 | |
139 | |
class TreeMatcher(object):
    """A matcher for files in a tree.

    Holds a list of directory paths and answers whether a given path is one
    of them or lies underneath one of them.

    """
    def __init__(self, directories):
        # Copy, so later changes to the caller's sequence don't affect us.
        self.dirs = directories[:]

    def __repr__(self):
        return "<TreeMatcher %r>" % self.dirs

    def add(self, directory):
        """Add another directory to the list we match for."""
        self.dirs.append(directory)

    def match(self, fpath):
        """Does `fpath` indicate a file in one of our trees?"""
        for root in self.dirs:
            if not fpath.startswith(root):
                continue
            # Either the very same path, or the prefix must be followed by
            # a separator so that "/a/bc" is not taken to be inside "/a/b".
            if fpath == root or fpath[len(root)] == os.sep:
                return True
        return False
163 | |
164 | |
class FnmatchMatcher(object):
    """A matcher for files by filename pattern.

    Holds a list of `fnmatch`-style patterns and answers whether a given
    path matches any of them.

    """
    def __init__(self, pats):
        # Copy, so later changes to the caller's sequence don't affect us.
        self.pats = pats[:]

    def __repr__(self):
        return "<FnmatchMatcher %r>" % self.pats

    def match(self, fpath):
        """Does `fpath` match one of our filename patterns?"""
        return any(fnmatch.fnmatch(fpath, pat) for pat in self.pats)
179 | |
180 | |
def sep(s):
    """Find the path separator used in this string, or os.sep if none.

    The first backslash or forward slash found in `s` is the answer.

    """
    found = re.search(r"[\\/]", s)
    if found is None:
        return os.sep
    return found.group(0)
189 | |
190 | |
class PathAliases(object):
    """A collection of aliases for paths.

    When combining data files from remote machines, often the paths to source
    code are different, for example, due to OS differences, or because of
    serialized checkouts on continuous integration machines.

    A `PathAliases` object tracks a list of pattern/result pairs, and can
    map a path through those aliases to produce a unified path.

    `locator` is a FileLocator that is used to canonicalize the results.  If
    it is None, mapped paths are returned without canonicalization.

    """
    def __init__(self, locator=None):
        # List of (compiled regex, result root, pattern separator,
        # result separator) tuples, in the order they were added.
        self.aliases = []
        self.locator = locator

    def add(self, pattern, result):
        """Add the `pattern`/`result` pair to the list of aliases.

        `pattern` is an `fnmatch`-style pattern.  `result` is a simple
        string.  When mapping paths, if a path starts with a match against
        `pattern`, then that match is replaced with `result`.  This models
        isomorphic source trees being rooted at different places on two
        different machines.

        `pattern` can't end with a wildcard component, since that would
        match an entire tree, and not just its root.  A `CoverageException`
        is raised if it does.

        """
        # The pattern can't end with a wildcard component.
        pattern = pattern.rstrip(r"\/")
        if pattern.endswith("*"):
            raise CoverageException("Pattern must not end with wildcards.")
        pattern_sep = sep(pattern)
        pattern += pattern_sep

        # Make a regex from the pattern.  fnmatch always anchors the regex
        # at the end so it matches the whole string, which we don't want:
        # we only match the root of a path.  The anchor is spelled
        # differently by different Python versions, so strip whichever
        # spelling is present.
        regex_pat = fnmatch.translate(pattern)
        flags = re.IGNORECASE   # We want case-insensitive matching.
        if regex_pat.endswith(r"\Z(?ms)"):
            # Trailing global flags can't stay in the middle of a regex, so
            # carry them over as compile flags instead.
            regex_pat = regex_pat[:-len(r"\Z(?ms)")]
            flags |= re.DOTALL | re.MULTILINE
        else:
            for anchor in (r"\Z", r"\z", "$"):
                if regex_pat.endswith(anchor):
                    regex_pat = regex_pat[:-len(anchor)]
                    break

        # We want */a/b.py to match on Windows too, so change slash to match
        # either separator.
        # NOTE(review): this relies on the translated regex spelling a slash
        # as an escaped "\/".  Where re.escape leaves "/" unescaped this is
        # a no-op and a forward slash in the pattern matches only "/".
        regex_pat = regex_pat.replace(r"\/", r"[\\/]")
        regex = re.compile(regex_pat, flags)

        # Normalize the result: it must end with a path separator.
        result_sep = sep(result)
        result = result.rstrip(r"\/") + result_sep
        self.aliases.append((regex, result, pattern_sep, result_sep))

    def map(self, path):
        """Map `path` through the aliases.

        `path` is checked against all of the patterns.  The first pattern to
        match is used to replace the root of the path with the result root.
        Only one pattern is ever used.  If no patterns match, `path` is
        returned unchanged.

        The separator style in the result is made to match that of the result
        in the alias.

        """
        for regex, result, pattern_sep, result_sep in self.aliases:
            m = regex.match(path)
            if m:
                # Swap only the matched root.  A plain str.replace() of the
                # matched text would also rewrite any later part of the path
                # that happens to repeat the root.
                new = result + path[m.end():]
                if pattern_sep != result_sep:
                    new = new.replace(pattern_sep, result_sep)
                if self.locator:
                    new = self.locator.canonical_filename(new)
                return new
        return path
266 | |
267 | |
def find_python_files(dirname):
    """Yield all of the importable Python files in `dirname`, recursively.

    `dirname` itself is always searched, on the assumption that the caller
    named it on purpose.  Below it, a directory is only searched if it has
    an __init__.py; otherwise it and everything beneath it are skipped.

    """
    # We're only interested in files that look like reasonable Python
    # files: Must end with .py, and must not have certain funny
    # characters that probably mean they are editor junk.
    plausible_name = re.compile(r"^[^.#~!$@%^&*()+=,]+\.py$")

    at_root = True
    for here, subdirs, names in os.walk(dirname):
        importable = at_root or '__init__.py' in names
        at_root = False
        if not importable:
            # Emptying the list in place stops os.walk from descending.
            subdirs[:] = []
            continue
        for name in names:
            if plausible_name.match(name):
                yield os.path.join(here, name)
OLD | NEW |