OLD | NEW |
| (Empty) |
1 """File wrangling.""" | |
2 | |
3 from coverage.backward import to_string | |
4 from coverage.misc import CoverageException | |
5 import fnmatch, os, re, sys | |
6 | |
class FileLocator(object):
    """Understand how filenames work."""

    def __init__(self):
        # The absolute path to our current directory, with a trailing
        # separator so it can be used directly as a prefix to strip.
        self.relative_dir = self.abs_file(os.curdir) + os.sep

        # Cache of results of calling the canonical_filename() method, to
        # avoid duplicating work.
        self.canonical_filename_cache = {}

    def abs_file(self, filename):
        """Return the absolute normalized form of `filename`.

        Symbolic links are resolved, the path is made absolute, and the
        result is passed through `os.path.normcase`.

        """
        return os.path.normcase(os.path.abspath(os.path.realpath(filename)))

    def relative_filename(self, filename):
        """Return the relative form of `filename`.

        The filename will be relative to the current directory when the
        `FileLocator` was constructed.  A filename that isn't under that
        directory is returned unchanged.

        """
        if filename.startswith(self.relative_dir):
            # Strip only the leading prefix.  Using str.replace() here would
            # also remove any later occurrence of the directory text from
            # inside the remainder of the path.
            filename = filename[len(self.relative_dir):]
        return filename

    def canonical_filename(self, filename):
        """Return a canonical filename for `filename`.

        An absolute path with no redundant components and normalized case.
        Results are remembered in `canonical_filename_cache`, keyed by the
        `filename` argument exactly as it was given.

        """
        if filename not in self.canonical_filename_cache:
            f = filename
            if os.path.isabs(f) and not os.path.exists(f):
                # An absolute path that is neither on disk nor inside a zip
                # file: fall back to the base name and search for it below.
                if self.get_zip_data(f) is None:
                    f = os.path.basename(f)
            if not os.path.isabs(f):
                # Look for the file in the current directory, then along
                # sys.path.  The first existing candidate wins.
                for path in [os.curdir] + sys.path:
                    if path is None:
                        continue
                    g = os.path.join(path, f)
                    if os.path.exists(g):
                        f = g
                        break
            cf = self.abs_file(f)
            self.canonical_filename_cache[filename] = cf
        return self.canonical_filename_cache[filename]

    def get_zip_data(self, filename):
        """Get data from `filename` if it is a zip file path.

        Returns the string data read from the zip file, or None if no zip file
        could be found or `filename` isn't in it.  The data returned will be
        an empty string if the file is empty.

        """
        import zipimport
        markers = ['.zip'+os.sep, '.egg'+os.sep]
        for marker in markers:
            if marker in filename:
                # Split at the first marker only: everything before it is the
                # archive, everything after it is the path inside the
                # archive, even if that inner path contains the marker again.
                parts = filename.split(marker, 1)
                try:
                    # marker[:-1] drops the trailing separator, leaving the
                    # archive's own extension.
                    zi = zipimport.zipimporter(parts[0]+marker[:-1])
                except zipimport.ZipImportError:
                    continue
                try:
                    data = zi.get_data(parts[1])
                except IOError:
                    continue
                return to_string(data)
        return None
79 | |
80 | |
class TreeMatcher(object):
    """Match file paths that live in any of a set of directory trees."""
    def __init__(self, directories):
        # Keep our own copy so later add() calls don't change the caller's
        # sequence.
        self.dirs = directories[:]

    def __repr__(self):
        return "<TreeMatcher %r>" % self.dirs

    def add(self, directory):
        """Include `directory` among the trees we match against."""
        self.dirs.append(directory)

    def match(self, fpath):
        """Is `fpath` one of our trees, or a file inside one of them?"""
        for root in self.dirs:
            if not fpath.startswith(root):
                continue
            remainder = fpath[len(root):]
            if not remainder:
                # `fpath` is exactly the tree root.
                return True
            if remainder.startswith(os.sep):
                # The prefix ends on a path component boundary, so `fpath`
                # is inside the tree, not merely a similarly-named sibling.
                return True
        return False
104 | |
105 | |
class FnmatchMatcher(object):
    """Match file paths against a list of `fnmatch`-style patterns."""
    def __init__(self, pats):
        # Copy the patterns so the caller's sequence isn't shared.
        self.pats = pats[:]

    def __repr__(self):
        return "<FnmatchMatcher %r>" % self.pats

    def match(self, fpath):
        """Is `fpath` matched by at least one of our patterns?"""
        for pattern in self.pats:
            matched = fnmatch.fnmatch(fpath, pattern)
            if matched:
                return True
        return False
120 | |
121 | |
def sep(s):
    """Return the first path separator found in `s`, or os.sep if none.

    Both the forward slash and the backslash count as separators.

    """
    found = re.search(r"[\\/]", s)
    if found is None:
        return os.sep
    return found.group(0)
130 | |
131 | |
class PathAliases(object):
    """A collection of aliases for paths.

    When combining data files from remote machines, often the paths to source
    code are different, for example, due to OS differences, or because of
    serialized checkouts on continuous integration machines.

    A `PathAliases` object tracks a list of pattern/result pairs, and can
    map a path through those aliases to produce a unified path.

    `locator` is a FileLocator that is used to canonicalize the results.

    """
    def __init__(self, locator=None):
        # List of (regex, result, pattern_sep, result_sep) tuples, in the
        # order they were added.
        self.aliases = []
        self.locator = locator

    def add(self, pattern, result):
        """Add the `pattern`/`result` pair to the list of aliases.

        `pattern` is an `fnmatch`-style pattern.  `result` is a simple
        string.  When mapping paths, if a path starts with a match against
        `pattern`, then that match is replaced with `result`.  This models
        isomorphic source trees being rooted at different places on two
        different machines.

        `pattern` can't end with a wildcard component, since that would
        match an entire tree, and not just its root.  A `CoverageException`
        is raised if it does.

        """
        # The pattern can't end with a wildcard component.
        pattern = pattern.rstrip(r"\/")
        if pattern.endswith("*"):
            raise CoverageException("Pattern must not end with wildcards.")
        pattern_sep = sep(pattern)
        pattern += pattern_sep

        # Make a regex from the pattern.  fnmatch always anchors the regex to
        # match the whole string, which we don't want: we only match a
        # prefix.  Depending on the Python version the anchor is a trailing
        # "$", a "\Z" followed by a flags group, or a bare "\Z" / "\z" at the
        # very end.  Only strip it in those positions, so an escaped
        # backslash followed by a literal "Z" in the pattern is left alone.
        regex_pat = fnmatch.translate(pattern)
        regex_pat = re.sub(r"\\[Zz](\(\?|$)", r"\1", regex_pat)
        if regex_pat.endswith("$"):
            regex_pat = regex_pat[:-1]

        # We want */a/b.py to match on Windows too, so change slash to match
        # either separator.  Some Python versions escape the slash as "\/"
        # and some leave it bare, so handle both.  Walking the regex one
        # escape pair at a time keeps an escaped backslash that happens to
        # precede a slash from being mistaken for "\/".  A slash inside a
        # [...] character class in the pattern is not supported.
        def either_sep(match):
            text = match.group(0)
            if text == "/" or text == r"\/":
                return r"[\\/]"
            return text
        regex_pat = re.sub(r"\\.|/", either_sep, regex_pat)

        # We want case-insensitive matching, so compile with that flag.
        regex = re.compile(regex_pat, re.IGNORECASE)

        # Normalize the result: it must end with a path separator.
        result_sep = sep(result)
        result = result.rstrip(r"\/") + result_sep
        self.aliases.append((regex, result, pattern_sep, result_sep))

    def map(self, path):
        """Map `path` through the aliases.

        `path` is checked against all of the patterns.  The first pattern to
        match is used to replace the root of the path with the result root.
        Only one pattern is ever used.  If no patterns match, `path` is
        returned unchanged.

        The separator style in the result is made to match that of the result
        in the alias.

        """
        for regex, result, pattern_sep, result_sep in self.aliases:
            m = regex.match(path)
            if m:
                # Swap only the matched root at the front of the path.  The
                # same text appearing again deeper in the path must stay.
                new = result + path[m.end():]
                if pattern_sep != result_sep:
                    new = new.replace(pattern_sep, result_sep)
                if self.locator:
                    new = self.locator.canonical_filename(new)
                return new
        return path
207 | |
208 | |
def find_python_files(dirname):
    """Yield every importable Python file under `dirname`, recursively.

    A directory counts as importable only if it contains an `__init__.py`
    file; this applies to `dirname` itself as well.

    """
    for root, subdirs, files in os.walk(dirname, topdown=True):
        if '__init__.py' in files:
            for name in files:
                if fnmatch.fnmatch(name, "*.py"):
                    yield os.path.join(root, name)
        else:
            # Not a package, so nothing here or below is importable.
            # Emptying the list in place stops os.walk from descending.
            subdirs[:] = []
OLD | NEW |