OLD | NEW |
(Empty) | |
| 1 """File wrangling.""" |
| 2 |
| 3 from coverage.backward import to_string |
| 4 from coverage.misc import CoverageException |
| 5 import fnmatch, os, os.path, re, sys |
| 6 |
class FileLocator(object):
    """Understand how filenames work."""

    def __init__(self):
        # The absolute path to our current directory, case-normalized and
        # with a trailing separator so it can be used as a string prefix.
        self.relative_dir = os.path.normcase(abs_file(os.curdir) + os.sep)

        # Cache of results of calling the canonical_filename() method, to
        # avoid duplicating work.  Keys are the names exactly as passed in.
        self.canonical_filename_cache = {}

    def relative_filename(self, filename):
        """Return the relative form of `filename`.

        The filename will be relative to the current directory when the
        `FileLocator` was constructed.  Names that are not under that
        directory are returned unchanged.

        """
        # Compare on the case-normalized name, but slice the original so the
        # caller's spelling of the remainder is preserved.
        fnorm = os.path.normcase(filename)
        if fnorm.startswith(self.relative_dir):
            filename = filename[len(self.relative_dir):]
        return filename

    def canonical_filename(self, filename):
        """Return a canonical filename for `filename`.

        An absolute path with no redundant components and normalized case.
        Relative names are looked for in the current directory and then in
        each `sys.path` entry; the first existing candidate is used.

        Results are cached under the name the caller passed in.

        """
        if filename not in self.canonical_filename_cache:
            # Keep `filename` untouched: it is the cache key.  Rebinding it
            # to the discovered path would store the entry under a different
            # key and every later lookup of the same relative name would
            # miss the cache and rescan sys.path.
            found = filename
            if not os.path.isabs(filename):
                for path in [os.curdir] + sys.path:
                    if path is None:
                        continue
                    f = os.path.join(path, filename)
                    if os.path.exists(f):
                        found = f
                        break
            self.canonical_filename_cache[filename] = abs_file(found)
        return self.canonical_filename_cache[filename]

    def get_zip_data(self, filename):
        """Get data from `filename` if it is a zip file path.

        Returns the string data read from the zip file, or None if no zip file
        could be found or `filename` isn't in it.  The data returned will be
        an empty string if the file is empty.

        """
        import zipimport
        markers = ['.zip'+os.sep, '.egg'+os.sep]
        for marker in markers:
            if marker in filename:
                # parts[0] is the archive path minus its extension, parts[1]
                # the member path inside the archive.
                parts = filename.split(marker)
                try:
                    # marker[:-1] restores the extension without the
                    # trailing separator.
                    zi = zipimport.zipimporter(parts[0]+marker[:-1])
                except zipimport.ZipImportError:
                    continue
                try:
                    data = zi.get_data(parts[1])
                except IOError:
                    continue
                return to_string(data)
        return None
| 72 |
| 73 |
if sys.platform == 'win32':

    def actual_path(path):
        """Get the actual path of `path`, including the correct case."""
        try:
            return actual_path.cache[path]
        except KeyError:
            pass

        parent, leaf = os.path.split(path)
        if not leaf:
            # Nothing after the last separator: the head is the answer.
            resolved = parent
        elif not parent:
            # A bare name with no directory part is returned as given.
            resolved = leaf
        else:
            # Fix up the parent first, then find how the directory listing
            # actually spells the last component.
            parent = actual_path(parent)
            entries = actual_path.list_cache.get(parent)
            if entries is None:
                try:
                    entries = os.listdir(parent)
                except OSError:
                    # Unlistable directory: keep the spelling we were given.
                    entries = []
                actual_path.list_cache[parent] = entries
            wanted = os.path.normcase(leaf)
            for entry in entries:
                if os.path.normcase(entry) == wanted:
                    leaf = entry
                    break
            resolved = os.path.join(parent, leaf)

        actual_path.cache[path] = resolved
        return resolved

    # Memo of path -> corrected path, and of directory -> its listing.
    actual_path.cache = {}
    actual_path.list_cache = {}

else:
    def actual_path(filename):
        """The actual path for non-Windows platforms."""
        # No case correction is attempted here; the name is passed through.
        return filename
| 112 |
def abs_file(filename):
    """Return the absolute normalized form of `filename`."""
    # Resolve symlinks first, then make absolute, then let actual_path()
    # apply its platform-specific correction.
    resolved = os.path.realpath(filename)
    return actual_path(os.path.abspath(resolved))
| 118 |
| 119 |
def prep_patterns(patterns):
    """Prepare the file patterns for use in a `FnmatchMatcher`.

    A pattern that begins with a wildcard (`*` or `?`) is kept exactly as
    given.  Any other pattern is made absolute with `abs_file`.

    If `patterns` is None (or empty), an empty list is returned.

    """
    return [
        pat if pat.startswith(("*", "?")) else abs_file(pat)
        for pat in patterns or []
    ]
| 138 |
| 139 |
class TreeMatcher(object):
    """A matcher for files in a tree."""

    def __init__(self, directories):
        # Work on a copy so add() never touches the caller's sequence.
        self.dirs = directories[:]

    def __repr__(self):
        return "<TreeMatcher %r>" % self.dirs

    def add(self, directory):
        """Add another directory to the list we match for."""
        self.dirs.append(directory)

    def match(self, fpath):
        """Does `fpath` indicate a file in one of our trees?"""
        for tree in self.dirs:
            if not fpath.startswith(tree):
                continue
            # Either the path is the tree root itself, or the character right
            # after the prefix is a separator, so "/a/bc" is not inside "/a/b".
            if fpath == tree or fpath[len(tree)] == os.sep:
                return True
        return False
| 163 |
| 164 |
class FnmatchMatcher(object):
    """A matcher for files by filename pattern."""

    def __init__(self, pats):
        # Keep our own copy of the pattern sequence.
        self.pats = pats[:]

    def __repr__(self):
        return "<FnmatchMatcher %r>" % self.pats

    def match(self, fpath):
        """Does `fpath` match one of our filename patterns?"""
        return any(fnmatch.fnmatch(fpath, pat) for pat in self.pats)
| 179 |
| 180 |
def sep(s):
    """Find the path separator used in this string, or os.sep if none."""
    # The first slash or backslash seen decides the style.
    found = re.search(r"[\\/]", s)
    return found.group(0) if found else os.sep
| 189 |
| 190 |
class PathAliases(object):
    """A collection of aliases for paths.

    When combining data files from remote machines, often the paths to source
    code are different, for example, due to OS differences, or because of
    serialized checkouts on continuous integration machines.

    A `PathAliases` object tracks a list of pattern/result pairs, and can
    map a path through those aliases to produce a unified path.

    `locator` is a FileLocator that is used to canonicalize the results.
    If it is None, mapped paths are returned without canonicalization.

    """
    def __init__(self, locator=None):
        # List of (compiled regex, result root, pattern sep, result sep).
        self.aliases = []
        self.locator = locator

    def add(self, pattern, result):
        """Add the `pattern`/`result` pair to the list of aliases.

        `pattern` is an `fnmatch`-style pattern.  `result` is a simple
        string.  When mapping paths, if a path starts with a match against
        `pattern`, then that match is replaced with `result`.  This models
        isomorphic source trees being rooted at different places on two
        different machines.

        `pattern` can't end with a wildcard component, since that would
        match an entire tree, and not just its root.  A `CoverageException`
        is raised if it does.

        """
        # The pattern can't end with a wildcard component.
        pattern = pattern.rstrip(r"\/")
        if pattern.endswith("*"):
            raise CoverageException("Pattern must not end with wildcards.")
        pattern_sep = sep(pattern)
        pattern += pattern_sep

        # Make a regex from the pattern.  fnmatch always anchors the regex to
        # the end of the string, which we don't want: we only match a prefix.
        # Depending on the Python version the anchor is "$", "\Z" or "\z",
        # and older versions append global flags such as "(?ms)" after it;
        # drop the anchor and keep any such flags.
        regex_pat = fnmatch.translate(pattern)
        regex_pat = re.sub(
            r"(?:\\[Zz]|\$)(\(\?[a-zA-Z]+\))?$",
            lambda m: m.group(1) or "",
            regex_pat,
        )

        # We want */a/b.py to match on Windows too, so change slash to match
        # either separator.  Older Pythons escape the slash as "\/", newer
        # ones leave it bare.  Escape pairs are consumed as a unit so an
        # escaped backslash followed by a slash is not misread as "\/".
        # NOTE(review): a slash inside a [...] class in the pattern would
        # also be rewritten; such patterns are not handled specially.
        regex_pat = re.sub(
            r"(?s)\\.|/",
            lambda m: r"[\\/]" if m.group(0) in ("/", r"\/") else m.group(0),
            regex_pat,
        )

        # We want case-insensitive matching.
        regex = re.compile(regex_pat, re.IGNORECASE)

        # Normalize the result: it must end with a path separator.
        result_sep = sep(result)
        result = result.rstrip(r"\/") + result_sep
        self.aliases.append((regex, result, pattern_sep, result_sep))

    def map(self, path):
        """Map `path` through the aliases.

        `path` is checked against all of the patterns.  The first pattern to
        match is used to replace the root of the path with the result root.
        Only one pattern is ever used.  If no patterns match, `path` is
        returned unchanged.

        The separator style in the result is made to match that of the result
        in the alias.

        """
        for regex, result, pattern_sep, result_sep in self.aliases:
            m = regex.match(path)
            if m:
                # Swap only the matched root.  A plain str.replace() of the
                # matched text would also rewrite any later occurrence of the
                # same text deeper in the path.
                new = result + path[m.end():]
                if pattern_sep != result_sep:
                    new = new.replace(pattern_sep, result_sep)
                if self.locator:
                    new = self.locator.canonical_filename(new)
                return new
        return path
| 266 |
| 267 |
def find_python_files(dirname):
    """Yield all of the importable Python files in `dirname`, recursively.

    `dirname` itself is taken on trust and does not need an __init__.py,
    on the assumption that the user named it deliberately.  Every directory
    below it must contain an __init__.py to be considered importable;
    directories without one are skipped together with everything under them.

    """
    # Must end with .py, and must not have certain funny characters that
    # probably mean the file is editor junk.
    looks_like_python = re.compile(r"^[^.#~!$@%^&*()+=,]+\.py$").match

    at_root = True
    for here, subdirs, files in os.walk(dirname):
        importable = at_root or '__init__.py' in files
        at_root = False
        if not importable:
            # Emptying the list in place stops os.walk from descending.
            del subdirs[:]
            continue
        for name in files:
            if looks_like_python(name):
                yield os.path.join(here, name)
OLD | NEW |