OLD | NEW |
(Empty) | |
| 1 """File wrangling.""" |
| 2 |
| 3 from coverage.backward import to_string |
| 4 from coverage.misc import CoverageException |
| 5 import fnmatch, os, os.path, re, sys |
| 6 import ntpath, posixpath |
| 7 |
class FileLocator(object):
    """Understand how filenames work.

    A locator remembers the current directory at construction time (for
    `relative_filename`) and caches the results of `canonical_filename`.

    """

    def __init__(self):
        # The absolute path to our current directory, with a trailing
        # separator so prefix tests only match whole directory names.
        self.relative_dir = os.path.normcase(abs_file(os.curdir) + os.sep)

        # Cache of results of calling the canonical_filename() method, to
        # avoid duplicating work.  Keys are the filenames as passed in.
        self.canonical_filename_cache = {}

    def relative_filename(self, filename):
        """Return the relative form of `filename`.

        The filename will be relative to the current directory when the
        `FileLocator` was constructed.  Filenames outside that directory are
        returned unchanged.

        """
        fnorm = os.path.normcase(filename)
        if fnorm.startswith(self.relative_dir):
            filename = filename[len(self.relative_dir):]
        return filename

    def canonical_filename(self, filename):
        """Return a canonical filename for `filename`.

        An absolute path with no redundant components and normalized case.
        A relative `filename` is looked for in the current directory and then
        in each `sys.path` entry; the first existing candidate is used.

        """
        if filename not in self.canonical_filename_cache:
            found = filename
            if not os.path.isabs(filename):
                for path in [os.curdir] + sys.path:
                    if path is None:
                        continue
                    candidate = os.path.join(path, filename)
                    if os.path.exists(candidate):
                        found = candidate
                        break
            # Store under the name the caller used, not the joined candidate,
            # so that the next lookup of the same name is a cache hit.
            self.canonical_filename_cache[filename] = abs_file(found)
        return self.canonical_filename_cache[filename]

    def get_zip_data(self, filename):
        """Get data from `filename` if it is a zip file path.

        Returns the string data read from the zip file, or None if no zip file
        could be found or `filename` isn't in it.  The data returned will be
        an empty string if the file is empty.

        """
        import zipimport
        markers = ['.zip'+os.sep, '.egg'+os.sep]
        for marker in markers:
            if marker in filename:
                # Split only at the first marker: everything after it is the
                # path inside the archive, even if it contains the marker
                # text again.
                parts = filename.split(marker, 1)
                try:
                    zi = zipimport.zipimporter(parts[0]+marker[:-1])
                except zipimport.ZipImportError:
                    # Not actually an archive; try the next marker.
                    continue
                try:
                    data = zi.get_data(parts[1])
                except IOError:
                    # The archive has no such member.
                    continue
                return to_string(data)
        return None
| 73 |
| 74 |
if sys.platform == 'win32':

    def actual_path(path):
        """Get the actual path of `path`, including the correct case.

        Each component is looked up in a listing of its parent directory and
        replaced by the entry whose normalized case matches.  Both the final
        answers and the directory listings are memoized on the function.

        """
        try:
            return actual_path.cache[path]
        except KeyError:
            pass

        parent, leaf = os.path.split(path)
        if not leaf:
            # Nothing after the last separator: the head is the answer.
            resolved = parent
        elif not parent:
            # A bare name with no directory part.
            resolved = leaf
        else:
            parent = actual_path(parent)
            try:
                entries = actual_path.list_cache[parent]
            except KeyError:
                try:
                    entries = os.listdir(parent)
                except OSError:
                    entries = []
                actual_path.list_cache[parent] = entries
            wanted = os.path.normcase(leaf)
            # Use the first directory entry that matches ignoring case, or
            # keep the name as given if there is none.
            leaf = next(
                (entry for entry in entries
                 if os.path.normcase(entry) == wanted),
                leaf,
            )
            resolved = os.path.join(parent, leaf)
        actual_path.cache[path] = resolved
        return resolved

    # Memo of path -> actual path.
    actual_path.cache = {}
    # Memo of directory -> list of its entries.
    actual_path.list_cache = {}

else:
    def actual_path(filename):
        """The actual path for non-Windows platforms: `filename` itself."""
        return filename
| 113 |
| 114 |
def abs_file(filename):
    """Return the absolute normalized form of `filename`.

    The name has `~` and environment variables expanded, is resolved with
    `os.path.realpath`, made absolute, and finally passed through
    `actual_path`.

    """
    expanded = os.path.expanduser(filename)
    expanded = os.path.expandvars(expanded)
    resolved = os.path.realpath(expanded)
    return actual_path(os.path.abspath(resolved))
| 121 |
| 122 |
def isabs_anywhere(filename):
    """Is `filename` an absolute path on any OS?

    True if either the Windows (`ntpath`) or the POSIX (`posixpath`) rules
    consider the name absolute.

    """
    return any(flavor.isabs(filename) for flavor in (ntpath, posixpath))
| 126 |
| 127 |
def prep_patterns(patterns):
    """Prepare the file patterns for use in a `FnmatchMatcher`.

    A pattern that begins with a wildcard (`*` or `?`) is kept exactly as
    given.  Any other pattern is made absolute with `abs_file`.

    If `patterns` is None (or empty), an empty list is returned.

    """
    if not patterns:
        return []
    return [
        pat if pat.startswith(("*", "?")) else abs_file(pat)
        for pat in patterns
    ]
| 145 |
| 146 |
class TreeMatcher(object):
    """A matcher for files in a tree.

    Holds a list of directory (or file) paths; a path matches if it is one of
    them or lies underneath one of them.

    """
    def __init__(self, directories):
        # Slice-copy so later `add` calls don't change the caller's sequence.
        self.dirs = directories[:]

    def __repr__(self):
        return "<TreeMatcher %r>" % self.dirs

    def info(self):
        """A list of strings for displaying when dumping state."""
        return self.dirs

    def add(self, directory):
        """Add another directory to the list we match for."""
        self.dirs.append(directory)

    def match(self, fpath):
        """Does `fpath` indicate a file in one of our trees?"""
        for root in self.dirs:
            if not fpath.startswith(root):
                continue
            # Either the very same path, or the prefix is followed directly
            # by a separator, meaning `fpath` is inside the directory.
            if fpath == root or fpath[len(root)] == os.sep:
                return True
        return False
| 174 |
| 175 |
class FnmatchMatcher(object):
    """A matcher for files by filename pattern.

    Holds a list of `fnmatch`-style patterns; a path matches if any one of
    the patterns matches it.

    """
    def __init__(self, pats):
        # Slice-copy so the caller's sequence is not shared.
        self.pats = pats[:]

    def __repr__(self):
        return "<FnmatchMatcher %r>" % self.pats

    def info(self):
        """A list of strings for displaying when dumping state."""
        return self.pats

    def match(self, fpath):
        """Does `fpath` match one of our filename patterns?"""
        return any(fnmatch.fnmatch(fpath, pat) for pat in self.pats)
| 194 |
| 195 |
def sep(s):
    """Find the path separator used in this string, or os.sep if none.

    The first slash or backslash found in `s` is taken to be its separator.

    """
    found = re.search(r"[\\/]", s)
    return found.group(0) if found else os.sep
| 204 |
| 205 |
class PathAliases(object):
    """A collection of aliases for paths.

    When combining data files from remote machines, often the paths to source
    code are different, for example, due to OS differences, or because of
    serialized checkouts on continuous integration machines.

    A `PathAliases` object tracks a list of pattern/result pairs, and can
    map a path through those aliases to produce a unified path.

    `locator` is a FileLocator that is used to canonicalize the results.

    """
    def __init__(self, locator=None):
        # List of (compiled regex, result, pattern separator, result
        # separator) tuples, in the order they were added.
        self.aliases = []
        self.locator = locator

    def add(self, pattern, result):
        """Add the `pattern`/`result` pair to the list of aliases.

        `pattern` is an `fnmatch`-style pattern.  `result` is a simple
        string.  When mapping paths, if a path starts with a match against
        `pattern`, then that match is replaced with `result`.  This models
        isomorphic source trees being rooted at different places on two
        different machines.

        `pattern` can't end with a wildcard component, since that would
        match an entire tree, and not just its root.  A `CoverageException`
        is raised if it does.

        """
        # The pattern can't end with a wildcard component.
        pattern = pattern.rstrip(r"\/")
        if pattern.endswith("*"):
            raise CoverageException("Pattern must not end with wildcards.")
        pattern_sep = sep(pattern)

        # The pattern is meant to match a filepath.  Let's make it absolute
        # unless it already is, or is meant to match any prefix.
        if not pattern.startswith('*') and not isabs_anywhere(pattern):
            pattern = abs_file(pattern)
        pattern += pattern_sep

        # Make a regex from the pattern.  fnmatch always anchors the regex to
        # match the whole string, which we don't want.  The anchor has been
        # spelled "$", "\Z(?ms)" and "(?s:...)\Z" by different Python
        # versions, so remove an end anchor that is either last or directly
        # followed by a flags group.  A literal "\Z" from the pattern itself
        # is never in that position because the pattern ends with a separator.
        regex_pat = fnmatch.translate(pattern)
        regex_pat = re.sub(r"\\[Zz](\(\?|$)", r"\1", regex_pat)
        if regex_pat.endswith("$"):
            regex_pat = regex_pat[:-1]
        # We want */a/b.py to match on Windows too, so change slash to match
        # either separator.  Whether translate() wrote the slash as "/" or
        # "\/" depends on what re.escape does on this Python, so ask it.
        regex_pat = regex_pat.replace(re.escape("/"), r"[\\/]")
        # We want case-insensitive matching, so add that flag.
        regex = re.compile(r"(?i)" + regex_pat)

        # Normalize the result: it must end with a path separator.
        result_sep = sep(result)
        result = result.rstrip(r"\/") + result_sep
        self.aliases.append((regex, result, pattern_sep, result_sep))

    def map(self, path):
        """Map `path` through the aliases.

        `path` is checked against all of the patterns.  The first pattern to
        match is used to replace the root of the path with the result root.
        Only one pattern is ever used.  If no patterns match, `path` is
        returned unchanged.

        The separator style in the result is made to match that of the result
        in the alias.

        """
        for regex, result, pattern_sep, result_sep in self.aliases:
            m = regex.match(path)
            if m:
                # Swap only the matched root.  The same text appearing again
                # deeper in the path must be left alone.
                new = result + path[m.end():]
                if pattern_sep != result_sep:
                    new = new.replace(pattern_sep, result_sep)
                if self.locator:
                    new = self.locator.canonical_filename(new)
                return new
        return path
| 286 |
| 287 |
def find_python_files(dirname):
    """Yield all of the importable Python files in `dirname`, recursively.

    `dirname` itself is always searched, on the assumption that the user
    named it directly and knows best.  A subdirectory is only searched if it
    contains an __init__.py; otherwise it and everything below it is skipped,
    since files there are not importable.

    """
    # We're only interested in files that look like reasonable Python files:
    # must end with .py or .pyw, and must not have certain funny characters
    # that probably mean they are editor junk.
    looks_like_python = re.compile(r"^[^.#~!$@%^&*()+=,]+\.pyw?$").match

    at_top = True
    for dirpath, dirnames, filenames in os.walk(dirname):
        if not at_top and '__init__.py' not in filenames:
            # Not a package: empty the list in place so os.walk doesn't
            # descend any further from here.
            del dirnames[:]
            continue
        at_top = False
        for filename in filenames:
            if looks_like_python(filename):
                yield os.path.join(dirpath, filename)
OLD | NEW |