OLD | NEW |
(Empty) | |
| 1 """ |
| 2 Filename globbing utility. Mostly a copy of `glob` from Python 3.5. |
| 3 |
| 4 Changes include: |
| 5 * `yield from` and PEP3102 `*` removed. |
| 6 * `bytes` changed to `six.binary_type`. |
| 7 * Hidden files are not ignored. |
| 8 """ |
| 9 |
| 10 import os |
| 11 import re |
| 12 import fnmatch |
| 13 from six import binary_type |
| 14 |
| 15 __all__ = ["glob", "iglob", "escape"] |
| 16 |
| 17 |
def glob(pathname, recursive=False):
    """Return a list of paths matching a pathname pattern.

    The pattern may contain simple shell-style wildcards a la
    fnmatch.  Unlike the standard library's `glob`, filenames starting
    with a dot are NOT special-cased here: they are matched by '*' and
    '?' patterns just like any other name.

    If recursive is true, the pattern '**' will match any files and
    zero or more directories and subdirectories.
    """
    # `iglob` is lazy; materialise it so callers get a plain list.
    return list(iglob(pathname, recursive=recursive))
| 30 |
| 31 |
def iglob(pathname, recursive=False):
    """Return an iterator which yields the paths matching a pathname pattern.

    The pattern may contain simple shell-style wildcards a la
    fnmatch.  Unlike the standard library's `glob`, filenames starting
    with a dot are NOT special-cased here: they are matched by '*' and
    '?' patterns just like any other name.

    If recursive is true, the pattern '**' will match any files and
    zero or more directories and subdirectories.
    """
    matches = _iglob(pathname, recursive)
    if recursive and _isrecursive(pathname):
        # For a bare '**' pattern the recursive helper first yields an empty
        # string (the "zero directories" match); it is not a real path, so
        # consume it before handing the iterator to the caller.
        first = next(matches)
        assert not first
    return matches
| 48 |
| 49 |
def _iglob(pathname, recursive):
    """Yield the paths matching `pathname`, expanding wildcards per component.

    `pathname` is split into a directory part and a final component.  The
    directory part is expanded first (recursively, if it contains wildcards),
    then the final component is matched inside every resulting directory.
    """
    head, tail = os.path.split(pathname)

    # No wildcards at all: the path either exists or it does not.
    if not has_magic(pathname):
        if tail:
            present = os.path.lexists(pathname)
        else:
            # Patterns ending with a slash should match only directories
            present = os.path.isdir(head)
        if present:
            yield pathname
        return

    # True only when recursion was requested and the last component is '**'.
    recursive_tail = recursive and _isrecursive(tail)

    # Pattern without a directory part: match directly, no joining needed.
    if not head:
        found = glob2(head, tail) if recursive_tail else glob1(head, tail)
        for entry in found:
            yield entry
        return

    # `os.path.split()` returns the argument itself as a dirname if it is a
    # drive or UNC path.  Prevent an infinite recursion if a drive or UNC path
    # contains magic characters (i.e. r'\\?\C:').
    if head != pathname and has_magic(head):
        parents = _iglob(head, recursive)
    else:
        parents = [head]

    # Pick the helper that lists candidates for the final component.
    if recursive_tail:
        list_matches = glob2
    elif has_magic(tail):
        list_matches = glob1
    else:
        list_matches = glob0

    for parent in parents:
        for entry in list_matches(parent, tail):
            yield os.path.join(parent, entry)
| 86 |
| 87 |
| 88 # These 2 helper functions non-recursively glob inside a literal directory. |
| 89 # They return a list of basenames. `glob1` accepts a pattern while `glob0` |
| 90 # takes a literal basename (so it only has to check for its existence). |
| 91 |
| 92 |
def glob1(dirname, pattern):
    """Return the basenames in `dirname` that match the fnmatch `pattern`.

    An empty `dirname` means the current directory, expressed as bytes when
    `pattern` is bytes.  A directory that cannot be listed yields an empty
    list.  Names starting with a dot are not filtered out.
    """
    if dirname:
        target = dirname
    elif isinstance(pattern, binary_type):
        target = os.curdir.encode('ASCII')
    else:
        target = os.curdir

    try:
        entries = os.listdir(target)
    except OSError:
        return []
    return fnmatch.filter(entries, pattern)
| 104 |
| 105 |
def glob0(dirname, basename):
    """Return `[basename]` if the literal entry exists in `dirname`, else `[]`.

    `os.path.split()` returns an empty basename for paths ending with a
    directory separator, so an empty `basename` matches only when `dirname`
    itself is a directory ('q*x/' should match only directories).
    """
    if basename:
        found = os.path.lexists(os.path.join(dirname, basename))
    else:
        found = os.path.isdir(dirname)
    return [basename] if found else []
| 116 |
| 117 |
| 118 # This helper function recursively yields relative pathnames inside a literal |
| 119 # directory. |
| 120 |
| 121 |
def glob2(dirname, pattern):
    """Yield an empty string, then every relative path below `dirname`.

    `pattern` must be the recursive wildcard; it is used only to produce an
    empty value of the same type (str or bytes) as the first item.
    """
    assert _isrecursive(pattern)
    empty = pattern[:0]
    yield empty
    for relative_path in _rlistdir(dirname):
        yield relative_path
| 127 |
| 128 |
| 129 # Recursively yields relative pathnames inside a literal directory. |
def _rlistdir(dirname):
    """Recursively yield the relative pathnames found below `dirname`.

    An empty `dirname` means the current directory (as bytes when `dirname`
    is bytes).  Every entry is yielded before the entries beneath it, and
    entries that cannot be listed (plain files, unreadable directories) simply
    contribute nothing further.  Names starting with a dot are included.
    """
    if not dirname:
        if isinstance(dirname, binary_type):
            # `binary_type` is `str` on Python 2, whose constructor does not
            # accept an encoding argument, so encode the text constant
            # instead of calling `binary_type(os.curdir, 'ASCII')`.
            dirname = os.curdir.encode('ASCII')
        else:
            dirname = os.curdir
    try:
        names = os.listdir(dirname)
    except OSError:
        return
    for name in names:
        yield name
        # `dirname` is never empty at this point, so it can always be joined.
        for nested in _rlistdir(os.path.join(dirname, name)):
            yield os.path.join(name, nested)
| 145 |
| 146 |
# Compiled once at import time: a single glob metacharacter ('*', '?' or '['),
# captured as group 1, in a text and a bytes flavour.
magic_check = re.compile('([*?[])')
magic_check_bytes = re.compile(b'([*?[])')


def has_magic(s):
    """Return True if `s` (str or bytes) contains '*', '?' or '['."""
    checker = magic_check_bytes if isinstance(s, binary_type) else magic_check
    return checker.search(s) is not None
| 157 |
| 158 |
def _isrecursive(pattern):
    """Return True if `pattern` is exactly the recursive wildcard '**'."""
    # Compare against a literal of the same type as `pattern` (bytes or text).
    wildcard = b'**' if isinstance(pattern, binary_type) else '**'
    return pattern == wildcard
| 164 |
| 165 |
def escape(pathname):
    """Escape all special characters.

    Each of '*', '?' and '[' is wrapped in square brackets so it matches
    itself literally.  The drive part is left untouched, because
    metacharacters do not work there and shouldn't be escaped.
    """
    drive, rest = os.path.splitdrive(pathname)
    if isinstance(rest, binary_type):
        checker, wrapped = magic_check_bytes, br'[\1]'
    else:
        checker, wrapped = magic_check, r'[\1]'
    return drive + checker.sub(wrapped, rest)
OLD | NEW |