OLD | NEW |
(Empty) | |
| 1 # copyright 2003-2011 LOGILAB S.A. (Paris, FRANCE), all rights reserved. |
| 2 # contact http://www.logilab.fr/ -- mailto:contact@logilab.fr |
| 3 # |
| 4 # This file is part of logilab-common. |
| 5 # |
| 6 # logilab-common is free software: you can redistribute it and/or modify it unde
r |
| 7 # the terms of the GNU Lesser General Public License as published by the Free |
| 8 # Software Foundation, either version 2.1 of the License, or (at your option) an
y |
| 9 # later version. |
| 10 # |
| 11 # logilab-common is distributed in the hope that it will be useful, but WITHOUT |
| 12 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
| 13 # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more |
| 14 # details. |
| 15 # |
| 16 # You should have received a copy of the GNU Lesser General Public License along |
| 17 # with logilab-common. If not, see <http://www.gnu.org/licenses/>. |
| 18 """File and file-path manipulation utilities. |
| 19 |
| 20 :group path manipulation: first_level_directory, relative_path, is_binary,\ |
| 21 get_by_ext, remove_dead_links |
| 22 :group file manipulation: norm_read, norm_open, lines, stream_lines,\ |
| 23 write_open_mode, ensure_fs_mode, export |
| 24 :sort: path manipulation, file manipulation |
| 25 """ |
| 26 |
| 27 from __future__ import print_function |
| 28 |
| 29 __docformat__ = "restructuredtext en" |
| 30 |
| 31 import sys |
| 32 import shutil |
| 33 import mimetypes |
| 34 from os.path import isabs, isdir, islink, split, exists, normpath, join |
| 35 from os.path import abspath |
| 36 from os import sep, mkdir, remove, listdir, stat, chmod, walk |
| 37 from stat import ST_MODE, S_IWRITE |
| 38 |
| 39 from logilab.common import STD_BLACKLIST as BASE_BLACKLIST, IGNORED_EXTENSIONS |
| 40 from logilab.common.shellutils import find |
| 41 from logilab.common.deprecation import deprecated |
| 42 from logilab.common.compat import FileIO |
| 43 |
def first_level_directory(path):
    """Return the first level directory of a path.

    >>> first_level_directory('home/syt/work')
    'home'
    >>> first_level_directory('/home/syt/work')
    '/'
    >>> first_level_directory('work')
    'work'

    :type path: str
    :param path: the path for which we want the first level directory

    :rtype: str
    :return: the first level directory appearing in `path`
    """
    remaining = path
    # Peel trailing components off until either side of the split is empty.
    while True:
        remaining, name = split(remaining)
        if not remaining or not name:
            break
    # An empty `name` means the path was absolute: what remains is the
    # filesystem root.
    return name or remaining
| 68 |
def abspath_listdir(path):
    """List the content of `path`, each entry being given as an absolute
    path.

    >>> os.listdir('/home')
    ['adim', 'alf', 'arthur', 'auc']
    >>> abspath_listdir('/home')
    ['/home/adim', '/home/alf', '/home/arthur', '/home/auc']

    :type path: str
    :param path: the directory to list

    :rtype: list
    :return: the entries of the directory, prefixed by its absolute path
    """
    root = abspath(path)
    names = listdir(root)
    return [join(root, name) for name in names]
| 79 |
| 80 |
def is_binary(filename):
    """Return true if filename may be a binary file, according to its
    extension.

    :type filename: str
    :param filename: the name of the file

    :rtype: bool
    :return:
      True if the file is considered binary, i.e. if its guessed mime type
      doesn't start with 'text' or if no mime type can be guessed at all
    """
    mime_type = mimetypes.guess_type(filename)[0]
    if mime_type is None:
        # unknown extension: play safe and consider the file as binary
        return True
    return not mime_type.startswith('text')
| 97 |
| 98 |
def write_open_mode(filename):
    """Return the mode that should be used to open a file for writing.

    :type filename: str
    :param filename: the name of the file

    :rtype: str
    :return:
      'wb' when `is_binary` considers the file as binary, 'w' otherwise
    """
    return 'wb' if is_binary(filename) else 'w'
| 111 |
| 112 |
def ensure_fs_mode(filepath, desired_mode=S_IWRITE):
    """Check that the given file has the given mode(s) set, else try to
    set it.

    :type filepath: str
    :param filepath: path of the file

    :type desired_mode: int
    :param desired_mode:
      ORed flags describing the desired mode. Use constants from the
      `stat` module for file permission's modes
    """
    mode = stat(filepath)[ST_MODE]
    # Every requested bit must be present: testing for a non-zero
    # intersection would wrongly accept a file having only some of them.
    if (mode & desired_mode) != desired_mode:
        chmod(filepath, mode | desired_mode)
| 128 |
| 129 |
| 130 # XXX (syt) unused? kill? |
class ProtectedFile(FileIO):
    """A special file-object class that automatically does a 'chmod +w' when
    needed.

    The permissions found when the object is created are restored when the
    file is closed, or immediately if the file can't be opened.

    XXX: for now, the way it is done allows 'normal file-objects' to be
    created during the ProtectedFile object lifetime.
    One way to circumvent this would be to chmod / unchmod on each
    write operation.

    One other way would be to :

    - catch the IOError in the __init__

    - if IOError, then create a StringIO object

    - each write operation writes in this StringIO object

    - on close()/del(), write/append the StringIO content to the file and
      do the chmod only once
    """
    def __init__(self, filepath, mode):
        """Open `filepath` using `mode`, making the file writable first if
        `mode` is one of 'w', 'a', 'wb' or 'ab' and the write permission is
        missing.

        :type filepath: str
        :param filepath: path of the (existing) file to open

        :type mode: str
        :param mode: the mode used to open the file
        """
        self.original_mode = stat(filepath)[ST_MODE]
        self.mode_changed = False
        if mode in ('w', 'a', 'wb', 'ab'):
            if not self.original_mode & S_IWRITE:
                chmod(filepath, self.original_mode | S_IWRITE)
                self.mode_changed = True
        try:
            FileIO.__init__(self, filepath, mode)
        except Exception:
            # The file couldn't be opened, hence close() will never be
            # called: put the original permissions back right now.
            if self.mode_changed:
                chmod(filepath, self.original_mode)
                self.mode_changed = False
            raise

    def _restore_mode(self):
        """restores the original mode if needed"""
        if self.mode_changed:
            chmod(self.name, self.original_mode)
            # Don't re-chmod in case of several restore
            self.mode_changed = False

    def close(self):
        """restore mode before closing"""
        self._restore_mode()
        FileIO.close(self)

    def __del__(self):
        """close the file (hence restore its mode) if it's still opened"""
        if not self.closed:
            self.close()
| 175 |
| 176 |
class UnresolvableError(Exception):
    """Error raised by `relative_path` when no relative path can be
    computed between the two given paths.
    """
| 181 |
def relative_path(from_file, to_file):
    """Try to compute the relative path leading from `from_file` to
    `to_file` (the result is absolute when `to_file` is absolute and
    `from_file` is not). The typical use case, used in the examples below,
    is the creation of a link to `to_file` within `from_file`.

    If both files are relative, they're expected to be relative to the same
    directory.

    >>> relative_path( from_file='toto/index.html', to_file='index.html')
    '../index.html'
    >>> relative_path( from_file='index.html', to_file='toto/index.html')
    'toto/index.html'
    >>> relative_path( from_file='tutu/index.html', to_file='toto/index.html')
    '../toto/index.html'
    >>> relative_path( from_file='toto/index.html', to_file='/index.html')
    '/index.html'
    >>> relative_path( from_file='/toto/index.html', to_file='/index.html')
    '../index.html'
    >>> relative_path( from_file='/toto/index.html', to_file='/toto/summary.html')
    'summary.html'
    >>> relative_path( from_file='index.html', to_file='index.html')
    ''
    >>> relative_path( from_file='/index.html', to_file='toto/index.html')
    Traceback (most recent call last):
      File "<string>", line 1, in ?
      File "<stdin>", line 37, in relative_path
    UnresolvableError
    >>> relative_path( from_file='/index.html', to_file='/index.html')
    ''

    :type from_file: str
    :param from_file: source file (where links will be inserted)

    :type to_file: str
    :param to_file: target file (on which links point)

    :raise UnresolvableError:
      if `from_file` is absolute while `to_file` is relative

    :rtype: str
    :return: the relative path of `to_file` from `from_file`
    """
    source = normpath(from_file)
    target = normpath(to_file)
    if source == target:
        return ''
    source_is_absolute = isabs(source)
    if isabs(target):
        if not source_is_absolute:
            return target
    elif source_is_absolute:
        raise UnresolvableError()
    # only the directories of the source matter, not its file name
    source_dirs = source.split(sep)[:-1]
    target_parts = target.split(sep)
    # count the leading directories shared by both paths, never consuming
    # the last component of the target
    limit = min(len(source_dirs), len(target_parts) - 1)
    shared = 0
    while shared < limit and source_dirs[shared] == target_parts[shared]:
        shared += 1
    # climb out of every source directory which isn't shared...
    climbs = ['..'] * (len(source_dirs) - shared)
    # ...then go down to the target
    return sep.join(climbs + target_parts[shared:])
| 247 |
| 248 |
def norm_read(path):
    """Return the content of the file with normalized line feeds.

    :type path: str
    :param path: path to the file to read

    :rtype: str
    :return: the content of the file with normalized line feeds
    """
    # The 'U' mode has been removed in python 3.11, while the default text
    # mode of python 3 already enables universal newlines.
    mode = 'U' if sys.version_info < (3,) else 'r'
    # use a context manager so the file is closed once it has been read
    with open(path, mode) as stream:
        return stream.read()
norm_read = deprecated("use \"open(path, 'U').read()\"")(norm_read)
| 260 |
def norm_open(path):
    """Return a stream for a file with content with normalized line feeds.

    :type path: str
    :param path: path to the file to open

    :rtype: file
    :return:
      the opened file with normalized line feeds, which the caller is
      responsible for closing
    """
    # The 'U' mode has been removed in python 3.11, while the default text
    # mode of python 3 already enables universal newlines.
    mode = 'U' if sys.version_info < (3,) else 'r'
    return open(path, mode)
norm_open = deprecated("use \"open(path, 'U')\"")(norm_open)
| 272 |
def lines(path, comments=None):
    """Return a list of non empty lines in the file located at `path`.

    :type path: str
    :param path: path to the file

    :type comments: str or None
    :param comments:
      optional string which can be used to comment a line in the file
      (i.e. lines starting with this string won't be returned)

    :rtype: list
    :return:
      a list of stripped line in the file, without empty and commented
      lines

    :warning: at some point this function will probably return an iterator
    """
    # The 'U' mode has been removed in python 3.11, while the default text
    # mode of python 3 already enables universal newlines.
    mode = 'U' if sys.version_info < (3,) else 'r'
    # the context manager closes the file even if reading it fails
    with open(path, mode) as stream:
        return stream_lines(stream, comments)
| 295 |
| 296 |
def stream_lines(stream, comments=None):
    """Return a list of non empty lines in the given `stream`.

    :type stream: object implementing 'xreadlines' or 'readlines'
    :param stream: file like object

    :type comments: str or None
    :param comments:
      optional string marking a commented line (i.e. stripped lines
      starting with this string won't be returned)

    :rtype: list
    :return:
      a list of stripped line in the stream, without empty and commented
      lines

    :warning: at some point this function will probably return an iterator
    """
    # prefer `xreadlines` when the stream provides it
    try:
        line_source = stream.xreadlines
    except AttributeError:
        line_source = stream.readlines
    stripped_lines = (raw.strip() for raw in line_source())
    return [text for text in stripped_lines
            if text and not (comments is not None
                             and text.startswith(comments))]
| 325 |
| 326 |
def export(from_dir, to_dir,
           blacklist=BASE_BLACKLIST, ignore_ext=IGNORED_EXTENSIONS,
           verbose=0):
    """Make a mirror of `from_dir` in `to_dir`, omitting directories
    listed in the black list and files ending with one of the given
    extensions.

    :type from_dir: str
    :param from_dir: directory to export

    :type to_dir: str
    :param to_dir: destination directory, created if it doesn't exist

    :type blacklist: list or tuple
    :param blacklist:
      list of directory names to ignore, default to the content of
      `BASE_BLACKLIST`

    :type ignore_ext: list or tuple
    :param ignore_ext:
      list of extensions to ignore, default to the content of
      `IGNORED_EXTENSIONS`

    :type verbose: bool
    :param verbose:
      flag indicating whether information about exported files should be
      printed to stderr, default to False

    :raise OSError: if the destination directory can't be created
    """
    # Only create the destination when it's missing, so that real errors
    # (e.g. permission problems) are reported instead of being swallowed.
    if not exists(to_dir):
        mkdir(to_dir)
    # built once: `endswith` accepts a tuple of suffixes
    ignored_suffixes = tuple(ignore_ext)
    prefix_length = len(from_dir)

    def mirrored(src):
        """return the path in `to_dir` matching `src` found in `from_dir`"""
        # Strip the leading separator and let `join` put it back, so the
        # result is right whether or not `from_dir` ends with a separator.
        return join(to_dir, src[prefix_length:].lstrip(sep))

    for directory, dirnames, filenames in walk(from_dir):
        # prune `dirnames` in place so `walk` doesn't enter those directories
        for norecurs in blacklist:
            if norecurs in dirnames:
                dirnames.remove(norecurs)
        for dirname in dirnames:
            src = join(directory, dirname)
            dest = mirrored(src)
            if isdir(src) and not exists(dest):
                mkdir(dest)
        for filename in filenames:
            # don't include files with an ignored extension
            if filename.endswith(ignored_suffixes):
                continue
            src = join(directory, filename)
            dest = mirrored(src)
            if verbose:
                print(src, '->', dest, file=sys.stderr)
            # remove any previous copy before writing the new one
            if exists(dest):
                remove(dest)
            shutil.copy2(src, dest)
| 384 |
| 385 |
def remove_dead_links(directory, verbose=0):
    """Recursively traverse directory and remove all dead links.

    :type directory: str
    :param directory: directory to cleanup

    :type verbose: bool
    :param verbose:
      flag indicating whether information about deleted links should be
      printed to stderr, default to False
    """
    for dirpath, dirnames, filenames in walk(directory):
        # check both lists: a link may be reported by `walk` in either one
        for filename in dirnames + filenames:
            src = join(dirpath, filename)
            # a dead link is a link whose target doesn't exist
            if islink(src) and not exists(src):
                if verbose:
                    print('remove dead link', src, file=sys.stderr)
                remove(src)
| 404 |
OLD | NEW |